In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Loading the dataset
insurance_data=pd.read_csv('insurance_pre.csv')
insurance_data

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [3]:
#Getting the column names of insurance dataset
insurance_data.columns

Index(['age', 'sex', 'bmi', 'children', 'smoker', 'charges'], dtype='object')

In [4]:
#Converting categorical data into numerical data
ins_data_numerical=pd.get_dummies(insurance_data,drop_first=True)
ins_data_numerical

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,0,1
1,18,33.770,1,1725.55230,1,0
2,28,33.000,3,4449.46200,1,0
3,33,22.705,0,21984.47061,1,0
4,32,28.880,0,3866.85520,1,0
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,1,0
1334,18,31.920,0,2205.98080,0,0
1335,18,36.850,0,1629.83350,0,0
1336,21,25.800,0,2007.94500,0,0


In [5]:
#Getting the column names of insurance dataset, after converting categorical data to numerical 
ins_data_numerical.columns

Index(['age', 'bmi', 'children', 'charges', 'sex_male', 'smoker_yes'], dtype='object')

In [6]:
#Storing input and output into separate variables
indep_ins_Data=ins_data_numerical[['age', 'bmi', 'children', 'sex_male', 'smoker_yes']]
dep_ins_data=ins_data_numerical[['charges']]

In [7]:
#Splitting data into train and test
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(indep_ins_Data,dep_ins_data,test_size=1/3,random_state=0)

In [8]:
#Applying standardization to bring data into same scale
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
X_train=sc.fit_transform(X_train)
X_test=sc.transform(X_test)

In [11]:
#Building a Grid search cross validated SVM model using train data
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
parameter_grid = {'kernel':['linear','rbf','poly','sigmoid'], 'C':[10,100,1000,5000,10000], 'gamma':['auto','scale']}
gridSearchSVMModel=GridSearchCV(SVR(),parameter_grid,refit=True,verbose=3,n_jobs=-1)
gridSearchSVMModel=gridSearchSVMModel.fit(X_train,y_train)

Fitting 5 folds for each of 40 candidates, totalling 200 fits


  y = column_or_1d(y, warn=True)


In [15]:
#Getting the best params from the grid and displaying the same
result=gridSearchSVMModel.cv_results_
print("The best parameter for which R_score value would be high is{}:".format(gridSearchSVMModel.best_params_))

The R_score value for best parameter {'C': 10000, 'gamma': 'scale', 'kernel': 'rbf'}:


In [16]:
#Creating a table and displaying the results
table=pd.DataFrame().from_dict(result)
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.058139,0.00679,0.010044,0.00217,10,auto,linear,"{'C': 10, 'gamma': 'auto', 'kernel': 'linear'}",0.387624,0.461268,0.288301,0.34054,0.297825,0.355112,0.063693,25
1,0.071015,0.007125,0.038555,0.006173,10,auto,rbf,"{'C': 10, 'gamma': 'auto', 'kernel': 'rbf'}",-0.004176,0.022594,-0.118956,-0.082926,-0.103473,-0.057387,0.056205,35
2,0.053311,0.003961,0.014534,0.003935,10,auto,poly,"{'C': 10, 'gamma': 'auto', 'kernel': 'poly'}",0.04742,0.077536,-0.060527,-0.009476,-0.050823,0.000826,0.054025,32
3,0.073461,0.003612,0.016748,0.002303,10,auto,sigmoid,"{'C': 10, 'gamma': 'auto', 'kernel': 'sigmoid'}",0.044787,0.081689,-0.072355,-0.027541,-0.05147,-0.004978,0.058648,34
4,0.0466,0.005315,0.011232,0.002656,10,scale,linear,"{'C': 10, 'gamma': 'scale', 'kernel': 'linear'}",0.387624,0.461268,0.288301,0.34054,0.297825,0.355112,0.063693,25
5,0.066873,0.011354,0.036138,0.001628,10,scale,rbf,"{'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}",-0.003956,0.022453,-0.119035,-0.082925,-0.10351,-0.057395,0.05623,36
6,0.050716,0.006571,0.005725,0.005938,10,scale,poly,"{'C': 10, 'gamma': 'scale', 'kernel': 'poly'}",0.043648,0.07978,-0.059229,-0.009498,-0.050317,0.000877,0.053658,31
7,0.070332,0.005671,0.01345,0.003905,10,scale,sigmoid,"{'C': 10, 'gamma': 'scale', 'kernel': 'sigmoid'}",0.043946,0.08223,-0.072132,-0.027546,-0.051337,-0.004968,0.058595,33
8,0.065141,0.011698,0.009242,0.001857,100,auto,linear,"{'C': 100, 'gamma': 'auto', 'kernel': 'linear'}",0.596232,0.635776,0.566816,0.588799,0.537415,0.585008,0.0326,19
9,0.061826,0.005588,0.038931,0.006652,100,auto,rbf,"{'C': 100, 'gamma': 'auto', 'kernel': 'rbf'}",0.303414,0.319385,0.155546,0.208414,0.161756,0.229703,0.069348,29


In [17]:
#Getting the inputs from the user and predicting for future
age_input=float(input("Age:"))
bmi_input=float(input("BMI:"))
children_input=float(input("Children:"))
sex_male_input=int(input("Sex Male 0 or 1:"))
smoker_yes_input=int(input("Smoker Yes 0 or 1:"))

Age:30
BMI:20.8
Children:0.0
Sex Male 0 or 1:1
Smoker Yes 0 or 1:0


In [18]:
Future_Prediction=gridSearchSVMModel.predict([[age_input,bmi_input,children_input,sex_male_input,smoker_yes_input]])
print("Future_Prediction={}".format(Future_Prediction))

Future_Prediction=[17332.30453654]
