## To predict the insurance charge based on customers age, bmi, children, sex and smoking habit.

### Grid Search - For Automatic selection of Best Model by trying with different playable parameters Support Vector Machine Algorithm.

In [1]:
#Library file load
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
#Dataset Load
data=pd.read_csv("insurance.csv")

In [3]:
data.shape

(1338, 6)

In [4]:
data.head()

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.9,0,yes,16884.924
1,18,male,33.77,1,no,1725.5523
2,28,male,33.0,3,no,4449.462
3,33,male,22.705,0,no,21984.47061
4,32,male,28.88,0,no,3866.8552


In [5]:
data=pd.get_dummies(data,drop_first=True)

In [6]:
data.head()

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.9,0,16884.924,0,1
1,18,33.77,1,1725.5523,1,0
2,28,33.0,3,4449.462,1,0
3,33,22.705,0,21984.47061,1,0
4,32,28.88,0,3866.8552,1,0


In [7]:
x=data[["age","bmi","children","sex_male","smoker_yes"]]
y=data["charges"]

In [8]:
import warnings
warnings.filterwarnings('ignore')

In [9]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test= train_test_split(x,y,test_size=1/3,random_state=0)

In [10]:
x_train.head()

Unnamed: 0,age,bmi,children,sex_male,smoker_yes
482,18,31.35,0,0,0
338,50,32.3,1,1,1
356,46,43.89,3,1,0
869,25,24.3,3,0,0
182,22,19.95,3,1,0


In [11]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
x_train=sc.fit_transform(x_train)
x_test=sc.fit_transform(x_test)

In [12]:
x_train

array([[-1.53963418,  0.11036616, -0.90788827, -0.98885138, -0.49929923],
       [ 0.74809711,  0.26412451, -0.0755796 ,  1.01127431,  2.00280702],
       [ 0.4621307 ,  2.13997636,  1.58903774,  1.01127431, -0.49929923],
       ...,
       [ 0.03318108, -0.90443894, -0.90788827,  1.01127431, -0.49929923],
       [-1.46814257,  0.7869029 , -0.90788827,  1.01127431, -0.49929923],
       [-0.46726014, -1.96941782, -0.0755796 , -0.98885138, -0.49929923]])

In [13]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
param_grid = {'kernel':['rbf','poly','sigmoid','linear'],
              'C':[10,100,500,1000,2000,3000],'gamma':['auto','scale']} 

grid = GridSearchCV(SVR(), param_grid, refit = True, verbose = 3,n_jobs=-1)   
# fitting the model for grid search 
grid.fit(x_train, y_train) 

Fitting 5 folds for each of 48 candidates, totalling 240 fits


GridSearchCV(estimator=SVR(), n_jobs=-1,
             param_grid={'C': [10, 100, 500, 1000, 2000, 3000],
                         'gamma': ['auto', 'scale'],
                         'kernel': ['rbf', 'poly', 'sigmoid', 'linear']},
             verbose=3)

In [14]:
# best parameter after tuning 
re=grid.cv_results_
#print(re)
grid_predictions = grid.predict(x_test)    
# print classification report 
from sklearn.metrics import mean_squared_error
rmse=mean_squared_error(y_test,grid_predictions,squared=False)
print("The RMSE value for best parameter {}:".format(grid.best_params_),rmse)

The RMSE value for best parameter {'C': 3000, 'gamma': 'scale', 'kernel': 'poly'}: 5083.331444626086


In [15]:
table=pd.DataFrame.from_dict(re)

In [16]:
table.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.078352,0.016574,0.052967,0.01874,10,auto,rbf,"{'C': 10, 'gamma': 'auto', 'kernel': 'rbf'}",-0.004176,0.022594,-0.118956,-0.082926,-0.103473,-0.057387,0.056205,43
1,0.044771,0.005772,0.006596,0.000491,10,auto,poly,"{'C': 10, 'gamma': 'auto', 'kernel': 'poly'}",0.04742,0.077536,-0.060527,-0.009476,-0.050823,0.000826,0.054025,40
2,0.059962,0.002189,0.010195,0.0004,10,auto,sigmoid,"{'C': 10, 'gamma': 'auto', 'kernel': 'sigmoid'}",0.044787,0.081689,-0.072355,-0.027541,-0.05147,-0.004978,0.058648,42
3,0.045969,0.005095,0.009595,0.004627,10,auto,linear,"{'C': 10, 'gamma': 'auto', 'kernel': 'linear'}",0.387624,0.461268,0.288301,0.34054,0.297825,0.355112,0.063693,33
4,0.112929,0.014204,0.06456,0.016671,10,scale,rbf,"{'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}",-0.003956,0.022453,-0.119035,-0.082925,-0.10351,-0.057395,0.05623,44
