In [1]:
import pandas as pd

In [3]:
dataset = pd.read_csv("insurance_pre.csv")
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [5]:
dataset = pd.get_dummies(dataset,dtype = int, drop_first = True)
dataset

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,0,1
1,18,33.770,1,1725.55230,1,0
2,28,33.000,3,4449.46200,1,0
3,33,22.705,0,21984.47061,1,0
4,32,28.880,0,3866.85520,1,0
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,1,0
1334,18,31.920,0,2205.98080,0,0
1335,18,36.850,0,1629.83350,0,0
1336,21,25.800,0,2007.94500,0,0


In [7]:
dataset.columns

Index(['age', 'bmi', 'children', 'charges', 'sex_male', 'smoker_yes'], dtype='object')

In [9]:
independent = dataset[["age","sex_male","bmi","children","smoker_yes"]]
dependent = dataset[["charges"]]

In [11]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [13]:
svr_param_grid = {
    "kernel" : ["linear","rbf","poly","sigmoid"],
    "C" : [10,100,500,1000,200,3000] 
}

dt_param_grid = {
    "criterion" : ["squared_error", "friedman_mse", "absolute_error", "poisson"],
    "max_features" : [None, "sqrt","log2"] ,
    "splitter" : ["best","random"]
}

rf_param_grid = {
    "criterion" : ["squared_error", "friedman_mse", "absolute_error", "poisson"],
    "max_features" : [None, "sqrt","log2"] ,
    "n_estimators" : [10,100]
}
 
svr_grid = GridSearchCV(SVR(),svr_param_grid,refit = True,verbose = 3, n_jobs = -1)
dt_grid = GridSearchCV(DecisionTreeRegressor(),dt_param_grid,refit = True,verbose = 3, n_jobs = -1)
rf_grid = GridSearchCV(RandomForestRegressor(),rf_param_grid,refit = True,verbose = 3, n_jobs = -1)


In [15]:
svr_grid.fit(independent,dependent)
dt_grid.fit(independent,dependent)
rf_grid.fit(independent,dependent)

Fitting 5 folds for each of 24 candidates, totalling 120 fits


  y = column_or_1d(y, warn=True)


Fitting 5 folds for each of 24 candidates, totalling 120 fits
Fitting 5 folds for each of 24 candidates, totalling 120 fits


  return fit_method(estimator, *args, **kwargs)


In [17]:
svr_grid.best_params_

{'C': 1000, 'kernel': 'linear'}

In [19]:
dt_grid.best_params_

{'criterion': 'poisson', 'max_features': None, 'splitter': 'best'}

In [21]:
rf_grid.best_params_

{'criterion': 'squared_error', 'max_features': 'log2', 'n_estimators': 100}

In [23]:
svr_grid.cv_results_

{'mean_fit_time': array([0.34333005, 0.13523641, 0.11353493, 0.14880171, 0.26774311,
        0.12534394, 0.11748881, 0.14321241, 0.66019373, 0.13025155,
        0.13505635, 0.14797268, 1.05670171, 0.13535371, 0.16219506,
        0.14702778, 0.45984187, 0.121523  , 0.13030853, 0.14870968,
        3.11322646, 0.14280272, 0.2374629 , 0.15183377]),
 'std_fit_time': array([0.10824036, 0.00560625, 0.00923474, 0.0101538 , 0.01644888,
        0.00252294, 0.00490638, 0.00320633, 0.06652302, 0.00304119,
        0.00426335, 0.00595514, 0.11877554, 0.00650162, 0.00740094,
        0.00357321, 0.17797399, 0.0016132 , 0.00229509, 0.00707257,
        0.72732969, 0.00522485, 0.01502347, 0.00572224]),
 'mean_score_time': array([0.0913125 , 0.06956997, 0.01876988, 0.03015513, 0.01849747,
        0.06748571, 0.01926751, 0.03114781, 0.01675968, 0.06536684,
        0.01809578, 0.03220272, 0.01646461, 0.06664467, 0.0189497 ,
        0.03051629, 0.01740751, 0.06600699, 0.01877265, 0.03123512,
        0.014817

In [25]:
svr_re = svr_grid.cv_results_

dt_re = dt_grid.cv_results_

rf_re = rf_grid.cv_results_

# table = pd.DataFrame.from_dict(re)
# table

In [27]:
svr_re

{'mean_fit_time': array([0.34333005, 0.13523641, 0.11353493, 0.14880171, 0.26774311,
        0.12534394, 0.11748881, 0.14321241, 0.66019373, 0.13025155,
        0.13505635, 0.14797268, 1.05670171, 0.13535371, 0.16219506,
        0.14702778, 0.45984187, 0.121523  , 0.13030853, 0.14870968,
        3.11322646, 0.14280272, 0.2374629 , 0.15183377]),
 'std_fit_time': array([0.10824036, 0.00560625, 0.00923474, 0.0101538 , 0.01644888,
        0.00252294, 0.00490638, 0.00320633, 0.06652302, 0.00304119,
        0.00426335, 0.00595514, 0.11877554, 0.00650162, 0.00740094,
        0.00357321, 0.17797399, 0.0016132 , 0.00229509, 0.00707257,
        0.72732969, 0.00522485, 0.01502347, 0.00572224]),
 'mean_score_time': array([0.0913125 , 0.06956997, 0.01876988, 0.03015513, 0.01849747,
        0.06748571, 0.01926751, 0.03114781, 0.01675968, 0.06536684,
        0.01809578, 0.03220272, 0.01646461, 0.06664467, 0.0189497 ,
        0.03051629, 0.01740751, 0.06600699, 0.01877265, 0.03123512,
        0.014817

In [29]:
dt_re

{'mean_fit_time': array([0.19021664, 0.00857568, 0.00916729, 0.00937667, 0.01076188,
        0.00877419, 0.01038446, 0.01179872, 0.01106653, 0.00767717,
        0.0111701 , 0.0082912 , 0.04882097, 0.03371563, 0.03014779,
        0.02194676, 0.02944384, 0.02345672, 0.01936679, 0.01461787,
        0.01438174, 0.01458225, 0.01416268, 0.01257792]),
 'std_fit_time': array([0.08470032, 0.00101698, 0.00213408, 0.00205299, 0.00239342,
        0.00146613, 0.00049624, 0.00574061, 0.00261098, 0.00124504,
        0.00540795, 0.00243204, 0.00109025, 0.00131405, 0.00281918,
        0.00206983, 0.00305196, 0.00100584, 0.00544316, 0.00091629,
        0.00198492, 0.00104062, 0.00116166, 0.00101743]),
 'mean_score_time': array([0.0061832 , 0.00618491, 0.00638371, 0.00638103, 0.00478764,
        0.00719504, 0.00518651, 0.00498166, 0.00631251, 0.00561128,
        0.00518513, 0.00619564, 0.00398984, 0.00359025, 0.00409288,
        0.00559716, 0.00478759, 0.0046957 , 0.0046936 , 0.00405598,
        0.005389

In [31]:
rf_re

{'mean_fit_time': array([0.60988779, 0.63639655, 0.09140177, 0.4511939 , 0.05427341,
        0.45705824, 0.06725082, 0.58190513, 0.05377026, 0.45709243,
        0.05587664, 0.45736814, 0.25096202, 2.43444552, 0.1448523 ,
        1.55822968, 0.17405405, 1.64774876, 0.12434487, 0.94980602,
        0.08976398, 0.79896364, 0.09640284, 0.75980954]),
 'std_fit_time': array([0.27084272, 0.07154381, 0.03150187, 0.01472615, 0.00150839,
        0.00742414, 0.00410764, 0.00464011, 0.00146095, 0.01607287,
        0.00280146, 0.01560074, 0.00441575, 0.06019631, 0.00560625,
        0.12543831, 0.02502388, 0.02941994, 0.02062356, 0.0214392 ,
        0.00472656, 0.01945552, 0.01531528, 0.11608556]),
 'mean_score_time': array([0.00619087, 0.05366449, 0.00698214, 0.02218542, 0.00699492,
        0.02027955, 0.00610242, 0.02054968, 0.00717645, 0.02517276,
        0.00759392, 0.0197793 , 0.00691943, 0.01806097, 0.00669618,
        0.01926575, 0.00600038, 0.02193127, 0.00718126, 0.01946158,
        0.010670

In [33]:
svr_table = pd.DataFrame.from_dict(svr_re)
svr_table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.34333,0.10824,0.091313,0.036538,10,linear,"{'C': 10, 'kernel': 'linear'}",-0.018943,0.029886,0.022167,0.003112,-0.066653,-0.006086,0.034672,7
1,0.135236,0.005606,0.06957,0.002695,10,rbf,"{'C': 10, 'kernel': 'rbf'}",-0.110409,-0.097459,-0.074488,-0.099597,-0.124428,-0.101276,0.016459,9
2,0.113535,0.009235,0.01877,0.000397,10,poly,"{'C': 10, 'kernel': 'poly'}",-0.125153,-0.081505,-0.111907,-0.131602,-0.179328,-0.125899,0.031794,13
3,0.148802,0.010154,0.030155,0.001708,10,sigmoid,"{'C': 10, 'kernel': 'sigmoid'}",-0.116743,-0.119312,-0.085214,-0.10821,-0.108716,-0.107639,0.012029,10
4,0.267743,0.016449,0.018497,0.001983,100,linear,"{'C': 100, 'kernel': 'linear'}",0.559313,0.521798,0.547691,0.538805,0.508518,0.535225,0.018122,5
5,0.125344,0.002523,0.067486,0.001949,100,rbf,"{'C': 100, 'kernel': 'rbf'}",-0.160182,-0.107253,-0.12343,-0.137753,-0.209551,-0.147634,0.035511,18
6,0.117489,0.004906,0.019268,0.000877,100,poly,"{'C': 100, 'kernel': 'poly'}",-0.132885,-0.082461,-0.12922,-0.139962,-0.189219,-0.134749,0.033948,15
7,0.143212,0.003206,0.031148,0.001782,100,sigmoid,"{'C': 100, 'kernel': 'sigmoid'}",-0.142455,-0.160831,-0.117544,-0.137984,-0.130402,-0.137843,0.014263,16
8,0.660194,0.066523,0.01676,0.001164,500,linear,"{'C': 500, 'kernel': 'linear'}",0.632135,0.587195,0.619095,0.610472,0.589494,0.607678,0.017242,3
9,0.130252,0.003041,0.065367,0.001787,500,rbf,"{'C': 500, 'kernel': 'rbf'}",-0.160916,-0.103514,-0.128312,-0.139323,-0.213145,-0.149042,0.037006,19


In [35]:
dt_table = pd.DataFrame.from_dict(dt_re)
dt_table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.190217,0.0847,0.006183,0.00193152,squared_error,,best,"{'criterion': 'squared_error', 'max_features':...",0.727836,0.613656,0.732028,0.734655,0.681047,0.697844,0.04646,8
1,0.008576,0.001017,0.006185,0.001164037,squared_error,,random,"{'criterion': 'squared_error', 'max_features':...",0.685613,0.620592,0.73827,0.679433,0.696374,0.684056,0.037795,12
2,0.009167,0.002134,0.006384,0.001620253,squared_error,sqrt,best,"{'criterion': 'squared_error', 'max_features':...",0.752888,0.627336,0.713032,0.704302,0.653168,0.690145,0.044661,10
3,0.009377,0.002053,0.006381,0.00119965,squared_error,sqrt,random,"{'criterion': 'squared_error', 'max_features':...",0.695164,0.534464,0.719885,0.720208,0.65999,0.665942,0.069326,16
4,0.010762,0.002393,0.004788,0.0009772721,squared_error,log2,best,"{'criterion': 'squared_error', 'max_features':...",0.754495,0.636523,0.721162,0.639975,0.700202,0.690471,0.046035,9
5,0.008774,0.001466,0.007195,0.002018791,squared_error,log2,random,"{'criterion': 'squared_error', 'max_features':...",0.612762,0.53152,0.743824,0.661957,0.667986,0.64361,0.069985,21
6,0.010384,0.000496,0.005187,0.0009662412,friedman_mse,,best,"{'criterion': 'friedman_mse', 'max_features': ...",0.724003,0.656545,0.729275,0.714232,0.685698,0.701951,0.027238,6
7,0.011799,0.005741,0.004982,0.0008941734,friedman_mse,,random,"{'criterion': 'friedman_mse', 'max_features': ...",0.738274,0.587757,0.741927,0.76097,0.698311,0.705448,0.062278,4
8,0.011067,0.002611,0.006313,0.002391831,friedman_mse,sqrt,best,"{'criterion': 'friedman_mse', 'max_features': ...",0.665494,0.574085,0.703705,0.703911,0.719921,0.673423,0.052801,13
9,0.007677,0.001245,0.005611,0.001897384,friedman_mse,sqrt,random,"{'criterion': 'friedman_mse', 'max_features': ...",0.728444,0.492735,0.630681,0.612861,0.685929,0.63013,0.079944,22


In [37]:
rf_table = pd.DataFrame.from_dict(rf_re)
rf_table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.609888,0.270843,0.006191,0.000989,squared_error,,10,"{'criterion': 'squared_error', 'max_features':...",0.832143,0.754516,0.841748,0.802056,0.808975,0.807887,0.030393,23
1,0.636397,0.071544,0.053664,0.03037,squared_error,,100,"{'criterion': 'squared_error', 'max_features':...",0.849166,0.77003,0.856296,0.815858,0.834553,0.82518,0.030854,10
2,0.091402,0.031502,0.006982,0.000892,squared_error,sqrt,10,"{'criterion': 'squared_error', 'max_features':...",0.848401,0.747933,0.851746,0.829662,0.826923,0.820933,0.037802,13
3,0.451194,0.014726,0.022185,0.002015,squared_error,sqrt,100,"{'criterion': 'squared_error', 'max_features':...",0.854384,0.778622,0.854507,0.825775,0.836348,0.829927,0.027903,8
4,0.054273,0.001508,0.006995,0.001098,squared_error,log2,10,"{'criterion': 'squared_error', 'max_features':...",0.831354,0.735843,0.851447,0.815655,0.824898,0.811839,0.039775,22
5,0.457058,0.007424,0.02028,0.001708,squared_error,log2,100,"{'criterion': 'squared_error', 'max_features':...",0.857409,0.780031,0.866292,0.830946,0.838427,0.834621,0.030101,1
6,0.067251,0.004108,0.006102,0.00067,friedman_mse,,10,"{'criterion': 'friedman_mse', 'max_features': ...",0.835211,0.765833,0.839037,0.802931,0.835288,0.81566,0.028138,20
7,0.581905,0.00464,0.02055,0.002249,friedman_mse,,100,"{'criterion': 'friedman_mse', 'max_features': ...",0.847992,0.773403,0.85111,0.815489,0.835897,0.824778,0.028571,11
8,0.05377,0.001461,0.007176,0.001155,friedman_mse,sqrt,10,"{'criterion': 'friedman_mse', 'max_features': ...",0.841934,0.763838,0.856658,0.808384,0.826077,0.819378,0.032091,15
9,0.457092,0.016073,0.025173,0.004293,friedman_mse,sqrt,100,"{'criterion': 'friedman_mse', 'max_features': ...",0.857654,0.783162,0.864323,0.828591,0.838532,0.834453,0.028679,2


In [55]:
from sklearn.metrics import r2_score

#SvM R2 Score Value
y_svm_pred = svr_grid.predict(independent)
svm_r2_score = r2_score(dependent,y_svm_pred)
svm_r2_score

0.7179049746029801

In [57]:


#Decision Tree R2 Score Value
y_dt_pred = dt_grid.predict(independent)
dt_r2_score = r2_score(dependent,y_dt_pred)
dt_r2_score

0.998667156135576

In [61]:
#Random Forest R2 Score Value
y_rf_pred = rf_grid.predict(independent)
rf_r2_score = r2_score(dependent,y_dt_pred)
rf_r2_score
 

0.998667156135576

In [69]:
#Saving the ML Model (Decision Tree Model => R2 Score : 0.99
import pickle 
pickle.dump(dt_grid,open("GRID_SEARCH_FINAL_MODEL.sav","wb"))

In [71]:
#Predicting Using Decision Tree
dt_res = dt_grid.predict([[24,25.5,0,1,0]])
dt_res
 




array([2775.19215])