In [8]:
import pandas as pd

In [12]:
dataset = pd.read_csv("insurance_pre.csv")
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [32]:
dataset = pd.get_dummies(dataset,dtype = int, drop_first = True)
dataset

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,0,1
1,18,33.770,1,1725.55230,1,0
2,28,33.000,3,4449.46200,1,0
3,33,22.705,0,21984.47061,1,0
4,32,28.880,0,3866.85520,1,0
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,1,0
1334,18,31.920,0,2205.98080,0,0
1335,18,36.850,0,1629.83350,0,0
1336,21,25.800,0,2007.94500,0,0


In [34]:
dataset.columns

Index(['age', 'bmi', 'children', 'charges', 'sex_male', 'smoker_yes'], dtype='object')

In [38]:
independent = dataset[["age","sex_male","bmi","children","smoker_yes"]]
dependent = dataset[["charges"]]

In [82]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [88]:
svr_param_grid = {
    "kernel" : ["linear","rbf","poly","sigmoid"],
    "C" : [10,100,500,1000,200,3000] 
}

dt_param_grid = {
    "criterion" : ["squared_error", "friedman_mse", "absolute_error", "poisson"],
    "max_features" : [None, "sqrt","log2"] ,
    "splitter" : ["best","random"]
}

rf_param_grid = {
    "criterion" : ["squared_error", "friedman_mse", "absolute_error", "poisson"],
    "max_features" : [None, "sqrt","log2"] ,
    "n_estimators" : [10,100]
}
 
svr_grid = GridSearchCV(SVR(),svr_param_grid,refit = True,verbose = 3, n_jobs = -1)
dt_grid = GridSearchCV(DecisionTreeRegressor(),dt_param_grid,refit = True,verbose = 3, n_jobs = -1)
rf_grid = GridSearchCV(RandomForestRegressor(),rf_param_grid,refit = True,verbose = 3, n_jobs = -1)


In [90]:
svr_grid.fit(independent,dependent)
dt_grid.fit(independent,dependent)
rf_grid.fit(independent,dependent)

Fitting 5 folds for each of 24 candidates, totalling 120 fits


  y = column_or_1d(y, warn=True)


Fitting 5 folds for each of 24 candidates, totalling 120 fits
Fitting 5 folds for each of 24 candidates, totalling 120 fits


  return fit_method(estimator, *args, **kwargs)


In [110]:
svr_grid.best_params_

{'C': 1000, 'kernel': 'linear'}

In [112]:
dt_grid.best_params_

{'criterion': 'poisson', 'max_features': None, 'splitter': 'best'}

In [114]:
rf_grid.best_params_

{'criterion': 'friedman_mse', 'max_features': 'sqrt', 'n_estimators': 100}

In [116]:
svr_grid.cv_results_

{'mean_fit_time': array([0.17552934, 0.13603578, 0.11289763, 0.16435976, 0.29401307,
        0.14122143, 0.12207351, 0.15139484, 0.65225506, 0.13803148,
        0.14102249, 0.14899902, 1.03383489, 0.14321585, 0.17273836,
        0.16115975, 0.53676505, 0.17154217, 0.15837626, 0.17213945,
        3.16913705, 0.17732601, 0.29660602, 0.16376376]),
 'std_fit_time': array([0.0270719 , 0.01209295, 0.00336003, 0.00798849, 0.01039148,
        0.00737172, 0.00371067, 0.00603048, 0.07869215, 0.00569778,
        0.00723538, 0.00162049, 0.10983066, 0.00663003, 0.00973749,
        0.01101185, 0.23593144, 0.02520707, 0.01222007, 0.00678233,
        0.52797327, 0.01801247, 0.0552434 , 0.00820581]),
 'mean_score_time': array([0.02134447, 0.07021232, 0.01974735, 0.03550763, 0.01914959,
        0.070612  , 0.01934829, 0.03291225, 0.01795502, 0.06782312,
        0.01895046, 0.0341095 , 0.01855111, 0.06941419, 0.0195477 ,
        0.03271461, 0.02413535, 0.08178163, 0.02094393, 0.03789983,
        0.016355

In [122]:
svr_re = svr_grid.cv_results_

dt_re = dt_grid.cv_results_

rf_re = rf_grid.cv_results_

# table = pd.DataFrame.from_dict(re)
# table

In [124]:
svr_re

{'mean_fit_time': array([0.17552934, 0.13603578, 0.11289763, 0.16435976, 0.29401307,
        0.14122143, 0.12207351, 0.15139484, 0.65225506, 0.13803148,
        0.14102249, 0.14899902, 1.03383489, 0.14321585, 0.17273836,
        0.16115975, 0.53676505, 0.17154217, 0.15837626, 0.17213945,
        3.16913705, 0.17732601, 0.29660602, 0.16376376]),
 'std_fit_time': array([0.0270719 , 0.01209295, 0.00336003, 0.00798849, 0.01039148,
        0.00737172, 0.00371067, 0.00603048, 0.07869215, 0.00569778,
        0.00723538, 0.00162049, 0.10983066, 0.00663003, 0.00973749,
        0.01101185, 0.23593144, 0.02520707, 0.01222007, 0.00678233,
        0.52797327, 0.01801247, 0.0552434 , 0.00820581]),
 'mean_score_time': array([0.02134447, 0.07021232, 0.01974735, 0.03550763, 0.01914959,
        0.070612  , 0.01934829, 0.03291225, 0.01795502, 0.06782312,
        0.01895046, 0.0341095 , 0.01855111, 0.06941419, 0.0195477 ,
        0.03271461, 0.02413535, 0.08178163, 0.02094393, 0.03789983,
        0.016355

In [126]:
dt_re

{'mean_fit_time': array([0.01057138, 0.01076946, 0.00957198, 0.00658355, 0.00837374,
        0.00757899, 0.01037192, 0.00757494, 0.0097703 , 0.00953383,
        0.01057053, 0.00841908, 0.0478724 , 0.03470659, 0.02612886,
        0.02154169, 0.02931929, 0.02174177, 0.01495972, 0.01196666,
        0.01396222, 0.01256862, 0.01276555, 0.00997229]),
 'std_fit_time': array([0.00119655, 0.00212915, 0.00135309, 0.00048996, 0.00101189,
        0.00101867, 0.0004894 , 0.00080046, 0.00425123, 0.00384348,
        0.00386675, 0.00230852, 0.00189178, 0.0027781 , 0.00159596,
        0.00135285, 0.00381497, 0.00171578, 0.00063052, 0.00109328,
        0.0016677 , 0.00101335, 0.00074633, 0.00218429]),
 'mean_score_time': array([0.00418935, 0.00499187, 0.00439043, 0.00598502, 0.00678105,
        0.00498981, 0.00519037, 0.00458956, 0.00638275, 0.00518227,
        0.01021652, 0.00474577, 0.00398903, 0.00418963, 0.00518732,
        0.00438886, 0.00538783, 0.00459094, 0.003793  , 0.00438886,
        0.003790

In [128]:
rf_re

{'mean_fit_time': array([0.07141008, 0.63709645, 0.05624876, 0.4739327 , 0.05345516,
        0.48250585, 0.07140975, 0.63869138, 0.05604844, 0.48569884,
        0.05465288, 0.47592673, 0.2708745 , 2.82385077, 0.15358744,
        1.70464339, 0.17456069, 1.60769958, 0.10392127, 1.01847792,
        0.0885622 , 0.84613881, 0.09035745, 0.78729248]),
 'std_fit_time': array([0.00421289, 0.01116673, 0.00606976, 0.00525444, 0.00162084,
        0.01413549, 0.00184936, 0.00953556, 0.00329937, 0.0139633 ,
        0.00375322, 0.00818536, 0.01049203, 0.12838854, 0.00531351,
        0.13542258, 0.00739911, 0.02702383, 0.00420325, 0.01532578,
        0.00477947, 0.01353913, 0.00573305, 0.08689907]),
 'mean_score_time': array([0.00798149, 0.02413564, 0.00638375, 0.0205451 , 0.00678329,
        0.02293983, 0.00618262, 0.0219389 , 0.00638537, 0.02234063,
        0.00638361, 0.02214074, 0.00558558, 0.02054744, 0.00578518,
        0.02034655, 0.00894837, 0.01934853, 0.00758114, 0.02439942,
        0.006984

In [130]:
svr_table = pd.DataFrame.from_dict(svr_re)
svr_table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.175529,0.027072,0.021344,0.006069,10,linear,"{'C': 10, 'kernel': 'linear'}",-0.018943,0.029886,0.022167,0.003112,-0.066653,-0.006086,0.034672,7
1,0.136036,0.012093,0.070212,0.002054,10,rbf,"{'C': 10, 'kernel': 'rbf'}",-0.110409,-0.097459,-0.074488,-0.099597,-0.124428,-0.101276,0.016459,9
2,0.112898,0.00336,0.019747,0.001934,10,poly,"{'C': 10, 'kernel': 'poly'}",-0.125153,-0.081505,-0.111907,-0.131602,-0.179328,-0.125899,0.031794,13
3,0.16436,0.007988,0.035508,0.003764,10,sigmoid,"{'C': 10, 'kernel': 'sigmoid'}",-0.116743,-0.119312,-0.085214,-0.10821,-0.108716,-0.107639,0.012029,10
4,0.294013,0.010391,0.01915,0.001466,100,linear,"{'C': 100, 'kernel': 'linear'}",0.559313,0.521798,0.547691,0.538805,0.508518,0.535225,0.018122,5
5,0.141221,0.007372,0.070612,0.002475,100,rbf,"{'C': 100, 'kernel': 'rbf'}",-0.160182,-0.107253,-0.12343,-0.137753,-0.209551,-0.147634,0.035511,18
6,0.122074,0.003711,0.019348,0.001493,100,poly,"{'C': 100, 'kernel': 'poly'}",-0.132885,-0.082461,-0.12922,-0.139962,-0.189219,-0.134749,0.033948,15
7,0.151395,0.00603,0.032912,0.00063,100,sigmoid,"{'C': 100, 'kernel': 'sigmoid'}",-0.142455,-0.160831,-0.117544,-0.137984,-0.130402,-0.137843,0.014263,16
8,0.652255,0.078692,0.017955,0.002096,500,linear,"{'C': 500, 'kernel': 'linear'}",0.632135,0.587195,0.619095,0.610472,0.589494,0.607678,0.017242,3
9,0.138031,0.005698,0.067823,0.00432,500,rbf,"{'C': 500, 'kernel': 'rbf'}",-0.160916,-0.103514,-0.128312,-0.139323,-0.213145,-0.149042,0.037006,19


In [132]:
dt_table = pd.DataFrame.from_dict(dt_re)
dt_table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.010571,0.001197,0.004189,0.0004,squared_error,,best,"{'criterion': 'squared_error', 'max_features':...",0.729059,0.642838,0.74204,0.728065,0.682946,0.70499,0.036968,3
1,0.010769,0.002129,0.004992,0.002523,squared_error,,random,"{'criterion': 'squared_error', 'max_features':...",0.70414,0.603811,0.752931,0.755316,0.691037,0.701447,0.055138,6
2,0.009572,0.001353,0.00439,0.000489,squared_error,sqrt,best,"{'criterion': 'squared_error', 'max_features':...",0.718671,0.50514,0.726148,0.675122,0.697109,0.664438,0.081617,14
3,0.006584,0.00049,0.005985,0.002601,squared_error,sqrt,random,"{'criterion': 'squared_error', 'max_features':...",0.587008,0.577938,0.723327,0.734658,0.628944,0.650375,0.066554,17
4,0.008374,0.001012,0.006781,0.002394,squared_error,log2,best,"{'criterion': 'squared_error', 'max_features':...",0.758878,0.570301,0.677921,0.698402,0.678121,0.676725,0.060907,10
5,0.007579,0.001019,0.00499,0.00089,squared_error,log2,random,"{'criterion': 'squared_error', 'max_features':...",0.74595,0.626794,0.569483,0.643711,0.640552,0.645298,0.057002,18
6,0.010372,0.000489,0.00519,0.001465,friedman_mse,,best,"{'criterion': 'friedman_mse', 'max_features': ...",0.717657,0.651649,0.744594,0.738351,0.685712,0.707593,0.034713,2
7,0.007575,0.0008,0.00459,0.000798,friedman_mse,,random,"{'criterion': 'friedman_mse', 'max_features': ...",0.667555,0.619063,0.71631,0.716888,0.771933,0.69835,0.051605,8
8,0.00977,0.004251,0.006383,0.001353,friedman_mse,sqrt,best,"{'criterion': 'friedman_mse', 'max_features': ...",0.691289,0.411231,0.700703,0.724689,0.681575,0.641897,0.116218,20
9,0.009534,0.003843,0.005182,0.003423,friedman_mse,sqrt,random,"{'criterion': 'friedman_mse', 'max_features': ...",0.714246,0.557392,0.690648,0.637523,0.656291,0.65122,0.053933,16


In [134]:
rf_table = pd.DataFrame.from_dict(rf_re)
rf_table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.07141,0.004213,0.007981,0.003566,squared_error,,10,"{'criterion': 'squared_error', 'max_features':...",0.847573,0.75222,0.869801,0.81686,0.829175,0.823126,0.039698,13
1,0.637096,0.011167,0.024136,0.002706,squared_error,,100,"{'criterion': 'squared_error', 'max_features':...",0.847575,0.770004,0.856672,0.817168,0.836482,0.82558,0.030753,10
2,0.056249,0.00607,0.006384,0.001017,squared_error,sqrt,10,"{'criterion': 'squared_error', 'max_features':...",0.841869,0.76119,0.85051,0.816757,0.822016,0.818469,0.031211,17
3,0.473933,0.005254,0.020545,0.001354,squared_error,sqrt,100,"{'criterion': 'squared_error', 'max_features':...",0.859717,0.780065,0.86261,0.831205,0.833974,0.833514,0.02965,4
4,0.053455,0.001621,0.006783,0.001324,squared_error,log2,10,"{'criterion': 'squared_error', 'max_features':...",0.841107,0.75993,0.805066,0.808697,0.823014,0.807563,0.026983,23
5,0.482506,0.014135,0.02294,0.002092,squared_error,log2,100,"{'criterion': 'squared_error', 'max_features':...",0.85733,0.779904,0.860964,0.828091,0.834046,0.832067,0.029031,7
6,0.07141,0.001849,0.006183,0.001165,friedman_mse,,10,"{'criterion': 'friedman_mse', 'max_features': ...",0.835974,0.772369,0.85368,0.813384,0.830483,0.821178,0.027586,14
7,0.638691,0.009536,0.021939,0.000629,friedman_mse,,100,"{'criterion': 'friedman_mse', 'max_features': ...",0.844879,0.772809,0.855766,0.815517,0.831101,0.824014,0.028941,11
8,0.056048,0.003299,0.006385,0.001017,friedman_mse,sqrt,10,"{'criterion': 'friedman_mse', 'max_features': ...",0.85082,0.766637,0.844727,0.817334,0.814458,0.818795,0.029798,16
9,0.485699,0.013963,0.022341,0.001017,friedman_mse,sqrt,100,"{'criterion': 'friedman_mse', 'max_features': ...",0.862332,0.778242,0.864426,0.830345,0.839978,0.835065,0.031243,1


In [142]:

dt_res = dt_grid.predict([[24,25.5,0,1,0]])
dt_res
# Future_Prediction=grid.
# ,→predict([[age_input,bmi_input,children_input,sex_male_input,smoker_yes_input]])#␣
# ,→change the paramter,play with it.
# print("Future_Prediction={}".format(Future_Prediction))




array([2775.19215])