In [1]:
# Importing Libraries

import matplotlib.pyplot as plt
import numpy as np

In [2]:
import pandas as pd

# Reading the Dataset

dataset = pd.read_csv("insurance_pre.csv")
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [3]:
# Converting the nominal data to numeric data

dataset = pd.get_dummies(dataset, drop_first = True)
dataset

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,0,1
1,18,33.770,1,1725.55230,1,0
2,28,33.000,3,4449.46200,1,0
3,33,22.705,0,21984.47061,1,0
4,32,28.880,0,3866.85520,1,0
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,1,0
1334,18,31.920,0,2205.98080,0,0
1335,18,36.850,0,1629.83350,0,0
1336,21,25.800,0,2007.94500,0,0


In [4]:
# Getting the column names

dataset.columns

Index(['age', 'bmi', 'children', 'charges', 'sex_male', 'smoker_yes'], dtype='object')

In [5]:
# Splitting the dataset into input and output variables

independent = dataset[['age', 'bmi', 'children', 'sex_male', 'smoker_yes']]
independent

Unnamed: 0,age,bmi,children,sex_male,smoker_yes
0,19,27.900,0,0,1
1,18,33.770,1,1,0
2,28,33.000,3,1,0
3,33,22.705,0,1,0
4,32,28.880,0,1,0
...,...,...,...,...,...
1333,50,30.970,3,1,0
1334,18,31.920,0,0,0
1335,18,36.850,0,0,0
1336,21,25.800,0,0,0


In [6]:
dependent = dataset[["charges"]]
dependent

Unnamed: 0,charges
0,16884.92400
1,1725.55230
2,4449.46200
3,21984.47061
4,3866.85520
...,...
1333,10600.54830
1334,2205.98080
1335,1629.83350
1336,2007.94500


In [15]:
#Importing GridSearchCv and DecissiontreeRegressor to create the best model 

from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
param_grid = {'criterion' :['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
              'max_features' : [None, 'sqrt', 'log2'], 'splitter' : ['best', 'random']}

grid = GridSearchCV(DecisionTreeRegressor(), param_grid, refit = True, verbose = 3, n_jobs = -1 )
grid.fit(independent,dependent)

Fitting 5 folds for each of 24 candidates, totalling 120 fits


In [16]:
# Printing the best model from the grid

re = grid.cv_results_

print("The R_Score for Best parameter is {} : ".format(grid.best_params_))

The R_Score for Best parameter is {'criterion': 'absolute_error', 'max_features': 'sqrt', 'splitter': 'best'} : 


In [11]:
# getting all the grid values in table format

table = pd.DataFrame.from_dict(re)
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.026785,0.00664,0.004832,0.001457,squared_error,,best,"{'criterion': 'squared_error', 'max_features':...",0.727831,0.652241,0.737099,0.736132,0.67445,0.705551,0.035316,6
1,0.017419,0.007535,0.003083,0.002781,squared_error,,random,"{'criterion': 'squared_error', 'max_features':...",0.6957,0.434101,0.72425,0.686801,0.71395,0.65096,0.109227,21
2,0.011708,0.001355,0.004537,0.00253,squared_error,sqrt,best,"{'criterion': 'squared_error', 'max_features':...",0.750878,0.614623,0.781401,0.650887,0.696007,0.698759,0.061521,7
3,0.007654,0.004525,0.006059,0.004075,squared_error,sqrt,random,"{'criterion': 'squared_error', 'max_features':...",0.703072,0.546293,0.632162,0.700369,0.745779,0.665535,0.069849,17
4,0.007432,0.006068,0.0,0.0,squared_error,log2,best,"{'criterion': 'squared_error', 'max_features':...",0.756563,0.636133,0.726019,0.603463,0.656255,0.675687,0.056978,12
5,0.006298,0.007713,0.008331,0.006331,squared_error,log2,random,"{'criterion': 'squared_error', 'max_features':...",0.643489,0.573429,0.662179,0.769912,0.624497,0.654701,0.064765,19
6,0.014885,0.007115,0.002168,0.004337,friedman_mse,,best,"{'criterion': 'friedman_mse', 'max_features': ...",0.726216,0.644503,0.743626,0.705561,0.673073,0.698596,0.035823,8
7,0.011124,0.010014,0.006397,0.007369,friedman_mse,,random,"{'criterion': 'friedman_mse', 'max_features': ...",0.688316,0.607168,0.642681,0.702317,0.694817,0.66706,0.03646,15
8,0.011399,0.005831,0.001753,0.003505,friedman_mse,sqrt,best,"{'criterion': 'friedman_mse', 'max_features': ...",0.716577,0.65583,0.675704,0.681459,0.760613,0.698037,0.036917,9
9,0.003505,0.004293,0.006005,0.007354,friedman_mse,sqrt,random,"{'criterion': 'friedman_mse', 'max_features': ...",0.706308,0.577462,0.731908,0.673332,0.67174,0.67215,0.052355,13


In [17]:
# Getting the inputs for future prediction

age_input = float(input('Age: '))
bmi_input = float(input('BMI: '))
children_input = float(input('Children: '))
sex_male_input = float(input('Sex Male 0 or 1: '))
smoker_yes_input = float(input('Smoker yes 0 or 1: '))

Age: 32
BMI: 43
Children: 2
Sex Male 0 or 1: 0
Smoker yes 0 or 1: 1


In [18]:
# Evaluating and printing the Predicted value

Future_Prediction = grid.predict([[age_input,bmi_input,children_input,sex_male_input,smoker_yes_input]])
print("Future Prediction {} : ".format(Future_Prediction))                                  

Future Prediction [43896.3763] : 


