In [2]:
#importing the Libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
#Reading the Dataset
dataset=pd.read_csv('insurance_pre.csv')
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [4]:
#Convert the "sex" column to a categorical type
dataset['sex']=dataset['sex'].astype('category')

#Convert the categorical values into numerical codes
dataset['sex']=dataset['sex'].cat.codes

In [5]:
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,0,27.900,0,yes,16884.92400
1,18,1,33.770,1,no,1725.55230
2,28,1,33.000,3,no,4449.46200
3,33,1,22.705,0,no,21984.47061
4,32,1,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,1,30.970,3,no,10600.54830
1334,18,0,31.920,0,no,2205.98080
1335,18,0,36.850,0,no,1629.83350
1336,21,0,25.800,0,no,2007.94500


In [6]:
#Convert the "smoker" column to a categorical type
dataset['smoker']=dataset['smoker'].astype('category')

#Convert the categorical values into numerical codes
dataset['smoker']=dataset['smoker'].cat.codes
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,0,27.900,0,1,16884.92400
1,18,1,33.770,1,0,1725.55230
2,28,1,33.000,3,0,4449.46200
3,33,1,22.705,0,0,21984.47061
4,32,1,28.880,0,0,3866.85520
...,...,...,...,...,...,...
1333,50,1,30.970,3,0,10600.54830
1334,18,0,31.920,0,0,2205.98080
1335,18,0,36.850,0,0,1629.83350
1336,21,0,25.800,0,0,2007.94500


In [10]:
#dataset = pd.get_dummies(dataset, drop_first=True)
#dataset = dataset.astype(int)

In [11]:
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,0,27.900,0,1,16884.92400
1,18,1,33.770,1,0,1725.55230
2,28,1,33.000,3,0,4449.46200
3,33,1,22.705,0,0,21984.47061
4,32,1,28.880,0,0,3866.85520
...,...,...,...,...,...,...
1333,50,1,30.970,3,0,10600.54830
1334,18,0,31.920,0,0,2205.98080
1335,18,0,36.850,0,0,1629.83350
1336,21,0,25.800,0,0,2007.94500


In [12]:
indep=dataset[['age', 'bmi', 'children','sex', 'smoker']]
dep=dataset[['charges']]

In [13]:
#Splitting into training and test set
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(indep, dep, test_size=1/3, random_state=0)

In [14]:
#preprocessing
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

In [8]:
#Model Selection - Using GridSearchCV

In [15]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
param_grid = {'criterion':['squared_error','absolute_error','friedman_mse','poisson'], 'max_features': [None,'sqrt','log2'],'splitter':['best','random']}

grid = GridSearchCV(DecisionTreeRegressor(), param_grid, refit = True, verbose= 3, n_jobs=-1)

#fitting the model for grid search
grid.fit(x_train, y_train)

Fitting 5 folds for each of 24 candidates, totalling 120 fits


In [16]:
# print best parameter after tuning
#print(grid.best_params_)
re=grid.cv_results_
#print(re)
grid_predictions = grid.predict(x_test)

In [17]:
# print classification report
from sklearn.metrics import r2_score
r_score=r2_score(y_test, grid_predictions)
print("The R_score value for best parameter {}:".format(grid.best_params_),r_score)

The R_score value for best parameter {'criterion': 'absolute_error', 'max_features': None, 'splitter': 'random'}: 0.7507548166481172


In [18]:
table=pd.DataFrame.from_dict(re)

In [19]:
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.009973,0.002092,0.003791,0.001163203,squared_error,,best,"{'criterion': 'squared_error', 'max_features':...",0.712978,0.435058,0.767659,0.612373,0.654231,0.63646,0.113602,3
1,0.007779,0.003646,0.002394,0.0007982733,squared_error,,random,"{'criterion': 'squared_error', 'max_features':...",0.559138,0.600811,0.680222,0.552379,0.599258,0.598362,0.045521,9
2,0.00738,0.001494,0.001995,5.223489e-07,squared_error,sqrt,best,"{'criterion': 'squared_error', 'max_features':...",0.269984,0.614201,0.741106,0.355822,0.673373,0.530897,0.184474,23
3,0.006781,0.002631,0.002993,0.0008917504,squared_error,sqrt,random,"{'criterion': 'squared_error', 'max_features':...",0.606768,0.597413,0.656794,0.534621,0.373069,0.553733,0.098333,20
4,0.005785,0.0004,0.002792,0.0007471947,squared_error,log2,best,"{'criterion': 'squared_error', 'max_features':...",0.583455,0.450997,0.608644,0.648456,0.682807,0.594872,0.079528,10
5,0.005783,0.002706,0.003192,0.001464481,squared_error,log2,random,"{'criterion': 'squared_error', 'max_features':...",0.6064,0.498911,0.618445,0.549951,0.526553,0.560052,0.04587,19
6,0.045702,0.006358,0.002395,0.0007980348,absolute_error,,best,"{'criterion': 'absolute_error', 'max_features'...",0.637337,0.579353,0.68488,0.586234,0.630786,0.623718,0.038339,7
7,0.025132,0.003753,0.001996,1.07685e-06,absolute_error,,random,"{'criterion': 'absolute_error', 'max_features'...",0.668573,0.633181,0.660313,0.672217,0.632468,0.65335,0.017199,1
8,0.019746,0.00213,0.004588,0.002326745,absolute_error,sqrt,best,"{'criterion': 'absolute_error', 'max_features'...",0.313627,0.68698,0.531383,0.676363,0.540754,0.549821,0.134934,21
9,0.016553,0.001198,0.002394,0.0004875288,absolute_error,sqrt,random,"{'criterion': 'absolute_error', 'max_features'...",0.581496,0.31942,0.718986,0.46038,0.597769,0.53561,0.135642,22


In [20]:
age_input=float(input("Age:"))
bmi_input=float(input("BMI:"))
children_input=float(input("Children:"))
sex_male_input=int(input("Sex Male 0 or 1:"))
smoker_yes_input=int(input("Smoker Yes 0 or 1:"))

Age: 34
BMI: 23
Children: 3
Sex Male 0 or 1: 1
Smoker Yes 0 or 1: 2


In [21]:
Future_Prediction = grid.predict([[age_input,bmi_input,children_input,sex_male_input,smoker_yes_input]])
#change the paramter,play with it
print("Future_Prediction={}".format(Future_Prediction))

Future_Prediction=[46151.1245]
