In [42]:
#importing the Libraries
import numpy as np
import matplotlib.pyplot as py
import pandas as pd

In [43]:
#Reading the Dataset
dataset=pd.read_csv('insurance_pre.csv')
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [44]:
dataset = pd.get_dummies(dataset, drop_first=True)
dataset = dataset.astype(int)
dataset

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27,0,16884,0,1
1,18,33,1,1725,1,0
2,28,33,3,4449,1,0
3,33,22,0,21984,1,0
4,32,28,0,3866,1,0
...,...,...,...,...,...,...
1333,50,30,3,10600,1,0
1334,18,31,0,2205,0,0
1335,18,36,0,1629,0,0
1336,21,25,0,2007,0,0


In [45]:
indep=dataset[['age', 'bmi', 'children','sex_male', 'smoker_yes']]
dep=dataset['charges']

In [46]:
#Splitting into training and test set
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(indep, dep, test_size=1/3, random_state=0)

In [47]:
#preprocessing
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

In [48]:
#Model Selection - Using GridSearchCV

In [60]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
param_grid = {'criterion':['squared_error','absolute_error','friedman_mse','poisson'], 'max_features': [None,'sqrt','log2'],'splitter':['best','random']}

grid = GridSearchCV(DecisionTreeRegressor(), param_grid, refit = True, verbose= 3,n_jobs=-1)

#fitting the model for grid search
grid.fit(x_train, y_train)

Fitting 5 folds for each of 24 candidates, totalling 120 fits


In [62]:
# print best parameter after tuning
#print(grid.best_params_)
re=grid.cv_results_
#print(re)
grid_predictions = grid.predict(x_test)

In [63]:
# print classification report
from sklearn.metrics import r2_score
r_score=r2_score(y_test,grid_predictions)
print("The R_score value for best parameter {}:".format(grid.best_params_),r_score)

The R_score value for best parameter {'criterion': 'poisson', 'max_features': None, 'splitter': 'random'}: 0.7198367645678225


In [64]:
table=pd.DataFrame.from_dict(re)

In [65]:
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.007578,0.002327,0.001795,0.0007464681,squared_error,,best,"{'criterion': 'squared_error', 'max_features':...",0.736754,0.502037,0.716226,0.594017,0.668652,0.643537,0.086114,9
1,0.005983,0.001783,0.001398,0.0004897266,squared_error,,random,"{'criterion': 'squared_error', 'max_features':...",0.740267,0.613467,0.706939,0.587819,0.623621,0.654423,0.058633,7
2,0.004987,0.000632,0.000998,4.909339e-07,squared_error,sqrt,best,"{'criterion': 'squared_error', 'max_features':...",0.725496,0.558949,0.574205,0.675347,0.674828,0.641765,0.064272,10
3,0.004388,0.001017,0.001196,0.0003994947,squared_error,sqrt,random,"{'criterion': 'squared_error', 'max_features':...",0.464593,0.580682,0.597776,0.499479,0.546438,0.537794,0.049676,23
4,0.005387,0.001357,0.001196,0.0003993131,squared_error,log2,best,"{'criterion': 'squared_error', 'max_features':...",0.569162,0.585963,0.683653,0.598975,0.666242,0.620799,0.045545,16
5,0.003589,0.000488,0.001199,0.0003984332,squared_error,log2,random,"{'criterion': 'squared_error', 'max_features':...",0.620121,0.545844,0.614461,0.497984,0.580585,0.571799,0.045507,19
6,0.07859,0.039438,0.000997,8.583069e-07,absolute_error,,best,"{'criterion': 'absolute_error', 'max_features'...",0.680213,0.614213,0.697546,0.531941,0.621139,0.629011,0.058348,14
7,0.022738,0.001163,0.001795,0.001596403,absolute_error,,random,"{'criterion': 'absolute_error', 'max_features'...",0.71578,0.630825,0.691697,0.674822,0.627487,0.668122,0.034393,3
8,0.021541,0.003254,0.001398,0.0007974912,absolute_error,sqrt,best,"{'criterion': 'absolute_error', 'max_features'...",0.688388,0.594431,0.66439,0.697083,0.636782,0.656215,0.037328,5
9,0.015357,0.001197,0.000998,8.869684e-07,absolute_error,sqrt,random,"{'criterion': 'absolute_error', 'max_features'...",0.707869,0.56806,0.755717,0.661533,0.447329,0.628102,0.10959,15


In [66]:
age_input=float(input("Age:"))
bmi_input=float(input("BMI:"))
children_input=float(input("Children:"))
sex_male_input=int(input("Sex Male 0 or 1:"))
smoker_yes_input=int(input("Smoker Yes 0 or 1:"))

Age: 34
BMI: 23
Children: 2
Sex Male 0 or 1: 1
Smoker Yes 0 or 1: 1


In [67]:
Future_Prediction = grid.predict([[age_input,bmi_input,children_input,sex_male_input,smoker_yes_input]])
#change the paramter,play with it.
print("Future_Prediction={}".format(Future_Prediction))

Future_Prediction=[46200.]
