In [1]:
#importing the Libraies
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Reading the Dataset
dataset = pd.read_csv('insurance_pre.csv')

In [3]:
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [4]:
dataset=pd.get_dummies(dataset,drop_first=True)

In [5]:
dataset

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,0,1
1,18,33.770,1,1725.55230,1,0
2,28,33.000,3,4449.46200,1,0
3,33,22.705,0,21984.47061,1,0
4,32,28.880,0,3866.85520,1,0
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,1,0
1334,18,31.920,0,2205.98080,0,0
1335,18,36.850,0,1629.83350,0,0
1336,21,25.800,0,2007.94500,0,0


In [6]:
indep=dataset[['age', 'bmi', 'children','sex_male', 'smoker_yes']]
dep=dataset['charges']

In [7]:
#split into training set and test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(indep, dep, test_size = 1/3, random_state = 0)


In [8]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

### Model creation

In [9]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
param_grid = {'criterion':['mse','mae','friedman_mse'],
              'max_features': ['auto','sqrt','log2'],
              'splitter':['best','random']} 



grid = GridSearchCV(DecisionTreeRegressor(), param_grid, refit = True, verbose = 3,n_jobs=-1) 
   
# fitting the model for grid search 
grid.fit(X_train, y_train) 
 



Fitting 5 folds for each of 18 candidates, totalling 90 fits


GridSearchCV(estimator=DecisionTreeRegressor(), n_jobs=-1,
             param_grid={'criterion': ['mse', 'mae', 'friedman_mse'],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'splitter': ['best', 'random']},
             verbose=3)

In [10]:
# print best parameter after tuning 
#print(grid.best_params_) 
re=grid.cv_results_
#print(re)
grid_predictions = grid.predict(X_test) 
   
# print classification report 
from sklearn.metrics import r2_score
r_score=r2_score(y_test,grid_predictions)

print("The R_score value for best parameter {}:".format(grid.best_params_),r_score)



The R_score value for best parameter {'criterion': 'mae', 'max_features': 'auto', 'splitter': 'random'}: 0.7834595712527417


In [11]:
table=pd.DataFrame.from_dict(re)

In [12]:
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.008338,0.001885,0.001959,0.000105,mse,auto,best,"{'criterion': 'mse', 'max_features': 'auto', '...",0.672286,0.472863,0.750638,0.654411,0.663363,0.642712,0.09158,4
1,0.009101,0.00478,0.001609,0.002346,mse,auto,random,"{'criterion': 'mse', 'max_features': 'auto', '...",0.570462,0.562037,0.672759,0.615138,0.645504,0.61318,0.042518,8
2,0.019655,0.00779,0.005128,0.001576,mse,sqrt,best,"{'criterion': 'mse', 'max_features': 'sqrt', '...",0.627298,0.327925,0.657417,0.459495,0.68546,0.551519,0.136645,17
3,0.011099,0.005871,0.002932,0.002646,mse,sqrt,random,"{'criterion': 'mse', 'max_features': 'sqrt', '...",0.630217,0.510233,0.627826,0.504349,0.543699,0.563265,0.055348,15
4,0.011493,0.005373,0.008482,0.002407,mse,log2,best,"{'criterion': 'mse', 'max_features': 'log2', '...",0.753962,0.597953,0.643698,0.6513,0.530353,0.635453,0.073234,5
5,0.007878,0.002094,0.006181,0.001,mse,log2,random,"{'criterion': 'mse', 'max_features': 'log2', '...",0.577538,0.586741,0.708355,0.711769,0.444551,0.605791,0.098904,11
6,0.147789,0.024644,0.005923,0.004141,mae,auto,best,"{'criterion': 'mae', 'max_features': 'auto', '...",0.667064,0.574155,0.69473,0.583066,0.634787,0.63076,0.046701,7
7,0.119185,0.014615,0.004321,0.002905,mae,auto,random,"{'criterion': 'mae', 'max_features': 'auto', '...",0.699943,0.676228,0.685063,0.590213,0.679947,0.666279,0.038879,1
8,0.090616,0.008359,0.001607,0.001503,mae,sqrt,best,"{'criterion': 'mae', 'max_features': 'sqrt', '...",0.574566,0.529574,0.695675,0.617948,0.544272,0.592407,0.059833,13
9,0.081931,0.009061,0.003165,0.00203,mae,sqrt,random,"{'criterion': 'mae', 'max_features': 'sqrt', '...",0.610107,0.538773,0.753902,0.55688,0.599793,0.611891,0.075749,9


In [13]:
age_input=float(input("Age:"))
bmi_input=float(input("BMI:"))
children_input=float(input("Children:"))
sex_male_input=int(input("Sex Male 0 or 1:"))
smoker_yes_input=int(input("Smoker Yes 0 or 1:"))

Age:24
BMI:28
Children:0
Sex Male 0 or 1:0
Smoker Yes 0 or 1:0


In [14]:
Future_Prediction=grid.predict([[age_input,bmi_input,children_input,sex_male_input,smoker_yes_input]])# change the paramter,play with it.
print("Future_Prediction={}".format(Future_Prediction))

Future_Prediction=[63770.42801]
