In [None]:
import pandas as ps

## 1.Data collection

In [2]:
Data_set=ps.read_csv("insurance_pre.csv")

In [3]:
Data_set

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [4]:
Data_set=ps.get_dummies(Data_set,drop_first=True)
Data_set

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,0,1
1,18,33.770,1,1725.55230,1,0
2,28,33.000,3,4449.46200,1,0
3,33,22.705,0,21984.47061,1,0
4,32,28.880,0,3866.85520,1,0
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,1,0
1334,18,31.920,0,2205.98080,0,0
1335,18,36.850,0,1629.83350,0,0
1336,21,25.800,0,2007.94500,0,0


In [5]:
Data_set.columns

Index(['age', 'bmi', 'children', 'charges', 'sex_male', 'smoker_yes'], dtype='object')

## 2. Input & Output Split

In [7]:
Independent=Data_set[['age', 'bmi', 'children', 'sex_male', 'smoker_yes']]
Independent

Unnamed: 0,age,bmi,children,sex_male,smoker_yes
0,19,27.900,0,0,1
1,18,33.770,1,1,0
2,28,33.000,3,1,0
3,33,22.705,0,1,0
4,32,28.880,0,1,0
...,...,...,...,...,...
1333,50,30.970,3,1,0
1334,18,31.920,0,0,0
1335,18,36.850,0,0,0
1336,21,25.800,0,0,0


In [9]:
Dependent=Data_set[['charges']]
Dependent

Unnamed: 0,charges
0,16884.92400
1,1725.55230
2,4449.46200
3,21984.47061
4,3866.85520
...,...
1333,10600.54830
1334,2205.98080
1335,1629.83350
1336,2007.94500


## 3. Split the train and test data

In [10]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(Independent, Dependent, test_size=0.30, random_state=0)

## 4. Model creation

In [11]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor

p_grid={'criterion':['squared_error', 'friedman_mse', 'absolute_error', 'poisson'], 'splitter':['best','random'],'max_features':['sqrt', 'log2'] }

grid=GridSearchCV(DecisionTreeRegressor(random_state=42),p_grid, refit=True, verbose=3, n_jobs=-1)
grid.fit(Independent, Dependent)


Fitting 5 folds for each of 16 candidates, totalling 80 fits


## 5. Evalutation Metric 

In [12]:
#since we are using GridSearchCV so it will do Evalution the metric and give the best model

In [13]:
re=grid.cv_results_
# print the best parametar after tuning 
print("The Score value for the best model {}".format(grid.best_params_),"Score :",grid.best_score_)

The Score value for the best model {'criterion': 'friedman_mse', 'max_features': 'sqrt', 'splitter': 'best'} Score : 0.6975227781904909


In [14]:
table=ps.DataFrame.from_dict(re)
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.033855,0.011864,0.016976,0.005317,squared_error,sqrt,best,"{'criterion': 'squared_error', 'max_features':...",0.738749,0.640654,0.710509,0.679699,0.710067,0.695936,0.033361,3
1,0.030918,0.015327,0.018482,0.010439,squared_error,sqrt,random,"{'criterion': 'squared_error', 'max_features':...",0.652415,0.559122,0.686364,0.642094,0.560956,0.62019,0.051255,15
2,0.022435,0.006449,0.011922,0.003233,squared_error,log2,best,"{'criterion': 'squared_error', 'max_features':...",0.738749,0.640654,0.710509,0.679699,0.710067,0.695936,0.033361,3
3,0.037864,0.014135,0.016178,0.004087,squared_error,log2,random,"{'criterion': 'squared_error', 'max_features':...",0.652415,0.559122,0.686364,0.642094,0.560956,0.62019,0.051255,15
4,0.032707,0.014139,0.011071,0.004021,friedman_mse,sqrt,best,"{'criterion': 'friedman_mse', 'max_features': ...",0.739081,0.640641,0.718205,0.679626,0.710061,0.697523,0.034257,1
5,0.02834,0.004497,0.026439,0.012258,friedman_mse,sqrt,random,"{'criterion': 'friedman_mse', 'max_features': ...",0.652415,0.559122,0.686503,0.642059,0.561569,0.620334,0.051146,13
6,0.031602,0.006385,0.017393,0.006456,friedman_mse,log2,best,"{'criterion': 'friedman_mse', 'max_features': ...",0.739081,0.640641,0.718205,0.679626,0.710061,0.697523,0.034257,1
7,0.030772,0.007004,0.015831,0.006415,friedman_mse,log2,random,"{'criterion': 'friedman_mse', 'max_features': ...",0.652415,0.559122,0.686503,0.642059,0.561569,0.620334,0.051146,13
8,0.105204,0.009304,0.017776,0.00789,absolute_error,sqrt,best,"{'criterion': 'absolute_error', 'max_features'...",0.714016,0.542273,0.699056,0.714731,0.76922,0.687859,0.076611,5
9,0.074921,0.012183,0.010214,0.002331,absolute_error,sqrt,random,"{'criterion': 'absolute_error', 'max_features'...",0.631721,0.528585,0.621562,0.670972,0.669209,0.62441,0.051805,11


In [15]:
age=int(input("Enter your age :"))
bmi=float(input("Enter your BMI : "))
children=int(input("Enter Children count:"))
Gen=int(input("Enter the gander if male enter 1, if female enter 0 :"))
smok=int(input("Enter if somker enter 1, if no somker enter 0 :"))

Enter your age :19
Enter your BMI : 27.900
Enter Children count:0
Enter the gander if male enter 1, if female enter 0 :0
Enter if somker enter 1, if no somker enter 0 :1


In [17]:
result=grid.predict([[age,bmi,children,Gen,smok]])



In [18]:
result

array([16884.924])