In [1]:
#importing the Libraies
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Reading the Dataset
dataset = pd.read_csv('insurance_pre.csv')

In [3]:
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [4]:
# As the above dataset contains sex and smoker columns as a categorical value 
# We need to convert that into numerical value using below one hot encoding method.
dataset=pd.get_dummies(dataset,drop_first=True) # Removing the repeated value column

In [5]:
dataset # Once the categorical columns converted into numerical column values

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,0,1
1,18,33.770,1,1725.55230,1,0
2,28,33.000,3,4449.46200,1,0
3,33,22.705,0,21984.47061,1,0
4,32,28.880,0,3866.85520,1,0
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,1,0
1334,18,31.920,0,2205.98080,0,0
1335,18,36.850,0,1629.83350,0,0
1336,21,25.800,0,2007.94500,0,0


In [6]:
indep=dataset[['age', 'bmi', 'children','sex_male', 'smoker_yes']] # separating the input values
dep=dataset['charges'] # separating the output values

In [7]:
# Consolidating the playable parameters probability and also considering the cross validation
# Considering different portions of a data set as a test and traint set
from sklearn.model_selection import GridSearchCV 
from sklearn.ensemble import RandomForestRegressor
param_grid = {'criterion':['mse','mae'],
'max_features': ['auto','sqrt','log2'],
'n_estimators':[10,100]} # Giving all possible HTPs
grid = GridSearchCV(RandomForestRegressor(), param_grid, refit = True, verbose = 3,n_jobs=-1) 
# refit = True means consider the best result model as a result model
# refit = False means consider the last combination model as a result model
# fitting the model for grid search
grid.fit(indep, dep) # Passing whole dataset input and output variable

Fitting 5 folds for each of 12 candidates, totalling 60 fits




GridSearchCV(estimator=RandomForestRegressor(), n_jobs=-1,
             param_grid={'criterion': ['mse', 'mae'],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'n_estimators': [10, 100]},
             verbose=3)

In [8]:
# print best parameter after tuning
print(grid.best_params_)
re=grid.cv_results_
print("The R_score value for best parameter {}:".format(grid.best_params_))

{'criterion': 'mae', 'max_features': 'sqrt', 'n_estimators': 100}
The R_score value for best parameter {'criterion': 'mae', 'max_features': 'sqrt', 'n_estimators': 100}:


In [9]:
re


{'mean_fit_time': array([0.22223358, 1.00665727, 0.1398427 , 0.79910026, 0.14552965,
        0.78200626, 0.90360622, 7.97894216, 0.42453437, 4.6227416 ,
        0.59188614, 4.23160214]),
 'std_fit_time': array([0.11850686, 0.05722068, 0.01992329, 0.07936955, 0.03563578,
        0.03472059, 0.14913924, 1.3419929 , 0.04406608, 0.75614136,
        0.06374787, 0.40649498]),
 'mean_score_time': array([0.00923271, 0.0750176 , 0.00803556, 0.07500467, 0.01693473,
        0.06405902, 0.02686963, 0.10854459, 0.00859737, 0.08093662,
        0.03116398, 0.07711048]),
 'std_score_time': array([0.00038392, 0.00585661, 0.00066786, 0.01606098, 0.01131473,
        0.00346496, 0.02079309, 0.06033898, 0.00135627, 0.0063353 ,
        0.02248325, 0.01333954]),
 'param_criterion': masked_array(data=['mse', 'mse', 'mse', 'mse', 'mse', 'mse', 'mae', 'mae',
                    'mae', 'mae', 'mae', 'mae'],
              mask=[False, False, False, False, False, False, False, False,
                    False, Fal

In [10]:
table=pd.DataFrame.from_dict(re) # converting the dictionary format into readable format

In [11]:
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.222234,0.118507,0.009233,0.000384,mse,auto,10,"{'criterion': 'mse', 'max_features': 'auto', '...",0.842255,0.741232,0.830639,0.797964,0.823389,0.807096,0.035993,12
1,1.006657,0.057221,0.075018,0.005857,mse,auto,100,"{'criterion': 'mse', 'max_features': 'auto', '...",0.84393,0.770532,0.851459,0.818559,0.836457,0.824188,0.028964,5
2,0.139843,0.019923,0.008036,0.000668,mse,sqrt,10,"{'criterion': 'mse', 'max_features': 'sqrt', '...",0.846648,0.772154,0.844054,0.807771,0.808979,0.815921,0.027444,9
3,0.7991,0.07937,0.075005,0.016061,mse,sqrt,100,"{'criterion': 'mse', 'max_features': 'sqrt', '...",0.855075,0.783286,0.857093,0.825374,0.840978,0.832361,0.027052,3
4,0.14553,0.035636,0.016935,0.011315,mse,log2,10,"{'criterion': 'mse', 'max_features': 'log2', '...",0.846952,0.75898,0.840026,0.809266,0.813933,0.813831,0.031024,10
5,0.782006,0.034721,0.064059,0.003465,mse,log2,100,"{'criterion': 'mse', 'max_features': 'log2', '...",0.855079,0.776866,0.861188,0.829562,0.833634,0.831266,0.029766,4
6,0.903606,0.149139,0.02687,0.020793,mae,auto,10,"{'criterion': 'mae', 'max_features': 'auto', '...",0.826412,0.745498,0.837728,0.80491,0.832245,0.809359,0.033815,11
7,7.978942,1.341993,0.108545,0.060339,mae,auto,100,"{'criterion': 'mae', 'max_features': 'auto', '...",0.839155,0.767318,0.849373,0.826473,0.837624,0.823989,0.029251,6
8,0.424534,0.044066,0.008597,0.001356,mae,sqrt,10,"{'criterion': 'mae', 'max_features': 'sqrt', '...",0.845613,0.77409,0.823388,0.807274,0.83867,0.817807,0.025541,8
9,4.622742,0.756141,0.080937,0.006335,mae,sqrt,100,"{'criterion': 'mae', 'max_features': 'sqrt', '...",0.857543,0.782411,0.860466,0.829799,0.841395,0.834323,0.028242,1


In [None]:
age_input=float(input("Age:"))
bmi_input=float(input("BMI:"))
children_input=float(input("Children:"))
sex_male_input=int(input("Sex Male 0 or 1:"))
smoker_yes_input=int(input("Smoker Yes 0 or 1:"))

In [None]:
Future_Prediction=grid.predict([[age_input,bmi_input,children_input,sex_male_input,smoker_yes_input]])
#change the paramter,play with it.
print("Future_Prediction={}".format(Future_Prediction))