In [1]:
#importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
#Reading the Dataset
dataset = pd.read_csv("insurance_pre.csv")

In [3]:
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [4]:
dataset = pd.get_dummies(dataset, drop_first = True)

In [5]:
dataset

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,False,True
1,18,33.770,1,1725.55230,True,False
2,28,33.000,3,4449.46200,True,False
3,33,22.705,0,21984.47061,True,False
4,32,28.880,0,3866.85520,True,False
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,True,False
1334,18,31.920,0,2205.98080,False,False
1335,18,36.850,0,1629.83350,False,False
1336,21,25.800,0,2007.94500,False,False


In [6]:
independent = dataset[['age', 'bmi', 'children', 'sex_male', 'smoker_yes']]
dependent = dataset[['charges']]

In [7]:
#split into training set and test set
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(independent, dependent, test_size = 0.30, random_state = 0)

In [8]:
#Standardisation
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
#to calculate mean and variance
x_train = sc.fit_transform(x_train) #transform: to assign the transform values to x_train
x_test = sc.transform(x_test)

In [9]:
#GridSearch
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
param_grid = {'fit_intercept':[True, False], 'copy_X':[True, False], 'n_jobs':[1, 5], 'positive':[True, False]}

grid = GridSearchCV(LinearRegression(), param_grid, refit = True, verbose = 3, n_jobs = -1)
#fitting the model for grid search
grid.fit(x_train, y_train)

Fitting 5 folds for each of 16 candidates, totalling 80 fits


In [10]:
#print best parameter after tuning
#print(grid.best_params_)
result = grid.cv_results_

print("The R_score value for best parameter {}:".format(grid.best_params_))

The R_score value for best parameter {'copy_X': True, 'fit_intercept': True, 'n_jobs': 1, 'positive': True}:


In [11]:
table = pd.DataFrame.from_dict(result)

In [12]:
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_copy_X,param_fit_intercept,param_n_jobs,param_positive,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.004589,0.003994,0.003124,0.005071,True,True,1,True,"{'copy_X': True, 'fit_intercept': True, 'n_job...",0.763511,0.703146,0.730655,0.695634,0.71908,0.722405,0.023902,1
1,0.006623,0.006708,0.0,0.0,True,True,1,False,"{'copy_X': True, 'fit_intercept': True, 'n_job...",0.763511,0.702938,0.730655,0.695551,0.719087,0.722348,0.023954,5
2,0.011731,0.003487,0.00124,0.001533,True,True,5,True,"{'copy_X': True, 'fit_intercept': True, 'n_job...",0.763511,0.703146,0.730655,0.695634,0.71908,0.722405,0.023902,1
3,0.005184,0.006417,0.002203,0.001976,True,True,5,False,"{'copy_X': True, 'fit_intercept': True, 'n_job...",0.763511,0.702938,0.730655,0.695551,0.719087,0.722348,0.023954,5
4,0.002195,0.001279,0.000463,0.000926,True,False,1,True,"{'copy_X': True, 'fit_intercept': False, 'n_jo...",-0.698095,-0.586653,-0.459207,-0.598656,-0.416113,-0.551745,0.101781,9
5,0.003521,0.003928,0.00211,0.004221,True,False,1,False,"{'copy_X': True, 'fit_intercept': False, 'n_jo...",-0.698095,-0.586805,-0.459207,-0.600488,-0.419146,-0.552748,0.101157,13
6,0.014077,0.002305,0.000858,0.001274,True,False,5,True,"{'copy_X': True, 'fit_intercept': False, 'n_jo...",-0.698095,-0.586653,-0.459207,-0.598656,-0.416113,-0.551745,0.101781,9
7,0.000739,0.001017,0.003399,0.002956,True,False,5,False,"{'copy_X': True, 'fit_intercept': False, 'n_jo...",-0.698095,-0.586805,-0.459207,-0.600488,-0.419146,-0.552748,0.101157,13
8,0.002809,0.003455,0.00338,0.003598,False,True,1,True,"{'copy_X': False, 'fit_intercept': True, 'n_jo...",0.763511,0.703146,0.730655,0.695634,0.71908,0.722405,0.023902,1
9,0.002616,0.001758,0.001838,0.002024,False,True,1,False,"{'copy_X': False, 'fit_intercept': True, 'n_jo...",0.763511,0.702938,0.730655,0.695551,0.719087,0.722348,0.023954,5


In [13]:
age_input = int(input("Age: "))
bmi_input = float(input("BMI: "))
children_input = int(input("Children: "))
sex_male_input = int(input("Sex Male 0 or 1: "))
smoker_yes_input = int(input("Smoker yes 0 or 1: "))

Age: 19
BMI: 27.9
Children: 0
Sex Male 0 or 1: 0
Smoker yes 0 or 1: 1


In [14]:
new_data = [[age_input, bmi_input, children_input, sex_male_input, smoker_yes_input]]

In [15]:
new_data_standardized = sc.transform(new_data)



In [17]:
Future_Prediction = grid.predict(new_data_standardized)
print("Future_Prediction={}".format(Future_Prediction))

Future_Prediction=[[25194.15086277]]
