In [1]:
# Importing the library and reading the dataset

import pandas as pd
dataset= pd.read_csv("insurance_pre.csv")
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [2]:
# Preprocessing the dataset to convert nominal data to numeric data

dataset = pd.get_dummies(dataset, drop_first = True)
dataset

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,0,1
1,18,33.770,1,1725.55230,1,0
2,28,33.000,3,4449.46200,1,0
3,33,22.705,0,21984.47061,1,0
4,32,28.880,0,3866.85520,1,0
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,1,0
1334,18,31.920,0,2205.98080,0,0
1335,18,36.850,0,1629.83350,0,0
1336,21,25.800,0,2007.94500,0,0


In [3]:
# Getting the column names of the dataset

dataset.columns

Index(['age', 'bmi', 'children', 'charges', 'sex_male', 'smoker_yes'], dtype='object')

In [4]:
# Splitting the input and the output data

independent = dataset[['age', 'bmi', 'children', 'sex_male', 'smoker_yes' ]]
independent

Unnamed: 0,age,bmi,children,sex_male,smoker_yes
0,19,27.900,0,0,1
1,18,33.770,1,1,0
2,28,33.000,3,1,0
3,33,22.705,0,1,0
4,32,28.880,0,1,0
...,...,...,...,...,...
1333,50,30.970,3,1,0
1334,18,31.920,0,0,0
1335,18,36.850,0,0,0
1336,21,25.800,0,0,0


In [5]:
dependent = dataset[['charges']]
dependent

Unnamed: 0,charges
0,16884.92400
1,1725.55230
2,4449.46200
3,21984.47061
4,3866.85520
...,...
1333,10600.54830
1334,2205.98080
1335,1629.83350
1336,2007.94500


In [6]:
# Splitting the train and the test data

from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(independent, dependent, test_size = 0.30, random_state = 0)

In [7]:
# Creating Ridge model with grid search

from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge
grid_params = {'solver':['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga', 'lbfgs']}
grid = GridSearchCV(Ridge(), grid_params, refit = True, verbose = 3, n_jobs = -1)
grid.fit(X_train, Y_train)

Fitting 5 folds for each of 8 candidates, totalling 40 fits


5 fits failed out of a total of 40.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\reema\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\reema\anaconda3\Lib\site-packages\sklearn\base.py", line 1151, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\reema\anaconda3\Lib\site-packages\sklearn\linear_model\_ridge.py", line 1142, in fit
    return super().fit(X, y, sample_weight=sample_weight)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C

In [10]:
# To print the besr parameter

re = grid.cv_results_
print("The best parameters are : {} ".format(grid.best_params_))

The best parameters are : {'solver': 'lsqr'} 


In [11]:
# To print the grid report in table format

table = pd.DataFrame.from_dict(re)
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_solver,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.117494,0.01603,0.002169,0.004338,auto,{'solver': 'auto'},0.76254,0.703716,0.730036,0.696884,0.718561,0.722348,0.023171,2
1,0.071217,0.044595,0.00534,0.00673,svd,{'solver': 'svd'},0.76254,0.703716,0.730036,0.696884,0.718561,0.722348,0.023171,2
2,0.006758,0.005237,0.006971,0.004985,cholesky,{'solver': 'cholesky'},0.76254,0.703716,0.730036,0.696884,0.718561,0.722348,0.023171,2
3,0.000265,0.000529,0.004637,0.00569,lsqr,{'solver': 'lsqr'},0.76254,0.703716,0.730036,0.696884,0.718561,0.722348,0.023171,1
4,0.002665,0.004773,0.004119,0.005045,sparse_cg,{'solver': 'sparse_cg'},0.76254,0.703716,0.730036,0.696884,0.718561,0.722348,0.023171,5
5,0.011396,0.005431,0.0,0.0,sag,{'solver': 'sag'},0.762467,0.703775,0.729988,0.696976,0.718523,0.722346,0.023114,6
6,0.012072,0.003318,0.002445,0.002697,saga,{'solver': 'saga'},0.762338,0.703848,0.729937,0.697127,0.71846,0.722342,0.023023,7
7,0.004705,0.002705,0.0,0.0,lbfgs,{'solver': 'lbfgs'},,,,,,,,8


In [12]:
# Getting the inputs for future prediction

age_input = float(input('Age: '))
bmi_input = float(input('BMI: '))
children_input = float(input('Children: '))
sex_male_input = float(input('Sex Male 0 or 1: '))
smoker_yes_input = float(input('Smoker yes 0 or 1: '))

Age: 32
BMI: 43
Children: 2
Sex Male 0 or 1: 0
Smoker yes 0 or 1: 1


In [13]:
# Evaluating and printing the Predicted value

Future_Prediction = grid.predict([[age_input,bmi_input,children_input,sex_male_input,smoker_yes_input]])
print("Future Prediction {} : ".format(Future_Prediction))   

Future Prediction [[34227.16394885]] : 


