In [1]:
# Importing the library and reading the dataset

import pandas as pd
dataset= pd.read_csv("insurance_pre.csv")
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [2]:
# Preprocessing the dataset to convert nominal data to numeric data

dataset = pd.get_dummies(dataset, drop_first = True)
dataset

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,0,1
1,18,33.770,1,1725.55230,1,0
2,28,33.000,3,4449.46200,1,0
3,33,22.705,0,21984.47061,1,0
4,32,28.880,0,3866.85520,1,0
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,1,0
1334,18,31.920,0,2205.98080,0,0
1335,18,36.850,0,1629.83350,0,0
1336,21,25.800,0,2007.94500,0,0


In [3]:
# Getting the column names of the dataset

dataset.columns

Index(['age', 'bmi', 'children', 'charges', 'sex_male', 'smoker_yes'], dtype='object')

In [4]:
# Splitting the input and the output data

independent = dataset[['age', 'bmi', 'children', 'sex_male', 'smoker_yes' ]]
independent

Unnamed: 0,age,bmi,children,sex_male,smoker_yes
0,19,27.900,0,0,1
1,18,33.770,1,1,0
2,28,33.000,3,1,0
3,33,22.705,0,1,0
4,32,28.880,0,1,0
...,...,...,...,...,...
1333,50,30.970,3,1,0
1334,18,31.920,0,0,0
1335,18,36.850,0,0,0
1336,21,25.800,0,0,0


In [5]:
dependent = dataset[['charges']]
dependent

Unnamed: 0,charges
0,16884.92400
1,1725.55230
2,4449.46200
3,21984.47061
4,3866.85520
...,...
1333,10600.54830
1334,2205.98080
1335,1629.83350
1336,2007.94500


In [6]:
# Splitting the train and the test data

from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(independent, dependent, test_size = 0.30, random_state = 0)

In [7]:
# Creating Quantile model with grid search

from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import QuantileRegressor
grid_params = {'solver':['highs-ds', 'highs-ipm', 'highs', 'interior-point', 'revised simplex'], 'quantile':[0.3,0.5,0.7]}
grid = GridSearchCV(QuantileRegressor(), grid_params, refit = True, verbose = 3, n_jobs = -1)
grid.fit(X_train, Y_train)

Fitting 5 folds for each of 15 candidates, totalling 75 fits


  y = column_or_1d(y, warn=True)
Status is 4: Numerical difficulties encountered.
Result message of linprog:
The solution does not satisfy the constraints within the required tolerance of 3.16E-04, yet no errors were raised and there is no certificate of infeasibility or unboundedness. Check whether the slack and constraint residuals are acceptable; if not, consider enabling presolve, adjusting the tolerance option(s), and/or using a different method. Please consider submitting a bug report.


In [8]:
# Printing the best parameter

re = grid.cv_results_
print("The best Parameters are : {} ".format(grid.best_params_))

The best Parameters are : {'quantile': 0.7, 'solver': 'interior-point'} 


In [9]:
# Printing the parameter in  the table format

table = pd.DataFrame.from_dict(re)
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_quantile,param_solver,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.165549,0.09924,0.01285,0.010169,0.3,highs-ds,"{'quantile': 0.3, 'solver': 'highs-ds'}",-0.246252,-0.036785,-0.294941,-0.321663,-0.218809,-0.22369,0.100142,11
1,0.076409,0.024661,0.008359,0.0049,0.3,highs-ipm,"{'quantile': 0.3, 'solver': 'highs-ipm'}",-0.246252,-0.037301,-0.294941,-0.321663,-0.218809,-0.223793,0.099949,15
2,0.062965,0.013388,0.00368,0.003194,0.3,highs,"{'quantile': 0.3, 'solver': 'highs'}",-0.246252,-0.036785,-0.294941,-0.321663,-0.218809,-0.22369,0.100142,11
3,51.080019,5.278035,0.066418,0.077801,0.3,interior-point,"{'quantile': 0.3, 'solver': 'interior-point'}",-0.246236,-0.037223,-0.294941,-0.321632,-0.218799,-0.223766,0.099972,13
4,11.481961,0.962748,0.010978,0.007749,0.3,revised simplex,"{'quantile': 0.3, 'solver': 'revised simplex'}",-0.246252,-0.037301,-0.294941,-0.321663,-0.218809,-0.223793,0.099949,14
5,0.130602,0.043777,0.011151,0.013561,0.5,highs-ds,"{'quantile': 0.5, 'solver': 'highs-ds'}",-0.192718,0.011845,-0.247548,-0.26361,-0.165805,-0.171567,0.098352,8
6,0.101373,0.012151,0.009069,0.006613,0.5,highs-ipm,"{'quantile': 0.5, 'solver': 'highs-ipm'}",-0.192718,0.011845,-0.247548,-0.26361,-0.165805,-0.171567,0.098352,8
7,0.093505,0.039725,0.006721,0.008886,0.5,highs,"{'quantile': 0.5, 'solver': 'highs'}",-0.192718,0.011845,-0.247548,-0.26361,-0.165805,-0.171567,0.098352,8
8,54.914325,1.179803,0.026265,0.024489,0.5,interior-point,"{'quantile': 0.5, 'solver': 'interior-point'}",-0.192719,0.011854,-0.24755,-0.263603,-0.165806,-0.171565,0.098354,6
9,13.591969,0.430785,0.006626,0.001345,0.5,revised simplex,"{'quantile': 0.5, 'solver': 'revised simplex'}",-0.192718,0.011845,-0.247548,-0.26361,-0.165805,-0.171567,0.098352,7


In [10]:
# Getting the input values for prediction

age_input = float(input('Age: '))
bmi_input = float(input('BMI: '))
children_input = float(input('Children: '))
sex_male_input = float(input('Sex Male 0 or 1: '))
smoker_yes_input = float(input('Smoker yes 0 or 1: '))

Age: 32
BMI: 43
Children: 2
Sex Male 0 or 1: 0
Smoker yes 0 or 1: 1


In [11]:
# Predicting the output

Future_predict = grid.predict([[age_input, bmi_input, children_input, sex_male_input, smoker_yes_input]])
print("Future prediction  = {} ".format(Future_predict))

Future prediction  = [13831.11475208] 


