In [1]:
import pandas as ps

## 1.Data collection

In [2]:
Data=ps.read_csv("insurance_pre.csv")
Data

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [3]:
#The data set have a categorical data so we are using Nominal menthod which is converting string to number
Data=ps.get_dummies(Data, drop_first=True)

In [4]:
Data

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,0,1
1,18,33.770,1,1725.55230,1,0
2,28,33.000,3,4449.46200,1,0
3,33,22.705,0,21984.47061,1,0
4,32,28.880,0,3866.85520,1,0
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,1,0
1334,18,31.920,0,2205.98080,0,0
1335,18,36.850,0,1629.83350,0,0
1336,21,25.800,0,2007.94500,0,0


In [5]:
Data.columns

Index(['age', 'bmi', 'children', 'charges', 'sex_male', 'smoker_yes'], dtype='object')

## 2.Input and Output Split

In [20]:
#Independent=Data[['R&D Spend', 'Administration', 'Marketing Spend', 'State_Florida', 'State_New York']]
Independent=Data[['age', 'bmi', 'children', 'sex_male', 'smoker_yes']]
Independent

Unnamed: 0,age,bmi,children,sex_male,smoker_yes
0,19,27.900,0,0,1
1,18,33.770,1,1,0
2,28,33.000,3,1,0
3,33,22.705,0,1,0
4,32,28.880,0,1,0
...,...,...,...,...,...
1333,50,30.970,3,1,0
1334,18,31.920,0,0,0
1335,18,36.850,0,0,0
1336,21,25.800,0,0,0


In [21]:
#Dependent=Data[['Profit']]
Dependent=Data[['charges']]
Dependent

Unnamed: 0,charges
0,16884.92400
1,1725.55230
2,4449.46200
3,21984.47061
4,3866.85520
...,...
1333,10600.54830
1334,2205.98080
1335,1629.83350
1336,2007.94500


## 3.Split the train and test data

In [32]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(Independent, Dependent, test_size=0.30, random_state=0)

In [33]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
x_train=sc.fit_transform(x_train)
x_test=sc.transform(x_test)

In [34]:
#Here we are using Gird Search CV (cross validation) so no need to use train and test split, Gird will do that all the method.

## 4. Model creation

In [42]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR

p_grid={'kernel':['linear','poly', 'rbf', 'sigmoid'], 'C':[10,100,500,1000],'gamma':['scale', 'auto'] }
#poly will take more time so if we run code without standard we can remove poly then code will runn fast.
#SVM mostly support standard data so better to work with standard data 

grid=GridSearchCV(SVR(),p_grid, refit=True, verbose=3, n_jobs=-1)
grid.fit(x_train, y_train)


##The data is split into 5 parts (folds), and the model is trained 5 times, each time using 4 folds for training and 1 fold for validation.
#50 models with different settings that are being tested. we have 3 parmeter so 5x4x2=40 
#5 rows and 40 columns = 200 fits 

Fitting 5 folds for each of 32 candidates, totalling 160 fits


  y = column_or_1d(y, warn=True)


## 5. Evalutation metric 

In [43]:
#since we are using GridSearchCV so it will do Evalution the metric and give the best model

In [44]:
re=grid.cv_results_
# print the best parametar after tuning 
print("The Score value for the best model {}".format(grid.best_params_),"Score :",grid.best_score_)

The Score value for the best model {'C': 1000, 'gamma': 'auto', 'kernel': 'poly'} Score : 0.7935546451669515


In [45]:
table=ps.DataFrame.from_dict(re)
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.035486,0.000444,0.007136,0.000238,10,scale,linear,"{'C': 10, 'gamma': 'scale', 'kernel': 'linear'}",0.377969,0.479601,0.317872,0.337979,0.324422,0.367569,0.059777,21
1,0.034928,0.001228,0.007002,2e-06,10,scale,poly,"{'C': 10, 'gamma': 'scale', 'kernel': 'poly'}",0.054964,0.071297,-0.046513,-0.024157,-0.049652,0.001188,0.051594,27
2,0.03763,0.000802,0.017576,0.00079,10,scale,rbf,"{'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}",0.004126,0.013244,-0.103775,-0.095165,-0.101602,-0.056634,0.053486,32
3,0.040767,0.000635,0.009018,0.000922,10,scale,sigmoid,"{'C': 10, 'gamma': 'scale', 'kernel': 'sigmoid'}",0.049644,0.076323,-0.046798,-0.040824,-0.046521,-0.001635,0.053474,29
4,0.033956,0.001473,0.006801,0.001027,10,auto,linear,"{'C': 10, 'gamma': 'auto', 'kernel': 'linear'}",0.377969,0.479601,0.317872,0.337979,0.324422,0.367569,0.059777,21
5,0.03224,0.001492,0.007162,0.000946,10,auto,poly,"{'C': 10, 'gamma': 'auto', 'kernel': 'poly'}",0.056274,0.069532,-0.045601,-0.025079,-0.049592,0.001107,0.051309,28
6,0.037587,0.001397,0.017947,0.001005,10,auto,rbf,"{'C': 10, 'gamma': 'auto', 'kernel': 'rbf'}",0.004055,0.013366,-0.103821,-0.095119,-0.101604,-0.056625,0.053504,31
7,0.0418,0.001498,0.009227,0.001016,10,auto,sigmoid,"{'C': 10, 'gamma': 'auto', 'kernel': 'sigmoid'}",0.049905,0.075905,-0.046585,-0.041004,-0.046507,-0.001657,0.053391,30
8,0.041917,0.001015,0.006974,0.000832,100,scale,linear,"{'C': 100, 'gamma': 'scale', 'kernel': 'linear'}",0.584474,0.655818,0.55338,0.576939,0.546881,0.583498,0.038787,9
9,0.036479,0.001395,0.00886,0.002084,100,scale,poly,"{'C': 100, 'gamma': 'scale', 'kernel': 'poly'}",0.537117,0.579373,0.47184,0.537966,0.424478,0.510155,0.054955,16


In [46]:
age=int(input("Enter your age :"))
bmi=float(input("Enter your BMI : "))
children=int(input("Enter Children count:"))
Gen=int(input("Enter the gander if male enter 1, if female enter 0 :"))
smok=int(input("Enter if somker enter 1, if no somker enter 0 :"))

Enter your age :19
Enter your BMI : 27.900
Enter Children count:0
Enter the gander if male enter 1, if female enter 0 :0
Enter if somker enter 1, if no somker enter 0 :1


In [47]:
result=grid.predict([[age,bmi,children,Gen,smok]])

In [48]:
result

array([1513081.48057374])