In [1]:
###-----------------
### Import Libraries
###-----------------

import pandas as pd
import numpy as np

from sklearn.model_selection import KFold,cross_val_score,GridSearchCV
from sklearn.model_selection import train_test_split

from sklearn.linear_model import Ridge,Lasso,ElasticNet

from sklearn.metrics import log_loss,accuracy_score
from sklearn.metrics import mean_squared_error

from sklearn.preprocessing import LabelEncoder

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the insurance records from the CSV in the working directory.
insure = pd.read_csv(filepath_or_buffer='insurance.csv')

# Show the first five rows as this cell's output.
insure.head(n=5)

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [3]:
# One-hot encode the categorical (object/category dtype) columns;
# drop_first=True omits the first level of each encoded column.
dum_ins = pd.get_dummies(data=insure, drop_first=True)

# Target is `charges`; every other column is kept as a predictor.
# NOTE(review): the head() preview lists an 'Unnamed: 0' column. If that is a
# real column in the CSV (e.g. a saved row index) it ends up in X as a
# feature -- confirm against the file.
y = dum_ins.loc[:, 'charges']
X = dum_ins.drop(columns=['charges'])

In [4]:
# Shuffled 5-fold splitter; the fixed seed keeps fold assignment repeatable.
kfold = KFold(n_splits=5, shuffle=True, random_state=23)

# Baseline elastic net with a hand-picked penalty strength and L1/L2 mix.
elastic = ElasticNet(alpha=0.1, l1_ratio=0.3)

# One R^2 score per fold.
results = cross_val_score(estimator=elastic, X=X, y=y, scoring='r2', cv=kfold)

# Average R^2 across the folds.
print(np.mean(results))

0.6896851975844127


In [5]:
# 20 evenly spaced candidates for each hyper-parameter.
l1_rat = np.linspace(start=0.001, stop=0.999, num=20)
alpha = np.linspace(start=0.001, stop=4, num=20)

# Full 20 x 20 grid over penalty strength (alpha) and L1/L2 mix (l1_ratio).
params = dict(alpha=alpha, l1_ratio=l1_rat)
elastic = ElasticNet()

# Exhaustive search, scored by R^2 on the folds produced by `kfold`.
gcv = GridSearchCV(estimator=elastic, param_grid=params, scoring='r2', cv=kfold)
gcv.fit(X, y)

# Best parameter combination, then its mean cross-validated R^2.
print(gcv.best_params_, gcv.best_score_, sep='\n')

{'alpha': 0.6324210526315789, 'l1_ratio': 0.999}
0.7472365796783413


In [6]:
# Hold out 30% of the rows as a test set; the fixed seed keeps the split repeatable.
# NOTE(review): the cells visible in this section fit on the full X, y and do
# not use this split -- confirm whether it is needed further down.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, test_size=0.3, random_state=23)

In [7]:
# Pipeline + grid search: tune the KNN neighbour count and compare
# StandardScaler against MinMaxScaler as the scaling step.
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler,MinMaxScaler 
from sklearn.pipeline import Pipeline

kfold = KFold(n_splits=5, shuffle=True, random_state=23)
scalar = StandardScaler()
knr = KNeighborsRegressor()

# Two named steps: "SCL" (the scaler) feeds "KNR" (the regressor).
pipe = Pipeline(steps=[("SCL", scalar), ("KNR", knr)])

# Search n_neighbors = 1..10 and swap the whole "SCL" step between the two
# scalers, so the grid decides which scaler is used.
params = dict(
    KNR__n_neighbors=np.arange(1, 11),
    SCL=[StandardScaler(), MinMaxScaler()],
)

# Model selection is driven by cross-validated R^2.
gcv = GridSearchCV(estimator=pipe, param_grid=params, scoring='r2', cv=kfold)
gcv.fit(X, y)

# Best parameter combination, then its mean cross-validated R^2.
print(gcv.best_params_, gcv.best_score_, sep='\n')

{'KNR__n_neighbors': 6, 'SCL': StandardScaler()}
0.7929593107756748


In [8]:
#With Pipeline #Comparison between Elastic and KNR for insurance dataset

# KNR side of the comparison.
# The scaler + KNeighborsRegressor grid search for this dataset is already
# fitted in the previous cell as `gcv` (same pipeline steps, same parameter
# grid, same seeded KFold, same X and y). Rebuilding and refitting it here
# would repeat every fit and rebind `kfold`, `scalar`, `knr`, `pipe`, `params`
# and `gcv` to equivalent objects, so the fitted search is reported directly.
print(gcv.best_params_)
print(gcv.best_score_)

{'KNR__n_neighbors': 6, 'SCL': StandardScaler()}
0.7929593107756748


In [9]:
# Elastic net side of the comparison: tune penalty strength and L1/L2 mix.
elastic = ElasticNet()

# 20 evenly spaced candidates for each hyper-parameter.
l1_rat = np.linspace(0.001, 0.999, num=20)
alpha = np.linspace(0.001, 4, num=20)

# Single-step pipeline wrapping the estimator.
# NOTE(review): the search below is run on `elastic` directly, so `pipe1` is
# not used in this cell -- confirm whether the pipeline was meant to be searched.
pipe1 = Pipeline(steps=[("ELC", elastic)])

# Grid over both hyper-parameters, keyed by the estimator's own parameter names.
params1 = dict(l1_ratio=l1_rat, alpha=alpha)

# Exhaustive search scored by R^2 on the folds produced by `kfold`.
gcv1 = GridSearchCV(estimator=elastic, param_grid=params1, scoring='r2', cv=kfold)
gcv1.fit(X, y)

# Best parameter combination, then its mean cross-validated R^2.
print(gcv1.best_params_, gcv1.best_score_, sep='\n')

{'alpha': 0.6324210526315789, 'l1_ratio': 0.999}
0.7472365796783413
