In [1]:
###-----------------
### Import Libraries
###-----------------

import pandas as pd
from sklearn.model_selection import KFold,cross_val_score
from sklearn.linear_model import Ridge,Lasso,ElasticNet
from sklearn.metrics import mean_squared_error
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the Boston housing data (the CSV is expected in the working directory)
# and preview the first rows.
boston = pd.read_csv("Boston.csv")

boston.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [3]:
# Target is the 'medv' column; every remaining column is used as a predictor.
y = boston['medv']
X = boston.drop(columns='medv')

In [4]:
# Shuffled 5-fold splitter with a fixed seed; the same splitter object is
# passed as `cv` by the later cross-validation and grid-search cells.
kfold = KFold(n_splits=5, shuffle=True, random_state=23)

# Baseline: one hand-picked (alpha, l1_ratio) pair.
elastic = ElasticNet(alpha=0.1, l1_ratio=0.3)

# The scorer returns *negated* MSE, so values closer to zero are better.
results = cross_val_score(
    elastic,
    X,
    y,
    scoring='neg_mean_squared_error',
    cv=kfold,
)

print(results.mean())

-24.478166282670685


In [5]:
# Hand-rolled grid search: cross-validate every (alpha, l1_ratio) pair and
# record the mean negated MSE under a readable label.
l1_rat = [0.1, 0.25, 0.5, 0.8, 0.9]
alpha = [0.1, 0.5, 1, 2, 2.5, 3]

errors = {}
for a in alpha:
    for ratio in l1_rat:
        elastic = ElasticNet(alpha=a, l1_ratio=ratio)
        results = cross_val_score(
            elastic, X, y,
            scoring='neg_mean_squared_error',
            cv=kfold,
        )
        errors[f"alpha:{a},L1_ratio:{ratio}"] = results.mean()

# Best (least negative) score first.
er_pd = pd.Series(errors)
er_pd.sort_values(ascending=False)

alpha:0.1,L1_ratio:0.25   -24.480867
alpha:0.1,L1_ratio:0.5    -24.482401
alpha:0.1,L1_ratio:0.8    -24.493851
alpha:0.1,L1_ratio:0.1    -24.495337
alpha:0.1,L1_ratio:0.9    -24.515351
alpha:0.5,L1_ratio:0.9    -25.570358
alpha:0.5,L1_ratio:0.8    -25.707693
alpha:0.5,L1_ratio:0.5    -25.995300
alpha:0.5,L1_ratio:0.25   -26.123674
alpha:0.5,L1_ratio:0.1    -26.148814
alpha:1,L1_ratio:0.1      -27.381951
alpha:1,L1_ratio:0.25     -27.465115
alpha:1,L1_ratio:0.5      -27.596572
alpha:1,L1_ratio:0.8      -27.848956
alpha:1,L1_ratio:0.9      -27.980729
alpha:2,L1_ratio:0.1      -28.942539
alpha:2,L1_ratio:0.25     -29.289139
alpha:2.5,L1_ratio:0.1    -29.540456
alpha:2.5,L1_ratio:0.25   -30.025901
alpha:3,L1_ratio:0.1      -30.074545
alpha:2,L1_ratio:0.5      -30.159277
alpha:3,L1_ratio:0.25     -30.698680
alpha:2.5,L1_ratio:0.5    -31.293435
alpha:2,L1_ratio:0.8      -31.629662
alpha:2,L1_ratio:0.9      -32.111692
alpha:3,L1_ratio:0.5      -32.253883
alpha:2.5,L1_ratio:0.8    -33.042126
a

# Grid search with cross-validation (GridSearchCV)

In [6]:
from sklearn.model_selection import GridSearchCV

In [7]:
######## Tuning #######
l1_rat=np.linspace(0.001,0.999,20)
alpha=np.linspace(0.001,4,20)

params={'alpha':alpha,'l1_ratio':l1_rat}

elastic=ElasticNet()

gcv=GridSearchCV(elastic,param_grid=params,
                 cv=kfold,
                 scoring='neg_mean_squared_error')

gcv.fit(X,y)


print(gcv.best_params_)
print(gcv.best_score_)

{'alpha': 0.001, 'l1_ratio': 0.6313157894736842}
-23.471789032710266


In [8]:
gcv.cv_results_   # Raw grid-search results as a dict of arrays (timings, per-split and mean test scores)

{'mean_fit_time': array([0.00692182, 0.00421772, 0.00348191, 0.00377154, 0.00360932,
        0.00310493, 0.00356312, 0.0037571 , 0.00390925, 0.00356736,
        0.00360041, 0.00347853, 0.003474  , 0.00367632, 0.00339479,
        0.00421042, 0.00325351, 0.00337052, 0.00365024, 0.00355697,
        0.00640621, 0.00388274, 0.00301991, 0.00303054, 0.0030386 ,
        0.00294175, 0.00293326, 0.00326371, 0.00334663, 0.00287819,
        0.0033011 , 0.00298605, 0.00299072, 0.00335522, 0.00302544,
        0.00302315, 0.00326934, 0.00298162, 0.00333357, 0.0032023 ,
        0.00324903, 0.00337729, 0.00336723, 0.00324011, 0.00327053,
        0.0032464 , 0.00326872, 0.00326419, 0.00326629, 0.00327668,
        0.00309458, 0.00295372, 0.00285759, 0.00285239, 0.0029016 ,
        0.00292726, 0.00298476, 0.00294991, 0.00297799, 0.00299115,
        0.00336523, 0.00324488, 0.00300426, 0.00290065, 0.00295868,
        0.00315194, 0.00306144, 0.00300512, 0.00334387, 0.00299993,
        0.00339994, 0.00317817,

In [9]:
# Tabulate the raw cv_results_ dict: one row per hyper-parameter candidate.
pd_cv = pd.DataFrame(data=gcv.cv_results_)
pd_cv

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_l1_ratio,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.006922,0.001423,0.003682,0.000575,0.001,0.001,"{'alpha': 0.001, 'l1_ratio': 0.001}",-21.722305,-20.874076,-24.111755,-26.857132,-23.876826,-23.488419,2.090052,20
1,0.004218,0.000356,0.002474,0.000113,0.001,0.053526,"{'alpha': 0.001, 'l1_ratio': 0.053526315789473...",-21.726759,-20.862238,-24.117552,-26.846791,-23.876765,-23.486021,2.089284,19
2,0.003482,0.000274,0.002230,0.000136,0.001,0.106053,"{'alpha': 0.001, 'l1_ratio': 0.10605263157894737}",-21.731642,-20.850172,-24.123726,-26.836294,-23.876917,-23.483750,2.088508,17
3,0.003772,0.000140,0.002309,0.000168,0.001,0.158579,"{'alpha': 0.001, 'l1_ratio': 0.15857894736842104}",-21.736986,-20.837871,-24.130303,-26.825637,-23.877300,-23.481619,2.087724,16
4,0.003609,0.000289,0.002259,0.000134,0.001,0.211105,"{'alpha': 0.001, 'l1_ratio': 0.21110526315789474}",-21.742827,-20.825330,-24.137310,-26.814821,-23.877929,-23.479643,2.086934,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,0.003064,0.000350,0.002116,0.000196,4.0,0.788895,"{'alpha': 4.0, 'l1_ratio': 0.7888947368421053}",-39.171475,-27.490894,-37.749212,-35.334297,-39.777777,-35.904731,4.476433,390
396,0.002927,0.000105,0.002025,0.000107,4.0,0.841421,"{'alpha': 4.0, 'l1_ratio': 0.841421052631579}",-39.508675,-27.852585,-38.291453,-35.651955,-40.125012,-36.285936,4.487092,394
397,0.003164,0.000269,0.002201,0.000125,4.0,0.893947,"{'alpha': 4.0, 'l1_ratio': 0.8939473684210526}",-39.896541,-28.271249,-38.886328,-36.020666,-40.536744,-36.722306,4.499364,397
398,0.002820,0.000057,0.002016,0.000100,4.0,0.946474,"{'alpha': 4.0, 'l1_ratio': 0.9464736842105264}",-40.347816,-28.765026,-39.371320,-36.446317,-41.004263,-37.186948,4.490251,399


# Concrete dataset


In [10]:
# Load the concrete-strength data (the CSV is expected in the working directory).
concrete = pd.read_csv('Concrete_Data.csv')

concrete

Unnamed: 0,Cement,Blast,Fly,Water,Superplasticizer,Coarse,Fine,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.30
...,...,...,...,...,...,...,...,...,...
1025,276.4,116.0,90.3,179.6,8.9,870.1,768.3,28,44.28
1026,322.2,0.0,115.6,196.0,10.4,817.9,813.4,28,31.18
1027,148.5,139.4,108.6,192.7,6.1,892.4,780.0,28,23.70
1028,159.1,186.7,0.0,175.6,11.3,989.6,788.9,28,32.77


In [11]:
# Target is 'Strength'; every remaining column is used as a predictor.
y = concrete['Strength']
X = concrete.drop(columns='Strength')

In [12]:
# Candidate values: 20 evenly spaced points per hyper-parameter.
l1_rat = np.linspace(start=0.001, stop=0.999, num=20)
alpha = np.linspace(start=0.001, stop=4, num=20)

In [13]:
# Exhaustive search over every (alpha, l1_ratio) pair on the concrete data,
# scored by negated MSE on the shared folds.
params = dict(alpha=alpha, l1_ratio=l1_rat)

elastic = ElasticNet()
gcv = GridSearchCV(
    estimator=elastic,
    param_grid=params,
    scoring='neg_mean_squared_error',
    cv=kfold,
)
gcv.fit(X, y)

print(gcv.best_params_)
print(gcv.best_score_)

{'alpha': 3.7895263157894736, 'l1_ratio': 0.001}
-109.81419916701809


In [14]:
# Tabulate the raw cv_results_ dict: one row per hyper-parameter candidate.
pd_cv = pd.DataFrame(data=gcv.cv_results_)
pd_cv

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_l1_ratio,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.008533,0.001866,0.003186,0.000603,0.001,0.001,"{'alpha': 0.001, 'l1_ratio': 0.001}",-98.251483,-118.027976,-102.874701,-117.105588,-113.035733,-109.859096,7.910142,109
1,0.004870,0.000372,0.002126,0.000220,0.001,0.053526,"{'alpha': 0.001, 'l1_ratio': 0.053526315789473...",-98.251508,-118.027954,-102.874734,-117.105581,-113.035715,-109.859098,7.910122,110
2,0.003989,0.000072,0.001810,0.000061,0.001,0.106053,"{'alpha': 0.001, 'l1_ratio': 0.10605263157894737}",-98.251532,-118.027931,-102.874767,-117.105574,-113.035697,-109.859100,7.910101,111
3,0.004035,0.000158,0.001848,0.000106,0.001,0.158579,"{'alpha': 0.001, 'l1_ratio': 0.15857894736842104}",-98.251556,-118.027908,-102.874799,-117.105568,-113.035679,-109.859102,7.910081,112
4,0.003876,0.000073,0.001816,0.000090,0.001,0.211105,"{'alpha': 0.001, 'l1_ratio': 0.21110526315789474}",-98.251581,-118.027886,-102.874832,-117.105561,-113.035661,-109.859104,7.910061,113
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,0.003033,0.000122,0.001873,0.000113,4.0,0.788895,"{'alpha': 4.0, 'l1_ratio': 0.7888947368421053}",-100.137731,-116.681148,-105.988832,-117.241943,-111.788173,-110.367566,6.526245,386
396,0.003119,0.000072,0.001957,0.000060,4.0,0.841421,"{'alpha': 4.0, 'l1_ratio': 0.841421052631579}",-100.226394,-116.642418,-106.222356,-117.274144,-111.765763,-110.426215,6.465887,391
397,0.003386,0.000216,0.002067,0.000092,4.0,0.893947,"{'alpha': 4.0, 'l1_ratio': 0.8939473684210526}",-100.322317,-116.607565,-106.405168,-117.311573,-111.746957,-110.478716,6.412572,396
398,0.003126,0.000093,0.001953,0.000073,4.0,0.946474,"{'alpha': 4.0, 'l1_ratio': 0.9464736842105264}",-100.435034,-116.577050,-106.463609,-117.354625,-111.732290,-110.512522,6.372286,398


# Insurance dataset (insurance.csv)

In [15]:
# Read the insurance records, then one-hot encode the categorical columns,
# dropping the first level of each to avoid a redundant dummy column.
insurance_raw = pd.read_csv('insurance.csv')
insurance = pd.get_dummies(insurance_raw, drop_first=True)

insurance

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes,region_northwest,region_southeast,region_southwest
0,19,27.900,0,16884.92400,False,True,False,False,True
1,18,33.770,1,1725.55230,True,False,False,True,False
2,28,33.000,3,4449.46200,True,False,False,True,False
3,33,22.705,0,21984.47061,True,False,True,False,False
4,32,28.880,0,3866.85520,True,False,True,False,False
...,...,...,...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,True,False,True,False,False
1334,18,31.920,0,2205.98080,False,False,False,False,False
1335,18,36.850,0,1629.83350,False,False,False,True,False
1336,21,25.800,0,2007.94500,False,False,False,False,True


In [16]:
# Target is 'charges'; every remaining (encoded) column is used as a predictor.
y = insurance['charges']
X = insurance.drop(columns='charges')

In [17]:
# Candidate values: 20 evenly spaced points per hyper-parameter.
l1_rat = np.linspace(start=0.001, stop=0.999, num=20)
alpha = np.linspace(start=0.001, stop=4, num=20)

In [18]:
# Exhaustive search over every (alpha, l1_ratio) pair on the insurance data,
# scored by negated MSE on the shared folds.
params = dict(alpha=alpha, l1_ratio=l1_rat)

elastic = ElasticNet()
gcv = GridSearchCV(
    estimator=elastic,
    param_grid=params,
    scoring='neg_mean_squared_error',
    cv=kfold,
)
gcv.fit(X, y)

print(gcv.best_params_)
print(gcv.best_score_)

{'alpha': 0.4219473684210526, 'l1_ratio': 0.999}
-37001889.66821414


In [19]:
# Tabulate the raw cv_results_ dict: one row per hyper-parameter candidate.
pd_cv = pd.DataFrame(data=gcv.cv_results_)
pd_cv

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_l1_ratio,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.006753,0.001503,0.003484,0.000533,0.001,0.001,"{'alpha': 0.001, 'l1_ratio': 0.001}",-3.460795e+07,-3.584445e+07,-3.571049e+07,-4.266914e+07,-3.619432e+07,-3.700527e+07,2.881255e+06,26
1,0.004258,0.000374,0.002434,0.000181,0.001,0.053526,"{'alpha': 0.001, 'l1_ratio': 0.053526315789473...",-3.461007e+07,-3.584523e+07,-3.570782e+07,-4.266942e+07,-3.619229e+07,-3.700497e+07,2.881303e+06,25
2,0.003518,0.000342,0.001994,0.000095,0.001,0.106053,"{'alpha': 0.001, 'l1_ratio': 0.10605263157894737}",-3.461222e+07,-3.584604e+07,-3.570516e+07,-4.266972e+07,-3.619027e+07,-3.700468e+07,2.881353e+06,24
3,0.003325,0.000115,0.001960,0.000120,0.001,0.158579,"{'alpha': 0.001, 'l1_ratio': 0.15857894736842104}",-3.461438e+07,-3.584686e+07,-3.570252e+07,-4.267004e+07,-3.618827e+07,-3.700441e+07,2.881405e+06,23
4,0.003830,0.000524,0.002144,0.000248,0.001,0.211105,"{'alpha': 0.001, 'l1_ratio': 0.21110526315789474}",-3.461657e+07,-3.584771e+07,-3.569989e+07,-4.267038e+07,-3.618628e+07,-3.700417e+07,2.881459e+06,21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,0.003038,0.000014,0.001840,0.000024,4.0,0.788895,"{'alpha': 4.0, 'l1_ratio': 0.7888947368421053}",-9.329561e+07,-9.976168e+07,-1.028993e+08,-1.112993e+08,-1.023773e+08,-1.019266e+08,5.799600e+06,219
396,0.003092,0.000098,0.001949,0.000149,4.0,0.841421,"{'alpha': 4.0, 'l1_ratio': 0.841421052631579}",-8.717289e+07,-9.337685e+07,-9.647982e+07,-1.044763e+08,-9.600648e+07,-9.550246e+07,5.579227e+06,188
397,0.003047,0.000010,0.001879,0.000061,4.0,0.893947,"{'alpha': 4.0, 'l1_ratio': 0.8939473684210526}",-7.749147e+07,-8.320119e+07,-8.625936e+07,-9.362358e+07,-8.583764e+07,-8.528265e+07,5.212831e+06,147
398,0.003053,0.000014,0.001894,0.000086,4.0,0.946474,"{'alpha': 4.0, 'l1_ratio': 0.9464736842105264}",-6.031342e+07,-6.490548e+07,-6.781845e+07,-7.412586e+07,-6.748237e+07,-6.692912e+07,4.488935e+06,99


In [20]:
# Inference: predict charges for the unlabeled insurance records using the
# best estimator found by the grid search.
best_model = gcv.best_estimator_

unlabel_data = pd.read_csv('tst_insure.csv')

# One-hot encode WITHOUT drop_first here. get_dummies derives its columns from
# the category values present in *this* file, so dropping "the first level"
# could drop a different level than it did on the training data (or a level
# could be missing entirely), silently shifting or breaking the feature set.
unlabel_data = pd.get_dummies(unlabel_data)

# Align to exactly the columns (and column order) the model was fitted on:
# extra dummy columns (e.g. the training baseline levels) are discarded and
# dummy columns for categories absent from this file are added as all-zero.
# NOTE(review): a missing *numeric* column would also be zero-filled here —
# confirm the unlabeled file carries every raw input column.
unlabel_data = unlabel_data.reindex(columns=best_model.feature_names_in_,
                                    fill_value=0)

y_pred = best_model.predict(unlabel_data)

In [21]:
# Show the predicted charges for the unlabeled records.
y_pred

array([25242.18981459,  3471.43289454,  6729.07190866,  3773.84367068,
        6102.33134604, 11822.90897151,   618.77473691,  2685.79199629,
       34130.92005875, 12720.13635342,  4505.45084458, 25377.77464579,
       13154.21980106, 26701.68658459,  9468.44399904, 12207.87522563,
       11208.06154242, 13852.98252035,  1144.3216274 ,  2964.49485932,
        2210.0779437 ,  3434.99296017,  1785.99403038, 13159.08362352,
        4974.89296557,  5227.5509726 ,  6111.10119841, 28968.19764738,
        6079.82292765, 30240.99114655,  9087.44088554, 34962.03365803,
       13414.37470692, 14502.16420412,  1978.09903491,  5245.7065463 ,
        9321.44724396, 10305.6995601 , 30562.55482635,  7449.15582018,
       12298.57698533, 31315.09514558, 24885.16337157, 30458.09587647,
       10752.20397586, 31314.29668046, 27949.00791349, 14763.38422258,
        6391.96789611,  7151.84967182, 11480.95402145,  7176.15264537,
        8610.135211  ,  6944.23721788, 17839.19386378,  3491.6121133 ,
      