# ***K-fold Cross Validation***
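K-fold cross-validation scores the model on several randomly shuffled train/validation splits of the data, so the error estimate does not depend on one lucky (or unlucky) split and under- or over-fitting is easier to spot.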

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import ElasticNet, Lasso, LinearRegression, Ridge
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_error as mse
from sklearn.model_selection import GridSearchCV, KFold, cross_val_score, train_test_split


In [None]:
# Load the Boston housing data: 'medv' is the regression target and all
# remaining columns are used as predictors.
boston = pd.read_csv('Boston.csv')
y = boston['medv']
X = boston.drop(columns='medv')

# Hold out 30% of the rows as a test set; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=23
)

In [None]:
# Shuffled 5-fold splitter; the fixed seed keeps the fold assignment repeatable.
kfold = KFold(n_splits=5, shuffle=True, random_state=23)

# One ElasticNet configuration, scored by negated MSE on the training split.
elastic = ElasticNet(alpha=0.1, l1_ratio=0.3)
result = cross_val_score(
    elastic,
    X_train,
    y_train,
    cv=kfold,
    scoring='neg_mean_squared_error',
)

# Average score over the five folds (closer to zero is better).
result.mean()

-25.717554381469746

In [None]:
# Shuffled 5-fold splitter, re-created here so the cell is self-contained.
kfold = KFold(n_splits=5, shuffle=True, random_state=23)

# Candidate regularisation strengths (alpha) and L1/L2 mixing ratios (l1_ratio).
l1_rat = [0.1, 0.25, 0.5, 0.8, 0.9]
alpha = [0.1, 0.5, 1, 2, 2.5, 3]

# Mean cross-validated negated MSE for every alpha / l1_ratio pair,
# keyed by a human-readable label.
error = {}
for a in alpha:
    for l in l1_rat:
        elastic = ElasticNet(alpha=a, l1_ratio=l)
        result = cross_val_score(
            elastic, X_train, y_train,
            cv=kfold, scoring='neg_mean_squared_error',
        )
        error[f'alpha : {a}, l1_ratio : {l}'] = result.mean()

# Best (least negative) combinations first.
er_pd = pd.Series(error)
er_pd.sort_values(ascending=False)


alpha : 0.1, l1_ratio : 0.25   -25.717792
alpha : 0.1, l1_ratio : 0.5    -25.719130
alpha : 0.1, l1_ratio : 0.1    -25.727927
alpha : 0.1, l1_ratio : 0.8    -25.765598
alpha : 0.1, l1_ratio : 0.9    -25.793736
alpha : 0.5, l1_ratio : 0.9    -26.760333
alpha : 0.5, l1_ratio : 0.8    -26.938562
alpha : 0.5, l1_ratio : 0.5    -27.295059
alpha : 0.5, l1_ratio : 0.25   -27.444005
alpha : 0.5, l1_ratio : 0.1    -27.477902
alpha : 1, l1_ratio : 0.1      -28.694267
alpha : 1, l1_ratio : 0.25     -28.783695
alpha : 1, l1_ratio : 0.5      -28.946088
alpha : 1, l1_ratio : 0.8      -29.214591
alpha : 1, l1_ratio : 0.9      -29.347131
alpha : 2, l1_ratio : 0.1      -30.177899
alpha : 2, l1_ratio : 0.25     -30.554534
alpha : 2.5, l1_ratio : 0.1    -30.746715
alpha : 2.5, l1_ratio : 0.25   -31.246435
alpha : 3, l1_ratio : 0.1      -31.252100
alpha : 2, l1_ratio : 0.5      -31.463276
alpha : 3, l1_ratio : 0.25     -31.880899
alpha : 2.5, l1_ratio : 0.5    -32.565499
alpha : 2, l1_ratio : 0.8      -32

# ***GridSearchCV***
`GridSearchCV` cross-validates every combination in a supplied parameter grid and reports the best-scoring one, replacing the hand-written nested loop above.



In [None]:
# Grid search over the same alpha / l1_ratio candidates and the same shuffled
# 5-fold splitter as the manual nested loop, so the two results are comparable.
kfold = KFold(n_splits=5, shuffle=True, random_state=23)
l1_rat = [0.1, 0.25, 0.5, 0.8, 0.9]
alpha = [0.1, 0.5, 1, 2, 2.5, 3]
params = {'alpha': alpha, 'l1_ratio': l1_rat}

# Create the estimator here rather than reusing whichever `elastic` a previous
# cell left in the kernel; this keeps the cell independent of execution order.
elastic = ElasticNet()
gcv = GridSearchCV(elastic, param_grid=params, cv=kfold, scoring='neg_mean_squared_error')

# Tune on the training split only (as the manual loop does) so the held-out
# test rows are not used for model selection.
gcv.fit(X_train, y_train)

# Show a compact, ranked table instead of dumping the raw cv_results_ dict.
cv_table = pd.DataFrame(gcv.cv_results_)[
    ['param_alpha', 'param_l1_ratio', 'mean_test_score', 'std_test_score', 'rank_test_score']
].sort_values('rank_test_score')
print(cv_table.head(10).to_string(index=False))
print(gcv.best_params_)
print(gcv.best_score_)

{'alpha': 0.1, 'l1_ratio': 0.9}
-37311388.216144435


**Testing on a Different Dataset**

In [None]:
# Concrete data: 'Strength' is the target, every other column is a predictor.
cement = pd.read_csv("Concrete_Data.csv")
X = cement.drop('Strength', axis=1)
y = cement['Strength']
# 85/15 split; note the grid search below is fitted on the full X, y.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=23)

elastic = ElasticNet()

# Shuffled 5-fold CV over a small alpha / l1_ratio grid, scored by negated MSE.
kfold = KFold(n_splits=5, shuffle=True, random_state=23)
l1_rat = [0.1, 0.25, 0.5, 0.8, 0.9]
alpha = [0.1, 0.5, 1, 2, 2.5, 3]
params = {'alpha': alpha, 'l1_ratio': l1_rat}

gcv = GridSearchCV(elastic, param_grid=params, cv=kfold, scoring='neg_mean_squared_error')

gcv.fit(X, y)

# Show a compact, ranked table instead of dumping the raw cv_results_ dict.
cv_table = pd.DataFrame(gcv.cv_results_)[
    ['param_alpha', 'param_l1_ratio', 'mean_test_score', 'std_test_score', 'rank_test_score']
].sort_values('rank_test_score')
print(cv_table.head(10).to_string(index=False))
print(gcv.best_params_)
print(gcv.best_score_)

# Predict the unlabeled rows with the best estimator found by the search.
best_model = gcv.best_estimator_
unlabeled_data = pd.read_csv("testConcrete.csv")
# Select the predictors by name, in training order: a missing column fails
# loudly (KeyError) and extra or reordered columns cannot shift the features.
y_pred = best_model.predict(unlabeled_data[X.columns])
print(y_pred)

{'alpha': 2, 'l1_ratio': 0.1}
-109.83545348980822
[69.04206293 31.36500688 19.44246045 46.16077133 58.82326551 17.09294809
 49.53732274 81.06554109 32.06327192 42.74967593 43.81101858 61.76073566
 51.61510166 15.47210704]


In [None]:
# Re-read the concrete dataset (the same file an earlier cell loads); the next
# cell derives its features and target from `cement`.
cement = pd.read_csv("Concrete_Data.csv")

In [None]:
# Baseline: ordinary least squares on the concrete data, trained on an 85/15
# split and scored by MSE on the held-out 15%.
X = cement.drop('Strength', axis=1)
y = cement['Strength']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=23)

lr = LinearRegression()
lr.fit(X_train, y_train)

# Test-set mean squared error of the unregularised model.
y_pred = lr.predict(X_test)
print(mean_squared_error(y_test, y_pred))

107.68123684942857


# Inference
Prediction on the unlabeled data

In [None]:
# Insurance data: one-hot encode the categorical columns (dropping the first
# level of each) and predict 'charges' from the remaining columns.
med_raw = pd.read_csv("insurance.csv")
med = pd.get_dummies(med_raw, drop_first=True)
X = med.drop(['charges'], axis=1)
y = med['charges']

# 85/15 split; note the grid search below is fitted on the full X, y.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=23)

elastic = ElasticNet()

# Shuffled 5-fold CV over a 20 x 20 grid of l1_ratio / alpha values.
kfold = KFold(n_splits=5, shuffle=True, random_state=23)
l1_rat = np.linspace(0.001, 0.999, 20)
alpha = np.linspace(0.001, 4, 20)
params = {'alpha': alpha, 'l1_ratio': l1_rat}

gcv = GridSearchCV(elastic, param_grid=params, cv=kfold, scoring='neg_mean_squared_error')

gcv.fit(X, y)

# Show a compact, ranked table instead of dumping the raw cv_results_ dict.
cv_table = pd.DataFrame(gcv.cv_results_)[
    ['param_alpha', 'param_l1_ratio', 'mean_test_score', 'std_test_score', 'rank_test_score']
].sort_values('rank_test_score')
print(cv_table.head(10).to_string(index=False))
print(gcv.best_params_)
print(gcv.best_score_)

# Best model estimator
best_model = gcv.best_estimator_
unlabeled_data = pd.read_csv("tst_insure.csv")

# Fail early if the unlabeled file lacks any raw predictor column; otherwise
# the reindex below would silently fill a genuinely missing feature with 0.
missing_cols = [
    col for col in med_raw.columns
    if col != 'charges' and col not in unlabeled_data.columns
]
if missing_cols:
    raise ValueError(f"tst_insure.csv is missing expected columns: {missing_cols}")

# Encode every level (no drop_first), then keep exactly the training columns in
# training order. Encoding the unlabeled file independently with drop_first=True
# can drop a different level, or omit a dummy column, whenever a category is
# absent from the file, which would misalign the features.
X_unlabeled = pd.get_dummies(unlabeled_data).reindex(columns=X.columns, fill_value=0)
y_unlabled = X_unlabeled  # original (misspelled) name kept in case other cells reference it
y_pred = best_model.predict(X_unlabeled)
print(y_pred)

{'alpha': 0.4219473684210526, 'l1_ratio': 0.999}
-37001889.66821415
[25242.18981459  3471.43289454  6729.07190866  3773.84367068
  6102.33134604 11822.90897151   618.77473691  2685.79199629
 34130.92005875 12720.13635342  4505.45084458 25377.77464579
 13154.21980106 26701.68658459  9468.44399904 12207.87522563
 11208.06154242 13852.98252035  1144.3216274   2964.49485932
  2210.0779437   3434.99296017  1785.99403038 13159.08362352
  4974.89296557  5227.5509726   6111.10119841 28968.19764738
  6079.82292765 30240.99114655  9087.44088554 34962.03365803
 13414.37470692 14502.16420412  1978.09903491  5245.7065463
  9321.44724396 10305.6995601  30562.55482635  7449.15582018
 12298.57698533 31315.09514558 24885.16337157 30458.09587647
 10752.20397586 31314.29668046 27949.00791349 14763.38422258
  6391.96789611  7151.84967182 11480.95402145  7176.15264537
  8610.135211    6944.23721788 17839.19386378  3491.6121133
 10779.60528526 11284.35923264 29999.44109131  6369.54237334
  5526.49415645 110