In [93]:
# importing necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math

from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import GridSearchCV

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet

from sklearn.metrics import mean_absolute_error, mean_squared_error

from joblib import  dump, load

CROSS VALIDATION (Train-Test Split)

In [38]:
def root_mean_square_error(y_test, model_predictions):
    """Return the root mean squared error (RMSE) of a set of predictions.

    Parameters
    ----------
    y_test : true target values (forwarded to ``mean_squared_error`` first).
    model_predictions : predicted target values (forwarded second).

    Returns
    -------
    float
        The square root of the mean squared error.
    """
    return math.sqrt(mean_squared_error(y_test, model_predictions))

In [3]:
# Read the advertising data set from the CSV file (path is relative to the notebook)
df = pd.read_csv("Advertising.csv")

In [4]:
df.head(5)

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [5]:
# Separate the target column ("sales") from the feature columns
y = df["sales"]
X = df.drop(columns="sales")

In [6]:
# Hold out 30% of the rows as the test set; random_state makes the split reproducible
XTrain, XTest, yTrain, yTest = train_test_split(X, y, test_size=0.3, random_state=101)

In [7]:
# Scaling the data
# Create an instance of a feature scaler.
# The scaler must never be *fitted* on the test data (that would leak test
# information); the test data is still transformed with the training statistics.
scaler = StandardScaler()

In [8]:
# Fit the scaler on the training X features only
scaler.fit(XTrain)

StandardScaler()

In [9]:
# Apply the fitted scaler to both the training and the test X features
scaledXTrain = scaler.transform(XTrain)
scaledXTest = scaler.transform(XTest)

If L2 regularization is to be used (Ridge Regression)

In [10]:
# Build a Ridge regressor with alpha=100 and train it on the scaled training
# features and the training label; fit() returns the estimator itself, so the
# fitted model can be bound in one statement
ridgeModel = Ridge(alpha=100).fit(scaledXTrain, yTrain)

# Generate predictions for the scaled test features
testPredictions = ridgeModel.predict(scaledXTest)

In [11]:
testPredictions

array([15.34908128, 17.05755308, 12.73784965, 16.18231062, 10.85075815,
        9.87999576, 17.6105132 , 15.80786278, 11.32616781, 17.30158479,
       12.8883864 , 13.64670913, 13.71636726, 18.83377117, 17.38617584,
       11.59912699, 14.88899736, 10.07145317, 10.14692243, 17.90771073,
       10.25837266, 16.71492563, 20.57087744, 19.66643199, 10.14020781,
       13.40084066, 18.09910709, 10.80433113, 13.00876939, 13.79206361,
       12.73015096, 17.42108555, 11.50183684, 10.10362749, 16.18778637,
       10.45161746, 11.25953403, 10.42658319, 12.30681396, 11.82281519,
       14.75707677, 11.58372535, 12.01609545, 10.90016204, 12.55896716,
       11.62961585, 10.8495293 , 15.74187916, 14.09264772, 18.45114683,
       13.43419788, 14.05075373, 16.0980788 , 12.07046074, 13.15048011,
        8.75095421, 19.21013193, 12.92686996, 16.49277745, 14.83525505])

In [12]:
ridgeModel.coef_

array([2.22254275, 1.61969192, 0.31224357])

In [13]:
# Evaluate the error metrics of the alpha=100 model on the test set
MAE = mean_absolute_error(yTest, testPredictions)
MSE = mean_squared_error(yTest, testPredictions)
RMSE = root_mean_square_error(y_test=yTest, model_predictions=testPredictions)

In [14]:
MAE

2.1631741364394355

In [15]:
MSE

7.341775789034128

In [16]:
RMSE

2.7095711448556075

Is alpha = 100 the best possible value? To find out, we have to try another alpha value.
This process of adjusting a model setting to improve the test result is known as hyperparameter tuning.

In [17]:
# Second Ridge regressor, this time with alpha=1, trained on the same scaled
# training features and training label (fit() returns the fitted estimator)
ridgeModel2 = Ridge(alpha=1).fit(scaledXTrain, yTrain)

# Generate predictions for the scaled test features
testPredictions2 = ridgeModel2.predict(scaledXTest)

In [18]:
testPredictions2

array([15.73544249, 19.56177685, 11.47282584, 16.99614361,  9.19583919,
        7.06034338, 20.24078477, 17.27047482,  9.7997058 , 19.18969381,
       12.40827613, 13.88321006, 13.72330625, 21.24960621, 18.41451801,
       10.00739858, 15.54023734,  7.72694272,  7.59886443, 20.3595504 ,
        7.831815  , 18.21607253, 24.61611392, 22.77116018,  8.0117733 ,
       12.667102  , 21.40567156,  8.10250725, 12.43158049, 12.53481984,
       10.81678067, 19.21537816, 10.09192883,  6.76998079, 17.29636618,
        7.81497124,  9.28808588,  8.31202002, 10.6122371 , 10.6533735 ,
       13.05491413,  9.80364168, 10.24764859,  8.09836046, 11.58209801,
       10.10783927,  9.025001  , 16.24936342, 13.26025422, 20.77690029,
       12.51477346, 13.96784546, 17.53696507, 11.15686875, 12.57233878,
        5.56009018, 23.21824128, 12.62301353, 18.72931877, 15.18197827])

In [19]:
ridgeModel2.coef_

array([3.73961770e+00, 2.74477719e+00, 2.68837098e-03])

In [20]:
# Evaluate the error metrics of the alpha=1 model on the test set
MAE2 = mean_absolute_error(yTest, testPredictions2)
MSE2 = mean_squared_error(yTest, testPredictions2)
RMSE2 = root_mean_square_error(y_test=yTest, model_predictions=testPredictions2)

In [21]:
MAE2

1.216876844358058

In [22]:
MSE2

2.3190215794287523

In [23]:
RMSE2

1.5228334050147285

We can see that alpha = 1 performs well. But for a large dataset, it would be tedious to keep creating new models and testing
new alpha values by hand to find the optimum alpha value. This is a disadvantage. That is where cross validation and grid search come
into play.

Secondly, the evaluation report (i.e. the mean squared error value) is not the best estimate we could get for the model, because the same test set was also used to choose alpha.

CROSS VALIDATION (Train-Validation-Test Split)

In [24]:
# Read the dataset into a separate DataFrame for this section and preview the first rows
dfX = pd.read_csv('Advertising.csv')
dfX.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [27]:
# Separate the target column ("sales") from the feature columns
yy = dfX["sales"]
Xx = dfX.drop(columns="sales")

In [28]:
# Perform the train / validation / test split on the features and label
# a. first split: 70% training, 30% held out for validation + test
X_train, X_other, y_train, y_other = train_test_split(Xx, yy, test_size=0.30, random_state=101)

# b. second split: divide the held-out 30% in half -> 15% validation, 15% test
X_val, X_test, y_val, y_test = train_test_split(X_other, y_other, test_size=0.50, random_state=101)

In [29]:
# Scaling the data
# Create an instance of a feature scaler.
# The scaler must never be *fitted* on the validation or test data; those sets
# are still transformed, using the statistics learned from the training set.
scaler2 = StandardScaler()

In [30]:
# Fit the scaler on the training X features only
scaler2.fit(X_train)

StandardScaler()

In [31]:
# Apply the fitted scaler to the training, test and validation X features
scaled_X_train = scaler2.transform(X_train)
scaled_X_test = scaler2.transform(X_test)
scaled_X_val = scaler2.transform(X_val)

If L2 regularization is to be used (Ridge Regression)

In [32]:
# First candidate model: Ridge regression with alpha=100
model_one = Ridge(alpha=100)

In [33]:
# Fit the first model on the scaled training set
model_one.fit(scaled_X_train, y_train)

Ridge(alpha=100)

In [34]:
# Predict on the scaled validation data
# (despite its name, testPredictions3 holds validation-set predictions)
testPredictions3 = model_one.predict(scaled_X_val)

In [35]:
testPredictions3

array([16.0980788 , 10.8495293 ,  8.75095421, 14.83525505, 12.55896716,
       12.8883864 , 11.58372535, 12.01609545, 16.18778637, 10.90016204,
       11.32616781, 17.90771073, 14.09264772, 13.79206361, 13.71636726,
        9.87999576, 15.34908128, 13.00876939, 13.43419788, 10.85075815,
       14.75707677, 18.83377117, 17.30158479, 15.74187916, 16.49277745,
       19.66643199, 17.6105132 , 10.07145317, 13.64670913, 17.42108555])

In [40]:
# Evaluate the error metrics of model_one on the validation set
MAE3 = mean_absolute_error(y_val, testPredictions3)
MSE3 = mean_squared_error(y_val, testPredictions3)
RMSE3 = root_mean_square_error(y_test=y_val, model_predictions=testPredictions3)

In [41]:
MAE3

2.175424374439987

In [42]:
MSE3

7.320101458823869

In [43]:
RMSE3

2.705568601758948

In [44]:
# Second candidate model: Ridge regression with alpha=1
model_two = Ridge(alpha=1)

In [45]:
# Fit the second model on the scaled training set
model_two.fit(scaled_X_train, y_train)

Ridge(alpha=1)

In [46]:
# Predict on the scaled validation data
# (despite its name, testPredictions4 holds validation-set predictions)
testPredictions4 = model_two.predict(scaled_X_val)

In [47]:
testPredictions4

array([17.53696507,  9.025001  ,  5.56009018, 15.18197827, 11.58209801,
       12.40827613,  9.80364168, 10.24764859, 17.29636618,  8.09836046,
        9.7997058 , 20.3595504 , 13.26025422, 12.53481984, 13.72330625,
        7.06034338, 15.73544249, 12.43158049, 12.51477346,  9.19583919,
       13.05491413, 21.24960621, 19.18969381, 16.24936342, 18.72931877,
       22.77116018, 20.24078477,  7.72694272, 13.88321006, 19.21537816])

In [48]:
# Evaluate the error metrics of model_two on the validation set
MAE4 = mean_absolute_error(y_val, testPredictions4)
MSE4 = mean_squared_error(y_val, testPredictions4)
RMSE4 = root_mean_square_error(y_test=y_val, model_predictions=testPredictions4)

In [49]:
MAE4

1.195143424023704

In [50]:
MSE4

2.383783075056987

In [51]:
RMSE4

1.5439504768796786

In [52]:
# Model two (alpha=1) has the lower validation error, so it is the one we
# evaluate on the held-out test set, which was not used for fitting or for
# choosing between the two models
testPredictions5 = model_two.predict(scaled_X_test)

In [53]:
# Evaluate the final error metrics of model_two on the held-out test set
MAE5 = mean_absolute_error(y_test, testPredictions5)
MSE5 = mean_squared_error(y_test, testPredictions5)
RMSE5 = root_mean_square_error(y_test=y_test, model_predictions=testPredictions5)

In [54]:
MAE5

1.238610264692412

In [55]:
MSE5

2.2542600838005176

In [56]:
RMSE5

1.5014193564093004

In [None]:
# CROSS VALIDATION (K-Fold)

In [14]:
# Read the dataset into a separate DataFrame for this section and preview the first rows
dfXX = pd.read_csv('Advertising.csv')
dfXX.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [15]:
# Separate the target column ("sales") from the feature columns
yyy = dfXX["sales"]
Xxx = dfXX.drop(columns="sales")

In [16]:
Xxx, yyy

(        TV  radio  newspaper
 0    230.1   37.8       69.2
 1     44.5   39.3       45.1
 2     17.2   45.9       69.3
 3    151.5   41.3       58.5
 4    180.8   10.8       58.4
 ..     ...    ...        ...
 195   38.2    3.7       13.8
 196   94.2    4.9        8.1
 197  177.0    9.3        6.4
 198  283.6   42.0       66.2
 199  232.1    8.6        8.7
 
 [200 rows x 3 columns],
 0      22.1
 1      10.4
 2       9.3
 3      18.5
 4      12.9
        ... 
 195     7.6
 196     9.7
 197    12.8
 198    25.5
 199    13.4
 Name: sales, Length: 200, dtype: float64)

In [17]:
# Hold out 30% of the rows as the final test set; the remaining 70% is the
# training set on which k-fold cross-validation is run
XxxTrain, XxxTest, yyyTrain, yyyTest = train_test_split(Xxx, yyy, test_size=0.3, random_state=101)

In [18]:
# Scaling the data
# Create an instance of a feature scaler.
# The scaler must never be *fitted* on the test data (that would leak test
# information); the test data is still transformed with the training statistics.
scaler5 = StandardScaler()

In [19]:
# Fit the scaler on the training X features only
scaler5.fit(XxxTrain)

StandardScaler()

In [22]:
# Apply the fitted scaler to both the training and the test X features
scaledXxxTrain = scaler5.transform(XxxTrain)
scaledXxxTest = scaler5.transform(XxxTest)

In [23]:
# First candidate model: Ridge regression with alpha=100
model_five = Ridge(alpha=100)

In [24]:
# Run 5-fold cross-validation of model_five on the scaled training data;
# the result holds one negated-MSE score per fold.
# NOTE(review): the scaler was fitted on the whole training set before this call,
# so each validation fold already influenced the scaling statistics; wrapping the
# scaler and the model in a Pipeline would avoid that - consider.
scores = cross_val_score(model_five,scaledXxxTrain,yyyTrain,scoring='neg_mean_squared_error',cv=5)

In [25]:
# N.B. the scores are negated MSE values, so a higher score (i.e. closer to zero) is better
scores

array([ -9.32552967,  -4.9449624 , -11.39665242,  -7.0242106 ,
        -8.38562723])

In [26]:
# Take the absolute value of the mean score to get a positive mean MSE
# that can be compared with the MSE values computed earlier
abs(scores.mean())

8.215396464543607

In [27]:
# The cross-validated MSE is not so great, so we create another Ridge model with a smaller alpha
model_six = Ridge(alpha=1)

In [30]:
# Run the same 5-fold cross-validation (negated MSE per fold) for model_six
scores2 = cross_val_score(model_six,scaledXxxTrain,yyyTrain,scoring='neg_mean_squared_error',cv=5)

In [31]:
scores2

array([-3.15513238, -1.58086982, -5.40455562, -2.21654481, -4.36709384])

In [32]:
abs(scores2.mean())

3.344839296530696

In [39]:
# We are satisfied with model six; cross_val_score only returned scores, so the
# model still has to be fitted on the scaled X train and y train before predicting
model_six.fit(scaledXxxTrain, yyyTrain)

Ridge(alpha=1)

In [40]:
# Predict on the scaled X test data, which was held out of the cross-validation
testPredictions6 = model_six.predict(scaledXxxTest)

In [41]:
testPredictions6

array([15.73544249, 19.56177685, 11.47282584, 16.99614361,  9.19583919,
        7.06034338, 20.24078477, 17.27047482,  9.7997058 , 19.18969381,
       12.40827613, 13.88321006, 13.72330625, 21.24960621, 18.41451801,
       10.00739858, 15.54023734,  7.72694272,  7.59886443, 20.3595504 ,
        7.831815  , 18.21607253, 24.61611392, 22.77116018,  8.0117733 ,
       12.667102  , 21.40567156,  8.10250725, 12.43158049, 12.53481984,
       10.81678067, 19.21537816, 10.09192883,  6.76998079, 17.29636618,
        7.81497124,  9.28808588,  8.31202002, 10.6122371 , 10.6533735 ,
       13.05491413,  9.80364168, 10.24764859,  8.09836046, 11.58209801,
       10.10783927,  9.025001  , 16.24936342, 13.26025422, 20.77690029,
       12.51477346, 13.96784546, 17.53696507, 11.15686875, 12.57233878,
        5.56009018, 23.21824128, 12.62301353, 18.72931877, 15.18197827])

In [42]:
model_six.coef_

array([3.73961770e+00, 2.74477719e+00, 2.68837098e-03])

In [43]:
# Evaluate the error metrics of model_six on the test set
MAE6 = mean_absolute_error(yyyTest, testPredictions6)
MSE6 = mean_squared_error(yyyTest, testPredictions6)
RMSE6 = root_mean_square_error(y_test=yyyTest, model_predictions=testPredictions6)

In [44]:
MAE6

1.216876844358058

In [45]:
MSE6

2.3190215794287523

In [46]:
RMSE6

1.5228334050147285

CROSS VALIDATION (K-Fold) using cross_validate function

In [64]:
# Set-up for this section: load the data, split it, and scale the features.

# read the dataset
# (no .head() call here: in the middle of a cell its result would be discarded)
dataset = pd.read_csv('Advertising.csv')

# separate X features and y label
X_xx = dataset.drop("sales", axis=1)
y_yy = dataset["sales"]

# hold out 30% of the rows as the test set; random_state makes the split reproducible
X_xxTrain, X_xxTest, y_yyTrain, y_yyTest = train_test_split(X_xx, y_yy, test_size=0.3, random_state=101)

# scaling the data
# create an instance of a feature scaler; it is fitted on the training data only,
# so that no information from the test set leaks into the scaling statistics
scaler8 = StandardScaler()

# fit the scaler to the training X features
scaler8.fit(X_xxTrain)

# apply the fitted scaler to both the training and the test X features
scaledX_xxTrain = scaler8.transform(X_xxTrain)
scaledX_xxTest = scaler8.transform(X_xxTest)

In [65]:
X_xx

Unnamed: 0,TV,radio,newspaper
0,230.1,37.8,69.2
1,44.5,39.3,45.1
2,17.2,45.9,69.3
3,151.5,41.3,58.5
4,180.8,10.8,58.4
...,...,...,...
195,38.2,3.7,13.8
196,94.2,4.9,8.1
197,177.0,9.3,6.4
198,283.6,42.0,66.2


In [66]:
# First candidate model: Ridge regression with alpha=100
model8 = Ridge(alpha=100)

In [70]:
# Run 10-fold cross-validation of model8 on the scaled training data, recording
# two metrics per fold: negated MSE and negated MAE
crossValidateScores = cross_validate(model8,scaledX_xxTrain,y_yyTrain,
                                     scoring=['neg_mean_squared_error','neg_mean_absolute_error'],cv=10)

In [71]:
crossValidateScores

{'fit_time': array([0.00299835, 0.00099969, 0.00100017, 0.00199938, 0.00199771,
        0.00100446, 0.00199699, 0.00099969, 0.00200176, 0.00099683]),
 'score_time': array([0.00099969, 0.00099969, 0.00099874, 0.        , 0.0010004 ,
        0.00199556, 0.0010035 , 0.00199866, 0.0009973 , 0.00100207]),
 'test_neg_mean_squared_error': array([ -6.06067062, -10.62703078,  -3.99342608,  -5.00949402,
         -9.14179955, -13.08625636,  -3.83940454,  -9.05878567,
         -9.05545685,  -5.77888211]),
 'test_neg_mean_absolute_error': array([-1.8102116 , -2.54195751, -1.46959386, -1.86276886, -2.52069737,
        -2.45999491, -1.45197069, -2.37739501, -2.44334397, -1.89979708])}

In [72]:
# Convert the score dictionary to a DataFrame (one row per fold).
# Note: this rebinds crossValidateScores from a dict to a DataFrame.
crossValidateScores = pd.DataFrame(crossValidateScores)

In [73]:
crossValidateScores

Unnamed: 0,fit_time,score_time,test_neg_mean_squared_error,test_neg_mean_absolute_error
0,0.002998,0.001,-6.060671,-1.810212
1,0.001,0.001,-10.627031,-2.541958
2,0.001,0.000999,-3.993426,-1.469594
3,0.001999,0.0,-5.009494,-1.862769
4,0.001998,0.001,-9.1418,-2.520697
5,0.001004,0.001996,-13.086256,-2.459995
6,0.001997,0.001004,-3.839405,-1.451971
7,0.001,0.001999,-9.058786,-2.377395
8,0.002002,0.000997,-9.055457,-2.443344
9,0.000997,0.001002,-5.778882,-1.899797


In [74]:
crossValidateScores.mean()

fit_time                        0.001600
score_time                      0.001100
test_neg_mean_squared_error    -7.565121
test_neg_mean_absolute_error   -2.083773
dtype: float64

In [75]:
# Repeat with a new model where alpha = 1, for comparison with model8 (alpha = 100)
model9 = Ridge(alpha=1)

In [76]:
# Run the same 10-fold cross-validation (negated MSE and negated MAE per fold) for model9
crossValidateScores2 = cross_validate(model9,scaledX_xxTrain,y_yyTrain,
                                     scoring=['neg_mean_squared_error','neg_mean_absolute_error'],cv=10)

In [77]:
crossValidateScores2

{'fit_time': array([0.00199771, 0.00199986, 0.00200129, 0.00099516, 0.00099993,
        0.00099969, 0.00099993, 0.00100017, 0.00099993, 0.00099993]),
 'score_time': array([0.00199914, 0.00099826, 0.00100136, 0.00099969, 0.        ,
        0.00099874, 0.00099897, 0.0009985 , 0.        , 0.00099897]),
 'test_neg_mean_squared_error': array([-2.96250773, -3.05737833, -2.1737403 , -0.83303438, -3.46401792,
        -8.2326467 , -1.90586431, -2.76504844, -4.98950515, -2.84643818]),
 'test_neg_mean_absolute_error': array([-1.45717399, -1.5553078 , -1.23877012, -0.76893775, -1.43448944,
        -1.4943158 , -1.08136203, -1.25001123, -1.58097132, -1.22332553])}

In [78]:
# Convert the score dictionary to a DataFrame (rebinds crossValidateScores2 from dict to DataFrame)
crossValidateScores2 = pd.DataFrame(crossValidateScores2)

In [79]:
crossValidateScores2

Unnamed: 0,fit_time,score_time,test_neg_mean_squared_error,test_neg_mean_absolute_error
0,0.001998,0.001999,-2.962508,-1.457174
1,0.002,0.000998,-3.057378,-1.555308
2,0.002001,0.001001,-2.17374,-1.23877
3,0.000995,0.001,-0.833034,-0.768938
4,0.001,0.0,-3.464018,-1.434489
5,0.001,0.000999,-8.232647,-1.494316
6,0.001,0.000999,-1.905864,-1.081362
7,0.001,0.000998,-2.765048,-1.250011
8,0.001,0.0,-4.989505,-1.580971
9,0.001,0.000999,-2.846438,-1.223326


In [80]:
crossValidateScores2.mean()

fit_time                        0.001299
score_time                      0.000899
test_neg_mean_squared_error    -3.323018
test_neg_mean_absolute_error   -1.308467
dtype: float64

In [81]:
# We are satisfied with model9; cross_validate only returned scores, so the
# model still has to be fitted on the scaled X train and y train before predicting
model9.fit(scaledX_xxTrain, y_yyTrain)

Ridge(alpha=1)

In [82]:
model9.coef_

array([3.73961770e+00, 2.74477719e+00, 2.68837098e-03])

In [83]:
# Predict on the scaled X test data, which was held out of the cross-validation
testPredictions9 = model9.predict(scaledX_xxTest)

In [84]:
testPredictions9

array([15.73544249, 19.56177685, 11.47282584, 16.99614361,  9.19583919,
        7.06034338, 20.24078477, 17.27047482,  9.7997058 , 19.18969381,
       12.40827613, 13.88321006, 13.72330625, 21.24960621, 18.41451801,
       10.00739858, 15.54023734,  7.72694272,  7.59886443, 20.3595504 ,
        7.831815  , 18.21607253, 24.61611392, 22.77116018,  8.0117733 ,
       12.667102  , 21.40567156,  8.10250725, 12.43158049, 12.53481984,
       10.81678067, 19.21537816, 10.09192883,  6.76998079, 17.29636618,
        7.81497124,  9.28808588,  8.31202002, 10.6122371 , 10.6533735 ,
       13.05491413,  9.80364168, 10.24764859,  8.09836046, 11.58209801,
       10.10783927,  9.025001  , 16.24936342, 13.26025422, 20.77690029,
       12.51477346, 13.96784546, 17.53696507, 11.15686875, 12.57233878,
        5.56009018, 23.21824128, 12.62301353, 18.72931877, 15.18197827])

In [86]:
# Evaluate the error metrics of model9 on the test set
MAE9 = mean_absolute_error(y_yyTest, testPredictions9)
MSE9 = mean_squared_error(y_yyTest, testPredictions9)
RMSE9 = root_mean_square_error(y_test=y_yyTest, model_predictions=testPredictions9)

In [87]:
MAE9

1.216876844358058

In [88]:
MSE9

2.3190215794287523

In [89]:
RMSE9

1.5228334050147285

Grid Search

In [94]:
# Reusing the scaled train/test data from the section above, create an
# ElasticNet model with default settings as the base estimator for the grid search
baseElasticNetModel = ElasticNet()

In [95]:
# Candidate values for the ElasticNet hyperparameters we want to tune
# (alpha and l1_ratio); the grid search tries every alpha with every l1_ratio
param_grid = dict(
    alpha=[0.1, 1, 5, 10, 100],
    l1_ratio=[0.1, 0.5, 0.7, 0.95, 0.99, 1.00],
)

In [96]:
# Create the grid search: every parameter combination in param_grid is scored
# with 5-fold cross-validation on negated MSE; verbose=2 prints one line per fit
gridModel = GridSearchCV(estimator=baseElasticNetModel,
                        param_grid=param_grid,
                        scoring='neg_mean_squared_error',
                        cv=5,
                        verbose=2)

In [97]:
# Run the search on the scaled X train and y train
# (5 folds for each of the 30 parameter combinations = 150 fits)
gridModel.fit(scaledX_xxTrain, y_yyTrain)

Fitting 5 folds for each of 30 candidates, totalling 150 fits
[CV] END ............................alpha=0.1, l1_ratio=0.1; total time=   0.4s
[CV] END ............................alpha=0.1, l1_ratio=0.1; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.1; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.1; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.1; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.5; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.5; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.5; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.5; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.5; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.7; total time=   0.0s
[CV] END ............................alpha=0.1,

GridSearchCV(cv=5, estimator=ElasticNet(),
             param_grid={'alpha': [0.1, 1, 5, 10, 100],
                         'l1_ratio': [0.1, 0.5, 0.7, 0.95, 0.99, 1.0]},
             scoring='neg_mean_squared_error', verbose=2)

In [98]:
# Show the best estimator (and hence the best parameters) found by the grid search
gridModel.best_estimator_

ElasticNet(alpha=0.1, l1_ratio=1.0)

In [99]:
gridModel.best_params_

{'alpha': 0.1, 'l1_ratio': 1.0}

In [100]:
# Collect the full cross-validation results for every parameter combination
allResults = gridModel.cv_results_

In [101]:
allResults

{'mean_fit_time': array([0.09840875, 0.00219889, 0.00199866, 0.00140123, 0.00139804,
        0.00099702, 0.00340652, 0.00120091, 0.00099554, 0.00099883,
        0.00120077, 0.00120192, 0.00139689, 0.00099573, 0.00119934,
        0.00119877, 0.00139928, 0.00219979, 0.00519643, 0.00100002,
        0.00119958, 0.00119867, 0.00100055, 0.00099921, 0.00099826,
        0.00099802, 0.00139856, 0.00060029, 0.0010006 , 0.00119934]),
 'std_fit_time': array([1.91821477e-01, 3.98423923e-04, 6.29695650e-04, 4.87798992e-04,
        4.89540298e-04, 2.40979743e-06, 4.32628564e-03, 3.99480671e-04,
        2.43140197e-07, 2.27486810e-06, 3.98469524e-04, 3.98373814e-04,
        4.91245199e-04, 8.47644274e-07, 3.99312148e-04, 7.47946329e-04,
        4.89785533e-04, 1.46962789e-03, 7.90300907e-03, 2.43140197e-07,
        3.99186146e-04, 4.00114898e-04, 1.90734863e-07, 1.31454933e-06,
        2.13248060e-07, 1.50789149e-07, 4.89979474e-04, 4.90135030e-04,
        4.86280395e-07, 4.00021207e-04]),
 'mean_scor

In [102]:
# Convert the results dictionary to a DataFrame, one row per parameter combination
# (this rebinds allResults from a dict to a DataFrame)
allResults = pd.DataFrame(allResults)

In [103]:
allResults

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_l1_ratio,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.098409,0.1918215,0.000601,0.000490503,0.1,0.1,"{'alpha': 0.1, 'l1_ratio': 0.1}",-3.453021,-1.40519,-5.789125,-2.187302,-4.645576,-3.496043,1.591601,6
1,0.002199,0.0003984239,0.001002,3.443147e-06,0.1,0.5,"{'alpha': 0.1, 'l1_ratio': 0.5}",-3.32544,-1.427522,-5.59561,-2.163089,-4.451679,-3.392668,1.506827,5
2,0.001999,0.0006296956,0.001001,2.879244e-06,0.1,0.7,"{'alpha': 0.1, 'l1_ratio': 0.7}",-3.26988,-1.442432,-5.502437,-2.16395,-4.356738,-3.347088,1.462765,4
3,0.001401,0.000487799,0.000398,0.0004878184,0.1,0.95,"{'alpha': 0.1, 'l1_ratio': 0.95}",-3.213052,-1.472417,-5.396258,-2.177452,-4.24108,-3.300052,1.406248,3
4,0.001398,0.0004895403,0.0004,0.0004895784,0.1,0.99,"{'alpha': 0.1, 'l1_ratio': 0.99}",-3.208124,-1.478489,-5.380242,-2.181097,-4.222968,-3.294184,1.396953,2
5,0.000997,2.409797e-06,0.000802,0.0004011396,0.1,1.0,"{'alpha': 0.1, 'l1_ratio': 1.0}",-3.206943,-1.480065,-5.376257,-2.182076,-4.21846,-3.29276,1.394613,1
6,0.003407,0.004326286,0.0004,0.0004893432,1.0,0.1,"{'alpha': 1, 'l1_ratio': 0.1}",-9.827475,-5.261525,-11.875347,-7.449195,-8.542329,-8.591174,2.222939,12
7,0.001201,0.0003994807,0.000999,3.538247e-06,1.0,0.5,"{'alpha': 1, 'l1_ratio': 0.5}",-8.707071,-4.214228,-10.879261,-6.204545,-7.173031,-7.435627,2.255532,11
8,0.000996,2.431402e-07,0.000802,0.0004012108,1.0,0.7,"{'alpha': 1, 'l1_ratio': 0.7}",-7.92087,-3.549562,-10.024877,-5.379553,-6.324836,-6.63994,2.206213,10
9,0.000999,2.274868e-06,0.0006,0.0007996327,1.0,0.95,"{'alpha': 1, 'l1_ratio': 0.95}",-6.729435,-2.591285,-8.709842,-4.156317,-5.329916,-5.503359,2.102835,9


In [105]:
# If we are satisfied with the search result, we can predict directly with gridModel:
# by default (refit=True) GridSearchCV refits the best parameter combination on the
# whole training data, and predict() uses that refitted estimator.
# Predict on the scaled X test data
gridModelPredictions = gridModel.predict(scaledX_xxTest)

In [106]:
gridModelPredictions

array([15.66368907, 19.46408313, 11.5334467 , 16.90361792,  9.28986377,
        7.20512596, 20.12630114, 17.21717961,  9.92059194, 19.08401429,
       12.41632064, 13.90812112, 13.77020299, 21.0764028 , 18.3144613 ,
       10.08244137, 15.55332779,  7.8863488 ,  7.74187755, 20.22579399,
        7.95632931, 18.1169614 , 24.38514439, 22.58250106,  8.15339222,
       12.73095252, 21.27373859,  8.23268934, 12.49004679, 12.52397252,
       10.85578463, 19.10783619, 10.20239152,  6.91681664, 17.22447596,
        7.96982919,  9.39929572,  8.45480843, 10.65442906, 10.75772226,
       13.04497384,  9.92194633, 10.33480469,  8.21121593, 11.62693221,
       10.19036614,  9.11221   , 16.22496113, 13.29734786, 20.61601005,
       12.51850026, 14.00017552, 17.50249596, 11.23188839, 12.64040529,
        5.74152682, 23.02575646, 12.62656713, 18.65905383, 15.16774331])

In [107]:
# Evaluate the error metrics of the grid-search model on the test set
MAE10 = mean_absolute_error(y_yyTest, gridModelPredictions)
MSE10 = mean_squared_error(y_yyTest, gridModelPredictions)
RMSE10 = root_mean_square_error(y_test=y_yyTest, model_predictions=gridModelPredictions)

In [108]:
MAE10

1.222908565262289

In [109]:
MSE10

2.387342642087475

In [110]:
RMSE10

1.5451027933724912