In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the advertising dataset from a CSV file in the working directory.
df = pd.read_csv('Advertising.csv')

In [3]:
# Preview the first rows to confirm the columns: TV, radio, newspaper, sales.
df.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Features are every column except the target; the target is 'sales'.
X = df.drop(columns='sales')
y = df['sales']

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=101
)

# Fit the scaler on the training rows only, then apply that same
# transformation to the test rows so no test information reaches the fit.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
from sklearn.linear_model import ElasticNet

In [6]:
# Print the ElasticNet docstring (constructor defaults and the penalized objective).
# NOTE(review): exploratory output; consider clearing it before sharing the notebook.
help(ElasticNet)

Help on class ElasticNet in module sklearn.linear_model._coordinate_descent:

class ElasticNet(sklearn.base.MultiOutputMixin, sklearn.base.RegressorMixin, sklearn.linear_model._base.LinearModel)
 |  ElasticNet(alpha=1.0, *, l1_ratio=0.5, fit_intercept=True, precompute=False, max_iter=1000, copy_X=True, tol=0.0001, warm_start=False, positive=False, random_state=None, selection='cyclic')
 |  
 |  Linear regression with combined L1 and L2 priors as regularizer.
 |  
 |  Minimizes the objective function::
 |  
 |          1 / (2 * n_samples) * ||y - Xw||^2_2
 |          + alpha * l1_ratio * ||w||_1
 |          + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2
 |  
 |  If you are interested in controlling the L1 and L2 penalty
 |  separately, keep in mind that this is equivalent to::
 |  
 |          a * ||w||_1 + 0.5 * b * ||w||_2^2
 |  
 |  where::
 |  
 |          alpha = a + b and l1_ratio = a / (a + b)
 |  
 |  The parameter l1_ratio corresponds to alpha in the glmnet R package while
 |  alph

Creating A Base Model And Parameter Grid:

In [None]:
# Base estimator for the grid search. No hyperparameters are set here;
# the candidate values are supplied through the parameter grid.
base_elastic_net_model = ElasticNet()

In [8]:
# Candidate hyperparameter values for the ElasticNet grid search.
# 6 alpha values x 6 l1_ratio values = 36 combinations.
param_grid = {
    # Overall regularization strength.
    'alpha': [0.1, 1, 5, 10, 50, 100],
    # Share of the penalty given to the L1 term; 1 means a pure L1 penalty.
    'l1_ratio': [0.1, 0.5, 0.7, 0.95, 0.99, 1],
}

Grid Search With Cross-Validation:

In [9]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Build the cross-validated grid search (nothing is fitted until .fit is called).
grid_model = GridSearchCV(
    # Model whose hyperparameters are being tuned.
    estimator=base_elastic_net_model,
    # Dictionary mapping each parameter name to the list of values to try.
    param_grid=param_grid,
    # Metric used to evaluate every hyperparameter combination during cross-validation.
    scoring='neg_mean_squared_error',
    # Number of cross-validation folds.
    cv=5,
    # Controls how many progress messages are printed while the search runs.
    verbose=2,
)

In [11]:
# Run the search on the scaled training data:
# 36 hyperparameter combinations x 5 folds = 180 fits.
grid_model.fit(X_train, y_train)

Fitting 5 folds for each of 36 candidates, totalling 180 fits
[CV] END ............................alpha=0.1, l1_ratio=0.1; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.1; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.1; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.1; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.1; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.5; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.5; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.5; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.5; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.5; total time=   0.0s
[CV] END ............................alpha=0.1, l1_ratio=0.7; total time=   0.0s
[CV] END ............................alpha=0.1,

In [12]:
# Display the estimator the grid search selected as best.
grid_model.best_estimator_

In [13]:
# Hyperparameter combination that ranked first on mean cross-validated score.
grid_model.best_params_

{'alpha': 0.1, 'l1_ratio': 1}

In [14]:
# Raw dictionary of per-candidate timing and score arrays from the search.
# NOTE(review): this prints every array in full; the DataFrame view is easier to read.
grid_model.cv_results_

{'mean_fit_time': array([0.00334706, 0.00218072, 0.00142622, 0.0023838 , 0.00118303,
        0.00161233, 0.00103831, 0.00079107, 0.00159616, 0.00110812,
        0.00079498, 0.00169148, 0.00069818, 0.00100403, 0.00100465,
        0.00100427, 0.00167041, 0.00238142, 0.00134034, 0.00205588,
        0.00100417, 0.00081367, 0.00166235, 0.00159903, 0.00119996,
        0.00078797, 0.00120149, 0.00114188, 0.00080514, 0.00099721,
        0.00080476, 0.00153089, 0.00113416, 0.00073071, 0.00070386,
        0.00120921]),
 'std_fit_time': array([2.33524365e-03, 1.46857891e-03, 4.71429609e-04, 2.81157042e-03,
        4.06432733e-04, 1.68106176e-03, 4.82760828e-05, 3.95865057e-04,
        8.08059219e-04, 2.41218454e-04, 3.97510131e-04, 1.12649352e-03,
        5.98202023e-04, 1.69430918e-05, 1.10451600e-03, 6.30586023e-04,
        7.30689964e-04, 1.33115355e-03, 6.53390514e-04, 2.11667317e-03,
        3.37432291e-05, 4.07134316e-04, 1.35447225e-03, 7.94102181e-04,
        4.00642059e-04, 3.94340294e-0

In [15]:
# Tabular view of the search results: one row per hyperparameter combination,
# with fit/score timings, per-fold test scores, their mean/std, and the rank.
pd.DataFrame(grid_model.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_l1_ratio,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.003347,0.002335244,0.001946,0.001345,0.1,0.1,"{'alpha': 0.1, 'l1_ratio': 0.1}",-3.453021,-1.40519,-5.789125,-2.187302,-4.645576,-3.496043,1.591601,6
1,0.002181,0.001468579,0.001768,0.001539,0.1,0.5,"{'alpha': 0.1, 'l1_ratio': 0.5}",-3.32544,-1.427522,-5.59561,-2.163089,-4.451679,-3.392668,1.506827,5
2,0.001426,0.0004714296,0.001377,0.000803,0.1,0.7,"{'alpha': 0.1, 'l1_ratio': 0.7}",-3.26988,-1.442432,-5.502437,-2.16395,-4.356738,-3.347088,1.462765,4
3,0.002384,0.00281157,0.000845,0.000428,0.1,0.95,"{'alpha': 0.1, 'l1_ratio': 0.95}",-3.213052,-1.472417,-5.396258,-2.177452,-4.24108,-3.300052,1.406248,3
4,0.001183,0.0004064327,0.001002,2e-05,0.1,0.99,"{'alpha': 0.1, 'l1_ratio': 0.99}",-3.208124,-1.478489,-5.380242,-2.181097,-4.222968,-3.294184,1.396953,2
5,0.001612,0.001681062,0.000938,0.000534,0.1,1.0,"{'alpha': 0.1, 'l1_ratio': 1}",-3.206943,-1.480065,-5.376257,-2.182076,-4.21846,-3.29276,1.394613,1
6,0.001038,4.827608e-05,0.000202,0.000398,1.0,0.1,"{'alpha': 1, 'l1_ratio': 0.1}",-9.827475,-5.261525,-11.875347,-7.449195,-8.542329,-8.591174,2.222939,12
7,0.000791,0.0003958651,0.000398,0.000488,1.0,0.5,"{'alpha': 1, 'l1_ratio': 0.5}",-8.707071,-4.214228,-10.879261,-6.204545,-7.173031,-7.435627,2.255532,11
8,0.001596,0.0008080592,0.001395,0.001353,1.0,0.7,"{'alpha': 1, 'l1_ratio': 0.7}",-7.92087,-3.549562,-10.024877,-5.379553,-6.324836,-6.63994,2.206213,10
9,0.001108,0.0002412185,0.00061,0.000499,1.0,0.95,"{'alpha': 1, 'l1_ratio': 0.95}",-6.729435,-2.591285,-8.709842,-4.156317,-5.329916,-5.503359,2.102835,9


Using The Best Model From A Grid Search:

In [16]:
# Calling predict on the fitted search object automatically uses the
# best-performing model found by the grid search.
y_pred = grid_model.predict(X_test)

In [17]:
from sklearn.metrics import mean_squared_error

In [18]:
# Mean squared error on the held-out test set. This is the same metric,
# sign-flipped, as the 'neg_mean_squared_error' scoring used during the search.
mean_squared_error(y_test, y_pred)

2.387342642087474