In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [5]:
df = pd.read_csv("Advertising.csv")

In [6]:
X = df.drop('sales', axis=1)

In [7]:
y = df['sales']

In [8]:
from sklearn.preprocessing import PolynomialFeatures

In [9]:
polynomialConverter = PolynomialFeatures(degree=3, include_bias=False)

In [10]:
polyFeatures = polynomialConverter.fit_transform(X)

In [67]:
polyFeatures.shape

(200, 19)

In [68]:
X.shape

(200, 3)

In [69]:
from sklearn.model_selection import train_test_split

In [193]:
X_train, X_test, y_train, y_test = train_test_split(polyFeatures, y, test_size=0.33)

In [194]:
X_train.shape

(134, 19)

In [195]:
from sklearn.preprocessing import StandardScaler

In [196]:
scaler = StandardScaler()

In [197]:
# Don't want any info to leak from the test set

In [198]:
scaler.fit(X_train)

In [199]:
X_train = scaler.transform(X_train)

In [200]:
X_test = scaler.transform(X_test)

In [201]:
X_train[0]

array([ 0.97084123, -0.52752521,  0.75260353,  0.94345138,  0.02195277,
        1.22087148, -0.7275882 , -0.09752174,  0.43363291,  0.82028173,
        0.11587417,  1.12717803, -0.40073917,  0.20947353,  0.78892722,
       -0.74837863, -0.43463083, -0.12233041,  0.14251518])

In [202]:
polyFeatures[0]

array([2.30100000e+02, 3.78000000e+01, 6.92000000e+01, 5.29460100e+04,
       8.69778000e+03, 1.59229200e+04, 1.42884000e+03, 2.61576000e+03,
       4.78864000e+03, 1.21828769e+07, 2.00135918e+06, 3.66386389e+06,
       3.28776084e+05, 6.01886376e+05, 1.10186606e+06, 5.40101520e+04,
       9.88757280e+04, 1.81010592e+05, 3.31373888e+05])

In [203]:
from sklearn.linear_model import Ridge

In [204]:
ridgeModel = Ridge(alpha=10)

In [205]:
ridgeModel.fit(X_train, y_train)

In [206]:
testPredictions = ridgeModel.predict(X_test)

In [207]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [208]:
MAE = mean_absolute_error(testPredictions, y_test)
MSE = mean_squared_error(testPredictions, y_test)
RMSE = np.sqrt(MSE)

In [209]:
MAE

0.5669397545772218

In [210]:
MSE

0.5095589184810216

In [211]:
RMSE

0.7138339572204601

In [212]:
from sklearn.linear_model import RidgeCV

In [213]:
ridgeCVModel = RidgeCV(alphas=(0.1,1.0,10), scoring='neg_mean_absolute_error')

In [214]:
ridgeCVModel.fit(X_train, y_train)

In [215]:
ridgeCVModel.alpha_

0.1

In [216]:
# from sklearn.metrics import make_scorer

In [217]:
# make_scorer.keys()

In [218]:
testPredictions = ridgeCVModel.predict(X_test)

In [219]:
MAE = mean_absolute_error(y_test, testPredictions)
RMSE = np.sqrt(mean_squared_error(y_test, testPredictions))

In [220]:
MAE

0.3378029108555796

In [221]:
RMSE

0.43393899136402514

In [222]:
ridgeCVModel.coef_

array([ 5.83457042,  0.0773522 ,  0.54504274, -6.60309706,  4.86781061,
       -0.9813238 ,  0.17986677, -0.02213458, -0.19411991,  2.69514186,
       -1.07903074,  0.4958989 ,  0.27330332, -0.14038784,  0.22807642,
       -0.08850442, -0.03703951,  0.21926926, -0.12552322])

In [223]:
ridgeCVModel.best_score_

-0.4129402016547229

In [224]:
# LASSO - Least Absolute Shrinkage and Selection Operator

In [225]:
from sklearn.linear_model import LassoCV

In [226]:
lassoCVModel = LassoCV(eps=0.1, n_alphas=100, cv=5)

In [227]:
lassoCVModel.fit(X_train, y_train)

In [228]:
lassoCVModel.alpha_

0.5288661199327459

In [229]:
testPredictions = lassoCVModel.predict(X_test)

In [230]:
MAE = mean_absolute_error(testPredictions, y_test)
MSE = mean_squared_error(testPredictions, y_test)
RMSE = np.sqrt(MSE)

In [231]:
MAE

0.5922644905562586

In [232]:
RMSE

0.8782522623431825

In [233]:
lassoCVModel.coef_

array([1.17019157, 0.        , 0.        , 0.        , 3.95759838,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        ])

In [234]:
from sklearn.linear_model import ElasticNetCV

In [235]:
elasticCVModel = ElasticNetCV(l1_ratio=[.1, .5, .7,
    .9, .95, .99, 1],  eps=0.1, max_iter=1000000, n_alphas=100)
# l1_ratio = alpha

In [236]:
elasticCVModel.fit(X_train, y_train)

In [237]:
elasticCVModel.l1_ratio

[0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1]

In [238]:
elasticCVModel.l1_ratio_

1.0

In [239]:
elasticCVModel.alpha_

0.5288661199327459

In [240]:
lassoCVModel.alpha_

0.5288661199327459

In [241]:
testPredictions = elasticCVModel.predict(X_test)

In [242]:
MAE = mean_absolute_error(y_test, testPredictions)

In [243]:
RMSE = np.sqrt(mean_squared_error(y_test, testPredictions))

In [244]:
MAE

0.5922644905562586

In [245]:
RMSE

0.8782522623431825