In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the advertising data from a CSV in the working directory. Later cells
# use its 'sales' column as the target and the remaining columns as features.
advertising_csv = 'Advertising.csv'
df = pd.read_csv(advertising_csv)

In [3]:
# Feature matrix: every column of df except the 'sales' target.
X = df.drop(columns='sales')

In [4]:
# Target: the 'sales' column (all rows).
y = df.loc[:, 'sales']

In [5]:
from sklearn.preprocessing import PolynomialFeatures

In [6]:
# Transformer that expands the raw features into polynomial and interaction
# terms up to degree 3; include_bias=False omits the constant (all-ones) column.
polynomial_converter = PolynomialFeatures(
    degree=3,
    include_bias=False,
)

In [7]:
# Fit the converter on X, then produce the expanded feature matrix from the
# same data.
poly_features = polynomial_converter.fit(X).transform(X)

In [8]:
# (rows, columns) of the original feature matrix, for comparison with the
# expanded matrix in the next cell.
X.shape

(200, 3)

In [9]:
# (rows, columns) after the polynomial expansion; the row count is unchanged,
# only the number of feature columns grows.
poly_features.shape

(200, 19)

In [10]:
from sklearn.model_selection import train_test_split

In [12]:
# Hold out 30% of the rows as a test set. random_state fixes the shuffle so the
# split (and every metric computed from it) is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    poly_features,
    y,
    test_size=0.3,
    random_state=101,
)

In [14]:
# Sanity check on the split: shape of the training portion of the expanded
# feature matrix.
X_train.shape

(140, 19)

In [15]:
from sklearn.preprocessing import StandardScaler

In [16]:
# Standardizer with default settings; it is fitted on the training data in the
# next cell and then applied to both the train and test sets.
scaler = StandardScaler()

In [17]:
# Learn the scaling statistics from the training rows only, so nothing about
# the test set influences the transformation.
scaler.fit(X_train)

In [18]:
# Replace X_train with its scaled version.
# NOTE(review): this rebinds X_train in place, so the cell is not idempotent —
# running it a second time would scale already-scaled data. Re-run the
# train_test_split cell first if this cell needs to be executed again.
X_train = scaler.transform(X_train)

In [19]:
# Scale the test set with the scaler fitted on the training data.
# NOTE(review): like the X_train cell, this rebinds X_test in place and must
# not be run twice without re-running the train_test_split cell.
X_test = scaler.transform(X_test)

In [21]:
# Inspect the first training row after scaling.
X_train[0]

array([ 0.49300171, -0.33994238,  1.61586707,  0.28407363, -0.02568776,
        1.49677566, -0.59023161,  0.41659155,  1.6137853 ,  0.08057172,
       -0.05392229,  1.01524393, -0.36986163,  0.52457967,  1.48737034,
       -0.66096022, -0.16360242,  0.54694754,  1.37075536])

In [23]:
# poly_features[0]

In [24]:
from sklearn.linear_model import Ridge

In [27]:
# help(Ridge)

In [28]:
# Ridge regression with a hand-picked regularization strength (alpha=10) as a
# first attempt; a cross-validated search over alpha is done further down with
# RidgeCV.
ridge_model = Ridge(alpha=10)

In [29]:
# Train the fixed-alpha Ridge model on the scaled training features.
ridge_model.fit(X_train, y_train)

In [30]:
# Predict on the held-out test set with the fixed-alpha Ridge model.
# NOTE(review): test_predictions is reassigned by the later RidgeCV and LassoCV
# cells, so its meaning depends on which cell ran last.
test_predictions = ridge_model.predict(X_test)

In [31]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [32]:
# Mean absolute error of the fixed-alpha Ridge predictions on the test set.
MAE = mean_absolute_error(y_true=y_test, y_pred=test_predictions)

In [33]:
# Display the test-set MAE of the fixed-alpha Ridge model.
MAE

0.5774404204714166

In [34]:
# Root mean squared error of the fixed-alpha Ridge predictions on the test
# set: take the MSE first, then its square root.
ridge_test_mse = mean_squared_error(y_test, test_predictions)
RMSE = np.sqrt(ridge_test_mse)

In [35]:
# Display the test-set RMSE of the fixed-alpha Ridge model.
RMSE

0.8946386461319648

In [36]:
from sklearn.linear_model import RidgeCV

In [54]:
# Ridge regression that picks alpha by cross-validation from three candidate
# values. Candidates are ranked by negative mean absolute error (higher is
# better). The cv argument is left at its library default.
ridge_cv_model = RidgeCV(
    alphas=(0.1, 1.0, 10.0),
    scoring='neg_mean_absolute_error',
)

In [55]:
# Run the alpha search and fit the final model using the training data only;
# the test set stays untouched for the final evaluation.
ridge_cv_model.fit(X_train, y_train)

In [56]:
# Alpha selected by cross-validation.
# NOTE(review): the recorded value (0.1) is the smallest candidate in the
# grid, so the true optimum may lie below it — consider extending the alphas
# grid toward smaller values and re-checking.
ridge_cv_model.alpha_

0.1

In [53]:
# from sklearn import metrics
# metrics.get_scorer_names()

In [57]:
# Predict on the held-out test set with the cross-validated Ridge model.
# This overwrites the predictions of the earlier fixed-alpha Ridge model.
test_predictions = ridge_cv_model.predict(X_test)

In [58]:
# Mean absolute error of the RidgeCV predictions on the test set (replaces the
# value computed for the fixed-alpha Ridge model).
MAE = mean_absolute_error(y_true=y_test, y_pred=test_predictions)

In [59]:
# Root mean squared error of the RidgeCV predictions on the test set: take the
# MSE first, then its square root.
ridge_cv_test_mse = mean_squared_error(y_test, test_predictions)
RMSE = np.sqrt(ridge_cv_test_mse)

In [60]:
# Display the test-set MAE of the RidgeCV model.
MAE

0.42737748843375084

In [61]:
# Display the test-set RMSE of the RidgeCV model.
RMSE

0.6180719926926787

In [63]:
# Fitted coefficients, one per polynomial feature. In the recorded output
# every coefficient is non-zero (compare with the Lasso coefficients at the
# end of the notebook).
ridge_cv_model.coef_

array([ 5.40769392,  0.5885865 ,  0.40390395, -6.18263924,  4.59607939,
       -1.18789654, -1.15200458,  0.57837796, -0.1261586 ,  2.5569777 ,
       -1.38900471,  0.86059434,  0.72219553, -0.26129256,  0.17870787,
        0.44353612, -0.21362436, -0.04622473, -0.06441449])

In [64]:
# Cross-validation score achieved by the selected alpha. With
# scoring='neg_mean_absolute_error' this is a negated MAE, so values closer to
# zero are better.
ridge_cv_model.best_score_

-0.37492233402929614

LASSO (Least Absolute Shrinkage and Selection Operator) regression

In [69]:
from sklearn.linear_model import LassoCV

In [70]:
# Lasso

In [72]:
# Lasso regression with alpha chosen by 5-fold cross-validation.
#   eps      - ratio of the smallest to the largest alpha on the search path
#   n_alphas - how many alpha values are tried along that path
#   cv       - number of cross-validation folds
lasso_cv_model = LassoCV(
    eps=0.1,
    n_alphas=100,
    cv=5,
)

In [73]:
# Run the alpha search and fit the final Lasso model on the training data only.
lasso_cv_model.fit(X_train, y_train)

In [74]:
# Alpha selected by cross-validation for the Lasso model.
lasso_cv_model.alpha_

0.4943070909225831

In [75]:
# Predict on the held-out test set with the cross-validated Lasso model.
# This overwrites the predictions of the earlier RidgeCV model.
test_predictions = lasso_cv_model.predict(X_test)

In [79]:
# Mean absolute error of the LassoCV predictions on the test set (replaces the
# value computed for the RidgeCV model).
MAE = mean_absolute_error(y_true=y_test, y_pred=test_predictions)

In [80]:
# Root mean squared error of the LassoCV predictions on the test set: take the
# MSE first, then its square root.
lasso_cv_test_mse = mean_squared_error(y_test, test_predictions)
RMSE = np.sqrt(lasso_cv_test_mse)

In [81]:
# Display the test-set MAE of the LassoCV model.
MAE

0.6541723161252868

In [82]:
# Display the test-set RMSE of the LassoCV model.
RMSE

1.1308001022762548

In [83]:
# Fitted Lasso coefficients, one per polynomial feature. In the recorded
# output only two are non-zero: the L1 penalty has driven the rest to exactly
# zero, giving a much sparser model than the Ridge fit above.
lasso_cv_model.coef_

array([1.002651  , 0.        , 0.        , 0.        , 3.79745279,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        ])