In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split

df = pd.read_csv("../data/Advertising.csv", index_col=0)

X, Y = df.drop("Sales", axis = "columns"), df["Sales"]

model_polynomial = PolynomialFeatures(3, include_bias=False)
poly_fatures = model_polynomial.fit_transform(X)

X_train, X_test, Y_train, Y_test = train_test_split(poly_fatures, Y, test_size=0.33, random_state=69)
X_train.shape

(134, 19)

In [14]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaled_X_train = scaler.fit_transform(X_train)
scaled_X_test = scaler.transform(X_test)

scaled_X_train.mean(), scaled_X_test.mean()

(3.34898382919136e-17, 0.12480168316025984)

In [15]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error

model_ridge = Ridge(alpha=0.1)

model_ridge.fit(scaled_X_train, Y_train)
y_hat = model_ridge.predict(scaled_X_test)

MSE = mean_squared_error(Y_test, y_hat)
RMSE = np.sqrt(MSE)

MSE, RMSE

(0.2816102952825163, 0.5306696668196857)

In [16]:
from sklearn.linear_model import Lasso

model_lasso = Lasso(alpha= 0.1)
model_lasso.fit(scaled_X_train, Y_train)
Y_hat = model_lasso.predict(scaled_X_test)

print(model_lasso.coef_)

np.sqrt(mean_squared_error(Y_test, Y_hat))

[ 1.6249375   0.          0.         -0.          4.00349979  0.
  0.          0.0041374   0.         -0.35316211 -0.         -0.
  0.          0.          0.          0.          0.00912827  0.
  0.        ]


0.857216832595549

In [19]:
from sklearn.linear_model import RidgeCV

model_ridgeCV = RidgeCV(alphas=[.0001, .001, .01, .1, 1, 5, 10], scoring="neg_mean_squared_error")
model_ridgeCV.fit(scaled_X_train, Y_train)
print(model_ridgeCV.coef_, "\n", model_ridgeCV.score(scaled_X_train, Y_train))
model_ridgeCV.alpha_

[  7.52689255   0.57996414   0.16204489 -11.60255076   5.57508917
  -1.05258406  -1.18672588  -0.1562316    0.24351417   5.82223289
  -1.79518878   1.1101629    0.27156058  -0.58793012   0.17391716
   0.61123904   0.0853737    0.56933227  -0.48238378] 
 0.9916984838671971


0.0001

In [22]:
from sklearn.linear_model import LassoCV

model_lassoCV = LassoCV(eps=0.001, n_alphas= 100, max_iter=10000, cv=5)
model_lassoCV.fit(scaled_X_train, Y_train)

print(model_lassoCV.alpha_)
Y_hat = model_lassoCV.predict(scaled_X_test)

np.sqrt(mean_squared_error(Y_test, Y_hat))

0.004981947537762884


0.5209315071752452

In [27]:
from sklearn.linear_model import ElasticNetCV

model_elasticCV = ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .99, 1], eps=0.001, n_alphas= 100, max_iter=100000)
model_elasticCV.fit(scaled_X_train, Y_train)
print(f"L1 ratio: {model_elasticCV.l1_ratio_}, \nalpha {model_elasticCV.alpha_}")

L1 ratio: 1.0, 
alpha 0.004981947537762884


In [28]:
Y_hat = model_elasticCV.predict(scaled_X_test)
np.sqrt(mean_squared_error(Y_test, y_hat))

model_elasticCV.

0.5306696668196857