# Regularized linear models

In [97]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.linear_model import Lasso
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import LassoCV
from sklearn.linear_model import ElasticNetCV



In [98]:
data = pd.read_csv("../data/Advertising.csv", index_col=0)

X, y = data.drop("Sales", axis= 1), data["Sales"]

model_ply = PolynomialFeatures(3, include_bias=False)
poly_features = model_ply.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(poly_features, y, test_size=0.33, random_state=42)

X_train.shape, X_test.shape, y_train.shape, y_test.shape





((134, 19), (66, 19), (134,), (66,))

In [99]:
# Vi måsta standardiserat våran data innan vi kör en ridge regression
# Viktiga är att vi använder samma skalerning på tränings datan som på test datan
# Att det kan finnas större eller mindre värden i test datan än tränings datan

scaler = StandardScaler()

scaled_X_train = scaler.fit_transform(X_train)
scaled_X_test = scaler.transform(X_test)

print(f"Scaled X_train mean: {scaled_X_train.mean():.2f}, std {scaled_X_train.std():.2}")
print(f"Scaled X_test mean: {scaled_X_test.mean():.2f}, std {scaled_X_test.std():.2f}")



Scaled X_train mean: -0.00, std 1.0
Scaled X_test mean: -0.12, std 1.12


In [100]:
def ridge_regression(X, penalty=0):
    model_ridge = Ridge(alpha=penalty)
    model_ridge.fit(scaled_X_train, y_train)
    y_pred = model_ridge.predict(X)

    return y_pred

y_pred = ridge_regression(scaled_X_test)
y_pred_p = ridge_regression(scaled_X_test, penalty=0.5)


MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)
MAE = mean_absolute_error(y_test, y_pred)

MSE_p = mean_squared_error(y_test, y_pred_p)
RMSE_p = np.sqrt(MSE)
MAE_p = mean_absolute_error(y_test, y_pred_p)

print (f"Penalty = 0.\n MAE  {MAE}\n MSE  {MSE}\n RMSE {RMSE}\n")
print (f"Penalty = 10.\n MAE  {MAE_p}\n MSE  {MSE_p}\n RMSE {RMSE_p}\n")

Penalty = 0.
 MAE  0.3748516441218201
 MSE  0.26504659505538314
 RMSE 0.51482676217868

Penalty = 10.
 MAE  0.5392524917636404
 MSE  0.4487478689277049
 RMSE 0.51482676217868



Här kan vi ses att det inte hjälpte inte att göra ridge regression. Utan så här en vanlig en linjär regression bättre. För att vi vet att Newspaper här inte har något med att förusäga Y på ett bra sätt. I med att vi sprider ut värdena lämnt i en ridge regression över alla beta, så får även variabler som inte har så mycket signifikans stor vikt, för vi jämnar ut det så mycket. Då blir hela regressionen sämre. Lyckades bara öka bias och inte hitta bra balans mellan varianse och bias. 

In [101]:
model_lasso = Lasso(alpha=0.2)
model_lasso.fit(X_train, y_train)
y_pred = model_lasso.predict(scaled_X_test)

MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)
MAE = mean_absolute_error(y_test, y_pred)

print (f" MAE  {MAE}\n MSE  {MSE}\n RMSE {RMSE}\n")


 MAE  8.325426013740572
 MSE  94.43104457780015
 RMSE 9.717563716168788



  model = cd_fast.enet_coordinate_descent(


In [102]:
# K fold cross validation är mycket dyrt

model_ridgeCV = RidgeCV(alphas=[.0001, .001, .01, .1, .5, 1, 5, 10], scoring="neg_mean_squared_error", cv= 5)

model_ridgeCV.fit(scaled_X_train, y_train)

print(model_ridgeCV.alpha_)


0.5


In [103]:
model_lassoCV = LassoCV(eps=0.001, n_alphas=100, max_iter=10000, cv=5)

model_lassoCV.fit(scaled_X_train, y_train)

y_pred = model_lassoCV.predict(scaled_X_test)

MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)
MAE = mean_absolute_error(y_test, y_pred)


print (f" MAE:   {MAE}\n MSE:   {MSE}\n RMSE:  {RMSE}\n Alpha: {model_lassoCV.alpha_}\n")

 MAE:   0.46291883026932734
 MSE:   0.33467924600221605
 RMSE:  0.5785146895301934
 Alpha: 0.004968802520343368



In [107]:
model_elstic = ElasticNetCV(l1_ratio=[.1, .5, .7, .9 , .95, .99, 1], eps=0.001, n_alphas=100, max_iter=1000)
model_elstic.fit(scaled_X_train, y_train)
y_pred = model_elstic.predict(scaled_X_test)

MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)
MAE = mean_absolute_error(y_test, y_pred)

print (f" MAE  {MAE}\n MSE  {MSE}\n RMSE {RMSE}\n L1 norm: {model_elstic.l1_ratio_}\n Alpha: {model_elstic.alphas_}")

 MAE  0.46802072322691235
 MSE  0.3410150044070999
 RMSE 0.5839648999786716
 L1 norm: 1.0
 Alpha: [[4.96880252e+01 4.63392186e+01 4.32161104e+01 4.03034892e+01
  3.75871689e+01 3.50539195e+01 3.26914026e+01 3.04881115e+01
  2.84333148e+01 2.65170046e+01 2.47298472e+01 2.30631383e+01
  2.15087600e+01 2.00591415e+01 1.87072225e+01 1.74464183e+01
  1.62705881e+01 1.51740049e+01 1.41513278e+01 1.31975757e+01
  1.23081032e+01 1.14785783e+01 1.07049605e+01 9.98348201e+00
  9.31062873e+00 8.68312352e+00 8.09791005e+00 7.55213802e+00
  7.04314919e+00 6.56846451e+00 6.12577199e+00 5.71291546e+00
  5.32788408e+00 4.96880252e+00 4.63392186e+00 4.32161104e+00
  4.03034892e+00 3.75871689e+00 3.50539195e+00 3.26914026e+00
  3.04881115e+00 2.84333148e+00 2.65170046e+00 2.47298472e+00
  2.30631383e+00 2.15087600e+00 2.00591415e+00 1.87072225e+00
  1.74464183e+00 1.62705881e+00 1.51740049e+00 1.41513278e+00
  1.31975757e+00 1.23081032e+00 1.14785783e+00 1.07049605e+00
  9.98348201e-01 9.31062873e-01 8.

  model = cd_fast.enet_coordinate_descent(
