# Regularized linear models

- Ridge - L2 regularization 
- Lasso - L1 regularization 
- ElasticNet - combination of L1 and L2 

In [2]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt 

# Load the advertising data; the first CSV column holds the row labels,
# so it becomes the DataFrame index rather than a feature column.
df = pd.read_csv(
    "../Data/Advertising.csv",
    index_col=0,
)

# Preview the first rows to confirm the columns were read as expected.
df.head()

Unnamed: 0,TV,radio,newspaper,sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4
3,17.2,45.9,69.3,9.3
4,151.5,41.3,58.5,18.5
5,180.8,10.8,58.4,12.9


# Data preparation 

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

# Separate the predictor columns from the target column ("sales").
X = df.drop(columns="sales")
y = df["sales"]

# Expand the predictors into polynomial terms without a constant column.
# Feel free to experiment with different degrees.
model_polynomial = PolynomialFeatures(degree=2, include_bias=False)
poly_features = model_polynomial.fit_transform(X)

print(poly_features.shape)

# Hold out 33% of the rows for testing; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    poly_features,
    y,
    test_size=0.33,
    random_state=42,
)
X_train.shape, X_test.shape

(200, 9)


((134, 9), (66, 9))

# Feature standardization

In [4]:
from sklearn.preprocessing import StandardScaler 

# Learn the scaling statistics from the training split only, so nothing
# about the test split influences the transformation.
scaler = StandardScaler().fit(X_train)

# Apply the same fitted transformation to both splits.
scaled_X_train = scaler.transform(X_train)
scaled_X_test = scaler.transform(X_test)


# Regularization - Ridge

In [5]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error,mean_squared_error


def ridge_regression(X_train, X_test, y, penalty=0):
    """Fit a Ridge model and predict on the given test features.

    Parameters
    ----------
    X_train : array-like
        Training features. Pass already-scaled features so the penalty
        treats every coefficient on the same scale.
    X_test : array-like
        Features to predict on, transformed the same way as ``X_train``.
    y : array-like
        Training targets aligned row-by-row with ``X_train``.
    penalty : float, default=0
        Regularization strength passed to ``Ridge(alpha=...)``. With the
        default of 0 no penalty is applied (scikit-learn documents this as
        equivalent to ordinary least squares).

    Returns
    -------
    ndarray
        Predictions for ``X_test``.
    """
    model_ridge = Ridge(alpha=penalty)
    # Use the arguments rather than notebook globals, so the function
    # fits on exactly the data the caller passes in.
    model_ridge.fit(X_train, y)
    return model_ridge.predict(X_test)


# Pass the scaled splits and the training targets explicitly; the function
# no longer reads them from the global namespace.
y_pred = ridge_regression(scaled_X_train, scaled_X_test, y_train)

MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)
MAE = mean_absolute_error(y_test, y_pred)

RMSE, MAE

(0.6784587307926764, 0.547583791914941)

# Regularization - LASSO 

In [6]:
from sklearn.linear_model import Lasso

# Fit an L1-penalized model with a fixed penalty strength on the scaled
# training data (fit returns the estimator itself, so the call can be chained).
model_lasso = Lasso(alpha=0.1).fit(scaled_X_train, y_train)

# Score the model on the held-out, scaled test features.
y_pred = model_lasso.predict(scaled_X_test)

MSE = mean_squared_error(y_true=y_test, y_pred=y_pred)
MAE = mean_absolute_error(y_true=y_test, y_pred=y_pred)
RMSE = np.sqrt(MSE)

RMSE, MAE

(0.9075189532866105, 0.6769038327060557)

# k-fold cross-validation
## RidgeCV

In [7]:
from sklearn.linear_model import RidgeCV

# scikit-learn's "alpha" is the penalty strength, usually written as lambda
# in the theory. RidgeCV tries every candidate below and keeps the best one.
model_ridgeCV = RidgeCV(
    alphas=[1e-5, 1e-4, 1e-3, 1e-2, 0.1, 0.3, 0.5, 0.7, 0.9, 1, 5, 10]
).fit(scaled_X_train, y_train)

# Report the selected penalty and the coefficients fitted with it.
print(model_ridgeCV.alpha_)
print(model_ridgeCV.coef_)

# Evaluate on the held-out, scaled test features.
y_pred = model_ridgeCV.predict(scaled_X_test)

MSE = mean_squared_error(y_true=y_test, y_pred=y_pred)
MAE = mean_absolute_error(y_true=y_test, y_pred=y_pred)
RMSE = np.sqrt(MSE)

RMSE, MAE

0.1
[ 4.42838425  0.22680987  0.23941072 -2.73376765  3.64070219 -0.4743945
  0.19150131  0.13338453  0.12886391]


(0.6718010541424984, 0.5422401519106239)

## LassoCV

In [8]:
from sklearn.linear_model import LassoCV

# Search a path of 100 penalty values with 5-fold cross-validation.
# max_iter must be an integer: the float 1e4 is rejected by scikit-learn
# releases that validate estimator parameters, so use 10_000 instead.
model_lassoCV = LassoCV(eps=0.001, n_alphas=100, max_iter=10_000, cv=5)
model_lassoCV.fit(scaled_X_train, y_train)

print(f"Chosen alpha (penalty term) {model_lassoCV.alpha_}")

# Coefficients of the model refitted with the chosen penalty.
print(model_lassoCV.coef_)

# Evaluate on the held-out, scaled test features.
y_pred = model_lassoCV.predict(scaled_X_test)

MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)
MAE = mean_absolute_error(y_test, y_pred)

RMSE, MAE

Chosen alpha (penalty term) 0.004968802520343366
[ 4.3309372   0.25092172  0.20773543 -2.66400123  3.62767721 -0.40236964
  0.16528508  0.15107745  0.08933983]


(0.6473230819398569, 0.5259508472983855)

## ElasticNet CV

In [9]:
from sklearn.linear_model import ElasticNetCV

# Cross-validate over several L1/L2 mixing ratios; a ratio of 1 corresponds
# to a pure L1 (Lasso) penalty. fit returns the estimator, so it is chained.
model_elastic = ElasticNetCV(
    l1_ratio=[0.05, 0.1, 0.2, 0.5, 0.7, 0.9, 0.99, 1],
    max_iter=10000,
).fit(scaled_X_train, y_train)

# Report the mixing ratio and penalty strength that were selected.
print(f"L1 ratio {model_elastic.l1_ratio_}")
print(f"alpha (penalty) {model_elastic.alpha_}")

L1 ratio 1.0
alpha (penalty) 0.004968802520343366


In [10]:
# Evaluate the cross-validated ElasticNet on the held-out, scaled test features.
y_pred = model_elastic.predict(scaled_X_test)

MSE = mean_squared_error(y_true=y_test, y_pred=y_pred)
MAE = mean_absolute_error(y_true=y_test, y_pred=y_pred)
RMSE = np.sqrt(MSE)

RMSE, MAE

(0.6473230819398569, 0.5259508472983855)