# Ridge and Lasso regression 

We use Ridge and Lasso regression to predict the median value of houses in Boston.

We show how to calibrate the hyperparameters using cross-validation in different ways:

1. RidgeCV 
2. LassoCV



In [None]:
import numpy as np
import random
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import linear_model
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LassoCV
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides library diagnostics (e.g. convergence or deprecation
# warnings) — confirm that this is intended.
warnings.filterwarnings("ignore")

# Focus on the importance of the hyperparameters

In [None]:
# Load the Boston housing data. The CSV file must be placed in ../Dataset/
# (relative to this notebook) before running the cell.
data = pd.read_csv("../Dataset/BostonHouse.csv")
print(data.shape)
data.head()

In [None]:
# Alternative way to import the same dataset (kept for reference only, not executed).
# NOTE: `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, so this
# snippet only works with older scikit-learn versions.
# NOTE(review): the cells below expect the target column to be called 'medv' and use
# lower-case column names — adapt the names if this loader is used instead of the CSV.
#
# from sklearn.datasets import load_boston
# boston = load_boston()
# data = pd.DataFrame(boston.data, columns=boston.feature_names)
# # add target
# data['price'] = boston.target
# data.head()

In [None]:
# Target: the median house value.
y = data['medv']
# Regressors: every remaining column except 'zn' and 'rad', which the author
# drops because they are treated as categorical.
X = data.drop(columns=['zn', 'rad', 'medv'])

# No cross-validation: we try a single, arbitrarily chosen alpha

In [None]:
# Hold out 20% of the observations as a test set; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=10,
)

In [None]:
# Instantiate the three estimators. Ridge and Lasso share one hand-picked
# regularisation strength (the λ parameter, called `alpha` in scikit-learn).
lasso_reg = Lasso(alpha=0.5)
ridge_reg = Ridge(alpha=0.5)
lin_reg = LinearRegression()

In [None]:
# Estimate each model on the training split only.
lin_reg.fit(X=X_train, y=y_train)
ridge_reg.fit(X=X_train, y=y_train)
lasso_reg.fit(X=X_train, y=y_train)

In [None]:
# Out-of-sample predictions on the held-out test split.
pred_lin = lin_reg.predict(X_test)
pred_ridge = ridge_reg.predict(X_test)
pred_lasso = lasso_reg.predict(X_test)

# Report the test-set mean squared error of each model.
for model_label, test_pred in (
    ('Linear Regression', pred_lin),
    ('Ridge', pred_ridge),
    ('Lasso', pred_lasso),
):
    print('MSE oos ' + model_label + ': ' + str(np.mean((test_pred - y_test)**2)))

# The same numbers can be obtained with sklearn's helper,
# e.g. mean_squared_error(y_test, pred_lin).

# Cross-validation, using only one alpha

In [None]:
# Fresh, unfitted estimators for the cross-validation exercise (same alpha as before).
lasso_reg2 = Lasso(alpha=0.5)
ridge_reg2 = Ridge(alpha=0.5)
lin_reg2 = LinearRegression()

In [None]:
# 10-fold cross-validation on the entire sample. The scorer returns the *negative*
# MSE of each fold, hence the absolute values taken below.
lin_reg2_ = cross_val_score(estimator=lin_reg2, X=X, y=y, scoring='neg_mean_squared_error', cv=10)
ridge_reg2_ = cross_val_score(estimator=ridge_reg2, X=X, y=y, scoring='neg_mean_squared_error', cv=10)
lasso_reg2_ = cross_val_score(estimator=lasso_reg2, X=X, y=y, scoring='neg_mean_squared_error', cv=10)

# Average MSE across the folds, one line per model.
for model_label, fold_scores in (('lin', lin_reg2_), ('ridge', ridge_reg2_), ('lasso', lasso_reg2_)):
    print('Mean MSE ' + model_label + ' reg: ' + str(np.mean(np.abs(fold_scores))))
print('')
# MSE of the single best fold, one line per model.
for model_label, fold_scores in (('lin', lin_reg2_), ('ridge', ridge_reg2_), ('lasso', lasso_reg2_)):
    print('Min MSE ' + model_label + ' reg: ' + str(np.amin(np.abs(fold_scores))))

# Author's observations on the printed numbers: with cross-validation the error
# decreases, and Ridge and Linear regression are very similar.

We can notice that, just by using cross-validation, the mean MSE decreases.

# Cross-validation using a grid of alphas

In [None]:
# Models
# The baseline is a genuine ordinary-least-squares fit. Ridge() with alpha = 1 is a
# penalised model, not plain linear regression, so it is not a valid "lin" benchmark.
lin_reg3 = LinearRegression()
ridge_reg3 = Ridge()
lasso_reg3 = Lasso()
# Candidate regularisation strengths searched for Ridge and Lasso.
params = {'alpha': np.linspace(0.001,0.999, 10)}
# LinearRegression has no alpha to tune; a single-candidate grid lets it go through
# the same GridSearchCV pipeline (same folds, same scoring) as the other two models.
params_lin_reg = {'fit_intercept': [True]}

We can use GridSearchCV or LassoCV and RidgeCV. In the following we use GridSearchCV.

Inputs: `GridSearchCV(model, parameter grid, scoring, cv)`.
This command already performs the cross-validation: it tries every parameter value in the grid and evaluates each one with 10-fold cross-validation.

In [None]:
# Wrap every model in a 10-fold grid search scored by negative MSE. Ridge and Lasso
# search over `params`; the baseline uses its own grid, `params_lin_reg`.
lin_reg_3_ = GridSearchCV(estimator=lin_reg3, param_grid=params_lin_reg, scoring='neg_mean_squared_error', cv=10)
ridge_reg_3_ = GridSearchCV(estimator=ridge_reg3, param_grid=params, scoring='neg_mean_squared_error', cv=10)
lasso_reg_3_ = GridSearchCV(estimator=lasso_reg3, param_grid=params, scoring='neg_mean_squared_error', cv=10)

In [None]:
# Run the searches on the entire dataset: GridSearchCV takes care of
# splitting the sample into folds internally.
lin_reg_3_.fit(X=X, y=y)
ridge_reg_3_.fit(X=X, y=y)
lasso_reg_3_.fit(X=X, y=y)

In [None]:
# Show the best parameters and the corresponding MSE.
# `best_score_` comes from the negative-MSE scorer, hence np.abs.
print('Lin param in sample:' +str(lin_reg_3_.best_params_))
print('Lin MSE in sample: ' + str(np.abs(lin_reg_3_.best_score_)))
print('Ridge param in sample:' +str(ridge_reg_3_.best_params_))
print('Ridge MSE in sample: ' + str(np.abs(ridge_reg_3_.best_score_)))
print('Lasso param in sample:' +str(lasso_reg_3_.best_params_))
print('Lasso MSE in sample: ' + str(np.abs(lasso_reg_3_.best_score_)))

# Position of the winning candidate inside the grid. These lines print an index,
# not a parameter value, so they are labelled accordingly.
print('')
print('Lin best index in sample:' +str(lin_reg_3_.best_index_))
print('Ridge best index in sample:' +str(ridge_reg_3_.best_index_))
print('Lasso best index in sample:' +str(lasso_reg_3_.best_index_))

In [None]:
# Display the alpha stored at position 9 of the search grid `params['alpha']`.
params['alpha'][9]

In [None]:
# For a fairer comparison, run the grid search on the training split only
# and evaluate the selected models on the held-out test split.
# NOTE: this refits the same GridSearchCV objects, replacing their previous results.
lin_reg_3_.fit(X=X_train, y=y_train)
ridge_reg_3_.fit(X=X_train, y=y_train)
lasso_reg_3_.fit(X=X_train, y=y_train)

pred_lin3 = lin_reg_3_.predict(X_test)
pred_ridge3 = ridge_reg_3_.predict(X_test)
pred_lasso3 = lasso_reg_3_.predict(X_test)

# Test-set mean squared error of each tuned model.
for model_label, test_pred in (('lin', pred_lin3), ('ridge', pred_ridge3), ('lasso', pred_lasso3)):
    print('OOS MSE ' + model_label + ' reg: ' + str(np.mean((test_pred - y_test)**2)))

# Author's note on the output: out of sample the best is ridge and not lasso.
print('')
# Parameters selected by the grid search when it only sees the training set.
for model_label, search in (('Lin', lin_reg_3_), ('Ridge', ridge_reg_3_), ('Lasso', lasso_reg_3_)):
    print(model_label + ' param train set:' + str(search.best_params_))

These are the parameters that are used out of sample. Let's check:

In [None]:
# Sanity check against the output above: refit plain Ridge/Lasso with the alphas that
# the grid search selected on the training set, and compare the test-set MSE.
# The alphas are read from `best_params_` instead of being typed in by hand, so the
# check stays valid if the grid, the split or the data change, and it cannot suffer
# from transcription typos.
ridge_try = Ridge(alpha = ridge_reg_3_.best_params_['alpha'])
lasso_try = Lasso(alpha = lasso_reg_3_.best_params_['alpha'])
ridge_try.fit(X_train, y_train)
lasso_try.fit(X_train, y_train)

ridge_pred_try = ridge_try.predict(X_test)
lasso_pred_try = lasso_try.predict(X_test)
print('OOS MSE ridge reg: ' + str(np.mean((ridge_pred_try - y_test)**2)))
print('OOS MSE lasso reg: ' + str(np.mean((lasso_pred_try - y_test)**2)))

You can notice that the results are the same as above.

With the correct out-of-sample (OOS) structure we notice that Lasso is the best, and both models are able to beat the linear regression in predicting house prices in Boston.

# Let's do the same using RidgeCV and LassoCV

In [None]:
# Baseline: plain ordinary least squares. A RidgeCV whose grid only contains
# alpha = 1 is still a penalised model (and just repeats the same candidate ten
# times), so it is not a linear-regression benchmark.
linCV_reg = LinearRegression()
# Ridge and Lasso pick their alpha from the grid by 10-fold cross-validation.
ridgeCV_reg = RidgeCV(alphas = np.linspace(0.001,0.999,10), cv = 10, scoring='neg_mean_squared_error')
lassoCV_reg = LassoCV(alphas = np.linspace(0.001,0.999,10), cv = 10)

In [None]:
# Fit all three estimators on the full sample.
linCV_reg.fit(X=X, y=y)
ridgeCV_reg.fit(X=X, y=y)
lassoCV_reg.fit(X=X, y=y)

In [None]:
# Display the regularisation strength each CV estimator selected on the full sample.
for model_label, cv_model in (('Ridge', ridgeCV_reg), ('Lasso', lassoCV_reg)):
    print(model_label + ' λ entire sample: ' + str(cv_model.alpha_))

In [None]:
# Cross-validated MSE on the entire sample for each model.
# For Ridge the score is recomputed by hand:
# 1. select best alpha
# 2. set Ridge_param
# 3. cross_val_score
λ_ridge = ridgeCV_reg.alpha_
ridge_tmp = Ridge(alpha = λ_ridge)
ridge_reg2_ = cross_val_score(ridge_tmp, X,y, scoring = 'neg_mean_squared_error', cv = 10)

# The linear-regression benchmark must be unpenalised OLS; Ridge(alpha = 1) is not.
lin_tmp = LinearRegression()
lin_reg2_ = cross_val_score(lin_tmp, X,y, scoring = 'neg_mean_squared_error', cv = 10)

# `mse_path_` holds one row per candidate alpha and one column per fold. Averaging the
# whole array would mix in the alphas that were NOT selected; instead average over the
# folds and take the smallest value, i.e. the CV score of the alpha LassoCV picked.
lasso_cv_mse = lassoCV_reg.mse_path_.mean(axis=1).min()

# show mse
print('MSE entire sample Lin Reg: ' + str(np.abs(lin_reg2_).mean()))
print('MSE entire sample Ridge: ' + str(np.abs(ridge_reg2_).mean()))
print('MSE entire sample Lasso: ' + str(lasso_cv_mse))

Let's perform a proper out-of-sample exercise.

In [None]:
# Refit all three estimators, this time on the training split only.
linCV_reg.fit(X=X_train, y=y_train)
ridgeCV_reg.fit(X=X_train, y=y_train)
lassoCV_reg.fit(X=X_train, y=y_train)

In [None]:
# Show the alphas selected on the training split. The estimators were just refit on
# (X_train, y_train), so the labels say "train set" rather than "entire sample".
print('Ridge λ train set: ' + str(ridgeCV_reg.alpha_))
print('Lasso λ train set: ' + str(lassoCV_reg.alpha_))

In [None]:
# Predict on the test split; each CV estimator predicts with the alpha it selected.
pred_lin4 = linCV_reg.predict(X_test)
pred_ridge4 = ridgeCV_reg.predict(X_test)
pred_lasso4 = lassoCV_reg.predict(X_test)

# Test-set mean squared error of each model.
for model_label, test_pred in (('lin', pred_lin4), ('ridge', pred_ridge4), ('lasso', pred_lasso4)):
    print('OOS MSE ' + model_label + ' reg: ' + str(np.mean((test_pred - y_test)**2)))

Same results as before!