## Objective: Implement regularisation techniques: L1 (Lasso), L2 (Ridge) and Elastic Net
- Prevent overfitting during training by penalising large coefficients and reducing model complexity

In [26]:
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression , Ridge , Lasso , ElasticNet
from sklearn.metrics import mean_squared_error 

In [11]:
# Load the California housing dataset and pull out the pieces used below:
# the feature matrix, the regression target and the column names.
cali = fetch_california_housing()
feature_names = cali.feature_names
x = cali.data
y = cali.target

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across re-runs so the MSE values stay comparable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)
print('Feature names:', feature_names)

Feature names: ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']


In [18]:
# Baseline: ordinary least squares with no penalty term. Its test MSE and
# coefficients are the reference point for the regularised models.
lr = LinearRegression().fit(x_train, y_train)

# Score on the held-out split.
y_pred = lr.predict(x_test)
mse_lr = mean_squared_error(y_test, y_pred)
print('LR model without regularisation:', mse_lr)

# One line per feature: name left-padded to 20 columns, coefficient to 6 d.p.
print("\nCoefficients of LR:")
for name, coef in zip(feature_names, lr.coef_):
    print(f"{name:20}: {coef: .6f}")

LR model without regularisation: 0.5558915986952424

Coefficients of LR:
MedInc              :  0.448675
HouseAge            :  0.009724
AveRooms            : -0.123323
AveBedrms           :  0.783145
Population          : -0.000002
AveOccup            : -0.003526
Latitude            : -0.419792
Longitude           : -0.433708


## L1 Regularisation (Lasso)
- Adds the sum of the absolute values of the coefficients to the loss function
- Encourages sparsity by setting some coefficients to zero -> feature selection

In [27]:
# L1-penalised linear regression. max_iter is raised to 5000 to give the
# solver more iterations than the library default.
# NOTE(review): no scaling step is visible before this fit, so the penalty is
# applied to features on their raw scales — consider standardising and
# tuning alpha before drawing conclusions from the coefficients.
lasso = Lasso(alpha=1.0, max_iter=5000).fit(x_train, y_train)

# Score on the held-out split.
y_pred = lasso.predict(x_test)
mse_lasso = mean_squared_error(y_test, y_pred)
print('LR model with L1 regularisation:', mse_lasso)

# One line per feature: name left-padded to 20 columns, coefficient to 6 d.p.
print("\nCoefficients of LR:")
for name, coef in zip(feature_names, lasso.coef_):
    print(f"{name:20}: {coef: .6f}")

LR model with L1 regularisation: 0.9380337514945428

Coefficients of LR:
MedInc              :  0.148196
HouseAge            :  0.005728
AveRooms            :  0.000000
AveBedrms           : -0.000000
Population          : -0.000008
AveOccup            : -0.000000
Latitude            : -0.000000
Longitude           : -0.000000


## L2 Regularisation (Ridge)
- Adds the squared values of the coefficients to the loss function
- Shrinks coefficients towards zero but does not set them exactly to zero
- Helpful when there are correlated features

In [28]:
# L2-penalised linear regression with penalty strength alpha=1.0, fitted on
# the same training split as the other models.
ridge = Ridge(alpha=1.0).fit(x_train, y_train)

# Score on the held-out split.
y_pred = ridge.predict(x_test)
mse_ridge = mean_squared_error(y_test, y_pred)
print('LR model with L2 regularisation:', mse_ridge)

# One line per feature: name left-padded to 20 columns, coefficient to 6 d.p.
print("\nCoefficients of LR:")
for name, coef in zip(feature_names, ridge.coef_):
    print(f"{name:20}: {coef: .6f}")

LR model with L2 regularisation: 0.5558034669932184

Coefficients of LR:
MedInc              :  0.448511
HouseAge            :  0.009726
AveRooms            : -0.123014
AveBedrms           :  0.781417
Population          : -0.000002
AveOccup            : -0.003526
Latitude            : -0.419787
Longitude           : -0.433681


## Elastic Net
- Combines the L1 and L2 penalties
- Useful when there are correlated predictors and feature selection is desired

In [None]:
# Elastic Net: linear regression with a combined L1 + L2 penalty. Only alpha
# is set here; the L1/L2 mix (l1_ratio) is left at the library default.
en = ElasticNet(alpha=1.0).fit(x_train, y_train)

# Score on the held-out split.
y_pred = en.predict(x_test)
mse_en = mean_squared_error(y_test, y_pred)
print('LR model with Elastic Net regularisation:', mse_en)

# One line per feature: name left-padded to 20 columns, coefficient to 6 d.p.
print("\nCoefficients of LR:")
for name, coef in zip(feature_names, en.coef_):
    print(f"{name:20}: {coef: .6f}")

LR model with Elastic Net regularisation: 0.7645556403971131

Coefficients of LR:
MedInc              :  0.255275
HouseAge            :  0.011230
AveRooms            :  0.000000
AveBedrms           : -0.000000
Population          :  0.000008
AveOccup            : -0.000000
Latitude            : -0.000000
Longitude           : -0.000000
