<a href="https://colab.research.google.com/github/PadmaJyothi-U/FML/blob/main/Lasso_Regularization_boston.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Lasso Regularization**

Lasso regularization is a technique used in machine learning and statistics to reduce overfitting by adding a penalty term to the model's objective function. Specifically, it adds a penalty proportional to the L1 norm of the coefficient vector (the sum of the absolute values of the coefficients) to the sum of squared errors. This penalty can shrink the coefficients of some features exactly to zero, effectively performing feature selection.


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import linear_model
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error,r2_score,mean_squared_error

In [3]:
# Read the dataset from boston.csv in the working directory into a DataFrame,
# then preview its first five rows.
data = pd.read_csv(filepath_or_buffer="boston.csv")
data.head(n=5)

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [4]:
# Separate the target from the predictors: 'medv' is the column to predict,
# and every remaining column is used as a feature. Both are plain NumPy arrays.
y = data.loc[:, 'medv'].values
X = data.drop(columns=['medv']).values

In [5]:
# Hold out 25% of the rows for testing; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [6]:
# Sanity check: (rows, columns) of the training and test feature matrices.
(X_train.shape, X_test.shape)

((379, 13), (127, 13))

In [7]:
# Fit a Lasso (L1-penalised) linear regression on the training split.
# alpha sets the strength of the L1 penalty.
reg = Lasso(alpha=0.1)
reg.fit(X=X_train, y=y_train)

In [8]:
# Evaluate the fitted model on the training split (in-sample error).
# The metric functions are already imported in the notebook's import cell.
y_pred1 = reg.predict(X_train)
mse_train = mean_squared_error(y_train, y_pred1)  # computed once, reused for RMSE

print('Performance of the model on training data :\n')
print(f'MAE = {mean_absolute_error(y_train, y_pred1)}')
print(f'MSE = {mse_train}')
print(f'RMSE = {np.sqrt(mse_train)}')
print(f'R_2 = {r2_score(y_train, y_pred1)}')

Performance of the model on training data :

MAE = 3.377670130538952
MSE = 23.335638870357222
RMSE = 4.830697555256096
R_2 = 0.7368608115180646


In [9]:
# Evaluate the fitted model on the held-out test split (out-of-sample error).

y_pred2 = reg.predict(X_test)
mse_test = mean_squared_error(y_test, y_pred2)  # computed once, reused for RMSE

print('Performance of the model on test data :\n')
print(f'MAE = {mean_absolute_error(y_test, y_pred2)}')
print(f'MSE = {mse_test}')
print(f'RMSE = {np.sqrt(mse_test)}')
print(f'R_2 = {r2_score(y_test, y_pred2)}')

Performance of the model on test data :

MAE = 3.1209646581075896
MSE = 23.385884128935917
RMSE = 4.835895380271984
R_2 = 0.666045437403698


In [10]:
# For a regressor such as Lasso, score() returns the coefficient of
# determination (R^2), so this equals the r2_score computed on the test split.
reg.score(X=X_test, y=y_test)

0.666045437403698