# Lasso and Ridge Regression Implementation
In this Notebook, we implement Lasso (L1 Regularization) and Ridge regression (L2 regularization) from scratch, and compare it with the pre-defined models from the Scikit-learn library.

### Initial functions

In [35]:
# Importing libraries 
import numpy as np
import pandas as pd

from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

Most formulations are same in both Lasso and Ridge regression. From an implementation perspective, they differ solely in the regularization term. Hence, we implement a common regression class, in which L1 or L2 regularization can be chosen as a parameter.

In [18]:
class Regression:
    
    def __init__(self, regularization, lr, epoch):
        self.m = None #samples
        self.n = None #features
        self.w = None #weight
        self.b = None #bias
        self.regularization = regularization #penalty object
        self.lr = lr #learning rate
        self.epoch = epoch #iteration
        
    def __calculate_cost(self, y, y_pred):
        return (1 / (2*self.m)) * np.sum(np.square(y_pred-y)) + self.regularization(self.w)
    
    def __hypothesis(self, w, X):
        return np.dot(X, w) 
    
    def __initialization(self, X):
        X = np.insert(X, 0, 1, axis=1)
        self.m, self.n = X.shape
        self.w = np.zeros((self.n,1))
        return X
    
    def __update_parameters(self, X, y, y_pred):
        dw = (1/self.m) * np.dot(X.T, (y_pred - y)) + self.regularization.derivation(self.w)
        self.w = self.w - self.lr * dw
        return True
        
    def fit(self, X, y):
        X = self.__initialization(X)
        for e in range(1, self.epoch+1):
            y_pred = self.__hypothesis(self.w, X)
            cost = self.__calculate_cost(y, y_pred)
            self.__update_parameters(X, y, y_pred)
            if e % 100 == 0:
                print(f"The Cost in iteration {e}----->{cost} :)")
        return True

    def predict(self, X_test):
        X_test = np.insert(X_test, 0 , 1, axis= 1)
        y_pred = self.__hypothesis(self.w, X_test)
        return y_pred

Define the regularization terms for Lasso and Ridge respectively:

In [19]:
class LassoPenalty:
    
    def __init__(self, l):
        self.l = l # lambda value
        
    def __call__(self,w):
        return self.l * np.sum(np.abs(w))
        
    def derivation(self, w):
        return self.l * np.sign(w)
    
class RidgePenalty:
    
    def __init__(self, l):
        self.l = l
        
    def __call__(self, w):
        return self.l * np.sum(np.square(w))
        
    def derivation(self, w):
        return self.l * 2 * w

In [20]:
class Lasso(Regression):
    
    def __init__(self, l, lr, epoch):
        self.regularization = LassoPenalty(l)
        super().__init__(self.regularization, lr, epoch )
        
class Ridge(Regression):
    
    def __init__(self, l, lr, epoch):
        self.regularization = RidgePenalty(l)
        super().__init__(self.regularization, lr, epoch )

### Dataset Operations

In [21]:
# Importing dataset     
df = pd.read_csv( "../ML-Project-CS361/cleaned_shifted_data.csv" ) 
drop_cols = [0,1,2,12,14,16]
drop_cols = df.columns[drop_cols]
drop_cols

Index(['Timestamp', 'Unnamed: 0', 'Station', 'Checks', 'AQI_bucket_calculated',
       'AQI_bucket_calculated_shifted'],
      dtype='object')

In [22]:
# Drop the columns and make a new dataframe df1
df.drop(columns=drop_cols, inplace=True)

In [23]:
print(df.shape)

(174762, 11)


In [28]:
df.head()

Unnamed: 0,PM2.5 (µg/m³),PM10 (µg/m³),NO (µg/m³),NO2 (µg/m³),NOx (ppb),NH3 (µg/m³),SO2 (µg/m³),CO (mg/m³),Ozone (µg/m³),AQI_calculated,AQI_calculated_shifted
0,46.0,80.0,1.29,9.16,12.02,27.19,13.56,0.4,15.8,67.0,296.0
1,46.0,80.0,1.74,8.93,12.48,30.29,13.71,0.41,15.52,68.0,297.0
2,45.62,79.92,1.87,8.56,12.17,28.2,13.88,0.41,15.33,68.0,298.0
3,41.0,72.92,1.83,8.72,12.37,26.69,13.77,0.4,15.3,68.0,298.0
4,41.0,79.0,1.69,7.91,11.3,26.83,13.87,0.41,15.49,68.0,299.0


Since the original dataset is too large to fit into a numpy array, take a random subset of this data. 

In [24]:
# Randomly sample 32000 rows
df_subset = df.sample(n=32000, random_state=42)
print(df_subset.shape)

(32000, 11)


In [34]:
df_subset.describe()

Unnamed: 0,PM2.5 (µg/m³),PM10 (µg/m³),NO (µg/m³),NO2 (µg/m³),NOx (ppb),NH3 (µg/m³),SO2 (µg/m³),CO (mg/m³),Ozone (µg/m³),AQI_calculated,AQI_calculated_shifted
count,32000.0,32000.0,32000.0,32000.0,32000.0,32000.0,32000.0,32000.0,32000.0,32000.0,32000.0
mean,60.007376,111.450673,9.785336,8.365729,17.911769,9.936954,18.552968,0.689827,24.399094,140.854719,138.908719
std,60.077518,111.412286,21.020019,10.607242,34.041483,7.555035,13.777714,0.623949,22.927234,104.175769,105.049152
min,0.1,1.7,0.01,0.02,0.23,0.01,0.1,0.0,0.02,14.0,11.0
25%,20.0,37.54,1.21,2.52,4.61,4.0,10.12,0.31,12.75,56.0,55.0
50%,39.5,73.105,3.36,4.17,5.47,7.14,14.51,0.49,18.1,101.0,100.0
75%,81.0,149.0,5.61,10.21,13.4,15.31,22.88,0.84,26.76,216.0,204.0
max,916.67,1000.0,299.64,120.2,330.29,76.79,194.13,6.22,193.57,1109.0,1105.0


In [25]:
X = df_subset.iloc[:, :-1].values #features
Y = df_subset.iloc[:, 1].values.reshape(-1, 1) #target value
print(X.shape)
print(Y.shape)

(32000, 10)
(32000, 1)


In [26]:
# Splitting dataset into train and test set 
X_train, X_test, Y_train, Y_test = train_test_split( X, Y, test_size = 1 / 3, random_state = 0 )
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

(21333, 10)
(10667, 10)
(21333, 1)
(10667, 1)


In [37]:
# Standardize the features using StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_train

array([[-0.7578099 , -0.84412827, -0.39188338, ..., -0.41370701,
         0.09591132, -0.70718809],
       [ 1.77848328,  2.02302695,  3.85916975, ...,  4.80450109,
        -1.03646199,  1.08175379],
       [-0.31818575, -0.40355122, -0.20417454, ..., -0.78299558,
         0.85038999,  1.72271158],
       ...,
       [ 1.82920914,  1.19688825, -0.27666568, ...,  1.17583946,
        -0.36280472,  2.02884067],
       [ 0.32434186,  0.28904352, -0.19649336, ...,  0.21247797,
         0.18590704,  1.06262073],
       [-0.96071335, -0.88815873, -0.45765348, ..., -0.76693955,
         0.00809997, -1.02288371]])

### L1 Regularization (Lasso)

Here we can observe much lower values of the cost function, since the regularization term depends only on the absolute values of weights.

In [38]:
parameters = {
    "l" : 0.1,
    "lr" : 0.1,
    "epoch" : 1000
}
model = Lasso(**parameters)
model.fit(X_train, Y_train) 

Y_pred = model.predict(X_test)
score = r2_score(Y_test, Y_pred)
print("r2 score", score)

The Cost in iteration 100----->34.60743228681634 :)
The Cost in iteration 200----->22.504082071973468 :)
The Cost in iteration 300----->22.134226316211297 :)
The Cost in iteration 400----->22.124677130885832 :)
The Cost in iteration 500----->22.12610985168155 :)
The Cost in iteration 600----->22.125450798705277 :)
The Cost in iteration 700----->22.125003424361722 :)
The Cost in iteration 800----->22.12492034203957 :)
The Cost in iteration 900----->22.124973665273448 :)
The Cost in iteration 1000----->22.12412581971832 :)
r2 score 0.9999985216129738


### L2 Regularization (Ridge)

Here, we can observe much higher values of the cost function, as the regularization term depends on squares of weights.

In [40]:
parameters = {
    "l" : 0.1,
    "lr" : 0.1,
    "epoch" : 1000
}
model = Ridge(**parameters)
model.fit(X_train, Y_train)
 
Y_pred = model.predict(X_test)
score = r2_score(Y_test, Y_pred)
print("r2 score", score)

The Cost in iteration 100----->1815.8151767286631 :)
The Cost in iteration 200----->1815.7178832926809 :)
The Cost in iteration 300----->1815.717829673503 :)
The Cost in iteration 400----->1815.7178296285117 :)
The Cost in iteration 500----->1815.717829628444 :)
The Cost in iteration 600----->1815.7178296284437 :)
The Cost in iteration 700----->1815.7178296284437 :)
The Cost in iteration 800----->1815.7178296284437 :)
The Cost in iteration 900----->1815.7178296284437 :)
The Cost in iteration 1000----->1815.7178296284437 :)
r2 score 0.9348384078101843


### Library Implementation

Using Sklearn library implementation of Lasso:

In [41]:
from sklearn.linear_model import Lasso
from sklearn.metrics import r2_score
parameters = {'alpha': 0.1,
              'tol': 0.1
              }
model = Lasso(**parameters)

model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)
score = r2_score(Y_test, Y_pred)
print("R2 {}".format(score))

R2 0.978661149555438


Using Sklearn library implementation of Ridge:

In [42]:
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score
parameters = {'alpha': 0.1
              }
model = Ridge(**parameters)

model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)
score = r2_score(Y_test, Y_pred)
print("R2 {}".format(score))

R2 0.9999999999295118
