# Lasso and Ridge Regression Library Implementation
In this notebook, we fit Lasso (L1 regularization) and Ridge (L2 regularization) regression models using the ready-made estimators from the scikit-learn library.

## Dataset Operations

In [1]:
# Importing libraries 
import numpy as np
import pandas as pd

from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
from sklearn.preprocessing import OneHotEncoder

# Load the dataset; the path is resolved relative to the kernel's working directory.
df = pd.read_csv("cleaned_shifted_data.csv")

# Build an encoder that returns a dense array instead of a sparse matrix.
# scikit-learn 1.2 renamed the `sparse` argument to `sparse_output` and 1.4
# removed the old spelling, so try the current keyword first and fall back to
# the legacy one on older installations.
try:
    oe = OneHotEncoder(sparse_output=False)
except TypeError:
    oe = OneHotEncoder(sparse=False)

# Double brackets select a one-column DataFrame, i.e. the 2-D input the
# encoder requires; the result has one indicator column per station category.
encoded = oe.fit_transform(df[['Station']])

In [3]:
# Display the labels of the one-hot columns, one per category learned by the encoder.
# They are built from `categories_` because `OneHotEncoder.get_feature_names()` was
# removed in scikit-learn 1.2; the "x0_" prefix reproduces the labels that method returned.
np.array([f"x0_{category}" for category in oe.categories_[0]], dtype=object)

array(['x0_IITG ', 'x0_LGBI Airport ', 'x0_Pan Bazaar ',
       'x0_Railway Colony '], dtype=object)

In [4]:

# NOTE: this cell rebinds `df` and drops columns by position, so it must be run
# exactly once after the loading cell; re-running it alone appends the one-hot
# columns a second time and shifts the positions used below.

# Label the one-hot columns from the fitted categories. `get_feature_names()`
# was removed in scikit-learn 1.2; the "x0_" prefix keeps the column labels
# identical to what that method produced.
station_columns = [f"x0_{category}" for category in oe.categories_[0]]

# Reuse df's index so the column-wise concat pairs rows by label even when
# df does not carry a default 0..n-1 index.
one_hot_df = pd.DataFrame(encoded, columns=station_columns, index=df.index)
df = pd.concat([df, one_hot_df], axis=1)

# Derive calendar features from the timestamp.
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
df['year'] = df['Timestamp'].dt.year
df['month'] = df['Timestamp'].dt.month
df['dayofweek'] = df['Timestamp'].dt.day_of_week

# Columns to discard, selected by position in the frame as it stands at this point.
# NOTE(review): positional indices break silently if the CSV layout changes —
# consider replacing them with explicit column names.
drop_cols = df.columns[[0, 1, 2, 12, 14, 16]]
df = df.drop(columns=drop_cols)

# Separate the regression target from the feature matrix.
X = df.drop(columns='AQI_calculated_shifted')
y = df['AQI_calculated_shifted']

In [5]:
# Sanity check: show the dimensions of the feature matrix and the target side by side.
feature_shape, target_shape = X.shape, y.shape
print(feature_shape, target_shape)

(174762, 17) (174762,)


In [6]:
# Hold out one third of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    test_size=1 / 3,
    random_state=0,
)

# Report the shape of each partition, one per line.
for part in (X_train, X_test, Y_train, Y_test):
    print(part.shape)

(116508, 17)
(58254, 17)
(116508,)
(58254,)


In [7]:
# Fit the scaler on the training split only, then apply the same fitted
# transformation to both splits so the test data does not influence the
# scaling statistics.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Show the scaled training matrix as the cell output.
X_train

array([[ 3.67831173,  2.2237575 , -0.20599341, ...,  1.19263893,
        -1.54346737, -0.01566533],
       [-0.29548835, -0.3588719 , -0.26667847, ..., -0.95436545,
        -0.96118867, -0.01566533],
       [ 1.56565852,  0.86541954, -0.35201684, ..., -0.23869732,
        -1.25232802,  0.49471947],
       ...,
       [-0.79850102, -0.71758036, -0.40464216, ...,  0.47697081,
         0.49450809,  0.49471947],
       [-0.84880229, -0.76101143, -0.15526449, ...,  1.19263893,
         0.78564744,  1.00510426],
       [-0.69370672, -0.52419593, -0.20220059, ..., -0.95436545,
         0.49450809, -0.01566533]])

## Lasso

In [8]:
from sklearn.linear_model import Lasso

# Lasso hyperparameters: `alpha` is the regularization strength, `tol` the
# optimizer's stopping tolerance.
# NOTE(review): tol=0.1 is far looser than scikit-learn's default (1e-4) and
# may stop the solver early — confirm this is intentional.
parameters = dict(alpha=0.5, tol=0.1)

# `fit` returns the estimator itself, so construction and training chain.
model = Lasso(**parameters).fit(X_train, Y_train)

# Score the held-out split; r2_score and mean_squared_error come from the
# notebook's import cell.
Y_pred = model.predict(X_test)
score = r2_score(Y_test, Y_pred)
mse = mean_squared_error(Y_test, Y_pred)

print(f"R2 {score}")
print("MSE", mse)

R2 0.4139777006204265
MSE 6554.421924381027


## Ridge

In [9]:
from sklearn.linear_model import Ridge

# Ridge hyperparameters: `alpha` is the regularization strength.
parameters = dict(alpha=0.5)

# `fit` returns the estimator itself, so construction and training chain.
model = Ridge(**parameters).fit(X_train, Y_train)

# Score the held-out split; r2_score and mean_squared_error come from the
# notebook's import cell.
Y_pred = model.predict(X_test)
score = r2_score(Y_test, Y_pred)
mse = mean_squared_error(Y_test, Y_pred)

print(f"R2 {score}")
print("MSE", mse)

R2 0.4162744542784833
MSE 6528.733665508954
