**L2 Regularization: Ridge Regression**

In [2]:
#Import libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [3]:
#Download dataset
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/00294/CCPP.zip
!unzip CCPP.zip

--2022-02-21 11:37:14--  https://archive.ics.uci.edu/ml/machine-learning-databases/00294/CCPP.zip
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3674852 (3.5M) [application/x-httpd-php]
Saving to: ‘CCPP.zip’


2022-02-21 11:37:15 (6.87 MB/s) - ‘CCPP.zip’ saved [3674852/3674852]

Archive:  CCPP.zip
   creating: CCPP/
  inflating: CCPP/Folds5x2_pp.ods    
  inflating: CCPP/Folds5x2_pp.xlsx   
  inflating: CCPP/Readme.txt         
  inflating: CCPP/Readme.txt~        


In [4]:
#Load the CCPP spreadsheet extracted by the download cell into a DataFrame and display it
df_multi = pd.read_excel('CCPP/Folds5x2_pp.xlsx')
df_multi

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.40,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.50,1009.23,96.62,473.90
...,...,...,...,...,...
9563,16.65,49.69,1014.01,91.00,460.03
9564,13.19,39.18,1023.67,66.78,469.62
9565,31.32,74.33,1012.92,36.48,429.57
9566,24.48,69.45,1013.86,62.39,435.74


In [6]:
#Features X are every column except the last one; labels y are the last column
X = df_multi.values[:, :-1]
y = df_multi.values[:, -1]

In [7]:
#Standardize the features to zero mean and unit variance (StandardScaler does not rescale to a 0-1 range)
#NOTE(review): the scaler is fit on the full dataset before the train/test split, so test rows influence the scaling statistics
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [8]:
#Split dataset into train and test halves; random_state pins the shuffle so every run yields the same split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.5, shuffle=True, random_state=42)

In [12]:
#Define the training function: closed-form ridge regression with L2 penalty strength lam
def train(X, y, lam, penalize_intercept=True):
  """Fit ridge (L2-regularized) linear regression in closed form.

  Solves the regularized normal equations (A^T A + lam * I) W = A^T y,
  where A is X with a leading column of ones for the intercept.

  Args:
    X: 2-D array of features, one row per sample.
    y: Targets, one entry (or row) per row of X.
    lam: Regularization strength. A scalar, or anything that broadcasts
      against the square identity of size n_features + 1 (e.g. [[0.5]]).
    penalize_intercept: When True (the default, matching the original
      behaviour) the intercept is shrunk like every other weight. Pass
      False to leave the intercept unpenalized.

  Returns:
    Weights W with the intercept first, followed by one coefficient per
    column of X.

  Raises:
    numpy.linalg.LinAlgError: if the regularized system is singular.
  """
  ones = np.ones((X.shape[0], 1))
  X_stacked = np.hstack((ones, X))
  XT = X_stacked.T
  LI = lam * np.eye(X_stacked.shape[1])
  if not penalize_intercept:
    # Index 0 is the bias column added above; zeroing it removes its penalty.
    LI[0, 0] = 0.0
  # Solving the linear system directly is more stable and cheaper than
  # forming the explicit inverse and multiplying.
  return np.linalg.solve(XT @ X_stacked + LI, XT @ y)

In [10]:
#Define predict function
def predict(W, X):
  """Evaluate the linear model W on the samples in X.

  A column of ones is prepended to X so that W[0] acts as the intercept;
  the remaining entries of W multiply the columns of X in order.

  Args:
    W: Weights with the intercept first.
    X: 2-D array of features, one row per sample.

  Returns:
    The product of the bias-augmented X with W (one prediction per row).
  """
  n_rows = X.shape[0]
  bias_column = np.ones((n_rows, 1))
  design = np.concatenate((bias_column, X), axis=1)
  return design @ W

In [13]:
#Train with λ = 0.5 and display the learned parameters (intercept first, then one weight per feature).
#λ is passed as a plain scalar; the nested list [[0.5]] only worked through broadcasting.
W = train(X_train, y_train, 0.5)
W

array([ 4.54245728e+02, -1.48542673e+01, -2.87005978e+00,  2.70686177e-01,
       -2.31332522e+00])

In [14]:
#Evaluate the closed-form ridge model on the held-out test split using the R2 score
r2_score(y_test, predict(W, X_test))

0.9279513703286193

**Ridge Regression with SKLearn**

In [17]:
from sklearn.linear_model import Ridge

# Same penalty strength (alpha=0.5) as the hand-written model; fit() returns the estimator itself.
clf = Ridge(alpha=0.5).fit(X_train, y_train)

# The model was trained on scaled features, so a raw sample goes through the fitted scaler first.
clf.predict(
  scaler.transform(np.array([[32, 27.5, 1024, 50.5]]))
)

array([439.83630209])