# Ridge Regression 

Here, I implement the ridge regression algorithm, including functions to fit the model on training data and to predict the outputs for test data.

In [None]:
import numpy as np

In [None]:
class RidgeRegression():
    '''
    Ridge regression (linear regression with an L_2 penalty) fitted by batch gradient descent.

    The class stores the hyperparameters of the algorithm as attributes, such as the regularization
    parameter (Lambda), and provides methods to compute the regularized loss, fit the model and
    predict the outputs of new samples. The bias is kept as the last entry of W and is not penalized.

    Attributes:
        lr:        learning rate of gradient descent
        Lambda:    regularization parameter for L_2 penalty
        max_itr:   maximum number of iterations for gradient descent
        tol:       iteration stops once the decrease in loss between two consecutive
                   steps is smaller than tol
        W:         concatenation of weight w and bias b (set by fit)

    '''
    def __init__(self, lr, Lambda, max_itr, tol):
        '''
        Initialize the RidgeRegression class by storing its hyperparameters.
        '''
        self.lr = lr
        self.Lambda = Lambda
        self.max_itr = max_itr
        self.tol = tol

    def _loss_ridge(self, X, y, W):
        '''
        Calculate the regularized empirical loss.

        Args:
            X (array, num_samples*(num_variables+1)): inputs with the all-one column appended last
            y (array, num_samples*1): outputs
            W (array, (num_variables+1)*1): weight w followed by bias b

        Returns:
            float: sum of squared residuals plus Lambda times the squared L_2 norm of the
            weights (the bias, i.e. the last entry of W, is excluded from the penalty)
        '''
        residual = y - X @ W
        return (residual.T @ residual)[0, 0] + self.Lambda * np.sum(W[:X.shape[1] - 1, 0] ** 2)

    def fit(self, x, y):
        '''
        Estimate the weight and bias in the ridge regression model by gradient descent.

        Args:
            x (array, num_train*num_variables): input of training samples
            y (array, num_train*1 or num_train): output of training samples

        Returns:
            self.W (array, (num_variables+1)*1): estimation of weight w and bias b

        Raises:
            ValueError: if x and y do not contain the same number of samples
        '''
        x = np.asarray(x, dtype=float)
        # A 1-D y would broadcast against the (m, 1) predictions into an (m, m) array and
        # silently produce a wrongly shaped W, so always work with a column vector.
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        m = x.shape[0]
        if y.shape[0] != m:
            raise ValueError(
                'x and y must have the same number of samples, got {} and {}'.format(m, y.shape[0])
            )
        ### Add the all-one vector to the last column so that the bias is the last entry of W
        X = np.concatenate((x, np.ones((m, 1))), axis=1)
        d = X.shape[1]
        self.W = np.ones((d, 1))

        ### Use the gradient descent to update W
        # NOTE(review): the update follows the gradient of
        # (1/(2m))*||y - XW||^2 + (Lambda/2)*||w||^2, whereas _loss_ridge (used only for the
        # stopping test) is the unscaled ||y - XW||^2 + Lambda*||w||^2. The scaling is kept
        # as it was so that existing lr/tol settings keep their meaning.
        previous_loss = self._loss_ridge(X, y, self.W)
        for i in range(self.max_itr):
            # Derivative of the L_2 penalty: the last entry (bias) stays zero because the
            # bias is not regularized, matching _loss_ridge.
            L_2_der_W = np.zeros((d, 1))
            L_2_der_W[:d - 1, 0] = self.W[:d - 1, 0]
            gradient = X.T @ (X @ self.W - y) / m + self.Lambda * L_2_der_W
            self.W = self.W - self.lr * gradient
            current_loss = self._loss_ridge(X, y, self.W)
            if previous_loss - current_loss < self.tol:
                print(f'Converged after {i} iterations')
                break
            previous_loss = current_loss
        return self.W

    def predict(self, x):
        '''
        Predict the output of the test samples with the fitted W.

        Args:
            x (array, num_test*num_variables): input of test samples

        Returns:
            y (array, num_test*1): predicted outputs of test samples
        '''
        m = x.shape[0]
        # Append the same all-one column used in fit so the bias entry of W is applied.
        X = np.concatenate((x, np.ones((m, 1))), axis=1)
        return np.dot(X, self.W)

Next, use the `RidgeRegression` class to fit, predict with, and evaluate the ridge regression model.

In [None]:
from sklearn.metrics import mean_squared_error
### Initialize the class RidgeRegression by assigning values to the hyperparameters.
model = RidgeRegression(lr=0.01, Lambda=0.002, max_itr=20000, tol=1e-5)
### Fit the model with training data; W stacks the weight w with the bias b as its last entry.
W = model.fit(X_train, y_train)
### Predict the output of test samples
y_pred = model.predict(X_test)
### Evaluate the model by calculating the MSE of test samples.
### sklearn's signature is mean_squared_error(y_true, y_pred), so the ground truth goes first.
mse = mean_squared_error(y_test, y_pred)
### Print MSE
print("MSE of Ridge Regression is {}".format(mse))
### Print the estimated w (first num_variables entries of W) and b (the entry after them)
print("The weight w of Ridge Regression is \n {}.".format(W[:X_test.shape[1],0].T))
print("The bias b of Ridge Regression is {}.".format(W[X_test.shape[1],0]))

Converged after 9928 iterations
MSE of Ridge Regression is 0.010401064104879443
The weight w of Ridge Regression is 
 [ 1.00703274e+00  2.95993559e+00  9.41954206e-04 -5.41866994e-03
  1.90221845e-03  4.70667448e-04].
The bias b of Ridge Regression is 1.9876498728411585.
