Modify the from-scratch regression code from our lecture so that it meets the following requirements:

- Implement early stopping: if the absolute difference between the old loss and the new loss does not exceed a certain threshold, abort the learning.

- Implement an option for stochastic gradient descent, in which only one sample is used per training step. Make sure that a sample is not repeated until all samples have been used at least once.

- Put everything into a class.

In [1]:
import numpy as np
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [2]:
# Load the Boston housing data: feature matrix X and target vector y.
# NOTE(review): load_boston is not available in recent scikit-learn
# releases -- confirm the installed version before re-running this cell.
boston = load_boston()
X, y = boston.data, boston.target
m, n = X.shape  # m = number of samples, n = number of features

# Standardise every feature column.
# NOTE(review): the scaler is fit on the full dataset before the split, so
# test-set statistics influence the scaling of the training data.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Hold out 20% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Prepend a column of ones to each split so that the first coefficient of
# the model acts as the intercept term.
intercept = np.ones((X_train.shape[0], 1))
X_train = np.hstack((intercept, X_train))
intercept = np.ones((X_test.shape[0], 1))
X_test = np.hstack((intercept, X_test))

In [64]:
class LinearRegression:
    """Linear regression trained by gradient descent with early stopping.

    Three training modes are supported through ``method``:

    * ``"batch"``     -- every step uses the full training set.
    * ``"sgd"``       -- every step uses a single sample; samples are visited
      in a fresh random order each epoch, so no sample is repeated before
      all samples have been used once.
    * ``"minibatch"`` -- every step uses ``batch_size`` distinct samples
      drawn at random (fewer only if the training set is smaller).

    Training stops early when the absolute difference between the loss of
    the current step and the loss of the previous step is below ``tol``.
    For ``"sgd"`` and ``"minibatch"`` the loss is measured on the sampled
    batch only, so consecutive losses come from different samples and the
    criterion is correspondingly noisy.

    Parameters:
        alpha: learning rate applied to the (un-normalised) gradient.
        max_iter: maximum number of update steps.
        old_loss: value the first computed loss is compared against.
        tol: early-stopping threshold on the absolute loss difference.
        method: one of ``"batch"``, ``"sgd"``, ``"minibatch"``.
        batch_size: number of samples per step for ``"minibatch"``.

    Attributes set by ``fit``:
        theta: learned coefficient vector, one entry per column of ``X``.
        ite: index of the iteration at which early stopping triggered, or
            ``max_iter`` if training ran to completion.
    """

    _METHODS = ("batch", "sgd", "minibatch")

    def __init__(self, alpha=0.001, max_iter=10000, old_loss=9000, tol=1e-3,
                 method="batch", batch_size=20):
        self.alpha = alpha
        self.max_iter = max_iter
        self.old_loss = old_loss
        self.tol = tol
        self.method = method
        self.batch_size = batch_size

    def h_theta(self, X):
        """Return the predictions ``X @ theta`` for the rows of ``X``."""
        return X @ self.theta

    def gradient(self, X, error):
        """Return ``X.T @ error``, the gradient summed over the rows of ``X``.

        The result is not divided by the number of samples.
        """
        return X.T @ error

    def mse(self, yhat, y):
        """Return the mean squared error between ``yhat`` and ``y``."""
        return ((yhat - y) ** 2).sum() / yhat.shape[0]

    def _batches(self, X, y):
        """Yield an endless stream of ``(X_batch, y_batch)`` training pairs."""
        n_samples = X.shape[0]
        if self.method == "batch":
            while True:
                yield X, y
        elif self.method == "sgd":
            while True:
                # A new permutation per epoch guarantees that every sample
                # is used exactly once before any sample is seen again.
                for i in np.random.permutation(n_samples):
                    yield X[i:i + 1], y[i:i + 1]
        else:  # "minibatch"
            size = min(self.batch_size, n_samples)
            while True:
                # Distinct random rows, so the batch is never truncated the
                # way a contiguous slice near the end of the data would be.
                rows = np.random.permutation(n_samples)[:size]
                yield X[rows], y[rows]

    def fit(self, X, y):
        """Fit ``theta`` to ``X`` and ``y`` by gradient descent.

        Parameters:
            X: 2-D array of shape (n_samples, n_features); any intercept
                column must already be included by the caller.
            y: 1-D array of n_samples targets.

        Raises:
            ValueError: if ``method`` is unknown, ``X`` is not a non-empty
                2-D array, or ``batch_size`` is below 1 for ``"minibatch"``.
        """
        if self.method not in self._METHODS:
            raise ValueError(
                f"method must be one of {self._METHODS}, got {self.method!r}"
            )
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        if X.ndim != 2 or X.shape[0] == 0:
            raise ValueError("X must be a non-empty 2-D array")
        if self.method == "minibatch" and self.batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        self.theta = np.zeros(X.shape[1])
        self.ite = self.max_iter
        # Kept local so the constructor value is never overwritten and a
        # second call to fit() starts from the same reference loss.
        previous_loss = self.old_loss

        steps = zip(range(self.max_iter), self._batches(X, y))
        for iteration, (X_batch, y_batch) in steps:
            yhat = self.h_theta(X_batch)
            current_loss = self.mse(yhat, y_batch)
            if abs(current_loss - previous_loss) < self.tol:
                self.ite = iteration
                break

            previous_loss = current_loss
            error = yhat - y_batch
            self.theta = self.theta - self.alpha * self.gradient(X_batch, error)
           

In [65]:
# Train with the "batch" method and alpha=0.0001 on the training split,
# then predict on X_test and print the mean squared error against y_test.
model1 = LinearRegression( alpha =0.0001,method = "batch")
model1.fit(X_train,y_train)
yhat = model1.h_theta(X_test)
mse = model1.mse(yhat, y_test)
print("MSE by batch method: ", mse)

MSE by batch method:  28.306454826995083


In [66]:
# Train with the "sgd" method (all other hyperparameters left at the
# constructor defaults), then predict on X_test and print the mean squared
# error against y_test.
model2 = LinearRegression(method='sgd')
model2.fit(X_train, y_train)
yhat = model2.h_theta(X_test)
mse = model2.mse(yhat, y_test)
print("MSE using sgd method: ", mse)


MSE using sgd method:  28.267006707838497


In [67]:
# Train with the "minibatch" method (all other hyperparameters, including
# batch_size, left at the constructor defaults), then predict on X_test and
# print the mean squared error against y_test.
model3 = LinearRegression(method='minibatch')
model3.fit(X_train, y_train)
yhat = model3.h_theta(X_test)
mse = model3.mse(yhat, y_test)
print("MSE using minibatch mehod: ", mse)

MSE using minibatch mehod:  28.557418399800312
