Modify the from-scratch regression code from our lecture so that it does the following:

- Implement early stopping: if the absolute difference between the old loss and the new loss does not exceed a certain threshold, abort the learning.

- Implement an option for stochastic gradient descent, in which only one sample is used for each update.  Make sure that a sample is not repeated until all samples have been used at least once.

- Put everything into a class.

In [1]:
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler

import numpy as np

# Load the Boston housing data: feature matrix X and target vector y.
# NOTE(review): load_boston was removed in scikit-learn 1.2, so this cell
# only runs on older releases.
boston = load_boston()
X, y = boston.data, boston.target
m, n = X.shape[0], X.shape[1]  # sample count and feature count of the full data set

# Standardize the features.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Train/test split: 80% train, 20% test.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Prepend a column of ones to each design matrix so that the first
# coefficient acts as the intercept.
X_train = np.concatenate((np.ones((X_train.shape[0], 1)), X_train), axis=1)
intercept = np.ones((X_test.shape[0], 1))
X_test = np.concatenate((intercept, X_test), axis=1)

In [3]:
class LR:
    """Linear regression trained with gradient descent.

    Three update schemes are supported, selected with ``method``:

    * ``"batch"`` - one update per epoch using every sample.
    * ``"sto"``   - one update per sample; the samples are visited in a
      freshly shuffled order each epoch, so no sample repeats before all
      samples have been used once.
    * ``"mini"``  - one update per consecutive slice of ``batch_size``
      samples (the final slice may be shorter).

    The gradient is the un-normalized sum ``X.T @ error``, so the effective
    step size grows with the number of samples in the update.

    Parameters
    ----------
    alpha : learning rate.
    early_stopping : when true, stop as soon as the absolute change of the
        full-data MSE between two consecutive epochs is below ``tol``.
    max_iter : maximum number of epochs.
    loss_old : loss value the first epoch is compared against.
    tol : early-stopping threshold.
    method : ``"batch"``, ``"sto"`` or ``"mini"``.
    batch_size : slice length used by the mini-batch scheme.

    Attributes set by ``fit``
    -------------------------
    theta : fitted coefficient vector, one entry per column of ``X``.
    n_iter_ : number of epochs actually run.
    """

    _METHODS = ("batch", "sto", "mini")

    def __init__(self, alpha=0.001, early_stopping=True, max_iter=1000,
                 loss_old=10000, tol=1e-3, method="batch", batch_size=100):
        self.alpha = alpha
        self.early_stopping = early_stopping
        self.max_iter = max_iter
        # Honour the caller's value (it used to be overwritten with 10000).
        self.loss_old = loss_old
        self.tol = tol
        self.method = method
        self.batch_size = batch_size

    def predict(self, X):
        """Return the predictions ``X @ theta`` for the fitted coefficients."""
        return X @ self.theta

    def gradient(self, X, error):
        """Return the (un-normalized) squared-error gradient ``X.T @ error``."""
        return X.T @ error

    def mse(self, yhat, y):
        """Return the mean squared error between ``yhat`` and ``y``."""
        return ((yhat - y) ** 2).sum() / yhat.shape[0]

    def _update(self, X_part, y_part):
        """Take one gradient-descent step on the given subset of samples."""
        error = self.predict(X_part) - y_part
        self.theta = self.theta - self.alpha * self.gradient(X_part, error)

    def fit(self, X, y):
        """Fit ``theta`` on ``X`` (samples in rows) and targets ``y``.

        Returns ``self``.

        Raises
        ------
        ValueError
            If ``method`` is not one of ``"batch"``, ``"sto"``, ``"mini"``.
        """
        if self.method not in self._METHODS:
            raise ValueError(
                "method must be one of %r, got %r" % (self._METHODS, self.method)
            )

        n_samples = X.shape[0]
        self.theta = np.zeros(X.shape[1])
        self.n_iter_ = 0
        # Track the previous loss locally so that repeated fit() calls all
        # start from the configured initial value.
        loss_old = self.loss_old

        for j in range(self.max_iter):
            if self.method == 'batch':
                self._update(X, y)

            elif self.method == 'sto':
                # A new permutation per epoch: each sample is used exactly
                # once before any sample can be seen again.
                order = np.arange(n_samples)
                np.random.shuffle(order)
                for i in order:
                    self._update(X[i, :].reshape(1, -1), y[i])

            else:  # 'mini'
                # Slice by the size of the X that was passed in; slicing
                # past the end naturally yields the shorter last batch.
                for start in range(0, n_samples, self.batch_size):
                    stop = start + self.batch_size
                    self._update(X[start:stop], y[start:stop])

            self.n_iter_ = j + 1

            if self.early_stopping:
                new_loss = self.mse(self.predict(X), y)
                if np.abs(loss_old - new_loss) < self.tol:
                    print('iteration stopped at = ' + str(j))
                    break
                # Compare the next epoch against this one, not against the
                # initial value.
                loss_old = new_loss

        return self

In [8]:
# Train with full-batch gradient descent (at most 100 epochs), then report
# the mean squared error on the held-out test split.
model = LR(alpha = 0.001, method = "batch", max_iter = 100)
model.fit(X_train,y_train)
yhat = model.predict(X_test)
mse = model.mse(yhat, y_test)
print("MSE: ", mse)

In [9]:
# Train with stochastic gradient descent (one sample per update, default
# max_iter), then report the mean squared error on the held-out test split.
model2 = LR(alpha = 0.001, method = "sto")
model2.fit(X_train,y_train)
yhat2 = model2.predict(X_test)
mse = model2.mse(yhat2, y_test)
print("MSE: ", mse)

array([22.73328234, -0.49801977,  0.9520299 ,  0.31490166,  1.10203905,
       -1.90293778,  2.39271912,  0.41779973, -2.94771463,  2.70586458,
       -1.86232498, -2.13295683,  0.91024757, -4.91911424])

In [10]:
# Train with mini-batch gradient descent (default batch_size and max_iter),
# then report the mean squared error on the held-out test split.
model3 = LR(alpha = 0.001, method = "mini")
model3.fit(X_train,y_train)
yhat3 = model3.predict(X_test)
mse = model3.mse(yhat3, y_test)
print("MSE: ", mse)