## Importing necessary libraries

In [56]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import warnings 
warnings.filterwarnings("ignore")

In [57]:
# Load the Boston housing training data from the CSV file and display the first 5 rows.

df = pd.read_csv("train.csv")
df.head()

Unnamed: 0,# CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,Y
0,-0.40785,-0.487722,-1.266023,-0.272599,-0.576134,1.239974,0.840122,-0.520264,-0.752922,-1.278354,-0.303094,0.410571,-1.09799,37.9
1,-0.407374,-0.487722,0.247057,-0.272599,-1.016689,0.001946,-0.838337,0.336351,-0.523001,-0.060801,0.113032,0.291169,-0.520474,21.4
2,0.125179,-0.487722,1.015999,-0.272599,1.36749,-0.439699,0.687212,-0.577309,1.661245,1.530926,0.806576,-3.795795,0.891076,12.7
3,0.028304,-0.487722,1.015999,-0.272599,1.859875,-0.047918,0.801005,-0.712836,1.661245,1.530926,0.806576,-0.06605,0.215438,19.9
4,-0.412408,-0.487722,-0.969827,-0.272599,-0.913029,-0.384137,-0.834781,0.300508,-0.752922,-0.957633,0.02056,0.431074,0.029007,22.5


In [58]:
# Rename columns by removing leading '#' and stripping spaces
df.columns = df.columns.str.replace('#', '').str.strip()
df.columns

Index(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
       'PTRATIO', 'B', 'LSTAT', 'Y'],
      dtype='object')

In [59]:
# Separate the predictors (every column except "Y") from the target column "Y",
# then display both shapes as a quick sanity check.
X = df.drop('Y', axis=1)
y = df["Y"]
X.shape, y.shape 

((379, 13), (379,))

In [60]:
# Hold out 30% of the rows for evaluation. The fixed random_state keeps the
# split (and therefore every metric computed below) identical on each
# Restart Kernel -> Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_train.shape, X_test.shape

((265, 13), (114, 13))

## Batch GDRegressor

In [61]:
# Batch Gradient Descent

class GDRegressor:
    """Linear regression fitted with batch (full-dataset) gradient descent.

    Each epoch computes the gradient of the mean squared error over all
    training rows and takes one step of size ``lr`` against it.

    Parameters
    ----------
    lr : float, default=0.01
        Learning rate (step size) applied to every gradient update.
    epochs : int, default=100
        Number of passes over the data; one weight update is made per epoch.

    Attributes
    ----------
    m : numpy.ndarray of shape (n_features,) or None
        Learned coefficients; ``None`` until ``fit`` has been called.
    c : float or None
        Learned intercept; ``None`` until ``fit`` has been called.
    """

    def __init__(self, lr = 0.01, epochs = 100):
        self.lr = lr
        self.epochs = epochs
        self.m = None
        self.c = None

    def fit(self, X, y):
        """Fit the coefficients and intercept on the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features (DataFrame, ndarray or nested list).
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Training targets.

        Returns
        -------
        tuple
            ``(m, c)``: the fitted coefficient vector and intercept.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or its row count differs from ``y``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten the target: a column vector of shape (n, 1) would otherwise
        # broadcast `y - y_pred` to (n, n) and silently corrupt the weights.
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional (n_samples, n_features)")
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != n_samples:
            raise ValueError("X and y must contain the same number of samples")

        self.c = 0.0
        self.m = np.zeros(n_features)

        for _ in range(self.epochs):
            residual = y - (np.dot(X, self.m) + self.c)

            # Gradients of mean((y - Xm - c)^2) with respect to m and c.
            der_m = (-2 / n_samples) * np.dot(X.T, residual)
            der_c = (-2) * np.mean(residual)

            self.m = self.m - self.lr * der_m
            self.c = self.c - self.lr * der_c

        return self.m, self.c

    def predict(self, X):
        """Return the predictions ``X @ m + c`` for the given feature rows.

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.m is None or self.c is None:
            raise RuntimeError("GDRegressor is not fitted yet; call fit() before predict().")
        return np.dot(np.asarray(X, dtype=float), self.m) + self.c
    

In [84]:
gd = GDRegressor(lr = 0.01, epochs = 200)

In [85]:
# Training the GDRegressor model on the provided training data.

gd.fit(X_train,y_train)

(array([-0.81611372,  0.09828713, -0.17254211,  0.66407146, -0.95043614,
         3.10113171, -0.38087387, -1.80385045,  1.03814976, -0.2596327 ,
        -1.82615967,  0.95820786, -3.96604492]),
 22.2342996238665)

In [86]:
y_pred = gd.predict(X_test)

In [87]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score 

In [88]:
# Calculating and displaying the performance metrics of the trained GDRegressor model.

print("Mean Absolute Error:", mean_absolute_error(y_test, y_pred))
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
print("R2 Score:",r2_score(y_test, y_pred))

Mean Absolute Error: 3.3043666972957553
Mean Squared Error: 20.49611995993805
R2 Score: 0.7583132203351639


## Stochastic GDRegressor

In [74]:
# Stochastic Gradient Descent

class SGDRegressor:
    """Linear regression fitted with stochastic gradient descent.

    Each epoch performs ``n_samples`` weight updates; every update uses the
    squared-error gradient of a single training row drawn uniformly at random
    (with replacement).

    Parameters
    ----------
    lr : float, default=0.01
        Learning rate (step size) applied to every single-sample update.
    epochs : int, default=100
        Number of epochs; each epoch makes ``n_samples`` updates.
    random_state : int or None, default=None
        Seed for the row sampling. With ``None`` the global ``np.random``
        stream is used (so ``np.random.seed`` still controls the result);
        with an integer every ``fit`` call is reproducible on its own.

    Attributes
    ----------
    m : numpy.ndarray of shape (n_features,) or None
        Learned coefficients; ``None`` until ``fit`` has been called.
    c : float or None
        Learned intercept; ``None`` until ``fit`` has been called.
    """

    def __init__(self, lr = 0.01, epochs = 100, random_state = None):
        self.lr = lr
        self.epochs = epochs
        self.random_state = random_state
        self.m = None
        self.c = None

    def fit(self, X, y):
        """Fit the coefficients and intercept on the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features (DataFrame, ndarray or nested list).
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Training targets.

        Returns
        -------
        tuple
            ``(m, c)``: the fitted coefficient vector and intercept.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or its row count differs from ``y``.
        """
        # Convert once up front: this accepts ndarrays and lists as well as
        # pandas objects, and avoids a slow `.iloc` lookup on every update.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional (n_samples, n_features)")
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != n_samples:
            raise ValueError("X and y must contain the same number of samples")

        # `np.random` and `RandomState` both expose `randint`; falling back to
        # the global stream keeps the behaviour of unseeded callers unchanged.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        self.c = 0.0
        self.m = np.ones(n_features)

        for _ in range(self.epochs):
            for _ in range(n_samples):
                idx = rng.randint(0, n_samples)
                row = X[idx]
                error = y[idx] - (np.dot(row, self.m) + self.c)

                # Gradients of (y_i - x_i.m - c)^2 for the sampled row.
                der_m = (-2) * error * row
                der_c = (-2) * error

                self.m = self.m - self.lr * der_m
                self.c = self.c - self.lr * der_c

        return self.m, self.c

    def predict(self, X):
        """Return the predictions ``X @ m + c`` for the given feature rows.

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.m is None or self.c is None:
            raise RuntimeError("SGDRegressor is not fitted yet; call fit() before predict().")
        return np.dot(np.asarray(X, dtype=float), self.m) + self.c
    

In [97]:
sgd = SGDRegressor(lr = 0.01,epochs = 60)

In [98]:
sgd.fit(X_train,y_train)

(array([-0.82405994,  0.92591311,  0.2827904 ,  0.77455624, -2.65308938,
         2.35327388,  0.16945467, -3.15568233,  2.60092138, -1.1934712 ,
        -2.5021802 , -0.78007377, -4.12346414]),
 23.003217931629038)

In [99]:
y_pred_sgd = sgd.predict(X_test)

In [100]:
# Calculating and displaying the performance metrics of the trained SGDRegressor model.

print("Mean Absolute Error:", mean_absolute_error(y_test, y_pred_sgd))
print("Mean Squared Error:", mean_squared_error(y_test, y_pred_sgd))
print("R2 Score:",r2_score(y_test, y_pred_sgd))

Mean Absolute Error: 3.655195772525519
Mean Squared Error: 20.879810344312393
R2 Score: 0.7537888082235518


**Conclusion:**

Convergence:

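SGD: Trained for 60 epochs; each epoch performs one weight update per training row (265 here), about 15,900 updates in total. Convergence was not explicitly checked.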

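BGD: Trained for 200 epochs; each epoch performs a single weight update computed on the full training set, 200 updates in total. Convergence was not explicitly checked.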

Computational Efficiency:

SGD: Faster training due to updating weights after each random data point.

BGD: Slower training as it processes the entire dataset in each epoch.

Coherence of Results:

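SGD: Can be more erratic due to noisy updates. The noise can help escape local minima in non-convex problems, although the squared-error loss of linear regression used here is convex and has no local minima.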

BGD: Generally smoother convergence towards the optimal solution.

Performance Metrics:

Both models have similar Mean Squared Errors and R2 Scores, indicating comparable predictive capability.
SGD has a slightly higher Mean Absolute Error, indicating slightly less accuracy compared to BGD.

Trade-off:

SGD: Faster convergence, suitable for larger datasets; more variance in updates.

BGD: Slower convergence, better suited for smaller datasets; smoother updates.

Keep in mind that both methods have their strengths and weaknesses, making one more suitable than the other depending on factors like dataset size, convergence speed, and stability of updates. SGD is useful for large datasets where BGD might be computationally expensive, but BGD can provide more stable convergence.

