In [1]:
#Comparing Batch, Stochastic, and Mini Batch Gradient Descent algorithms with a Kaggle Dataset

#importing libraries
import pandas as pd
import numpy as np
import random

#reading CSV
#(no df.head() here: only the last expression of a cell is rendered, so a
# mid-cell preview was silently discarded)
df = pd.read_csv('Student_Performance.CSV')

#converting categorical data into numerical
#vectorised comparison: 'Yes' -> 1, every other value -> 0
df['Extracurricular Activities'] = (df['Extracurricular Activities'] == 'Yes').astype(int)

#segregating predictor and target variables
#first five columns are taken as predictors, the last column as the target
X = df.iloc[:, 0:5]
y = df.iloc[:, -1]

#converting DataFrame into numpy array
X = X.to_numpy()
y = y.to_numpy()

#splitting training-testing data in an 80:20 ratio
#random_state is fixed so the same rows land in train/test on every run
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=2)

#-----------------------------------

#Implementation of Batch Gradient Descent
#NOTE: scikit-learn's LinearRegression fits by ordinary least squares rather
#than by running gradient descent iterations; it serves here as the reference
#solution that the SGD-based fits below are compared against. The '(BGD)'
#label is kept in the printed output.

from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

#fit on the training split (fit() returns the estimator itself), then predict
#on the held-out split
lin_reg = LinearRegression().fit(X_train,y_train)
y_pred = lin_reg.predict(X_test)

#report the learned parameters and the R2 score on the test split
for label, value in (
    ('Coefficients (BGD): ', lin_reg.coef_),
    ('Intercept (BGD): ', lin_reg.intercept_),
    ('R2 Score (BGD): ', r2_score(y_test,y_pred)),
):
    print(label, value)
print('\n')

#-----------------------------------

#Implementation of Stochastic Gradient Descent

#training model and predicting o/p using scikit-learn 'SGDRegressor' class
from  sklearn.linear_model import SGDRegressor
#creating an object with at most 160 epochs and a constant learning_rate = 0.0001
#- max_iter is an upper bound: the estimator's tolerance-based stopping rule
#  may end training earlier
#- random_state pins the per-epoch shuffling of the training data, so the
#  coefficients and score printed below are identical on every run
#NOTE(review): the features are fed in unscaled; SGD is sensitive to feature
#scale, so standardising them first is worth considering
sgd_reg = SGDRegressor(max_iter=160,learning_rate='constant',eta0=0.0001,random_state=2)

sgd_reg.fit(X_train,y_train)
y_predict = sgd_reg.predict(X_test)

#finding out model parameters and model evaluation metric
#(intercept_ is a length-1 array for SGDRegressor, hence the brackets in the output)
print('Coefficients (SGD): ',sgd_reg.coef_)
print('Intercept (SGD): ',sgd_reg.intercept_)
print('R2 Score (SGD): ',r2_score(y_test,y_predict))
print('\n')

#-----------------------------------

#Implementation of Mini Batch Gradient Descent

#creating an object with learning_rate = 0.0001
#random_state pins the estimator's own shuffling so reruns are reproducible
sgd_reg_mb = SGDRegressor(learning_rate='constant',eta0=0.0001,random_state=2)

#defining hyperparameters
epochs_sgd_mb = 100
batch_size = 160
#ceiling division: a trailing partial batch is still trained on instead of
#being silently dropped when the training size is not a multiple of batch_size
no_of_batches = (X_train.shape[0] + batch_size - 1) // batch_size

#seeded generator so the batch composition is the same on every run
batch_rng = np.random.default_rng(2)

#updating model parameters: for each epoch, no. of batches times
#Each epoch reshuffles the row indices once and walks through them in
#consecutive slices, so every training row is used exactly once per epoch
#(independently sampled batches could repeat some rows and never visit others).
#NOTE: partial_fit runs one SGD pass over the rows it is given, so inside a
#batch the weights are still updated sample by sample rather than once from an
#averaged batch gradient.
for epoch in range(epochs_sgd_mb):
    shuffled_idx = batch_rng.permutation(X_train.shape[0])
    for batch_no in range(no_of_batches):
        batch_idx = shuffled_idx[batch_no*batch_size:(batch_no+1)*batch_size]
        sgd_reg_mb.partial_fit(X_train[batch_idx],y_train[batch_idx])

#o/p prediction
y_prediction = sgd_reg_mb.predict(X_test)

#finding out model parameters and model evaluation metric
print('Coefficients (MBGD): ',sgd_reg_mb.coef_)
print('Intercept (MBGD): ',sgd_reg_mb.intercept_)
print('R2 Score (MBGD): ',r2_score(y_test,y_prediction))

Coefficients (BGD):  [2.85352109 1.01959723 0.59483017 0.48314352 0.1977199 ]
Intercept (BGD):  -34.17752765121658
R2 Score (BGD):  0.9890954655668953


Coefficients (SGD):  [2.8013702  1.02523616 0.45361981 0.34924863 0.15217633]
Intercept (SGD):  [-31.87673426]
R2 Score (SGD):  0.9841541534514815


Coefficients (MBGD):  [2.76164802 0.97535041 0.31917638 0.27435601 0.09972621]
Intercept (MBGD):  [-29.72794068]
R2 Score (MBGD):  0.9845159709554225
