In [1]:
#importing libraries
import pandas as pd
import numpy as np
import time
from colorama import Fore, Back, Style

In [2]:
#loading the student-performance dataset from disk into a DataFrame
df = pd.read_csv(filepath_or_buffer='Student_Performance.CSV')
#previewing the last five rows as the cell output
df.tail(n=5)

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
9995,1,49,Yes,4,2,23.0
9996,7,64,Yes,8,5,58.0
9997,6,83,Yes,8,5,74.0
9998,9,97,Yes,7,0,95.0
9999,7,74,No,8,1,64.0


In [3]:
#converting 'Yes/No' to '1/0'
#values that are already encoded (1/0) are passed through unchanged, so running
#this cell a second time no longer collapses the whole column to 0
df['Extracurricular Activities'] = df['Extracurricular Activities'].map(
    lambda answer: 1 if answer in (1, 'Yes') else 0
)
df.tail()

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
9995,1,49,1,4,2,23.0
9996,7,64,1,8,5,58.0
9997,6,83,1,8,5,74.0
9998,9,97,1,7,0,95.0
9999,7,74,0,8,1,64.0


In [4]:
#separating the predictors (first five columns) from the target (last column)
X = df.iloc[:, :5]
y = df.iloc[:, -1]

In [5]:
#holding out 20% of the rows for testing; the fixed random_state keeps the split repeatable
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [7]:
#creating a dedicated class for Batch Gradient Descent

#defining a class
class myBGD:
    """Linear regression fitted with batch gradient descent.

    Each epoch evaluates the gradient of the mean squared error over the
    complete training set and moves the intercept and weights one step of
    size ``learning_rate`` against it.
    """

    #defining a constructor
    def __init__(self,learning_rate,epochs_bgd):
        """Store the hyper-parameters; the model parameters stay None until fitting() runs.

        Parameters
        ----------
        learning_rate : float
            Step size applied to every gradient update.
        epochs_bgd : int
            Number of full passes (gradient steps) to perform.
        """
        self.intercept = None
        self.weights = None
        self.lr = learning_rate
        self.epochs = epochs_bgd

    #method_1: iterating to find final values of intercept and weights
    def fitting(self,X_train,y_train):
        """Run batch gradient descent and return the learned parameters.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Training features.
        y_train : array-like of shape (n_samples,)
            Training targets.

        Returns
        -------
        tuple
            ``(weights, intercept)`` in that order.
        """
        #converting once up front so the loop works on plain arrays
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float)
        n_samples, n_features = X_train.shape

        #initializing intercept and weights
        #the weight vector is sized from the data passed in, not from a notebook-level variable
        self.intercept = 0
        self.weights = np.ones(n_features)

        #iterating till the value of epochs
        for _ in range(self.epochs):
            #residuals are computed once per epoch, so both updates below
            #use the parameters from the start of the epoch
            residuals = y_train - (self.intercept + np.dot(X_train, self.weights))

            #updating the intercept
            lossfunc_slope_intercept = -2 * np.mean(residuals)
            self.intercept = self.intercept - (self.lr*lossfunc_slope_intercept)

            #updating the weights
            lossfunc_slope_weights = -2 * np.dot(residuals, X_train)/n_samples
            self.weights = self.weights - (self.lr*lossfunc_slope_weights)

        #returning the final intercept and weights
        return(self.weights,self.intercept)

    #method_2: predicting the target variable
    def prediction(self,X_test):
        """Return intercept + X_test . weights for every row of X_test."""
        return self.intercept + np.dot(X_test,self.weights)

from sklearn.metrics import r2_score

#building the batch gradient descent model
gd_obj = myBGD(learning_rate=0.0001, epochs_bgd=400000)

#training the model and reporting the wall-clock time it took
start = time.time()
ultimate_weights, ultimate_intercept = gd_obj.fitting(X_train, y_train)
print(Back.GREEN + 'Time Taken (Batch Gradient Descent): ', time.time() - start)

#predicting the target for the held-out rows
y_prediction = gd_obj.prediction(X_test)

#reporting the learned coefficients, the intercept and the R2-Score
print(Back.GREEN + 'Weights (Batch Gradient Descent): ', ultimate_weights)
print(Back.GREEN + 'Intercept (Batch Gradient Descent): ', ultimate_intercept)
print(Back.GREEN + 'R2 Score (Batch Gradient Descent): ', r2_score(y_test, y_prediction))

#restoring the default colours, then leaving some vertical space
print(Style.RESET_ALL)
print('\n')

[42mTime Taken (Batch Gradient Descent):  363.1872844696045
[42mWeights (Batch Gradient Descent):  [2.76677753 0.99373569 0.33395041 0.22470001 0.13936025]
[42mIntercept (Batch Gradient Descent):  -29.753903074317876
[42mR2 Score (Batch Gradient Descent):  0.9880382967603957
[0m




In [8]:
#creating a dedicated class for Stochastic Gradient Descent

#defining a class
class class_SGD:
    """Linear regression fitted with stochastic gradient descent.

    Every update uses a single training row drawn uniformly at random (with
    replacement); one epoch performs as many such updates as there are rows.
    """

    #defining a constructor
    def __init__(self,learning_rate,epochs_sgd,random_state=None):
        """Store the hyper-parameters; the model parameters stay None until model_fit() runs.

        Parameters
        ----------
        learning_rate : float
            Step size applied to every single-row update.
        epochs_sgd : int
            Number of epochs; each epoch performs n_samples updates.
        random_state : int or None, optional
            Seed for the row sampler. With the default ``None`` the global
            NumPy random state is used, exactly as before; passing an integer
            makes the fit reproducible.
        """
        self.weights = None
        self.intercept = None
        self.lr = learning_rate
        self.epochs = epochs_sgd
        self.random_state = random_state

    #method_1: iterating to find final values of intercept and weights
    def model_fit(self,X_train,y_train):
        """Run stochastic gradient descent and return the learned parameters.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Training features.
        y_train : array-like of shape (n_samples,)
            Training targets.

        Returns
        -------
        tuple
            ``(weights, intercept)`` in that order.
        """
        #converting training data into arrays first, so plain lists are accepted too
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float)
        n_samples, n_features = X_train.shape

        #initializing intercept and weights
        self.weights = np.ones(n_features)
        self.intercept = 0

        #a private generator is used only when a seed was supplied; otherwise
        #the global NumPy state drives the sampling, as it did originally
        if self.random_state is None:
            sampler = np.random
        else:
            sampler = np.random.RandomState(self.random_state)

        #stochastic update of intercept and weights, n_samples times per epoch
        for _ in range(self.epochs):
            for _ in range(n_samples):
                row = sampler.randint(0, n_samples)
                features = X_train[row]

                #the residual of the sampled row is a scalar
                residual = y_train[row] - (self.intercept + np.dot(features, self.weights))

                lossfunc_slope_intercept = -2 * residual
                self.intercept = self.intercept - (self.lr * lossfunc_slope_intercept)

                lossfunc_slope_weights = -2 * residual * features
                self.weights = self.weights - (self.lr * lossfunc_slope_weights)

        #returning the final intercept and weights
        return (self.weights,self.intercept)

    #method_2: predicting the target variable
    def prediction(self,X_test):
        """Return intercept + X_test . weights for every row of X_test."""
        return self.intercept + np.dot(X_test,self.weights)

#creating a class object
sgd_obj = class_SGD(learning_rate=0.0001,epochs_sgd=100)
#getting final intercept, weights and predicted value using class methods
start = time.time()
final_weights, final_intercept = sgd_obj.model_fit(X_train,y_train)
print(Back.YELLOW + 'Time Taken (Stochastic Gradient Descent): ',time.time()-start)
y_predict = sgd_obj.prediction(X_test)

#printing intercept, coefficients and R2-Score
print(Back.YELLOW + 'Weights (Stochastic Gradient Descent): ',final_weights)
print(Back.YELLOW + 'Intercept (Stochastic Gradient Descent): ',final_intercept)
print(Back.YELLOW + 'R2 Score (Stochastic Gradient Descent): ',r2_score(y_test,y_predict))
#restoring the default colours so the yellow background does not bleed into later output
print(Style.RESET_ALL)

[43mTime Taken (Stochastic Gradient Descent):  17.38086771965027
[43mWeights (Stochastic Gradient Descent):  [2.87327257 1.02080621 0.58337786 0.43160777 0.15205201]
[43mIntercept (Stochastic Gradient Descent):  -33.62982319011821
[43mR2 Score (Stochastic Gradient Descent):  0.9889358077098803
