In [1]:
#importing 
#libraries ('pandas', 'numpy'), 
#functions ('train_test_split', 'r2_score'), 
#class ('LinearRegression')
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [2]:
#loading the student-performance dataset from disk and previewing its first five rows
df = pd.read_csv('Student_Performance.CSV')
df.head(n=5)

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,Yes,9,1,91.0
1,4,82,No,4,2,65.0
2,8,51,Yes,7,2,45.0
3,5,52,Yes,5,2,36.0
4,7,75,No,8,5,66.0


In [3]:
#encoding 'Extracurricular Activities': 'Yes' -> 1, anything else -> 0
#'isin' also matches an already-encoded 1, so re-running this cell leaves the
#column unchanged instead of silently turning every value into 0
df['Extracurricular Activities'] = df['Extracurricular Activities'].isin(['Yes', 1]).astype(int)
df.head()

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,1,9,1,91.0
1,4,82,0,4,2,65.0
2,8,51,1,7,2,45.0
3,5,52,1,5,2,36.0
4,7,75,0,8,5,66.0


In [4]:
#predictors ('X'): the first five columns; target ('y'): the last column
X = df.iloc[:, :5]
y = df.iloc[:, -1]

In [5]:
#holding out 30% of the rows for testing; fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=2,
)

In [6]:
#building the 'LinearRegression' estimator and training it in one step
#('fit' returns the fitted estimator itself)
lin_reg = LinearRegression().fit(X_train, y_train)
#predicting the target variable for the held-out rows
y_pred = lin_reg.predict(X_test)

In [7]:
#reporting the fitted intercept, the per-feature coefficients and the test-set R2-Score
print('Intercept (Scikit-learn_LR): ', lin_reg.intercept_)
print('Weights (Scikit-learn_LR): ', lin_reg.coef_)
print('R2 Score (Scikit-learn_LR): ', r2_score(y_test, y_pred))

Intercept (Scikit-learn_LR):  -34.17650031064235
Weights (Scikit-learn_LR):  [2.85442232 1.01984531 0.55955086 0.48506508 0.19447807]
R2 Score (Scikit-learn_LR):  0.9890969000618695


In [8]:
#creating a dedicated class for Batch Gradient Descent

#defining a class
class myGD:
    """Linear regression fitted with batch gradient descent.

    Every epoch uses the whole training set to compute the gradient of the
    mean squared error with respect to the intercept and the weights, then
    moves both one step of size ``learning_rate`` against that gradient.
    """

    #defining a constructor
    def __init__(self,learning_rate,epochs):
        """Store the hyper-parameters; the model itself is created by ``fitting``.

        Parameters
        ----------
        learning_rate : float
            Step size applied to every gradient update.
        epochs : int
            Number of full passes (updates) over the training data.
        """
        self.intercept = None
        self.weights = None
        self.lr = learning_rate
        self.epochs = epochs

    #method_1: iterating to find final values of intercept and weights
    def fitting(self,X_train,y_train):
        """Run ``epochs`` gradient-descent updates on the training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Numeric predictor values.
        y_train : array-like of shape (n_samples,)
            Numeric target values.

        Returns
        -------
        tuple
            ``(intercept, weights)`` after the final update; ``weights`` is a
            1-D array with one entry per column of ``X_train``.
        """
        #converting once to float arrays so the loop does pure numpy arithmetic
        #(no pandas index alignment or per-iteration conversion)
        features = np.asarray(X_train, dtype=float)
        targets = np.asarray(y_train, dtype=float)
        n_samples = features.shape[0]

        #initializing intercept and weights
        self.intercept = 0.0
        #one weight per column of the data passed in; this must not depend on
        #any variable defined outside the method
        self.weights = np.ones(features.shape[1])

        #iterating till the value of epochs
        for _ in range(self.epochs):
            #both gradients are evaluated at the same (pre-update) parameters
            residuals = targets - (self.intercept + np.dot(features, self.weights))
            lossfunc_slope_intercept = -2 * np.mean(residuals)
            lossfunc_slope_weights = -2 * np.dot(residuals, features) / n_samples

            #updating the intercept and the weights
            self.intercept = self.intercept - (self.lr * lossfunc_slope_intercept)
            self.weights = self.weights - (self.lr * lossfunc_slope_weights)

        #returning the final intercept and weights
        return (self.intercept, self.weights)

    #method_2: predicting the target variable
    def prediction(self,X_test):
        """Return ``intercept + X_test . weights`` for every row of ``X_test``.

        ``fitting`` must have been called first so that the intercept and the
        weights are set.
        """
        return self.intercept + np.dot(X_test, self.weights)

#instantiating the gradient-descent model with its step size and number of epochs
gd_obj = myGD(learning_rate=0.0001, epochs=400000)
#training on the training split, then predicting the held-out rows
final_intercept, final_weights = gd_obj.fitting(X_train, y_train)
y_prediction = gd_obj.prediction(X_test)

#reporting the learned intercept, the learned weights and the test-set R2-Score
print('Intercept (Gradient Descent): ', final_intercept)
print('Weights (Gradient Descent): ', final_weights)
print('R2 Score (Gradient Descent): ', r2_score(y_test, y_prediction))

Intercept (Gradient Descent):  -29.638185726060197
Weights (Gradient Descent):  [2.76683006 0.99310719 0.2856021  0.22025527 0.13611776]
R2 Score (Gradient Descent):  0.9878485889227095
