# Part 2: Gradient Descent

In this section, I continue the analysis of the Student Performance dataset, as previously done in Part 1. However, instead of using the least squares method, I implement a custom gradient descent algorithm for optimization.

## Import libraries

In [1]:
import pandas as pd
import numpy as np
from Linear_Regression.models.gradient_descent import GradienDescent
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

## Load dataset

In [2]:
# Load the Student Performance dataset; the path is relative to the notebook's directory.
df = pd.read_csv("../data/Student_Performance.csv")
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() would append a stray "None" line to the output.
df.info()

   Hours Studied  Previous Scores Extracurricular Activities  Sleep Hours  \
0              7               99                        Yes            9   
1              4               82                         No            4   
2              8               51                        Yes            7   
3              5               52                        Yes            5   
4              7               75                         No            8   

   Sample Question Papers Practiced  Performance Index  
0                                 1               91.0  
1                                 2               65.0  
2                                 2               45.0  
3                                 2               36.0  
4                                 5               66.0  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 6 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------              

## Convert categorical features

In [3]:
# Replace the categorical column(s) with indicator columns. In this dataset that
# turns "Extracurricular Activities" into a single "Extracurricular Activities_Yes"
# column, because drop_first=True drops the first level of each encoded feature.
df = pd.get_dummies(df, drop_first=True)
print(df.head(n=5))

   Hours Studied  Previous Scores  Sleep Hours  \
0              7               99            9   
1              4               82            4   
2              8               51            7   
3              5               52            5   
4              7               75            8   

   Sample Question Papers Practiced  Performance Index  \
0                                 1               91.0   
1                                 2               65.0   
2                                 2               45.0   
3                                 2               36.0   
4                                 5               66.0   

   Extracurricular Activities_Yes  
0                            True  
1                           False  
2                            True  
3                            True  
4                           False  


## Split data into features and target variables

In [4]:
# Features: every column except the target.
# The one-hot column is boolean while the remaining features are integers; pandas
# has no common numeric dtype for that mix, so a plain `.values` would hand back an
# object-dtype array. Requesting float explicitly yields a proper numeric matrix
# (True/False become 1.0/0.0).
X = df.drop(columns=["Performance Index"]).to_numpy(dtype=float)
# Target: the performance index to be predicted.
y = df["Performance Index"].to_numpy(dtype=float)

## Split data into training and testing sets

In [5]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Standardize the data

In [6]:
# Learn the scaling statistics from the training split only, then apply the same
# fitted scaler to both splits so no information from the test set leaks into it.
# NOTE: X_train / X_test are overwritten here, so re-running this cell on its own
# would scale already-scaled arrays; re-run from the split cell instead.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

## Use Gradient Descent to predict the outcome

In [7]:
# Fit the project's custom gradient-descent regressor on the standardized training
# data, then predict on the standardized test features.
gradient_descent_model = GradienDescent()

gradient_descent_model.fit(
    X_train=X_train,
    y_train=y_train,
    learning_rate=0.01,
    iterations=2000,
)
y_pred = gradient_descent_model.predict(
    X_test=X_test,
)

## Measure results

In [8]:
# Score the test-set predictions with the usual regression metrics.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
# RMSE is the square root of the MSE, so reuse the value computed above
# instead of scoring the predictions a second time.
rmse = np.sqrt(mse)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f"MSE:     {mse}")
print(f"MAE:     {mae}")
print(f"RMSE:    {rmse}")
print(f"R2:      {r2}")

MSE:     4.082628398521848
MAE:     1.611121346312302
RMSE:    2.020551508505004
R2:      0.9889832909573145
