<a href="https://colab.research.google.com/github/GrizzlyToast/ML_Practise/blob/main/StudentPerformance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [94]:
import os

from google.colab import userdata

# Copy the Kaggle credentials out of Colab's secret store into environment
# variables of the same name (presumably so the `kaggle` CLI used later can
# authenticate — confirm against the Kaggle API docs).
for secret_name in ("KAGGLE_KEY", "KAGGLE_USERNAME"):
    os.environ[secret_name] = userdata.get(secret_name)

# Loading Data

In [95]:
!kaggle datasets download -d neurocipher/student-performance
! unzip "student-performance.zip"

Dataset URL: https://www.kaggle.com/datasets/neurocipher/student-performance
License(s): apache-2.0
student-performance.zip: Skipping, found more recently modified local copy (use --force to force download)
Archive:  student-performance.zip
replace StudentPerformance.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: StudentPerformance.csv  


In [105]:
import pandas as pd

# Load the extracted Kaggle CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='StudentPerformance.csv')
df.head(n=5)

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,Yes,9,1,91.0
1,4,82,No,4,2,65.0
2,8,51,Yes,7,2,45.0
3,5,52,Yes,5,2,36.0
4,7,75,No,8,5,66.0


# Data Preprocessing

In [106]:
# Encode the Yes/No column as 1/0 so that every feature is numeric
# ("Yes" -> 1, anything else -> 0).
# The dtype guard makes the cell safe to re-run: once the column holds
# integers, re-applying the `== "Yes"` test would silently turn every value
# into 0.
activity_col = 'Extracurricular Activities'
if not pd.api.types.is_numeric_dtype(df[activity_col]):
    df[activity_col] = df[activity_col].astype(str).eq("Yes").astype(int)
df.head()

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,1,9,1,91.0
1,4,82,0,4,2,65.0
2,8,51,1,7,2,45.0
3,5,52,1,5,2,36.0
4,7,75,0,8,5,66.0


The categorical `Extracurricular Activities` column was encoded as binary values (`Yes` → 1, anything else → 0) so that every column in the data is numeric.

In [107]:
# Pairwise correlation between every pair of columns; the "Performance Index"
# row/column shows how strongly each feature tracks the value being predicted.
df.corr()

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
Hours Studied,1.0,-0.01239,0.003873,0.001245,0.017463,0.37373
Previous Scores,-0.01239,1.0,0.008369,0.005944,0.007888,0.915189
Extracurricular Activities,0.003873,0.008369,1.0,-0.023284,0.013103,0.024525
Sleep Hours,0.001245,0.005944,-0.023284,1.0,0.00399,0.048106
Sample Question Papers Practiced,0.017463,0.007888,0.013103,0.00399,1.0,0.043268
Performance Index,0.37373,0.915189,0.024525,0.048106,0.043268,1.0


In [121]:
# Remove the extracurricular flag from the frame (presumably because its
# correlation with Performance Index is only ~0.02 — confirm intent).
# `errors='ignore'` keeps the cell re-runnable: without it a second run raises
# KeyError because the column is already gone.
# NOTE(review): this cell's execution count (121) is later than that of the
# cells following it (108-110), and the array printed further down still
# contains this column, so the recorded outputs predate the drop. Restart the
# kernel and run all cells to refresh them.
df = df.drop(columns='Extracurricular Activities', errors='ignore')

In [108]:
# Recompute the pairwise correlation matrix on the current frame.
# NOTE(review): the recorded output still lists "Extracurricular Activities",
# so it appears to have been produced before that column was dropped — verify
# by re-running the notebook from the top.
df.corr()

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
Hours Studied,1.0,-0.01239,0.003873,0.001245,0.017463,0.37373
Previous Scores,-0.01239,1.0,0.008369,0.005944,0.007888,0.915189
Extracurricular Activities,0.003873,0.008369,1.0,-0.023284,0.013103,0.024525
Sleep Hours,0.001245,0.005944,-0.023284,1.0,0.00399,0.048106
Sample Question Papers Practiced,0.017463,0.007888,0.013103,0.00399,1.0,0.043268
Performance Index,0.37373,0.915189,0.024525,0.048106,0.043268,1.0


In [109]:
# Convert the DataFrame to a 2-D NumPy array (one row per record, one column
# per DataFrame column) and print it for a quick visual check.
dataset = df.to_numpy()
print(dataset)

[[ 7. 99.  1.  9.  1. 91.]
 [ 4. 82.  0.  4.  2. 65.]
 [ 8. 51.  1.  7.  2. 45.]
 ...
 [ 6. 83.  1.  8.  5. 74.]
 [ 9. 97.  1.  7.  0. 95.]
 [ 7. 74.  0.  8.  1. 64.]]


In [110]:
import numpy as np

# Separate the last column (the target) from the preceding feature columns.
# Both slices keep two dimensions, so y has shape (n_rows, 1).
X = dataset[:, :-1]
y = dataset[:, -1:]

## Variance Threshold (Hyperparameter)

In [122]:
from sklearn.feature_selection import VarianceThreshold

# Feature selection by variance: columns whose variance falls below the
# threshold are removed. The threshold value is a tunable hyperparameter.
selector = VarianceThreshold(threshold=0.1)  # hyperparameter
selector.fit(X)
X = selector.transform(X)

# Show the retained feature matrix, then the (unchanged) target column.
print(X)
print(y)

[[ 7. 99.  1.  9.  1.]
 [ 4. 82.  0.  4.  2.]
 [ 8. 51.  1.  7.  2.]
 ...
 [ 6. 83.  1.  8.  5.]
 [ 9. 97.  1.  7.  0.]
 [ 7. 74.  0.  8.  1.]]
[[91.]
 [65.]
 [45.]
 ...
 [74.]
 [95.]
 [64.]]


# Linear Regression Model

In [123]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

# Fit a linear regression on the training rows, then predict the held-out rows.
reg = LinearRegression()
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)

# Performance Metrics
# Both scores are computed on the held-out test rows only.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Mean Squared Error:", mse)
print("R-squared:", r2)

Mean Squared Error: 4.224022760753757
R-squared: 0.9884855999665682


##