In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.feature_selection import RFE
from sklearn.metrics import accuracy_score


In [2]:
# Load the already-preprocessed student dataset from the working directory.
DATA_PATH = 'preprocessed_student_dataset.csv'
df = pd.read_csv(DATA_PATH)

In [3]:
# Show the loaded frame to eyeball column names and values
# (pandas elides the middle rows of a long frame in the rendered output).
df

Unnamed: 0,Age,Gender,Study_Hours_per_Week,Online_Courses_Completed,Participation_in_Discussions,Assignment_Completion_Rate (%),Exam_Score (%),Attendance_Rate (%),Use_of_Educational_Tech,Self_Reported_Stress_Level,Time_Spent_on_Social_Media (hours/week),Sleep_Hours_per_Night,Final_Grade
0,-1.582639,0.0,1.605127,0.650559,1,1.708927,-0.067365,-0.616001,1,2,-0.657989,0.511151,2
1,1.594887,0.0,0.220714,1.628328,0,-0.267263,-1.710558,-1.226232,1,1,1.447930,0.511151,3
2,-1.004907,0.0,1.528215,0.161674,0,-1.016852,-1.540573,0.265444,1,0,-0.214638,0.010366,3
3,-0.138309,0.0,-1.086788,-1.630902,1,-0.812418,-0.010703,-1.022822,1,0,1.004579,1.512721,1
4,-1.293773,0.0,-0.240757,1.465366,1,-1.084996,-0.407336,1.214692,1,1,1.226255,0.511151,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,-1.004907,1.0,0.220714,-0.653133,1,-0.880563,-0.690645,0.062033,1,1,0.228714,-0.490419,2
9996,-0.138309,0.0,-0.856052,-0.327210,1,-1.425719,0.782562,0.740068,1,1,-0.990503,-0.991204,1
9997,0.728289,1.0,-0.317669,-1.142018,1,-1.425719,-1.710558,-0.344787,0,1,0.561228,0.511151,3
9998,-1.582639,1.0,1.066744,-0.490172,1,-0.607985,-1.427249,1.011282,1,0,-0.990503,0.511151,3


In [4]:
# Separate features and target.
# The frame preview shows an 'Unnamed: 0' column, which appears to be a row
# index written out when the CSV was saved. A row identifier carries no real
# signal and must not be fed to the models, so any such column is dropped
# together with the target.
target_col = 'Final_Grade'
index_like_cols = [col for col in df.columns if str(col).startswith('Unnamed:')]
X = df.drop(columns=[target_col, *index_like_cols])
y = df[target_col]


In [5]:
# Fill missing values (if any)
X = X.fillna(X.median())

In [6]:
# Standardize features (important for some models like SVM, KNN)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [7]:
# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [8]:
# Recursive Feature Elimination: keep 10 features, using Logistic Regression
# as the estimator that drives the elimination (it can be swapped for another).
base_estimator = LogisticRegression(max_iter=1000)
rfe = RFE(estimator=base_estimator, n_features_to_select=10)

# Fit the selector on the training split, then apply the same feature mask
# to both the training and the test split.
rfe.fit(X_train, y_train)
X_train_rfe = rfe.transform(X_train)
X_test_rfe = rfe.transform(X_test)



In [9]:
# Models to compare, keyed by the display name used when reporting results.
# The tree-based models are randomized, so they get a fixed random_state
# (the same seed as the train/test split) to make reruns reproducible.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Support Vector Machine": SVC()
}

In [10]:
# Fit every candidate on the RFE-selected training features and report its
# accuracy on the held-out test split.
# NOTE(review): the recorded run shows 1.00 accuracy for three of the models;
# scores that perfect often mean a feature directly encodes the target --
# TODO confirm how Final_Grade was derived before trusting these numbers.
for label, clf in models.items():
    clf.fit(X_train_rfe, y_train)
    test_accuracy = accuracy_score(y_test, clf.predict(X_test_rfe))
    print(f"{label} Accuracy: {test_accuracy:.2f}")

Logistic Regression Accuracy: 1.00
Random Forest Accuracy: 1.00
Decision Tree Accuracy: 1.00
K-Nearest Neighbors Accuracy: 0.70
Support Vector Machine Accuracy: 0.96
