In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
import warnings
# NOTE(review): called with only the "ignore" action (no category/module filter),
# this silences *every* warning for the rest of the session, including pandas and
# scikit-learn deprecation notices. Consider narrowing it to specific categories.
warnings.filterwarnings("ignore")


In [7]:
# Load the preprocessed student dataset (path is relative to the working directory).
df = pd.read_csv('preprocessed_student_dataset.csv')

In [8]:
# Bare last expression: the notebook renders the DataFrame as the cell output.
df

Unnamed: 0,Age,Gender,Study_Hours_per_Week,Online_Courses_Completed,Participation_in_Discussions,Assignment_Completion_Rate (%),Exam_Score (%),Attendance_Rate (%),Use_of_Educational_Tech,Self_Reported_Stress_Level,Time_Spent_on_Social_Media (hours/week),Sleep_Hours_per_Night,Final_Grade
0,-1.582639,0.0,1.605127,0.650559,1,1.708927,-0.067365,-0.616001,1,2,-0.657989,0.511151,2
1,1.594887,0.0,0.220714,1.628328,0,-0.267263,-1.710558,-1.226232,1,1,1.447930,0.511151,3
2,-1.004907,0.0,1.528215,0.161674,0,-1.016852,-1.540573,0.265444,1,0,-0.214638,0.010366,3
3,-0.138309,0.0,-1.086788,-1.630902,1,-0.812418,-0.010703,-1.022822,1,0,1.004579,1.512721,1
4,-1.293773,0.0,-0.240757,1.465366,1,-1.084996,-0.407336,1.214692,1,1,1.226255,0.511151,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,-1.004907,1.0,0.220714,-0.653133,1,-0.880563,-0.690645,0.062033,1,1,0.228714,-0.490419,2
9996,-0.138309,0.0,-0.856052,-0.327210,1,-1.425719,0.782562,0.740068,1,1,-0.990503,-0.991204,1
9997,0.728289,1.0,-0.317669,-1.142018,1,-1.425719,-1.710558,-0.344787,0,1,0.561228,0.511151,3
9998,-1.582639,1.0,1.066744,-0.490172,1,-0.607985,-1.427249,1.011282,1,0,-0.990503,0.511151,3


In [9]:
# Count missing values per column to see which features need imputation.
print(df.isna().sum())

Age                                          0
Gender                                     406
Study_Hours_per_Week                         0
Online_Courses_Completed                     0
Participation_in_Discussions                 0
Assignment_Completion_Rate (%)               0
Exam_Score (%)                               0
Attendance_Rate (%)                          0
Use_of_Educational_Tech                      0
Self_Reported_Stress_Level                   0
Time_Spent_on_Social_Media (hours/week)      0
Sleep_Hours_per_Night                        0
Final_Grade                                  0
dtype: int64


In [10]:
# Fill missing Gender values with the most frequent value (the mode).
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the df['Gender'] selection: that chained in-place call
# emits a FutureWarning on pandas 2.x and no longer updates `df` once
# copy-on-write is the default (pandas 3.0).
df['Gender'] = df['Gender'].fillna(df['Gender'].mode()[0])


In [11]:
# Re-count missing values per column to verify that no nulls remain.
print(df.isna().sum())

Age                                        0
Gender                                     0
Study_Hours_per_Week                       0
Online_Courses_Completed                   0
Participation_in_Discussions               0
Assignment_Completion_Rate (%)             0
Exam_Score (%)                             0
Attendance_Rate (%)                        0
Use_of_Educational_Tech                    0
Self_Reported_Stress_Level                 0
Time_Spent_on_Social_Media (hours/week)    0
Sleep_Hours_per_Night                      0
Final_Grade                                0
dtype: int64


In [14]:
# Separate the target vector (y) from the feature matrix (X).
# Only the target column is removed here; every other column of `df` is kept as
# a feature (no ID column is dropped by this statement).
# NOTE(review): the tuned depth-3 tree further down reports 100% test accuracy,
# which suggests Final_Grade may be derived directly from a feature such as
# 'Exam_Score (%)' (target leakage) — confirm, and consider excluding it from X.
y = df['Final_Grade']
X = df.drop(columns=['Final_Grade'])

In [15]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [16]:
# Hyperparameter search space for the decision tree: 2 * 4 * 3 = 24 combinations.
param_grid = dict(
    criterion=['gini', 'entropy'],
    max_depth=[3, 5, 10, None],  # None = no explicit depth limit
    min_samples_split=[2, 5, 10],
)

In [17]:
# Try every combination in param_grid with 5-fold cross-validation on the
# training split, scoring each candidate by accuracy.
dt = DecisionTreeClassifier(random_state=42)
grid_search = GridSearchCV(
    estimator=dt,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(X_train, y_train)

In [18]:
# Best model: the estimator the fitted grid search exposes as `best_estimator_`.
best_dt = grid_search.best_estimator_

In [19]:
# Predict on the held-out test split and report the selected model's performance.
y_pred = best_dt.predict(X_test)
print("Best Parameters:", grid_search.best_params_)
print("Accuracy:", accuracy_score(y_test, y_pred))
# Concatenate instead of passing the report as a second print() argument:
# print's default separator would insert a space in front of the report's header
# row and push it one column out of alignment with the rows beneath it.
print("\nClassification Report:\n" + classification_report(y_test, y_pred))

Best Parameters: {'criterion': 'gini', 'max_depth': 3, 'min_samples_split': 2}
Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       533
           1       1.00      1.00      1.00       471
           2       1.00      1.00      1.00       495
           3       1.00      1.00      1.00       501

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000

