Required Libraries


In [None]:
# Data
import pandas as pd
import numpy as np

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# ML
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, classification_report
# Suppressing warnings
import warnings

Dataset

In [None]:
# Load the student dataset.
# The path is a raw string so the Windows backslashes are taken literally;
# in a normal string '\S' and '\M' are invalid escape sequences and trigger
# a warning on recent Python versions.
# NOTE(review): this is an absolute, machine-specific path — consider a
# configurable relative data directory.
df = pd.read_csv(r'D:\StudentPredictionML\Math-Students.csv')

df.head()


Creating the Pass/Fail Target

In [None]:
# Binary target: 1 (pass) when the final grade G3 is at least 10, else 0 (fail).
# A vectorized comparison replaces the per-row lambda. The explicit int64 cast
# keeps the column eligible for the int64/float64 dtype selection used by the
# correlation heatmap, regardless of the platform's default integer width.
df['pass_fail'] = (df['G3'] >= 10).astype('int64')

# Only the last expression of a cell is echoed, so print the class counts
# explicitly instead of leaving them as a discarded bare expression.
print(df['pass_fail'].value_counts())

sns.countplot(x='pass_fail', data=df)
plt.title("Pass vs Fail Distribution")
plt.show()


Correlation Heatmap of Numeric Features

In [None]:
# Keep only the int64/float64 columns; their pairwise correlations are plotted.
numeric_df = df.select_dtypes(include=['int64', 'float64'])
correlations = numeric_df.corr()

# Annotated correlation matrix, two decimals per cell.
plt.figure(figsize=(12, 8))
sns.set_style("whitegrid")
heatmap_options = dict(annot=True, fmt=".2f", cmap="coolwarm", cbar=True, square=True)
sns.heatmap(correlations, **heatmap_options)
plt.title("Feature Correlation (Numeric Only)", fontsize=16)
plt.tight_layout()
plt.show()

Encoding Categorical Columns

In [None]:
# Columns with object dtype are treated as the categorical features.
cat_cols = df.select_dtypes(include='object').columns

# Replace each categorical column with integer codes.
# A separate LabelEncoder is fitted per column and kept in `label_encoders`,
# so every column's category <-> code mapping stays available afterwards
# (e.g. label_encoders[col].classes_ or .inverse_transform). Refitting one
# shared encoder would keep only the mapping of the last column.
label_encoders = {}
for col in cat_cols:
    le = LabelEncoder()  # after the loop, `le` is the encoder of the last column
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le


Scaling Numeric Columns

In [None]:
num_cols = ['age','traveltime','studytime','failures','famrel','freetime','goout','Dalc','Walc','health','absences','G1','G2']

# Fit the scaler on the training rows only, so that test-set statistics do not
# leak into the features (fitting on the full frame before splitting does).
# The row partition is reproduced here with the same test_size / random_state
# as the train/test split cell further down; for an equal number of rows
# train_test_split then selects the same rows. Keep the two calls in sync.
train_positions, _ = train_test_split(np.arange(len(df)), test_size=0.2, random_state=42)

scaler = StandardScaler()
scaler.fit(df[num_cols].iloc[train_positions])

# Apply the training-set mean/std to every row (train and test alike).
df[num_cols] = scaler.transform(df[num_cols])


Train/Test Split (80% / 20%)

In [None]:
# Predictors: every column except the label and G3 (the grade the label is derived from).
X = df.drop(columns=['pass_fail', 'G3'])
# Label: the binary pass/fail indicator.
y = df['pass_fail']

# Hold out 20% of the rows for testing; the fixed seed makes the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


StudyTime vs Pass/Fail

In [None]:
# 'studytime' in df has already been standardized by the scaling cell, so
# plotting it directly would put z-scores on the y-axis. Undo the scaling for
# display so the axis shows the values as they appear in the dataset.
studytime_original = scaler.inverse_transform(df[num_cols])[:, num_cols.index('studytime')]

ax = sns.boxplot(x=df['pass_fail'], y=studytime_original)
ax.set(xlabel='pass_fail', ylabel='studytime')
plt.title("Study Time vs Pass/Fail")
plt.show()


Attendance (Absences) vs Pass/Fail

In [None]:
# 'absences' in df has already been standardized by the scaling cell, so
# plotting it directly would put z-scores on the y-axis. Undo the scaling for
# display so the axis shows the values as they appear in the dataset.
absences_original = scaler.inverse_transform(df[num_cols])[:, num_cols.index('absences')]

ax = sns.boxplot(x=df['pass_fail'], y=absences_original)
ax.set(xlabel='pass_fail', ylabel='absences')
plt.title("Absences vs Pass/Fail")
plt.show()


Logistic Regression

In [None]:
# Logistic regression with default hyper-parameters, trained on the training split.
# fit() returns the estimator itself, so construction and training are chained.
lr = LogisticRegression().fit(X_train, y_train)

# Predicted pass/fail labels for the held-out rows.
y_pred_lr = lr.predict(X_test)


Decision Tree

In [None]:
# Decision tree with a fixed seed, trained on the training split.
# fit() returns the estimator itself, so construction and training are chained.
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predicted pass/fail labels for the held-out rows.
y_pred_dt = dt.predict(X_test)


Random Forest

In [None]:
# Random forest of 100 trees with a fixed seed, trained on the training split.
# fit() returns the estimator itself, so construction and training are chained.
rf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predicted pass/fail labels for the held-out rows.
y_pred_rf = rf.predict(X_test)


Model Evaluation

In [None]:
def evaluate_model(y_true, y_pred, model_name):
    """Print a classification summary for one model.

    Writes a header with the model name, then accuracy, precision, recall,
    the confusion matrix and the classification report, each computed from
    ``y_true`` and ``y_pred``. Nothing is returned.

    Args:
        y_true: Ground-truth labels, passed as the first argument to each metric.
        y_pred: Predicted labels, passed as the second argument to each metric.
        model_name: Text shown in the header line.
    """
    print(f"--- {model_name} ---")

    # Each metric is computed and printed in turn, label first.
    summary_rows = (
        ("Accuracy:", accuracy_score),
        ("Precision:", precision_score),
        ("Recall:", recall_score),
        ("Confusion Matrix:\n", confusion_matrix),
        ("Classification Report:\n", classification_report),
    )
    for heading, metric in summary_rows:
        print(heading, metric(y_true, y_pred))

    # Blank separator between consecutive model summaries.
    print("\n")

# Report every trained model against the same held-out labels, in training order.
for model_label, model_predictions in (
    ("Logistic Regression", y_pred_lr),
    ("Decision Tree", y_pred_dt),
    ("Random Forest", y_pred_rf),
):
    evaluate_model(y_test, model_predictions, model_label)
