# 🎓 Student Performance Prediction (UCI Dataset)

Predict whether a student will pass (final grade `G3` of at least 10) based on real academic and personal features.
This notebook trains 3 models and compares their performance:
- Logistic Regression
- Decision Tree
- Random Forest

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

## 📥 Load Dataset

In [None]:
# Read the student dataset; the file uses ';' as its field separator.
DATA_PATH = '../data/student-mat.csv'
df = pd.read_csv(DATA_PATH, sep=';')
df.head()

## 🔍 Preprocess Data

In [None]:
# Derive the boolean target: a final grade (G3) of at least 10 counts as a pass.
df['pass'] = df['G3'].ge(10)
# Remove the three grade columns from the frame (in place) so they are not
# used as features.
df.drop(columns=['G1', 'G2', 'G3'], inplace=True)

# Replace every object-dtype column with the labels produced by LabelEncoder.
# One encoder instance is refitted per column, so after the loop `le` only
# holds the classes of the last column processed.
le = LabelEncoder()
categorical_cols = df.select_dtypes(include='object').columns
for column in categorical_cols:
    df[column] = le.fit_transform(df[column])
df.head()

## 📊 Train-Test Split

In [None]:
# Features are every column except the target; the target is the 'pass' column.
y = df['pass']
X = df.loc[:, df.columns != 'pass']

# Hold out 20% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## ⚙️ Train and Evaluate Models

In [None]:
# Candidate classifiers, keyed by the display name used in later cells.
# The tree-based models are seeded (same seed as the train/test split) so that
# re-running the notebook reproduces the same metrics; without a seed their
# results change from run to run.
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
}

# Per-model evaluation output, keyed by model name. Each entry holds:
#   'accuracy'          - accuracy_score on the test split
#   'confusion_matrix'  - confusion_matrix on the test split
#   'report'            - classification_report as a dict (output_dict=True)
results = {}

for name, model in models.items():
    # Fit on the training split, then score predictions on the held-out split.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    cr = classification_report(y_test, y_pred, output_dict=True)
    results[name] = {
        'accuracy': acc,
        'confusion_matrix': cm,
        'report': cr,
    }

## 📋 Model Accuracy Comparison

In [None]:
# One row per model, one column with the test accuracy as a percentage
# rounded to two decimals.
accuracy_pct = {
    model_name: round(entry['accuracy'] * 100, 2)
    for model_name, entry in results.items()
}
comparison_df = pd.Series(accuracy_pct, name='Accuracy (%)').to_frame()
comparison_df

## 📉 Confusion Matrices

In [None]:
# Draw one annotated heatmap per model from its stored confusion matrix.
for model_name in results:
    matrix = results[model_name]['confusion_matrix']
    ax = sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues')
    ax.set_title(f'Confusion Matrix - {model_name}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    plt.show()

## ✅ Summary
- All models were evaluated on a real-world student dataset.
- Accuracy is reported for each model; classification reports are computed and stored in `results` but are not displayed.
- Logistic Regression is generally effective, but all three models performed comparably.
- Categorical features are label-encoded; data exploration is limited to previewing the data with `df.head()` (no separate EDA is performed).
