# In [None]:
# ============================================================
# Student Performance Analysis & Grade Prediction
# FINAL – JUPYTER SAFE VERSION
# ============================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from pathlib import Path

# ------------------------------------------------------------
# STEP 1: PROJECT ROOT (JUPYTER SAFE)
# ------------------------------------------------------------
# NOTE: Path.cwd() is the kernel's current working directory. Jupyter normally
# starts the kernel in the notebook's own folder, so this is the project root
# only when the notebook lives there (or the working directory was changed).
PROJECT_ROOT = Path.cwd()

print("Project root detected as:", PROJECT_ROOT)

# ------------------------------------------------------------
# STEP 2: LOAD CSV
# ------------------------------------------------------------
# rglob() yields matches in whatever order the filesystem returns them, so
# "take the first hit" can select a different file on another machine when
# several copies exist. Sorting (shallowest path first, then alphabetically)
# makes the choice reproducible; directories that happen to carry the same
# name are skipped.
csv_candidates = sorted(
    (
        candidate
        for candidate in PROJECT_ROOT.rglob("student_data.csv")
        if candidate.is_file()
    ),
    key=lambda candidate: (len(candidate.parts), str(candidate)),
)

if not csv_candidates:
    raise FileNotFoundError("student_data.csv not found")

csv_path = csv_candidates[0]
print("CSV found at:", csv_path)

df = pd.read_csv(csv_path)
# The original literal was mis-decoded UTF-8; the check-mark emoji is restored.
print("\nDataset Loaded Successfully ✅")
print(df.head())

# ------------------------------------------------------------
# STEP 3: FEATURE ENGINEERING
# ------------------------------------------------------------
# Per-student mean of the three exam scores, accumulated left to right and
# then divided by the number of exams.
score_columns = ["math score", "reading score", "writing score"]
score_total = df[score_columns[0]]
for score_name in score_columns[1:]:
    score_total = score_total + df[score_name]
df["average_score"] = score_total / 3

# ------------------------------------------------------------
# STEP 4: ENCODING
# ------------------------------------------------------------
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Use one encoder per column. Re-fitting a single shared LabelEncoder replaces
# its learned classes on every iteration, so only the last column's
# category -> integer mapping would survive and the other columns could not be
# encoded consistently (or decoded) later. Persist `label_encoders` next to the
# model if new data has to be encoded at prediction time.
categorical_columns = [
    "gender",
    "lunch",
    "test preparation course",
    "parental level of education",
]
label_encoders = {}
for col in categorical_columns:
    column_encoder = LabelEncoder()
    df[col] = column_encoder.fit_transform(df[col])
    label_encoders[col] = column_encoder

# `le` previously ended up fitted on the last column; keep that binding so any
# later code that still refers to it behaves the same.
le = label_encoders[categorical_columns[-1]]

# ------------------------------------------------------------
# STEP 5: FEATURES & TARGET
# ------------------------------------------------------------
# Regression target: the writing exam score.
y = df["writing score"]

# Predictors: the two remaining exam scores plus three of the categorical
# columns that were integer-encoded in the previous step.
feature_columns = [
    "math score",
    "reading score",
    "gender",
    "lunch",
    "test preparation course",
]
X = df[feature_columns]

# ------------------------------------------------------------
# STEP 6: TRAIN TEST SPLIT
# ------------------------------------------------------------
from sklearn.model_selection import train_test_split

# Split the raw features BEFORE scaling. Fitting the scaler on all rows lets
# the mean/variance of the hold-out rows leak into preprocessing (and into the
# scaler that is saved later). Same test_size and random_state as before.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ------------------------------------------------------------
# STEP 7: SCALING (FIT ON TRAINING ROWS ONLY)
# ------------------------------------------------------------
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
# The hold-out rows are only transformed with the training statistics.
X_test = scaler.transform(X_test_raw)

# Kept so that any later code referring to `X_scaled` still works; it is the
# full feature matrix transformed with the training-set statistics.
X_scaled = scaler.transform(X)

# ------------------------------------------------------------
# STEP 8: MODEL TRAINING
# ------------------------------------------------------------
from sklearn.ensemble import RandomForestRegressor

# 200 trees; the fixed random_state makes repeated runs reproducible.
rf = RandomForestRegressor(n_estimators=200, random_state=42)
rf.fit(X_train, y_train)

# The hold-out split was created but never used, so the model would be saved
# without any quality check. Report R^2 on the unseen rows.
holdout_r2 = rf.score(X_test, y_test)
print(f"Hold-out R^2: {holdout_r2:.4f}")

# ------------------------------------------------------------
# STEP 9: SAVE MODEL & SCALER (PROJECT ROOT)
# ------------------------------------------------------------
# Both artifacts are written directly into PROJECT_ROOT.
model_path = PROJECT_ROOT / "student_model.pkl"
scaler_path = PROJECT_ROOT / "scaler.pkl"

# NOTE: joblib files are pickle-based; only load them back from trusted sources.
joblib.dump(rf, model_path)
joblib.dump(scaler, scaler_path)

# The original literals were mis-decoded UTF-8 and printed garbage characters;
# the intended emoji are restored here.
print("\n✅ Model saved at:", model_path)
print("✅ Scaler saved at:", scaler_path)
print("🎉 MODEL & SCALER SAVED SUCCESSFULLY 🎉")
