In [2]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# -----------------------------
# 1. Load Dataset
# -----------------------------
df = pd.read_csv("student_data.csv")

# -----------------------------
# 2. Create Target Variable (Strength)
# -----------------------------
def create_strength(score):
    if score < 60:
        return "Weak"
    elif score < 75:
        return "Moderate"
    else:
        return "Strong"

df["strength"] = df["score"].apply(create_strength)

# -----------------------------
# 3. Encode Categorical Features
# -----------------------------

# Encode topic
topic_encoder = LabelEncoder()
df["topic_encoded"] = topic_encoder.fit_transform(df["topic"])

# Encode strength (target)
strength_encoder = LabelEncoder()
df["strength_encoded"] = strength_encoder.fit_transform(df["strength"])

# -----------------------------
# 4. Define Features and Target
# -----------------------------
X = df[["quiz_no", "time_taken", "topic_encoded"]]
y = df["strength_encoded"]

# -----------------------------
# 5. Train-Test Split
# -----------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# -----------------------------
# 6. Train Model
# -----------------------------
model = RandomForestClassifier(n_estimators=100)
model.fit(X_train, y_train)

# -----------------------------
# 7. Evaluate Model
# -----------------------------
predictions = model.predict(X_test)
print("Model Evaluation:\n")
print(classification_report(y_test, predictions))

# -----------------------------
# 8. Save Model + Encoders
# -----------------------------
joblib.dump(model, "model.pkl")
joblib.dump(topic_encoder, "topic_encoder.pkl")
joblib.dump(strength_encoder, "strength_encoder.pkl")

print("\nFiles saved successfully:")
print("✔ model.pkl")
print("✔ topic_encoder.pkl")
print("✔ strength_encoder.pkl")





Model Evaluation:

              precision    recall  f1-score   support

           0       0.16      0.09      0.12       395
           1       0.35      0.36      0.36       734
           2       0.42      0.49      0.46       871

    accuracy                           0.37      2000
   macro avg       0.31      0.32      0.31      2000
weighted avg       0.34      0.37      0.35      2000


Files saved successfully:
✔ model.pkl
✔ topic_encoder.pkl
✔ strength_encoder.pkl
