In [1]:
# student_model.ipynb

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.metrics import classification_report, r2_score
import joblib

# Load dataset
df = pd.read_csv("student_data.csv")

# Rename columns for consistency (if needed)
df.rename(columns=lambda x: x.strip().lower().replace(" ", "_"), inplace=True)

# Features and labels
X = df[['math_score', 'reading_score', 'writing_score']]
y_class = (df['math_score'] >= 40).astype(int)  # Classification: Pass = 1, Fail = 0
y_reg = df['math_score']                        # Regression target

# Split data
X_train, X_test, y_train_cls, y_test_cls = train_test_split(X, y_class, test_size=0.2, random_state=42)
_, _, y_train_reg, y_test_reg = train_test_split(X, y_reg, test_size=0.2, random_state=42)

# Train Classification Model
clf = RandomForestClassifier()
clf.fit(X_train, y_train_cls)
print("Classification Report:\n", classification_report(y_test_cls, clf.predict(X_test)))

# Train Regression Model
reg = LinearRegression()
reg.fit(X_train, y_train_reg)
print("Regression R2 Score:", r2_score(y_test_reg, reg.predict(X_test)))

# Save models
joblib.dump(clf, 'classifier.pkl')
joblib.dump(reg, 'regressor.pkl')


Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       1.00      1.00      1.00       189

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200

Regression R2 Score: 1.0


['regressor.pkl']