In [21]:
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [22]:
# 1. Load Dataset
# Read the CSV into the DataFrame that the later cells work from. The path is
# relative, so it is resolved against the current working directory.
df = pd.read_csv(filepath_or_buffer="../dataset/StudentExamScores1.csv")

In [23]:
# 2. Define Features
# Name of the column to predict, and the columns handled as numeric inputs.
target = 'Exam_Score'
numerical_features = [
    'Hours_Studied',
    'Attendance',
    'Sleep_Hours',
    'Previous_Scores',
    'Tutoring_Sessions',
    'Physical_Activity',
]
# Every other column of df (neither the target nor a listed numeric column)
# is treated as categorical; the resulting order follows df.columns.
categorical_features = [
    col for col in df.columns
    if col != target and col not in numerical_features
]

In [24]:
# 3. Split Data
# Separate the target column from the inputs, then hold out 20% of the rows
# as a test set. The fixed random_state makes the split repeatable.
y = df[target]
X = df.drop(columns=[target])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [25]:
# 4. Preprocessing Pipelines
# Numeric columns are standardized and categorical columns are one-hot
# encoded. handle_unknown='ignore' tells the encoder not to raise on
# categories that were not present when it was fitted.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')
numerical_transformer = StandardScaler()

# Route each group of columns to its own transformer.
preprocessor = ColumnTransformer([
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
])

In [26]:
# 5. Build Pipeline with Gradient Boosting Regressor
# Regressor with explicit hyperparameters and a fixed seed.
model = GradientBoostingRegressor(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=5,
    random_state=42,
)

# Chain preprocessing and the regressor so that one fit/predict call runs
# both steps in order.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', model),
])

In [27]:
# 6. Train Model
# Fit the preprocessing steps and the regressor on the training split only.
pipeline.fit(X_train, y_train)

# Score the fitted pipeline on the held-out split so the model's quality is
# known before it is saved. For a regressor, score() returns R^2.
test_r2 = pipeline.score(X_test, y_test)
print(f"Test R^2: {test_r2:.4f}")

In [28]:
# 7. Save Trained Pipeline
# Pickle the whole fitted pipeline (preprocessing + regressor) into one file.
model_path = Path("../models/student_score_model.pkl")

# Opening a file for writing does not create missing parent directories, so
# make sure the target directory exists; otherwise the trained model would be
# lost to a FileNotFoundError.
model_path.parent.mkdir(parents=True, exist_ok=True)

with model_path.open("wb") as f:
    pickle.dump(pipeline, f)

print("Model trained and saved successfully.")

Model trained and saved successfully.
