In [3]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV

# Fine-tune a Gradient Boosting classifier with an exhaustive grid search over
# a small hyperparameter grid, then persist the best estimator to disk.
# NOTE: both file paths below are relative, so they resolve against the
# current working directory of the process running this cell.

# Load Preprocessed Data
data = pd.read_csv('../Data/preprocessed_data.csv')

# Define Features and Target
feature_columns = ['studytime', 'failures', 'absences', 'G1', 'G2', 'avg_grade', 'studytime_level']
X = data[feature_columns]
y = data['Pass_Fail']

# Define Hyperparameter Grid (3 x 3 x 3 = 27 candidate combinations)
param_grid = {
    'n_estimators': [100, 150, 200],
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [3, 4, 5]
}

# Set up Grid Search
grid_search = GridSearchCV(
    # Fixed seed so repeated runs build the same trees.
    estimator=GradientBoostingClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,                # 5 folds per candidate -> 27 * 5 = 135 fits
    scoring='accuracy',  # candidates are ranked by mean CV accuracy
    n_jobs=-1            # run the fits in parallel on all available cores
)

# Fit Grid Search
# The search is cross-validated on the full dataset; no separate hold-out set
# is kept in this cell.
grid_search.fit(X, y)

# Retrieve Best Model
# NOTE(review): the recorded run selected the first value on every grid axis
# (learning_rate=0.01, max_depth=3, n_estimators=100). That may mean several
# candidates tied on accuracy -- inspect grid_search.cv_results_ to confirm.
best_model = grid_search.best_estimator_
print("✅ Best Hyperparameters:", grid_search.best_params_)

# Save Fine-Tuned Model
# Create the target directory first: joblib.dump does not create missing
# parent directories, and failing here would discard the finished search.
model_path = Path('../App/model/fine_tune.pkl')
model_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(best_model, model_path)

print("✅ Fine-tuned Gradient Boosting model saved as fine_tune.pkl")

✅ Best Hyperparameters: {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 100}
✅ Fine-tuned Gradient Boosting model saved as fine_tune.pkl
