In [5]:
# ✅ University Recommendation System - Evaluation and Fine-Tuning (Random Forest Regressor)

import os

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV

# Step 1: Load the preprocessed data and hold out a test split
FEATURE_COLUMNS = [
    'GRE_Score',
    'TOEFL_Score',
    'University_Rating',
    'SOP',
    'LOR',
    'CGPA',
    'Research',
]
TARGET_COLUMN = 'Chance_of_Admit'

df = pd.read_csv('../Data/preprocessed_data.csv')

# Predictor columns and the regression target.
X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# 20% of the rows are held out for testing; the fixed random_state keeps
# the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 2: Base estimator whose hyperparameters are tuned below
rf = RandomForestRegressor(random_state=42)

# Step 3: Candidate values per hyperparameter (3 x 3 x 3 = 27 combinations)
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 5, 10],
    min_samples_split=[2, 5, 10],
)

# Step 4: Grid search over every combination, scored by R² with
# 5-fold cross-validation; n_jobs=-1 requests all available cores
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring='r2',
    cv=5,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X_train, y_train)

# Step 5: Score the best estimator from the search on the held-out test set
best_rf_model = grid_search.best_estimator_
y_pred = best_rf_model.predict(X_test)

# Test-set error and goodness of fit.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report the winning hyperparameters and both metrics rounded to 4 decimals.
for report_line in (
    f"✅ Best Parameters: {grid_search.best_params_}",
    f"✅ Fine-Tuned Random Forest R² Score: {round(r2, 4)}",
    f"✅ Fine-Tuned Random Forest MSE: {round(mse, 4)}",
):
    print(report_line)

# Step 6: Save Fine-Tuned Model as fine_tune.pkl
MODEL_PATH = '../App/model/fine_tune.pkl'

# joblib.dump does not create missing folders. Create the target directory
# first so a completed grid search is not lost to a FileNotFoundError at the
# very last step; exist_ok=True makes this a no-op when it already exists.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
joblib.dump(best_rf_model, MODEL_PATH)

print(f"✅ Fine-tuned Random Forest model saved as '{MODEL_PATH}'")


Fitting 5 folds for each of 27 candidates, totalling 135 fits
✅ Best Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 300}
✅ Fine-Tuned Random Forest R² Score: 0.9152
✅ Fine-Tuned Random Forest MSE: 0.001
✅ Fine-tuned Random Forest model saved as '../App/model/fine_tune.pkl'
