In [13]:
# ✅ University Recommendation System - Regression Model Training with Accuracy-like Metric

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Step 1: Load Data
df = pd.read_csv('../Data/preprocessed_data.csv')

# Step 2: Features and Target
X = df[['GRE_Score', 'TOEFL_Score', 'University_Rating', 'SOP', 'LOR', 'CGPA', 'Research']]
y = df['Chance_of_Admit']

# Step 3: Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Models
models = {
    'Linear Regression': LinearRegression(),
    'Support Vector Regression': SVR(kernel='rbf'),
    'Random Forest Regressor': RandomForestRegressor(n_estimators=100, random_state=42),
    'AdaBoost Regressor': AdaBoostRegressor(n_estimators=100, random_state=42),
    'Gradient Boosting Regressor': GradientBoostingRegressor(n_estimators=100, random_state=42)
}

# Step 5: Train, Evaluate, Custom Accuracy
results = {}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    # Custom Accuracy: % of predictions within ±0.05 of true value
    tolerance = 0.05
    accuracy_within_tolerance = np.mean(np.abs(y_pred - y_test) <= tolerance)
    
    results[name] = {
        'MSE': round(mse, 4),
        'R2 Score': round(r2, 4),
        'Accuracy (±0.05 tolerance)': round(accuracy_within_tolerance * 100, 2)
    }

# Step 6: Display
results_df = pd.DataFrame(results).T.sort_values(by='R2 Score', ascending=False)
print("\n✅ Model Performance (Regression + Custom Accuracy):")
print(results_df)



✅ Model Performance (Regression + Custom Accuracy):
                                MSE  R2 Score  Accuracy (±0.05 tolerance)
Random Forest Regressor      0.0011    0.9121                       92.13
Gradient Boosting Regressor  0.0011    0.9119                       91.12
Linear Regression            0.0020    0.8303                       82.74
AdaBoost Regressor           0.0028    0.7681                       64.47
Support Vector Regression    0.0032    0.7328                       52.54
