In [1]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.metrics import mean_squared_error, r2_score

# 1. Load the scikit-learn Diabetes regression dataset
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

# 2. Hold out 30% of the samples as a test split (seeded, so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# 3. Random Forest: fit on the training split, then predict the test split
#    (fit() returns the estimator itself, so construction and fitting can be chained)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
rf_preds = rf_model.predict(X_test)

# 4. AdaBoost: same procedure with the same ensemble size and seed
ab_model = AdaBoostRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
ab_preds = ab_model.predict(X_test)

# 5. Report test-split R² and MSE for both models
print("Random Forest R² Score:", r2_score(y_true=y_test, y_pred=rf_preds))
print("Random Forest MSE:", mean_squared_error(y_true=y_test, y_pred=rf_preds))
print("AdaBoost R² Score:", r2_score(y_true=y_test, y_pred=ab_preds))
print("AdaBoost MSE:", mean_squared_error(y_true=y_test, y_pred=ab_preds))

Random Forest R² Score: 0.47027066465218703
Random Forest MSE: 2859.641982706767
AdaBoost R² Score: 0.4658108945739403
AdaBoost MSE: 2883.7171941363927


In [2]:
from sklearn.metrics import mean_absolute_error, median_absolute_error, explained_variance_score
from sklearn.model_selection import cross_val_score

def print_test_metrics(label, y_true, y_pred):
    """Print held-out regression metrics for one model.

    Prints, in this order, R², MSE, MAE, median absolute error and
    explained variance, each line prefixed with ``label``.

    Args:
        label: Display name placed at the start of every printed line.
        y_true: Ground-truth target values.
        y_pred: Predictions to score against ``y_true``.
    """
    print(f"{label} R² Score:", r2_score(y_true, y_pred))
    print(f"{label} MSE:", mean_squared_error(y_true, y_pred))
    print(f"{label} MAE:", mean_absolute_error(y_true, y_pred))
    print(f"{label} Median AE:", median_absolute_error(y_true, y_pred))
    print(f"{label} Explained Variance:", explained_variance_score(y_true, y_pred))


def print_cv_r2(label, model, features, targets, cv=5):
    """Cross-validate ``model`` with R² scoring, print the results and return them.

    Args:
        label: Display name placed at the start of every printed line.
        model: Estimator handed to ``cross_val_score``.
        features: Feature matrix used for cross-validation.
        targets: Target values aligned with ``features``.
        cv: Cross-validation setting forwarded to ``cross_val_score``
            (defaults to 5 folds).

    Returns:
        The array of per-fold R² scores produced by ``cross_val_score``.
    """
    scores = cross_val_score(model, features, targets, cv=cv, scoring='r2')
    print(f"{label} CV R² Scores:", scores)
    print(f"{label} Mean CV R² Score:", scores.mean())
    return scores


# Random Forest: metrics on the existing test-split predictions,
# then 5-fold cross-validated R² on the whole dataset
print_test_metrics("Random Forest", y_test, rf_preds)
cv_scores = print_cv_r2("Random Forest", rf_model, X, y)

# Same report for AdaBoost
print_test_metrics("AdaBoost", y_test, ab_preds)
cv_scores_ab = print_cv_r2("AdaBoost", ab_model, X, y)


Random Forest R² Score: 0.47027066465218703
Random Forest MSE: 2859.641982706767
Random Forest MAE: 42.75075187969925
Random Forest Median AE: 35.81
Random Forest Explained Variance: 0.4703376031579882
Random Forest CV R² Scores: [0.38297026 0.51674721 0.42936115 0.35090466 0.41209037]
Random Forest Mean CV R² Score: 0.41841473022497616
AdaBoost R² Score: 0.4658108945739403
AdaBoost MSE: 2883.7171941363927
AdaBoost MAE: 43.041557425721415
AdaBoost Median AE: 37.28985507246378
AdaBoost Explained Variance: 0.47353024658329934
AdaBoost CV R² Scores: [0.36331081 0.4673352  0.41228926 0.35715597 0.42431587]
AdaBoost Mean CV R² Score: 0.40488142072930283
