In [1]:
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
import xgboost as xgb
import sys
import os
# Put the parent of the current working directory on sys.path so that the
# `scripts` package imported just below can be resolved.
# NOTE(review): assumes the kernel's working directory is a direct child of the
# project root — confirm if the notebook is ever launched from elsewhere.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)
from scripts.datasets import create_folds
from joblib import dump

Add every model to evaluate to the list below, as an `(estimator, display name)` pair.

In [2]:
# Candidate regressors to compare. Each entry is (unfitted estimator, label);
# the label is only used when printing results. Every estimator uses 100
# estimators and random_state=42 so runs are repeatable.
models = [
    (
        RandomForestRegressor(n_estimators=100, n_jobs=-1, random_state=42),
        "Forest",
    ),
    (
        xgb.XGBRegressor(n_estimators=100, n_jobs=-1, random_state=42),
        "XGBoost",
    ),
    (
        AdaBoostRegressor(n_estimators=100, random_state=42),
        "AdaBoost",
    ),
]

In [None]:

# Model selection: score every candidate in `models` with the grouped
# cross-validation splitter returned by `create_folds`, then keep the winner.
#
# Scoring rule: a fold counts as a hit when the player with the highest
# predicted target is also the player with the highest actual target in that
# fold. A model's accuracy is hits / number of folds.
X, y, groups, group_kfold = create_folds()

# These columns stay in X so results can be reported per player, but they are
# removed before fitting/predicting so they are never used as features.
id_columns = ["Year", "Player", "Team"]

best_model = None
best_accuracy = 0
for model, name in models:
    error = 0
    total = 0
    for train_idx, test_idx in group_kfold.split(X, y, groups):
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

        X_train = X_train.drop(columns=id_columns)
        model.fit(X_train, y_train)

        X_test_clean = X_test.drop(columns=id_columns)
        y_preds = model.predict(X_test_clean)

        # Work on a copy so the prediction/target columns never leak into X.
        X_test2 = X_test.copy()
        X_test2["Predicted"] = y_preds
        X_test2["Actual"] = y_test
        X_predict = X_test2.sort_values("Predicted", ascending=False)
        X_real = X_test2.sort_values("Actual", ascending=False)

        total += 1
        if X_real["Player"].iloc[0] != X_predict["Player"].iloc[0]:
            error += 1

    accuracy = (total - error) / total
    # `best_model is None` guarantees a winner is recorded even when every
    # candidate scores 0; a strict `>` against an initial 0 would leave
    # best_model as None in that case.
    if best_model is None or accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model

    print(f"Accuracy for {name}: {accuracy}")

# After the CV loop each estimator is left fitted on the training split of the
# *last* fold only. Refit the winner on every available row so the object that
# gets saved has seen all of the data.
if best_model is not None:
    best_model.fit(X.drop(columns=id_columns), y)

print(f"Best model: {best_model}, accuracy: {best_accuracy}")

Predicted: 3.375611376367625, Player: Nikola Jokić
Predicted: 2.5333518567541953, Player: Shai Gilgeous-Alexander
Predicted: 1.802329360845622, Player: Giannis Antetokounmpo
Predicted: 1.3634251332315555, Player: Luka Dončić
Predicted: 0.7152362235881904, Player: Joel Embiid
Predicted: 0.06560314107514562, Player: Domantas Sabonis
Predicted: -0.02122898246620974, Player: Anthony Davis
Predicted: -0.02588658790585442, Player: Tyrese Haliburton
Predicted: -0.03750783880221788, Player: Jayson Tatum
Predicted: -0.13890221896583138, Player: Jalen Brunson
Predicted: -0.1805078303944695, Player: Anthony Edwards
Predicted: -0.2197343727852079, Player: Jaylen Brown
Predicted: -0.23290630592805428, Player: LeBron James
Predicted: -0.25058610423026717, Player: Rudy Gobert
Predicted: -0.3312454434219137, Player: Zion Williamson
Predicted: -0.39885823706036144, Player: Stephen Curry
Predicted: -0.4044769597963733, Player: Kevin Durant
Predicted: -0.41289757249638215, Player: Kawhi Leonard
Predicted

Save the best model

In [4]:
# Persist the selected estimator with joblib.
# Refuse to write a file when no model was selected: pickling None would
# succeed silently and only fail later, when the file is loaded and used.
if best_model is None:
    raise ValueError("No model was selected; run the model-selection cell first.")
# joblib.dump opens the target path directly and does not create missing
# parent directories, so make sure the output folder exists first.
os.makedirs("../models", exist_ok=True)
dump(best_model, "../models/model.joblib")

['../models/model.joblib']