In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score


In [2]:
# Read both input tables from the cleaned_datasets directory: the frame the
# models are fitted on, and the frame that is later filtered by SEASON.
training_data, all_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        "cleaned_datasets/training_data.csv",
        "cleaned_datasets/all_data.csv",
    )
)


In [3]:
# Predictor columns shared by the FPpG and GP regressions.
features = [
    "AGE",
    "GP",
    "MpG",
    "PpG",
    "RpG",
    "ApG",
    "SpG",
    "BpG",
    "TOpG",
]


In [4]:
# Design matrix: the predictor columns, copied so it is independent of
# later edits to training_data.
X_train = training_data.loc[:, features].copy()

# Regression target: the FPpG_next column.
y_fppg = training_data.loc[:, "FPpG_next"].copy()

# Boolean selector for rows whose target is non-zero; used below to report
# metrics that leave zero-valued targets out.
mask = y_fppg.ne(0)

In [5]:
# Linear regression of the FPpG_next target on the predictor columns.
# fit() returns the estimator itself, so construction and fitting chain.
fp_model = LinearRegression().fit(X_train, y_fppg)

# In-sample predictions: the model scores the same rows it was fitted on.
y_pred = fp_model.predict(X_train)

In [6]:
# All three metrics compare against predictions made on the training rows,
# so they describe in-sample fit rather than held-out performance.
r2_overall = r2_score(y_true=y_fppg, y_pred=y_pred)

# Same comparison restricted to rows whose target is non-zero (`mask`).
r2_masked = r2_score(y_true=y_fppg[mask], y_pred=y_pred[mask])
mae = mean_absolute_error(y_true=y_fppg[mask], y_pred=y_pred[mask])

In [7]:
# Report the three in-sample metrics, one per line, to three decimals.
print(
    f"Baseline MAE FPpG (masked): {mae:.3f}",
    f"R² overall FPpG: {r2_overall:.3f}",
    f"R² masked FPpG: {r2_masked:.3f}",
    sep="\n",
)


Baseline MAE FPpG (masked): 4.226
R² overall FPpG: 0.784
R² masked FPpG: 0.784


In [8]:
# Second regression on the same predictors, this time targeting GP_next.
y_gp = training_data.loc[:, "GP_next"].copy()
gp_model = LinearRegression().fit(X_train, y_gp)

# In-sample predictions of GP_next for the training rows.
gp_pred_train = gp_model.predict(X_train)

In [11]:
# Store in-sample predictions on the training frame, bounded the same way the
# current-season prediction cell bounds its output (FPpG >= 0, 0 <= GP <= 82).
# Multiplying the raw regression outputs instead would let two negative
# predictions produce a positive season total, and would make these columns
# incomparable with the clipped current-season predictions.
fppg_pred_train_clipped = np.clip(y_pred, 0, None)
gp_pred_train_clipped = np.clip(gp_pred_train, 0, 82)

training_data["FP_total_pred_train"] = fppg_pred_train_clipped * gp_pred_train_clipped
training_data["GP_pred_train"] = gp_pred_train_clipped

In [12]:
# Keep only the SEASON == 2024 rows; .copy() so prediction columns can be
# added to curr without touching all_data.
curr = all_data.loc[
    all_data["SEASON"].eq(2024)
].copy()


In [13]:
# Score the SEASON == 2024 stat lines held in `curr` with both fitted models.
X_curr = curr[features].copy()

# Bound the raw regression output to feasible values: per-game points cannot
# be negative, and games played is limited to 0..82 (presumably the
# regular-season length -- confirm if the schedule changes).
curr["FPpG_pred"] = np.clip(fp_model.predict(X_curr), 0, None)
curr["GP_pred"] = np.clip(gp_model.predict(X_curr), 0, 82)

# Season total = predicted per-game points x predicted games played.
curr["FP_total_pred"] = curr["FPpG_pred"] * curr["GP_pred"]
curr["SEASON"] = 2025  # relabel rows with the season being predicted

out_cols = ["NAME", "SEASON", "FPpG_pred", "GP_pred", "FP_total_pred"]
pred_2025 = curr[out_cols].sort_values("FP_total_pred", ascending=False)

# DataFrame.to_csv does not create missing parent directories, so make sure
# predictions/ exists before writing (a fresh checkout may not have it).
pred_path = Path("predictions/linear_pred_2025.csv")
pred_path.parent.mkdir(parents=True, exist_ok=True)
pred_2025.to_csv(pred_path, index=False)

In [14]:
# Show the first 20 rows of the projection table as plain text.
print(pred_2025.iloc[:20])


                         NAME  SEASON  FPpG_pred    GP_pred  FP_total_pred
2109             NIKOLA JOKIC    2025  60.848211  72.432125    4407.365256
2438  SHAI GILGEOUS-ALEXANDER    2025  54.841340  71.016584    3894.644623
923     GIANNIS ANTETOKOUNMPO    2025  56.051779  68.478715    3838.353810
1292             JAYSON TATUM    2025  46.701646  71.452670    3336.957318
132           ANTHONY EDWARDS    2025  43.780959  71.132644    3114.255415
303           CADE CUNNINGHAM    2025  48.263371  64.467324    3111.410385
1548       KARL-ANTHONY TOWNS    2025  43.858683  70.518538    3092.850204
1805              LUKA DONCIC    2025  50.224635  59.013986    2963.955902
729          DOMANTAS SABONIS    2025  41.429572  71.120781    2946.503517
2732        VICTOR WEMBANYAMA    2025  53.836598  54.281048    2922.306968
85             ALPEREN SENGUN    2025  41.549097  70.282311    2920.166592
2694        TYRESE HALIBURTON    2025  40.429283  71.735312    2900.207243
688              DEVIN BO