In [19]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import log_loss, roc_auc_score
from itertools import combinations


In [41]:
# Load the team statistics workbook into a DataFrame.
# NOTE(review): the path is relative, so the notebook must be run from the
# directory that contains team_stats_data.xlsx.
data = pd.read_excel("team_stats_data.xlsx")

In [42]:
# Regression target: wins divided by games played, per row.
data["win_rate"] = data["WINS"].div(data["GAMES"])


In [43]:
# Columns of `data` used as model inputs, listed two per line.
features = [
    "ADJ OE", "ADJ DE",
    "EFG", "EFG D",
    "FT RATE", "FT RATE D",
    "TOV%", "TOV% D",
    "O REB%", "OP OREB%",
    "2P %", "2P % D.",
    "3P %", "3P % D.",
]

# Feature matrix and regression target.
X = data.loc[:, features]
y = data.loc[:, "win_rate"]

In [44]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [47]:
# Standardize the features: learn the scaling statistics from the training
# split only, then apply that same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [48]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error

# Fit a gradient-boosted regressor on the scaled training split (seeded for reproducibility)
model = GradientBoostingRegressor(random_state=42).fit(X_train_scaled, y_train)

# Score the held-out split and report the mean squared error
y_pred = model.predict(X_test_scaled)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Error: {mse}")


Mean Squared Error: 0.012169689999817282


In [49]:
def predict_win_probability(team1_stats, team2_stats, model, scaler):
    """
    Predict the win probability for two teams.

    Each team's feature row is scaled with ``scaler`` and scored with
    ``model``. The two predicted win rates are floored at zero and then
    normalized so that the returned pair sums to 1.

    Parameters
    ----------
    team1_stats, team2_stats : sequence of float
        One row of feature values per team, in the same column order the
        scaler was fitted on.
    model : object
        Fitted estimator exposing ``predict``.
    scaler : object
        Fitted transformer exposing ``transform``.

    Returns
    -------
    tuple of (float, float)
        ``(team1_prob, team2_prob)``. If neither team has a positive
        predicted win rate the matchup is reported as an even ``(0.5, 0.5)``.
    """
    def _predicted_win_rate(stats):
        # list() keeps the values positional even if `stats` is a labelled Series.
        row = [list(stats)]
        # A scaler fitted on a DataFrame remembers its column names; hand it a
        # matching frame so scikit-learn does not warn about missing names.
        names = getattr(scaler, "feature_names_in_", None)
        if names is not None:
            row = pd.DataFrame(row, columns=list(names))
        rate = float(model.predict(scaler.transform(row))[0])
        # A regressor's output is not bounded below by zero; a negative rate
        # would turn into a negative "probability" after normalization.
        return max(rate, 0.0)

    team1_win_rate = _predicted_win_rate(team1_stats)
    team2_win_rate = _predicted_win_rate(team2_stats)

    # Normalize probabilities, guarding against a zero denominator
    total = team1_win_rate + team2_win_rate
    if total <= 0:
        return 0.5, 0.5

    return team1_win_rate / total, team2_win_rate / total

# Example usage
# Example usage: compares the first two rows of X (replace with actual team stats)
team1_stats = X.iloc[0].to_numpy()
team2_stats = X.iloc[1].to_numpy()
team1_prob, team2_prob = predict_win_probability(
    team1_stats, team2_stats, model, scaler
)
print(f"Team 1 Probability: {team1_prob:.2f}, Team 2 Probability: {team2_prob:.2f}")


Team 1 Probability: 0.52, Team 2 Probability: 0.48




In [50]:
import joblib
# Persist the fitted scaler and the fitted model as separate files; both are
# needed at prediction time because predict_win_probability scales its inputs
# with the scaler before calling the model.
joblib.dump(scaler, "scaler.pkl")
joblib.dump(model, "team_matchup_predictor.pkl")

['team_matchup_predictor.pkl']