In [51]:
matchups = [
    # Home, Away
    ("HOU", "CLE"),
    ("NYJ", "NE"),
    ("DET", "ATL"),
    ("WAS", "BUF"),
    ("GB", "NO"),
    ("MIA", "DEN"),
    ("CLE", "TEN"),
    ("MIN", "LAC"),
    ("NYJ", "NE"),
    ("JAX", "HOU"),
    ("BAL", "IND"),
    ("SEA", "CAR"),
    ("KC", "CHI"),
    ("ARI", "DAL"),
    ("LV", "PIT"),
    ("TB", "PHI"),
]

In [52]:
import joblib
import pandas as pd
import numpy as np

# Load the saved model
model = joblib.load("./models/clf.pkl")

# Load season data so far
data = pd.read_csv("./data/play_by_play_2024.csv")

  data = pd.read_csv("./data/play_by_play_2024.csv")


In [53]:
# print all unique home_team and away_team in data
print(data.home_team.unique())

['BUF' 'KC' 'NO' 'CLE' 'SEA' 'PHI' 'IND' 'MIA' 'DET' 'LAC' 'NYG' 'CIN'
 'SF' 'ATL' 'CHI' 'TB' 'JAX' 'GB' 'CAR' 'BAL' 'DAL' 'WAS' 'TEN' 'NE' 'MIN']


In [54]:
import itertools


def get_prediction(matchup):

    HOME_TEAM = matchup[0]
    AWAY_TEAM = matchup[1]

    def ewma(data, window):
        """
        Calculate the most recent value for EWMA given an array of data and a window size
        """
        alpha = 2 / (window + 1.0)
        alpha_rev = 1 - alpha
        scale = 1 / alpha_rev
        n = data.shape[0]
        r = np.arange(n)
        scale_arr = scale**r
        offset = data[0] * alpha_rev ** (r + 1)
        pw0 = alpha * alpha_rev ** (n - 1)
        mult = data * pw0 * scale_arr
        cumsums = mult.cumsum()
        out = offset + cumsums * scale_arr[::-1]
        return out[-1]

    offense = data.loc[(data["posteam"] == HOME_TEAM) | (data["posteam"] == AWAY_TEAM)]
    defense = data.loc[(data["defteam"] == HOME_TEAM) | (data["defteam"] == AWAY_TEAM)]

    rushing_offense = (
        offense.loc[offense["rush_attempt"] == 1]
        .groupby(["posteam", "week"], as_index=False)["epa"]
        .mean()
        .rename(columns={"posteam": "team"})
    )
    passing_offense = (
        offense.loc[offense["pass_attempt"] == 1]
        .groupby(["posteam", "week"], as_index=False)["epa"]
        .mean()
        .rename(columns={"posteam": "team"})
    )
    rushing_defense = (
        defense.loc[defense["rush_attempt"] == 1]
        .groupby(["defteam", "week"], as_index=False)["epa"]
        .mean()
        .rename(columns={"defteam": "team"})
    )
    passing_defense = (
        defense.loc[defense["pass_attempt"] == 1]
        .groupby(["defteam", "week"], as_index=False)["epa"]
        .mean()
        .rename(columns={"defteam": "team"})
    )

    super_bowl_X = np.zeros(8)

    for i, (tm, stat_df) in enumerate(
        itertools.product(
            [HOME_TEAM, AWAY_TEAM],
            [rushing_offense, passing_offense, rushing_defense, passing_defense],
        )
    ):
        ewma_value = ewma(stat_df.loc[stat_df["team"] == tm]["epa"].values, 20)
        super_bowl_X[i] = ewma_value

    predicted_winner = model.predict(super_bowl_X.reshape(1, 8))[0]
    predicted_proba = model.predict_proba(super_bowl_X.reshape(1, 8))[0]

    winner = HOME_TEAM if predicted_winner else AWAY_TEAM
    win_prob = predicted_proba[-1] if predicted_winner else predicted_proba[0]
    return winner, win_prob

In [59]:
results = []

for matchup in matchups:
    winner, probability = get_prediction(matchup)
    print(
        f"{matchup[0]} vs {matchup[1]}: {winner} with {probability*100:.5f}% probability"
    )
    results.append((matchup, winner, probability))

results

HOU vs CLE: HOU with 76.89039% probability
NYJ vs NE: NE with 58.14629% probability
DET vs ATL: DET with 82.06373% probability
WAS vs BUF: BUF with 78.10979% probability
GB vs NO: NO with 79.41108% probability
MIA vs DEN: MIA with 82.64485% probability
CLE vs TEN: CLE with 59.49715% probability
MIN vs LAC: MIN with 78.54664% probability
NYJ vs NE: NE with 58.14629% probability
JAX vs HOU: HOU with 65.98913% probability
BAL vs IND: BAL with 60.15540% probability
SEA vs CAR: SEA with 95.01683% probability
KC vs CHI: KC with 77.15705% probability
ARI vs DAL: DAL with 52.47692% probability
LV vs PIT: PIT with 76.21091% probability
TB vs PHI: TB with 72.80042% probability


[(('HOU', 'CLE'), 'HOU', np.float64(0.768903915669553)),
 (('NYJ', 'NE'), 'NE', np.float64(0.58146293700059)),
 (('DET', 'ATL'), 'DET', np.float64(0.8206372949331042)),
 (('WAS', 'BUF'), 'BUF', np.float64(0.7810979210323074)),
 (('GB', 'NO'), 'NO', np.float64(0.7941108322849342)),
 (('MIA', 'DEN'), 'MIA', np.float64(0.8264485373557431)),
 (('CLE', 'TEN'), 'CLE', np.float64(0.5949714597131823)),
 (('MIN', 'LAC'), 'MIN', np.float64(0.7854663505809777)),
 (('NYJ', 'NE'), 'NE', np.float64(0.58146293700059)),
 (('JAX', 'HOU'), 'HOU', np.float64(0.659891301729453)),
 (('BAL', 'IND'), 'BAL', np.float64(0.6015539940479555)),
 (('SEA', 'CAR'), 'SEA', np.float64(0.9501682652457129)),
 (('KC', 'CHI'), 'KC', np.float64(0.7715705421715212)),
 (('ARI', 'DAL'), 'DAL', np.float64(0.5247691506823648)),
 (('LV', 'PIT'), 'PIT', np.float64(0.7621090945622662)),
 (('TB', 'PHI'), 'TB', np.float64(0.7280042100230298))]