# Wednesday, November 5, 2025

### 1. IMPORT LIBRARIES

In [None]:
import nfl_data_py as nfl
import pandas as pd

### 2. LOAD THE DATASET
Load all REGULAR seasons only

In [11]:
seasons = [2025, 2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017,
           2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009, 2008,
           2007, 2006, 2005, 2004, 2003, 2002, 2001, 2000, 1999]
regular = True

# load play-by-play
dataset = nfl.import_pbp_data(seasons, downcast=True, cache=False)

# keep regular season only
if regular and "season_type" in dataset.columns:
    dataset = dataset[dataset["season_type"] == "REG"]

2025 done.
2024 done.
2023 done.
2022 done.
2021 done.
2020 done.
2019 done.
2018 done.
2017 done.
2016 done.
2015 done.
2014 done.
2013 done.
2012 done.
2011 done.
2010 done.
2009 done.
2008 done.
2007 done.
2006 done.
2005 done.
2004 done.
2003 done.
2002 done.
2001 done.
2000 done.
1999 done.
Downcasting floats.


- Save the dataset for the first time
- All other times just load the dataset

In [12]:
dataset.to_csv('NFL[1999-2025].csv')

In [2]:
dataset = pd.read_csv('NFL[1999-2025].csv', low_memory=False)

### 3. CLEAN THE DATASET
#### Offensive Metrics:
[ Off_EPA | DSR | Off_ DVOA | AY/A | QB_EPA ]
#### Defensive Metrics:
[ Def_EPA | Def_DVOA | NPRG ]
#### Other Metrics
[ Game_ID | Year | Home_Team | Away_Team | Home_Coach | Away_Coach | Result ]

In [117]:
# keep only scrimmage plays that actually gain/lose yards: runs or passes
# (This implicitly excludes penalties-only, timeouts, spikes, kneels, punts, etc.)
play_mask = (dataset.get("rush", 0) == 1) | (dataset.get("pass", 0) == 1)
dataset = dataset[play_mask].copy()

# OFFENSE metrics (by possession team)
# epa: expected points added - how good offense is
# dsr: drive_first_downs / drive
# dvoa: 100 * (team_epa - league_epa) / abs(league_epa)
# ay/a: (passing_yards + 20*pass_touchdown - 45*interception) / pass_attempt/s

offense = (
    dataset.groupby(["game_id", "posteam"])
       .agg(
           off_epa=("epa", "mean"), # epa & dvoa
           qb_epa=("qb_epa", "mean"), # qb
           # dsr
           total_drives=("drive", "sum"),
           successful_drives=("drive_first_downs", "count"),
           # ay/a
           pass_yards=("passing_yards", "sum"),
           pass_touchdowns=("pass_touchdown", "sum"),
           interceptions=("interception", "sum"),
           pass_attempts=("pass_attempt", "sum")
       )
       .rename_axis(["game_id", "team"])
       .reset_index()
)

# DEFENSE metrics (what each defense allowed)
# epa_allowed: mean offensive EPA by opponents vs this defense - how weak defense is
# we'll keep it positive-as-bad (higher means defense allowed more).
# nprg: sum(was_pressure == TRUE) / number_of_pass_dropback_snaps

defense = (
    dataset.groupby(["game_id", "defteam"])
       .agg(
           def_epa=("epa", "mean"), # epa & dvoa
           # total number of times qb was pressured for nprg
           pressures=("was_pressure", "count"), # true/false values. NaN/0.0
           dropbacks=("qb_dropback", "count")
       )
       .rename_axis(["game_id", "team"])
       .reset_index()
)

# combine and compute NET metrics
team_stats = offense.merge(defense, on=["game_id", "team"], how="outer")  # merge on 'team'
team_stats

Unnamed: 0,game_id,team,off_epa,qb_epa,total_drives,successful_drives,pass_yards,pass_touchdowns,interceptions,pass_attempts,def_epa,pressures,dropbacks
0,1999_01_ARI_PHI,ARI,-0.124676,-0.124676,1546.0,87,274.0,1.0,3.0,50.0,-0.250785,0,60
1,1999_01_ARI_PHI,PHI,-0.250785,-0.250785,941.0,61,91.0,2.0,2.0,30.0,-0.124676,0,87
2,1999_01_BUF_IND,BUF,-0.209526,-0.209526,834.0,68,300.0,1.0,2.0,47.0,0.103372,0,61
3,1999_01_BUF_IND,IND,0.103372,0.103372,699.0,62,284.0,2.0,2.0,33.0,-0.209526,0,68
4,1999_01_CAR_NO,CAR,-0.350204,-0.350204,914.0,56,207.0,1.0,1.0,39.0,-0.204681,0,65
...,...,...,...,...,...,...,...,...,...,...,...,...,...
13649,2025_09_NO_LA,NO,-0.297707,-0.297707,415.0,40,176.0,1.0,1.0,25.0,0.310416,0,76
13650,2025_09_SEA_WAS,SEA,0.454613,0.454613,452.0,51,330.0,4.0,1.0,24.0,-0.055173,0,65
13651,2025_09_SEA_WAS,WAS,-0.055173,-0.055173,711.0,65,153.0,0.0,1.0,26.0,0.454613,0,51
13652,2025_09_SF_NYG,NYG,0.172433,0.172433,643.0,57,191.0,2.0,0.0,35.0,0.309834,0,64


In [144]:
team_eff = team_stats[['game_id', 'team', 'off_epa', 'def_epa', 'qb_epa']].copy()

# Calculations for...
# DVOA (offensive and defensive) = (team_epa - league_epa) / abs(league_epa)
off_league_epa = team_stats['off_epa'].mean()
def_league_epa = team_stats['def_epa'].mean()
team_eff['off_dvoa'] = (team_stats['off_epa'] - off_league_epa) / abs(off_league_epa)
team_eff['def_dvoa'] = (team_stats['def_epa'] - def_league_epa) / abs(def_league_epa)

# DSR = successful_drives / total_drives
team_eff['dsr'] = team_stats['successful_drives'] / team_stats['total_drives']

# AY/A = (passing_yards + 20 * pass_touchdowns - 45 * interceptions) / pass_attempts
team_eff['ay/a'] = (team_stats['pass_yards'] + 20 * team_stats['pass_touchdowns']
                    - 45 * team_stats['interceptions']) / team_stats['pass_attempts']

# NPRG = pressures / dropbacks
team_eff['nprg'] = team_stats['pressures'] / team_stats['dropbacks']

team_eff

Unnamed: 0,game_id,team,off_epa,def_epa,qb_epa,off_dvoa,def_dvoa,dsr,ay/a,nprg
0,1999_01_ARI_PHI,ARI,-0.124676,-0.250785,-0.124676,-8.227432,-17.560850,0.056274,3.180000,0.0
1,1999_01_ARI_PHI,PHI,-0.250785,-0.124676,-0.250785,-17.560850,-8.227432,0.064825,1.366667,0.0
2,1999_01_BUF_IND,BUF,-0.209526,0.103372,-0.209526,-14.507203,8.650636,0.081535,4.893617,0.0
3,1999_01_BUF_IND,IND,0.103372,-0.209526,0.103372,8.650636,-14.507203,0.088698,7.090909,0.0
4,1999_01_CAR_NO,CAR,-0.350204,-0.204681,-0.350204,-24.918961,-14.148623,0.061269,4.666667,0.0
...,...,...,...,...,...,...,...,...,...,...
13649,2025_09_NO_LA,NO,-0.297707,0.310416,-0.297707,-21.033611,23.974189,0.096386,6.040000,0.0
13650,2025_09_SEA_WAS,SEA,0.454613,-0.055173,0.454613,34.646375,-3.083409,0.112832,15.208333,0.0
13651,2025_09_SEA_WAS,WAS,-0.055173,0.454613,-0.055173,-3.083409,34.646375,0.091421,4.153846,0.0
13652,2025_09_SF_NYG,NYG,0.172433,0.309834,0.172433,13.761977,23.931168,0.088647,6.600000,0.0


In [145]:
# split into away and home DataFrames
away = team_eff.iloc[::2].reset_index(drop=True)  # even indices → away team
home = team_eff.iloc[1::2].reset_index(drop=True)  # odd indices → home team

# merge side by side
gbg = pd.concat([away, home], axis=1, keys=['away', 'home'])

# flatten the MultiIndex columns (e.g., 'away_team', 'home_team')
gbg.columns = [f"{side}_{col}" for side, col in gbg.columns]

# keep only one copy of game_id
gbg = gbg.drop(columns=['home_game_id'])
gbg = gbg.rename(columns={'away_game_id': 'game_id'})

# split game_id into parts
team_eff[['year', 'week', 'away', 'home']] = team_eff['game_id'].str.split('_', expand=True, n=3)
gbg = gbg.join(team_eff[['year', 'week']])

gbg

Unnamed: 0,game_id,away_team,away_off_epa,away_def_epa,away_qb_epa,away_off_dvoa,away_def_dvoa,away_dsr,away_ay/a,away_nprg,...,home_off_epa,home_def_epa,home_qb_epa,home_off_dvoa,home_def_dvoa,home_dsr,home_ay/a,home_nprg,year,week
0,1999_01_ARI_PHI,ARI,-0.124676,-0.250785,-0.124676,-8.227432,-17.560850,0.056274,3.180000,0.0,...,-0.250785,-0.124676,-0.250785,-17.560850,-8.227432,0.064825,1.366667,0.0,1999,01
1,1999_01_BUF_IND,BUF,-0.209526,0.103372,-0.209526,-14.507203,8.650636,0.081535,4.893617,0.0,...,0.103372,-0.209526,0.103372,8.650636,-14.507203,0.088698,7.090909,0.0,1999,01
2,1999_01_CAR_NO,CAR,-0.350204,-0.204681,-0.350204,-24.918961,-14.148623,0.061269,4.666667,0.0,...,-0.204681,-0.350204,-0.204681,-14.148623,-24.918961,0.057093,5.846154,0.0,1999,01
3,1999_01_CIN_TEN,CIN,0.173752,0.039889,0.173752,13.859592,3.952192,0.080271,3.972973,0.0,...,0.039889,0.173752,0.122393,3.952192,13.859592,0.076709,10.470588,0.0,1999,01
4,1999_01_DAL_WAS,DAL,0.277992,0.200565,0.277992,21.574477,15.844056,0.065918,6.540000,0.0,...,0.200565,0.277992,0.200565,15.844056,21.574477,0.071992,11.722222,0.0,1999,01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6822,2025_09_LAC_TEN,LAC,0.084300,-0.233385,0.084300,7.239152,-16.273029,0.086253,7.000000,0.0,...,-0.233385,0.084300,-0.233385,-16.273029,7.239152,0.091977,5.800000,0.0,2012,08
6823,2025_09_MIN_DET,DET,-0.041215,-0.001013,-0.041215,-2.050392,0.925047,0.079404,7.714286,0.0,...,-0.001013,-0.041215,-0.001013,0.925047,-2.050392,0.084302,4.600000,0.0,2012,08
6824,2025_09_NO_LA,LA,0.310416,-0.297707,0.310416,23.974189,-21.033611,0.099869,10.617647,0.0,...,-0.297707,0.310416,-0.297707,-21.033611,23.974189,0.096386,6.040000,0.0,2012,08
6825,2025_09_SEA_WAS,SEA,0.454613,-0.055173,0.454613,34.646375,-3.083409,0.112832,15.208333,0.0,...,-0.055173,0.454613,-0.055173,-3.083409,34.646375,0.091421,4.153846,0.0,2012,08


In [146]:
# find the result of each game
result = dataset[['game_id', 'home_score', 'away_score']].drop_duplicates(['game_id'])
result['result'] = result['home_score'] > result['away_score']

# reset the indices to match the ones in gbg dataframe
result = result.sort_values('game_id', ascending=True).reset_index()

result

Unnamed: 0,index,game_id,home_score,away_score,result
0,1157719,1999_01_ARI_PHI,24,25,False
1,1157920,1999_01_BUF_IND,31,14,True
2,1158102,1999_01_CAR_NO,19,10,True
3,1158276,1999_01_CIN_TEN,36,35,True
4,1158465,1999_01_DAL_WAS,35,41,False
...,...,...,...,...,...
6822,22350,2025_09_LAC_TEN,20,27,False
6823,22501,2025_09_MIN_DET,24,27,False
6824,22673,2025_09_NO_LA,34,10,True
6825,22830,2025_09_SEA_WAS,14,38,False


In [147]:
gbg = gbg.join(result[['result']])

# organize the dataframe
cols = ['year', 'week', 'away_team', 'home_team',
        'away_off_epa', 'away_def_epa', 'away_qb_epa', 'away_off_dvoa', 'away_def_dvoa', 'away_dsr', 'away_ay/a', 'away_nprg',
        'home_off_epa', 'home_def_epa', 'home_qb_epa', 'home_off_dvoa', 'home_def_dvoa', 'home_dsr', 'home_ay/a', 'home_nprg',
        'result']
gbg = gbg[cols]

gbg

Unnamed: 0,year,week,away_team,home_team,away_off_epa,away_def_epa,away_qb_epa,away_off_dvoa,away_def_dvoa,away_dsr,...,away_nprg,home_off_epa,home_def_epa,home_qb_epa,home_off_dvoa,home_def_dvoa,home_dsr,home_ay/a,home_nprg,result
0,1999,01,ARI,PHI,-0.124676,-0.250785,-0.124676,-8.227432,-17.560850,0.056274,...,0.0,-0.250785,-0.124676,-0.250785,-17.560850,-8.227432,0.064825,1.366667,0.0,False
1,1999,01,BUF,IND,-0.209526,0.103372,-0.209526,-14.507203,8.650636,0.081535,...,0.0,0.103372,-0.209526,0.103372,8.650636,-14.507203,0.088698,7.090909,0.0,True
2,1999,01,CAR,NO,-0.350204,-0.204681,-0.350204,-24.918961,-14.148623,0.061269,...,0.0,-0.204681,-0.350204,-0.204681,-14.148623,-24.918961,0.057093,5.846154,0.0,True
3,1999,01,CIN,TEN,0.173752,0.039889,0.173752,13.859592,3.952192,0.080271,...,0.0,0.039889,0.173752,0.122393,3.952192,13.859592,0.076709,10.470588,0.0,True
4,1999,01,DAL,WAS,0.277992,0.200565,0.277992,21.574477,15.844056,0.065918,...,0.0,0.200565,0.277992,0.200565,15.844056,21.574477,0.071992,11.722222,0.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6822,2012,08,LAC,TEN,0.084300,-0.233385,0.084300,7.239152,-16.273029,0.086253,...,0.0,-0.233385,0.084300,-0.233385,-16.273029,7.239152,0.091977,5.800000,0.0,False
6823,2012,08,DET,MIN,-0.041215,-0.001013,-0.041215,-2.050392,0.925047,0.079404,...,0.0,-0.001013,-0.041215,-0.001013,0.925047,-2.050392,0.084302,4.600000,0.0,False
6824,2012,08,LA,NO,0.310416,-0.297707,0.310416,23.974189,-21.033611,0.099869,...,0.0,-0.297707,0.310416,-0.297707,-21.033611,23.974189,0.096386,6.040000,0.0,True
6825,2012,08,SEA,WAS,0.454613,-0.055173,0.454613,34.646375,-3.083409,0.112832,...,0.0,-0.055173,0.454613,-0.055173,-3.083409,34.646375,0.091421,4.153846,0.0,False


In [164]:
# save the dataframe for the next steps
gbg.to_csv('NFL_gbg_data[1999-2025].csv')

### 4. PREP FOR TRAINING
- The model should know EPA, YPP, home & away scores
- The home & away teams are only for our eyes, not the model

In [1]:
import pandas as pd

dataset = pd.read_csv('V3_NFL_gbg_data[1999-2025].csv')

In [2]:
team_map = {
    'NO': 1,
    'BUF': 2,
    'JAX': 3,
    'CLE': 4,
    'PHI': 5,
    'GB': 6,
    'LA': 7,
    'LAC': 8,
    'NE': 9,
    'IND': 10,
    'CHI': 11,
    'WAS': 12,
    'NYJ': 13,
    'SEA': 14,
    'ATL': 15,
    'DEN': 16,
    'MIN': 17,
    'ARI': 18,
    'DET': 19,
    'BAL': 20,
    'CIN': 21,
    'LV': 22,
    'TEN': 23,
    'MIA': 24,
    'DAL': 25,
    'KC': 26,
    'PIT': 27,
    'HOU': 28,
    'SF': 29,
    'CAR': 30,
    'NYG': 31,
    'TB': 32
}

dataset['away_team'] = dataset['away_team'].map(team_map)
dataset['home_team'] = dataset['home_team'].map(team_map)

# Removing unnamed columns using drop function
dataset.drop(dataset.columns[dataset.columns.str.contains(
    'unnamed', case=False)], axis=1, inplace=True)

dataset

Unnamed: 0,year,week,away_team,away_off_epa,away_def_epa,away_qb_epa,away_off_dvoa,away_def_dvoa,away_dsr,away_ay/a,...,home_team,home_off_epa,home_def_epa,home_qb_epa,home_off_dvoa,home_def_dvoa,home_dsr,home_ay/a,home_nprg,result
0,1999,1,18,-0.124676,-0.250785,-0.124676,-8.227432,-17.560850,0.056274,3.180000,...,5,-0.250785,-0.124676,-0.250785,-17.560850,-8.227432,0.064825,1.366667,0.0,False
1,1999,1,2,-0.209526,0.103372,-0.209526,-14.507203,8.650636,0.081535,4.893617,...,10,0.103372,-0.209526,0.103372,8.650636,-14.507203,0.088698,7.090909,0.0,True
2,1999,1,30,-0.350204,-0.204681,-0.350204,-24.918961,-14.148623,0.061269,4.666667,...,1,-0.204681,-0.350204,-0.204681,-14.148623,-24.918961,0.057093,5.846154,0.0,True
3,1999,1,21,0.173752,0.039889,0.173752,13.859592,3.952192,0.080271,3.972973,...,23,0.039889,0.173752,0.122393,3.952192,13.859592,0.076709,10.470588,0.0,True
4,1999,1,25,0.277992,0.200565,0.277992,21.574477,15.844056,0.065918,6.540000,...,12,0.200565,0.277992,0.200565,15.844056,21.574477,0.071992,11.722222,0.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6822,2025,9,8,0.084300,-0.233385,0.084300,7.239152,-16.273029,0.086253,7.000000,...,23,-0.233385,0.084300,-0.233385,-16.273029,7.239152,0.091977,5.800000,0.0,False
6823,2025,9,19,-0.041215,-0.001013,-0.041215,-2.050392,0.925047,0.079404,7.714286,...,17,-0.001013,-0.041215,-0.001013,0.925047,-2.050392,0.084302,4.600000,0.0,False
6824,2025,9,7,0.310416,-0.297707,0.310416,23.974189,-21.033611,0.099869,10.617647,...,1,-0.297707,0.310416,-0.297707,-21.033611,23.974189,0.096386,6.040000,0.0,True
6825,2025,9,14,0.454613,-0.055173,0.454613,34.646375,-3.083409,0.112832,15.208333,...,12,-0.055173,0.454613,-0.055173,-3.083409,34.646375,0.091421,4.153846,0.0,False


In [3]:
# set the training and testing variables
from sklearn.model_selection import train_test_split

X = dataset.drop(columns=['result'])
y = dataset['result']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) # 80% train / 20% test

### 5. AI Model Training


In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RandomizedSearchCV

In [5]:
# hyperparameter grid for random search
param_grid = {
   'n_estimators': [int(x) for x in range(100, 2001, 100)],
   'max_depth': [None, 10, 20, 30],
   'min_samples_split': [2, 5, 10],
   'min_samples_leaf': [1, 2, 4],
   'bootstrap': [True, False]
}

# create the random forest model
classifier = RandomForestClassifier()

# hyperparameter tuning
random_search = RandomizedSearchCV(classifier, param_grid)
random_search.fit(X_train, y_train)

# print the best parameters for the random forest model
print(random_search.best_estimator_)

RandomForestClassifier(max_depth=30, n_estimators=200)


In [None]:
# use the hyperparameters from random search
# iterations:
# 1. max_depth=30, min_samples_leaf=4, min_samples_split=5, n_estimators=1000   |   Accuracy: 57.96%
# 2. max_depth=30, n_estimators=200                                             |   Accuracy: 57.63%
# 3. max_depth=30, min_samples_leaf=4, min_samples_split=5, n_estimators=2000   |   Accuracy: 57.43%
classifier = RandomForestClassifier(max_depth=30, min_samples_leaf=4, min_samples_split=5, n_estimators=1000)
classifier.fit(X_train, y_train)

# test the model
y_pred = classifier.predict(X_test)
print("\nAccuracy Score: ", accuracy_score(y_test, y_pred))


Accuracy Score:  0.5600292825768668


In [8]:
import joblib

# save the model
joblib.dump(classifier, './V2_NFL[1999-2025].joblib')

['./V2_NFL[1999-2025].joblib']

In [9]:
import joblib

# load the model
rf = joblib.load('./V2_NFL[1999-2025].joblib')

In [14]:
from sklearn.model_selection import cross_val_score, KFold

# define number of k-folds
num_folds = 5
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

# perform k-fold cross-validation
cross_val_results = cross_val_score(rf, X_train, y_train, cv=kf)

# evaluation metrics
print("Cross-Validation Results (Accuracy): ")
for i, result in enumerate(cross_val_results, 1):
    print(f" Fold {i}: {result * 100:.2f}%")

print(f"Mean Accuracy: {cross_val_results.mean() * 100:.2f}%")

Cross-Validation Results (Accuracy): 
 Fold 1: 58.74%
 Fold 2: 58.79%
 Fold 3: 57.05%
 Fold 4: 56.23%
 Fold 5: 56.32%
Mean Accuracy: 57.43%


In [None]:
team_stats_map = {
    'NO': [1,-0.29770712624999995,0.31041573736842104,-0.29770712624999995,-21.033611204236365,23.974188609474492,0.0963855421686747,6.04,0.0], 
    'BUF': [2,0.21966449296923074,0.015184524683333341,0.21966449296923074,17.25759549133313,2.1238223196334967,0.10077519379844961,10.10344827586207,0.0],
    'JAX': [3,0.12435491583886078,0.1616960068125,0.12435491583886078,10.203635470345054,12.967288093709781,0.1101813110181311,5.147058823529412,0.0],
    'CLE': [4,-0.31476569307407404,0.0934068416923077,-0.31476569307407404,-22.29613331392716,7.913136610427254,0.07520891364902507,2.864864864864865,0.0],
    'PHI': [5,0.4715571105125,0.061180261078431386,0.4715571105125,35.90042769382833,5.528014169347859,0.1100196463654224,10.791666666666666,0.0],
    'GB': [6,0.00997950690769229,0.09694015772068966,0.00997950690769229,1.7385935902301335,8.174640970808902,0.1282051282051282,6.0,0.0],
    'LA': [7,0.31041573736842104,-0.29770712624999995,0.31041573736842104,23.974188609474492,-21.033611204236365,0.09986859395532194,10.617647058823529,0.0],
    'LAC': [8,0.084300302178125,-0.23338452531914897,0.084300302178125,7.239152236593283,-16.27302922419509,0.0862533692722372,7.0,0.0],
    'NE': [9,0.013419390420000002,0.057627978571428554,0.013419390420000002,1.9931829138138886,5.265106080338316,0.09818731117824774,7.257142857142857,0.0],
    'IND': [10,-0.10602000072207793,-0.09770837595081969,-0.10602000072207793,-6.8466495082199055,-6.231497593678175,0.09210526315789473,4.203703703703703,0.0],
    'CHI': [11,0.44084588835,0.22179886567164178,0.44084588835,33.627458493328874,17.415562614533158,0.1034928848641656,9.55,0.0],
    'WAS': [12,-0.055172979092307686,0.4546129820784314,-0.055172979092307686,-3.0834090389845663,34.64637528731186,0.09142053445850915,4.153846153846154,0.0],
    'NYJ': [13,0.3412436362185714,0.24608341213559323,0.3412436362185714,26.255793171863562,19.212886923822914,0.08827238335435057,8.727272727272727,0.0],
    'SEA': [14,0.4546129820784314,-0.055172979092307686,0.4546129820784314,34.64637528731186,-3.0834090389845663,0.11283185840707964,15.208333333333334,0.0],
    'ATL': [15,0.057627978571428554,0.013419390420000002,0.057627978571428554,5.265106080338316,1.9931829138138886,0.08211143695014662,7.394736842105263,0.0],
    'DEN': [16,-0.08861086904444446,-0.16860605271232876,-0.08861086904444446,-5.558181732456237,-11.478707711634547,0.0670926517571885,4.3076923076923075,0.0],
    'MIN': [17,-0.0010127278793103426,-0.041215367359375005,-0.0010127278793103426,0.9250469660250032,-2.050391811157537,0.08430232558139535,4.6,0.0],
    'ARI': [18,0.12321315818461534,-0.14641783277272727,0.12321315818461534,10.119132809760506,-9.836534688699512,0.12770137524557956,8.36111111111111,0.0],
    'DET': [19,-0.041215367359375005,-0.0010127278793103426,-0.041215367359375005,-2.050391811157537,0.9250469660250032,0.0794044665012407,7.714285714285714,0.0],
    'BAL': [20,0.21235004096481483,-0.21113443688888892,0.21235004096481483,16.716245360862924,-14.626277250510254,0.07714285714285714,11.36,0.0],
    'CIN': [21,0.22179886567164178,0.44084588835,0.22179886567164178,17.415562614533158,33.627458493328874,0.07067510548523206,9.019607843137255,0.0],
    'LV': [22,0.1616960068125,0.12435491583886078,0.1616960068125,12.967288093709781,10.203635470345054,0.10666666666666667,7.595238095238095,0.0],
    'TEN': [23,-0.23338452531914897,0.084300302178125,-0.23338452531914897,-16.27302922419509,7.239152236593283,0.09197651663405088,5.8,0.0],
    'MIA': [24,-0.21113443688888892,0.21235004096481483,-0.11386114165079365,-14.626277250510254,16.716245360862924,0.0847913862718708,5.2682926829268295,0.0],
    'DAL': [25,-0.14641783277272727,0.12321315818461534,-0.06698892337878788,-9.836534688699512,10.119132809760506,0.11036789297658862,5.232558139534884,0.0],
    'KC': [26,0.015184524683333341,0.21966449296923074,0.015184524683333341,2.1238223196334967,17.25759549133313,0.08797653958944282,5.394736842105263,0.0],
    'PIT': [27,-0.09770837595081969,-0.10602000072207793,-0.03421853332786887,-6.231497593678175,-6.8466495082199055,0.09621451104100946,5.868421052631579,0.0],
    'HOU': [28,-0.16860605271232876,-0.08861086904444446,-0.16860605271232876,-11.478707711634547,-5.558181732456237,0.09480519480519481,4.909090909090909,0.0],
    'SF': [29,0.309834466390625,0.1724334431491228,0.309834466390625,23.93116814540166,13.76197682202969,0.09523809523809523,10.576923076923077,0.0],
    'CAR': [30,0.09694015772068966,0.00997950690769229,0.09694015772068966,8.174640970808902,1.7385935902301335,0.14009661835748793,2.7142857142857144,0.0],
    'NYG': [31,0.1724334431491228,0.309834466390625,0.1724334431491228,13.76197682202969,23.93116814540166,0.08864696734059098,6.6,0.0],
    'TB': [32,-0.16390229196551723,-0.4819995701267606,-0.16390229196551723,-11.130577531485747,-34.673284890949446,0.1039426523297491,5.62962962962963,0.0]
}

year, week           = 2025,  10
away_team, home_team = 'BAL', 'MIA'

new_data = {
    'year': year, 'week': week,
    # away stats
    'away_team': '',
    'away_off_epa': 0, 'away_def_epa': 0, 'away_qb_epa': 0,
    'away_off_dvoa': 0, 'away_def_dvoa': 0,
    'away_dsr': 0, 'away_ay/a': 0, 'away_nprg': 0,
    # home stats
    'home_team': '',
    'home_off_epa': 0, 'home_def_epa': 0, 'home_qb_epa': 0,
    'home_off_dvoa': 0, 'home_def_dvoa': 0,
    'home_dsr': 0, 'home_ay/a': 0, 'home_nprg': 0,
}

n = len(team_stats_map[away_team])
for i, key in enumerate(new_data):
    if i >= 2:
        if i < len(team_stats_map[away_team]):
            new_data.update({key: team_stats_map[away_team][i]})
        else:
            new_data.update({key: team_stats_map[home_team][i - (n + 2)]})

new_X = pd.DataFrame(data=new_data, index=[0])

pred = rf.predict(new_X)
if pred == 1:
    print(home_team, "will win!")
else:
    print(away_team, "will win!")

# W10: 8/13 = 61.53%
# W 9: 4/14 = 28.57%

Unnamed: 0,year,week,away_team,home_team,away_off_epa,away_def_epa,away_qb_epa,away_off_dvoa,away_def_dvoa,away_dsr,away_ay/a,away_nprg,home_off_epa,home_def_epa,home_qb_epa,home_off_dvoa,home_def_dvoa,home_dsr,home_ay/a,home_nprg
0,2025,9,11,21,0.440846,0.221799,0.440846,33.627458,17.415563,0.103493,9.55,0.0,0.221799,0.440846,0.221799,17.415563,33.627458,0.070675,9.019608,0.0
