In [49]:
import pandas as pd

In [50]:
gamelogs = pd.read_csv('nfl_df.csv', index_col=None)

In [51]:
n_columns_to_keep = 38
gamelogs = gamelogs.iloc[: , :n_columns_to_keep]

In [52]:
gamelogs = gamelogs.drop(columns=['Unnamed: 3'])

In [53]:
gamelogs.rename(columns={'Unnamed: 6': 'venue'}, inplace=True)

In [54]:
gamelogs.rename(columns={'Unnamed: 4': 'W/L'}, inplace=True)

In [55]:
gamelogs["full_date"] = gamelogs["Date"] + " " + gamelogs["Season"].astype(str)


In [56]:
gamelogs["full_date"] = pd.to_datetime(gamelogs["full_date"], format="%B %d %Y", errors="coerce")


In [57]:
gamelogs["Date"] = gamelogs["full_date"]


In [58]:
gamelogs = gamelogs.drop(columns=["full_date"])

In [59]:
gamelogs["Date"] = pd.to_datetime(gamelogs["Date"])

In [60]:
gamelogs["venue_code"] = gamelogs["venue"].astype("category").cat.codes #convert string into category which is converted into nunmbers

In [61]:
gamelogs["opp_code"] = gamelogs["Opp"].astype("category").cat.codes

In [62]:
gamelogs["day_code"] = gamelogs["Date"].dt.dayofweek #convert date into day of week

In [63]:
gamelogs["target"] = (gamelogs["W/L"] == "W").astype("int")

In [64]:
from sklearn.ensemble import RandomForestClassifier

In [65]:
rf = RandomForestClassifier(n_estimators=50, min_samples_split=10, random_state=1)

In [66]:
train = gamelogs[gamelogs["Date"] < '2024-9-8']

In [67]:
test = gamelogs[gamelogs["Date"] >= '2024-9-8']

In [68]:
predictors = ["venue_code", "opp_code", "day_code"]

In [69]:
rf.fit(train[predictors], train["target"])

In [70]:
preds = rf.predict(test[predictors])

In [71]:
from sklearn.metrics import accuracy_score

In [72]:
accuracy = accuracy_score(test["target"], preds)

In [73]:
combined = pd.DataFrame(dict(actual=test["target"], prediction=preds))

In [74]:
pd.crosstab(index=combined["actual"], columns=combined["prediction"])

prediction,0,1
actual,Unnamed: 1_level_1,Unnamed: 2_level_1
0,139,115
1,129,125


In [75]:
from sklearn.metrics import precision_score

In [76]:
precision_score(test["target"], preds)

0.5208333333333334

In [77]:
grouped_games = gamelogs.groupby("Team")

In [78]:
group = grouped_games.get_group("CRD")

In [79]:
def rolling_averages(group, cols, new_cols):
  group = group.sort_values("Date")
  rolling_stats = group[cols].rolling(3, closed='left').mean()
  group[new_cols] = rolling_stats
  group = group.dropna(subset=new_cols)
  return group

In [80]:
cols = ["Tm", "Opp.1", "Cmp", "Att", "Yds", "Int", "Sk", "Yds.1", "Y/A", "NY/A", "Cmp%", "Rate", "Att.1", "Yds.2", "Y/A.1", "TD.1", "FGM", "FGA", "XPM", "XPA", "Pnt", "Yds.3", "3DConv", "3DAtt", "4DConv", "4DAtt"]
new_cols = [f"{c}_rolling" for c in cols]

In [81]:
rolling_averages(group, cols, new_cols)

Unnamed: 0,Season,Week,Team,Day,Date,W/L,OT,venue,Opp,Tm,...,FGM_rolling,FGA_rolling,XPM_rolling,XPA_rolling,Pnt_rolling,Yds.3_rolling,3DConv_rolling,3DAtt_rolling,4DConv_rolling,4DAtt_rolling
1,2021,2,CRD,Sun,2021-09-19,W,,,Minnesota Vikings,34,...,2.666667,3.000000,3.000000,3.000000,3.000000,139.666667,7.666667,15.666667,1.000000,1.333333
2,2021,3,CRD,Sun,2021-09-26,W,,@,Jacksonville Jaguars,31,...,2.000000,2.333333,4.000000,4.000000,3.333333,173.000000,6.333333,13.333333,0.666667,1.000000
3,2021,4,CRD,Sun,2021-10-03,W,,@,Los Angeles Rams,37,...,1.333333,2.000000,4.333333,4.333333,4.000000,206.000000,3.666667,10.333333,1.000000,1.000000
4,2021,5,CRD,Sun,2021-10-10,W,,,San Francisco 49ers,17,...,2.000000,2.333333,4.000000,4.000000,3.666667,187.333333,4.000000,10.333333,1.000000,1.000000
5,2021,6,CRD,Sun,2021-10-17,W,,@,Cleveland Browns,37,...,1.666667,2.333333,3.333333,3.333333,3.666667,182.000000,4.000000,10.666667,0.666667,0.666667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1641,2024,13,CRD,Sun,2024-12-01,L,,@,Minnesota Vikings,22,...,1.666667,2.000000,2.333333,2.333333,3.666667,179.000000,4.333333,10.666667,0.333333,1.000000
1642,2024,14,CRD,Sun,2024-12-08,L,,,Seattle Seahawks,18,...,2.666667,3.333333,1.666667,1.666667,2.666667,130.666667,4.333333,10.666667,0.333333,1.000000
1643,2024,15,CRD,Sun,2024-12-15,W,,,New England Patriots,30,...,2.666667,3.666667,0.666667,0.666667,3.666667,177.333333,4.000000,12.333333,0.333333,1.000000
1644,2024,16,CRD,Sun,2024-12-22,L,OT,@,Carolina Panthers,30,...,3.000000,3.666667,1.666667,1.666667,2.333333,106.333333,6.333333,13.333333,0.333333,0.666667


In [82]:
gamelogs_rolling = gamelogs.groupby("Team").apply(lambda x: rolling_averages(x, cols, new_cols))

  gamelogs_rolling = gamelogs.groupby("Team").apply(lambda x: rolling_averages(x, cols, new_cols))


In [83]:
gamelogs_rolling = gamelogs_rolling.droplevel("Team")

In [84]:
gamelogs_rolling.index = range(gamelogs_rolling.shape[0])

In [85]:
predictors += new_cols

In [86]:
def make_predictions(data, predictors):
    train = data[data["Date"] < '2024-9-8']
    test = data[data["Date"] >= '2024-9-8']
    rf.fit(train[predictors], train["target"])
    preds = rf.predict(test[predictors])
    combined = pd.DataFrame(dict(actual=test["target"], prediction=preds), index=test.index)
    precision = precision_score(test["target"], preds)
    return combined, precision

In [87]:
combined, precision = make_predictions(gamelogs_rolling, predictors = new_cols)

In [88]:
combined = combined.merge(gamelogs_rolling[["Date", "Team", "Opp", "W/L"]], left_index=True, right_index=True)

In [89]:
class MissingDict(dict):
  __missing__ = lambda self, key: key

map_values = {
    "CRD": "Arizona Cardinals",
    "ATL": "Atlanta Falcons",
    "RAV": "Baltimore Ravens",
    "BUF": "Buffalo Bills",
    "CAR": "Carolina Panthers",
    "CHI": "Chicago Bears",
    "CIN": "Cincinnati Bengals",
    "CLE": "Cleveland Browns",
    "DAL": "Dallas Cowboys",
    "DEN": "Denver Broncos",
    "DET": "Detroit Lions",
    "GNB": "Green Bay Packers",
    "HTX": "Houston Texans",
    "CLT": "Indianapolis Colts",
    "JAX": "Jacksonville Jaguars",
    "KAN": "Kansas City Chiefs",
    "RAI": "Las Vegas Raiders",
    "SDG": "Los Angeles Chargers",
    "RAM": "Los Angeles Rams",
    "MIA": "Miami Dolphins",
    "MIN": "Minnesota Vikings",
    "NWE": "New England Patriots",
    "NOR": "New Orleans Saints",
    "NYG": "New York Giants",
    "NYJ": "New York Jets",
    "PHI": "Philadelphia Eagles",
    "PIT": "Pittsburgh Steelers",
    "SFO": "San Francisco 49ers",
    "SEA": "Seattle Seahawks",
    "TAM": "Tampa Bay Buccaneers",
    "OTI": "Tennessee Titans",
    "WAS": "Washington Football Team"
}

mapping = MissingDict(**map_values)

In [90]:
mapping["CRD"]

'Arizona Cardinals'

In [91]:
combined["new_team"] = combined["Team"].map(mapping)

In [92]:
merged = combined.merge(combined, left_on=["Date", "new_team"], right_on=["Date", "Opp"])

In [93]:
merged[(merged["prediction_x"]==1) & (merged["prediction_y"]==0)]["actual_x"].value_counts()

Unnamed: 0_level_0,count
actual_x,Unnamed: 1_level_1
1,72
0,37


In [94]:
72/ (72+37)

0.6605504587155964