## Prepare and Clean NFL Datasets

In [12]:
import os
import pandas as pd
%pip install tabulate

Note: you may need to restart the kernel to use updated packages.


In [2]:
# evaluation window: first and last season, both included
start_year = 2014
end_year = 2023

# raw input datasets
teams_dataset_file = os.path.join("datasets", "teams.csv")
standings_dataset_file = os.path.join("datasets", "standings.csv")
games_dataset_file = os.path.join("datasets", "games.csv")

# file paths for the cleaned counterparts of the datasets above
teams_cleaned_dataset_file = os.path.join("datasets", "teams_clean.csv")
standings_cleaned_dataset_file = os.path.join("datasets", "standings_clean.csv")
games_cleaned_dataset_file = os.path.join("datasets", "games_clean.csv")

# every season in the evaluation window, in ascending order
seasons_to_evaluate = list(range(start_year, end_year + 1))
# Read the teams dataset and trim it in a single, non-mutating chain.
# Every step returns a new frame, so no intermediate object is modified
# in place and the filtered slice is never written to.
teams = (
    pd.read_csv(teams_dataset_file)
    # removing irrelevant columns
    .drop(columns=[
        "nfl", "nfl_team_id", "espn", "pfr", "pff", "pfflabel", "fo",
        "location", "short_location", "nickname", "hyphenated",
        "sbr", "sbr_wins", "sbr_name", "draft_kings",
    ])
    # keep last season records only
    .loc[lambda frame: frame["season"] == end_year]
    # index on the team short name; the season column is constant after
    # the filter above, so it is dropped
    .set_index("team")
    .drop(columns="season")
    .rename(columns={"full": "team_name"})
)

display(teams)


Unnamed: 0_level_0,team_name
team,Unnamed: 1_level_1
ARI,Arizona Cardinals
ATL,Atlanta Falcons
BAL,Baltimore Ravens
BUF,Buffalo Bills
CAR,Carolina Panthers
CHI,Chicago Bears
CIN,Cincinnati Bengals
CLE,Cleveland Browns
DAL,Dallas Cowboys
DEN,Denver Broncos


In [3]:
# relocated franchises, keyed by previous team id with the new team id as value
relocated_teams = {
    "OAK": "LV",
    "SD": "LAC",
    "STL": "LA",
}

# generic helper: rewrite relocated team ids in one column of a dataframe
def replace_relocated_teams(target_data_frame, column_name, relocated_teams):
    """Replace previous team ids with their new ids, modifying the frame in place.

    Args:
        target_data_frame: DataFrame whose column is rewritten (mutated in place).
        column_name: Name of the column holding team ids.
        relocated_teams: Mapping of previous team id -> new team id. Entries are
            applied one after another in the mapping's iteration order.

    Returns:
        None. The change is visible through ``target_data_frame`` itself.
    """
    for old_id in relocated_teams:
        # rows that still carry the previous id at this point of the loop
        stale_rows = target_data_frame[column_name] == old_id
        target_data_frame.loc[stale_rows, column_name] = relocated_teams[old_id]

In [9]:
# read standings dataset and remove irrelevant columns
all_standings = pd.read_csv(standings_dataset_file).drop(columns=["sov", "sos"])

# replace old team ids (the helper rewrites the "team" column of all_standings in place)
replace_relocated_teams(all_standings, "team", relocated_teams)

# Filter the seasons to evaluate into a new dataframe. Selecting with the
# boolean mask directly avoids the inverted-mask / drop-by-index-label detour,
# which would also remove extra rows if index labels were ever duplicated.
# .copy() makes standings an independent frame, so later edits to it neither
# touch all_standings nor raise chained-assignment warnings.
in_evaluated_seasons = all_standings["season"].isin(seasons_to_evaluate)
standings = all_standings.loc[in_evaluated_seasons].copy()
display(standings)


Unnamed: 0,season,conf,division,team,wins,losses,ties,pct,div_rank,scored,allowed,net,seed,playoff
384,2014,AFC,AFC East,BUF,9,7,0,0.562500,2,343,289,54,,
385,2014,AFC,AFC East,MIA,8,8,0,0.500000,3,388,373,15,,
386,2014,AFC,AFC East,NE,12,4,0,0.750000,1,468,313,155,1.0,WonSB
387,2014,AFC,AFC East,NYJ,4,12,0,0.250000,4,283,401,-118,,
388,2014,AFC,AFC North,BAL,10,6,0,0.625000,3,409,302,107,6.0,LostDV
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
699,2023,NFC,NFC South,TB,9,8,0,0.529412,1,348,325,23,4.0,LostDV
700,2023,NFC,NFC West,ARI,4,13,0,0.235294,4,330,455,-125,,
701,2023,NFC,NFC West,LA,10,7,0,0.588235,2,404,377,27,6.0,LostWC
702,2023,NFC,NFC West,SEA,9,8,0,0.529412,3,364,402,-38,,


In [None]:
# Render the season-filtered standings frame as a Markdown table string.
# DataFrame.to_markdown relies on the `tabulate` package, which is why the
# imports cell of this notebook runs `%pip install tabulate`.
standings_md = standings.to_markdown()


|     |   season | conf   | division   | team   |   wins |   losses |   ties |      pct |   div_rank |   scored |   allowed |   net |   seed | playoff   |
|----:|---------:|:-------|:-----------|:-------|-------:|---------:|-------:|---------:|-----------:|---------:|----------:|------:|-------:|:----------|
| 384 |     2014 | AFC    | AFC East   | BUF    |      9 |        7 |      0 | 0.5625   |          2 |      343 |       289 |    54 |    nan | nan       |
| 385 |     2014 | AFC    | AFC East   | MIA    |      8 |        8 |      0 | 0.5      |          3 |      388 |       373 |    15 |    nan | nan       |
| 386 |     2014 | AFC    | AFC East   | NE     |     12 |        4 |      0 | 0.75     |          1 |      468 |       313 |   155 |      1 | WonSB     |
| 387 |     2014 | AFC    | AFC East   | NYJ    |      4 |       12 |      0 | 0.25     |          4 |      283 |       401 |  -118 |    nan | nan       |
| 388 |     2014 | AFC    | AFC North  | BAL    |     10 |        6 | 