# API for Game Data

In [1]:
import requests
import pandas as pd

def get_nfl_games(year: int) -> pd.DataFrame:

    header = {"User-Agent": "xcs5hg@virginia.edu"}

    def get_json(url: str):
        r = requests.get(url, headers=header)
        r.raise_for_status()
        return r.json()

    season_url = f"https://sports.core.api.espn.com/v2/sports/football/leagues/nfl/seasons/{year}"
    season_data = get_json(season_url)

    season_parts = {}  

    for t in season_data["types"]["items"]:
        t_data = get_json(t["$ref"])
        name = t_data.get("name")
        if name in ["Regular Season", "Postseason"]:
            season_parts[name] = t_data

    if not season_parts:
        raise ValueError(f"No Regular/Postseason types found for {year}")

    event_urls = []

    for part_name, part in season_parts.items():
        weeks_url = part["weeks"]["$ref"]
        weeks_data = get_json(weeks_url)
        week_items = weeks_data.get("items", [])

        for w in week_items:
            week = get_json(w["$ref"])
            events_url = week["events"]["$ref"]
            events_data = get_json(events_url)
            event_urls.extend(e["$ref"] for e in events_data.get("items", []))

    records = []

    for event_url in event_urls:
        event = get_json(event_url)

        game_id = event.get("id")
        date = event.get("date")

        comp = event["competitions"][0]
        competitors = comp.get("competitors", [])

        home_team = None
        away_team = None

        for c in competitors:
            side = c.get("homeAway") 
            team_obj = c.get("team", {})

            if "$ref" in team_obj:
                team_data = get_json(team_obj["$ref"])
            else:
                team_data = team_obj

            team_name = (
                team_data.get("displayName")
                or team_data.get("name")
                or team_data.get("abbreviation")
            )

            if side == "home":
                home_team = team_name
            elif side == "away":
                away_team = team_name

        records.append(
            {
                "game_id": game_id,
                "date": date,
                "home_team": home_team,
                "away_team": away_team,
            }
        )

    return pd.DataFrame(records)

In [465]:
df_games_2019 = get_nfl_games(2019)
df_games_2019.to_csv("nfl_games_2019.csv", index=False)

In [490]:
game_id_2019 = df_games_2019['game_id'].to_list()
game_id_2019 = [int(x) for x in game_id_2019]

In [563]:
df_games_2019

Unnamed: 0,game_id,date,home_team,away_team
0,401127913,2019-09-06T00:20Z,Chicago Bears,Green Bay Packers
1,401127928,2019-09-08T17:00Z,Cleveland Browns,Tennessee Titans
2,401127954,2019-09-08T17:00Z,Miami Dolphins,Baltimore Ravens
3,401127961,2019-09-08T17:00Z,Minnesota Vikings,Atlanta Falcons
4,401127963,2019-09-08T17:00Z,New York Jets,Buffalo Bills
...,...,...,...,...
263,401131042,2020-01-12T23:40Z,Green Bay Packers,Seattle Seahawks
264,401131044,2020-01-19T20:05Z,Kansas City Chiefs,Tennessee Titans
265,401131045,2020-01-19T23:40Z,San Francisco 49ers,Green Bay Packers
266,401131046,2020-01-26T20:00Z,AFC All-Stars,NFC All-Stars


# API for play by play AND API for each season's team's rooster

In [2]:
import requests
import pandas as pd

def get_game_plays(game_id: int) -> pd.DataFrame:

    header = {"User-Agent": "xcs5hg@virginia.edu"}

    event_url = (
        "https://sports.core.api.espn.com/v2/"
        f"sports/football/leagues/nfl/events/{game_id}"
    )
    event = requests.get(event_url, headers=header).json()

    competition = event["competitions"][0]
    comp_id = competition["id"]

    plays_url = (
        "https://sports.core.api.espn.com/v2/"
        f"sports/football/leagues/nfl/events/{game_id}/competitions/{comp_id}/plays?limit=500"
    )

    plays = requests.get(plays_url, headers=header).json()

    record = [item for item in plays["items"]]

    keep_cols = [
        "text",
        "period.number",
        "clock.value",
        "start.down",
        "start.distance",
        "start.yardLine",
        "start.yardsToEndzone",
        "end.yardsToEndzone",
        "homeScore",
        "awayScore",
        "statYardage",
        "scoringPlay",
        "scoreValue",
        "type.text",
        "type.abbreviation",
        "team.$ref",       # NEW
        "participants",    # NEW (list of dicts)
    ]

    data = pd.json_normalize(record)

    # only keep the columns that actually exist (to be safe)
    cols_existing = [c for c in keep_cols if c in data.columns]
    data = data[cols_existing]

    return data

In [4]:
import math

header = {"User-Agent": "xcs5hg@virginia.edu"}

def get_json(url: str):
    r = requests.get(url, headers=header)
    r.raise_for_status()
    return r.json()

def get_game_roster_from_plays(game_id: int) -> pd.DataFrame:
    """
    Use get_game_plays(game_id) (above) to infer the set of players
    who appeared in that game.
    """

    plays_df = get_game_plays(game_id)
    if plays_df.empty:
        return pd.DataFrame()

    # 1) Build mapping team_ref -> (team_id, team_name)
    team_refs = (
        plays_df.get("team.$ref")
        .dropna()
        .unique()
        .tolist()
        if "team.$ref" in plays_df.columns
        else []
    )

    team_map = {}
    for tref in team_refs:
        if isinstance(tref, str):
            tdata = get_json(tref)
            tid = tdata.get("id")
            tname = (
                tdata.get("displayName")
                or tdata.get("name")
                or tdata.get("abbreviation")
            )
            team_map[tref] = (tid, tname)

    athlete_cache = {}
    rows = []

    for _, row in plays_df.iterrows():
        team_ref = row.get("team.$ref")
        team_id, team_name = (None, None)
        if isinstance(team_ref, str) and team_ref in team_map:
            team_id, team_name = team_map[team_ref]

        participants = row.get("participants")

        # handle NaN / None
        if participants is None or (isinstance(participants, float) and math.isnan(participants)):
            continue
        if not isinstance(participants, list):
            continue

        for part in participants:
            if not isinstance(part, dict):
                continue

            athlete_ref = part.get("athlete", {}).get("$ref")
            if not athlete_ref:
                continue

            if athlete_ref in athlete_cache:
                athlete = athlete_cache[athlete_ref]
            else:
                athlete = get_json(athlete_ref)
                athlete_cache[athlete_ref] = athlete

            pos = athlete.get("position") or {}

            rows.append(
                {
                    "game_id": game_id,
                    "team_id": team_id,
                    "team_name": team_name,
                    "athlete_id": athlete.get("id"),
                    "full_name": athlete.get("fullName"),
                    "display_name": athlete.get("displayName"),
                    "position_name": pos.get("name"),
                    "position_abbr": pos.get("abbreviation"),
                }
            )

    df = pd.DataFrame(rows)
    if df.empty:
        return df

    # 3) Deduplicate per game/team/athlete
    df = df.drop_duplicates(subset=["game_id", "team_id", "athlete_id"])
    return df

In [None]:
plays_1 = get_game_plays(401127913)

roster_1 = get_game_roster_from_plays(401127913)


(89,
      game_id team_id          team_name athlete_id             full_name  \
 0  401127913       9  Green Bay Packers    4034949          Eddy Pineiro   
 1  401127913       9  Green Bay Packers    3042519       Aaron Jones Sr.   
 2  401127913       9  Green Bay Packers    3915189          Roquan Smith   
 3  401127913       9  Green Bay Packers       8439         Aaron Rodgers   
 7  401127913       9  Green Bay Packers    2574891  Roy Robertson-Harris   
 
            display_name     position_name position_abbr  
 0          Eddy Pineiro      Place kicker            PK  
 1       Aaron Jones Sr.      Running Back            RB  
 2          Roquan Smith        Linebacker            LB  
 3         Aaron Rodgers       Quarterback            QB  
 7  Roy Robertson-Harris  Defensive Tackle            DT  )

In [6]:
roster_1

Unnamed: 0,game_id,team_id,team_name,athlete_id,full_name,display_name,position_name,position_abbr
0,401127913,9,Green Bay Packers,4034949,Eddy Pineiro,Eddy Pineiro,Place kicker,PK
1,401127913,9,Green Bay Packers,3042519,Aaron Jones Sr.,Aaron Jones Sr.,Running Back,RB
2,401127913,9,Green Bay Packers,3915189,Roquan Smith,Roquan Smith,Linebacker,LB
3,401127913,9,Green Bay Packers,8439,Aaron Rodgers,Aaron Rodgers,Quarterback,QB
7,401127913,9,Green Bay Packers,2574891,Roy Robertson-Harris,Roy Robertson-Harris,Defensive Tackle,DT
...,...,...,...,...,...,...,...,...
394,401127913,9,Green Bay Packers,3066158,Tarik Cohen,Tarik Cohen,Running Back,RB
404,401127913,9,Green Bay Packers,4198676,Adam Shaheen,Adam Shaheen,Tight End,TE
408,401127913,9,Green Bay Packers,2582132,Adrian Amos,Adrian Amos,Safety,S
420,401127913,3,Chicago Bears,3916126,Duke Shelley,Duke Shelley,Cornerback,CB


In [7]:
roster_1.to_csv("nfl_game_401127913_roster.csv", index=False)

# Athlete Statlog

In [24]:
import requests
import pandas as pd

def get_game_player_stats(game_id: int) -> pd.DataFrame:

    url = f"https://site.api.espn.com/apis/site/v2/sports/football/nfl/summary?event={game_id}"
    r = requests.get(url).json()

    date = r.get("meta", {}).get("firstPlayWallClock", "")[:10]

    competitions = r.get("header", {}).get("competitions", [])
    if not competitions:
        return pd.DataFrame()

    comp = competitions[0]
    competitor_data = comp.get("competitors", [])

    team_info_map = {}
    for c in competitor_data:
        t = c.get("team", {})
        tid = t.get("id")
        team_info_map[tid] = {
            "team_name": t.get("displayName"),
            "home_away": c.get("homeAway")
        }

    team_names = [info["team_name"] for info in team_info_map.values()]

    players = r.get("boxscore", {}).get("players", [])
    rows = []

    for team_block in players:

        tid = team_block.get("team", {}).get("id")
        team_name = team_info_map.get(tid, {}).get("team_name")
        home_away = team_info_map.get(tid, {}).get("home_away")

        opposing_team = (
            team_names[1] if team_names[0] == team_name else team_names[0]
        )

        for stat_group in team_block.get("statistics", []):
            category = stat_group.get("name")

            for athlete in stat_group.get("athletes", []):
                a = athlete.get("athlete", {})

                rows.append({
                    "game_id": game_id,
                    "date": date,
                    "team": team_name,
                    "home_away": home_away,
                    "opposing_team": opposing_team,
                    "athlete_id": a.get("id"),
                    "display_name": a.get("displayName"),
                    "category": category,
                    "stats": athlete.get("stats"),
                })

    return pd.DataFrame(rows)

In [25]:
data = get_game_player_stats(401127913)

Unnamed: 0,game_id,date,team,home_away,opposing_team,athlete_id,display_name,category,stats
0,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,8439,Aaron Rodgers,passing,"[18/30, 203, 6.8, 1, 0, 5-37, 37.9, 91.4]"
1,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,3042519,Aaron Jones Sr.,rushing,"[13, 39, 3.0, 0, 9]"
2,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,8439,Aaron Rodgers,rushing,"[3, 8, 2.7, 0, 10]"
3,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,2980453,Jamaal Williams,rushing,"[5, 0, 0.0, 0, 5]"
4,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,3051738,Marquez Valdes-Scantling,rushing,"[1, 0, 0.0, 0, 0]"


In [26]:
import requests
import pandas as pd

header = {"User-Agent": "xcs5hg@virginia.edu"}

def get_athlete_gamelog(athlete_id: int, season: int | None = None) -> pd.DataFrame:

    base_url = (
        "https://site.web.api.espn.com/apis/common/v3/sports/football/nfl/"
        f"athletes/{athlete_id}/gamelog"
    )

    params = {}
    if season is not None:
        params["season"] = season

    r = requests.get(base_url, headers=header, params=params)
    r.raise_for_status()
    data = r.json()

    logs = (
        data.get("gamelogs")
        or data.get("events")
        or data.get("items")
    )

    if not logs:
        print(f"No gamelog entries found for athlete {athlete_id} (season={season})")
        return pd.DataFrame()

    df = pd.json_normalize(logs)

    df["athlete_id"] = athlete_id
    if season is not None:
        df["season"] = season

    return df

In [27]:
import requests
import pandas as pd

header = {"User-Agent": "xcs5hg@virginia.edu"}

def get_all_player_stats_for_game(game_id: int) -> pd.DataFrame:
    """
    Get per-player box score stats for a single NFL game.
    One row per (player, stat category).
    """
    url = f"https://site.api.espn.com/apis/site/v2/sports/football/nfl/summary?event={game_id}"
    data = requests.get(url, headers=header).json()

    rows = []

    for team_block in data.get("boxscore", {}).get("players", []):
        team_info = team_block.get("team", {})
        team_name = team_info.get("displayName")
        team_id = team_info.get("id")

        for stat_group in team_block.get("statistics", []):
            category = stat_group.get("name")      

            for entry in stat_group.get("athletes", []):
                a = entry.get("athlete", {})
                rows.append({
                    "game_id": game_id,
                    "team_id": team_id,
                    "team_name": team_name,
                    "athlete_id": a.get("id"),
                    "full_name": a.get("fullName"),
                    "display_name": a.get("displayName"),
                    "category": category,        
                    "stats": entry.get("stats"), 
                })

    return pd.DataFrame(rows)

In [28]:
def get_season_player_stats(game_ids: list[int]) -> pd.DataFrame:

    all_dfs = []

    for gid in game_ids:
        try:
            df_game = get_all_player_stats_for_game(gid)
            all_dfs.append(df_game)
        except Exception as e:
            print(f"Failed to fetch stats for game {gid}: {e}")

    if not all_dfs:
        return pd.DataFrame()

    return pd.concat(all_dfs, ignore_index=True)

In [29]:
import json

def get_all_player_stats_for_game(game_id: int):
    url = f"https://site.api.espn.com/apis/site/v2/sports/football/nfl/summary?event={game_id}"
    data = requests.get(url, headers=header).json()

    print(json.dumps(data, indent=2))

    return data  

In [30]:
import pandas as pd

def get_season_player_stats(game_ids):

    all_dfs = []

    for gid in game_ids:
        try:
            df = get_game_player_stats(gid)  # your function from before

            if df is None or not isinstance(df, pd.DataFrame) or df.empty:
                print(f"[warn] No data for game {gid}, skipping.")
                continue

            all_dfs.append(df)

        except Exception as e:
            # don't append error info into the list of frames
            print(f"[error] Failed to fetch/parse game {gid}: {e}")
            continue

    if not all_dfs:
        # nothing worked, return empty DataFrame
        return pd.DataFrame()

    # now we are **sure** everything in all_dfs is a DataFrame
    return pd.concat(all_dfs, ignore_index=True)

In [31]:
season_stats_2019 = get_season_player_stats([401127913, 401127928])
season_stats_2019

Unnamed: 0,game_id,date,team,home_away,opposing_team,athlete_id,display_name,category,stats
0,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,8439,Aaron Rodgers,passing,"[18/30, 203, 6.8, 1, 0, 5-37, 37.9, 91.4]"
1,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,3042519,Aaron Jones Sr.,rushing,"[13, 39, 3.0, 0, 9]"
2,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,8439,Aaron Rodgers,rushing,"[3, 8, 2.7, 0, 10]"
3,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,2980453,Jamaal Williams,rushing,"[5, 0, 0.0, 0, 5]"
4,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,3051738,Marquez Valdes-Scantling,rushing,"[1, 0, 0.0, 0, 0]"
...,...,...,...,...,...,...,...,...,...
140,401127928,2019-09-08,Cleveland Browns,home,Tennessee Titans,3126246,Dontrell Hilliard,kickReturns,"[3, 80, 26.7, 32, 0]"
141,401127928,2019-09-08,Cleveland Browns,home,Tennessee Titans,3139602,D'Ernest Johnson,kickReturns,"[1, 23, 23.0, 23, 0]"
142,401127928,2019-09-08,Cleveland Browns,home,Tennessee Titans,3126246,Dontrell Hilliard,puntReturns,"[1, 18, 18.0, 18, 0]"
143,401127928,2019-09-08,Cleveland Browns,home,Tennessee Titans,3821683,Austin Seibert,kicking,"[0/0, 0.0, 0, 1/2, 1]"


In [32]:
import pandas as pd

def split_stats_by_category(df: pd.DataFrame) -> dict:


    LABELS_BY_CATEGORY = {
        "defensive": [
            "TOT", "SOLO", "SACKS", "TFL", "PD", "QB_HTS", "TD"
        ],
        "receiving": [
            "REC", "YDS", "AVG", "TD", "LONG", "TGTS"
        ],
        "rushing": [
            "CAR", "YDS", "AVG", "TD", "LONG"
        ],
        "passing": [
            "C_ATT", "YDS", "AVG", "TD", "INT", "SACKS", "QBR", "RTG"
        ],
        "fumbles": [
            "FUM", "LOST", "REC"
        ],
        "interceptions": [
            "INT", "YDS", "TD"
        ],
        "kickReturns": [
            "NO", "YDS", "AVG", "LONG", "TD"
        ],
        "puntReturns": [
            "NO", "YDS", "AVG", "LONG", "TD"
        ],
        "kicking": [
            "FG", "PCT", "LONG", "XP", "PTS"
        ],
        "punting": [
            "NO", "YDS", "AVG", "TB", "IN_20", "LONG"
        ],
    }

    result = {}

    for cat, labels in LABELS_BY_CATEGORY.items():
        sub = df[df["category"] == cat].copy()
        if sub.empty:
            continue 

        def _safe_stats(lst):
            if lst is None:
                return [None] * len(labels)
            lst = list(lst)
            if len(lst) < len(labels):
                lst = lst + [None] * (len(labels) - len(lst))
            return lst[:len(labels)]

        stats_matrix = sub["stats"].apply(_safe_stats).to_list()
        stats_df = pd.DataFrame(stats_matrix, columns=labels, index=sub.index)

        out = pd.concat(
            [sub.drop(columns=["stats", "category"]), stats_df],
            axis=1
        )

        result[cat] = out

    return result

In [33]:
splits = split_stats_by_category(season_stats_2019)

defensive_df     = splits["defensive"]
receiving_df     = splits["receiving"]
rushing_df       = splits["rushing"]
passing_df       = splits["passing"]
fumbles_df       = splits["fumbles"]
interceptions_df = splits["interceptions"]
kickreturns_df   = splits["kickReturns"]
puntreturns_df   = splits["puntReturns"]
kicking_df       = splits["kicking"]
punting_df       = splits["punting"]

In [34]:
defensive_df

Unnamed: 0,game_id,date,team,home_away,opposing_team,athlete_id,display_name,TOT,SOLO,SACKS,TFL,PD,QB_HTS,TD
14,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,2978273,Blake Martinez,7,5,1,1.5,0,1,0
15,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,3049331,Raven Greene,6,5,0,0,1,0,0
16,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,3052170,Kevin King,5,5,1,1,1,1,0
17,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,2582132,Adrian Amos,5,4,0,0.5,1,0,0
18,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,3115308,Tony Brown II,5,3,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,401127928,2019-09-08,Cleveland Browns,home,Tennessee Titans,3917852,Sheldrick Redwine,1,1,0,0,0,0,0
136,401127928,2019-09-08,Cleveland Browns,home,Tennessee Titans,2574557,Jermaine Whitehead,1,0,0,0,0,1,0
137,401127928,2019-09-08,Cleveland Browns,home,Tennessee Titans,2971281,Pharaoh Brown,1,0,0,0,0,0,0
138,401127928,2019-09-08,Cleveland Browns,home,Tennessee Titans,3047876,KhaDarel Hodge,1,0,0,0,0,0,0


In [35]:
receiving_df

Unnamed: 0,game_id,date,team,home_away,opposing_team,athlete_id,display_name,REC,YDS,AVG,TD,LONG,TGTS
5,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,3051738,Marquez Valdes-Scantling,4,52,13.0,0,47,6
6,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,16800,Davante Adams,4,36,9.0,0,11,8
7,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,13232,Jimmy Graham,3,30,10.0,1,16,5
8,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,2573343,Trevor Davis,1,28,28.0,0,28,1
9,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,2975674,Robert Tonyan,1,28,28.0,0,28,1
10,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,2980453,Jamaal Williams,2,15,7.5,0,10,2
11,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,9614,Marcedes Lewis,2,14,7.0,0,9,3
12,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,3042519,Aaron Jones Sr.,1,0,0.0,0,0,1
39,401127913,2019-09-06,Chicago Bears,home,Green Bay Packers,16799,Allen Robinson,7,102,14.6,0,27,13
40,401127913,2019-09-06,Chicago Bears,home,Green Bay Packers,3066158,Tarik Cohen,8,49,6.1,0,9,10
