# API for Game Data

In [72]:
import requests
import pandas as pd

def get_nfl_games(year: int) -> pd.DataFrame:
    """
    Collect every regular-season and postseason game of one NFL season.

    Walks the ESPN core API from the season document down through its
    season types, weeks and events, resolving each ``$ref`` link with a
    separate HTTP GET.

    Args:
        year: Season year inserted into the season URL (e.g. 2019).

    Returns:
        DataFrame with one row per event and the columns
        ``game_id``, ``date``, ``home_team`` and ``away_team``.

    Raises:
        ValueError: If no season type is named "Regular Season" or
            "Postseason".
        requests.HTTPError: If any response carries an error status.
    """
    request_headers = {"User-Agent": "xcs5hg@virginia.edu"}

    def get_json(url: str):
        response = requests.get(url, headers=request_headers)
        response.raise_for_status()
        return response.json()

    def resolve_team_name(team_obj):
        # A team arrives either as a bare ``$ref`` link or as an inline object.
        details = get_json(team_obj["$ref"]) if "$ref" in team_obj else team_obj
        for key in ("displayName", "name"):
            label = details.get(key)
            if label:
                return label
        return details.get("abbreviation")

    season = get_json(
        f"https://sports.core.api.espn.com/v2/sports/football/leagues/nfl/seasons/{year}"
    )

    # Keep only the two season types of interest, keyed by their name.
    season_parts = {}
    for type_stub in season["types"]["items"]:
        type_detail = get_json(type_stub["$ref"])
        type_name = type_detail.get("name")
        if type_name in ["Regular Season", "Postseason"]:
            season_parts[type_name] = type_detail

    if len(season_parts) == 0:
        raise ValueError(f"No Regular/Postseason types found for {year}")

    # Season part -> weeks -> events: gather the link of every event.
    event_urls = []
    for part in season_parts.values():
        week_listing = get_json(part["weeks"]["$ref"])
        for week_stub in week_listing.get("items", []):
            week_detail = get_json(week_stub["$ref"])
            event_listing = get_json(week_detail["events"]["$ref"])
            for event_stub in event_listing.get("items", []):
                event_urls.append(event_stub["$ref"])

    records = []
    for event_url in event_urls:
        event = get_json(event_url)
        row = {
            "game_id": event.get("id"),
            "date": event.get("date"),
            "home_team": None,
            "away_team": None,
        }

        # Only the first competition of an event is inspected.
        competition = event["competitions"][0]
        for competitor in competition.get("competitors", []):
            side = competitor.get("homeAway")
            team_name = resolve_team_name(competitor.get("team", {}))
            if side == "home":
                row["home_team"] = team_name
            elif side == "away":
                row["away_team"] = team_name

        records.append(row)

    return pd.DataFrame(records)

In [None]:
df_games_2019 = get_nfl_games(2019)
df_games_2019.to_csv("nfl_games_2019.csv", index=False)

In [4]:
# Plain-int copy of the 2019 game ids (the column holds them as objects).
game_id_2019 = [int(raw_id) for raw_id in df_games_2019['game_id'].to_list()]

In [5]:
df_games_2019

Unnamed: 0,game_id,date,home_team,away_team
0,401127913,2019-09-06T00:20Z,Chicago Bears,Green Bay Packers
1,401127928,2019-09-08T17:00Z,Cleveland Browns,Tennessee Titans
2,401127954,2019-09-08T17:00Z,Miami Dolphins,Baltimore Ravens
3,401127961,2019-09-08T17:00Z,Minnesota Vikings,Atlanta Falcons
4,401127963,2019-09-08T17:00Z,New York Jets,Buffalo Bills
...,...,...,...,...
263,401131042,2020-01-12T23:40Z,Green Bay Packers,Seattle Seahawks
264,401131044,2020-01-19T20:05Z,Kansas City Chiefs,Tennessee Titans
265,401131045,2020-01-19T23:40Z,San Francisco 49ers,Green Bay Packers
266,401131046,2020-01-26T20:00Z,AFC All-Stars,NFC All-Stars


In [12]:
from datetime import datetime, timedelta, timezone
import requests
import pandas as pd
from concurrent.futures import ThreadPoolExecutor

def get_nfl_games(years: int | list[int]) -> pd.DataFrame:
    """
    Fetch NFL games for one or multiple years.
    Returns a DataFrame with:
        game_id, date, season, home_team, away_team, datetime
    """
    header = {"User-Agent": "epx8hh@virginia.edu"}
    
    def get_json(url: str):
        r = requests.get(url, headers=header)
        r.raise_for_status()
        return r.json()
    
    def fetch_season(year: int) -> pd.DataFrame:
        """Fetch games for a single season"""
        try:
            season_url = f"https://sports.core.api.espn.com/v2/sports/football/leagues/nfl/seasons/{year}"
            season_data = get_json(season_url)
            
            season_parts = {}
            for t in season_data["types"]["items"]:
                t_data = get_json(t["$ref"])
                name = t_data.get("name")
                if name in ["Regular Season", "Postseason"]:
                    season_parts[name] = t_data
            
            if not season_parts:
                raise ValueError(f"No Regular/Postseason types found for {year}")
            
            event_urls = []
            for part in season_parts.values():
                weeks_data = get_json(part["weeks"]["$ref"])
                for w in weeks_data.get("items", []):
                    week = get_json(w["$ref"])
                    events_data = get_json(week["events"]["$ref"])
                    event_urls.extend(e["$ref"] for e in events_data.get("items", []))
            
            records = []
            for event_url in event_urls:
                event = get_json(event_url)
                game_id = event.get("id")
                date = event.get("date")
                
                comp = event["competitions"][0]
                competitors = comp.get("competitors", [])
                
                home_team = None
                away_team = None
                
                for c in competitors:
                    side = c.get("homeAway")
                    team_obj = c.get("team", {})
                    
                    # Fetch team if only ref provided
                    if "$ref" in team_obj:
                        team_data = get_json(team_obj["$ref"])
                    else:
                        team_data = team_obj
                    
                    team_name = (
                        team_data.get("displayName")
                        or team_data.get("name")
                        or team_data.get("abbreviation")
                    )
                    
                    if side == "home":
                        home_team = team_name
                    elif side == "away":
                        away_team = team_name
                
                records.append({
                    "game_id": game_id,
                    "date": date,
                    "season": year,
                    "home_team": home_team,
                    "away_team": away_team,
                })
            
            print(f"‚úì Fetched {len(records)} games for {year}")
            return pd.DataFrame(records)
            
        except Exception as e:
            print(f"‚úó Failed to fetch {year}: {e}")
            return pd.DataFrame()
    
    # Allow int or list
    if isinstance(years, int):
        years = [years]
    
    # Fetch in parallel
    all_dfs = []
    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [executor.submit(fetch_season, y) for y in years]
        for f in futures:
            df = f.result()
            if not df.empty:
                all_dfs.append(df)
    
    if not all_dfs:
        return pd.DataFrame()

    # Combine all years
    df = pd.concat(all_dfs, ignore_index=True)

    # --------------------------------------
    # CORRECT DATETIME PARSING (FIXED)
    # --------------------------------------
    df["datetime"] = pd.to_datetime(
        df["date"],
        format="%Y-%m-%dT%H:%MZ",
        utc=True,
        errors="coerce"
    )

    # Drop invalid dates
    df = df[df["datetime"].notna()]

    # --------------------------------------
    # FUTURE FILTERING
    # --------------------------------------
    today = datetime.now(timezone.utc)
    cutoff = today + timedelta(days=14)

    df = df[df["datetime"] <= cutoff]

    # --------------------------------------
    # REMOVE TBD MATCHUPS
    # --------------------------------------
    df = df[(df["home_team"] != "TBD") & (df["away_team"] != "TBD")]

    return df.sort_values("datetime").reset_index(drop=True)

In [9]:
df_2024_26 = get_nfl_games(list(range(2024, 2026)))
df_2024_26

✓ Fetched 286 games for 2025
✓ Fetched 286 games for 2024


Unnamed: 0,game_id,date,season,home_team,away_team,datetime
0,401671789,2024-09-06T00:40Z,2024,Kansas City Chiefs,Baltimore Ravens,2024-09-06 00:40:00+00:00
1,401671805,2024-09-07T00:15Z,2024,Philadelphia Eagles,Green Bay Packers,2024-09-07 00:15:00+00:00
2,401671744,2024-09-08T17:00Z,2024,Atlanta Falcons,Pittsburgh Steelers,2024-09-08 17:00:00+00:00
3,401671617,2024-09-08T17:00Z,2024,Buffalo Bills,Arizona Cardinals,2024-09-08 17:00:00+00:00
4,401671719,2024-09-08T17:00Z,2024,Chicago Bears,Tennessee Titans,2024-09-08 17:00:00+00:00
...,...,...,...,...,...,...
470,401772895,2025-11-30T18:00Z,2025,Tampa Bay Buccaneers,Arizona Cardinals,2025-11-30 18:00:00+00:00
471,401772786,2025-11-30T18:00Z,2025,Tennessee Titans,Jacksonville Jaguars,2025-11-30 18:00:00+00:00
472,401772785,2025-11-30T18:00Z,2025,Cleveland Browns,San Francisco 49ers,2025-11-30 18:00:00+00:00
473,401772787,2025-11-30T18:00Z,2025,Indianapolis Colts,Houston Texans,2025-11-30 18:00:00+00:00


# API for play-by-play AND API for each season's team rosters

In [13]:
import requests
import pandas as pd

def get_game_plays(game_id: int) -> pd.DataFrame:
    """
    Download the plays of one game and return them as a flat DataFrame.

    The event document is fetched first to learn the id of its first
    competition, then that competition's plays are requested with
    ``limit=500``.

    Args:
        game_id: ESPN event id of the game.

    Returns:
        DataFrame produced by ``pd.json_normalize`` on the play items,
        restricted to whichever of the columns in ``keep_cols`` are present.
        Empty when the response contains no ``items``.

    Raises:
        requests.HTTPError: If either request returns an error status.
    """
    header = {"User-Agent": "xcs5hg@virginia.edu"}

    def get_json(url: str):
        # Fail loudly on HTTP errors instead of parsing an error body as
        # data, and never wait forever on a stalled connection.
        r = requests.get(url, headers=header, timeout=30)
        r.raise_for_status()
        return r.json()

    event_url = (
        "https://sports.core.api.espn.com/v2/"
        f"sports/football/leagues/nfl/events/{game_id}"
    )
    event = get_json(event_url)

    comp_id = event["competitions"][0]["id"]

    plays_url = (
        "https://sports.core.api.espn.com/v2/"
        f"sports/football/leagues/nfl/events/{game_id}/competitions/{comp_id}/plays?limit=500"
    )
    plays = get_json(plays_url)

    keep_cols = [
        "text",
        "period.number",
        "clock.value",
        "start.down",
        "start.distance",
        "start.yardLine",
        "start.yardsToEndzone",
        "end.yardsToEndzone",
        "homeScore",
        "awayScore",
        "statYardage",
        "scoringPlay",
        "scoreValue",
        "type.text",
        "type.abbreviation",
        "team.$ref",       # link to the team attached to the play
        "participants",    # list of dicts, kept un-flattened
    ]

    # A response without "items" yields an empty frame rather than a KeyError.
    data = pd.json_normalize(plays.get("items", []))

    # Only keep the columns that actually exist in this response.
    cols_existing = [c for c in keep_cols if c in data.columns]
    return data[cols_existing]

In [14]:
import math

header = {"User-Agent": "xcs5hg@virginia.edu"}

def get_json(url: str):
    """GET ``url`` with the module-level ``header`` and return the decoded JSON body.

    Raises ``requests.HTTPError`` when the response has an error status.
    """
    response = requests.get(url, headers=header)
    response.raise_for_status()
    payload = response.json()
    return payload

def get_game_roster_from_plays(game_id: int) -> pd.DataFrame:
    """
    Build a per-game player list from the participants of every play.

    Calls ``get_game_plays(game_id)``, resolves each distinct ``team.$ref``
    once, then fetches every participating athlete's document (once per
    distinct athlete link).

    Every participant of a play is tagged with the team attached to that
    play, not with a team read from the athlete document.
    NOTE(review): the sample output lists defenders under the opposing
    team, which suggests the play's team is the one in possession - confirm
    before relying on ``team_id`` / ``team_name``.

    Returns:
        DataFrame with the columns game_id, team_id, team_name, athlete_id,
        full_name, display_name, position_name, position_abbr,
        de-duplicated on (game_id, team_id, athlete_id); an empty DataFrame
        when there are no plays or no participants.
    """
    plays_df = get_game_plays(game_id)
    if plays_df.empty:
        return pd.DataFrame()

    # 1) Resolve each distinct team link once: ref -> (team id, team name).
    if "team.$ref" in plays_df.columns:
        distinct_refs = plays_df["team.$ref"].dropna().unique().tolist()
    else:
        distinct_refs = []

    team_map = {}
    for ref in distinct_refs:
        if not isinstance(ref, str):
            continue
        team = get_json(ref)
        label = team.get("displayName") or team.get("name") or team.get("abbreviation")
        team_map[ref] = (team.get("id"), label)

    # 2) Walk the plays; each athlete document is downloaded at most once.
    athletes_by_ref = {}

    def load_athlete(ref):
        if ref not in athletes_by_ref:
            athletes_by_ref[ref] = get_json(ref)
        return athletes_by_ref[ref]

    rows = []
    for _, play in plays_df.iterrows():
        play_team_ref = play.get("team.$ref")
        if isinstance(play_team_ref, str) and play_team_ref in team_map:
            team_id, team_name = team_map[play_team_ref]
        else:
            team_id, team_name = None, None

        participants = play.get("participants")
        # None, NaN and any other non-list value all mean "no participants".
        if not isinstance(participants, list):
            continue

        for entry in participants:
            if not isinstance(entry, dict):
                continue

            athlete_ref = entry.get("athlete", {}).get("$ref")
            if not athlete_ref:
                continue

            athlete = load_athlete(athlete_ref)
            position = athlete.get("position") or {}

            rows.append(
                {
                    "game_id": game_id,
                    "team_id": team_id,
                    "team_name": team_name,
                    "athlete_id": athlete.get("id"),
                    "full_name": athlete.get("fullName"),
                    "display_name": athlete.get("displayName"),
                    "position_name": position.get("name"),
                    "position_abbr": position.get("abbreviation"),
                }
            )

    roster = pd.DataFrame(rows)
    if roster.empty:
        # No columns exist yet, so de-duplicating on them would fail.
        return roster

    # 3) One row per game/team/athlete.
    return roster.drop_duplicates(subset=["game_id", "team_id", "athlete_id"])

In [8]:
plays_1 = get_game_plays(401127913)

roster_1 = get_game_roster_from_plays(401127913)


In [9]:
roster_1

Unnamed: 0,game_id,team_id,team_name,athlete_id,full_name,display_name,position_name,position_abbr
0,401127913,9,Green Bay Packers,4034949,Eddy Pineiro,Eddy Pineiro,Place kicker,PK
1,401127913,9,Green Bay Packers,3042519,Aaron Jones Sr.,Aaron Jones Sr.,Running Back,RB
2,401127913,9,Green Bay Packers,3915189,Roquan Smith,Roquan Smith,Linebacker,LB
3,401127913,9,Green Bay Packers,8439,Aaron Rodgers,Aaron Rodgers,Quarterback,QB
7,401127913,9,Green Bay Packers,2574891,Roy Robertson-Harris,Roy Robertson-Harris,Defensive Tackle,DT
...,...,...,...,...,...,...,...,...
394,401127913,9,Green Bay Packers,3066158,Tarik Cohen,Tarik Cohen,Running Back,RB
404,401127913,9,Green Bay Packers,4198676,Adam Shaheen,Adam Shaheen,Tight End,TE
408,401127913,9,Green Bay Packers,2582132,Adrian Amos,Adrian Amos,Safety,S
420,401127913,3,Chicago Bears,3916126,Duke Shelley,Duke Shelley,Cornerback,CB


In [43]:
roster_1.game_id

0      401127913
1      401127913
2      401127913
3      401127913
7      401127913
         ...    
394    401127913
404    401127913
408    401127913
420    401127913
425    401127913
Name: game_id, Length: 89, dtype: int64

In [10]:
roster_1.to_csv("nfl_game_401127913_roster.csv", index=False)

# Athlete Statlog

In [15]:
import requests
import pandas as pd
from functools import lru_cache

header = {"User-Agent": "xcs5hg@virginia.edu"}

# Cache athlete positions to avoid repeated API calls
@lru_cache(maxsize=5000)
def get_athlete_position(athlete_id: int) -> str:
    """Fetch athlete's position from their profile"""
    try:
        url = f"https://sports.core.api.espn.com/v2/sports/football/leagues/nfl/athletes/{athlete_id}"
        r = requests.get(url, headers=header)
        r.raise_for_status()
        data = r.json()
        return data.get("position", {}).get("abbreviation")
    except Exception as e:
        print(f"Failed to fetch position for athlete {athlete_id}: {e}")
        return None

def get_game_player_stats(game_id: int) -> pd.DataFrame:
    """
    Return the box-score statistics of one game, one row per
    (player, stat category).

    Args:
        game_id: ESPN event id of the game.

    Returns:
        DataFrame with the columns game_id, date, season, team, home_away,
        opposing_team, athlete_id, display_name, position, category, stats.
        ``date`` is the first 10 characters of ``meta.firstPlayWallClock``
        (empty string when absent); ``stats`` is the raw list from the box
        score. An empty DataFrame is returned when the summary has no
        ``header.competitions``.

    Raises:
        requests.HTTPError: If the summary request returns an error status.
            Raising (instead of parsing the error body and returning an
            empty frame) lets a retrying caller notice the failure.
    """
    url = f"https://site.api.espn.com/apis/site/v2/sports/football/nfl/summary?event={game_id}"
    resp = requests.get(url, headers=header, timeout=30)
    resp.raise_for_status()
    r = resp.json()

    # The field may be missing or null; slicing "" keeps date a string.
    date = (r.get("meta", {}).get("firstPlayWallClock") or "")[:10]

    season = (
        r.get("header", {}).get("season", {}).get("year") or
        r.get("gameInfo", {}).get("season", {}).get("year") or
        r.get("season", {}).get("year") or
        None
    )

    competitions = r.get("header", {}).get("competitions", [])
    if not competitions:
        return pd.DataFrame()

    # team id -> name and home/away flag, in the order the header lists them.
    team_info_map = {}
    for c in competitions[0].get("competitors", []):
        t = c.get("team", {})
        team_info_map[t.get("id")] = {
            "team_name": t.get("displayName"),
            "home_away": c.get("homeAway"),
        }

    def opponent_of(tid):
        """Name of the first listed competitor whose id differs from ``tid``."""
        others = [
            info["team_name"]
            for other_id, info in team_info_map.items()
            if other_id != tid
        ]
        # None instead of an IndexError when the header lists fewer than two teams.
        return others[0] if others else None

    rows = []
    for team_block in r.get("boxscore", {}).get("players", []):
        tid = team_block.get("team", {}).get("id")
        team_info = team_info_map.get(tid, {})
        team_name = team_info.get("team_name")
        home_away = team_info.get("home_away")
        opposing_team = opponent_of(tid)

        for stat_group in team_block.get("statistics", []):
            category = stat_group.get("name")

            for athlete in stat_group.get("athletes", []):
                a = athlete.get("athlete", {})
                athlete_id = a.get("id")

                # Prefer the position in the box score; otherwise look it
                # up on the athlete's profile.
                position = (a.get("position") or {}).get("abbreviation")
                if not position and athlete_id:
                    position = get_athlete_position(athlete_id)

                rows.append({
                    "game_id": game_id,
                    "date": date,
                    "season": season,
                    "team": team_name,
                    "home_away": home_away,
                    "opposing_team": opposing_team,
                    "athlete_id": athlete_id,
                    "display_name": a.get("displayName"),
                    "position": position,
                    "category": category,
                    "stats": athlete.get("stats"),
                })

    return pd.DataFrame(rows)

In [56]:
data = get_game_player_stats(401127913)
data

Unnamed: 0,game_id,date,season,team,home_away,opposing_team,athlete_id,display_name,position,category,stats
0,401127913,2019-09-06,2019,Green Bay Packers,away,Chicago Bears,8439,Aaron Rodgers,QB,passing,"[18/30, 203, 6.8, 1, 0, 5-37, 37.9, 91.4]"
1,401127913,2019-09-06,2019,Green Bay Packers,away,Chicago Bears,3042519,Aaron Jones Sr.,RB,rushing,"[13, 39, 3.0, 0, 9]"
2,401127913,2019-09-06,2019,Green Bay Packers,away,Chicago Bears,8439,Aaron Rodgers,QB,rushing,"[3, 8, 2.7, 0, 10]"
3,401127913,2019-09-06,2019,Green Bay Packers,away,Chicago Bears,2980453,Jamaal Williams,RB,rushing,"[5, 0, 0.0, 0, 5]"
4,401127913,2019-09-06,2019,Green Bay Packers,away,Chicago Bears,3051738,Marquez Valdes-Scantling,WR,rushing,"[1, 0, 0.0, 0, 0]"
...,...,...,...,...,...,...,...,...,...,...,...
61,401127913,2019-09-06,2019,Chicago Bears,home,Green Bay Packers,2969924,Eddie Goldman,DT,defensive,"[1, 1, 0, 0, 0, 0, 0]"
62,401127913,2019-09-06,2019,Chicago Bears,home,Green Bay Packers,3117922,Bilal Nichols,DT,defensive,"[1, 0, 0, 0, 0, 0, 0]"
63,401127913,2019-09-06,2019,Chicago Bears,home,Green Bay Packers,3066158,Tarik Cohen,RB,puntReturns,"[4, 36, 9.0, 14, 0]"
64,401127913,2019-09-06,2019,Chicago Bears,home,Green Bay Packers,4034949,Eddy Pineiro,PK,kicking,"[1/1, 100.0, 38, 0/0, 3]"


In [16]:
import requests
import pandas as pd

header = {"User-Agent": "xcs5hg@virginia.edu"}

def get_athlete_gamelog(athlete_id: int, season: int | None = None) -> pd.DataFrame:
    """
    Download an athlete's game log and flatten it into a DataFrame.

    Args:
        athlete_id: ESPN athlete id placed into the request URL.
        season: Optional season sent as the ``season`` query parameter.

    Returns:
        ``pd.json_normalize`` of the first non-empty value among the
        response keys ``gamelogs``, ``events`` and ``items``, with an
        ``athlete_id`` column added (and a ``season`` column when a season
        was given). When all three are empty or missing, a message is
        printed and an empty DataFrame is returned.

    Raises:
        requests.HTTPError: If the response has an error status.
    """
    endpoint = (
        "https://site.web.api.espn.com/apis/common/v3/sports/football/nfl/"
        f"athletes/{athlete_id}/gamelog"
    )
    query = {} if season is None else {"season": season}

    response = requests.get(endpoint, headers=header, params=query)
    response.raise_for_status()
    payload = response.json()

    # Take the first key that holds something truthy.
    entries = None
    for key in ("gamelogs", "events", "items"):
        entries = payload.get(key)
        if entries:
            break

    if not entries:
        print(f"No gamelog entries found for athlete {athlete_id} (season={season})")
        return pd.DataFrame()

    gamelog = pd.json_normalize(entries)
    gamelog["athlete_id"] = athlete_id
    if season is not None:
        gamelog["season"] = season
    return gamelog

In [17]:
import requests
import pandas as pd

header = {"User-Agent": "xcs5hg@virginia.edu"}

def get_all_player_stats_for_game(game_id: int) -> pd.DataFrame:
    """
    Get per-player box score stats for a single NFL game.
    One row per (player, stat category).

    Positions are resolved through ``get_position_abbr``, which must be
    defined before this function is called.

    Args:
        game_id: ESPN event id of the game.

    Returns:
        DataFrame with the columns game_id, team_id, team_name, athlete_id,
        full_name, display_name, position, category, stats; empty when the
        summary has no ``boxscore.players``.

    Raises:
        requests.HTTPError: If the summary request returns an error status,
            so a failed download is reported by the caller instead of
            silently producing an empty frame.
    """
    url = f"https://site.api.espn.com/apis/site/v2/sports/football/nfl/summary?event={game_id}"
    resp = requests.get(url, headers=header, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    rows = []

    for team_block in data.get("boxscore", {}).get("players", []):
        team_info = team_block.get("team", {})
        team_name = team_info.get("displayName")
        team_id = team_info.get("id")

        for stat_group in team_block.get("statistics", []):
            category = stat_group.get("name")

            for entry in stat_group.get("athletes", []):
                a = entry.get("athlete", {})
                position_abbr = get_position_abbr(a)
                rows.append({
                    "game_id": game_id,
                    "team_id": team_id,
                    "team_name": team_name,
                    "athlete_id": a.get("id"),
                    "full_name": a.get("fullName"),
                    "display_name": a.get("displayName"),
                    "position": position_abbr,
                    "category": category,
                    "stats": entry.get("stats"),
                })

    return pd.DataFrame(rows)

In [18]:
def get_season_player_stats(game_ids: list[int]) -> pd.DataFrame:
    """
    Fetch box-score stats for each game in turn and stack the results.

    A game whose fetch raises is reported on stdout and skipped.

    Args:
        game_ids: Event ids passed one by one to
            ``get_all_player_stats_for_game``.

    Returns:
        The concatenation of all per-game results with a fresh index, or an
        empty DataFrame when nothing was collected.
    """
    frames = []
    for game_id in game_ids:
        try:
            frames.append(get_all_player_stats_for_game(game_id))
        except Exception as e:
            print(f"Failed to fetch stats for game {game_id}: {e}")

    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

In [19]:
import json

def get_all_player_stats_for_game(game_id: int):
    """Debug helper: print the raw game summary JSON and return it.

    NOTE: this reuses the name of the DataFrame-returning
    ``get_all_player_stats_for_game``; whichever definition runs last is the
    one other code will call.
    """
    response = requests.get(
        f"https://site.api.espn.com/apis/site/v2/sports/football/nfl/summary?event={game_id}",
        headers=header,
    )
    summary = response.json()

    # Dump the whole document, indented, for inspection.
    pretty = json.dumps(summary, indent=2)
    print(pretty)

    return summary

In [20]:

# athlete id -> position abbreviation (or None), filled by get_position_abbr.
ATHLETE_POS_CACHE = {}

def get_position_abbr(a):
    """
    Look up the position abbreviation for a box-score athlete dict.

    The result is remembered in ``ATHLETE_POS_CACHE`` under the athlete's
    ``id``. The athlete document is taken from ``a["$ref"]`` or, failing
    that, ``a["links"]["athlete"]["href"]``, and fetched with ``get_json``.

    Returns:
        The abbreviation, or ``None`` when the dict has no ``id``, no usable
        link, or the fetched document has no position abbreviation.
    """
    athlete_id = a.get("id")
    if not athlete_id:
        return None

    # Serve repeat lookups from the cache (cached None values included).
    try:
        return ATHLETE_POS_CACHE[athlete_id]
    except KeyError:
        pass

    profile_url = a.get("$ref")
    if not profile_url:
        profile_url = a.get("links", {}).get("athlete", {}).get("href")

    abbreviation = None
    if profile_url:
        profile = get_json(profile_url)
        abbreviation = profile.get("position", {}).get("abbreviation")

    ATHLETE_POS_CACHE[athlete_id] = abbreviation
    return abbreviation

In [21]:
import requests
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
from functools import lru_cache

header = {"User-Agent": "epxhh@virginia.edu"}

# Cache HTTP requests
@lru_cache(maxsize=10000)
def get_json_cached(url: str):
    """GET ``url`` with the module-level ``header`` and return the decoded JSON.

    Results are memoised per URL (up to 10000 entries), so repeated calls
    with the same URL return the same object without a new request. An error
    status raises ``requests.HTTPError``; a raised call is not memoised.
    """
    response = requests.get(url, headers=header)
    response.raise_for_status()
    payload = response.json()
    return payload

def get_season_player_stats_optimized(game_ids, max_workers=20):
    """
    Fetch ``get_game_player_stats`` for many games on a thread pool.

    Each game is tried up to three times; after the last failed attempt a
    message is printed and the game is skipped. Progress is printed after
    every tenth completed game.

    Args:
        game_ids: Iterable of event ids (its ``len`` is used in the progress
            message).
        max_workers: Size of the thread pool.

    Returns:
        Concatenation of all non-empty per-game frames with a fresh index,
        or an empty DataFrame when none were collected.
    """
    def fetch_with_retry(gid, retries=3):
        for attempt in range(retries):
            try:
                # Short pause before every request. It is taken per worker,
                # so it does not cap the combined request rate of the pool.
                time.sleep(0.05)
                return get_game_player_stats(gid)
            except Exception as e:
                if attempt < retries - 1:
                    time.sleep(1)  # back off before the next attempt
                    continue
                print(f"Failed game {gid}: {e}")
                return None

    frames = []
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = {}
        for gid in game_ids:
            pending[pool.submit(fetch_with_retry, gid)] = gid

        completed = 0
        for future in as_completed(pending):
            completed += 1
            if completed % 10 == 0:
                print(f"Progress: {completed}/{len(game_ids)}")

            frame = future.result()
            if frame is None or frame.empty:
                continue
            frames.append(frame)

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

In [62]:
df_games_2019.game_id[0:10]

0    401127913
1    401127928
2    401127954
3    401127961
4    401127963
5    401127968
6    401127923
7    401127931
8    401127972
9    401127995
Name: game_id, dtype: object

In [63]:
OPT_season_stats_2019 = get_season_player_stats_optimized(df_games_2019.game_id[0:10])
OPT_season_stats_2019

Progress: 10/10


Unnamed: 0,game_id,date,season,team,home_away,opposing_team,athlete_id,display_name,position,category,stats
0,401127913,2019-09-06,2019,Green Bay Packers,away,Chicago Bears,8439,Aaron Rodgers,QB,passing,"[18/30, 203, 6.8, 1, 0, 5-37, 37.9, 91.4]"
1,401127913,2019-09-06,2019,Green Bay Packers,away,Chicago Bears,3042519,Aaron Jones Sr.,RB,rushing,"[13, 39, 3.0, 0, 9]"
2,401127913,2019-09-06,2019,Green Bay Packers,away,Chicago Bears,8439,Aaron Rodgers,QB,rushing,"[3, 8, 2.7, 0, 10]"
3,401127913,2019-09-06,2019,Green Bay Packers,away,Chicago Bears,2980453,Jamaal Williams,RB,rushing,"[5, 0, 0.0, 0, 5]"
4,401127913,2019-09-06,2019,Green Bay Packers,away,Chicago Bears,3051738,Marquez Valdes-Scantling,WR,rushing,"[1, 0, 0.0, 0, 0]"
...,...,...,...,...,...,...,...,...,...,...,...
739,401127954,2019-09-08,2019,Miami Dolphins,home,Baltimore Ravens,3128630,Chris Lammons,CB,defensive,"[0, 0, 0, 0, 1, 0, 0]"
740,401127954,2019-09-08,2019,Miami Dolphins,home,Baltimore Ravens,2577641,Jakeem Grant Sr.,WR,kickReturns,"[3, 59, 19.7, 23, 0]"
741,401127954,2019-09-08,2019,Miami Dolphins,home,Baltimore Ravens,2577641,Jakeem Grant Sr.,WR,puntReturns,"[1, 0, 0.0, 0, 0]"
742,401127954,2019-09-08,2019,Miami Dolphins,home,Baltimore Ravens,3124679,Jason Sanders,PK,kicking,"[1/1, 100.0, 54, 1/1, 4]"


In [22]:
import pandas as pd

def split_stats_by_category(df: pd.DataFrame) -> dict:
    """
    Split a box-score frame into one frame per stat category.

    Args:
        df: Frame with at least the columns
              - category  (e.g. 'passing', 'rushing', 'receiving', ...)
              - stats     (list of stat values; None or NaN when missing)
            Every other column is carried through unchanged.

    Returns:
        dict[category] -> DataFrame holding the rows of that category with
        ``stats`` and ``category`` removed and one new column per stat
        label. Stat lists shorter than the label list are padded with None,
        longer ones are truncated, and a missing list yields all-None stat
        columns. The original row index is preserved. Categories with no
        rows, and categories not listed in ``LABELS_BY_CATEGORY``, are
        absent from the result.
    """

    LABELS_BY_CATEGORY = {
        "defensive": [
            "TOT", "SOLO", "SACKS", "TFL", "PD", "QB_HTS", "TD"
        ],
        "receiving": [
            "REC", "YDS", "AVG", "TD", "LONG", "TGTS"
        ],
        "rushing": [
            "CAR", "YDS", "AVG", "TD", "LONG"
        ],
        "passing": [
            "C_ATT", "YDS", "AVG", "TD", "INT", "SACKS", "QBR", "RTG"
        ],
        "fumbles": [
            "FUM", "LOST", "REC"
        ],
        "interceptions": [
            "INT", "YDS", "TD"
        ],
        "kickReturns": [
            "NO", "YDS", "AVG", "LONG", "TD"
        ],
        "puntReturns": [
            "NO", "YDS", "AVG", "LONG", "TD"
        ],
        "kicking": [
            "FG", "PCT", "LONG", "XP", "PTS"
        ],
        "punting": [
            "NO", "YDS", "AVG", "TB", "IN_20", "LONG"
        ],
    }

    def _fit(values, width):
        """Return ``values`` as a list of exactly ``width`` cells."""
        # A missing stats cell shows up as None or as a float NaN (e.g.
        # after concatenating frames); list(NaN) would raise a TypeError.
        missing = values is None or (isinstance(values, float) and pd.isna(values))
        cells = [] if missing else list(values)[:width]
        return cells + [None] * (width - len(cells))

    result = {}

    for cat, labels in LABELS_BY_CATEGORY.items():
        sub = df[df["category"] == cat]
        if sub.empty:
            continue

        width = len(labels)
        stats_matrix = [_fit(values, width) for values in sub["stats"]]
        # Reuse sub's index so the column-wise concat lines up row by row.
        stats_df = pd.DataFrame(stats_matrix, columns=labels, index=sub.index)

        result[cat] = pd.concat(
            [sub.drop(columns=["stats", "category"]), stats_df],
            axis=1
        )

    return result

In [None]:
# Split the combined box-score frame into one frame per stat category.
# `season_stats_2019` must already exist in the session when this cell runs.
splits = split_stats_by_category(season_stats_2019)

# split_stats_by_category omits categories that have no rows, so each lookup
# below raises KeyError if that category is absent from the data.
defensive_df     = splits["defensive"]
receiving_df     = splits["receiving"]
rushing_df       = splits["rushing"]
passing_df       = splits["passing"]
fumbles_df       = splits["fumbles"]
interceptions_df = splits["interceptions"]
kickreturns_df   = splits["kickReturns"]
puntreturns_df   = splits["puntReturns"]
kicking_df       = splits["kicking"]
punting_df       = splits["punting"]

In [42]:
defensive_df

Unnamed: 0,game_id,date,team,home_away,opposing_team,athlete_id,display_name,TOT,SOLO,SACKS,TFL,PD,QB_HTS,TD
14,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,2978273,Blake Martinez,7,5,1,1.5,0,1,0
15,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,3049331,Raven Greene,6,5,0,0,1,0,0
16,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,3052170,Kevin King,5,5,1,1,1,1,0
17,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,2582132,Adrian Amos,5,4,0,0.5,1,0,0
18,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,3115308,Tony Brown II,5,3,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,401127928,2019-09-08,Cleveland Browns,home,Tennessee Titans,3917852,Sheldrick Redwine,1,1,0,0,0,0,0
136,401127928,2019-09-08,Cleveland Browns,home,Tennessee Titans,2574557,Jermaine Whitehead,1,0,0,0,0,1,0
137,401127928,2019-09-08,Cleveland Browns,home,Tennessee Titans,2971281,Pharaoh Brown,1,0,0,0,0,0,0
138,401127928,2019-09-08,Cleveland Browns,home,Tennessee Titans,3047876,KhaDarel Hodge,1,0,0,0,0,0,0


In [23]:
receiving_df.head(10)

Unnamed: 0,game_id,date,team,home_away,opposing_team,athlete_id,display_name,REC,YDS,AVG,TD,LONG,TGTS
5,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,3051738,Marquez Valdes-Scantling,4,52,13.0,0,47,6
6,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,16800,Davante Adams,4,36,9.0,0,11,8
7,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,13232,Jimmy Graham,3,30,10.0,1,16,5
8,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,2573343,Trevor Davis,1,28,28.0,0,28,1
9,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,2975674,Robert Tonyan,1,28,28.0,0,28,1
10,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,2980453,Jamaal Williams,2,15,7.5,0,10,2
11,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,9614,Marcedes Lewis,2,14,7.0,0,9,3
12,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,3042519,Aaron Jones Sr.,1,0,0.0,0,0,1
39,401127913,2019-09-06,Chicago Bears,home,Green Bay Packers,16799,Allen Robinson,7,102,14.6,0,27,13
40,401127913,2019-09-06,Chicago Bears,home,Green Bay Packers,3066158,Tarik Cohen,8,49,6.1,0,9,10


## Larger run (all 2019)

In [17]:
df_games_2019 = get_nfl_games(2019)

In [26]:
season_stats_2019 = get_season_player_stats_optimized(df_games_2019.game_id)

Progress: 10/268
Progress: 20/268
Progress: 30/268
Progress: 40/268
Progress: 50/268
Progress: 60/268
Progress: 70/268
Progress: 80/268
Progress: 90/268
Progress: 100/268
Progress: 110/268
Progress: 120/268
Progress: 130/268
Progress: 140/268
Progress: 150/268
Progress: 160/268
Progress: 170/268
Progress: 180/268
Progress: 190/268
Progress: 200/268
Progress: 210/268
Progress: 220/268
Progress: 230/268
Progress: 240/268
Progress: 250/268
Progress: 260/268


In [28]:
season_stats_2019

Unnamed: 0,game_id,date,team,home_away,opposing_team,athlete_id,display_name,position,category,stats
0,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,8439,Aaron Rodgers,,passing,"[18/30, 203, 6.8, 1, 0, 5-37, 37.9, 91.4]"
1,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,3042519,Aaron Jones Sr.,,rushing,"[13, 39, 3.0, 0, 9]"
2,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,8439,Aaron Rodgers,,rushing,"[3, 8, 2.7, 0, 10]"
3,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,2980453,Jamaal Williams,,rushing,"[5, 0, 0.0, 0, 5]"
4,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,3051738,Marquez Valdes-Scantling,,rushing,"[1, 0, 0.0, 0, 0]"
...,...,...,...,...,...,...,...,...,...,...
20577,401131039,2020-01-05,Philadelphia Eagles,home,Seattle Seahawks,3116058,Craig James,,defensive,"[1, 1, 0, 0, 0, 0, 0]"
20578,401131039,2020-01-05,Philadelphia Eagles,home,Seattle Seahawks,14959,Vinny Curry,,defensive,"[0, 0, 0, 0, 0, 2, 0]"
20579,401131039,2020-01-05,Philadelphia Eagles,home,Seattle Seahawks,3051439,Boston Scott,,kickReturns,"[1, 24, 24.0, 24, 0]"
20580,401131039,2020-01-05,Philadelphia Eagles,home,Seattle Seahawks,3050478,Jake Elliott,,kicking,"[3/3, 100.0, 46, 0/0, 9]"


In [29]:
# Re-split after the full-season run so every per-category frame covers all games.
splits = split_stats_by_category(season_stats_2019)

# split_stats_by_category omits categories that have no rows, so each lookup
# below raises KeyError if that category is absent from the data.
defensive_df     = splits["defensive"]
receiving_df     = splits["receiving"]
rushing_df       = splits["rushing"]
passing_df       = splits["passing"]
fumbles_df       = splits["fumbles"]
interceptions_df = splits["interceptions"]
kickreturns_df   = splits["kickReturns"]
puntreturns_df   = splits["puntReturns"]
kicking_df       = splits["kicking"]
punting_df       = splits["punting"]

In [30]:
defensive_df

Unnamed: 0,game_id,date,team,home_away,opposing_team,athlete_id,display_name,position,TOT,SOLO,SACKS,TFL,PD,QB_HTS,TD
14,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,2978273,Blake Martinez,,7,5,1,1.5,0,1,0
15,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,3049331,Raven Greene,,6,5,0,0,1,0,0
16,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,3052170,Kevin King,,5,5,1,1,1,1,0
17,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,2582132,Adrian Amos,,5,4,0,0.5,1,0,0
18,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,3115308,Tony Brown II,,5,3,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20574,401131039,2020-01-05,Philadelphia Eagles,home,Seattle Seahawks,15222,Rodney McLeod Jr.,,2,1,0,0,0,0,0
20575,401131039,2020-01-05,Philadelphia Eagles,home,Seattle Seahawks,16785,Timmy Jernigan,,2,0,0,1,0,0,0
20576,401131039,2020-01-05,Philadelphia Eagles,home,Seattle Seahawks,3042725,Duke Riley,,1,1,0,0,0,0,0
20577,401131039,2020-01-05,Philadelphia Eagles,home,Seattle Seahawks,3116058,Craig James,,1,1,0,0,0,0,0


In [31]:
punting_df

Unnamed: 0,game_id,date,team,home_away,opposing_team,athlete_id,display_name,position,NO,YDS,AVG,TB,IN_20,LONG
33,401127913,2019-09-06,Green Bay Packers,away,Chicago Bears,3126368,JK Scott,,9,428,47.6,0,5,63
65,401127913,2019-09-06,Chicago Bears,home,Green Bay Packers,16863,Pat O'Donnell,,8,341,42.6,0,3,53
106,401127928,2019-09-08,Tennessee Titans,away,Cleveland Browns,11555,Brett Kern,,6,277,46.2,0,3,70
144,401127928,2019-09-08,Cleveland Browns,home,Tennessee Titans,3936185,Jamie Gillan,,5,233,46.6,0,3,52
186,401127954,2019-09-08,Baltimore Ravens,away,Miami Dolphins,9789,Sam Koch,,1,56,56.0,0,1,56
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20435,401127984,2019-12-29,Houston Texans,home,Tennessee Titans,14950,Bryan Anger,,5,248,49.6,0,3,58
20474,401127950,2019-12-29,Pittsburgh Steelers,away,Baltimore Ravens,2472364,Jordan Berry,,5,244,48.8,1,2,54
20519,401127950,2019-12-29,Baltimore Ravens,home,Pittsburgh Steelers,9789,Sam Koch,,3,140,46.7,0,3,54
20549,401131039,2020-01-05,Seattle Seahawks,away,Philadelphia Eagles,3929851,Michael Dickson,,4,164,41.0,0,2,44


## Final Calling for 2019-2023

In [65]:
import os
# The API pull below is left commented out; df_2019_2023 is presumably already
# in memory from an earlier run or load - TODO confirm where it comes from.
# NOTE(review): the get_nfl_games definition at the top of this file is
# annotated `year: int`, yet a list of years is passed here - confirm that a
# list-accepting version is the one in scope before re-enabling this line.
#df_2019_2023 = get_nfl_games(list(range(2019, 2024)))
df_2019_2023

Unnamed: 0,game_id,date,season,home_team,away_team
0,401127913,2019-09-06T00:20Z,2019,Chicago Bears,Green Bay Packers
1,401127928,2019-09-08T17:00Z,2019,Cleveland Browns,Tennessee Titans
2,401127954,2019-09-08T17:00Z,2019,Miami Dolphins,Baltimore Ravens
3,401127961,2019-09-08T17:00Z,2019,Minnesota Vikings,Atlanta Falcons
4,401127963,2019-09-08T17:00Z,2019,New York Jets,Buffalo Bills
...,...,...,...,...,...
1391,401547758,2024-01-21T23:30Z,2023,Buffalo Bills,Kansas City Chiefs
1392,401547379,2024-01-28T20:00Z,2023,Baltimore Ravens,Kansas City Chiefs
1393,401547380,2024-01-28T23:30Z,2023,San Francisco 49ers,Detroit Lions
1394,401616889,2024-02-04T20:00Z,2023,AFC,NFC


In [66]:
# Fetch player stat rows for every game id in the 2019-2023 schedule frame.
# NOTE: despite the "_2019" suffix, the result covers every season present in
# df_2019_2023 (the displayed output runs from 2019 through 2023).
season_stats_2019 = get_season_player_stats_optimized(df_2019_2023["game_id"])
season_stats_2019

Progress: 10/1396
Progress: 20/1396
Progress: 30/1396
Progress: 40/1396
Progress: 50/1396
Progress: 60/1396
Progress: 70/1396
Progress: 80/1396
Progress: 90/1396
Progress: 100/1396
Progress: 110/1396
Progress: 120/1396
Progress: 130/1396
Progress: 140/1396
Progress: 150/1396
Progress: 160/1396
Progress: 170/1396
Progress: 180/1396
Progress: 190/1396
Progress: 200/1396
Progress: 210/1396
Progress: 220/1396
Progress: 230/1396
Progress: 240/1396
Progress: 250/1396
Progress: 260/1396
Progress: 270/1396
Progress: 280/1396
Progress: 290/1396
Progress: 300/1396
Progress: 310/1396
Progress: 320/1396
Progress: 330/1396
Progress: 340/1396
Progress: 350/1396
Progress: 360/1396
Progress: 370/1396
Progress: 380/1396
Progress: 390/1396
Progress: 400/1396
Progress: 410/1396
Progress: 420/1396
Progress: 430/1396
Progress: 440/1396
Progress: 450/1396
Progress: 460/1396
Progress: 470/1396
Progress: 480/1396
Progress: 490/1396
Progress: 500/1396
Progress: 510/1396
Progress: 520/1396
Progress: 530/1396
Pr

Unnamed: 0,game_id,date,season,team,home_away,opposing_team,athlete_id,display_name,position,category,stats
0,401127972,2019-09-08,2019,Indianapolis Colts,away,Los Angeles Chargers,2578570,Jacoby Brissett,QB,passing,"[21/27, 190, 7.0, 2, 0, 2-17, 60.5, 120.7]"
1,401127972,2019-09-08,2019,Indianapolis Colts,away,Los Angeles Chargers,3139605,Marlon Mack,RB,rushing,"[25, 174, 7.0, 1, 63]"
2,401127972,2019-09-08,2019,Indianapolis Colts,away,Los Angeles Chargers,3916430,Nyheim Hines,RB,rushing,"[4, 13, 3.3, 0, 6]"
3,401127972,2019-09-08,2019,Indianapolis Colts,away,Los Angeles Chargers,2578570,Jacoby Brissett,QB,rushing,"[3, 9, 3.0, 0, 6]"
4,401127972,2019-09-08,2019,Indianapolis Colts,away,Los Angeles Chargers,3121410,Parris Campbell,WR,rushing,"[1, 7, 7.0, 0, 7]"
...,...,...,...,...,...,...,...,...,...,...,...
109123,401547645,2024-01-07,2023,Los Angeles Chargers,home,Kansas City Chiefs,3858276,Jaylinn Hawkins,S,defensive,"[1, 0, 0, 0, 0, 0, 0]"
109124,401547645,2024-01-07,2023,Los Angeles Chargers,home,Kansas City Chiefs,4039413,Alohi Gilman,S,interceptions,"[1, 0, 0]"
109125,401547645,2024-01-07,2023,Los Angeles Chargers,home,Kansas City Chiefs,4362477,Derius Davis,WR,kickReturns,"[2, 69, 34.5, 46, 0]"
109126,401547645,2024-01-07,2023,Los Angeles Chargers,home,Kansas City Chiefs,4362081,Cameron Dicker,PK,kicking,"[4/4, 100.0, 49, 0/0, 12]"


In [67]:
# Split the combined stats frame into a mapping keyed by stat category.
splits = split_stats_by_category(season_stats_2019)

# Unpack the ten categories in one statement; the key order on the right
# matches the target-name order on the left.
(
    defensive_df,
    receiving_df,
    rushing_df,
    passing_df,
    fumbles_df,
    interceptions_df,
    kickreturns_df,
    puntreturns_df,
    kicking_df,
    punting_df,
) = (
    splits[category]
    for category in (
        "defensive",
        "receiving",
        "rushing",
        "passing",
        "fumbles",
        "interceptions",
        "kickReturns",
        "puntReturns",
        "kicking",
        "punting",
    )
)

# Show the rushing table as the cell output.
rushing_df

Unnamed: 0,game_id,date,season,team,home_away,opposing_team,athlete_id,display_name,position,CAR,YDS,AVG,TD,LONG
1,401127972,2019-09-08,2019,Indianapolis Colts,away,Los Angeles Chargers,3139605,Marlon Mack,RB,25,174,7.0,1,63
2,401127972,2019-09-08,2019,Indianapolis Colts,away,Los Angeles Chargers,3916430,Nyheim Hines,RB,4,13,3.3,0,6
3,401127972,2019-09-08,2019,Indianapolis Colts,away,Los Angeles Chargers,2578570,Jacoby Brissett,QB,3,9,3.0,0,6
4,401127972,2019-09-08,2019,Indianapolis Colts,away,Los Angeles Chargers,3121410,Parris Campbell,WR,1,7,7.0,0,7
41,401127972,2019-09-08,2019,Los Angeles Chargers,home,Indianapolis Colts,3068267,Austin Ekeler,RB,12,58,4.8,1,19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109087,401547645,2024-01-07,2023,Los Angeles Chargers,home,Kansas City Chiefs,4362477,Derius Davis,WR,3,17,5.7,0,18
109088,401547645,2024-01-07,2023,Los Angeles Chargers,home,Kansas City Chiefs,3068267,Austin Ekeler,RB,10,11,1.1,0,6
109089,401547645,2024-01-07,2023,Los Angeles Chargers,home,Kansas City Chiefs,4242433,Joshua Palmer,WR,1,6,6.0,0,6
109090,401547645,2024-01-07,2023,Los Angeles Chargers,home,Kansas City Chiefs,4429025,Quentin Johnston,WR,1,6,6.0,0,6


In [68]:
import os

# DataFrame.to_csv does not create missing parent folders, so make sure the
# output directory exists before writing anything.
os.makedirs("data", exist_ok=True)

# File stem -> frame to persist. The singular "puntreturn" stem is kept on
# purpose: the cell that loads these files reads that exact filename.
_frames_2019_2023 = {
    "defensive": defensive_df,
    "receiving": receiving_df,
    "rushing": rushing_df,
    "passing": passing_df,
    "fumbles": fumbles_df,
    "interceptions": interceptions_df,
    "kickreturns": kickreturns_df,
    "puntreturn": puntreturns_df,
    "kicking": kicking_df,
    "punting": punting_df,
}

for _stem, _frame in _frames_2019_2023.items():
    # index=False keeps the DataFrame index out of the written file.
    _frame.to_csv(f"data/{_stem}_2019_2023.csv", index=False)


In [70]:
# Folder where all CSVs are stored
data_dir = "data"

# Reload each per-category table from disk. The folder is joined with the file
# name explicitly (os.path.join with a single argument just returns that
# argument). The singular "puntreturn" matches the name the files were saved
# under.
defensive_df      = pd.read_csv(os.path.join(data_dir, "defensive_2019_2023.csv"))
receiving_df      = pd.read_csv(os.path.join(data_dir, "receiving_2019_2023.csv"))
rushing_df        = pd.read_csv(os.path.join(data_dir, "rushing_2019_2023.csv"))
passing_df        = pd.read_csv(os.path.join(data_dir, "passing_2019_2023.csv"))
fumbles_df        = pd.read_csv(os.path.join(data_dir, "fumbles_2019_2023.csv"))
interceptions_df  = pd.read_csv(os.path.join(data_dir, "interceptions_2019_2023.csv"))
kickreturns_df    = pd.read_csv(os.path.join(data_dir, "kickreturns_2019_2023.csv"))
puntreturns_df    = pd.read_csv(os.path.join(data_dir, "puntreturn_2019_2023.csv"))
kicking_df        = pd.read_csv(os.path.join(data_dir, "kicking_2019_2023.csv"))
punting_df        = pd.read_csv(os.path.join(data_dir, "punting_2019_2023.csv"))

# Show the receiving table as the cell output.
receiving_df

Unnamed: 0,game_id,date,season,team,home_away,opposing_team,athlete_id,display_name,position,REC,YDS,AVG,TD,LONG,TGTS
0,401127972,2019-09-08,2019,Indianapolis Colts,away,Los Angeles Chargers,14924,T.Y. Hilton,WR,8,87,10.9,2,19,9
1,401127972,2019-09-08,2019,Indianapolis Colts,away,Los Angeles Chargers,3728254,Deon Cain,WR,2,35,17.5,0,25,2
2,401127972,2019-09-08,2019,Indianapolis Colts,away,Los Angeles Chargers,2977609,Devin Funchess,TE,3,32,10.7,0,16,5
3,401127972,2019-09-08,2019,Indianapolis Colts,away,Los Angeles Chargers,16504,Jack Doyle,TE,1,20,20.0,0,20,2
4,401127972,2019-09-08,2019,Indianapolis Colts,away,Los Angeles Chargers,16732,Eric Ebron,TE,1,8,8.0,0,8,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22407,401547645,2024-01-07,2023,Los Angeles Chargers,home,Kansas City Chiefs,4429025,Quentin Johnston,WR,2,17,8.5,0,13,5
22408,401547645,2024-01-07,2023,Los Angeles Chargers,home,Kansas City Chiefs,4250764,Stone Smartt,TE,2,14,7.0,0,8,3
22409,401547645,2024-01-07,2023,Los Angeles Chargers,home,Kansas City Chiefs,3120980,Keelan Doss,WR,1,8,8.0,0,8,5
22410,401547645,2024-01-07,2023,Los Angeles Chargers,home,Kansas City Chiefs,3918639,Gerald Everett,TE,1,4,4.0,0,4,1


## DATA FOR HOLDOUT

In [10]:
# The API pull for the holdout seasons is left commented out;
# range(2024, 2026) would request seasons 2024 and 2025.
# NOTE(review): the commented code assigns df_24tocurrent, while the frame
# displayed below is df_2024_26 - presumably loaded elsewhere; confirm.
#df_24tocurrent = get_nfl_games(list(range(2024, 2026)))
#df_24tocurrent

df_2024_26

Unnamed: 0,game_id,date,season,home_team,away_team,datetime
0,401671789,2024-09-06T00:40Z,2024,Kansas City Chiefs,Baltimore Ravens,2024-09-06 00:40:00+00:00
1,401671805,2024-09-07T00:15Z,2024,Philadelphia Eagles,Green Bay Packers,2024-09-07 00:15:00+00:00
2,401671744,2024-09-08T17:00Z,2024,Atlanta Falcons,Pittsburgh Steelers,2024-09-08 17:00:00+00:00
3,401671617,2024-09-08T17:00Z,2024,Buffalo Bills,Arizona Cardinals,2024-09-08 17:00:00+00:00
4,401671719,2024-09-08T17:00Z,2024,Chicago Bears,Tennessee Titans,2024-09-08 17:00:00+00:00
...,...,...,...,...,...,...
470,401772895,2025-11-30T18:00Z,2025,Tampa Bay Buccaneers,Arizona Cardinals,2025-11-30 18:00:00+00:00
471,401772786,2025-11-30T18:00Z,2025,Tennessee Titans,Jacksonville Jaguars,2025-11-30 18:00:00+00:00
472,401772785,2025-11-30T18:00Z,2025,Cleveland Browns,San Francisco 49ers,2025-11-30 18:00:00+00:00
473,401772787,2025-11-30T18:00Z,2025,Indianapolis Colts,Houston Texans,2025-11-30 18:00:00+00:00


In [24]:
# Fetch player stat rows for every game id in the holdout schedule frame.
season_stats_df_24tocurrent = get_season_player_stats_optimized(df_2024_26["game_id"])
season_stats_df_24tocurrent

Progress: 10/475
Progress: 20/475
Progress: 30/475
Progress: 40/475
Progress: 50/475
Progress: 60/475
Progress: 70/475
Progress: 80/475
Progress: 90/475
Progress: 100/475
Progress: 110/475
Progress: 120/475
Progress: 130/475
Progress: 140/475
Progress: 150/475
Progress: 160/475
Progress: 170/475
Progress: 180/475
Progress: 190/475
Progress: 200/475
Progress: 210/475
Progress: 220/475
Progress: 230/475
Progress: 240/475
Progress: 250/475
Progress: 260/475
Progress: 270/475
Progress: 280/475
Progress: 290/475
Progress: 300/475
Progress: 310/475
Progress: 320/475
Progress: 330/475
Progress: 340/475
Progress: 350/475
Progress: 360/475
Progress: 370/475
Progress: 380/475
Progress: 390/475
Progress: 400/475
Progress: 410/475
Progress: 420/475
Progress: 430/475
Progress: 440/475
Progress: 450/475
Progress: 460/475
Progress: 470/475


Unnamed: 0,game_id,date,season,team,home_away,opposing_team,athlete_id,display_name,position,category,stats
0,401671617,2024-09-08,2024,Arizona Cardinals,away,Buffalo Bills,3917315,Kyler Murray,QB,passing,"[21/31, 162, 5.2, 1, 0, 4-16, 80.5, 91.1]"
1,401671617,2024-09-08,2024,Arizona Cardinals,away,Buffalo Bills,3917315,Kyler Murray,QB,rushing,"[5, 57, 11.4, 0, 29]"
2,401671617,2024-09-08,2024,Arizona Cardinals,away,Buffalo Bills,3045147,James Conner,RB,rushing,"[16, 50, 3.1, 1, 20]"
3,401671617,2024-09-08,2024,Arizona Cardinals,away,Buffalo Bills,4429275,Trey Benson,RB,rushing,"[3, 13, 4.3, 0, 6]"
4,401671617,2024-09-08,2024,Arizona Cardinals,away,Buffalo Bills,4037235,Greg Dortch,WR,rushing,"[1, 4, 4.0, 0, 4]"
...,...,...,...,...,...,...,...,...,...,...,...
34810,401772631,2025-11-16,2025,Miami Dolphins,home,Washington Commanders,4035686,Jack Jones,CB,interceptions,"[1, 7, 0]"
34811,401772631,2025-11-16,2025,Miami Dolphins,home,Washington Commanders,4569603,Malik Washington,WR,kickReturns,"[1, 21, 21.0, 21, 0]"
34812,401772631,2025-11-16,2025,Miami Dolphins,home,Washington Commanders,4569603,Malik Washington,WR,puntReturns,"[1, 3, 3.0, 3, 0]"
34813,401772631,2025-11-16,2025,Miami Dolphins,home,Washington Commanders,4243371,Riley Patterson,PK,kicking,"[3/3, 100.0, 46, 1/1, 10]"


In [26]:
# Split the holdout stats frame into a mapping keyed by stat category.
splits = split_stats_by_category(season_stats_df_24tocurrent)

# Unpack the ten categories in one statement; the key order on the right
# matches the target-name order on the left.
(
    defensive_df,
    receiving_df,
    rushing_df,
    passing_df,
    fumbles_df,
    interceptions_df,
    kickreturns_df,
    puntreturns_df,
    kicking_df,
    punting_df,
) = (
    splits[category]
    for category in (
        "defensive",
        "receiving",
        "rushing",
        "passing",
        "fumbles",
        "interceptions",
        "kickReturns",
        "puntReturns",
        "kicking",
        "punting",
    )
)

# Show the rushing table as the cell output.
rushing_df

Unnamed: 0,game_id,date,season,team,home_away,opposing_team,athlete_id,display_name,position,CAR,YDS,AVG,TD,LONG
1,401671617,2024-09-08,2024,Arizona Cardinals,away,Buffalo Bills,3917315,Kyler Murray,QB,5,57,11.4,0,29
2,401671617,2024-09-08,2024,Arizona Cardinals,away,Buffalo Bills,3045147,James Conner,RB,16,50,3.1,1,20
3,401671617,2024-09-08,2024,Arizona Cardinals,away,Buffalo Bills,4429275,Trey Benson,RB,3,13,4.3,0,6
4,401671617,2024-09-08,2024,Arizona Cardinals,away,Buffalo Bills,4037235,Greg Dortch,WR,1,4,4.0,0,4
36,401671617,2024-09-08,2024,Buffalo Bills,home,Arizona Cardinals,4379399,James Cook III,RB,19,71,3.7,0,15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34743,401772631,2025-11-16,2025,Washington Commanders,away,Miami Dolphins,3127586,Jeremy McNichols,RB,4,13,3.3,0,7
34744,401772631,2025-11-16,2025,Washington Commanders,away,Miami Dolphins,3126486,Deebo Samuel,WR,1,3,3.0,0,3
34782,401772631,2025-11-16,2025,Miami Dolphins,home,Washington Commanders,4429160,De'Von Achane,RB,21,120,5.7,0,23
34783,401772631,2025-11-16,2025,Miami Dolphins,home,Washington Commanders,4711533,Ollie Gordon II,RB,9,45,5.0,1,20


In [27]:
import os

# DataFrame.to_csv does not create missing parent folders, so make sure the
# output directory exists before writing anything.
os.makedirs("data", exist_ok=True)

# File stem -> frame to persist. The singular "puntreturn" stem is kept
# unchanged so anything that already reads this filename keeps working.
_frames_24tocurrent = {
    "defensive": defensive_df,
    "receiving": receiving_df,
    "rushing": rushing_df,
    "passing": passing_df,
    "fumbles": fumbles_df,
    "interceptions": interceptions_df,
    "kickreturns": kickreturns_df,
    "puntreturn": puntreturns_df,
    "kicking": kicking_df,
    "punting": punting_df,
}

for _stem, _frame in _frames_24tocurrent.items():
    # index=False keeps the DataFrame index out of the written file.
    _frame.to_csv(f"data/{_stem}_24tocurrent.csv", index=False)