In [1]:
from pathlib import Path
import nflreadpy as nfl

# Directory the parquet files are written to; created up front (including any
# missing parents) so the write calls cannot fail on a missing folder.
DATA_DIR = Path("../data/raw")
DATA_DIR.mkdir(parents=True, exist_ok=True)

# Inclusive season bounds. Named constants replace the bare range(2012, 2025)
# so the last season reads as 2024 instead of an exclusive upper bound.
FIRST_SEASON = 2012
LAST_SEASON = 2024
seasons = list(range(FIRST_SEASON, LAST_SEASON + 1))

In [2]:
from nflreadpy.config import update_config

# Cache settings handed to nflreadpy in a single call.
cache_settings = dict(
    cache_mode="filesystem",
    cache_dir=Path("../.nflreadpy_cache"),
    # 86400 -- one day, assuming the unit is seconds (verify against the
    # nflreadpy configuration docs).
    cache_duration=24 * 60 * 60,
    verbose=True,
)
update_config(**cache_settings)

In [3]:
# Tables that take the `seasons` list; each loader is called once, in the
# order listed, and the results are bound to their own names.
season_loaders = (
    nfl.load_pbp,
    nfl.load_schedules,
    nfl.load_injuries,
    nfl.load_snap_counts,
)
pbp, schedules, injuries, snap_counts = (
    load(seasons) for load in season_loaders
)

# The players table is loaded without a seasons argument.
players = nfl.load_players()

In [4]:
# Write every loaded table to DATA_DIR as parquet, one file per table.
# Dict insertion order keeps the write order: pbp first, players last.
frames_by_filename = {
    "pbp.parquet": pbp,
    "schedules.parquet": schedules,
    "injuries.parquet": injuries,
    "snap_counts.parquet": snap_counts,
    "players.parquet": players,
}
for filename, frame in frames_by_filename.items():
    frame.write_parquet(DATA_DIR / filename)

In [5]:
# Sanity check: for each expected output file, print its name, whether it
# exists under DATA_DIR, and its size in bytes (None when it is missing).
files = [
    "pbp.parquet",
    "schedules.parquet",
    "injuries.parquet",
    "snap_counts.parquet",
    "players.parquet",
]

for fname in files:
    fpath = DATA_DIR / fname
    present = fpath.exists()
    if present:
        size_bytes = fpath.stat().st_size
    else:
        size_bytes = None
    print(fname, present, size_bytes)

pbp.parquet True 168426769
schedules.parquet True 180361
injuries.parquet True 982163
snap_counts.parquet True 2569972
players.parquet True 2100727


In [6]:
# Report the shape of each in-memory table, one "label shape" line per table.
labelled_frames = (
    ("pbp", pbp),
    ("schedules", schedules),
    ("injuries", injuries),
    ("snap_counts", snap_counts),
    ("players", players),
)
for label, frame in labelled_frames:
    print(label, frame.shape)

pbp (627226, 372)
schedules (3544, 46)
injuries (70401, 16)
snap_counts (297999, 16)
players (24350, 39)
