In [1]:
from pathlib import Path
from nhl_pbp.downloader import NHLPBPDownloader
from nhl_pbp.transform import season_jsons_to_csvs_via_cache

def season_label(y: int) -> str:
    """Return the ``"<start>-<end>"`` label for the season starting in year *y*.

    The end year is always ``y + 1``; for example ``2016`` gives
    ``"2016-2017"``.
    """
    following_year = y + 1
    return f"{y}-{following_year}"

# Root folder that receives every generated CSV. The path is relative, so it
# resolves against the working directory the notebook is run from.
BASE = Path("../../ift6758/data/nhl/csv")
dl = NHLPBPDownloader()

# Running count of the rows reported by the transform step across all seasons.
total_rows_all = 0

# Seasons are identified by their start year. range()'s upper bound is
# exclusive, so the start years covered are 2016 through 2023.
for season in range(2016, 2024):
    label = season_label(season)
    out_dir = BASE.joinpath(label)  # passed to the transform as out_dir
    merged_path = BASE.joinpath(f"{label}_events.csv")  # passed as merged_out_path

    print(f"\n=== {label} ===")

    # Step 1: download the season, with both regular-season and playoff games
    # requested and no limit on the number of games.
    ids = dl.download_season(season_start_year=season, include_regular=True, include_playoffs=True, limit=None, progress=True)
    print(f"Cached {len(ids)} games.")

    # Step 2: convert the cached JSON into CSV output. The helper takes plain
    # strings for both destinations, hence the str() conversions.
    rows = season_jsons_to_csvs_via_cache(season_start_year=season, out_dir=str(out_dir), merged_out_path=str(merged_path))
    total_rows_all = total_rows_all + rows
    print(f"Wrote {rows} rows → {merged_path}")

print(f"\nDone. Total rows across seasons: {total_rows_all}")


=== 2016-2017 ===


20162017 teams:   0%|          | 0/30 [00:00<?, ?it/s]

[2016-2017] scanned 30 teams, 2844 items -> 1317 games (R+P)


2016-2017 downloads:   0%|          | 0/1317 [00:00<?, ?it/s]

Cached 1317 games.
Wrote 80399 rows → ../../ift6758/data/nhl/csv/2016-2017_events.csv

=== 2017-2018 ===


20172018 teams:   0%|          | 0/31 [00:00<?, ?it/s]

[2017-2018] scanned 31 teams, 2926 items -> 1355 games (R+P)


2017-2018 downloads:   0%|          | 0/1355 [00:00<?, ?it/s]

Cached 1355 games.
Wrote 87137 rows → ../../ift6758/data/nhl/csv/2017-2018_events.csv

=== 2018-2019 ===


20182019 teams:   0%|          | 0/31 [00:00<?, ?it/s]

[2018-2019] scanned 31 teams, 2934 items -> 1358 games (R+P)


2018-2019 downloads:   0%|          | 0/1358 [00:00<?, ?it/s]

Cached 1358 games.
Wrote 85939 rows → ../../ift6758/data/nhl/csv/2018-2019_events.csv

=== 2019-2020 ===


20192020 teams:   0%|          | 0/31 [00:00<?, ?it/s]

[2019-2020] scanned 31 teams, 2658 items -> 1212 games (R+P)


2019-2020 downloads:   0%|          | 0/1212 [00:00<?, ?it/s]

Cached 1212 games.
Wrote 76620 rows → ../../ift6758/data/nhl/csv/2019-2020_events.csv

=== 2020-2021 ===


20202021 teams:   0%|          | 0/31 [00:00<?, ?it/s]

[2020-2021] scanned 31 teams, 1904 items -> 952 games (R+P)


2020-2021 downloads:   0%|          | 0/952 [00:00<?, ?it/s]

Cached 952 games.
Wrote 57734 rows → ../../ift6758/data/nhl/csv/2020-2021_events.csv

=== 2021-2022 ===


20212022 teams:   0%|          | 0/32 [00:00<?, ?it/s]

[2021-2022] scanned 32 teams, 3008 items -> 1401 games (R+P)


2021-2022 downloads:   0%|          | 0/1401 [00:00<?, ?it/s]

Cached 1401 games.
Wrote 89585 rows → ../../ift6758/data/nhl/csv/2021-2022_events.csv

=== 2022-2023 ===


20222023 teams:   0%|          | 0/32 [00:00<?, ?it/s]

[2022-2023] scanned 32 teams, 3014 items -> 1400 games (R+P)


2022-2023 downloads:   0%|          | 0/1400 [00:00<?, ?it/s]

Cached 1400 games.
Wrote 88086 rows → ../../ift6758/data/nhl/csv/2022-2023_events.csv

=== 2023-2024 ===


20232024 teams:   0%|          | 0/32 [00:00<?, ?it/s]

[2023-2024] scanned 32 teams, 3022 items -> 1400 games (R+P)


2023-2024 downloads:   0%|          | 0/1400 [00:00<?, ?it/s]

Cached 1400 games.
Wrote 84932 rows → ../../ift6758/data/nhl/csv/2023-2024_events.csv

Done. Total rows across seasons: 650432
