This cell opens the existing DuckDB file so that all downstream tables are persisted in a single database and the notebook runs against a stable state.

In [None]:
from pathlib import Path
import duckdb

# Directory holding the project's DuckDB database, relative to this notebook.
DB_DIR = Path("../db")
DB_PATH = DB_DIR / "nflpa.duckdb"

# duckdb.connect() creates a brand-new, empty database when the file does not
# exist. This notebook is meant to run against the existing database, so fail
# fast with a clear message instead of silently working on an empty file.
if not DB_PATH.is_file():
    raise FileNotFoundError(f"DuckDB file not found: {DB_PATH.resolve()}")

# Shared connection used by every later cell in the notebook.
con = duckdb.connect(str(DB_PATH))

print("connected")

Quick sanity check to verify that the `schedules` relation is accessible and non-empty before any transformations are attempted.

In [None]:
# Count the rows in `schedules`; the query itself raises if the relation
# cannot be read.
schedules_rows = con.execute("SELECT COUNT(*) FROM schedules").fetchone()[0]
print("schedules", schedules_rows)

# Enforce the precondition instead of relying on someone reading the printed
# number: the transformations that follow are meaningless on an empty table.
assert schedules_rows > 0, "schedules is empty; load it before building team_game_weeks"

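This expands each game into two team-perspective rows (one for the home team, one for the away team) and establishes the one-row-per-team-per-week structure with the required key columns (`season`, `week`, `team`, `game_id`). No `PRIMARY KEY` constraint is declared on the table; uniqueness of the key is verified in the next check.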

In [None]:
# SQL that (re)builds `team_game_weeks` from `schedules`.
# The `base` CTE selects the per-game columns and derives `game_date` from
# `gameday`; the two SELECTs joined by UNION ALL then emit one row from the
# home team's perspective and one from the away team's perspective, swapping
# team/opponent and points_for/points_against accordingly.
TEAM_GAME_WEEKS_SQL = """
CREATE OR REPLACE TABLE team_game_weeks AS
WITH base AS (
  SELECT
    s.season,
    s.week,
    s.game_id,
    TRY_CAST(CAST(s.gameday AS VARCHAR) AS DATE) AS game_date,
    s.home_team,
    s.away_team,
    s.home_score,
    s.away_score
  FROM schedules s
)
SELECT
  season,
  week,
  game_id,
  game_date,
  home_team AS team,
  away_team AS opponent,
  1 AS home_flag,
  0 AS away_flag,
  home_score AS points_for,
  away_score AS points_against
FROM base

UNION ALL

SELECT
  season,
  week,
  game_id,
  game_date,
  away_team AS team,
  home_team AS opponent,
  0 AS home_flag,
  1 AS away_flag,
  away_score AS points_for,
  home_score AS points_against
FROM base
"""

# CREATE OR REPLACE lets this cell be re-run without dropping the table first.
con.execute(TEAM_GAME_WEEKS_SQL)

# Report how many team-perspective rows were written.
team_game_weeks_rows = con.execute("SELECT COUNT(*) FROM team_game_weeks").fetchone()[0]
print("team_game_weeks", team_game_weeks_rows)

Quick sanity check that the generated composite key (`season`, `week`, `team`, `game_id`) is unique, confirming there are no accidental duplicates: `rows` and `distinct_keys` should be equal.

In [None]:
# Compare the total row count with the number of distinct key tuples.
# The key is compared column-by-column via SELECT DISTINCT rather than as a
# concatenated string: with `||`, a NULL in any key column makes the whole
# key NULL, and COUNT(DISTINCT ...) skips NULLs, so such rows would be
# dropped from the distinct count. A '-' delimiter could also merge distinct
# keys whose parts contain '-'.
key_check = con.execute("""
SELECT
  (SELECT COUNT(*) FROM team_game_weeks) AS rows,
  (
    SELECT COUNT(*)
    FROM (
      SELECT DISTINCT season, week, team, game_id
      FROM team_game_weeks
    ) AS k
  ) AS distinct_keys
""").df()

total_rows = int(key_check["rows"].iloc[0])
distinct_keys = int(key_check["distinct_keys"].iloc[0])

# Turn the visual check into an enforced invariant so Run All stops on duplicates.
assert total_rows == distinct_keys, (
    f"team_game_weeks has duplicate (season, week, team, game_id) keys: "
    f"{total_rows} rows vs {distinct_keys} distinct keys"
)

# Last expression: display the one-row summary frame, as before.
key_check

This checkpoints the database to reduce WAL growth and closes the connection cleanly.

In [None]:
# Flush pending write-ahead-log contents into the main database file, then
# release the connection. The close happens in `finally` so the database file
# is not left held open by this kernel if the CHECKPOINT statement raises.
try:
    con.execute("CHECKPOINT")
finally:
    con.close()

# Only reached when the checkpoint succeeded and the connection was closed.
print("db successfully closed")