# Setup

In [0]:
!pip install nfl-data-py

Collecting nfl-data-py
  Downloading nfl_data_py-0.3.3-py3-none-any.whl.metadata (12 kB)
Collecting appdirs>1 (from nfl-data-py)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting fastparquet>0.5 (from nfl-data-py)
  Downloading fastparquet-2024.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)
Collecting cramjam>=2.3 (from fastparquet>0.5->nfl-data-py)
  Downloading cramjam-2.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Collecting fsspec (from fastparquet>0.5->nfl-data-py)
  Downloading fsspec-2025.7.0-py3-none-any.whl.metadata (12 kB)
Downloading nfl_data_py-0.3.3-py3-none-any.whl (13 kB)
Downloading appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB)
Downloading fastparquet-2024.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.8 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━

In [0]:
import nfl_data_py as nfl
import pandas as pd

In [0]:
# Seasons 2009 through 2024 inclusive (the upper bound of range() is exclusive).
ALL_AVAILABLE_SEASONS = [*range(2009, 2025)]

In [0]:
def save_table_to_warehouse(df: pd.DataFrame, table_name: str):
    """Write a pandas DataFrame to a local CSV and to a Spark table.

    Two copies are produced, in this order:

    1. ``/tmp/<table_name>.csv`` -- written with ``DataFrame.to_csv`` and
       without the index column.
    2. A Spark table called ``table_name`` -- the frame is converted with
       ``spark.createDataFrame`` and saved in ``"overwrite"`` mode.

    Args:
        df: The pandas frame to persist.
        table_name: Used both as the CSV file stem and as the Spark table name.

    Note:
        Relies on a ``spark`` session object being available as a global;
        it is not defined or imported in this function.
    """
    csv_path = f'/tmp/{table_name}.csv'
    df.to_csv(csv_path, index=False)

    spark_frame = spark.createDataFrame(df)
    table_writer = spark_frame.write.mode("overwrite")
    table_writer.saveAsTable(table_name)

# 1. Games

In [0]:
# Load schedules for every season listed in ALL_AVAILABLE_SEASONS.
schedules = nfl.import_schedules(ALL_AVAILABLE_SEASONS)

In [0]:
# Sanity check: print the row count, then show the column index as the cell value.
print(schedules.shape[0])
schedules.columns

4345


Index(['game_id', 'season', 'game_type', 'week', 'gameday', 'weekday',
       'gametime', 'away_team', 'away_score', 'home_team', 'home_score',
       'location', 'result', 'total', 'overtime', 'old_game_id', 'gsis',
       'nfl_detail_id', 'pfr', 'pff', 'espn', 'ftn', 'away_rest', 'home_rest',
       'away_moneyline', 'home_moneyline', 'spread_line', 'away_spread_odds',
       'home_spread_odds', 'total_line', 'under_odds', 'over_odds', 'div_game',
       'roof', 'surface', 'temp', 'wind', 'away_qb_id', 'home_qb_id',
       'away_qb_name', 'home_qb_name', 'away_coach', 'home_coach', 'referee',
       'stadium_id', 'stadium'],
      dtype='object')

In [0]:
# Persist schedules as /tmp/bronze_games.csv and overwrite the bronze_games table.
save_table_to_warehouse(df=schedules, table_name='bronze_games')

# 2. Injury Reports

In [0]:
# Load injury reports for every season listed in ALL_AVAILABLE_SEASONS.
injuries = nfl.import_injuries(ALL_AVAILABLE_SEASONS)

In [0]:
# Sanity check: print the row count, then show the column index as the cell value.
print(injuries.shape[0])
injuries.columns

84684


Index(['season', 'game_type', 'team', 'week', 'gsis_id', 'position',
       'full_name', 'first_name', 'last_name', 'report_primary_injury',
       'report_secondary_injury', 'report_status', 'practice_primary_injury',
       'practice_secondary_injury', 'practice_status', 'date_modified'],
      dtype='object')

In [0]:
# Persist injuries as /tmp/bronze_injuries.csv and overwrite the bronze_injuries table.
save_table_to_warehouse(df=injuries, table_name='bronze_injuries')

# 3. Snap Counts

In [0]:
# Load snap counts for seasons 2013 through 2024 (the range end is exclusive).
# NOTE(review): this uses its own hard-coded range rather than
# ALL_AVAILABLE_SEASONS (which starts at 2009) -- confirm the 2013 start is intended.
snap_counts = nfl.import_snap_counts(range(2013, 2025))

In [0]:
# Sanity check: print the row count, then show the column index as the cell value.
print(snap_counts.shape[0])
snap_counts.columns

295583


Index(['game_id', 'pfr_game_id', 'season', 'game_type', 'week', 'player',
       'pfr_player_id', 'position', 'team', 'opponent', 'offense_snaps',
       'offense_pct', 'defense_snaps', 'defense_pct', 'st_snaps', 'st_pct',
       '.progress'],
      dtype='object')

In [0]:
# Persist snap counts as /tmp/bronze_snap_counts.csv and overwrite the
# bronze_snap_counts table.
# NOTE(review): the column listing displayed for snap_counts includes a
# '.progress' column -- confirm it should be written to the table as-is.
save_table_to_warehouse(df=snap_counts, table_name='bronze_snap_counts')

# 4. Depth Charts

In [0]:
# Load depth charts for seasons 2013 through 2024 (the range end is exclusive).
# NOTE(review): this uses its own hard-coded range rather than
# ALL_AVAILABLE_SEASONS (which starts at 2009) -- confirm the 2013 start is intended.
depth_charts = nfl.import_depth_charts(range(2013, 2025))

In [0]:
# Persist depth charts as /tmp/bronze_depth_charts.csv and overwrite the
# bronze_depth_charts table.
save_table_to_warehouse(df=depth_charts, table_name='bronze_depth_charts')