In [None]:
# 🚀 Auto-setup: installs deps + configures CFBD access
# The script path is relative, so it is looked up from the current working directory.
%run ./_auto_setup.py


In [None]:
# Cell 1: Imports
import pandas as pd
import os
import sys
from pathlib import Path

# Make the starter pack's `config` package importable.
# The package is expected in a "config" folder under the current working
# directory, so that folder's parent is what has to be on sys.path.
_config_dir = Path().resolve() / "config"
_package_root = str(_config_dir.parent)
if _package_root not in sys.path:
    sys.path.insert(0, _package_root)
from config.data_config import get_starter_pack_config

# Shared configuration object used by the cells below
config = get_starter_pack_config()

# Helper to load & preview files
def preview_csv(path, n=5):
    """Print a short preview of a CSV file.

    Reads the whole file with ``pd.read_csv`` and prints its base name, its
    column count, and ``n`` rows.

    Args:
        path: Location of the CSV file, as a ``str`` or any ``os.PathLike``
            object (e.g. ``pathlib.Path``).
        n: Number of rows to print. Defaults to 5.

    Returns:
        None. The preview is written to stdout.
    """
    # Normalise once: the substring test below raises TypeError on a Path
    # object, so convert path-like arguments to their string form up front.
    path = os.fspath(path)
    df = pd.read_csv(path)
    print(f"\nüìÑ {os.path.basename(path)}")
    print(f"Columns: {len(df.columns)}")
    # Paths containing "games.csv" show their last rows; everything else shows
    # its first rows (presumably so the most recent games are displayed —
    # TODO confirm the file is ordered oldest-first).
    if "games.csv" in path:
        print(df.tail(n))
    else:
        print(df.head(n))

# 📘 Data Dictionary

Welcome to the College Football Data Starter Pack!

This notebook serves as a reference guide to help you understand what’s included in the ZIP archive and how the different datasets relate to each other.

Each dataset listed below is accompanied by a short description and a preview of the columns and data format.

## 🧭 Metadata Files

In [None]:
# Preview the metadata files in order: conferences.csv, then teams.csv
for _metadata_file in ("conferences.csv", "teams.csv"):
    preview_csv(str(config.get_data_path(_metadata_file)))

### `conferences.csv`
Basic info about FBS-level conferences.

- `name`: Full name of the conference
- `abbreviation`: Short label (e.g., SEC, Big Ten)
- `division`: NCAA division (typically "fbs")

---

### `teams.csv`
Master reference of all teams with full metadata.

- Includes `classification`, `conference`, home venue info, location, elevation, etc.

## 🏟️ Game-Level Data

In [None]:
# Preview games.csv, resolved through the starter pack config
games_path = config.get_data_path("games.csv")
preview_csv(str(games_path))

In [None]:
# Preview game_stats/{current_year}.csv for the season reported by the config
current_year = config.current_year
game_stats_path = config.get_data_path(f"game_stats/{current_year}.csv")
preview_csv(str(game_stats_path))

### `games.csv`
Master list of games from 1869–present. Includes team IDs, points, Elo ratings, win probabilities, venue info, and more.

---

### `game_stats/YYYY.csv`
Traditional box score stats per team per game (yardage, completions, sacks, etc.). Use with `games.csv` to connect metadata.

## 📊 Season-Level Stats

In [None]:
# Preview season_stats/{current_year}.csv for the season reported by the config
current_year = config.current_year
season_stats_path = config.get_data_path(f"season_stats/{current_year}.csv")
preview_csv(str(season_stats_path))

In [None]:
# Preview advanced_season_stats/{current_year}.csv for the season reported by the config
current_year = config.current_year
advanced_season_stats_path = config.get_data_path(
    f"advanced_season_stats/{current_year}.csv"
)
preview_csv(str(advanced_season_stats_path))

### `season_stats/YYYY.csv`
Raw season-long team stats (first downs, turnovers, penalties, etc.)

---

### `advanced_season_stats/YYYY.csv`
Custom-derived season metrics:
- EPA, explosiveness, success rates
- Down splits (standard vs passing)
- Field position, havoc, efficiency metrics

## 🧬 Game-Level Advanced Stats

In [None]:
# Preview advanced_game_stats/{current_year}.csv for the season reported by the config
current_year = config.current_year
advanced_game_stats_path = config.get_data_path(
    f"advanced_game_stats/{current_year}.csv"
)
preview_csv(str(advanced_game_stats_path))

### `advanced_game_stats/YYYY.csv`
Per-team, per-game advanced stats derived from play-by-play data.

Includes EPA, success rate, explosiveness, line yards, etc.

## 🔄 Play & Drive Data

In [None]:
# Preview drives/drives_{current_year}.csv; the config builds the drives path
# from the season year
current_year = config.current_year
drives_path = config.get_drives_path(current_year)
preview_csv(str(drives_path))

In [None]:
# plays/{current_year}/regular_13_plays.csv
# Preview one play-by-play file for the season reported by the config.
current_year = config.current_year
# NOTE(review): assumes config.data_dir is a pathlib.Path (it is already
# combined with "/" here) — confirm.
plays_dir = config.data_dir / "plays" / str(current_year)
plays_path = plays_dir / "regular_13_plays.csv"
if not plays_path.exists():
    # The week is hard-coded but the year is not, so the week-13 file may be
    # missing for the current season. Fall back to another plays file from
    # the same season (last in lexicographic order) rather than failing.
    available_plays = sorted(plays_dir.glob("*_plays.csv"))
    if available_plays:
        plays_path = available_plays[-1]
# If nothing was found, plays_path still names the week-13 file and
# preview_csv raises the same FileNotFoundError as before.
preview_csv(str(plays_path))

### `drives/drives_YYYY.csv`
One row per drive. Includes periods, starting/ending yard lines, results, and score progression.

---

### `plays/YYYY/SEASONTYPE_WEEK_plays.csv`
Play-by-play data with down, distance, yardage, play type, and custom PPA metric.

## 🧩 Relationships & Tips

- Use `gameId` to join plays → drives → games → advanced stats
- Use `team_id` or `school` to join stats with `teams.csv`
- Normalize time columns like `possessionTime` if needed