# Data Overview

Quick-start notebook showing how to load and explore the FRED data.

In [None]:
import sys
from pathlib import Path

# Add project root to path so we can import lib/.
# The root is taken to be the parent of the current working directory, so the
# kernel must be started from a directory that sits directly under the root.
PROJECT_ROOT = Path.cwd().parent
# Guard the insert: without it, every re-run of this cell would push another
# duplicate entry onto sys.path.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

import json
import pandas as pd
import matplotlib.pyplot as plt

# Locations used throughout the notebook, both resolved from the project root:
# the directory of per-series CSV files and the JSON series configuration.
DATA_DIR = PROJECT_ROOT.joinpath("data", "raw", "fred")
CONFIG = PROJECT_ROOT.joinpath("config", "series.json")

## Load series config

In [None]:
# Read the series configuration; only its "fred" section is used here.
# JSON is UTF-8 by specification, so pin the encoding rather than relying on
# the platform's locale default.
with open(CONFIG, encoding="utf-8") as f:
    config = json.load(f)["fred"]

# Flatten {category: [series, ...]} into one record per series, tagging each
# record with the category it came from. Each record is a copy, so `config`
# itself stays exactly as it was loaded.
all_series = [
    {**item, "category_key": cat}
    for cat, items in config.items()
    for item in items
]

series_df = pd.DataFrame(all_series)
print(f"{len(series_df)} series configured across {len(config)} categories")
series_df.head(10)

## Load a single CSV

In [None]:
def load_series(series_id: str, data_dir: "Path | None" = None) -> pd.DataFrame:
    """Load a FRED series CSV as a DataFrame.

    Reads ``<data_dir>/<series_id>.csv``, parses its ``date`` column as
    datetimes, and returns the rows in ascending date order with a fresh
    0..n-1 index.

    Args:
        series_id: File stem of the CSV to read, e.g. ``"DGS10"``.
        data_dir: Directory containing the CSV files. Defaults to the
            notebook-level ``DATA_DIR`` when omitted.

    Returns:
        The file's contents as a DataFrame sorted by ``date``.

    Raises:
        FileNotFoundError: If no CSV exists for ``series_id`` (raised by
            ``pandas.read_csv``).
    """
    base_dir = DATA_DIR if data_dir is None else Path(data_dir)
    path = base_dir / f"{series_id}.csv"
    df = pd.read_csv(path, parse_dates=["date"])
    # mergesort is stable: rows that share a date keep their file order,
    # which the default quicksort does not guarantee.
    return df.sort_values("date", kind="mergesort").reset_index(drop=True)

# Try the loader on one series: report its row count and date span, then show
# the most recent observations.
dgs10 = load_series("DGS10")
dgs10_dates = dgs10["date"]
print(f"DGS10: {len(dgs10)} rows, {dgs10_dates.min()} to {dgs10_dates.max()}")
dgs10.tail()

## Plot a few series

In [None]:
# (series id, panel title) pairs -- one per subplot, filled in the order that
# `axes.flat` walks the 2x2 grid.
panels = [
    ("DGS10", "10-Year Treasury"),
    ("UNRATE", "Unemployment Rate"),
    ("CPIAUCSL", "CPI All Urban"),
    ("T10Y2Y", "10Y-2Y Spread"),
]

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 8))

for panel_ax, (series_id, panel_title) in zip(axes.flat, panels):
    observations = load_series(series_id)
    panel_ax.plot(observations["date"], observations["value"], linewidth=0.8)
    panel_ax.set_title(panel_title)
    panel_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Load the master workbook

After running `python scripts/build_master_sheet.py`, the master workbook
contains all series aligned on a common date axis.

In [None]:
WORKBOOK = PROJECT_ROOT / "data" / "master_workbook.xlsx"

# The workbook is a build artifact, so fall back to a hint when it is missing
# instead of raising.
if WORKBOOK.exists():
    all_data = pd.read_excel(WORKBOOK, sheet_name="All Data")
    print(f"Master workbook: {all_data.shape[0]} rows x {all_data.shape[1]} columns")
    print(f"Date range: {all_data['date'].min()} to {all_data['date'].max()}")
    # A bare expression nested inside `if` is not the cell's last top-level
    # expression, so Jupyter would silently discard it. Render the preview
    # explicitly; `display` is provided as a builtin by the IPython kernel.
    display(all_data.head())
else:
    print("Master workbook not found. Run: python scripts/build_master_sheet.py")

## Date alignment across frequencies

The series arrive at different native frequencies: daily (Treasury rates), weekly (initial claims), monthly (CPI), and quarterly (GDP).
The master workbook forward-fills them so they line up on a common date axis.

In [None]:
# Compare raw frequencies: for each series, print how many rows its CSV holds
# and the first and last dates it covers.
for sid in ("DGS10", "CPIAUCSL", "ICSA", "GDP"):
    df = load_series(sid)
    dates = df["date"]
    n_rows = len(df)
    print(f"{sid:20s}  {n_rows:>6} rows  ({dates.min()} to {dates.max()})")