# 01 — Data Exploration

This notebook walks through:
1. Fetching FRED data via `FREDClient`
2. Applying stationarity transforms
3. Basic exploratory data analysis (correlations, stationarity tests)


In [None]:
# Notebook setup: import path, environment variables, and plotting defaults.
import sys, os
# Put the parent directory first on the import path so the project's `src`
# package (imported in a later cell) resolves when running from this folder.
sys.path.insert(0, os.path.abspath('..'))
from dotenv import load_dotenv
# Load variables from a `.env` file into the process environment; the FRED
# client created in the next code cell reads FRED_API_KEY from there.
load_dotenv()

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Use seaborn's 'darkgrid' theme for the plots that follow.
sns.set_theme(style='darkgrid')

## Fetch Data

Requires `FRED_API_KEY` in your `.env` file.

In [None]:
from src.data.fred_client import FREDClient
from src.data.data_pipeline import DataPipeline

# Build the FRED client and the pipeline that fetches and transforms the series.
client = FREDClient()  # reads FRED_API_KEY from env
pipeline = DataPipeline(fred_client=client, start_date='2000-01-01')

# Fetch and transform all series
# NOTE(review): save_vintage=False presumably skips persisting a snapshot of
# this run — confirm against DataPipeline.run.
panel = pipeline.run(save_vintage=False)

# Fail fast with a readable message: the date-range print below indexes the
# first and last rows, which would otherwise raise a bare IndexError on an
# empty panel.
assert not panel.empty, 'DataPipeline.run() returned an empty panel; nothing to explore.'

print(f'Panel shape: {panel.shape}')
print(f'Date range: {panel.index[0].date()} — {panel.index[-1].date()}')
panel.head()

## Missing Data Heatmap

In [None]:
# Transpose the NaN mask so each row is a series and each column a date;
# missing observations show up as shaded cells.
fig, ax = plt.subplots(figsize=(16, 4))
sns.heatmap(
    data=panel.isna().transpose(),
    ax=ax,
    cmap='Blues',
    cbar=False,
)
ax.set(title='Missing Data (ragged edge)')
fig.tight_layout()
plt.show()

## Correlation Matrix (Labor Market Series)

In [None]:
# Correlate whichever of the labor-market series are present in the panel.
labor_series = ['PAYEMS', 'UNRATE', 'ICSA', 'U6RATE', 'CIVPART']
available = [s for s in labor_series if s in panel.columns]

# Report dropped series instead of silently plotting a smaller matrix.
skipped = [s for s in labor_series if s not in available]
if skipped:
    print(f'Not in panel, skipped: {skipped}')

# Pairwise correlations between the selected columns.
corr = panel[available].corr()
if corr.empty:
    # Nothing to correlate; skip the plot rather than draw an empty heatmap.
    print('No labor market series found in panel; skipping correlation heatmap.')
else:
    # Explicit fig/ax, matching the missing-data heatmap cell.
    fig, ax = plt.subplots()
    sns.heatmap(corr, annot=True, fmt='.2f', cmap='RdYlGn', vmin=-1, vmax=1, ax=ax)
    ax.set_title('Labor Market Series Correlations')
    plt.show()