# Why Predicting Distributions Beats Point Predictions

**The Core Insight**: A point prediction like "+0.5% expected return" throws away critical information:
- How confident is the model?
- What's the range of possible outcomes?
- What's the probability of a catastrophic loss?

**Distribution prediction captures ALL of this.**

---

## The temporalpdf Workflow: 3 Lines

```python
params = tpdf.fit_nig(returns)              # Fit
var_5 = tpdf.var(tpdf.NIG(), params, 0.05)  # Query risk
if expected > 0 and var_5 < max_risk: trade()  # Decide
```

In [None]:
import sys
from pathlib import Path
sys.path.insert(0, str(Path.cwd().parent / "src"))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import temporalpdf as tpdf

%matplotlib inline
# Default canvas size for the figures drawn in this notebook
plt.rcParams['figure.figsize'] = (14, 5)

# Real market data lives in the "data" directory one level above the
# current working directory; one CSV per market.
DATA_DIR = Path.cwd().parent / "data"
csv_by_market = {
    'BTC': 'crypto_returns.csv',
    'S&P 500': 'equity_returns.csv',
    'EUR/USD': 'forex_returns.csv',
}
datasets = {
    market: pd.read_csv(DATA_DIR / csv_name)
    for market, csv_name in csv_by_market.items()
}

# Quick sanity check: sample size and kurtosis of every return series.
# scipy's default (Fisher) definition is used, under which a normal scores 0.
for name, df in datasets.items():
    r = df['return_pct'].values
    n_days = len(r)
    kurt = stats.kurtosis(r)
    print(f"{name}: {n_days:,} days, kurtosis={kurt:.1f} (Normal=0)")

---
# Part 1: The Core API

**Fit a distribution, get risk metrics, make decisions.**

In [None]:
# End-to-end example on S&P 500 returns: fit, query risk, query probabilities
returns = datasets['S&P 500']['return_pct'].values

# Step 1 - FIT: a single call estimates the NIG parameters
params = tpdf.fit_nig(returns)
print(
    f"Fitted NIG: mu={params.mu:.4f}, delta={params.delta:.4f}, "
    f"alpha={params.alpha:.2f}, beta={params.beta:.3f}"
)

# Step 2 - QUERY: risk metrics computed from the fitted parameters
nig = tpdf.NIG()
var_5 = tpdf.var(nig, params, alpha=0.05)
cvar_5 = tpdf.cvar(nig, params, alpha=0.05)
kelly = tpdf.kelly_fraction(nig, params)

print("\nRisk Metrics:")
print(f"  VaR(5%):  {var_5:.2f}% (only 5% chance of losing more)")
print(f"  CVaR(5%): {cvar_5:.2f}% (expected loss in worst 5%)")
print(f"  Kelly:    {kelly:.1%} (optimal position size)")

# Step 3 - PROBABILITY QUERIES: thresholds are in the same units as the
# returns (percent), each evaluated right before it is reported
print("\nProbability Queries:")
p_below = tpdf.prob_less_than(nig, params, -1.0)
print(f"  P(return < -1%): {p_below:.2%}")
p_above = tpdf.prob_greater_than(nig, params, 1.0)
print(f"  P(return > +1%): {p_above:.2%}")
p_inside = tpdf.prob_between(nig, params, -0.5, 0.5)
print(f"  P(-0.5% < return < +0.5%): {p_inside:.2%}")

---
# Part 2: Why Not Just Use Normal?

**The normal distribution understates tail risk.** It predicts a 3-sigma move (in either direction) only about once every 370 trading days. In real markets such moves occur roughly 3-10x more often.

In [None]:
# Count extreme events: how many returns land more than k standard deviations
# away from their mean, versus how many a normal distribution would predict
# for a sample of the same size.
print("EXTREME EVENTS: Normal Prediction vs Reality")
print("=" * 60)

for name, df in datasets.items():
    returns = df['return_pct'].values
    sigma = np.std(returns)
    n = len(returns)
    # sigma measures spread around the sample mean and the normal tail
    # probability below is for |X - mean| > k*sigma, so exceedances must be
    # counted on mean-centred returns rather than on raw returns.
    abs_deviation = np.abs(returns - np.mean(returns))

    for k in [3, 4]:
        actual = np.sum(abs_deviation > k * sigma)
        # Two-sided tail probability P(|Z| > k) scaled to the sample size;
        # sf(k) is the survival function, i.e. 1 - cdf(k) without cancellation.
        normal_expected = 2 * stats.norm.sf(k) * n
        # Floor the denominator so the ratio stays finite for tiny expectations
        ratio = actual / max(normal_expected, 0.001)
        print(f"{name} {k}σ events: {actual} actual vs {normal_expected:.1f} expected ({ratio:.1f}x more)")

In [None]:
# Visual check: overlay fitted Normal and NIG densities on each market's
# return histogram, one panel per dataset
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
nig = tpdf.NIG()

for ax, (name, df) in zip(axes, datasets.items()):
    returns = df['return_pct'].values

    # Fit both candidate distributions with the library's fitting helpers
    nig_params = tpdf.fit_nig(returns)
    norm_params = tpdf.fit_normal(returns)

    # Empirical density first, then both fitted curves on a grid spanning
    # the observed range of returns
    ax.hist(returns, bins=100, density=True, alpha=0.6, color='steelblue', label='Actual')
    x = np.linspace(returns.min(), returns.max(), 500)
    normal_density = stats.norm.pdf(x, norm_params.mu_0, norm_params.sigma_0)
    # NOTE(review): the second argument (0) is presumably the time
    # coordinate of the temporal pdf - confirm against the temporalpdf API
    nig_density = nig.pdf(x, 0, nig_params)
    ax.plot(x, normal_density, 'r--', lw=2, label='Normal')
    ax.plot(x, nig_density, 'g-', lw=2, label='NIG')

    ax.set_title(name, fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3)

# Figure-level title sits slightly above the panels (y > 1)
plt.suptitle('NIG Captures Fat Tails', fontweight='bold', y=1.02)
plt.tight_layout()
plt.savefig('01_nig_vs_normal.png', dpi=150, bbox_inches='tight', facecolor='white')
plt.show()

---
# Part 3: Same Expected Return, Different Risk

**A point prediction of +0.5% tells you nothing about risk.** Two distributions can have the same mean but completely different tail behavior.

In [None]:
# Build two NIG parameter sets meant to share the same expected return
# (+0.5) while having very different shapes, then compare their risk metrics
nig = tpdf.NIG()

# Calm market: narrow and symmetric (beta = 0)
params_calm = tpdf.NIGParameters(mu=0.5, delta=0.4, alpha=4.0, beta=0.0)

# Volatile market: wide, with negative skew (crashes more likely).
# The skew term delta*beta/sqrt(alpha^2 - beta^2) is subtracted from mu so
# that E[X] matches the calm case - assumes tpdf uses the standard NIG
# parameterization where this term is added to mu to give the mean.
delta_v, alpha_v, beta_v = 1.5, 2.0, -0.5
gamma_v = np.sqrt(alpha_v**2 - beta_v**2)
adjustment = delta_v * beta_v / gamma_v
params_volatile = tpdf.NIGParameters(
    mu=0.5 - adjustment,
    delta=delta_v,
    alpha=alpha_v,
    beta=beta_v,
)

# Side-by-side table built from the library's risk functions
print("SAME EXPECTED RETURN, DIFFERENT RISK")
print("=" * 50)
print(f"{'Metric':<20} {'Calm':<15} {'Volatile'}")
print("-" * 50)

var_calm = tpdf.var(nig, params_calm, 0.05)
var_volatile = tpdf.var(nig, params_volatile, 0.05)
print(f"{'VaR(5%)':<20} {var_calm:.2f}%{'':<10} {var_volatile:.2f}%")

cvar_calm = tpdf.cvar(nig, params_calm, 0.05)
cvar_volatile = tpdf.cvar(nig, params_volatile, 0.05)
print(f"{'CVaR(5%)':<20} {cvar_calm:.2f}%{'':<10} {cvar_volatile:.2f}%")

kelly_calm = tpdf.kelly_fraction(nig, params_calm)
kelly_volatile = tpdf.kelly_fraction(nig, params_volatile)
print(f"{'Kelly fraction':<20} {kelly_calm:.1%}{'':<10} {kelly_volatile:.1%}")

print("\nA point prediction of '+0.5%' cannot distinguish these!")

In [None]:
# Visual: both densities on one axis, with the shared mean marked
fig, ax = plt.subplots(figsize=(10, 5))
x = np.linspace(-8, 8, 500)

# Evaluate each density once on the grid and reuse it for line and shading
density_calm = nig.pdf(x, 0, params_calm)
density_volatile = nig.pdf(x, 0, params_volatile)

ax.plot(x, density_calm, 'g-', lw=3, label='Calm Market')
ax.plot(x, density_volatile, 'r-', lw=3, label='Volatile Market')
ax.axvline(0.5, color='blue', ls='--', lw=2, label='Same E[X] = +0.5%')

# Shade the volatile distribution's tail to the left of -VaR(5%).
# NOTE(review): negating var_v assumes tpdf.var reports the loss as a
# positive number - confirm against the library.
var_v = tpdf.var(nig, params_volatile, 0.05)
beyond_var = x < -var_v
ax.fill_between(x, density_volatile, where=beyond_var, alpha=0.3, color='red')

ax.set_xlabel('Return (%)')
ax.set_ylabel('Density')
ax.set_title('Same Mean, Different Risk', fontweight='bold')
ax.legend()
ax.grid(True, alpha=0.3)
plt.savefig('02_same_mean_different_risk.png', dpi=150, bbox_inches='tight', facecolor='white')
plt.show()

---
# Part 4: Choosing the Best Distribution

**Don't guess - measure.** Use `tpdf.select_best_distribution()` or `tpdf.compare_distributions()`.

In [None]:
# Let the library pick the best-scoring candidate distribution per market
print("DISTRIBUTION SELECTION")
print("=" * 60)

for name, df in datasets.items():
    returns = df['return_pct'].values
    result = tpdf.select_best_distribution(
        returns,
        candidates=['normal', 'student_t', 'nig'],
    )

    print(f"\n{name}:")
    print(f"  Best: {result['best'].upper()} (confidence: {result['confidence']})")

    # List every candidate's score in ascending score order
    ranked_scores = sorted(result['scores'].items(), key=lambda pair: pair[1])
    score_text = ", ".join(f"{k}={v:.4f}" for k, v in ranked_scores)
    print("  CRPS: " + score_text)

In [None]:
# 5-fold cross-validated comparison on S&P 500 returns, including the
# library's significance flag for the winner
returns = datasets['S&P 500']['return_pct'].values
result = tpdf.compare_distributions(returns, n_folds=5)

print("CROSS-VALIDATED COMPARISON (S&P 500)")
print("=" * 50)
winner = result['winner'].upper()
print(f"Winner: {winner}")
print(f"Statistically significant: {result['significant']}")

# Per-distribution mean score with its spread across folds
mean_scores = result['mean_scores']
std_scores = result['std_scores']
for dist in ('normal', 'student_t', 'nig'):
    print(f"  {dist}: {mean_scores[dist]:.4f} +/- {std_scores[dist]:.4f}")

---
# Part 5: VaR Backtest

**Does the distribution give accurate risk estimates?** Use `tpdf.rolling_var_backtest()`.

In [None]:
# Rolling VaR backtest on S&P 500 returns: one row per estimation method,
# reporting how often realized returns breached the 5% VaR estimate
returns = datasets['S&P 500']['return_pct'].values

table_header = f"{'Distribution':<15} {'Exceedance Rate':<18} {'Status'}"
print("VAR BACKTEST (5% target, 252-day lookback)")
print("=" * 50)
print(table_header)
print("-" * 50)

for dist in ('historical', 'normal', 'student_t', 'nig'):
    result = tpdf.rolling_var_backtest(returns, distribution=dist, lookback=252, alpha=0.05)
    # The backtest reports a fraction; the table shows it as a percentage
    exceedance_pct = result['exceedance_rate'] * 100
    print(f"{dist:<15} {exceedance_pct:>6.1f}%             {result['status']}")

print("\nPASS = exceedance rate in [3%, 7%]")

---
# Part 6: Regime Adaptation

**Distribution shape changes with market conditions.** High volatility = wider distribution, more tail risk.

In [None]:
# Split S&P 500 returns into high- and low-volatility regimes using a
# 60-observation rolling standard deviation, then fit an NIG to each
returns = datasets['S&P 500']['return_pct'].values
rolling_vol = pd.Series(returns).rolling(60).std().values

# Regime cut-offs: top and bottom quintile of rolling volatility. The first
# 59 entries of rolling_vol are NaN (incomplete window); nanpercentile skips
# them and the comparisons below evaluate to False there, so those days fall
# into neither regime.
vol_p80 = np.nanpercentile(rolling_vol, 80)
vol_p20 = np.nanpercentile(rolling_vol, 20)
high_vol_idx = rolling_vol > vol_p80
low_vol_idx = rolling_vol < vol_p20

# Fit each regime separately with the library's fitting helper
params_high = tpdf.fit_nig(returns[high_vol_idx])
params_low = tpdf.fit_nig(returns[low_vol_idx])

nig = tpdf.NIG()
print("REGIME-SPECIFIC RISK")
print("=" * 40)
print(f"{'Metric':<15} {'Low Vol':<12} {'High Vol'}")
print("-" * 40)

var_low = tpdf.var(nig, params_low, 0.05)
var_high = tpdf.var(nig, params_high, 0.05)
print(f"{'VaR(5%)':<15} {var_low:.2f}%{'':<8} {var_high:.2f}%")

cvar_low = tpdf.cvar(nig, params_low, 0.05)
cvar_high = tpdf.cvar(nig, params_high, 0.05)
print(f"{'CVaR(5%)':<15} {cvar_low:.2f}%{'':<8} {cvar_high:.2f}%")

In [None]:
# Visual: fitted low- and high-volatility densities on a common axis
fig, ax = plt.subplots(figsize=(10, 5))
x = np.linspace(-6, 6, 500)

# Evaluate each regime's density once; reused for the outline and the shading
density_low = nig.pdf(x, 0, params_low)
density_high = nig.pdf(x, 0, params_high)

# Outlines first, then translucent fills, low-volatility regime first each time
outlines = (
    (density_low, 'g-', 'Low Volatility'),
    (density_high, 'r-', 'High Volatility'),
)
for density, line_fmt, label in outlines:
    ax.plot(x, density, line_fmt, lw=3, label=label)

for density, shade in ((density_low, 'green'), (density_high, 'red')):
    ax.fill_between(x, density, alpha=0.2, color=shade)

ax.set_xlabel('Return (%)')
ax.set_ylabel('Density')
ax.set_title('Distributions Adapt to Market Regime', fontweight='bold')
ax.legend()
ax.grid(True, alpha=0.3)
plt.savefig('03_regime_adaptation.png', dpi=150, bbox_inches='tight', facecolor='white')
plt.show()

---
# Summary: The Complete Workflow

```python
import temporalpdf as tpdf

# Fit distribution to data
params = tpdf.fit_nig(returns)

# Get risk metrics
dist = tpdf.NIG()
var_5 = tpdf.var(dist, params, alpha=0.05)
cvar_5 = tpdf.cvar(dist, params, alpha=0.05)
kelly = tpdf.kelly_fraction(dist, params)

# Probability queries
p_loss = tpdf.prob_less_than(dist, params, threshold=-1.0)
p_gain = tpdf.prob_greater_than(dist, params, threshold=1.0)

# Make decision
if expected_return > 0 and var_5 < max_acceptable_risk:
    execute_trade(position_size=kelly)
```

**Point predictions throw away critical information. Distribution predictions capture everything.**

In [None]:
# Final demonstration: full risk profile from the most recent 252
# observations of S&P 500 returns
returns = datasets['S&P 500']['return_pct'].values[-252:]

params = tpdf.fit_nig(returns)
dist = tpdf.NIG()

print("S&P 500 (Last 252 Days)")
print("=" * 40)
print(
    f"Parameters: mu={params.mu:.4f}, delta={params.delta:.4f}, "
    f"alpha={params.alpha:.2f}, beta={params.beta:.3f}"
)

# Each metric is computed immediately before it is reported
print("\nRisk Profile:")
var_5pct = tpdf.var(dist, params, 0.05)
print(f"  VaR(5%):        {var_5pct:.2f}%")
var_1pct = tpdf.var(dist, params, 0.01)
print(f"  VaR(1%):        {var_1pct:.2f}%")
cvar_5pct = tpdf.cvar(dist, params, 0.05)
print(f"  CVaR(5%):       {cvar_5pct:.2f}%")
kelly_size = tpdf.kelly_fraction(dist, params)
print(f"  Kelly fraction: {kelly_size:.1%}")

# "loss > 1%" is queried as P(return < -1.0); "gain > 1%" as P(return > 1.0)
print("\nProbabilities:")
p_loss = tpdf.prob_less_than(dist, params, -1.0)
print(f"  P(loss > 1%):   {p_loss:.2%}")
p_gain = tpdf.prob_greater_than(dist, params, 1.0)
print(f"  P(gain > 1%):   {p_gain:.2%}")