# temporalpdf: Expanded Showcase

This notebook demonstrates the full capabilities of the `temporalpdf` library:

1. **3D Surface Plots** - Visualize how probability distributions evolve over time
2. **Multi-Asset Comparison** - Same analysis across Crypto, Equity, and Forex
3. **Confidence Bands** - Show uncertainty growth over time
4. **Tail Risk** - Distribution fitting: compare fitted Normal and NIG tails against actual returns
5. **VaR Accuracy** - Backtest Value-at-Risk estimates from Normal and NIG fits
6. **Interactive Plots** - Rotatable 3D visualizations

---

In [None]:
import sys
from pathlib import Path
sys.path.insert(0, str(Path.cwd().parent / "src"))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy import stats

import temporalpdf as tpdf
from temporalpdf.visualization import PDFPlotter, InteractivePlotter
from temporalpdf.core.result import PDFResult

%matplotlib inline
plt.rcParams['figure.figsize'] = (14, 6)
plt.rcParams['font.size'] = 11

DATA_DIR = Path.cwd().parent / "data"
print("Setup complete")

---
# Part 1: 3D Distribution Evolution

How does a probability distribution change as market conditions evolve?

We'll fit NIG distributions to rolling windows and visualize how the shape changes over time.

In [None]:
# Display label -> CSV file name (each file is read from DATA_DIR)
dataset_files = {
    'Crypto (BTC)': 'crypto_returns.csv',
    'Equity (S&P 500)': 'equity_returns.csv',
    'Forex (EUR/USD)': 'forex_returns.csv',
}
datasets = {
    label: pd.read_csv(DATA_DIR / filename)
    for label, filename in dataset_files.items()
}

# One summary line per asset: row count, mean and std of the `return_pct` column
for name, df in datasets.items():
    returns = df['return_pct'].values
    n_rows = len(df)
    mean_ret = np.mean(returns)
    std_ret = np.std(returns)
    print(f"{name}: {n_rows:,} days, Mean: {mean_ret:+.4f}%, Std: {std_ret:.4f}%")

In [None]:
from scipy import optimize

nig = tpdf.NIG()


def fit_nig_mle(data, maxiter=500):
    """Fit NIG parameters to a sample by maximum likelihood.

    The search runs over an unconstrained vector
    ``(mu, log_delta, log_alpha, beta_raw)`` which is mapped back with
    ``delta = exp(log_delta)``, ``alpha = exp(log_alpha)`` and
    ``beta = alpha * tanh(beta_raw)``, so delta and alpha stay positive and
    beta stays within +/- alpha.

    Parameters
    ----------
    data : array-like
        Observations; passed to ``nig.pdf`` as the evaluation points.
    maxiter : int, default 500
        Iteration cap handed to the Nelder-Mead optimizer.

    Returns
    -------
    tpdf.NIGParameters
        Parameters built from the optimizer's final iterate. Convergence
        (``result.success``) is not checked.
    """
    PENALTY = 1e10  # objective value reported for unusable parameter vectors

    def to_params(theta):
        """Map the unconstrained optimizer vector to NIGParameters."""
        mu, log_delta, log_alpha, beta_raw = theta
        alpha = np.exp(log_alpha)
        return tpdf.NIGParameters(
            mu=mu,
            delta=np.exp(log_delta),
            alpha=alpha,
            beta=alpha * np.tanh(beta_raw),
        )

    def nll(theta):
        """Negative log-likelihood of `data`; PENALTY when it cannot be evaluated."""
        try:
            pdf_vals = nig.pdf(data, 0, to_params(theta))
            # Floor the density so log() never sees zero
            value = -np.sum(np.log(np.maximum(pdf_vals, 1e-300)))
        except Exception:
            # Invalid parameters / numerical failure inside the library.
            # `Exception` (not a bare except) lets KeyboardInterrupt through.
            return PENALTY
        # np.maximum propagates NaN, so a NaN density would otherwise reach
        # the optimizer and derail the simplex comparisons.
        return value if np.isfinite(value) else PENALTY

    # Start from the sample mean/scale, a moderate alpha and zero skew
    x0 = [np.mean(data), np.log(np.std(data) + 0.01), np.log(5.0), 0.0]
    result = optimize.minimize(nll, x0, method='Nelder-Mead', options={'maxiter': maxiter})
    return to_params(result.x)


print("NIG fitting function ready")

In [None]:
# Rolling-window NIG fits over the most recent stretch of the S&P 500 series,
# so the fitted parameters can be followed through time

equity_returns = datasets['Equity (S&P 500)']['return_pct'].values
window_size = 60   # observations per fit
step = 20          # spacing between consecutive window starts
n_windows = 30     # number of fits

# Index of the first window's first observation
start_idx = len(equity_returns) - window_size - (n_windows * step)

# Offsets of each window's start relative to start_idx (days from start)
window_centers = [k * step for k in range(n_windows)]
fitted_params = [
    fit_nig_mle(equity_returns[start_idx + offset:start_idx + offset + window_size])
    for offset in window_centers
]

first_fit, last_fit = fitted_params[0], fitted_params[-1]
print(f"Fitted {len(fitted_params)} NIG distributions over {n_windows * step} days")
print(f"\nParameter evolution (first vs last window):")
print(f"  mu: {first_fit.mu:.4f} -> {last_fit.mu:.4f}")
print(f"  delta: {first_fit.delta:.4f} -> {last_fit.delta:.4f}")
print(f"  alpha: {first_fit.alpha:.2f} -> {last_fit.alpha:.2f}")
print(f"  beta: {first_fit.beta:.4f} -> {last_fit.beta:.4f}")

In [None]:
# 3D surface of the rolling fits: time on one axis, return on the other,
# density as height
x_grid = np.linspace(-4, 4, 200)   # return values at which each PDF is evaluated
t_grid = np.array(window_centers)  # time offset (days) of each fitted window

# One row per fitted window, one column per return value
pdf_matrix = np.array(
    [nig.pdf(x_grid, 0, window_params) for window_params in fitted_params],
    dtype=float,
)

fig = plt.figure(figsize=(14, 8))
ax = fig.add_subplot(1, 1, 1, projection='3d')

return_mesh, time_mesh = np.meshgrid(x_grid, t_grid)
surface = ax.plot_surface(
    time_mesh, return_mesh, pdf_matrix,
    cmap='viridis', edgecolor='none', alpha=0.9,
)

ax.set_title('Distribution Evolution: S&P 500 (60-day rolling NIG fits)', fontsize=14, fontweight='bold')
ax.set_xlabel('Time (days)', fontsize=12)
ax.set_ylabel('Return (%)', fontsize=12)
ax.set_zlabel('Probability Density', fontsize=12)
ax.view_init(elev=25, azim=-60)

fig.colorbar(surface, ax=ax, shrink=0.5, aspect=10, label='Density')
plt.tight_layout()
plt.savefig('3d_distribution_evolution.png', dpi=150, bbox_inches='tight', facecolor='white')
plt.show()

print("The shape changes over time - wider during volatile periods, narrower during calm periods.")

---
# Part 2: Multi-Asset 3D Comparison

Compare distribution shapes across asset classes: Crypto vs Equity vs Forex

Each has very different characteristics:
- **Crypto**: High volatility, fat tails, occasional huge moves
- **Equity**: Medium volatility, slight negative skew (crashes)
- **Forex**: Low volatility, more symmetric

In [None]:
# One NIG fit per asset, using only the most recent `window` observations
window = 250
asset_params = {}

for name, df in datasets.items():
    recent_returns = df['return_pct'].values[-window:]
    fitted = fit_nig_mle(recent_returns)
    asset_params[name] = fitted
    print(f"{name}:")
    print(f"  mu={fitted.mu:.4f}, delta={fitted.delta:.4f}, alpha={fitted.alpha:.2f}, beta={fitted.beta:.4f}")

In [None]:
# Side-by-side overlay of the three fitted densities: wide range on the left
# (shaded), zoomed range on the right (lines only)
fig, (ax_full, ax_zoom) = plt.subplots(1, 2, figsize=(15, 5))
colors = {'Crypto (BTC)': 'orange', 'Equity (S&P 500)': 'blue', 'Forex (EUR/USD)': 'green'}


def draw_asset_pdfs(target_ax, grid, shade):
    """Plot every asset's fitted NIG density on `target_ax`; optionally shade under it."""
    for asset_name, fitted in asset_params.items():
        density = nig.pdf(grid, 0, fitted)
        target_ax.plot(grid, density, lw=2, label=asset_name, color=colors[asset_name])
        if shade:
            target_ax.fill_between(grid, density, alpha=0.2, color=colors[asset_name])


# Left panel: full scale
x_wide = np.linspace(-15, 15, 500)
draw_asset_pdfs(ax_full, x_wide, shade=True)
ax_full.set_xlabel('Return (%)', fontsize=12)
ax_full.set_ylabel('Probability Density', fontsize=12)
ax_full.set_title('Full Scale: Crypto is MUCH wider', fontweight='bold')
ax_full.legend()
ax_full.grid(True, alpha=0.3)
ax_full.set_xlim(-15, 15)

# Right panel: zoomed in
x_narrow = np.linspace(-3, 3, 500)
draw_asset_pdfs(ax_zoom, x_narrow, shade=False)
ax_zoom.set_xlabel('Return (%)', fontsize=12)
ax_zoom.set_title('Zoomed: Different shapes and peaks', fontweight='bold')
ax_zoom.legend()
ax_zoom.grid(True, alpha=0.3)
ax_zoom.set_xlim(-3, 3)

plt.tight_layout()
plt.savefig('multi_asset_distributions.png', dpi=150, bbox_inches='tight', facecolor='white')
plt.show()

In [None]:
# 2x2 figure: one rolling-fit 3D surface per asset, plus a bar chart of the
# per-asset NIG parameters (from `asset_params`) in the fourth panel
fig = plt.figure(figsize=(16, 12))

window_size = 40
step = 10
n_windows = 20
x_grid = np.linspace(-6, 6, 150)
t_vals = np.arange(n_windows) * step  # window start offsets, shared by all assets

for plot_idx, (name, df) in enumerate(datasets.items(), start=1):
    returns = df['return_pct'].values
    start_idx = len(returns) - window_size - (n_windows * step)

    # One fitted density per rolling window, stacked row-wise
    pdf_mat = np.array([
        nig.pdf(
            x_grid, 0,
            fit_nig_mle(returns[start_idx + offset:start_idx + offset + window_size]),
        )
        for offset in t_vals
    ])

    ax = fig.add_subplot(2, 2, plot_idx, projection='3d')
    X, T = np.meshgrid(x_grid, t_vals)
    ax.plot_surface(T, X, pdf_mat, cmap='plasma', edgecolor='none', alpha=0.85)
    ax.set_title(name, fontweight='bold', fontsize=12)
    ax.set_xlabel('Time (days)')
    ax.set_ylabel('Return (%)')
    ax.set_zlabel('Density')
    ax.view_init(elev=25, azim=-55)

# Fourth subplot: parameter comparison across assets
ax = fig.add_subplot(2, 2, 4)
param_names = ['mu', 'delta', 'alpha', 'beta']
params_df = pd.DataFrame(
    {field: [getattr(p, field) for p in asset_params.values()] for field in param_names},
    index=pd.Index(list(asset_params.keys()), name='Asset'),
)

# Min-max scale each parameter column so they share one axis; the small
# constant keeps the denominator non-zero when a column is constant
col_min = params_df.min()
col_range = params_df.max() - col_min + 1e-10
params_norm = (params_df - col_min) / col_range

params_norm.plot(kind='bar', ax=ax, width=0.8)
ax.set_title('NIG Parameters (normalized)', fontweight='bold')
ax.set_ylabel('Normalized Value')
ax.legend(loc='upper right')
ax.set_xticklabels(ax.get_xticklabels(), rotation=15)

plt.tight_layout()
plt.savefig('multi_asset_3d_grid.png', dpi=150, bbox_inches='tight', facecolor='white')
plt.show()

---
# Part 3: Confidence Bands - Uncertainty Over Time

One key advantage of distributional forecasting: we can show how **uncertainty grows** over longer horizons.

A 1-day forecast has less uncertainty than a 10-day forecast.

In [None]:
# Multi-day uncertainty from the one-day equity fit.
# The sum of h iid NIG variables with common alpha and beta is again NIG,
# with mu and delta multiplied by h.

params = asset_params['Equity (S&P 500)']
horizons = [1, 5, 10, 20, 40]  # days


def params_at_horizon(h):
    """NIG parameters of the h-day cumulative return implied by `params`."""
    return tpdf.NIGParameters(
        mu=params.mu * h,
        delta=params.delta * h,
        alpha=params.alpha,
        beta=params.beta,
    )


fig, (ax_pdf, ax_band) = plt.subplots(1, 2, figsize=(15, 5))

# --- Left panel: density of the cumulative return at selected horizons ---
x = np.linspace(-15, 20, 500)
cmap = plt.cm.viridis(np.linspace(0.2, 0.9, len(horizons)))

for h, color in zip(horizons, cmap):
    pdf = nig.pdf(x, 0, params_at_horizon(h))
    ax_pdf.plot(x, pdf, lw=2, color=color, label=f'{h}-day')
    ax_pdf.fill_between(x, pdf, alpha=0.1, color=color)

ax_pdf.set_xlabel('Cumulative Return (%)', fontsize=12)
ax_pdf.set_ylabel('Probability Density', fontsize=12)
ax_pdf.set_title('Distribution Spreads with Horizon', fontweight='bold')
ax_pdf.legend(title='Horizon')
ax_pdf.grid(True, alpha=0.3)
ax_pdf.axvline(0, color='gray', ls='--', lw=1)

# --- Right panel: sampled mean and percentile bands for horizons 1..60 ---
horizon_range = np.arange(1, 61)
means, ci_1, ci_5, ci_95, ci_99 = [], [], [], [], []

# A single generator is shared across horizons, so draws are sequential
rng = np.random.default_rng(42)
for h in horizon_range:
    samples = nig.sample(10000, 0, params_at_horizon(h), rng=rng)
    p1, p5, p95, p99 = np.percentile(samples, [1, 5, 95, 99])
    means.append(np.mean(samples))
    ci_1.append(p1)
    ci_5.append(p5)
    ci_95.append(p95)
    ci_99.append(p99)

ax_band.fill_between(horizon_range, ci_1, ci_99, alpha=0.2, color='blue', label='98% CI')
ax_band.fill_between(horizon_range, ci_5, ci_95, alpha=0.4, color='blue', label='90% CI')
ax_band.plot(horizon_range, means, 'b-', lw=2, label='Expected')
ax_band.axhline(0, color='gray', ls='--', lw=1)

ax_band.set_xlabel('Horizon (days)', fontsize=12)
ax_band.set_ylabel('Cumulative Return (%)', fontsize=12)
ax_band.set_title('Confidence Bands Widen Over Time', fontweight='bold')
ax_band.legend()
ax_band.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('confidence_bands.png', dpi=150, bbox_inches='tight', facecolor='white')
plt.show()

print(f"At 60 days: Expected return = {means[-1]:.1f}%, but 90% CI is [{ci_5[-1]:.1f}%, {ci_95[-1]:.1f}%]")
print("This uncertainty is what Pipeline 2 captures that Pipeline 1 misses!")

---
# Part 4: Tail Risk Comparison - Normal vs NIG

Why does NIG matter? Because Normal underestimates extreme events.

Let's compare the actual frequency of large moves vs what Normal and NIG predict.

In [None]:
# Compare tail frequencies: Actual vs Normal vs NIG.
# For each asset and each k, all three bars measure the same event:
# "return deviates from the sample mean by more than k sample std devs".
fig, axes = plt.subplots(1, 3, figsize=(16, 5))

thresholds = [1, 2, 3, 4, 5]  # move size in standard deviations
x = np.arange(len(thresholds))
width = 0.25

for ax, (name, df) in zip(axes, datasets.items()):
    returns = df['return_pct'].values

    # Fit Normal (sample moments)
    mu_norm, std_norm = np.mean(returns), np.std(returns)

    # Fit NIG and draw one seeded sample, reused for every threshold
    # (re-drawing with the same seed per threshold would give identical draws)
    nig_params = fit_nig_mle(returns)
    samples = nig.sample(100000, 0, nig_params, rng=np.random.default_rng(42))

    # Deviations are measured from the sample mean so that the empirical and
    # NIG frequencies match the event the Normal tail formula describes
    actual_dev = np.abs(returns - mu_norm)
    sample_dev = np.abs(samples - mu_norm)

    actual_probs = []
    normal_probs = []
    nig_probs = []

    for k in thresholds:
        threshold = k * std_norm

        # Actual frequency of |return - mean| > threshold
        actual_probs.append(np.mean(actual_dev > threshold) * 100)

        # Normal prediction, two-tailed; sf() avoids the cancellation in 1 - cdf()
        normal_probs.append(2 * stats.norm.sf(k) * 100)

        # NIG prediction (via sampling)
        nig_probs.append(np.mean(sample_dev > threshold) * 100)

    ax.bar(x - width, actual_probs, width, label='Actual', color='black', alpha=0.8)
    ax.bar(x, normal_probs, width, label='Normal', color='blue', alpha=0.7)
    ax.bar(x + width, nig_probs, width, label='NIG', color='red', alpha=0.7)

    ax.set_xticks(x)
    ax.set_xticklabels([f'{k}σ' for k in thresholds])
    ax.set_xlabel('Move Size (in std devs)')
    ax.set_ylabel('Frequency (%)')
    ax.set_title(f'{name}', fontweight='bold')
    ax.legend()
    ax.set_yscale('log')
    ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.savefig('tail_risk_comparison.png', dpi=150, bbox_inches='tight', facecolor='white')
plt.show()

print("At 3+ sigma moves, Normal SEVERELY underestimates the actual frequency!")
print("NIG captures these fat tails much better.")

---
# Part 5: VaR Accuracy - Which Distribution is Better?

Value at Risk (VaR) is used for risk management. Let's test which distribution gives more accurate VaR estimates.

In [None]:
# VaR Backtest: How often do actual losses exceed the VaR estimate?
# At 5% VaR, we expect ~5% of days to exceed it. More = model underestimates risk.
# NOTE: this refits the NIG once per test day per asset, so the cell is slow.

var_levels = [0.01, 0.05, 0.10]  # 1%, 5%, 10% VaR
var_percentiles = [v * 100 for v in var_levels]
window = 250  # training observations preceding each test day
results = []

for name, df in datasets.items():
    returns = df['return_pct'].values

    # Rolling backtest: fit on past `window` days, test on the next day
    violations_normal = {v: 0 for v in var_levels}
    violations_nig = {v: 0 for v in var_levels}
    n_tests = 0

    # Run through the final observation: returns[i] is the test day and
    # returns[i-window:i] the training window, so i = len(returns) - 1 is valid
    for i in range(window, len(returns)):
        train = returns[i-window:i]
        test_return = returns[i]

        # Normal VaR: quantiles of a Normal with the training mean/std
        mu, sigma = np.mean(train), np.std(train)
        normal_vars = stats.norm.ppf(var_levels, mu, sigma)

        # NIG VaR: empirical quantiles of samples drawn from the fitted NIG
        # (seeded by i so each test day is reproducible)
        nig_params = fit_nig_mle(train)
        nig_samples = nig.sample(5000, 0, nig_params, rng=np.random.default_rng(i))
        nig_vars = np.percentile(nig_samples, var_percentiles)

        for v, normal_var, nig_var in zip(var_levels, normal_vars, nig_vars):
            if test_return < normal_var:
                violations_normal[v] += 1
            if test_return < nig_var:
                violations_nig[v] += 1

        n_tests += 1

        if i % 500 == 0:
            print(f"{name}: {i}/{len(returns)}...", end='\r')

    if n_tests == 0:
        # Series no longer than the training window: nothing to test, and the
        # violation rates below would divide by zero
        print(f"{name}: Skipped (need more than {window} observations)")
        continue

    for v in var_levels:
        actual_normal = violations_normal[v] / n_tests
        actual_nig = violations_nig[v] / n_tests
        results.append({
            'Asset': name.split()[0],
            'VaR Level': f'{int(v*100)}%',
            'Expected': v,
            'Normal': actual_normal,
            'NIG': actual_nig,
        })
    print(f"{name}: Done ({n_tests} tests)")

results_df = pd.DataFrame(results)
print("\nVaR Backtest Results (violation rate - should equal VaR level):")
print(results_df.to_string(index=False))

In [None]:
# Visualize VaR accuracy: one panel per asset, grouped bars per VaR level
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
width = 0.25

for ax, (asset, group) in zip(axes, results_df.groupby('Asset')):
    x = np.arange(len(group))
    expected_pct = group['Expected'].to_numpy() * 100

    ax.bar(x - width, expected_pct, width, label='Expected', color='gray', alpha=0.5)
    ax.bar(x, group['Normal'] * 100, width, label='Normal', color='blue', alpha=0.7)
    ax.bar(x + width, group['NIG'] * 100, width, label='NIG', color='red', alpha=0.7)

    ax.set_xticks(x)
    ax.set_xticklabels(group['VaR Level'])
    ax.set_xlabel('VaR Level')
    ax.set_ylabel('Actual Violation Rate (%)')
    ax.set_title(f'{asset}', fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3, axis='y')

    # Reference line at the expected rate across each bar group.
    # hlines() takes data coordinates, so each line spans exactly its own
    # three bars; axhline()'s xmin/xmax are axes fractions in [0, 1] and
    # cannot be aligned to bar positions this way.
    ax.hlines(
        expected_pct, x - 1.5 * width, x + 1.5 * width,
        colors='green', linestyles='--', linewidth=1,
    )

plt.tight_layout()
plt.savefig('var_backtest.png', dpi=150, bbox_inches='tight', facecolor='white')
plt.show()

print("\nInterpretation:")
print("- If bar > expected: Model UNDERESTIMATES risk (dangerous!)")
print("- If bar ≈ expected: Model is well-calibrated")
print("- NIG should be closer to expected than Normal, especially at 1% VaR")

---
# Part 6: Interactive 3D Plot

Create a rotatable 3D visualization that can be saved as HTML.

In [None]:
# Assemble a PDFResult from rolling NIG fits so the library plotters can use it

equity_returns = datasets['Equity (S&P 500)']['return_pct'].values
window_size = 60
step = 5
n_windows = 50

start_idx = len(equity_returns) - window_size - (n_windows * step)
x_grid = np.linspace(-5, 5, 200)     # return values at which each PDF is evaluated
t_grid = np.arange(n_windows) * step  # start offset of each window


def window_density(offset):
    """Fit NIG to the window starting `offset` past start_idx and evaluate it on x_grid."""
    begin = start_idx + offset
    fitted = fit_nig_mle(equity_returns[begin:begin + window_size])
    return nig.pdf(x_grid, 0, fitted)


# Rows follow t_grid, columns follow x_grid
pdf_matrix = np.array([window_density(offset) for offset in t_grid], dtype=float)

# Create PDFResult object
result = PDFResult(
    pdf_matrix=pdf_matrix,
    time_grid=t_grid,
    value_grid=x_grid,
    distribution_name="NIG (S&P 500 Rolling 60-day)"
)

print(f"Created PDFResult: {result.pdf_matrix.shape}")
print(f"Cumulative E[X]: {result.cumulative_expected_value:.4f}")

In [None]:
# Interactive 3D plot using Plotly
# `result` is the PDFResult built in the preceding cell.
# Every step (constructing the plotter, building the surface, saving the
# HTML, showing the figure) sits inside the try, so an ImportError raised by
# any of them is reported as "Plotly not installed" rather than stopping the
# notebook. Other exception types are not caught.
try:
    plotter = InteractivePlotter(colorscale='Viridis')
    fig = plotter.surface_3d(result, title='Interactive: S&P 500 Distribution Evolution')
    # Written to the current working directory
    plotter.save_html(fig, 'interactive_3d.html')
    print("Saved: interactive_3d.html (open in browser for rotatable 3D plot)")
    fig.show()
except ImportError:
    print("Plotly not installed - skipping interactive plot")
    print("Install with: pip install plotly")

In [None]:
# Render the library's static figure types for `result` and save each to PNG
plotter = PDFPlotter()

# (figure factory, output file) pairs, produced in this order
figure_specs = [
    # 3D surface
    (lambda: plotter.surface_3d(result, title='Distribution Evolution (S&P 500)'),
     '3d_surface_plotter.png'),
    # Heatmap view
    (lambda: plotter.heatmap(result, title='Heatmap: Time vs Return'),
     'heatmap.png'),
    # Confidence bands
    (lambda: plotter.confidence_bands(result, confidence_levels=(0.5, 0.9, 0.99)),
     'confidence_bands_plotter.png'),
    # E[X] over time
    (lambda: plotter.expected_value_over_time(result),
     'expected_value_trajectory.png'),
]

for make_figure, filename in figure_specs:
    fig = make_figure()
    plotter.save(fig, filename)

print("Generated publication-quality figures using PDFPlotter")

---
# Summary

This showcase demonstrated:

1. **3D Distribution Evolution** - How probability distributions change over time as market conditions evolve

2. **Multi-Asset Comparison** - Crypto, Equity, and Forex have very different distribution shapes
   - Crypto: Wide, fat-tailed
   - Equity: Medium width, slight negative skew
   - Forex: Narrow, more symmetric

3. **Confidence Bands** - Uncertainty grows with forecast horizon

4. **Tail Risk** - Normal distribution severely underestimates extreme events; NIG captures fat tails

5. **VaR Accuracy** - NIG provides better-calibrated risk estimates than Normal

6. **Interactive Plots** - Rotatable 3D visualizations for exploration

**Key Takeaway**: Financial returns have fat tails and skewness that Normal distributions miss. The NIG distribution captures these features, leading to better risk management and trading decisions.