# XGBoost: Point Prediction vs Distribution Prediction

**Pipeline 1**: XGBoost → single number (expected return)

**Pipeline 2**: XGBoost → 4 numbers (NIG distribution parameters) → VaR filter

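Same features, same model architecture, different outputs. Note: although the pipelines are labelled "XGBoost", the code below uses scikit-learn's `GradientBoostingRegressor` as the gradient-boosted tree model.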

In [None]:
import sys
from pathlib import Path
sys.path.insert(0, str(Path.cwd().parent / "src"))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor
import temporalpdf as tpdf

# IPython magic: display matplotlib figures inline in the notebook output.
%matplotlib inline
# Default figure size (width, height in inches) for plots that do not set one.
plt.rcParams['figure.figsize'] = (14, 5)

# Per-trade cost charged to both strategies; the results cell divides this
# by 100 before subtracting it from the traded days' returns.
COST_BPS = 2  # Transaction cost in basis points

## Load Data & Create Features

In [None]:
# Load the daily return series (column `return_pct`) from the data directory
# that sits next to the notebook's working directory.
df = pd.read_csv(Path.cwd().parent / "data" / "equity_returns.csv")
returns = df["return_pct"].values
print(f"S&P 500: {len(returns):,} days")

# Create features from lookback window
lookback = 20
X, y = [], []
# The window returns[i-lookback:i] ends at index i-1, so the next-day target
# is returns[i]. (Using returns[i+1] would skip a day and predict two days
# ahead, contradicting the "next-day return" setup described in the text.)
for i in range(lookback, len(returns)):
    window = returns[i - lookback:i]
    # Features: mean, std, last two returns, min, max, share of up days.
    X.append([np.mean(window), np.std(window), window[-1], window[-2],
              np.min(window), np.max(window), np.sum(window > 0) / lookback])
    y.append(returns[i])
X, y = np.array(X), np.array(y)

# Train/test split: chronological, first 80% of samples for training and the
# remainder for testing (no shuffling, so the test set is strictly later).
split = int(len(X) * 0.8)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]
print(f"Train: {len(y_train):,}, Test: {len(y_test):,}")

## Pipeline 1: XGBoost → Point Prediction

Model predicts next-day return. Decision: go long if prediction > 0.

In [None]:
# Point-forecast pipeline: one gradient-boosted regressor fitted on the
# training rows, producing a single predicted return per test row.
model_point = GradientBoostingRegressor(
    n_estimators=100,
    max_depth=3,
    random_state=42,
)
model_point.fit(X_train, y_train)
pred_point = model_point.predict(X_test)

# Go long only on days whose predicted return is strictly positive.
decisions_p1 = pred_point > 0
n_trades_p1 = decisions_p1.sum()
trade_share_p1 = decisions_p1.mean()
print(f"Pipeline 1: {n_trades_p1} / {len(y_test)} trades ({trade_share_p1:.1%})")

## Pipeline 2: XGBoost → Distribution Parameters

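Model predicts NIG parameters (mu, delta, alpha, beta), learned in a transformed space (mu, log delta, log alpha, arctanh(beta/alpha)) and mapped back before use. Decision: go long if E[X] > 0 AND the 5% VaR, expressed as a positive loss, is below 2% (equivalently, the 5th-percentile return is above -2%).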

In [None]:
# Build the regression targets for the distribution model: one NIG fit per
# training row, taken over that row's own lookback window of returns.
print("Fitting NIG distributions to training windows...")


def _nig_target(window):
    """Fit an NIG to `window`; return [mu, log(delta), log(alpha), arctanh(beta/alpha)]."""
    fitted = tpdf.fit_nig(window)  # Use library function
    # Clip beta/alpha into [-0.99, 0.99] so that arctanh stays finite.
    skew_ratio = np.clip(fitted.beta / fitted.alpha, -0.99, 0.99)
    # Positive parameters are stored on a log scale.
    return [
        fitted.mu,
        np.log(fitted.delta),
        np.log(fitted.alpha),
        np.arctanh(skew_ratio),
    ]


# Training row k was built from returns[k:k + lookback].
param_targets = np.array([
    _nig_target(returns[start:start + lookback])
    for start in range(len(y_train))
])
print(f"Created {len(param_targets)} distribution targets")

In [None]:
# Fit the distribution model: MultiOutputRegressor wraps the base
# gradient-boosted regressor so that the four target columns of
# `param_targets` are all predicted from the same features.
base_regressor = GradientBoostingRegressor(
    n_estimators=100,
    max_depth=3,
    random_state=42,
)
model_dist = MultiOutputRegressor(base_regressor)
model_dist.fit(X_train, param_targets)

# One row of raw (still transformed) parameters per test sample.
pred_params_raw = model_dist.predict(X_test)
print("Model trained")

In [None]:
# Turn each predicted parameter row into a trade / no-trade decision by
# sampling from the implied NIG distribution.
nig = tpdf.NIG()
rng = np.random.default_rng(42)
var_estimates = []
trade_flags = []

for mu_hat, log_delta_hat, log_alpha_hat, skew_raw in pred_params_raw:
    # Undo the log / arctanh transforms, flooring delta and alpha and keeping
    # beta strictly inside (-alpha, alpha).
    delta_hat = max(np.exp(log_delta_hat), 0.01)
    alpha_hat = max(np.exp(log_alpha_hat), 0.1)
    beta_hat = np.clip(
        alpha_hat * np.tanh(skew_raw),
        -alpha_hat + 0.01,
        alpha_hat - 0.01,
    )

    predicted = tpdf.NIGParameters(
        mu=mu_hat, delta=delta_hat, alpha=alpha_hat, beta=beta_hat
    )
    draws = nig.sample(3000, 0, predicted, rng=rng)

    # Monte Carlo estimates of the mean and of the 5% VaR (as a positive loss).
    mean_return = np.mean(draws)
    loss_5pct = -np.percentile(draws, 5)
    var_estimates.append(loss_5pct)

    # Trade only when the expected return is positive AND the VaR is below 2%.
    trade_flags.append(mean_return > 0 and loss_5pct < 2.0)

decisions_p2 = np.array(trade_flags)
print(f"Pipeline 2: {np.sum(decisions_p2)} / {len(y_test)} trades ({np.mean(decisions_p2):.1%})")

## Results

In [None]:
def sharpe(returns):
    """Return the annualized Sharpe ratio of a series of per-period returns.

    Computed as mean / population standard deviation (ddof=0), scaled by
    sqrt(252). No risk-free rate is subtracted.

    Args:
        returns: 1-D array-like of per-period returns.

    Returns:
        float: The annualized Sharpe ratio, or 0.0 when it is undefined
        (empty input, or a series with no variation).
    """
    values = np.asarray(returns, dtype=float)
    if values.size == 0:
        # np.mean / np.std of an empty array would give NaN plus warnings.
        return 0.0
    mean = np.mean(values)
    std = np.std(values)
    # A constant series can yield a tiny non-zero std through rounding, so
    # compare against a relative tolerance instead of testing `== 0`.
    if std <= 1e-12 * max(1.0, abs(mean)):
        return 0.0
    return mean / std * np.sqrt(252)

def bootstrap_ci(returns, n=1000):
    """Return a percentile-bootstrap 95% interval for the Sharpe ratio.

    Draws `n` resamples of `returns` (same length, with replacement) from a
    generator seeded with 42, so repeated calls on the same data give the
    same interval.

    Args:
        returns: 1-D array of per-period returns.
        n: Number of bootstrap resamples.

    Returns:
        tuple: (2.5th percentile, 97.5th percentile) of the resampled
        Sharpe ratios.
    """
    generator = np.random.default_rng(42)
    sample_size = len(returns)
    resampled_sharpes = []
    for _ in range(n):
        draw = generator.choice(returns, sample_size, replace=True)
        resampled_sharpes.append(sharpe(draw))
    lower = np.percentile(resampled_sharpes, 2.5)
    upper = np.percentile(resampled_sharpes, 97.5)
    return lower, upper

# Convert the cost from basis points to the same unit as the returns
# (1 bp = 0.01, so divide by 100; the returns are printed as percentages).
cost = COST_BPS / 100
# Per-day strategy returns: buy & hold takes every day; each pipeline earns
# the day's return minus the cost on traded days and 0 otherwise.
strat_bh = y_test
strat_p1 = np.where(decisions_p1, y_test - cost, 0)
strat_p2 = np.where(decisions_p2, y_test - cost, 0)

sharpe_bh, ci_bh = sharpe(strat_bh), bootstrap_ci(strat_bh)
sharpe_p1, ci_p1 = sharpe(strat_p1), bootstrap_ci(strat_p1)
sharpe_p2, ci_p2 = sharpe(strat_p2), bootstrap_ci(strat_p2)

print("COMPARISON")
print("=" * 70)
print(f"{'Strategy':<25} {'Trades':>8} {'PnL':>10} {'Sharpe':>10} {'95% CI':>16}")
print("-" * 70)
print(f"{'Buy & Hold':<25} {len(y_test):>8} {np.sum(strat_bh):>+9.1f}% {sharpe_bh:>10.2f} [{ci_bh[0]:.2f}, {ci_bh[1]:.2f}]")
print(f"{'XGBoost -> Point':<25} {np.sum(decisions_p1):>8} {np.sum(strat_p1):>+9.1f}% {sharpe_p1:>10.2f} [{ci_p1[0]:.2f}, {ci_p1[1]:.2f}]")
print(f"{'XGBoost -> Distribution':<25} {np.sum(decisions_p2):>8} {np.sum(strat_p2):>+9.1f}% {sharpe_p2:>10.2f} [{ci_p2[0]:.2f}, {ci_p2[1]:.2f}]")
print("-" * 70)
# A relative improvement is only meaningful against a positive baseline: a
# zero baseline divides by zero and a negative one flips the sign of the
# percentage. In those cases report the absolute Sharpe difference instead.
if sharpe_p1 > 0:
    print(f"\nDistribution vs Point: {(sharpe_p2/sharpe_p1 - 1)*100:+.0f}% Sharpe improvement")
else:
    print(f"\nDistribution vs Point: {sharpe_p2 - sharpe_p1:+.2f} Sharpe difference (point baseline is not positive)")

In [None]:
# Two-panel summary figure: cumulative PnL (left), Sharpe ratios with their
# bootstrap confidence intervals (right).
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax_pnl, ax_sharpe = axes

# --- Left panel: cumulative PnL of each strategy over the test period ---
days = np.arange(len(y_test))
ax_pnl.plot(days, np.cumsum(strat_bh), 'k-', alpha=0.5, lw=1.5, label=f'Buy & Hold ({sharpe_bh:.2f})')
ax_pnl.plot(days, np.cumsum(strat_p1), 'r-', lw=2, label=f'Point Prediction ({sharpe_p1:.2f})')
ax_pnl.plot(days, np.cumsum(strat_p2), 'b-', lw=2, label=f'Distribution ({sharpe_p2:.2f})')
ax_pnl.axhline(0, color='gray', ls=':', lw=1)
ax_pnl.set_xlabel('Day')
ax_pnl.set_ylabel('Cumulative PnL (%)')
ax_pnl.set_title('Cumulative Returns (Sharpe in legend)', fontweight='bold')
ax_pnl.legend()
ax_pnl.grid(True, alpha=0.3)

# --- Right panel: Sharpe ratio per strategy with 95% CI error bars ---
x = [0, 1, 2]
point_estimates = [sharpe_bh, sharpe_p1, sharpe_p2]
intervals = [ci_bh, ci_p1, ci_p2]
# errorbar expects distances below/above the point, not the CI bounds.
err_below = [est - ci[0] for est, ci in zip(point_estimates, intervals)]
err_above = [ci[1] - est for est, ci in zip(point_estimates, intervals)]

bars = ax_sharpe.bar(x, point_estimates, color=['gray', 'red', 'blue'], alpha=0.7)
ax_sharpe.errorbar(x, point_estimates, yerr=[err_below, err_above],
                   fmt='none', color='black', capsize=5)
ax_sharpe.set_xticks(x)
ax_sharpe.set_xticklabels(['Buy & Hold', 'Point\nPrediction', 'Distribution\nPrediction'])
ax_sharpe.set_ylabel('Annualized Sharpe')
ax_sharpe.set_title('Risk-Adjusted Returns (95% CI)', fontweight='bold')
ax_sharpe.grid(True, alpha=0.3, axis='y')
# Numeric label slightly above each bar's value.
for position, value in zip(x, point_estimates):
    ax_sharpe.text(position, value + 0.15, f'{value:.2f}', ha='center', fontweight='bold')

plt.tight_layout()
plt.savefig('comparison.png', dpi=150, bbox_inches='tight', facecolor='white')
plt.show()

## Why Distribution Prediction Wins

The VaR filter rejects trades where predicted return is positive but risk is high.

In [None]:
# Compare the days Pipeline 1 trades but Pipeline 2 skips against the days
# Pipeline 2 keeps. A day lands in `rejected` whenever Pipeline 2 declines it,
# whether because of its VaR check or its expected-return check.
rejected = decisions_p1 & ~decisions_p2
rejected_returns = y_test[rejected]
kept_returns = y_test[decisions_p2]

print(f"Days rejected by VaR filter: {rejected.sum()}")
print(f"Mean return on rejected days: {rejected_returns.mean():+.3f}%")
print(f"Std return on rejected days: {rejected_returns.std():.3f}%")
print()
print(f"Days kept by Pipeline 2: {decisions_p2.sum()}")
print(f"Mean return on kept days: {kept_returns.mean():+.3f}%")
print(f"Std return on kept days: {kept_returns.std():.3f}%")
print()
print("The VaR filter removes high-volatility days with worse risk-adjusted returns.")

## Summary

| Pipeline | Output | Decision Rule | Result |
|----------|--------|---------------|--------|
| Point | E[return] | Long if E[X] > 0 | Trades often, captures market |
| Distribution | (mu, delta, alpha, beta) | Long if E[X] > 0 AND 5th-percentile return > -2% (5% VaR < 2%) | Selective, better risk-adjusted |

**Key insight**: Distribution prediction provides uncertainty information that enables risk-based filtering.