# 05 — Simulation & Reproducibility Diagnostics

This notebook analyzes two chapters:
1. **Simulation of Random Variates from Copulas** (`01_random_variates.md`)
2. **Reproducibility, Random Seeds, and Statistical Validation** (`02_seeds_reproducibility.md`)


## 1. Setup

In [1]:

import os
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm, t as student_t

ROOT = Path.cwd().parent  # notebooks/ -> repo root
FIG_DIR = ROOT / "docs" / "assets" / "figures" / "05_simulation"
FIG_DIR.mkdir(parents=True, exist_ok=True)

def savefig(path, **kwargs):
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)
    plt.tight_layout()
    plt.savefig(path, bbox_inches="tight", **kwargs)
    plt.close()

def sample_gaussian_copula(n=10_000, rho=0.6, seed=None):
    rng = np.random.default_rng(seed)
    cov = np.array([[1.0, rho], [rho, 1.0]])
    L = np.linalg.cholesky(cov)
    z = rng.standard_normal(size=(n, 2)) @ L.T
    return norm.cdf(z)

def sample_t_copula(n=10_000, rho=0.6, nu=5, seed=None):
    rng = np.random.default_rng(seed)
    cov = np.array([[1.0, rho], [rho, 1.0]])
    L = np.linalg.cholesky(cov)
    z = rng.standard_normal(size=(n, 2)) @ L.T
    g = rng.chisquare(df=nu, size=(n, 1))
    t_z = z / np.sqrt(g / nu)
    return student_t.cdf(t_z, df=nu)

def sample_clayton_copula(n=10_000, theta=2.0, seed=None):
    rng = np.random.default_rng(seed)
    S = rng.gamma(shape=1.0/theta, scale=1.0, size=n)
    E1 = rng.exponential(size=n); E2 = rng.exponential(size=n)
    U1 = (1.0 + E1 / S) ** (-1.0/theta)
    U2 = (1.0 + E2 / S) ** (-1.0/theta)
    return np.column_stack([U1, U2])

def sample_gumbel_copula(n=10_000, theta=1.6, seed=None):
    rng = np.random.default_rng(seed)
    alpha = 1.0/theta
    V = rng.uniform(low=-np.pi/2, high=np.pi/2, size=n)
    W = rng.exponential(size=n)
    S = (np.sin(alpha*(V+np.pi/2)) / (np.cos(V)**(1/alpha))) *         (np.cos(V - alpha*(V+np.pi/2)) / W) ** ((1-alpha)/alpha)
    E1 = rng.exponential(size=n); E2 = rng.exponential(size=n)
    U1 = np.exp(-(E1 / S) ** (1.0/theta))
    U2 = np.exp(-(E2 / S) ** (1.0/theta))
    return np.column_stack([U1, U2])

def sample_frank_copula(n=10_000, theta=8.0, seed=None):
    rng = np.random.default_rng(seed)
    U = rng.uniform(size=n); V = rng.uniform(size=n)
    W = rng.exponential(size=n)
    U1 = -np.log(1 + np.exp(-theta*U) * (np.exp(-theta*W) - 1.0)) / theta
    U2 = -np.log(1 + np.exp(-theta*V) * (np.exp(-theta*W) - 1.0)) / theta
    return np.column_stack([U1, U2])

def empirical_tail_dependence(U, side="upper", grid=40):
    u = np.linspace(0.90, 0.995, grid) if side=="upper" else np.linspace(0.005, 0.10, grid)
    est = []
    for t in u:
        if side == "upper":
            est.append(np.mean((U[:,0] > t) & (U[:,1] > t)) / (1 - t))
        else:
            est.append(np.mean((U[:,0] < t) & (U[:,1] < t)) / (t))
    return u, np.array(est)


## 2. Chapter 1 — Simulation of Random Variates from Copulas

Visual diagnostics for common families and empirical tail dependence.

In [2]:

families = [
    ("Gaussian ρ=0.7", lambda n, s: sample_gaussian_copula(n, rho=0.7, seed=s)),
    ("t ν=4, ρ=0.7",   lambda n, s: sample_t_copula(n, rho=0.7, nu=4, seed=s)),
    ("Clayton θ=2",    lambda n, s: sample_clayton_copula(n, theta=2.0, seed=s)),
    ("Gumbel θ=1.6",   lambda n, s: sample_gumbel_copula(n, theta=1.6, seed=s)),
    ("Frank θ=8",      lambda n, s: sample_frank_copula(n, theta=8.0, seed=s)),
]
n = 4000
plt.figure(figsize=(14, 2.6))
for i, (name, sampler) in enumerate(families, start=1):
    U = sampler(n, 123+i)
    ax = plt.subplot(1, 5, i)
    ax.scatter(U[:,0], U[:,1], s=2, alpha=0.5)
    ax.set_title(name, fontsize=10)
    ax.set_xlim(0,1); ax.set_ylim(0,1)
    ax.grid(True, linewidth=0.3, alpha=0.4)
savefig(FIG_DIR / "copula_sim_samples.svg", format="svg")


In [3]:

rhos = [0.5, 0.7, 0.9]; nu = 4
plt.figure(figsize=(6,4))
for rho in rhos:
    U = sample_t_copula(n=80_000, rho=rho, nu=nu, seed=42)
    grid_u, emp = empirical_tail_dependence(U, side="upper", grid=40)
    theo = 2.0 * student_t.cdf(-np.sqrt((nu+1.0)*(1.0-rho)/(1.0+rho)), df=nu+1.0)
    plt.plot(grid_u, emp, label=f"empirical ρ={rho:.1f}")
    plt.hlines(theo, grid_u.min(), grid_u.max(), linestyles="dashed")
plt.xlabel("u"); plt.ylabel(r"Upper tail $\lambda_U$ estimate")
plt.title(f"t-copula tail dependence (ν={nu})")
plt.legend(frameon=False); plt.grid(True, linewidth=0.5, alpha=0.5)
savefig(FIG_DIR / "copula_sim_tail_check.svg", format="svg")


In [4]:

plt.figure(figsize=(8, 2.8)); plt.axis("off")
texts = [
    ("Latent base draw\n(Z, W, ...)", 0.05, 0.5),
    ("Copula transform\n(Gaussian / t / Archimedean)", 0.36, 0.5),
    ("Uniforms U∈[0,1]^d", 0.67, 0.5),
    ("Marginal inverse\nX_i = F_i^{-1}(U_i)", 0.90, 0.5),
]
for txt, x, y in texts:
    plt.text(x, y, txt, ha="center", va="center", fontsize=11,
             bbox=dict(boxstyle="round,pad=0.4", ec="black"))
plt.annotate("", xy=(0.26, 0.5), xytext=(0.14, 0.5), arrowprops=dict(arrowstyle="->", lw=1.5))
plt.annotate("", xy=(0.56, 0.5), xytext=(0.46, 0.5), arrowprops=dict(arrowstyle="->", lw=1.5))
plt.annotate("", xy=(0.82, 0.5), xytext=(0.74, 0.5), arrowprops=dict(arrowstyle="->", lw=1.5))
savefig(FIG_DIR / "copula_sim_pipeline.svg", format="svg")


## 3. Chapter 2 — Reproducibility, Seeds, and Validation

In [5]:

U1 = sample_gaussian_copula(n=1000, rho=0.65, seed=2025)
U2 = sample_gaussian_copula(n=1000, rho=0.65, seed=2025)
assert np.allclose(U1, U2), "Same seed should reproduce identical uniforms"
U3 = sample_gaussian_copula(n=1000, rho=0.65, seed=7)
print("Kolmogorov distance (U1[:,0] vs U3[:,0]):", np.max(np.abs(np.sort(U1[:,0]) - np.sort(U3[:,0]))))


Kolmogorov distance (U1[:,0] vs U3[:,0]): 0.04489550247508811


In [6]:

plt.figure(figsize=(10, 2.8)); plt.axis("off")
nodes = [
    ("Seed init", 0.08, 0.5),
    ("Sampling", 0.25, 0.5),
    ("Estimation", 0.42, 0.5),
    ("Validation", 0.59, 0.5),
    ("Logging/Metadata", 0.78, 0.5),
    ("Re-run\n(same seed)", 0.92, 0.5),
]
for txt, x, y in nodes:
    plt.text(x, y, txt, ha="center", va="center", fontsize=11,
             bbox=dict(boxstyle="round,pad=0.35", ec="black"))
for i in range(len(nodes)-1):
    x0 = nodes[i][1] + 0.06
    x1 = nodes[i+1][1] - 0.06
    plt.annotate("", xy=(x1, 0.5), xytext=(x0, 0.5), arrowprops=dict(arrowstyle="->", lw=1.5))
savefig(FIG_DIR / "reproducibility_pipeline.svg", format="svg")
