In [2]:
# ----------------------------------------------------------------------------------
#  MOCK side-by-side fast-Greeks  vs  finite-difference (bump) for a 3-asset worst-of call
#  — with device-fix in bump_fd —
# ----------------------------------------------------------------------------------
import math, torch, pandas as pd
torch.set_default_dtype(torch.float64)

# ---------------- Market inputs ----------------------------------------------------
S0    = torch.tensor([100.0, 95.0, 90.0])
sigma = torch.tensor([0.25,  0.30, 0.28])
r, T  = 0.02, 1.0
K     = 85.0

corr  = torch.tensor([[1.0, 0.3, 0.2],
                      [0.3, 1.0, 0.4],
                      [0.2, 0.4, 1.0]])

bump_rel = 1e-4
n_paths  = 100_000_000
n_rows   = 1
device   = 'cuda' if torch.cuda.is_available() else 'cpu'
chunk    = 250_000

# ---------------- Helpers ----------------------------------------------------------
def simulate_paths(S0, sigma, r, T, L, n, dev):
    Z = torch.randn(n, 3, device=dev)
    Y = Z @ L.T
    ST = S0 * torch.exp((r - 0.5 * sigma**2)*T + sigma*math.sqrt(T)*Y)
    return ST, Y

def fast_greeks_block(S0, sigma, r, T, K, L, n, dev):
    disc = math.exp(-r*T)
    ST, Y = simulate_paths(S0, sigma, r, T, L, n, dev)
    m, idx  = ST.min(dim=1)
    itm     = m > K

    S_star  = ST[itm, idx[itm]]
    Y_star  = Y [itm, idx[itm]]
    S0_star = S0[idx[itm]]
    sig_star= sigma[idx[itm]]
    payoff  = (m[itm] - K)

    delta  = disc * S_star / S0_star
    gamma  = disc * S_star / S0_star**2
    vega   = disc * S_star * (math.sqrt(T)*Y_star - sig_star*T)
    theta  = disc * (-r*payoff + S_star*(r - 0.5*sig_star**2 + sig_star*Y_star/(2*math.sqrt(T))))
    rho    = disc * K * T * torch.ones_like(payoff)

    return {
        'price' : disc*payoff.mean(),
        'delta' : torch.zeros_like(S0).index_add_(0, idx[itm], delta).div_(n),
        'gamma' : torch.zeros_like(S0).index_add_(0, idx[itm], gamma).div_(n),
        'vega'  : torch.zeros_like(S0).index_add_(0, idx[itm], vega ).div_(n),
        'theta' : theta.mean(),
        'rho'   : rho.mean()
    }

def bump_fd(block_fun, S0, sigma):
    res = {}
    base = block_fun()
    res['price'] = base['price']

    # Delta & Gamma (bump S0)
    deltas, gammas = [], []
    for i in range(3):
        S0_up   = S0.clone();   S0_up[i]  *= (1+ bump_rel)
        S0_down = S0.clone();   S0_down[i]*= (1- bump_rel)

        # **MOVE bumped S0 to GPU** ⬇
        up   = block_fun(S0=S0_up.to(device))
        down = block_fun(S0=S0_down.to(device))

        deltas.append((up['price'] - down['price'])/(2*S0[i]*bump_rel))
        gammas.append((up['price'] - 2*base['price'] + down['price'])/(S0[i]*bump_rel)**2)
    res['delta'] = torch.tensor(deltas)
    res['gamma'] = torch.tensor(gammas)

    # Vega (bump sigma)
    vegas = []
    for i in range(3):
        s_up   = sigma.clone(); s_up[i]  *= (1+ bump_rel)
        s_down = sigma.clone(); s_down[i]*= (1- bump_rel)

        # **MOVE bumped sigma to GPU** ⬇
        up, down = block_fun(sigma=s_up.to(device)), block_fun(sigma=s_down.to(device))
        vegas.append((up['price'] - down['price'])/(2*sigma[i]*bump_rel))
    res['vega'] = torch.tensor(vegas)

    # Rho (bump r)
    up   = block_fun(r=r*(1+bump_rel))
    down = block_fun(r=r*(1-bump_rel))
    res['rho'] = (up['price'] - down['price'])/(2*r*bump_rel)

    # Theta (bump T)
    up   = block_fun(T=T*(1+bump_rel))
    down = block_fun(T=T*(1-bump_rel))
    res['theta'] = (up['price'] - down['price'])/(2*T*bump_rel)

    return res

# ------------- Single-row run (n_rows = 1) ----------------------------------------
L = torch.linalg.cholesky(corr.to(device))
def block(**kwargs):
    return fast_greeks_block(
        kwargs.get('S0',    S0.to(device)),
        kwargs.get('sigma', sigma.to(device)),
        kwargs.get('r',     kwargs.get('r', r)),
        kwargs.get('T',     kwargs.get('T', T)),
        K, L, n_paths, device
    )

print("Running 1 block of", n_paths, "paths …")
fast_res = block()
fd_res   = bump_fd(block, S0, sigma)

def fmt(x): return x if isinstance(x, float) else x.cpu().numpy()
table = pd.DataFrame({
    'fast'         : {k: fmt(v) for k,v in fast_res.items()},
    'finite-diff' : {k: fmt(v) for k,v in fd_res.items()}
})
table


Running 1 block of 100000000 paths …


Unnamed: 0,fast,finite-diff
price,13.874156583114669,13.876936450373647
delta,"[0.10205772163023107, 0.09343248524556544, 0.1...","[0.036894735860038, 0.08018067911486071, -0.02..."
gamma,"[0.0010205772163024167, 0.0009834998446900855,...","[-14.986424678866683, -91.03701137010403, -64...."
vega,"[-1.7329129611253231, -0.635044237984899, 1.90...","[-15.597459719067785, 43.404703030939615, 168...."
theta,1.5274572172791328,19.894785926748426
rho,83.31688723107426,1927.925942039721
