In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to local helper modules
# (e.g. utils, DataLoader) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [1]:
import pandas as pd
import polars as pl
import utils
from scipy.optimize import least_squares, curve_fit, minimize
import numpy as np
import os
from tqdm import tqdm
from scipy.stats import norm
from scipy.integrate import quad
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
from DataLoader import DataLoader
import datetime as dt
import joblib as joblib

import warnings
# Silence every warning for the rest of the session: no category or module
# filter is given, so deprecation and numerical warnings are hidden as well.
warnings.filterwarnings("ignore")

In [2]:
# First and last date of the sample (presumably YYYYMMDD strings - confirm
# against DataLoader.load_dates).
start = "20060224"
end = "20230831"

loader = DataLoader("./data", "csv")

# Load every date in the range and order the rows by 'Date' ...
quotes_by_date = loader.load_dates(start, end).sort('Date')
# ... then keep all rows but drop the first column.
df = quotes_by_date[:, 1:]

In [5]:
# Read the stored parameter table.
res = pl.read_parquet('params_20060224_20230831.parquet')

# Rename only the last column to 'date'; every other name is left untouched.
column_names = list(res.columns)
column_names[-1] = 'date'
res.columns = column_names

res

H,eta,err_code,date
f64,f64,f64,datetime[ns]
0.060878,1.240324,0.0,2006-02-24 00:00:00
0.001504,6.514187,0.0,2006-02-27 00:00:00
0.103098,1.246271,0.0,2006-02-28 00:00:00
0.218843,1.309456,0.0,2006-03-01 00:00:00
0.000359,16.490253,0.0,2006-03-02 00:00:00
…,…,…,…
0.003983,12.433389,0.0,2023-08-24 00:00:00
0.021462,5.520464,0.0,2023-08-25 00:00:00
0.029094,4.848911,0.0,2023-08-28 00:00:00
0.028095,4.93641,0.0,2023-08-29 00:00:00


In [9]:
# One-column frame holding the dates whose stored err_code equals 1.
has_err_code_one = pl.col('err_code') == 1
dates = res.filter(has_err_code_one).select('date')

In [13]:
# Keep only the option quotes observed on the flagged dates.
# `dates` is a one-column DataFrame, so hand `is_in` the 'date' column itself
# (a Series of values) instead of the whole frame; passing the frame relies on
# implicit coercion of a DataFrame into a collection of values.
failed = df.filter(pl.col('Date').is_in(dates.get_column('date')))
failed

Expiry,Texp,Strike,Bid,Ask,Fwd,CallMid,Date
i64,f64,f64,f64,f64,f64,f64,datetime[ns]
20060719,0.093151,12.5,0.69134,0.947509,16.293825,4.0,2006-06-15 00:00:00
20060719,0.093151,15.0,0.794019,0.96067,16.293825,2.375,2006-06-15 00:00:00
20060719,0.093151,17.5,0.871538,1.073819,16.293825,1.45,2006-06-15 00:00:00
20060719,0.093151,20.0,1.016273,1.128372,16.293825,0.95,2006-06-15 00:00:00
20060719,0.093151,22.5,1.049316,1.217591,16.293825,0.625,2006-06-15 00:00:00
…,…,…,…,…,…,…,…
20240417,0.673511,140.0,1.005966,1.048042,20.723892,0.171824,2023-08-15 00:00:00
20240417,0.673511,150.0,1.013574,1.060234,20.723892,0.150997,2023-08-15 00:00:00
20240417,0.673511,160.0,1.027859,1.068587,20.723892,0.135376,2023-08-15 00:00:00
20240417,0.673511,170.0,1.039397,1.08303,20.723892,0.124963,2023-08-15 00:00:00


In [20]:
# Earliest date present in `failed`. `unique()` does not promise any ordering
# of its result, so sort before taking the first element.
day1 = failed['Date'].unique().sort()[0]

In [23]:
# Show the quotes of the selected date only.
on_day1 = pl.col('Date') == day1
failed.filter(on_day1)

Expiry,Texp,Strike,Bid,Ask,Fwd,CallMid,Date
i64,f64,f64,f64,f64,f64,f64,datetime[ns]
20060719,0.093151,12.5,0.69134,0.947509,16.293825,4.0,2006-06-15 00:00:00
20060719,0.093151,15.0,0.794019,0.96067,16.293825,2.375,2006-06-15 00:00:00
20060719,0.093151,17.5,0.871538,1.073819,16.293825,1.45,2006-06-15 00:00:00
20060719,0.093151,20.0,1.016273,1.128372,16.293825,0.95,2006-06-15 00:00:00
20060719,0.093151,22.5,1.049316,1.217591,16.293825,0.625,2006-06-15 00:00:00
…,…,…,…,…,…,…,…
20070214,0.668607,27.5,0.59767,0.652905,17.273379,1.075,2006-06-15 00:00:00
20070214,0.668607,30.0,0.622781,0.682669,17.273379,0.925,2006-06-15 00:00:00
20070516,0.917808,15.0,0.341941,0.413542,17.267955,3.65,2006-06-15 00:00:00
20070516,0.917808,20.0,0.452026,0.543522,17.267955,2.3,2006-06-15 00:00:00


In [24]:
## Debug

In [25]:
# Parameters
# Hurst object built from h = 0.07 (the same value is used as the optimiser's
# starting guess for H in the calibration cell).
H = utils.Hurst(0.07)
# Default eta: read as a module-level global by calibration_h_jim and used as
# the starting guess for eta in the two-parameter calibration.
eta = 1.9
# NOTE(review): volvol is not referenced again in the visible cells; the
# meaning of utils.c_h is not visible here - confirm before relying on it.
volvol = eta * np.sqrt(H.h2) / (utils.c_h(H) * 2)


def _vix_power_moment(ivolData, expiry, weight_power, multiplier, fwd_power):
    """Shared implementation behind ``vix2`` and ``vix4``.

    Selects the rows of ``ivolData`` for ``expiry``, builds a mid implied-vol
    curve in log-moneyness ``k = log(Strike / Fwd)`` and returns::

        fwd**fwd_power * (1 + multiplier * (I_call + I_put))

    where ``I_call`` integrates ``exp(weight_power * y) * call(K=exp(y))`` over
    y in [0, 10] and ``I_put`` integrates the same weight times put prices over
    y in [-10, 0]. Prices come from ``utils.BSFormula`` with spot 1 and rate 0.
    NOTE(review): this looks like the strike-integral (static replication)
    form of a power moment of the underlying - confirm.

    Parameters
    ----------
    ivolData : pandas.DataFrame
        Needs the columns 'Expiry', 'Texp', 'Strike', 'Bid', 'Ask', 'Fwd'.
        'Bid' and 'Ask' are averaged and used as volatilities.
    expiry : int
        Value matched against the 'Expiry' column.
    weight_power : int
        Exponent p of the ``exp(p * y)`` weight inside both integrands.
    multiplier : int
        Factor applied to the sum of the two integrals.
    fwd_power : int
        Power of the forward used to scale the result.

    Returns
    -------
    float
        The scaled moment described above.

    Raises
    ------
    IndexError
        If no row matches ``expiry`` (the first 'Texp' value cannot be read).
    """
    df = ivolData[ivolData['Expiry'] == expiry]
    texp = df['Texp'].values[0]
    # Rows without a bid quote cannot contribute a mid vol.
    df = df.loc[~df['Bid'].isna()]

    midVol = 0.5 * (df['Ask'] + df['Bid'])
    fwd = df['Fwd'].values[0]  # Forward price
    k = np.log(df['Strike'] / fwd).values  # Log-moneyness
    kmin, kmax = min(k), max(k)
    minvo, maxvo = midVol[k == kmin].values[0], midVol[k == kmax].values[0]

    def volInterp(kout):
        """
        Interpolate vol between strikes;
        set constant outside strike range, use Stineman inside
        """
        if not isinstance(kout, np.ndarray):
            kout = np.array([kout])
        return np.where(
            kout < kmin, minvo,
            np.where(
                kout > kmax, maxvo,
                utils.stineman_interp(k, midVol.values, kout)
            )
        )

    def integrand(y, callPutFlag):
        """Weighted option price at log-strike ``y`` as a plain float."""
        price = utils.BSFormula(
            S=1., K=np.exp(y), t=texp, r=0, vol=volInterp(y),
            callPutFlag=callPutFlag
        )
        weighted = np.exp(weight_power * y) * price
        # volInterp yields a 1-element array, so `weighted` is one too; quad
        # expects a scalar, and implicit conversion of a 1-d array to a scalar
        # is deprecated in NumPy, so unwrap it explicitly.
        return np.asarray(weighted).item()

    # Calls (flag 1) cover non-negative log-strikes, puts (flag 0) the rest.
    callIntegral, _ = quad(integrand, 0, 10, args=(1,))
    putIntegral, _ = quad(integrand, -10, 0, args=(0,))

    return fwd ** fwd_power * (1 + multiplier * (callIntegral + putIntegral))


def vix2(ivolData, expiry: int):
    """Return ``fwd**2 * (1 + 2 * (I_call + I_put))`` for one expiry.

    The integrands are weighted by ``exp(y)``; see ``_vix_power_moment`` for
    the construction of the vol curve and of the two integrals.

    Parameters
    ----------
    ivolData : pandas.DataFrame
        Quotes with the columns 'Expiry', 'Texp', 'Strike', 'Bid', 'Ask', 'Fwd'.
    expiry : int
        Value matched against the 'Expiry' column.

    Returns
    -------
    float
    """
    return _vix_power_moment(
        ivolData, expiry, weight_power=1, multiplier=2, fwd_power=2
    )


def vix4(ivolData, expiry: int):
    """Return ``fwd**4 * (1 + 12 * (I_call + I_put))`` for one expiry.

    The integrands are weighted by ``exp(3 * y)``; see ``_vix_power_moment``
    for the construction of the vol curve and of the two integrals.

    Parameters
    ----------
    ivolData : pandas.DataFrame
        Quotes with the columns 'Expiry', 'Texp', 'Strike', 'Bid', 'Ask', 'Fwd'.
    expiry : int
        Value matched against the 'Expiry' column.

    Returns
    -------
    float
    """
    return _vix_power_moment(
        ivolData, expiry, weight_power=3, multiplier=12, fwd_power=4
    )


def sigma_jim(texp, eta: float, hurst: utils.Hurst):
    """Model-side value for a single maturity.

    Computes ``eta**2 * texp**hurst.h2 * f_supH(DELTA / texp, hurst)`` with
    ``DELTA`` and ``f_supH`` taken from ``utils``.

    Parameters
    ----------
    texp : float
        Time to expiry.
    eta : float
        Enters the result squared.
    hurst : utils.Hurst
        Its ``h2`` attribute is the exponent applied to ``texp``; the object
        is also passed on to ``utils.f_supH``.
    """
    power_term = eta**2 * texp**hurst.h2
    shape_term = utils.f_supH(utils.DELTA / texp, hurst)
    return power_term * shape_term

def sigma_market(ivol_data, expiry: int):
    """Market-side counterpart of the model value for one expiry.

    Rescales ``vix2`` by ``DELTA / 10**4`` and ``vix4`` by ``DELTA**2 / 10**8``
    and passes the two results to ``utils.sigma_lognormal``.

    Parameters
    ----------
    ivol_data : pandas.DataFrame
        Quotes forwarded unchanged to ``vix2`` and ``vix4``.
    expiry : int
        Expiry forwarded to ``vix2`` and ``vix4``.
    """
    raw_second = vix2(ivol_data, expiry=expiry)
    raw_fourth = vix4(ivol_data, expiry=expiry)

    scaled_second = raw_second * utils.DELTA / 10**4
    scaled_fourth = raw_fourth * utils.DELTA**2 / 10**8

    return utils.sigma_lognormal(scaled_second, scaled_fourth)

def calibration_jim(texps, h, _eta):
    """Evaluate ``sigma_jim`` on every maturity for a given (h, eta) pair.

    Parameters
    ----------
    texps : iterable of float
        Maturities to evaluate.
    h : float
        Passed to ``utils.Hurst`` to build the Hurst object.
    _eta : float
        Forwarded to ``sigma_jim`` as ``eta``.

    Returns
    -------
    numpy.ndarray
        One model value per entry of ``texps``, in the same order.
    """
    hurst = utils.Hurst(h)
    model_values = []
    for maturity in texps:
        model_values.append(sigma_jim(maturity, _eta, hurst))
    return np.array(model_values)


def calibration_h_jim(texps, h):
    """Evaluate ``sigma_jim`` on every maturity, varying only ``h``.

    ``eta`` is not a parameter here: it is read from the module-level global
    ``eta`` at call time.

    Parameters
    ----------
    texps : iterable of float
        Maturities to evaluate.
    h : float
        Passed to ``utils.Hurst`` to build the Hurst object.

    Returns
    -------
    numpy.ndarray
        One model value per entry of ``texps``, in the same order.
    """
    hurst = utils.Hurst(h)
    model_values = []
    for maturity in texps:
        model_values.append(sigma_jim(maturity, eta, hurst))
    return np.array(model_values)

In [None]:

# Re-run the single-date calibration step by step for inspection.
# NOTE(review): `dd` is not assigned anywhere else in this notebook; binding it
# to `day1` (the first failed date selected above) is an assumption - confirm.
dd = day1

_df = df.filter(pl.col('Date') == dd).to_pandas().set_index('Date')
if _df.empty:
    # Nothing to calibrate without quotes for this date.
    raise ValueError(f"no option quotes found for {dd}")

# address expirations with single obs
single_texp = _df.groupby('Texp')['Texp'].count()
single_texp = set(single_texp[single_texp == 1].index)
_df = _df[~_df['Texp'].isin(single_texp)]

# Both arrays keep order of first appearance, so sigma[i] pairs with
# texp_array[i] as long as every expiry has a single Texp value.
exp_dates = _df['Expiry'].unique()
texp_array = _df['Texp'].unique()

# true values
sigma = np.array([sigma_market(_df, exp) for exp in exp_dates])

# calibrate - we try 2-parameter calibration and if it fails
# we just fit H.
# `params` is laid out as [H, eta, err_code]:
#   err_code 0.0 -> two-parameter fit succeeded
#   err_code 1.0 -> only the H-only fallback succeeded (eta slot is NaN)
#   err_code 2.0 -> both fits failed (H and eta are NaN)
obj = lambda x: np.sum((calibration_jim(texp_array, x[0], x[1]) - sigma)**2)
opt = minimize(
    obj, np.array([0.07, eta]),
    method='L-BFGS-B',
    bounds=((0., 1.), (1., None))  # H in [0, 1], eta >= 1
)
if opt.success:
    params = np.concatenate((opt.x, [0.0]))
else:
    # Log line (in Italian) flagging that the two-parameter fit failed.
    print(f"CAPRA!!!!! NON SAI FARE NULLA! ({dd})")
    # Fallback: fit H alone; calibration_h_jim reads the global `eta`.
    obj = lambda x: np.sum((calibration_h_jim(texp_array, x[0]) - sigma)**2)
    opt = minimize(
        obj, np.array([0.07]),
        method='L-BFGS-B',
        bounds=[(0., 1.)]
    )
    if not opt.success:
        params = np.concatenate((np.full(2, np.nan), [2.0]))
    else:
        params = np.concatenate((opt.x, [np.nan], [1.0]))

params


    