In [28]:
from empirical_study.empirical_study_normalized_tspl import find_optimal_parameters_normalized_tspl
from empirical_study.empirical_study_tspl import find_optimal_parameters_tspl
from empirical_study.empirical_study_2exp import find_optimal_parameters_exp


from empirical_study.utils import *
from empirical_study.plot_functions import *
import yfinance as yf

For a given volatility index $v$, a market index $S$, a power $p$, and a `setting` given as a list of tuples `(n, j)` or `(n, (j_1, ..., j_k))`, define for each timestep $t$ (in days):
$$
r_t = \frac{S_t - S_{t-1}}{S_{t-1}}
$$
$$
R_{n,t} = \sum_{t_i \leq t} K_n(t-t_i)r_{t_i}^n
$$

Then, for the chosen power $p$ (the `p` argument), the model estimates the volatility as
$$
v_t^p = \beta_0 + \sum_{n} \sum_{j\in\{j_1,\dots, j_k\}} \beta_{n,j} R_{n,t}^j
$$
Note: when there is only one $j$ associated with $n$, $\beta_{n,j}$ is simply denoted $\beta_n$.


For example, the linear model we suggest is defined with `p=1`, `setting=[(1,1), (2,1/2)]`, resulting in
$$
v_t = \beta_0 + \beta_1 R_{1,t} + \beta_2 R_{2,t}^{1/2}
$$
The model (M3) is defined by `p=2`, `setting=[(1,(1,2))]`, resulting in
$$
v_t^2 = \beta_0 + \beta_{1,1} R_{1,t} + \beta_{1,2} R_{1,t}^2
$$

Note that there is one kernel per $n$. The kernels are either time-shifted power-laws or convex combinations of two exponentials.

In [3]:
# Date boundaries of the study, parsed into pandas timestamps in a single pass.
load_from, train_start_date, test_start_date, test_end_date = map(
    pd.to_datetime,
    (
        '1995-01-01',  # load_from: need at least 4 years prior to the first training day
        '2000-01-03',  # train_start_date: first day of the training window
        '2019-01-01',  # test_start_date: first day of the test window
        '2022-05-15',  # test_end_date: end of the test window (also the end of the download)
    ),
)

In [4]:
# Fetch the price history of the S&P 500 (^GSPC) and of the VIX (^VIX) through yfinance.
# Both downloads share the same start/end bounds, so the bounds are spelled out once.
history_window = {"start": load_from, "end": test_end_date}
spx_data = yf.Ticker("^GSPC").history(**history_window)
vix_data = yf.Ticker("^VIX").history(**history_window)

In [5]:
# Re-index both frames by calendar date only: `.index.date` keeps just the date part of each
# timestamp, and pd.to_datetime converts those dates back into datetime values.
for frame in (spx_data, vix_data):
    frame.index = pd.to_datetime(frame.index.date)

In [6]:
# Work with closing values only.
spx = spx_data['Close']
# The VIX close is divided by 100 (presumably to turn percentage points into a decimal volatility).
vix = vix_data['Close'].div(100)

In [7]:
# Previous value minus current value of the VIX, i.e. the NEGATIVE of the one-step change
# (vix_{t-1} - vix_t). The first entry is NaN because it has no predecessor.
# NOTE(review): confirm this sign convention is intended (the usual increment is vix_t - vix_{t-1}).
dvix = vix.shift(1).sub(vix)

In [8]:
# Lookback length: number of past returns (in business days) used in the computation of R_{n,t}.
max_delta = 1000
# Short aliases for the date boundaries; these are the names passed to the fitting calls.
train_start, test_start, test_end = train_start_date, test_start_date, test_end_date

In [9]:
# Our linear model: v_t = beta_0 + beta_1 * R_{1,t} + beta_2 * R_{2,t}^{1/2}
p = 1  # power applied to the volatility on the left-hand side (v_t^p)
setting = [
    (1, 1),    # n = 1 with exponent j = 1
    (2, 0.5),  # n = 2 with exponent j = 1/2
]

In [10]:
import time

In [29]:
# Fit the model on the VIX level with find_optimal_parameters_exp (imported from
# empirical_study_2exp) and print the elapsed time in seconds.
# The result is indexed later with the keys 'train_r2', 'test_r2' and 'opt_params'.
# perf_counter() is used instead of time() because it is monotonic: the measured duration
# cannot be distorted by system clock adjustments during a multi-minute fit.
start = time.perf_counter()
sol = find_optimal_parameters_exp(
    vol=vix, index=spx, p=p, setting=setting,
    train_start_date=train_start, test_start_date=test_start, test_end_date=test_end,
    max_delta=max_delta,
)
print(time.perf_counter() - start)

323.3469316959381


In [30]:
# Same fit as for the VIX level, but the target is vix.shift(1): the value aligned with
# each date is the VIX of the previous row. Prints the elapsed time in seconds.
# perf_counter() is used instead of time() because it is monotonic: the measured duration
# cannot be distorted by system clock adjustments during a multi-minute fit.
start = time.perf_counter()
sol_shift = find_optimal_parameters_exp(
    vol=vix.shift(1), index=spx, p=p, setting=setting,
    train_start_date=train_start, test_start_date=test_start, test_end_date=test_end,
    max_delta=max_delta,
)
print(time.perf_counter() - start)

526.3570742607117


In [31]:
# Fit find_optimal_parameters_exp with dvix (previous VIX minus current VIX) as the target
# instead of the VIX level, and print the elapsed time in seconds.
# perf_counter() is used instead of time() because it is monotonic: the measured duration
# cannot be distorted by system clock adjustments.
start = time.perf_counter()
sol_dvix = find_optimal_parameters_exp(
    vol=dvix, index=spx, p=p, setting=setting,
    train_start_date=train_start, test_start_date=test_start, test_end_date=test_end,
    max_delta=max_delta,
)
print(time.perf_counter() - start)

3.5484347343444824


In [None]:
# NOTE: disabled call kept for reference only. Unlike the live calls in this notebook it
# passes neither `p` nor `setting`, and it uses a different test window
# (2015-02-23 to 2017-04-12) with hard-coded dates and max_delta.
# start = time.time()
# sol_norm = find_optimal_parameters_normalized_tspl(vol=vix.shift(1), index=spx, train_start_date = pd.to_datetime("2000-01-03"), test_start_date = pd.to_datetime("2015-02-23"), test_end_date = pd.to_datetime("2017-04-12"), max_delta=1000)
# print(time.time() - start)

In [32]:
# Fit the model on the VIX level with find_optimal_parameters_normalized_tspl (imported from
# empirical_study_normalized_tspl) and print the elapsed time in seconds.
# perf_counter() is used instead of time() because it is monotonic: the measured duration
# cannot be distorted by system clock adjustments.
start = time.perf_counter()
sol_norm = find_optimal_parameters_normalized_tspl(
    vol=vix, index=spx, p=p, setting=setting,
    train_start_date=train_start, test_start_date=test_start, test_end_date=test_end,
    max_delta=max_delta,
)
print(time.perf_counter() - start)

13.903037786483765


In [33]:
# Fit find_optimal_parameters_normalized_tspl with vix.shift(1) as the target: the value
# aligned with each date is the VIX of the previous row. Prints the elapsed time in seconds.
# perf_counter() is used instead of time() because it is monotonic: the measured duration
# cannot be distorted by system clock adjustments.
start = time.perf_counter()
sol_norm_shift = find_optimal_parameters_normalized_tspl(
    vol=vix.shift(1), index=spx, p=p, setting=setting,
    train_start_date=train_start, test_start_date=test_start, test_end_date=test_end,
    max_delta=max_delta,
)
print(time.perf_counter() - start)

26.18277668952942


In [34]:
# Fit find_optimal_parameters_normalized_tspl with dvix (previous VIX minus current VIX) as
# the target, and print the elapsed time in seconds.
# The result is indexed later with the keys 'train_r2', 'test_r2' and 'opt_params'.
# perf_counter() is used instead of time() because it is monotonic: the measured duration
# cannot be distorted by system clock adjustments.
start = time.perf_counter()
sol_norm_dvix = find_optimal_parameters_normalized_tspl(
    vol=dvix, index=spx, p=p, setting=setting,
    train_start_date=train_start, test_start_date=test_start, test_end_date=test_end,
    max_delta=max_delta,
)
print(time.perf_counter() - start)

8.286419153213501


In [None]:
# NOTE: disabled call, a verbatim duplicate of the commented-out cell that precedes the
# normalized-TSPL fits. It passes neither `p` nor `setting` and uses a different test window
# (2015-02-23 to 2017-04-12) than the live calls.
# start = time.time()
# sol_norm = find_optimal_parameters_normalized_tspl(vol=vix.shift(1), index=spx, train_start_date = pd.to_datetime("2000-01-03"), test_start_date = pd.to_datetime("2015-02-23"), test_end_date = pd.to_datetime("2017-04-12"), max_delta=1000)
# print(time.time() - start)

In [35]:
# Training-window score ('train_r2', presumably R^2) of the find_optimal_parameters_exp fit on the VIX level.
sol['train_r2']

0.9471796382896874

In [36]:
# Test-window score ('test_r2', presumably R^2) of the find_optimal_parameters_exp fit on the VIX level.
sol['test_r2']

0.867547810167539

In [37]:
# Training-window score ('train_r2', presumably R^2) of the fit whose target is vix.shift(1).
sol_shift['train_r2']

0.9303854411248064

In [38]:
# Test-window score ('test_r2', presumably R^2) of the fit whose target is vix.shift(1).
sol_shift['test_r2']

0.8143168467279575

In [39]:
# Training-window score ('train_r2', presumably R^2) of the fit whose target is dvix.
sol_dvix['train_r2']

0.26845154061438414

In [40]:
# Test-window score ('test_r2', presumably R^2) of the fit whose target is dvix.
sol_dvix['test_r2']

0.19304133520658562

In [41]:
# Fitted parameters of the find_optimal_parameters_exp fit on the VIX level
# (the displayed dict holds beta_*, theta_* and two-element lambda_* entries).
sol['opt_params']

{'beta_0': 0.05431907078829664,
 'theta_1': 0.8101658085631585,
 'lambda_1': array([52.69878387, 17.2437368 ]),
 'beta_1': -0.07789648173387251,
 'theta_2': 0.43008970008778785,
 'lambda_2': array([3.78857201, 1.15759303]),
 'beta_2': 0.821367161202075}

In [42]:
# Fitted parameters of the find_optimal_parameters_exp fit whose target is dvix.
sol_dvix['opt_params']

{'beta_0': 0.0009064502242806891,
 'theta_1': 4.510785343601462e-08,
 'lambda_1': array([1068.45948895,  899.18825797]),
 'beta_1': 0.0010591774224455704,
 'theta_2': 0.10623163080470563,
 'lambda_2': array([1.64673437, 0.19744514]),
 'beta_2': -0.0038210484659504163}

In [43]:
# Fitted parameters of the find_optimal_parameters_exp fit whose target is vix.shift(1).
sol_shift['opt_params']

{'beta_0': 0.05811450681242019,
 'theta_1': 0.6121202808238686,
 'lambda_1': array([14.34507747, 13.78335889]),
 'beta_1': -0.09693240492327206,
 'theta_2': 0.335403625086267,
 'lambda_2': array([0.87832703, 1.10822483]),
 'beta_2': 0.8085007271902024}

In [45]:
# Training-window score ('train_r2', presumably R^2) of the normalized-TSPL fit whose target is dvix.
sol_norm_dvix['train_r2']

0.2699425370107994

In [46]:
# Test-window score ('test_r2', presumably R^2) of the normalized-TSPL fit whose target is dvix.
sol_norm_dvix['test_r2']

0.19401204092287172

In [47]:
# Fitted parameters of the normalized-TSPL fit whose target is dvix
# (the displayed dict holds beta_*, alpha_* and delta_* entries).
sol_norm_dvix['opt_params']

{'beta_0': -0.0014316438313277688,
 'beta_1': 4.348968536004848e-05,
 'beta_2': 0.008200376443027048,
 'alpha_1': 9.999999954142393,
 'alpha_2': 9.999909932161737,
 'delta_1': 0.0003428292729505453,
 'delta_2': 0.966238055794999}

In [None]:
# Unexecuted cell: repeats the display of the fitted parameters of `sol` shown earlier in the notebook.
sol['opt_params']