In [15]:
import autograd.numpy as np
import pandas as pd
import autograd as ag
import os
from tqdm.notebook import tqdm
import yfinance as yf

# Generate data from a 4D GARCH process

This notebook generates data from the 4D simulator presented in Section 3.3.1, emulating the data that can be generated from a GARCH(1,1) process.

In [1]:
# ticker = '^GSPC'
# start = '2015-12-31'
# end = '2021-06-25'
# #downloading data
# prices = yf.download(ticker, start, end)['Close']
# #calculating returns
# returns = (np.array(prices)[1:]/np.array(prices)[:-1] - 1) * 100

In [51]:
# Simulator configuration: dimensions, forecast horizon and the priors that
# gen_dataset samples from.
nInputDim = 4 # Number of latent GARCH parameters (mu, omega, alpha, beta)
nOutputDim = 1 # Vol output
T = 5 # Number of future steps to forecast
forecast_window = range(1, T + 1)  # Forecast horizons 1, ..., T
# Latent params are sampled from a uniform prior between these two bounds
latent_priorMin = 0
latent_priorMax = 1 

# Vol prior informed by domain knowledge (Gamma, parameterised by shape and scale)
vol_prior_shape = 0.5 
vol_prior_scale = 1.5

# Returns prior informed by domain knowledge (Normal, parameterised by mean and std)
prev_ret_prior_mean = 0.1
prev_ret_prior_std = 3

In [52]:
# GARCH(1,1) process that only returns conditional stds
def garch(params, conditional, ret, t, prev_sigma=None):
    """Return the GARCH(1,1) conditional standard deviation at horizon ``t``.

    Parameters
    ----------
    params : indexable of length >= 4
        ``(mu, omega, alpha, beta)``, read as ``params[0]`` .. ``params[3]``.
    conditional : float
        Previous conditional standard deviation; only used when ``t == 1``.
    ret : float
        Previous return; only used when ``t == 1``.
    t : int
        Forecast horizon. ``t == 1`` applies the one-step update; every other
        value applies the multi-step recursion on ``prev_sigma``.
    prev_sigma : float, optional
        Standard deviation forecast of the previous horizon. Required whenever
        ``t != 1`` and ignored when ``t == 1``.

    Returns
    -------
    float
        Square root of the forecast variance.

    Raises
    ------
    ValueError
        If ``t != 1`` and ``prev_sigma`` is ``None``.
    """
    mu, omega, alpha, beta = params[0], params[1], params[2], params[3]

    if t == 1:
        # One-step update driven by the last observed residual and volatility.
        resid = ret - mu
        sigma2 = omega + alpha * resid ** 2 + beta * conditional ** 2
    else:
        if prev_sigma is None:
            # Fail with a clear message instead of a TypeError from `None ** 2`.
            raise ValueError("prev_sigma is required when t != 1")
        # Multi-step recursion: the squared residual is replaced by the
        # previous variance forecast, so alpha and beta act on the same term.
        sigma2 = omega + (alpha + beta) * prev_sigma ** 2

    return sigma2 ** 0.5

In [53]:
# Gradient of the conditional std with respect to the latent parameter vector
# (positional argument 0 of `garch`); all other arguments are held constant.
garch_agrad = ag.grad(garch, argnum=0)

In [54]:
def gen_dataset(n):
    """Simulate ``n`` GARCH(1,1) volatility paths and their parameter gradients.

    Latent parameters are drawn from the uniform prior, the previous volatility
    from the Gamma prior and the previous return from the Normal prior (in that
    order, all via the global NumPy RNG). For every sample and every horizon in
    ``forecast_window`` the conditional std is computed with ``garch`` and its
    gradient with ``garch_agrad``.

    NOTE(review): for horizons after the first, the previous forecast is passed
    to ``garch_agrad`` as a plain float, so the stored gradient is the partial
    derivative with the previous forecast held fixed, not the derivative through
    the whole recursion. Confirm this is the intended quantity.

    Parameters
    ----------
    n : int
        Number of samples to generate.

    Returns
    -------
    synInputs_latent : pandas.DataFrame, shape (n, nInputDim)
        Sampled latent parameters.
    prev_vol_samples : pandas.DataFrame, shape (n, 1)
        Sampled previous volatilities.
    prev_ret_samples : pandas.DataFrame, shape (n, 1)
        Sampled previous returns.
    synPureData : pandas.DataFrame, shape (n, len(forecast_window))
        Conditional stds, one column per horizon (columns = ``forecast_window``).
    synPureData_grad : pandas.DataFrame
        Gradients with MultiIndex columns ``(horizon, parameter index)``; stored
        as float64.
    """
    n_horizons = len(forecast_window)

    # Sampling order is kept fixed so a given RNG state always yields the same data.
    priorSamples_latent = np.random.uniform(latent_priorMin, latent_priorMax, (n, nInputDim))
    prev_vol_samples = np.random.gamma(shape=vol_prior_shape, scale=vol_prior_scale, size=(n, 1))
    prev_ret_samples = np.random.normal(loc=prev_ret_prior_mean, scale=prev_ret_prior_std, size=(n, 1))

    synOutputs = []  # one list of n stds per horizon
    # Gradients are collected in a numeric buffer and wrapped in a DataFrame
    # once, instead of assigning cell by cell into an object-dtype frame.
    grad_values = np.zeros((n, n_horizons * nInputDim))

    for k, t in enumerate(forecast_window):
        first_col = k * nInputDim
        temp_out = []
        for s in range(n):
            params = priorSamples_latent[s, :]
            # Index down to scalars so garch returns a scalar; float() on a
            # 1-element array is deprecated in recent NumPy releases.
            prev_vol = prev_vol_samples[s, 0]
            prev_ret = prev_ret_samples[s, 0]
            # The first horizon uses the sampled state; later horizons chain
            # on the forecast of the previous horizon.
            prev_sigma = None if t == 1 else synOutputs[-1][s]

            temp_out.append(float(garch(params, prev_vol, prev_ret, t, prev_sigma)))
            grad_values[s, first_col:first_col + nInputDim] = garch_agrad(
                params, prev_vol, prev_ret, t, prev_sigma
            )
        synOutputs.append(temp_out)

    synPureData = pd.DataFrame(synOutputs).T
    synPureData.columns = forecast_window

    grad_columns = pd.MultiIndex.from_product([forecast_window, range(nInputDim)])
    synPureData_grad = pd.DataFrame(grad_values, index=range(n), columns=grad_columns)

    synInputs_latent = pd.DataFrame(priorSamples_latent)
    prev_vol_samples = pd.DataFrame(prev_vol_samples)
    prev_ret_samples = pd.DataFrame(prev_ret_samples)

    return synInputs_latent, prev_vol_samples, prev_ret_samples, synPureData, synPureData_grad

In [55]:
# Seed the global NumPy RNG so that Restart & Run All regenerates the same
# datasets. Train and test are drawn one after the other from the same stream,
# so they remain distinct samples.
np.random.seed(0)
x_train_latent, x_train_vol, x_train_ret, y_train, y_train_grad = gen_dataset(1000)
x_test_latent, x_test_vol, x_test_ret, y_test, y_test_grad = gen_dataset(1000)

In [60]:
# #os.makedirs('data')
# # Save data
# x_train_latent.to_csv('data/t5/x_train_latent.csv')
# x_train_vol.to_csv('data/t5/x_train_vol.csv')
# x_train_ret.to_csv('data/t5/x_train_ret.csv')
# y_train.to_csv('data/t5/y_train.csv')
# y_train_grad.to_csv('data/t5/y_train_grad.csv')

# x_test_latent.to_csv('data/t5/x_test_latent.csv')
# x_test_vol.to_csv('data/t5/x_test_vol.csv')
# x_test_ret.to_csv('data/t5/x_test_ret.csv')
# y_test.to_csv('data/t5/y_test.csv')
# y_test_grad.to_csv('data/t5/y_test_grad.csv')