In [1]:
%load_ext autoreload
%autoreload 2

#### Notebook Description

This notebook is used to fit the CTMC-VAE model once the CTMC model parameters have been prefitted to the IV data. Slight modifications can be made to fit the other models shown in the manuscript (such as the Lévy processes).

This notebook allows the exploration of certain hyperparameters, specifically beta, network structure, latent dimension, and batch size; the resulting networks are written to separate subfolders of the Networks folder. For details of the full algorithm, please refer to our manuscript "Arbitrage-Free Implied Volatility Surface Generation with Variational Autoencoders".

In [2]:
import torch
import numpy as np
import pickle
import ctmc
from VAE_fit import fit_VAE
from pathlib import Path
import json
import matplotlib.pyplot as plt
from copy import deepcopy as dc
import pandas as pd

We first define a JSON encoder so that each network's hyperparameters can be saved and read back when the network is loaded.

In [3]:
class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to built-in types."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        NumPy integers become ``int``, NumPy floats become ``float`` and
        NumPy arrays become (nested) lists. Any other object is handed to
        the base class implementation.
        """
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # Not a NumPy type we know how to convert: defer to json.JSONEncoder.
        return super().default(obj)

Next, we load the fitted parameters from the appropriate data file and normalize them to prepare for VAE training.

In [4]:
# Identifier used to build both the input data path and the output folder name.
ID = 'AUD'
# Root folder that receives one sub-folder per trained network.
batch_output_dir = f"Networks/{ID}_CTMC_VAE/"

In [5]:
# Read the train/validation day split, then the pre-fitted parameters for ID.
# NOTE(review): pickle.load executes arbitrary code; only use trusted files.
with open("all_cur_train_valid_days_new.pickle", 'rb') as handle:
    all_days = pickle.load(handle)
with open(f"Data/{ID}_fitted_params.pickle", 'rb') as handle:
    params = pickle.load(handle)

# Split the loaded record into its keys (as an array) and its values.
ts = np.array(params['keys'])
params = params['values']

# Gather the parameter rows whose key equals each training day. Only the first
# ten characters of the day string are used (presumably the YYYY-MM-DD part).
train_params = []
for day in all_days['train']:
    day_rows = params[np.where(ts == pd.Timestamp(day[:10]))]
    train_params.append(torch.squeeze(day_rows))
train_params = torch.vstack(train_params)

# Standardise every parameter column with the training-set mean and std.
train_mean = train_params.mean(dim=0)
train_std = train_params.std(dim=0)
train_norm = (train_params - train_mean) / train_std

We then define the hyperparameters we wish to explore; the values provided here are simply examples.

In [6]:
# Number of epochs before stopping
epochs = 2000

# Hyperparameters to run batch: every combination of the listed values is
# enumerated in param_mesh below.
param_dict = {
    'batch_size': [200],
    'latent_size': np.array([3, 5, 10, 15]),
    'hidden_dim': np.array([64]),
    'layers': np.array([4]),
    'beta': np.array([0.01, 0.1, 1, 10]),
}

tau = np.array([0.08333333, 0.16666667, 0.25, 0.5, 0.75, 1., 3., 5.])

# Build the grid of index combinations. Columns follow the alphabetically
# sorted hyperparameter names; each row holds one index into every
# param_dict entry.
sorted_names = sorted(param_dict.keys())
index_axes = [list(range(len(param_dict[name]))) for name in sorted_names]
stacked_grids = np.array(np.meshgrid(*index_axes), dtype=int)
param_mesh = stacked_grids.T.reshape(-1, len(sorted_names))

In [7]:
def dict_idx(dict_keys, key):
    """Return the position of ``key`` among the alphabetically sorted ``dict_keys``.

    Raises ``ValueError`` when ``key`` is not one of ``dict_keys``.
    """
    return sorted(dict_keys).index(key)

Finally, we run the VAE training procedure and save the resulting network in a separate folder for each distinct set of hyperparameters.

In [9]:
def run_single_net(idx):
    """Train the VAE for row ``idx`` of ``param_mesh`` and write it to disk.

    The hyperparameters are looked up in ``param_dict`` through the indices
    stored in ``param_mesh[idx]``. Everything is written to
    ``batch_output_dir + "network<idx>/"``: the state dict of each network
    part, a JSON summary of the hyperparameters (``param_sum.txt``) and a
    pickle with the normalized training data and its mean and standard
    deviation (``data_sum.pickle``).
    """
    print("Starting run: " + str(idx))

    def chosen(name):
        # Value of hyperparameter `name` selected by this grid row.
        return param_dict[name][param_mesh[idx, dict_idx(param_dict.keys(), name)]]

    # Output folder for this network (created if it does not exist yet).
    net_output_dir = batch_output_dir + "network" + str(idx) + "/"
    Path(net_output_dir).mkdir(parents=True, exist_ok=True)

    # Layer widths: the base width doubled once per layer, widest first.
    hidden_dims = [chosen('hidden_dim') * (2 ** depth) for depth in range(chosen('layers'))]
    hidden_dims.reverse()

    batch_size = chosen('batch_size')
    latent_size = chosen('latent_size')
    beta = chosen('beta')

    # Fit the network; a deep copy of hidden_dims is passed, presumably so the
    # list saved below is unaffected by anything fit_VAE does to its argument.
    _losses, vae = fit_VAE(
        epochs=epochs,
        lr=0.001,
        full_data=train_norm.clone().detach(),
        batch_size=batch_size,
        weight_decay=1e-3,
        latent_dim=latent_size,
        beta=beta,
        hidden_dims=dc(hidden_dims),
    )

    # Save each part of the network under a file named after the attribute.
    for part in ("encoder", "fc_mu", "fc_var", "decoder_input", "decoder", "final_layer"):
        torch.save(getattr(vae, part).state_dict(), net_output_dir + part)

    # Write the hyperparameter summary as JSON (NumPy values handled by NpEncoder).
    sum_dict = {
        'batch_size': batch_size,
        'latent_size': latent_size,
        'hidden_dim': hidden_dims,
        'beta': beta,
    }
    with open(net_output_dir + 'param_sum.txt', 'w') as file:
        file.write(json.dumps(sum_dict, cls=NpEncoder))

    # Store the normalized training data with its normalization mean and
    # standard deviation in a reference file inside the network folder.
    data_sum = {
        'train_norm': train_norm,
        'train_std': train_std,
        'train_mean': train_mean,
    }
    with open(net_output_dir + 'data_sum.pickle', 'wb') as handle:
        pickle.dump(data_sum, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [10]:
# Train one network per row of the hyperparameter grid. The number of runs is
# taken from param_mesh rather than hard-coded, so editing param_dict can
# neither skip combinations nor index past the end of the grid.
for i in range(len(param_mesh)):
    run_single_net(i)

Starting run: 0


  Variable._execution_engine.run_backward(


Starting run: 1
Starting run: 2
Starting run: 3
Starting run: 4
Starting run: 5
Starting run: 6
Starting run: 7
Starting run: 8
Starting run: 9
Starting run: 10
Starting run: 11
Starting run: 12
Starting run: 13
Starting run: 14
Starting run: 15
