In [1]:
import ml_collections
import copy
import numpy as np
import matplotlib.pyplot as plt
import yaml
from tqdm import tqdm
import os
import torch
import torch.nn as nn
from os import path as pt
import pickle
from torch.utils.data import DataLoader, TensorDataset
from src.evaluation.summary import full_evaluation
from src.utils import set_seed, save_obj, load_obj

In [2]:
def load_pickled_tensor(path):
    """Unpickle the array stored at ``path`` and wrap it in a ``torch.Tensor``.

    NOTE(security): ``pickle.load`` can execute arbitrary code while
    deserialising, so this must only be pointed at trusted files.
    """
    with open(path, "rb") as f:
        return torch.tensor(pickle.load(f))


train_log_return = load_pickled_tensor("./data/ref_log_return.pkl")
print(train_log_return.shape)

train_init_price = load_pickled_tensor("./data/ref_price.pkl")
print(train_init_price.shape)

# Later cells index both tensors with the same permutation, so they must
# contain the same number of samples along the first axis.
assert train_log_return.shape[0] == train_init_price.shape[0], (
    "log-return and initial-price arrays have different sample counts"
)

torch.Size([8937, 24, 3])
torch.Size([8937, 1, 3])


### Generative models for time series generation

In [3]:
# Load configuration dict
config_dir = 'configs/config.yaml'
with open(config_dir) as file:
    config = ml_collections.ConfigDict(yaml.safe_load(file))

# Seed the RNGs before any stochastic step (data split, training).
set_seed(config.seed)

# Resolve the device actually used: "cuda:0" only when the config asks for
# "cuda" and a GPU is available, otherwise "cpu".
# Assign the field directly: ConfigDict.update() treats extra keyword
# arguments as config entries, so passing ``allow_val_change=True`` there
# would add a stray ``allow_val_change`` key to the config.
use_cuda = config.device == "cuda" and torch.cuda.is_available()
config.device = "cuda:0" if use_cuda else "cpu"
    
class XYDataset(TensorDataset):
    """Dataset of paired samples ``(X[i], Y[i])``.

    Args:
        X: indexable container of inputs; must expose ``.shape``.
        Y: indexable container of targets with the same length as ``X``.

    Raises:
        ValueError: if ``X`` and ``Y`` do not have the same length.
    """

    def __init__(self, X, Y):
        if len(X) != len(Y):
            raise ValueError(
                f"X and Y must have the same length, got {len(X)} and {len(Y)}"
            )
        self.X = X
        self.Y = Y
        self.shape = X.shape
        # TensorDataset.__init__ is not called, so populate ``tensors``
        # explicitly; code relying on the inherited attribute keeps working.
        self.tensors = (X, Y)

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, index):
        """Return the ``(x, y)`` pair stored at ``index``."""
        return self.X[index], self.Y[index]

### Data Construction

Since we have a limited amount of data, we use 5-fold cross-validation. Only one train/validation split is illustrated here.

In [4]:
# Shuffle the sample indices once, then cut them 80/20 into train/validation.
n_samples = train_log_return.shape[0]
perm_idx = torch.randperm(n_samples)
train_size = int(0.8 * n_samples)
train_idx = perm_idx[:train_size]
val_idx = perm_idx[train_size:]


def _on_device_as_float(tensor):
    """Move ``tensor`` to ``config.device`` and cast it to ``torch.float``."""
    return tensor.to(config.device).to(torch.float)


# The same index sets select the log-return paths and their initial prices,
# so the pairs stay aligned.
cv_training_data = _on_device_as_float(train_log_return[train_idx])
cv_init_price = _on_device_as_float(train_init_price[train_idx])
cv_validation_data = _on_device_as_float(train_log_return[val_idx])
cv_val_init_price = _on_device_as_float(train_init_price[val_idx])

In [5]:
# Pair every initial price with its log-return path and serve the pairs in
# shuffled mini-batches.
training_set = TensorDataset(cv_init_price, cv_training_data)
train_dl = DataLoader(training_set, batch_size=config.batch_size, shuffle=True)

# Record the size of the last axis of a single time step in the config.
config.input_dim = cv_training_data[0, 0].shape[-1]

### Generative model

Here we construct a generator and a discriminator for this task. Both the generator and the discriminator take the time series as input. The training algorithm is implemented by `TailGANTrainer`.

In [6]:
from src.baselines.networks.discriminators import Discriminator
from src.baselines.networks.generators import Generator
# Import the trainer explicitly instead of `import *`, so the origin of the
# name is visible and the notebook namespace is not polluted.
from src.baselines.TailGAN import TailGANTrainer

### Initialize the generator, discriminator and the trainer

In [7]:
# Both networks are built from the shared config and then handed to the
# trainer together with the training DataLoader.
generator = Generator(config)
discriminator = Discriminator(config)

trainer = TailGANTrainer(
    G=generator,
    D=discriminator,
    train_dl=train_dl,
    batch_size=config.batch_size,
    n_gradient_steps=config.steps,
    config=config,
)


### Model training and saving

In [8]:
# Model training: run the trainer on the device resolved in the
# configuration cell.
trainer.fit(config.device)
# NOTE(review): saving is disabled, so the weights trained by the call above
# are not written to disk; the generation cell loads
# './sample_submission_bundle/model_dict.pkl' instead. Confirm this is
# intended before relying on the evaluation numbers.
# save_obj(trainer.G.state_dict(), './sample_submission_bundle/model_dict.pkl')

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:51<00:00, 10.21s/it]


### Synthetic data generation

In [9]:
# Restore generator weights from the checkpoint on disk.
# NOTE(review): this replaces whatever weights `generator` holds after
# training with the stored checkpoint, which must already exist on disk.
g_state_dict = load_obj('./sample_submission_bundle/model_dict.pkl')

generator.load_state_dict(g_state_dict)

# Switch to evaluation mode before sampling.
generator.eval()

# Number of synthetic paths to draw for the evaluation below.
eval_size = 1800

# Sampling needs no gradients, so autograd tracking is disabled.
with torch.no_grad():
    fake_data = generator(batch_size = eval_size, device=config.device)

print(fake_data.shape)
# Save the data
# save_obj(fake_data, './sample_submission_bundle/fake_log_return.pkl')

torch.Size([1800, 24, 3])


### Model evaluation

Generic time series test metrics

In [10]:
from src.evaluation.strategies import log_return_to_price

# Use a dedicated name so the training config path stored in `config_dir`
# is not overwritten when this cell runs.
eval_config_dir = 'src/evaluation/config.yaml'
with open(eval_config_dir) as file:
    eval_config = ml_collections.ConfigDict(yaml.safe_load(file))

# `train_init_price` was never moved to `config.device`, whereas `fake_data`
# is generated on it. Align the devices so the conversion also works when
# running on CUDA; this is a no-op when both already share a device.
fake_init_price = train_init_price[:eval_size, :, :].to(fake_data.device)
fake_prices = log_return_to_price(fake_data, fake_init_price)
cv_val = log_return_to_price(cv_validation_data, cv_val_init_price)

# Sanity check: every generated price must be strictly positive.
all_positive = (fake_prices > 0).all()
if not all_positive:
    raise ValueError("Sanity Check Failed: Some fake prices are not positive.")

# Score the synthetic prices against the held-out validation prices and
# print each metric returned by the evaluation.
res_dict_regular = full_evaluation(fake_prices, cv_val, eval_config)
for k, v in res_dict_regular.items():
    print(k, v)


200
 No metrics enabled in group = stylized_fact_scores
 No metrics enabled in group = implicit_scores
 No metrics enabled in group = sig_scores
 No metrics enabled in group = permutation_test
 No metrics enabled in group = distance_based_metrics
---- evaluation metric = var in group = tail_scores ----


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00,  5.87it/s]


---- evaluation metric = es in group = tail_scores ----


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00,  5.92it/s]


---- evaluation metric = max_drawback in group = trading_strat_scores ----


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:01<00:00,  4.50it/s]


---- evaluation metric = cumulative_pnl in group = trading_strat_scores ----


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00,  5.76it/s]

var_mean 0.032158513
var_std 0.01205634
es_mean 0.024387587
es_std 0.003264293
max_drawback_mean 0.03167302
max_drawback_std 0.0011285115
cumulative_pnl_mean 0.06434102
cumulative_pnl_std 0.0017977617



