In [1]:
!pip --quiet install pytorch-warmup

[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621[0m[33m
[0m[33m  DEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621[0m[33m
[0m[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621[0m[33m
You should consider upgrading via the '/usr/local/opt/python@3.9/bin/python3.9 -m pip install --upgrade pip' command.[0m[33m
[0m

In [2]:
from IPython.display import clear_output 
!pip --quiet install pytorch_spiking pytorch_lightning 
clear_output()

In [4]:
import os
import time
import sys
from typing import List

import numpy as np
import pandas as pd
from sklearn.metrics import r2_score
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules import Transformer, TransformerEncoder, TransformerEncoderLayer, LayerNorm
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import pytorch_lightning as pl
import pytorch_spiking
import pytorch_warmup as warmup

import matplotlib.pyplot as plt

In [5]:
config = {
  "plots": {
        "show_plots": False,
        "xticks_interval": 1200,
        "color_actual": "#001f3f",
        "color_train": "#3D9970",
        "color_val": "#0074D9",
        "color_test": "#FF4136",
        "color_pred_train": "#3D9970",
        "color_pred_val": "#0074D9",
        "color_pred_test": "#FF4136",
    },
    "data": {
        "train_split_size": 0.80,
        "input_window": 150,
        "output_window": 50,
        "train_batch_size": 32,
        "eval_batch_size": 1,
        "scaler": "normal"
    }, 
    "model_transformer": {
        "feature_size": 250,
        "nhead": 10,
        "num_layers": 2,
        "dropout": 0.2,
        "out_features": 1,
        "init_range": 2, #0.5
        "lr": 0.0002, #0.0001,
        "loss": "dilate"
    },
    "paths": {
        "drive": {
            "agg_trade": {
                "train": "/content/drive/MyDrive/IP/Repos/HFTransformer/input_data/",
                "test": "/content/drive/MyDrive/IP/Repos/HFTransformer/input_data/", 
            },
            "orderbook": {
                "train": "/content/drive/MyDrive/IP/Repos/HFTransformer/input_data/",
                "test": "/content/drive/MyDrive/IP/Repos/HFTransformer/input_data/",
            },
            "models": "/content/drive/MyDrive/IP/Repos/HFTransformer/models/",
            "figures": "/content/drive/MyDrive/IP/Repos/HFTransformer/figures/",
            "utils": "/content/drive/MyDrive/IP/Repos/HFTransformer/utils/",
        },
        "local": {
            "agg_trade": {
                "train": "./input_data/",
                "test": "./input_data/", 
            },
            "orderbook": {
                "train": "./input_data/",
                "test": "./input_data/",
            },
            "models": "./models/",
            "figures": "./figures/",
        }
    }
}

In [6]:
device = "cuda" if torch.cuda.is_available() else "cpu"
drive = True

In [7]:
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google.colab'

## Data preparation: augmenting raw financial data

In [8]:
def augment_trade_data(df, lag, forecast_window=None):
    '''
    Augmenting input data.
    '''
    if forecast_window:
        df['lag_return'] = np.log(df['price'].shift(forecast_window)/df['price'].shift(forecast_window+1))
        return df.iloc[forecast_window+1:,:]
    if lag == 0:
        return df
    else:
        col_name = 'log_lag'+str(lag)+'_price'
        df[col_name] = np.log(df.price) - np.log(df.price).shift(lag)
        return df.iloc[lag:,:]

def select_features(df_train, df_test, features):
    '''
    Selecting relevant features.
    '''
    train_data = df_train[features]
    test_data = df_test[features]
    return train_data, test_data

## Defining Transformer Model

In [9]:
# The following helper functions and models were based on this repo: https://github.com/AIStream-Peelout/flow-forecast

class SimpleTransformer(nn.Module):
    def __init__(self, n_time_series, seq_len=100, output_seq_len=50, d_model=128,
                 n_heads=10, n_encoder_layers=2, n_decoder_layers=2, dropout=0.1, dim_feedforward=2048):
        super(SimpleTransformer, self).__init__()
        self.n_time_series = n_time_series
        self.seq_len = seq_len
        self.output_seq_len = output_seq_len
        self.d_model = d_model
        self.n_heads = n_heads
        self.n_encoder_layers = n_encoder_layers
        self.n_decoder_layers = n_decoder_layers
        self.dropout = dropout
        self.dim_feedforward = dim_feedforward

        self.mask = generate_square_subsequent_mask(self.seq_len).to(device)
        self.dense_shape = nn.Linear(self.n_time_series, self.d_model)
        self.pe = SimplePositionalEncoding(self.d_model)
        self.transformer = Transformer(self.d_model, nhead=self.n_heads, dropout=self.dropout, 
                                       num_encoder_layers=self.n_encoder_layers,
                                       num_decoder_layers=self.n_decoder_layers, activation=pytorch_spiking.SpikingActivation(nn.PReLU()),
                                        dim_feedforward=self.dim_feedforward)
        self.final_layer = nn.Linear(self.d_model, 1)
        self.tgt_mask = generate_square_subsequent_mask(self.output_seq_len).to(device)

    def forward(self, x, t, tgt_mask=None, src_mask=None):
        x = self.encode_sequence(x[:, :-1, :], src_mask)
        return self.decode_seq(x, t, tgt_mask)

    def pre_encode(self, x):
        x = self.dense_shape(x)
        x = self.pe(x)
        x = x.permute(1, 0, 2)
        return x

    def encode_sequence(self, x, src_mask=None):
        x = self.pre_encode(x)
        x = self.transformer.encoder(x, src_mask)
        return x

    def decode_seq(self, mem, t, tgt_mask=None, view_number=None):
        if view_number is None:
            view_number = self.output_seq_len
        if tgt_mask is None:
            tgt_mask = self.tgt_mask
        t = self.pre_encode(t)
        x = torch.Tensor(0)
        x = self.transformer.decoder(t, mem, tgt_mask=tgt_mask)
        x = self.final_layer(x)
        x = x.view(-1, view_number)
        return x
    
    
class SimplePositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=10000):
        super(SimplePositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return x
        #return self.dropout(x)

def generate_square_subsequent_mask(sz):
    """ Generates a square mask for the sequence. The masked positions are filled with float('-inf').
        Unmasked positions are filled with float(0.0).
    """
    mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
    mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
    return mask

def greedy_decode(model, src, max_len, real_target, device, unsqueeze_dim=1):
    """
    Mechanism to sequentially decode the model
    """
    src = src.float()
    real_target = real_target.float()
    if hasattr(model, "mask"):
        src_mask = model.mask
    memory = model.encode_sequence(src, src_mask)
    # Get last element of src array to forecast from
    ys = src[:, -1, :].unsqueeze(unsqueeze_dim)
    for i in range(max_len):
        mask = generate_square_subsequent_mask(i + 1).to(device)
        with torch.no_grad():
            out = model.decode_seq(memory, Variable(ys), Variable(mask), i + 1)
            real_target[:, i, 0] = out[:, i]
            src = torch.cat((src, real_target[:, i, :].unsqueeze(1)), 1)
            ys = torch.cat((ys, real_target[:, i, :].unsqueeze(1)), 1)
        memory = model.encode_sequence(src[:, i + 1:, :], src_mask)
    return ys[:, 1:, :]

## Defining CSV Loader

In [10]:
class CSVDataLoader(Dataset):
    def __init__(self, df, forecast_history, forecast_length, target_col, start_stamp=0, end_stamp=None, LAG=0):
        """
        A data loader that takes a CSV file and properly batches for use in training/eval a PyTorch model
        """
        super().__init__()
        self.forecast_history = forecast_history
        self.forecast_length = forecast_length
        self.LAG = LAG
        self.df = df.copy()
        if start_stamp != 0 and end_stamp is not None:
            if self.LAG == 0:
                self.df = self.df[start_stamp:end_stamp]
            else:
                self.df = self.df[start_stamp:end_stamp+self.LAG]
        elif start_stamp != 0:
            self.df = self.df[start_stamp:]
        elif end_stamp is not None:
            if self.LAG == 0:
                self.df = self.df[:end_stamp]
            else:
                self.df = self.df[:end_stamp+self.LAG]
        if (len(self.df) - self.df.count()).max() != 0:
            print('Missing values in data.')
            print(len(self.df) - self.df.count())
        self.counter = 0
        self.targ_col = target_col

    def __getitem__(self, idx):
        rows = self.df.iloc[idx: self.forecast_history + idx].copy().to_numpy()
        epsilon = 0.0001
        targs_idx_start = self.forecast_history + idx
        if self.LAG == 0:
            targ_rows = self.df.iloc[targs_idx_start: self.forecast_length + targs_idx_start].copy().to_numpy()
        else:
            future_prices = self.df.iloc[targs_idx_start + self.LAG - 1: targs_idx_start + self.LAG].copy().to_numpy()
            targ_rows = future_prices.copy()
            future_return = np.log(future_prices[0,0]/rows[-1:,0])*10_000
            targ_rows[0,0] = future_return
        src_data = rows
        src_std = np.std(src_data, axis = 0)+epsilon
        src_median = np.mean(src_data, axis = 0)
        src_std, src_median = src_std.flatten(), src_median.flatten()
        trg_dat = targ_rows
        src_data_medianized = torch.from_numpy((src_data-src_median)/src_std).float()
        src_median[0], src_std[0] = 0, 1.0
        trg_data_medianized = torch.from_numpy((trg_dat-src_median)/src_std).float()
        return src_data_medianized, trg_data_medianized, src_data


    def __len__(self):
        if self.LAG == 0:
            return (len(self.df) - self.forecast_history - self.forecast_length - 1)
        else:
            return (len(self.df) - self.forecast_history - self.LAG - 1)

## Defining Custom Losses

In [11]:
class MASELoss(nn.Module):
    def __init__(self, baseline_method):
        """
        This implements the MASE loss function (e.g. MAE_MODEL/MAE_NAIEVE)
        """
        super(MASELoss, self).__init__()
        self.method_dict = {"mean": lambda x, y: torch.mean(x, 1).unsqueeze(1).repeat(1, y[1], 1)}
        self.baseline_method = self.method_dict[baseline_method]

    def forward(self, target, output, train_data, m=1):
        if len(train_data.shape) < 3:
            train_data = train_data.unsqueeze(0)
        if m == 1 and len(target.shape) == 1:
            output = output.unsqueeze(0)
            output = output.unsqueeze(2)
            target = target.unsqueeze(0)
            target = target.unsqueeze(2)
        if len(target.shape) == 2:
            output = output.unsqueeze(0)
            target = target.unsqueeze(0)
        result_baseline = self.baseline_method(train_data, output.shape)
        MAE = torch.nn.L1Loss()
        mae2 = MAE(output, target)
        mase4 = MAE(result_baseline, target)
        if mase4 < 0.001:
            mase4 = 0.001
        return mae2 / mase4

## Defining Helper Functions

In [12]:
pytorch_criterion_dict = {"MAE":nn.L1Loss, "MSE":nn.MSELoss, "MASE":MASELoss}

def make_crit(model_params):
    """
    Creates the criterion for training from the parameters
    """
    training_params = model_params
    criterion_init_params = {}
    if "criterion_params" in training_params:
        criterion_init_params = training_params["criterion_params"]
    if type(training_params["criterion"]) == list:
        criterion = []
        i = 0
        for crit, param in zip(training_params["criterion"], criterion_init_params):
            res = pytorch_criterion_dict[crit](**param)
            i += 1
            criterion.append(res)
    else:
        criterion = pytorch_criterion_dict[training_params["criterion"]](**criterion_init_params)
    return criterion


def get_output_std(src, output, labels, probabilistic, output_std):
    """
    Handles un-scaling the model output.
    """
    output_dist = None
    if probabilistic:
        try:
            output_std = Variable(torch.from_numpy(output_std).type(torch.FloatTensor).to(device))
        except Exception:
            pass
        output_dist = torch.distributions.Normal(output, output_std)
    return src, output, labels, output_dist


def compute_loss(labels, output, src, criterion, validation_dataset, probabilistic=None, output_std=None, m=1):
    """
    Computes loss.
    """
    if not probabilistic and isinstance(output, torch.Tensor):
        if len(labels.shape) != len(output.shape):
            if len(labels.shape) > 1:
                if labels.shape[1] == output.shape[1]:
                    labels = labels.unsqueeze(2)
                else:
                    labels = labels.unsqueeze(0)
    if probabilistic:
        if type(output_std) != torch.Tensor:
            output_std = torch.from_numpy(output_std)
        if type(output) != torch.Tensor:
            output = torch.from_numpy(output)
        output_dist = torch.distributions.Normal(output, output_std)
    if validation_dataset:
        src, output, labels, output_dist = get_output_std(src, output, labels, probabilistic,output_std)
    if probabilistic:
        loss = -output_dist.log_prob(labels.float()).sum() 
    elif isinstance(criterion, MASELoss):
        loss = criterion(labels.float(), output, src, m)
    else:
        loss = criterion(output, labels.float())
    return loss


def multi_crit(crit_multi: List, output, labels, valid=None):
    i = 0
    loss = 0.0
    for crit in crit_multi:
        if len(output.shape) == 3:
            loss += compute_loss(labels[:, :, i], output[:, :, i], torch.rand(1, 2), crit, valid)
        else:
            loss += compute_loss(labels[:, i], output[:, i], torch.rand(1, 2), crit, valid)
    summed_loss = loss
    return summed_loss


def train_step(model, opt, criterion, data_loader, takes_target, device,
                       num_targets=1, probablistic=False, forward_params={}):
    """
    Performs training of a single model. Runs through one epoch of the data.
    """
    i = 0
    output_std = None
    running_loss = 0.0
    for src, trg, src_orig in data_loader:
        opt.zero_grad()
        if takes_target:
            forward_params["t"] = trg.to(device)
        src = src.to(device)
        trg = trg.to(device)
        output = model(src, **forward_params)
        if num_targets == 1:
            labels = trg[:, :, 0]
        elif num_targets > 1:
            labels = trg[:, :, 0:num_targets]
        if probablistic:
            output1 = output
            output = output.mean
            output_std = output1.stddev
        if type(criterion) == list:
            loss = multi_crit(criterion, output, labels, None)
        else:
            loss = compute_loss(labels, output, src, criterion, None, probablistic, output_std, m=num_targets)
        loss.backward()
        opt.step()
        running_loss += loss.item()
        i += 1
    total_loss = running_loss
    return total_loss


def validation(val_loader, model, criterion, device, decoder_structure=False,
                       num_targets=1, probabilistic=False):
    """
    Computes the validation loss metrics
    """
    crit_losses = dict.fromkeys(criterion, 0)
    model.eval()
    output_std = None
    labels = torch.Tensor(0).to(device)
    labels_all = torch.Tensor(0).to(device)
    output_all = torch.Tensor(0).to(device)
    src_all = torch.Tensor(0)
    with torch.no_grad():
        for src, targ, src_orig in val_loader:
            output = torch.Tensor(0).to(device)
            src_all = torch.cat((src_all, src_orig[:,:,0]))
            src = src if isinstance(src, list) else src.to(device)
            targ = targ if isinstance(targ, list) else targ.to(device)
            if decoder_structure:
                if type(model).__name__ == "SimpleTransformer":
                    targ_clone = targ.detach().clone()
                    output = greedy_decode(model, src, targ.shape[1], targ_clone, device=device)[:,:,0]
                    output_all = torch.cat((output_all, output))                                                                                 
            else:
                if probabilistic:
                    output_dist = model(src.float())
                    output = output_dist.mean.detach().numpy()
                    output_std = output_dist.stddev.detach().numpy()
                else:
                    output = model(src.float())
                    output_all = torch.cat((output_all, output))
            if num_targets == 1:
                labels = targ[:, :, 0]
            elif num_targets > 1:
                labels = targ[:, :, 0:num_targets]
            for crit in criterion:
                loss = compute_loss(labels, output, src, crit, False, probabilistic, output_std, m=num_targets)
                crit_losses[crit] += loss.item()
            labels_all = torch.cat((labels_all, labels))
    return list(crit_losses.values())[0], output_all, labels_all, src_all

def forecast(data_loader, model, criterion, forecast_horizon, device, decoder_structure=False,
                       num_targets=1, probabilistic=False):
    crit_losses = dict.fromkeys(criterion, 0)
    model.eval()
    output_decoder = torch.Tensor(0).to(device)
    labels = torch.Tensor(0).to(device)
    labels_all = torch.Tensor(0).to(device)
    src_all = torch.Tensor(0)
    output_mean, output_std = [], []
    counter = 0
    with torch.no_grad():
        for src, targ, src_orig in data_loader:
            if (counter % forecast_horizon) == 0:
                src_all = torch.cat((src_all, src_orig[:,:,0]))
                src = src if isinstance(src, list) else src.to(device)
                targ = targ if isinstance(targ, list) else targ.to(device)
                if decoder_structure:
                    if type(model).__name__ == "SimpleTransformer":
                        targ_clone = targ.detach().clone()
                        output = greedy_decode(model, src, targ.shape[1], targ_clone, device=device)[:,:,0]
                        output_decoder = torch.cat((output_decoder, output))                                                                                 
                else:
                    if probabilistic:
                        output_dist = model(src.float())
                        output_mean.append(output_dist.mean.detach().numpy())
                        output_std.append(output_dist.stddev.detach().numpy())
                    else:
                        output = model(src.float())
                        output_decoder = torch.cat((output_decoder, output))
                if num_targets == 1:
                    labels = targ[:, :, 0]
                elif num_targets > 1:
                    labels = targ[:, :, 0:num_targets]
                for crit in criterion:
                    loss = compute_loss(labels, output, src, crit, False, probabilistic, output_std, m=num_targets)
                    crit_losses[crit] += loss.item()
                labels_all = torch.cat((labels_all, labels))
            counter += 1
    if decoder_structure:
        return list(crit_losses.values())[0], output_decoder, labels_all, src_all
    elif probabilistic:
        return list(crit_losses.values())[0], (output_mean, output_std), labels_all, src_all
    else:
        return list(crit_losses.values())[0], output_decoder, labels_all, src_all

## Defining Trainer

In [13]:
def strategy_evaluator(true, pred):
    '''
    Evaluates trading strategy based on correct buys and sells
    '''
    total_buys, total_sells, total_holds = np.sum(true>0), np.sum(true<0), np.sum(true==0)
    total_correct_buys, total_correct_sells, total_correct_holds = 0, 0, 0
    for idx in range(len(true)):
        for jdx in range(len(true[0])):
            if true[idx,jdx] > 0 and pred[idx,jdx] > 0:
                total_correct_buys += 1
            elif true[idx,jdx] < 0 and pred[idx,jdx] < 0:
                total_correct_sells += 1
            elif true[idx,jdx] == 0 and pred[idx,jdx] == 0:
                total_correct_holds += 1
    total_correct_buys_r, total_correct_sells_r, total_correct_holds_r = (total_correct_buys/total_buys),(total_correct_sells/total_sells),(total_correct_holds/total_holds)
    return total_correct_buys_r.round(3), total_correct_sells_r.round(3), total_correct_holds_r.round(3)

In [14]:
def trainer(model, train_loader, validation_loader, test_loader, criterion, opt, scheduler,
            warmup_scheduler, max_epochs, batch_size, forecast_horizon, takes_target, shuffle=False,
            decoder_structure=True, probabilistic=False, num_targets=1, plot_prediction=True, save_path=None, LAG=0):
    start_time = time.time()
    
    data_loader = DataLoader(train_loader, batch_size=batch_size, shuffle=shuffle, sampler=None, batch_sampler=None, num_workers=0)
    validation_data_loader = DataLoader(validation_loader, batch_size=batch_size, shuffle=False, sampler=None, batch_sampler=None, num_workers=0)
    test_data_loader = DataLoader(test_loader, batch_size=1, shuffle=False, sampler=None, batch_sampler=None, num_workers=0)
    forecast_data_loader = DataLoader(validation_loader, batch_size=1, shuffle=False, sampler=None, batch_sampler=None, num_workers=0)
    
    for epoch in range(1, max_epochs+1):
        total_loss = train_step(model, opt, criterion, data_loader, takes_target, device, num_targets=num_targets)
        val_loss = 0
        if plot_prediction:
            val_loss, val_values, true_values, src_all = forecast(forecast_data_loader, model, criterion, forecast_horizon=forecast_horizon,
                                                                   device=device, decoder_structure=decoder_structure,
                                                                   num_targets=num_targets, probabilistic=probabilistic)
            fig, ax = plt.subplots(1, 1, figsize = (18, 8))
            ax.plot(true_values.cpu().view(-1), label='truth', alpha=0.3)
            ax.plot(val_values.cpu().view(-1), label='forecast', alpha=0.8)
            ax.set_xlim(left=0, right=len(true_values.cpu().view(-1)))
            plt.show()
        else:
            val_loss, val_values, true_values, src_all = validation(validation_data_loader, model, criterion, device,
                                                             decoder_structure=decoder_structure, num_targets=num_targets,
                                                             probabilistic=probabilistic)
        
        preds, trues, src = val_values.cpu().numpy(), true_values.cpu().numpy(), src_all.cpu().numpy()

        print(f'preds {preds.shape}')
        print(f'trues {trues.shape}')
        print(f'src {src.shape}')

        results = strategy_evaluator(trues, preds)
        r2_sklearn = r2_score(trues, preds)

        elapsed = time.time() - start_time
        print('-' * 88)
        print('| epoch {:3d} | {:5.2f} s | train loss {:5.5f} | val loss {:5.5f} | lr {:1.8f} | r2 sklearn: {:1.5f} | b, s, h: {:}|'.format(
                        epoch, elapsed, total_loss, val_loss, scheduler.get_last_lr()[0], r2_sklearn, results))
        print('-' * 88)
        start_time = time.time()

        if save_path:
            results = {
                    'model': 'Transformer_encdec',
                    'pred_len': forecast_horizon,
                    'epoch': epoch,
                    'train_loss': total_loss,
                    'val_loss': val_loss,
                    # 'r2_val': r2,
                    'r2_val_sklearn': r2_sklearn            
            }

            df = pd.DataFrame([results])
            df.to_csv(save_path, mode='a', header=not os.path.exists(save_path), index=False)

        with warmup_scheduler.dampening():
            scheduler.step()

    decoder_structure = True
    # test = validation(test_data_loader, model, [criterion], device, decoder_structure=decoder_structure, num_targets=num_targets,
    #                           probabilistic=probabilistic)
    # print('| test loss {:5.5f} |'.format(test))
    # model.params["run"] = session_params


## Model and Training

In [None]:
date_train = 'all' 
date_test = 'all'

if drive:
    agg_trade = pd.read_csv(config["paths"]["drive"]["agg_trade"]["train"]+date_train+'/orderbook.csv')
    agg_trade = agg_trade[1_000_000:]
    
    sys.path.append(config["paths"]["drive"]["utils"])
else:
    agg_trade = pd.read_csv(config["paths"]["local"]["agg_trade"]["train"]+date_train+'/orderbook_agg_trade_dollarvol.csv')
    agg_trade_test = pd.read_csv(config["paths"]["local"]["agg_trade"]["test"]+date_test+'/orderbook_agg_trade_dollarvol.csv')

In [None]:
agg_trade['price'] = agg_trade['w_midprice']
agg_trade = agg_trade[agg_trade['update_diff']>0]

agg_trade_test = agg_trade[2_000_000:]

trade = augment_trade_data(agg_trade, lag=30)
trade_test = augment_trade_data(agg_trade_test, lag=30)

target = 'price'

features = [target, 'lag_return',
            'bid1', 'bidqty1', 'bid2', 'bidqty2', 'bid3', 'bidqty3', 'bid4', 'bidqty4', 'bid5', 'bidqty5',
            'bid6', 'bidqty6', 'bid7', 'bidqty7', 'bid8', 'bidqty8', 'bid9', 'bidqty9',
            'ask1', 'askqty1', 'ask2', 'askqty2', 'ask3', 'askqty3', 'ask4', 'askqty4', 'ask5', 'askqty5',
            'ask6', 'askqty6', 'ask7', 'askqty7', 'ask8', 'askqty8', 'ask9', 'askqty9']

train_data, test_data = select_features(trade, trade_test, features)

In [None]:
forecast_history = 100 #100
forecast_len = 30
epochs = 400
batch_size = 128 #64

In [None]:
train_loader = CSVDataLoader(df=train_data[features], forecast_history=forecast_history, forecast_length=forecast_len, 
                             start_stamp=1_000_000, end_stamp=1_090_000, target_col=['log_returns'])
val_loader = CSVDataLoader(df=train_data[features], forecast_history=forecast_history, forecast_length=forecast_len,
                            start_stamp=1_091_000, end_stamp=1_100_000, target_col=['log_returns'])
test_loader = CSVDataLoader(df=test_data[features], forecast_history=forecast_history, forecast_length=forecast_len, start_stamp=2_035_000,
                            end_stamp=2_038_000, target_col=['log_returns'])

In [None]:
model_simple = SimpleTransformer(n_time_series=len(features), seq_len=forecast_history, output_seq_len=forecast_len, d_model=64, n_heads=4,
                          dropout=0.25, n_encoder_layers=2, n_decoder_layers=1).to(device)

criterion = nn.L1Loss(reduction='sum')
optimizer = optim.AdamW(model_simple.parameters(), lr=0.01, amsgrad=True) #0.00005
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.995)
warmup_scheduler = warmup.LinearWarmup(optimizer, warmup_period=1000)

# scheduler = ignite.handlers.param_scheduler.create_lr_scheduler_with_warmup(scheduler,
#                                                                     warmup_start_value=0.0,
#                                                                     warmup_end_value=0.01,
#                                                                     warmup_duration=10)

trainer(model_simple, train_loader, val_loader, test_loader, [criterion], optimizer, scheduler, warmup_scheduler, epochs, batch_size, forecast_len,
        takes_target=True, plot_prediction=True, shuffle=False)


## Optimal paramater search

In [15]:
date_train = 'all' 
date_test = 'all'

if drive:
    agg_trade = pd.read_csv(config["paths"]["drive"]["agg_trade"]["train"]+date_train+'/orderbook.csv')    
    sys.path.append(config["paths"]["drive"]["utils"])
else:
    agg_trade = pd.read_csv(config["paths"]["local"]["agg_trade"]["train"]+date_train+'/orderbook_agg_trade_dollarvol.csv')
    agg_trade_test = pd.read_csv(config["paths"]["local"]["agg_trade"]["test"]+date_test+'/orderbook_agg_trade_dollarvol.csv')

agg_trade['price'] = agg_trade['w_midprice']
agg_trade_test = agg_trade[4_500_000:]

In [17]:
save_path = os.path.join('/content/drive/MyDrive/IP/Repos/HFTransformer/results_normal',
                            str(int(time.time()))+'ablation_encoder_results.csv')
# save_path=None

forecast_history = 100 
epochs = 30
batch_size = 64 #64 for linear decoder

forecast_windows = [i for i in range(2,31)]

for forecast_window in forecast_windows:

    trade = augment_trade_data(agg_trade, lag=0, forecast_window=forecast_window)
    trade_test = augment_trade_data(agg_trade_test, lag=0, forecast_window=forecast_window)
    target = 'price' #log_lag'+str(forecast_window)+'_price'

    features = [target, 'lag_return',
            'bid1', 'bidqty1', 'bid2', 'bidqty2', 'bid3', 'bidqty3', 'bid4', 'bidqty4', 'bid5', 'bidqty5',
            'bid6', 'bidqty6', 'bid7', 'bidqty7', 'bid8', 'bidqty8', 'bid9', 'bidqty9',
            'ask1', 'askqty1', 'ask2', 'askqty2', 'ask3', 'askqty3', 'ask4', 'askqty4', 'ask5', 'askqty5',
            'ask6', 'askqty6', 'ask7', 'askqty7', 'ask8', 'askqty8', 'ask9', 'askqty9']

    trade, trade_test = select_features(trade, trade_test, features)

    train_loader = CSVDataLoader(df=trade[features], forecast_history=forecast_history, forecast_length=1, 
                             start_stamp=1_000_000, end_stamp=1_080_000, target_col=['log_returns'], LAG=forecast_window)
    val_loader = CSVDataLoader(df=trade[features], forecast_history=forecast_history, forecast_length=1,
                                start_stamp=1_080_001, end_stamp=1_100_000, target_col=['log_returns'], LAG=forecast_window)
    test_loader = CSVDataLoader(df=trade_test[features], forecast_history=forecast_history, forecast_length=1,
                                start_stamp=2_035_000, end_stamp=2_031_000, target_col=['log_returns'], LAG=forecast_window)

    model_simple = SimpleTransformer(n_time_series=len(features), seq_len=forecast_history, output_seq_len=1, d_model=80, n_heads=8,
                            dropout=0.3, n_encoder_layers=1, n_decoder_layers=1, dim_feedforward=128).to(device)

    criterion = nn.MSELoss(reduction='sum')
    optimizer = optim.AdamW(model_simple.parameters(), lr=0.0001, amsgrad=True)
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)
    warmup_scheduler = warmup.LinearWarmup(optimizer, warmup_period=1000)

    trainer(model_simple, train_loader, val_loader, test_loader, [criterion], optimizer, scheduler, warmup_scheduler, epochs, batch_size, forecast_horizon=1,
        takes_target=True, plot_prediction=False, shuffle=False, save_path=save_path, LAG=forecast_window)

    print(f'Done with prediction len {forecast_window}.')

preds (19898, 1)
trues (19898, 1)
src (19898, 100)
----------------------------------------------------------------------------------------
| epoch   1 | 54.52 s | train loss 33746.22851 | val loss 13625.75426 | lr 0.01000000 | r2 sklearn: -0.23243 | b, s, h: (0.497, 0.626, 0.0)|
----------------------------------------------------------------------------------------
preds (19898, 1)
trues (19898, 1)
src (19898, 100)
----------------------------------------------------------------------------------------
| epoch   2 | 54.21 s | train loss 12644.75581 | val loss 37027.56632 | lr 0.00980000 | r2 sklearn: -2.34909 | b, s, h: (0.48, 0.593, 0.0)|
----------------------------------------------------------------------------------------
preds (19898, 1)
trues (19898, 1)
src (19898, 100)
----------------------------------------------------------------------------------------
| epoch   3 | 53.52 s | train loss 6047.16074 | val loss 50438.93382 | lr 0.00960400 | r2 sklearn: -3.56213 | b, s, h: (0

KeyboardInterrupt: ignored