In [None]:
from __future__ import print_function

import argparse
import math
import warnings
from statistics import NormalDist

import matplotlib.pyplot as plt
import numpy
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from IPython.display import clear_output
from numpy import array
from scipy.optimize import direct, Bounds, minimize
from scipy.stats import norm
from sklearn import preprocessing
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
from sklearn.model_selection import KFold
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Suppress every warning for the remainder of the kernel session.
warnings.simplefilter('ignore')

# Raw table, indexed by the parsed `system_time` column.
FILENAME = './data/BTC_5min.csv'
data = pd.read_csv(FILENAME, index_col='system_time', parse_dates=True)

# Add one derived column per side and level 0-4:
#   <side>_weighted_distance_<level> = (1 + 100 * distance) * notional
for level in range(5):
    for side in ('bids', 'asks'):
        scaled_distance = 1 + 100 * data[f'{side}_distance_{level}']
        data[f'{side}_weighted_distance_{level}'] = scaled_distance * data[f'{side}_notional_{level}']

# Normalise column-wise (axis=0) with sklearn's default norm. The frame is rebuilt
# from the raw values without an index, so the datetime index is replaced by a
# default integer index from here on.
data = pd.DataFrame(preprocessing.normalize(data.values, axis=0), columns=data.columns)
data.head()

In [None]:
class TSDataset(Dataset):
    """Sliding-window dataset built from the rows of a dataframe.

    Item ``i`` is the pair ``(window, target)`` where ``window`` holds the
    ``sequence_length`` feature rows ending at row ``i`` (inclusive) and
    ``target`` is the target value(s) of row ``i``. When fewer than
    ``sequence_length`` rows exist up to ``i``, the window is left-padded with
    copies of the first feature row so every window has the same length.

    Args:
        full_dataframe: dataframe holding both feature and target columns.
        target: column name, or list of column names, used as the target.
        features: list of column names used as model inputs.
        sequence_length: number of rows in each returned window.
    """

    def __init__(self, full_dataframe, target, features, sequence_length=5):
        self.sequence_length = sequence_length
        self.target = target
        self.features = features
        # Both tensors are stored as float32.
        self.X = torch.tensor(full_dataframe[features].values).float()
        self.y = torch.tensor(full_dataframe[target].values).float()

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        # Number of rows that would fall before the start of the data.
        missing = self.sequence_length - 1 - i
        if missing <= 0:
            window = self.X[i - self.sequence_length + 1:i + 1, :]
        else:
            head = self.X[0].repeat(missing, 1)
            window = torch.cat((head, self.X[:i + 1, :]), 0)
        return window, self.y[i]

In [None]:
class ShallowRegressionLSTM(nn.Module):
    """Conv1d -> ReLU -> LSTM -> Linear regressor over feature windows.

    Args:
        hidden_units: size of the LSTM hidden state.
        layers: number of stacked LSTM layers.
        kernel_size: width of the 1-D convolution applied along the time axis.
            The convolution is unpadded, so the window must be at least this long.
        name: label used only in the parameter-count message printed on construction.
        in_features: number of input features per time step.
        out_features: number of regression outputs per sample.
    """

    def __init__(self, hidden_units, layers, kernel_size, name='DepthOfBook', in_features=4, out_features=10):
        super().__init__()
        self.name = name

        self.hidden_units = hidden_units
        self.num_layers = layers

        self.conv = nn.Conv1d(in_channels=in_features, out_channels=in_features, kernel_size=kernel_size)

        self.lstm = nn.LSTM(input_size=in_features,
                            hidden_size=hidden_units,
                            batch_first=True,
                            num_layers=self.num_layers)

        self.output_layer = nn.Linear(in_features=self.hidden_units, out_features=out_features)

        total_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
        print(self.name + ': total params:', total_params)

    def forward(self, x):
        """Map windows of shape (batch, time, in_features) to predictions.

        Returns:
            A 1-D tensor of length ``batch * out_features`` (the per-sample
            outputs flattened, as callers of this model already expect).
        """
        # Conv1d expects (batch, channels, time); swap axes going in and coming out.
        convolved = F.relu(self.conv(x.transpose(1, 2))).transpose(1, 2)

        # No explicit initial state: nn.LSTM then starts from zeros created with
        # the input's dtype and device, so the module also works off-CPU.
        _, (hn, _) = self.lstm(convolved)

        # hn is (num_layers, batch, hidden). Use the TOP layer's final state;
        # indexing layer 0 would bypass every stacked layer above it, leaving
        # them without any influence on the output or any gradient.
        return self.output_layer(hn[-1]).flatten()
    

def train_the_net(training_data, net, epochs=2, lr=0.001, momentum=0.9):
    """Train ``net`` in place with SGD on a mean-squared-error loss.

    Args:
        training_data: iterable yielding ``(inputs, true_outputs)`` batches; it is
            iterated once per epoch.
        net: the module to optimise.
        epochs: number of passes over ``training_data`` (default 2).
        lr: SGD learning rate (default 0.001).
        momentum: SGD momentum (default 0.9).

    Returns:
        The same ``net`` object, after training.
    """
    criterion = nn.MSELoss()
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)

    for epoch in range(epochs):
        print(f'Epoch: {epoch}')
        for inputs, true_outputs in training_data:
            # Gradients accumulate across backward() calls, so reset them per batch.
            optimizer.zero_grad()

            loss = criterion(net(inputs), true_outputs)
            loss.backward()
            optimizer.step()
    return net


def test_the_net(test_data, net):
    total_l2 = 0
    n_batches = len(test_data)
    with torch.no_grad():
        for inputs, true_outputs in test_data:
            outputs = net(inputs)
            total_l2 += F.mse_loss(outputs, true_outputs)
    return total_l2/n_batches

    
def compute_error(data, hidden_units, hidden_layers, lookback, kernel_size):
    """Cross-validated test error of a ShallowRegressionLSTM configuration.

    For each of two shuffled K-fold splits, a fresh network is trained on the
    training rows and scored on the held-out rows; the fold scores are averaged.

    Args:
        data: dataframe containing the feature and weighted-distance target columns.
        hidden_units: LSTM hidden size.
        hidden_layers: number of stacked LSTM layers.
        lookback: window length handed to TSDataset.
        kernel_size: convolution width; capped at ``lookback`` because the
            unpadded convolution cannot use a kernel longer than the window.

    Returns:
        The average held-out MSE across folds, as a Python float.
    """
    avg_error = 0
    k_folds = 2
    seed = 1729
    torch.manual_seed(seed)

    # A kernel wider than the window makes Conv1d raise; cap it instead of crashing
    # the surrounding hyperparameter search.
    kernel_size = min(kernel_size, lookback)

    batch_size = 1
    target_names = [f'bids_weighted_distance_{i}' for i in range(4, -1, -1)] + [f'asks_weighted_distance_{i}' for i in range(5)]
    feature_names = ['midpoint', 'spread', 'buys', 'sells']

    # Fixed random_state: the same hyperparameters always see the same folds, so
    # the returned error is a repeatable function of its arguments.
    splitter = KFold(n_splits=k_folds, shuffle=True, random_state=seed)
    for train_idx, test_idx in splitter.split(data):
        # KFold yields positional indices, hence iloc rather than label-based loc.
        training_data = TSDataset(data.iloc[train_idx], target_names, feature_names, lookback)
        # Score on the held-out rows, not on the rows the network was trained on.
        test_data = TSDataset(data.iloc[test_idx], target_names, feature_names, lookback)

        training_dataloader = DataLoader(training_data, batch_size, shuffle=True)
        test_dataloader = DataLoader(test_data, batch_size, shuffle=False)

        net = ShallowRegressionLSTM(hidden_units, hidden_layers, kernel_size)
        net = train_the_net(training_dataloader, net)
        avg_error += test_the_net(test_dataloader, net)

    avg_error = (avg_error/k_folds).item()
    print(f'Average loss is: {avg_error}')
    return avg_error

In [None]:
# Lower and upper limits of the hyperparameter search. Each guess is unpacked
# positionally into compute_error, so the positions are:
#   [hidden_units, hidden_layers, lookback, kernel_size]
MIN_BOUNDS = [5, 2, 10, 2]
MAX_BOUNDS = [100, 6, 300, 20]

def rbf_kernels(length_scales=[(x+1)/50 for x in range(25)]):
    """Lazily yield one RBF kernel per entry of ``length_scales``.

    Each kernel is created with ``length_scale_bounds='fixed'``. The default
    scales are 0.02, 0.04, ..., 0.50.
    """
    yield from (
        RBF(length_scale=scale, length_scale_bounds='fixed')
        for scale in length_scales
    )


def expected_improvement(xi):
    """Build a negated expected-improvement acquisition function.

    Args:
        xi: margin subtracted from the predicted improvement; larger values
            demand a bigger predicted gain before a point scores well.

    Returns:
        A callable ``f(model, xs, X_so_far, y_so_far)`` returning an array of
        shape ``(len(xs), 1)`` holding *minus* the expected improvement of each
        candidate row in ``xs`` over the best model mean at ``X_so_far``. The
        sign is flipped so the result can be handed to a minimiser.
        ``y_so_far`` is accepted for interface symmetry but not used.
    """
    def _func(model, xs, X_so_far, y_so_far):
        mu, sigma = model.predict(xs, return_std=True)
        mu_sample = model.predict(X_so_far)

        # Make both the mean and the std column vectors. Leaving mu 1-D while
        # sigma is (m, 1) would broadcast to an (m, m) matrix for m > 1.
        mu = np.asarray(mu, dtype=float).reshape(-1, 1)
        sigma = np.asarray(sigma, dtype=float).reshape(-1, 1)

        mu_sample_opt = np.max(mu_sample)

        # Zero-variance points divide by zero here; they are overwritten below,
        # so the floating-point warnings carry no information.
        with np.errstate(divide='ignore', invalid='ignore'):
            imp = mu - mu_sample_opt - xi
            Z = imp / sigma
            ei = imp * norm.cdf(Z) + sigma * norm.pdf(Z)
        ei[sigma == 0.0] = 0.0

        return -ei

    return _func


def next_guess(X_so_far, y_so_far, acquisition_function):
    """Propose the next hyperparameter vector to evaluate.

    A Gaussian process is fitted to the negated observations (so that a lower
    observed error reads as a higher value), then the acquisition function is
    minimised from 25 random starting points inside the search bounds and the
    best optimum found is returned.

    Args:
        X_so_far: array of shape (n, d) with the points evaluated so far.
        y_so_far: the n observed errors, either 1-D or as an (n, 1) column.
        acquisition_function: callable ``f(model, xs, X_so_far, y_so_far)`` whose
            value is to be minimised; only its first element is used.

    Returns:
        Integer array of length d (the optimum truncated to ints).
    """
    model = GaussianProcessRegressor(kernel=RBF(), alpha=1e-12)
    # Flatten y so the fit sees a plain 1-D target however the caller stacked it.
    model.fit(X_so_far, -1 * np.asarray(y_so_far, dtype=float).ravel())

    bounds = Bounds(MIN_BOUNDS, MAX_BOUNDS)

    def objective_function(x):
        value = acquisition_function(model, x.reshape(1, -1), X_so_far, y_so_far)
        # The acquisition function returns an array; the optimiser needs a scalar.
        return float(np.asarray(value).ravel()[0])

    best_value = np.inf
    best_x = None

    for _ in range(25):
        x0 = np.random.randint(MIN_BOUNDS, MAX_BOUNDS)
        result = minimize(objective_function, x0=x0, bounds=bounds, method='Nelder-Mead')
        # Always keep the first restart so a result exists even if every
        # objective value is non-finite and no comparison succeeds.
        if best_x is None or result.fun < best_value:
            best_value = result.fun
            best_x = result.x
    return best_x.astype(int)


# Seed the search with the midpoint of the bounds (truncated to ints) and the
# error measured there.
Xs = np.array([np.array(MIN_BOUNDS) + 0.5*(np.array(MAX_BOUNDS)-np.array(MIN_BOUNDS))]).astype(int)
ys = np.array([compute_error(data, *Xs[-1, :].tolist())])

# 50 rounds of: propose a point, train/evaluate it, append it to the history.
for i in range(50):
    # Margin passed to expected_improvement: close to 1 in early rounds, falling
    # along a logistic curve centred on round 40, and never below 0.01.
    xi = max(0.01, 1 - (1/(1 + np.exp(-(i-40)))))
    X_n = next_guess(Xs, ys, expected_improvement(xi))
    Xs = np.vstack([Xs, X_n])
    y_n = compute_error(data, *Xs[-1, :].tolist())
    # ys starts as a 1-D array with one entry; vstack turns it into an (n, 1)
    # column from the first round onwards.
    ys = np.vstack([ys, np.array([y_n])])
    print(X_n)

In [None]:
# Train one model on the full dataset with hand-fixed hyperparameters
# (presumably taken from the search above -- confirm before relying on them).
batch_size = 1
hidden_units = 65
hidden_layers = 3
lookback = 196
kernel_size = 13

# Targets here are the raw distance and notional columns for levels 0-4 on both
# sides (20 columns), not the weighted-distance columns used by compute_error.
target_names = (
    [f'bids_distance_{x}' for x in range(5)]
    + [f'asks_distance_{x}' for x in range(5)]
    + [f'bids_notional_{x}' for x in range(5)]
    + [f'asks_notional_{x}' for x in range(5)]
)
feature_names = ['midpoint', 'spread', 'buys', 'sells']

training_data = TSDataset(data, target_names, feature_names, lookback)
training_dataloader = DataLoader(training_data, batch_size, shuffle=True)

# kernel_size is a required constructor argument, and the output width must match
# the number of target columns (the constructor default of 10 would not).
net = ShallowRegressionLSTM(
    hidden_units,
    hidden_layers,
    kernel_size,
    in_features=len(feature_names),
    out_features=len(target_names),
)
net = train_the_net(training_dataloader, net)

In [None]:
# Only a single batch is inspected in the cells below, so draw one batch from the
# (shuffled) loader instead of running the network over the entire dataset and
# keeping whatever the final iteration happened to produce.
inputs, true_outputs = next(iter(training_dataloader))
with torch.no_grad():
    outputs = net(inputs)
    

In [None]:
# Show the model's predictions for the batch currently held in `inputs`.
outputs

In [None]:
# Show the ground-truth targets belonging to the same batch as `outputs`.
true_outputs

In [None]:
# Mean squared error between the prediction and the target for that one batch.
F.mse_loss(outputs, true_outputs)

In [None]:
# The same targets multiplied by 100 -- presumably to make the small normalised
# values easier to read; TODO confirm the intent of the factor.
100*true_outputs