In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torch.optim.lr_scheduler import CosineAnnealingLR
import torch.nn.utils as param
import os
import glob
from pathlib import Path
from sklearn.metrics import mean_squared_error, r2_score

import warnings
warnings.filterwarnings("ignore")

In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else "mps"
print(f"Using {device} device")

Using mps device


In [None]:
train_data = np.load(Path('tick_data/train.npy'))
test_data = np.load(Path('tick_data/test.npy'))
val_data = np.load(Path('tick_data/val.npy'))

X_train = train_data[:,:,:-1].reshape(333,730,6,16).reshape(333*730,6,16)
X_val = val_data[:,:,:-1].reshape(40,730,6,16).reshape(40*730,6,16)
Y_train = train_data[:,:,-1].reshape(-1,1)
Y_val = val_data[:,:,-1].reshape(-1,1)

X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
Y_train = torch.tensor(Y_train.reshape(-1), dtype=torch.float32).to(device)
X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
Y_val = torch.tensor(Y_val.reshape(-1), dtype=torch.float32).to(device)

X_train.shape

In [None]:
Y_train.shape

In [3]:
X_train = torch.tensor(np.load(Path("model_data/x_train.npy")), dtype=torch.float32).to(device)
X_val = torch.tensor(np.load(Path("model_data/x_val.npy")), dtype=torch.float32).to(device)
X_test = torch.tensor(np.load(Path("model_data/x_test.npy")), dtype=torch.float32).to(device)
Y_train = torch.tensor(np.load(Path("model_data/y_train.npy")), dtype=torch.float32)[:, -1].to(device)
Y_val = torch.tensor(np.load(Path("model_data/y_val.npy")), dtype=torch.float32)[:, -1].to(device)
Y_test = torch.tensor(np.load(Path("model_data/y_test.npy")), dtype=torch.float32)[:, -1].to(device)
X_train.shape

torch.Size([482, 20, 20])

In [4]:
Y_train.shape

torch.Size([482])

In [33]:
dilation_rates=[1,2] #,4,8]
# sequence_length = 6
sequence_length = 20
# num_features = 16
num_features = 20
num_epochs = 1_000
latent_dim = 70
hidden_dim = 400
t_max = num_epochs
batch_size = 2048
lr = 0.001

In [6]:
# Define VAE model
class VAE(nn.Module):
    def __init__(self, input_dim=num_features, hidden_dim=hidden_dim, latent_dim=latent_dim ):
        super(VAE, self).__init__()
        # Encoder
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.act1 = nn.PReLU()
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.act2 = nn.PReLU()
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        self.act3 = nn.Tanh()
        self.fc_mu = nn.Linear(hidden_dim, latent_dim)
        self.fc_logvar = nn.Linear(hidden_dim, latent_dim)
        
        
        # Decoder
        self.fc4 = nn.Linear(latent_dim, hidden_dim)
        self.fc5 = nn.Linear(hidden_dim, input_dim)

    def encode(self, x):
        h = self.act1(self.fc1(x))
        h = self.act2(self.fc2(h))
        h = self.act3(self.fc3(h))
        return self.fc_mu(h), self.fc_logvar(h)

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        h = torch.relu(self.fc2(z))
        return torch.sigmoid(self.fc3(h))

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        return z, mu, logvar,


In [7]:
# Define a single block of dilated convolution
class TCNBlock(nn.Module):
    def __init__(self, input_channels, num_filters, kernel_size, dilation_rate):
        super(TCNBlock, self).__init__()
        self.conv1d = nn.Conv1d(input_channels, num_filters, kernel_size,
                                dilation=dilation_rate, padding='same',
                                bias=False)
        self.conv1d = param.weight_norm(self.conv1d)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.conv1d(x)
        return self.relu(x)

# Define the Temporal Convolutional Network
class TCN(nn.Module):
    def __init__(self, input_channels=latent_dim, num_filters=64, kernel_size=3, num_blocks=4, dilation_rates=[1,2,]):
        super(TCN, self).__init__()

        self.blocks = nn.ModuleList()
        self.blocks.append(VAE())
        current_input_channels = input_channels

        for dilation_rate in dilation_rates:
            self.blocks.append(TCNBlock(current_input_channels, num_filters, kernel_size, dilation_rate))
            current_input_channels = num_filters

        self.batch_norm = nn.BatchNorm1d(latent_dim)
        self.ff1 = nn.Linear(num_filters*sequence_length, 1)
        self.tcnact = nn.Tanhshrink()
        


    def forward(self, x):
        b = 0
        for block in self.blocks:
            if b == 0:
                x, x_mu, x_logvar = block(x)
                N = x.shape[0]
                x = x.view(N * sequence_length, latent_dim)
                x = self.batch_norm(x)
                x = x.view(N, sequence_length, latent_dim)
                x = torch.transpose(x,1,2)
            else:
                x = block(x)
            b+=1
        
        x = x.view(x.size(0), -1) 
        x = self.ff1(x)
        x1 = self.tcnact(x)
        return x1, x_mu, x_logvar



In [34]:
tcn = TCN().to(device)
criterion = nn.MSELoss(reduction='mean')
optimizer = optim.Adam(tcn.parameters(), lr=lr)
scheduler = CosineAnnealingLR(optimizer, T_max=t_max, eta_min=0.0001)


train_dataset = TensorDataset(X_train, Y_train.reshape(-1))
train_dataset = train_dataset
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataset = TensorDataset(X_val, Y_val.reshape(-1))
val_dataset = val_dataset
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

train_hist = []
val_hist = []

for epoch in range(num_epochs):
    total_loss = 0.0
    tcn.train()
    for batch_X, batch_y in train_loader:
        lower_val = torch.quantile(batch_X, 0.01, dim=0, keepdim=True)
        upper_val = torch.quantile(batch_X, 0.99, dim=0, keepdim=True)
        batch_X = torch.clamp(batch_X, min=lower_val, max=upper_val)
        batch_X = batch_X.float()
        predictions, p_mu, p_logvar = tcn(batch_X.float())

        predictions = predictions.float()  
        batch_y = batch_y.float()  
        p_logvar = p_logvar.float()  
        p_mu = p_mu.float() 

        lossmse = criterion(predictions, batch_y)
        losskl = -0.5 * torch.sum(1 + p_logvar - p_mu.pow(2) - p_logvar.exp())

        beta = (epoch + 1) / num_epochs
        #beta = 0.1
        loss = lossmse + beta * losskl

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
        total_loss += loss.item()

    scheduler.step()
    average_loss = total_loss/len(train_loader)
    train_hist.append(average_loss)

    tcn.eval()
    with torch.no_grad():
        total_val_loss = 0.0

        for batch_X_val, batch_y_val in val_loader:
            lower_valv = torch.quantile(batch_X_val, 0.01, dim=0, keepdim=True)
            upper_valv = torch.quantile(batch_X_val, 0.99, dim=0, keepdim=True)
            batch_X_val = torch.clamp(batch_X, min=lower_valv, max=upper_valv)
            predictions_val, _, _ = tcn(batch_X_val.float())

        
            lossmsev = criterion(predictions_val,batch_y_val.float())
            val_loss = lossmsev
            total_val_loss += val_loss.item()

        average_val_loss = total_val_loss / len(val_loader)
        val_hist.append(average_val_loss)

    if (epoch + 1) % 1 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}] - Training Loss: {average_loss:.3f}, MSE Loss: {lossmse:.6f}, , KL Loss: {losskl:.6f}, Test Loss: {average_val_loss:.6f}, Learning Rate: {optimizer.param_groups[0]["lr"]}')
        #print(predictions.max().item(), p_logvar.max().item(), p_mu.max().item(), predictions.min().item(), p_logvar.min().item(),p_mu.min().item())

Epoch [1/1000] - Training Loss: 1.192, MSE Loss: 0.324099, , KL Loss: 867.838806, Test Loss: 0.267465, Learning Rate: 0.0009999977793408363
Epoch [2/1000] - Training Loss: 0.986, MSE Loss: 0.320355, , KL Loss: 332.916901, Test Loss: 0.255497, Learning Rate: 0.0009999911173852617
Epoch [3/1000] - Training Loss: 0.790, MSE Loss: 0.306265, , KL Loss: 161.122238, Test Loss: 0.225266, Learning Rate: 0.0009999800141990275
Epoch [4/1000] - Training Loss: 0.734, MSE Loss: 0.271019, , KL Loss: 115.697540, Test Loss: 0.170967, Learning Rate: 0.0009999644698917174
Epoch [5/1000] - Training Loss: 1.197, MSE Loss: 0.209810, , KL Loss: 197.382706, Test Loss: 0.103436, Learning Rate: 0.0009999444846167473
Epoch [6/1000] - Training Loss: 0.685, MSE Loss: 0.130737, , KL Loss: 92.418350, Test Loss: 0.070350, Learning Rate: 0.0009999200585713642
Epoch [7/1000] - Training Loss: 1.052, MSE Loss: 0.069865, , KL Loss: 140.275330, Test Loss: 0.163303, Learning Rate: 0.000999891191996643
Epoch [8/1000] - Train

In [35]:
def results(model, X, Y):
    """"""
    model.eval()
    pred, _, _ = model(X)
    pred = pred.cpu().detach().numpy()
    Y = Y.cpu().detach().numpy()
    print("MSE:", mean_squared_error(Y, pred))
    print("r2:", r2_score(Y, pred))
    print("R1:", np.corrcoef(pred.T, Y.T))

results(tcn, X_train, Y_train)
results(tcn, X_val, Y_val)
results(tcn, X_test, Y_test)

MSE: 0.062857226
r2: -0.0191123082248692
R1: [[ 1.         -0.00735367]
 [-0.00735367  1.        ]]
MSE: 0.066880226
r2: -0.04961712606950752
R1: [[1.         0.04475253]
 [0.04475253 1.        ]]
MSE: 0.11064657
r2: -0.06554955981932209
R1: [[ 1.         -0.05079634]
 [-0.05079634  1.        ]]
