In [1]:
import pandas as pd
import numpy as np
import torch
import torchvision

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

torch.set_default_device(device)

In [3]:
df = pd.read_csv("data/2008_Global_Markets_Data.csv")
for x in (2009, 2023):
    df = pd.concat([df, pd.read_csv(f"data/{x}_Global_Markets_Data.csv")], axis = 0)

In [4]:
df["Date"] = pd.to_datetime(df["Date"])
df.set_index("Date", inplace = True)

In [5]:
train_size = int(0.8*len(df))

x_train = df.select_dtypes(include = np.number)[:train_size].astype(np.float32)
x_valid = df.select_dtypes(include = np.number)[train_size:int(0.9*len(df))].astype(np.float32)
x_test = df.select_dtypes(include = np.number)[int(0.9*len(df)):].astype(np.float32)

x_train["Ticker"] = df["Ticker"][:train_size]
x_valid["Ticker"] = df["Ticker"][train_size:int(0.9*len(df))]
x_test["Ticker"] = df["Ticker"][int(0.9*len(df)):]

In [6]:
n100_train = x_train[x_train["Ticker"] == "^N100"].drop("Ticker", axis = 1)
n100_valid = x_valid[x_valid["Ticker"] == "^N100"].drop("Ticker", axis = 1)
n100_test = x_test[x_test["Ticker"] == "^N100"].drop("Ticker", axis = 1)

In [7]:
mean, std = n100_train.values.mean(0), n100_train.values.std(0)

In [8]:
from torch.utils.data import DataLoader, Dataset

class TimeSeriesDataset(Dataset):
    def __init__(self, input_seq, window_length, transform = None):
        self.input_seq = input_seq
        self.window_length = window_length
        self.transform = transform
        
    def __len__(self):
        return len(self.input_seq) - self.window_length * 2
    
    def __getitem__(self, idx):
        window = idx + self.window_length
        seq = (np.array(self.input_seq[idx:window]) - mean) / std
        target_seq = (np.array(self.input_seq[window:window + self.window_length]) - mean) / std
        
        if self.transform:
            seq = self.transform(seq)
            target_seq = self.transform(target_seq)
            
        return seq, target_seq

In [9]:
next(iter(TimeSeriesDataset(n100_train, 7, torchvision.transforms.ToTensor())))

(tensor([[[ 0.3033,  0.2946,  0.2954,  0.2787,  0.2787, -0.0270],
          [ 0.2803,  0.2716,  0.2832,  0.2623,  0.2623, -0.3326],
          [ 0.2753,  0.3092,  0.2932,  0.3349,  0.3349,  0.8092],
          [ 0.3580,  0.3548,  0.3685,  0.3779,  0.3779,  0.6783],
          [ 0.3719,  0.3953,  0.3945,  0.3804,  0.3804,  0.5362],
          [ 0.3612,  0.3826,  0.3825,  0.4077,  0.4077,  0.3037],
          [ 0.4134,  0.4220,  0.4368,  0.4478,  0.4478, -0.2548]]]),
 tensor([[[ 0.4363,  0.4348,  0.4474,  0.4431,  0.4431,  0.2700],
          [ 0.4269,  0.4061,  0.3983,  0.3721,  0.3721,  0.1823],
          [ 0.3935,  0.3818,  0.3812,  0.3790,  0.3790, -0.3637],
          [ 0.3982,  0.4035,  0.4113,  0.4034,  0.4034, -0.6333],
          [ 0.3936,  0.4033,  0.4006,  0.3973,  0.3973, -0.9616],
          [ 0.3664,  0.3395,  0.3460,  0.3188,  0.3188, -0.3173],
          [ 0.3322,  0.3195,  0.3404,  0.3363,  0.3363, -0.5530]]]))

In [10]:
x_train = DataLoader(TimeSeriesDataset(n100_train, 7, torchvision.transforms.ToTensor()), batch_size = 1, shuffle = False)
x_valid = DataLoader(TimeSeriesDataset(n100_valid, 7, torchvision.transforms.ToTensor()), batch_size = 1, shuffle = False)
x_test = DataLoader(TimeSeriesDataset(n100_test, 7, torchvision.transforms.ToTensor()), batch_size = 1, shuffle = False)

In [11]:
class RNN(torch.nn.Module):
    def __init__(self):
        super(RNN, self).__init__()
        self.LSTM1 = torch.nn.LSTM(6, 64, 16)
        self.regression = torch.nn.Sequential(
            torch.nn.Dropout(0.3),
            torch.nn.Linear(64, 64),
            torch.nn.BatchNorm1d(7),
            torch.nn.ReLU(True),
            torch.nn.Linear(64, 6)
            )
        
        
    def forward(self, x):
        x = x.reshape(-1, 7, 6)
        x, _ = self.LSTM1(x)
        x = torch.nn.BatchNorm1d(7)(x)
        x = self.regression(x)
        return x

In [12]:
class BestModel:
    def __init__(self, best_loss = float("inf")):
        self.best_loss = best_loss
        
    def __call__(self, loss, model):
        if loss < self.best_loss:
            self.best_loss = loss
            torch.save(model.state_dict(), "params/best_model_param")

In [13]:
model = RNN()
model = model.to(device)

loss_fn = torch.nn.HuberLoss(delta = 0.8)
optimizer = torch.optim.NAdam(model.parameters(), lr = 1e-3)

best_model = BestModel()


def train(dataloader, valid_dataloader, model, loss_fn, optimizer, function = None):
    size = len(dataloader.dataset)
    model.train()
    for batch, (x, y) in enumerate(dataloader):
        x, y = x.to(device), y.reshape(-1, 7, 6).to(device)
        pred = model(x)
        loss = loss_fn(pred, y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(x)
            print(f"loss: {loss:>5f}  [{current:>3d}/{size:>3d}]")
    
    model.eval()
    num_batches = len(valid_dataloader)
    valid_loss = 0
    with torch.no_grad():
        for x, y in valid_dataloader:
            x, y = x.to(device), y.reshape(-1, 7, 6).to(device)
            pred = model(x)
            valid_loss += loss_fn(pred, y).item()
    if function:
        function(valid_loss, model)
    valid_loss /= num_batches
    print(f"Avg Valid loss: {valid_loss:>8f} \n")
            
    
            
def test(dataloader, model, loss_fn):
    num_batches = len(dataloader)
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for x, y in dataloader:
            x, y = x.to(device), y.reshape(-1, 7, 6).to(device)
            pred = model(x)
            test_loss += loss_fn(pred, y).item()
    test_loss /= num_batches
    print(f"Avg Test loss: {test_loss:>8f} \n")

In [14]:
epochs = 10
for epoch in range(epochs):
    print(f"Epoch {epoch+1}\n-------------------------------")
    train(x_train, x_valid, model, loss_fn, optimizer, best_model)

Epoch 1
-------------------------------
loss: 0.259609  [  1/395]
loss: 0.018820  [101/395]
loss: 0.011673  [201/395]
loss: 0.011512  [301/395]
Avg Valid loss: 0.032891 

Epoch 2
-------------------------------
loss: 1.366077  [  1/395]
loss: 0.015175  [101/395]
loss: 0.008586  [201/395]
loss: 0.011848  [301/395]
Avg Valid loss: 0.032504 

Epoch 3
-------------------------------
loss: 1.376228  [  1/395]
loss: 0.014558  [101/395]
loss: 0.020074  [201/395]
loss: 0.012281  [301/395]
Avg Valid loss: 0.033749 

Epoch 4
-------------------------------
loss: 1.367871  [  1/395]
loss: 0.018627  [101/395]
loss: 0.028128  [201/395]
loss: 0.014391  [301/395]
Avg Valid loss: 0.033292 

Epoch 5
-------------------------------
loss: 1.364536  [  1/395]
loss: 0.036430  [101/395]
loss: 0.031949  [201/395]
loss: 0.013180  [301/395]
Avg Valid loss: 0.035412 

Epoch 6
-------------------------------
loss: 1.397788  [  1/395]
loss: 0.035560  [101/395]
loss: 0.032085  [201/395]
loss: 0.013703  [301/395]
A

In [15]:
model.load_state_dict(torch.load("params/best_model_param"))
test(x_test, model, loss_fn)

Avg Test loss: 0.075570 



In [16]:
test_example = torch.tensor((n100_test[-14:-7].values - mean) / std)

In [17]:
pred = (model(test_example) * torch.tensor(std)) + torch.tensor(mean)
pred

tensor([[[1.3652e+03, 1.3793e+03, 1.3616e+03, 1.3698e+03, 1.3653e+03,
          1.7611e+08],
         [1.3434e+03, 1.3567e+03, 1.3406e+03, 1.3474e+03, 1.3434e+03,
          1.8022e+08],
         [1.3661e+03, 1.3801e+03, 1.3628e+03, 1.3708e+03, 1.3665e+03,
          1.7611e+08],
         [1.3450e+03, 1.3585e+03, 1.3417e+03, 1.3489e+03, 1.3446e+03,
          1.7973e+08],
         [1.3647e+03, 1.3788e+03, 1.3612e+03, 1.3693e+03, 1.3649e+03,
          1.7625e+08],
         [1.3434e+03, 1.3570e+03, 1.3401e+03, 1.3473e+03, 1.3430e+03,
          1.8000e+08],
         [1.3477e+03, 1.3614e+03, 1.3444e+03, 1.3517e+03, 1.3474e+03,
          1.7923e+08]]], device='cuda:0', grad_fn=<AddBackward0>)

In [18]:
n100_test[-7:].values

array([[1.376540e+03, 1.383100e+03, 1.372070e+03, 1.381640e+03,
        1.381640e+03, 3.020746e+08],
       [1.382410e+03, 1.388590e+03, 1.380170e+03, 1.388490e+03,
        1.388490e+03, 2.871619e+08],
       [1.388240e+03, 1.388430e+03, 1.381150e+03, 1.387250e+03,
        1.387250e+03, 2.351059e+08],
       [1.387600e+03, 1.394100e+03, 1.387580e+03, 1.390510e+03,
        1.390510e+03, 2.320258e+08],
       [1.390160e+03, 1.390160e+03, 1.365260e+03, 1.375220e+03,
        1.375220e+03, 3.159345e+08],
       [1.375330e+03, 1.402670e+03, 1.375330e+03, 1.401480e+03,
        1.401480e+03, 3.911457e+08],
       [1.400680e+03, 1.403500e+03, 1.391240e+03, 1.400610e+03,
        1.400610e+03, 4.174905e+08]], dtype=float32)

In [19]:
#losses/percent errors
diff = torch.abs(pred - torch.tensor(n100_test[-7:].values))
(diff / torch.abs(torch.tensor(n100_test[-7:].values)) * torch.tensor(100))

tensor([[[ 0.8260,  0.2733,  0.7638,  0.8562,  1.1803, 41.6993],
         [ 2.8213,  2.2980,  2.8663,  2.9589,  3.2508, 37.2420],
         [ 1.5970,  0.6017,  1.3322,  1.1828,  1.4981, 25.0951],
         [ 3.0724,  2.5516,  3.3052,  2.9952,  3.3003, 22.5402],
         [ 1.8338,  0.8194,  0.3006,  0.4277,  0.7509, 44.2135],
         [ 2.3198,  3.2579,  2.5597,  3.8694,  4.1724, 53.9820],
         [ 3.7813,  3.0031,  3.3640,  3.4913,  3.7962, 57.0694]]],
       device='cuda:0', grad_fn=<MulBackward0>)