In [167]:
from model.lstm import LSTM
from model.rnn import RNN
from model.lstm_teacher_forcing import LSTM_Forcing
import pandas as pd
import numpy as np
import torch
import time
from torch.utils.data import DataLoader, TensorDataset, Dataset
import matplotlib.pyplot as plt

In [163]:
# param
window_size = 10
input_dim = 135
hidden_dim = 32
num_layers = 2
output_dim = 1

batch_size = 32
epochs = 10

BNBUSDT= 0
BTCUSDT= 1
ETHUSDT= 2

lookback = 20

path_data = "../training_data/data_norm_2021_2022.csv"
save_model_path = "./results/"
# test_path = "./data/data_2023.csv"

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Create DataSet

In [193]:
class TrainingDataset(Dataset):
    def __init__(self, path_data, window_size):
        self.data = pd.read_csv(path_data, index_col=0)
        self.seq_len  = window_size
        self.index = self.data.index.unique()
    
    def __len__(self):
        return len(self.index)-1
    
    def __getitem__(self, idx_time):
        d_x = idx_time - self.seq_len + 1
        d_y = idx_time
        X = self.data.loc[self.index[d_x:idx_time]]
        if X.empty:
            X = np.zeros(((self.seq_len-1) *3, 5)).reshape(1,-1)
            return X , np.array([0])
        
        data_y = self.data.loc[self.index[d_y:idx_time + 1]]
        y = data_y[data_y["symbol"]==BTCUSDT].close.values
        
        return np.array(X).reshape(1,-1), y
    
    def __iter__(self):
        self.current_index = 0
        return self

    def __next__(self):
        if self.current_index < len(self.index):
            result = self[self.current_index]
            self.current_index += 1
            return result
        else:
            raise StopIteration

In [195]:
data = TrainingDataset(path_data, 10)
data_loader = DataLoader(data, batch_size=64, shuffle=False)

In [None]:
# iter(data)
# next(data)

### Training with RNN Model

In [None]:
model_rnn = RNN(input_dim=135, hidden_dim=hidden_dim, output_dim=output_dim, num_layers=num_layers, device=device)
criterion = torch.nn.MSELoss(reduction='mean')
optimiser = torch.optim.Adam(model_rnn.parameters(), lr=0.02)

In [147]:
# # Test
# data_iter = iter(data_loader)
# x_batch, y_batch = next(data_iter)
# y_train_pred = model_rnn(x_batch.float())
# loss = criterion(y_train_pred, y_batch.float())
# optimiser.zero_grad()
# loss.backward()

In [None]:
# Training loop
start_time = time.time()

loss_values = []

for epoch in range(200):
    # print("================3==============")
    total_loss = 0.0
    for x_batch, y_batch in data_loader:
        # print(x_batch.shape)
        # print(y_batch.shape)
        y_train_pred = model_rnn(x_batch.float())
        loss = criterion(y_train_pred, y_batch.float())
        optimiser.zero_grad()
        loss.backward()
        print("Epoch ", epoch, "MSE: ", loss.item())
        optimiser.step()
        total_loss += loss.item()
        # break

    loss_values.append(total_loss)
    if epoch%10==0:
        # print("y_train_pred: ", y_train_pred, "y_batch: ", y_batch)
        print(f"Epoch {epoch}, Average Loss: {total_loss / len(data_loader)}")
        torch.save(model_rnn, "results/model_rnn.pt")
    # break

training_time = time.time() - start_time
print("Training time: {:.2f} seconds".format(training_time))

### Training LSTM

In [164]:
model = LSTM(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim, num_layers=num_layers, device=device)
criterion = torch.nn.MSELoss(reduction='mean')
optimiser = torch.optim.Adam(model.parameters(), lr=0.3)

In [None]:
# Training loop
start_time = time.time()
loss_values = []

for epoch in range(epochs):
    # print("================3==============")
    total_loss = 0.0
    for x_batch, y_batch in data_loader:
        # print(x_batch.shape)
        # print(y_batch.shape)
        y_train_pred = model(x_batch.float())
        loss = criterion(y_train_pred, y_batch.float())
        optimiser.zero_grad()
        loss.backward()
        print("Epoch ", epoch, "MSE: ", loss.item())
        optimiser.step()
        total_loss += loss.item()
        # break
    loss_values.append(total_loss /  len(data_loader))
    if epoch%100==0:
        print(f"Epoch {epoch}, Average Loss: {total_loss / len(data_loader)}")
    # break

training_time = time.time() - start_time
print("Training time: {:.2f} seconds".format(training_time))

### Training LSTM Teacher Forcing - Error

In [258]:

import torch
import torch.nn as nn
import torch.optim as optim
import random

class LSTM_Forcing(nn.Module):
  def __init__(self, input_size, hidden_size, output_size):
    super(LSTM_Forcing, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.lstm = nn.LSTMCell(self.input_size, self.hidden_size)
    self.linear = nn.Linear(self.hidden_size, self.output_size)

  def forward(self, input, future=0, y=None):
    outputs = []

    #reset the state of LSTM
    #the state is kept till the end of the sequence
    h_t = torch.zeros(input.size(0), self.hidden_size, dtype=torch.float32)
    c_t = torch.zeros(input.size(0), self.hidden_size, dtype=torch.float32)

    for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):
      h_t, c_t = self.lstm(input_t, (h_t,c_t))
      output = self.linear(h_t)
      outputs += [output]

    for i in range(future): #teacher forcing
      if y is not None and random.random()>0.5:
        output = y[:,[i]]
      h_t, c_t = self.lstm(output,(h_t,c_t))
      output = self.linear(h_t)
      outputs += [output]
    outputs = torch.stack(outputs,1).squeeze(2)
    return outputs 
   
model = LSTM_Forcing(input_size=input_dim, hidden_size=hidden_dim, output_size=output_dim)
criterion = torch.nn.MSELoss(reduction='mean')
optimiser = torch.optim.Adam(model.parameters(), lr=0.3)

In [263]:
iter(data)
x_batch, y_batch = next(data)

In [265]:
x_batch.shape

(1, 135)

In [266]:
lstm = nn.LSTMCell(135, 64)
lstm(torch.from_numpy(x_batch))

RuntimeError: mat1 and mat2 must have the same dtype, but got Double and Float

In [190]:
# Test
# data_iter = iter(data_loader)
# x_batch, y_batch = next(data_iter)
# y_train_pred = model(x_batch)
# loss = criterion(y_train_pred, y_batch.float())
# optimiser.zero_grad()
# loss.backward()

In [None]:
# Training loop
start_time = time.time()
loss_values = []

for epoch in range(epochs):
    # print("================3==============")
    total_loss = 0.0
    for x_batch, y_batch in data_loader:
        # print(x_batch.shape)
        # print(y_batch.shape)
        y_train_pred = model(x_batch.float())
        loss = criterion(y_train_pred, y_batch.float())
        optimiser.zero_grad()
        loss.backward()
        print("Epoch ", epoch, "MSE: ", loss.item())
        optimiser.step()
        total_loss += loss.item()
        # break
    loss_values.append(total_loss /  len(data_loader))
    if epoch%100==0:
        print(f"Epoch {epoch}, Average Loss: {total_loss / len(data_loader)}")
    # break

training_time = time.time() - start_time
print("Training time: {:.2f} seconds".format(training_time))

### Traning with Transformer