In [None]:
import os
from collections import deque

import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

# Set before the project modules are imported, as in the original cell order.
pd.options.mode.chained_assignment = None

from utils.nn_data_classifier import load_data, Classifier
from utils.preprocess import preprocess, RNNDataset

# TensorBoard writer used by the training/validation helpers in this notebook;
# event files are written under ./logs.
writer = SummaryWriter(log_dir='logs')



Load the dataset

In [None]:
# Keep only the rows stamped exactly on the hour. The helper `minute` column
# exists only for the duration of the chain and is dropped again at the end.
historical_data = (
    load_data()
    .assign(minute=lambda frame: frame.time.dt.minute)
    .loc[lambda frame: frame.minute == 0]
    .drop(columns='minute')
)

historical_data

Build the prediction target (the price `time_outlook` steps ahead)

In [None]:
# How many rows ahead the model has to predict.
time_outlook = 1

# `Prediction` is the Price `time_outlook` rows later; the trailing rows that
# have no future price (NaN after the shift) are removed together with any
# other incomplete row. `assign` works on a copy, so `historical_data` is
# left untouched.
data = (
    historical_data
    .assign(Prediction=lambda frame: frame.Price.shift(-time_outlook))
    .dropna()
    .drop(columns='time')
    .reset_index(drop=True)
)
data

Build sliding-window sequences

In [None]:
# Number of consecutive rows that make up one input window for the model.
SEQ_LEN = 24

In [None]:
# Turn the flat table into (window, target) samples: every window holds the
# last SEQ_LEN feature rows and is paired with the `Prediction` value of the
# window's final row.
y = data.Prediction
X = data.drop('Prediction', axis=1)

sequence = deque(maxlen=SEQ_LEN)  # rolling buffer; the oldest row falls out automatically
samples = []
for seq, price in zip(X.values, y):
    sequence.append(seq)
    # Nothing is emitted until the buffer holds a full window.
    if len(sequence) == SEQ_LEN:
        samples.append((np.array(sequence), price))

# Each sample pairs a 2-D window with a scalar, i.e. the rows are ragged.
# Recent NumPy versions refuse to build such an array implicitly, so the
# (n_samples, 2) object array is allocated and filled explicitly.
dataset = np.empty((len(samples), 2), dtype=object)
for row, (window, target) in enumerate(samples):
    dataset[row, 0] = window
    dataset[row, 1] = target

dataset.shape

In [None]:
class PredDataSet(Dataset):
    """Map-style dataset over ``(window, target)`` pairs.

    Args:
        data: indexable collection whose items are ``[window, target]`` pairs,
            where ``window`` is array-like and ``target`` is a scalar.
        transform: optional callable applied to the float32 window tensor.
        target_transform: optional callable applied to the float32 target tensor.
    """

    def __init__(self, data: np.ndarray, transform = None, target_transform = None):
        self._data = data
        self._transform = transform
        self._target_transform = target_transform

    def __len__(self):
        """Return the number of samples."""
        return len(self._data)

    def __getitem__(self, x:int):
        """Return sample ``x`` as a ``(window, target)`` pair of float32 tensors."""
        X = torch.tensor(self._data[x][0]).float()
        y = torch.tensor(self._data[x][1]).float()

        # The transforms were accepted by the constructor but never used;
        # apply them here so passing one actually has an effect.
        if self._transform is not None:
            X = self._transform(X)
        if self._target_transform is not None:
            y = self._target_transform(y)
        return X, y

In [None]:
# Chronological 60 / 20 / 20 split (no shuffling: samples are ordered in time).
total_len = len(dataset)
training_size = int(0.6 * total_len)
validation_size = int(0.2 * total_len)
# The test split takes everything that is left, so samples lost to integer
# truncation are not silently discarded.
testing_size = total_len - training_size - validation_size

validation_end = training_size + validation_size

train_df = dataset[:training_size]
validation_df = dataset[training_size:validation_end]
# Slice from an explicit start index: `dataset[-testing_size:]` would return
# the WHOLE dataset whenever testing_size is 0.
testing_df = dataset[validation_end:]

training = PredDataSet(train_df)
validation = PredDataSet(validation_df)
testing = PredDataSet(testing_df)

In [None]:
from torch import nn
class RNN_module(nn.Module):
    """LSTM stack followed by a linear head that emits one value per sequence."""

    def __init__(self, hidden_size, input_size, num_layers):
        """Build the network.

        Args:
            hidden_size: size of the LSTM hidden state.
            input_size: number of features per time step.
            num_layers: number of stacked LSTM layers.
        """
        super().__init__()
        self._num_layers = num_layers
        self._input_size = input_size
        self._hidden_size = hidden_size
        self._output_size = 1

        # batch_first=True -> inputs are laid out as (batch, time, features).
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=self._output_size)

    def __str__(self):
        return (
            f"RNN LSTM Model w/ {self._input_size} features and "
            f"{self._num_layers} layers and {self._hidden_size} of hidden size"
        )

    def forward(self, input):
        """Return the head's output for the last time step of every sequence."""
        per_step_output, _ = self.lstm(input)
        last_step = per_step_output[:, -1]
        return self.fc(last_step)

In [None]:
from torch.utils.data import DataLoader

# Hyper-parameters
SEED = 42
learning_rate = 0.01
dim_size = training[0][0].shape[1]  # features per time step (second axis of a window)
hidden_size = 60
batch_size = 64
epochs = 10
layers = 1

# Seed torch's global RNG so weight initialisation and the shuffling order of
# the training batches are the same on every Restart & Run All.
torch.manual_seed(SEED)

train_dataloader = DataLoader(training, batch_size = batch_size, shuffle = True)
# Evaluation loaders are not shuffled: the order does not matter for the
# averaged loss and shuffling would only consume random numbers.
validation_dataloader = DataLoader(validation, batch_size = batch_size, shuffle = False)
test_dataloader = DataLoader(testing, batch_size = batch_size, shuffle = False)

model = RNN_module(hidden_size = hidden_size, input_size = dim_size,
                   num_layers = layers)

In [None]:
# Regression objective: mean squared error between predicted and actual price.
criterion = nn.MSELoss(reduction='mean')

# Adam over every trainable parameter of the model.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=learning_rate,
)

In [None]:
# Everything the training run persists lives under one base directory.
_MODEL_DIR = './models_parameters/LSTM/'

# Directory prefix (with trailing slash) for the per-epoch checkpoints.
CHECKPOINT_PATH = _MODEL_DIR + 'checkpoints_2/'
# File that receives the checkpoint of the best epoch.
BEST_PATH = _MODEL_DIR + 'best_model.pth'

def epoch_training(model, train_dataloader, criterion, epoch, total_epochs, optimizer):
    """Run one optimisation pass over ``train_dataloader``.

    Every 50 batches the mean loss of those batches is printed and logged to
    TensorBoard; at the end of the pass the mean loss of the whole epoch is
    logged under ``'Epoch loss: '``.

    Args:
        model: network being trained; called as ``model(sequence)``.
        train_dataloader: iterable of ``(sequence, prices)`` batches.
        criterion: loss callable accepting ``input=`` and ``target=``.
        epoch: zero-based index of the current epoch.
        total_epochs: total number of epochs (used for the progress message).
        optimizer: optimizer that owns the model parameters.

    Returns:
        float: sample-weighted mean training loss of the epoch, or ``nan``
        when the dataloader produced no batches.
    """
    n_of_steps = len(train_dataloader)
    running_loss = 0.0      # loss accumulated since the last progress report
    epoch_loss_sum = 0.0    # per-batch mean loss times batch size, summed over the epoch
    n_samples = 0

    model.train()
    for current_batch, (sequence, prices) in enumerate(train_dataloader):
        # forward: compute the loss for the current parameters
        outputs = model(sequence).view(-1)
        loss = criterion(input=outputs, target=prices)

        # backward: update the parameters from the current loss
        optimizer.zero_grad()  # gradients accumulate across backward() calls unless reset
        loss.backward()        # back-propagate to get a gradient for each parameter
        optimizer.step()       # apply the gradients

        batch_loss = loss.item()
        n_batch = outputs.shape[0]
        running_loss += batch_loss
        # Assumes `criterion` averages over the batch (true for the default
        # nn.MSELoss), so weighting by batch size gives a per-sample mean.
        epoch_loss_sum += batch_loss * n_batch
        n_samples += n_batch

        if (current_batch + 1) % 50 == 0:
            print(f"epoch {epoch+1}/{total_epochs}, current step(batch): {current_batch+1}/{n_of_steps}, loss = {batch_loss:.4f} ")
            writer.add_scalar('training loss: ', running_loss/50, epoch * n_of_steps + current_batch)
            running_loss = 0.0

    # Nothing to report for an empty loader (previously a NameError on `loss`).
    if n_samples == 0:
        return float('nan')

    # Log the epoch average rather than whatever the final batch happened to score.
    epoch_loss = epoch_loss_sum / n_samples
    writer.add_scalar('Epoch loss: ', epoch_loss, epoch + 1)
    return epoch_loss


def epoch_validate(model, validation_dataloader, criterion, epoch, total_epochs):
    """Compute the mean loss of ``model`` over ``validation_dataloader``.

    Gradients are disabled and the model is put in eval mode for the duration
    of the call; its previous train/eval mode is restored afterwards.

    Args:
        model: network to evaluate; called as ``model(sequence)``.
        validation_dataloader: iterable of ``(sequence, prices)`` batches.
        criterion: loss callable accepting ``input=`` and ``target=``.
        epoch: zero-based index of the current epoch.
        total_epochs: total number of epochs (used for the progress message).

    Returns:
        float: sample-weighted mean validation loss (lower is better), or
        ``nan`` when the dataloader produced no batches.
    """
    was_training = model.training
    model.eval()

    total_loss = 0.0
    n_samples = 0
    try:
        with torch.no_grad():
            for sequence, prices in validation_dataloader:
                # forward only: no parameter update during validation
                outputs = model(sequence).view(-1)
                n_batch = outputs.shape[0]

                # Assumes `criterion` returns the batch mean; re-weight by batch
                # size so a smaller last batch does not skew the average.
                total_loss += criterion(input=outputs, target=prices).item() * n_batch
                n_samples += n_batch
    finally:
        model.train(was_training)

    # An empty validation set used to raise ZeroDivisionError here.
    if n_samples == 0:
        print(f"epoch {epoch+1}/{total_epochs} final_loss: nan (validation set is empty)")
        return float('nan')

    final_loss = total_loss / n_samples

    print(f"epoch {epoch+1}/{total_epochs} final_loss: {final_loss}")
    # The logged value is a loss, so the tag says so (it used to read "Accuracy").
    writer.add_scalar('Validation loss: ', final_loss, epoch+1)

    return final_loss


def train_loop(model: torch.nn.Module, train_dataloader: DataLoader, criterion: torch.nn.Module, validation_dataloader: DataLoader, epochs: int, optimizer: torch.optim.Optimizer):
    """Train ``model`` for ``epochs`` epochs, checkpointing after each one.

    After every epoch a checkpoint (epoch number, model state, optimizer state)
    is written under ``CHECKPOINT_PATH``. Whenever the validation loss improves
    on the best value seen so far, the same checkpoint is also written to
    ``BEST_PATH``.

    Args:
        model: network to train.
        train_dataloader: batches used for optimisation.
        criterion: loss function.
        validation_dataloader: batches used to select the best epoch.
        epochs: number of passes over the training data.
        optimizer: optimizer that owns the model parameters.
    """
    # torch.save does not create missing directories.
    os.makedirs(CHECKPOINT_PATH, exist_ok=True)
    best_dir = os.path.dirname(BEST_PATH)
    if best_dir:
        os.makedirs(best_dir, exist_ok=True)

    # The validation metric is a loss (lower is better). Starting from +inf
    # guarantees the first epoch is recorded as the best so far; the previous
    # start value of 0 could never be beaten by a non-negative loss, so the
    # best model was never saved.
    best_loss = float('inf')

    for epoch in range(epochs):
        epoch_training(model, train_dataloader, criterion, epoch, epochs, optimizer)

        validation_loss = epoch_validate(model, validation_dataloader, criterion, epoch, epochs)

        checkpoint = {
            'epoch': epoch+1,
            'model_state': model.state_dict(),
            'optim_state': optimizer.state_dict()
        }

        if validation_loss < best_loss:
            best_loss = validation_loss
            torch.save(checkpoint, BEST_PATH)

        torch.save(checkpoint, os.path.join(CHECKPOINT_PATH, f'model_{epoch+1}.pth'))

In [None]:
# Snapshot of the weights before training.
# `state_dict()` hands back references to the live parameter tensors, which the
# optimizer later updates in place, so every tensor is cloned to freeze its
# current values.
# NOTE(review): assumes the intent is to compare these initial weights with the
# checkpoints loaded further down - confirm.
c = {name: tensor.clone() for name, tensor in model.state_dict().items()}

In [None]:
def test_loop(test_dataloader: DataLoader, model: nn.Module):
    with torch.no_grad():
        n_corrects = 0
        n_samples = 0

        for current_batch, (sequence, label) in enumerate(test_dataloader):
            #forward: we are calculating the loss given the parameters
            outputs = model(sequence)
            predictions = torch.argmax(outputs, 1)

            n_samples += outputs.shape[0]
            n_corrects += (predictions == label).sum().item()

            if (current_batch + 1) % 200 == 0:
                print(f"test batch: {current_batch+1}/{len(test_dataloader)}, current accuracy: {100 * n_corrects / n_samples}")

        acc = 100.0 * n_corrects / n_samples
        print(f"final test accuracy: {acc}")


In [None]:
# Fit the model. The test set is evaluated further down, using the best
# checkpoint rather than the weights of the last epoch.
train_loop(
    model,
    train_dataloader=train_dataloader,
    criterion=criterion,
    validation_dataloader=validation_dataloader,
    epochs=epochs,
    optimizer=optimizer,
)

In [None]:
# Sum of absolute gradient values per parameter, as left by the most recent
# backward pass.
for name, param in model.named_parameters():
    # `grad` is None until a backward pass has populated it (e.g. when this
    # cell runs before training), so guard against the AttributeError.
    if param.grad is None:
        print(name, None)
    else:
        print(name, param.grad.abs().sum())


Evaluate the model with the best validation loss on the test set

In [None]:
# Rebuild the network with the same architecture that was trained.
# RNN_module takes no `output_size` argument (its head always has one output)
# and `number_of_classes` is not defined in this notebook, so the previous
# call could not run.
best_model = RNN_module(hidden_size = hidden_size, input_size = dim_size,
                        num_layers = layers)

checkpoint = torch.load(BEST_PATH)
print(f'Model type: {best_model}')
print(f'Best performing model found at {checkpoint["epoch"]}ºepoch')

# strict=True: fail loudly if the checkpoint does not match the architecture.
best_model.load_state_dict(state_dict=checkpoint['model_state'], strict=True)
best_model.eval()

test_loop(test_dataloader=test_dataloader, model=best_model)

In [None]:
# Model weights after the first and after the last training epoch. The paths
# are derived from CHECKPOINT_PATH and `epochs` so that they keep pointing at
# the files train_loop writes when either setting changes.
checkpoint = torch.load(f'{CHECKPOINT_PATH}model_1.pth')

a = checkpoint['model_state']

checkpoint = torch.load(f'{CHECKPOINT_PATH}model_{epochs}.pth')

b = checkpoint['model_state']

In [None]:
# Done logging: close the TensorBoard writer created at the top of the notebook.
writer.close()