In [1]:
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import KFold
from torch.utils.data import DataLoader, TensorDataset
from copy import deepcopy
import itertools
from PriceData import DataProcessing
from LSTM import LSTM

### Model parameters

In [2]:
# Hyperparameters
NUM_EPOCHS      = 1000
LEARNING_RATE   = 0.0001

HIDDEN_SIZE     = 32            # Dimensionality of hidden units
NUM_FC_FEATURES = 128           # Number of output features, first FF layer
NUM_LSTM_LAYERS = 2             # Number of LSTM Layers

SEQ_LENGTH      = 60            # Length of inputs sequences
BATCH_SIZE      = 16

# Regular parameters
input_size  = 1      # Number of input features
output_size = 1      # Number of output features

criterion = nn.MSELoss()    # Set MSE as loss function

nr_folds = 10               # Number of folds in K-fold cross validation
kf = KFold(n_splits = nr_folds)

In [3]:
learning_rates  = [0.01, 0.001, 0.0001, 0.00001]
# batch_sizes     = [32, 64, 128, 256, 512]
batch_sizes     = [512]
hidden_sizes    = [16, 32, 48, 60]
fc_features     = [64, 96, 128, 160]
num_lstms       = [2,4,6,8]
seq_lengths     = [30, 60, 90, 120]

HP_combinations = list(itertools.product(seq_lengths, batch_sizes, learning_rates,
                                         fc_features, hidden_sizes, num_lstms))


In [4]:
len(HP_combinations)

1024

### Data Processing

In [5]:
# data_process = DataProcessing(seq_length=SEQ_LENGTH, batch_size=BATCH_SIZE)
# X_train, y_train, X_test, y_test = data_process.get_process_data()

### LSTM model

In [6]:
# Set device to CUDA
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  return torch._C._cuda_getDeviceCount() > 0


### Training and Testing

In [7]:
## Struct to store model info
class ModelInfo():
    def __init__(self, loss=float(0)):
        self.inputs     = []
        self.labels     = []
        self.outputs    = []
        self.loss       = loss
        self.params     = None
        self.lstm       = None
        self.train_loader  = None
        self.val_loader    = None
        self.input_scaler  = None
        self.output_scaler = None

# Store best model of every fold
results = {i:ModelInfo() for i in range(nr_folds)}
for i in range(nr_folds): results[i].loss = np.inf

In [8]:
class Hyperparams():
    def __init__(self, learning_rate, hidden_size, num_fc, num_lstm, avg_val_error=float(0)):
        self.learning_rate  = learning_rate
        self.hidden_size    = hidden_size
        self.num_fc         = num_fc
        self.num_lstm       = num_lstm

        self.avg_val_error  = avg_val_error


In [9]:
# for fold, (train_indices, val_indices) in enumerate(kf.split(X_train)):
#     print(f'----------\nFOLD {fold}')

#     train_loader, val_loader = data_process.create_fold_sets(train_indices, val_indices)

#     results[fold].input_scaler  = data_process.input_scaler
#     results[fold].output_scaler = data_process.output_scaler

#     hp_results = []
#     for learning_rate, hidden_size, num_fc, num_lstm in HP_combinations:
#         hp_results.append(Hyperparams(learning_rate, hidden_size, num_fc, num_lstm))

#         # Create LSTM object and move it to the GPU
#         lstm = LSTM(output_size, input_size, hidden_size, num_lstm, num_fc, device).to(device)

#         # Initialize optimizer
#         optimizer = torch.optim.Adam(lstm.parameters(), lr=LEARNING_RATE)

#         epochs_wo_improvement = 0
#         for epoch in range(NUM_EPOCHS):
#             if epochs_wo_improvement > 50:
#                 print('early stopping!')
#                 break

#             for i, data in enumerate(train_loader, 0):
#                 inputs, targets = data
#                 targets = targets.reshape(targets.shape[0],1)

#                 inputs  = inputs.to(device)
#                 targets = targets.to(device)

#                 # Zero the gradients
#                 optimizer.zero_grad()

#                 # Forward pass
#                 outputs = lstm.forward(inputs)
#                 # Compute loss
#                 loss = criterion(outputs, targets)
#                 # Backward pass
#                 loss.backward()
#                 # Parameter update
#                 optimizer.step()

#             val_results = ModelInfo()
#             val_results.loss = 0
#             val_results.params = deepcopy(lstm.state_dict())
#             val_results.model  = deepcopy(lstm)
#             with torch.no_grad():
#                 for i, val_data in enumerate(val_loader, 0):
#                     val_inputs, val_targets = val_data
#                     val_targets = val_targets.reshape(val_targets.shape[0], 1)

#                     val_inputs  = val_inputs.to(device)
#                     val_targets = val_targets.to(device)

#                     val_outputs = lstm.forward(val_inputs)
#                     val_loss = criterion(val_outputs, val_targets)

#                     for i in range(val_inputs.shape[0]):
#                         single_input = val_inputs[i].flatten()
#                         single_label = val_targets[i]
#                         single_output = val_outputs[i]

#                         val_results.inputs.append(single_input)
#                         val_results.labels.append(single_label)
#                         val_results.outputs.append(single_output)

#                     val_results.loss += float(val_loss.item())
#             val_results.loss /= len(val_loader)

#             if results[fold].loss > val_results.loss:
#                 val_results.train_loader  = deepcopy(train_loader)
#                 val_results.val_loader    = deepcopy(val_loader)
#                 val_results.input_scaler  = deepcopy(results[fold].input_scaler)
#                 val_results.output_scaler = deepcopy(results[fold].output_scaler)
#                 results[fold]             = deepcopy(val_results)

#                 print(f'epoch {epoch} | validation loss: {val_results.loss} | new best model!')
#                 epochs_wo_improvement = 0
#             else:
#                 print(f'epoch {epoch} | validation loss: {val_results.loss}')
#                 epochs_wo_improvement += 1

#         X_test_norm, y_test_norm, _, _ = data_process.normalize(X_test, y_test, results[fold].input_scaler, results[fold].output_scaler, fit=0)

#         lstm_test = results[fold].model

#         lstm_test.load_state_dict(results[fold].params)

#         # Set the model to evaluation mode
#         lstm_test.eval()

#         X_test_norm = X_test_norm.to(device)
#         test_output = lstm_test.forward(X_test_norm).to(device)

#         test_output_np = test_output.cpu().data.numpy()
#         test_labels_np = y_test_norm.data.numpy()
#         test_labels_np = test_labels_np.reshape(test_labels_np.shape[0], 1)

#         X_plot = results[fold].input_scaler.inverse_transform(test_output_np)
#         y_plot = results[fold].output_scaler.inverse_transform(test_labels_np)

#         errors = np.abs(X_plot - y_plot)
#         avg_error = np.mean(errors)
#         print(f'Average test error: {avg_error}\n')

#         hp_results[-1].avg_val_error

#     best_avg_val_loss = np.inf
#     best_config       = None
#     for hp_result in hp_results:
#         if hp_result.avg_val_loss < best_avg_val_loss:
#             best_config = hp_result
#             best_avg_val_loss = hp_result.avg_val_loss
#     print('----------')
#     print(f'Best config:')
#     print(f'    learning rate       : {hp_result.learning_rate}')
#     print(f'    hidden size         : {hp_result.hidden_size}')
#     print(f'    num of fc features  : {hp_result.hidden_size}')
#     print(f'    num of lstm layers  : {hp_result.hidden_size}')
#     print(f'    AVERAGE VAL ERROR   : {hp_result.avg_val_loss}')



----

In [10]:
hp_results = [ModelInfo(loss=float(np.inf)) for i in range(len(HP_combinations))]

for k, (seq_length, batch_size, lr, num_fc, h_size, num_lstm) in enumerate(HP_combinations):
    # Create data processing object
    data_process = DataProcessing(seq_length=seq_length, batch_size=batch_size)

    # Create data loaders
    train_loader, val_loader, _ = data_process.get_process_data()

    # Store current hyper parameter config
    hp_results.append(Hyperparams(lr, h_size, num_fc, num_lstm))

    # Create LSTM object and move it to the GPU
    lstm = LSTM(output_size, input_size, h_size, num_lstm, num_fc, device).to(device)

    # Initialize optimizer
    optimizer = torch.optim.Adam(lstm.parameters(), lr=LEARNING_RATE)

    epochs_wo_improvement = 0
    for epoch in range(NUM_EPOCHS):
        if epochs_wo_improvement > 50:
            print('early stopping!')
            break

        for i, data in enumerate(train_loader, 0):
            inputs, targets = data
            targets = targets.reshape(targets.shape[0],1)

            inputs  = inputs.to(device)
            targets = targets.to(device)

            # Zero the gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = lstm.forward(inputs)
            # Compute loss
            loss = criterion(outputs, targets)
            # Backward pass
            loss.backward()
            # Parameter update
            optimizer.step()

        val_results         = ModelInfo()
        val_results.loss    = 0
        val_results.params  = deepcopy(lstm.state_dict())
        val_results.model   = deepcopy(lstm)

        with torch.no_grad():
            for i, val_data in enumerate(val_loader, 0):
                val_inputs, val_targets = val_data
                val_targets = val_targets.reshape(val_targets.shape[0], 1)

                val_inputs  = val_inputs.to(device)
                val_targets = val_targets.to(device)

                val_outputs = lstm.forward(val_inputs)
                val_loss = criterion(val_outputs, val_targets)

                for i in range(val_inputs.shape[0]):
                    single_input = val_inputs[i].flatten()
                    single_label = val_targets[i]
                    single_output = val_outputs[i]

                    val_results.inputs.append(single_input)
                    val_results.labels.append(single_label)
                    val_results.outputs.append(single_output)

                val_results.loss += float(val_loss.item())
        val_results.loss /= len(val_loader)

        if hp_results[k].loss > val_results.loss:
            val_results.train_loader  = deepcopy(train_loader)
            val_results.val_loader    = deepcopy(val_loader)
            val_results.input_scaler  = deepcopy(hp_results[k].input_scaler)
            val_results.output_scaler = deepcopy(hp_results[k].output_scaler)
            hp_results[k]             = deepcopy(val_results)

            print(f'epoch {epoch} | validation loss: {val_results.loss} | new best model!')
            epochs_wo_improvement = 0
        else:
            print(f'epoch {epoch} | validation loss: {val_results.loss}')
            epochs_wo_improvement += 1

    X_test_norm, y_test_norm = data_process.X_test_norm, data_process.y_test_norm 

    lstm_test = hp_results[k].model

    lstm_test.load_state_dict(hp_results[k].params)

    # Set the model to evaluation mode
    lstm_test.eval()

    X_test_norm = X_test_norm.to(device)
    test_output = lstm_test.forward(X_test_norm).to(device)

    test_output_np = test_output.cpu().data.numpy()
    test_labels_np = y_test_norm.data.numpy()
    test_labels_np = test_labels_np.reshape(test_labels_np.shape[0], 1)

    X_plot = data_process.in_scaler.inverse_transform(test_output_np)
    y_plot = data_process.out_scaler.inverse_transform(test_labels_np)

    errors = np.abs(X_plot - y_plot)
    avg_error = np.mean(errors)
    print(f'Average test error: {avg_error}\n')

    hp_results[-1].avg_val_error

best_avg_val_loss = np.inf
best_config       = None
for hp_result in hp_results:
    if hp_result.avg_val_loss < best_avg_val_loss:
        best_config = hp_result
        best_avg_val_loss = hp_result.avg_val_loss
print('----------')
print(f'Best config:')
print(f'    learning rate       : {hp_result.learning_rate}')
print(f'    hidden size         : {hp_result.hidden_size}')
print(f'    num of fc features  : {hp_result.hidden_size}')
print(f'    num of lstm layers  : {hp_result.hidden_size}')
print(f'    AVERAGE VAL ERROR   : {hp_result.avg_val_loss}')



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


epoch 0 | validation loss: 0.10369424894452095 | new best model!
epoch 1 | validation loss: 0.10136613249778748 | new best model!
epoch 2 | validation loss: 0.09909230843186378 | new best model!
epoch 3 | validation loss: 0.0968654528260231 | new best model!
epoch 4 | validation loss: 0.09468969330191612 | new best model!
epoch 5 | validation loss: 0.09258710592985153 | new best model!
epoch 6 | validation loss: 0.09054819867014885 | new best model!
epoch 7 | validation loss: 0.08856380730867386 | new best model!
epoch 8 | validation loss: 0.08664322644472122 | new best model!
epoch 9 | validation loss: 0.08478881418704987 | new best model!
epoch 10 | validation loss: 0.08299784734845161 | new best model!
epoch 11 | validation loss: 0.08124902844429016 | new best model!
epoch 12 | validation loss: 0.07955995947122574 | new best model!
epoch 13 | validation loss: 0.07792172953486443 | new best model!
epoch 14 | validation loss: 0.07633668929338455 | new best model!
epoch 15 | validation

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

epoch 999 | validation loss: 0.0003466204507276416 | new best model!
Average test error: 858.783447265625






epoch 0 | validation loss: 0.13189927861094475 | new best model!
epoch 1 | validation loss: 0.12988174706697464 | new best model!
epoch 2 | validation loss: 0.12792056426405907 | new best model!
epoch 3 | validation loss: 0.12601033598184586 | new best model!
epoch 4 | validation loss: 0.1241430938243866 | new best model!
epoch 5 | validation loss: 0.1223304383456707 | new best model!
epoch 6 | validation loss: 0.1205684207379818 | new best model!
epoch 7 | validation loss: 0.11886004731059074 | new best model!
epoch 8 | validation loss: 0.11720284074544907 | new best model!
epoch 9 | validation loss: 0.11558663472533226 | new best model!
epoch 10 | validation loss: 0.11401988938450813 | new best model!
epoch 11 | validation loss: 0.11250251531600952 | new best model!
epoch 12 | validation loss: 0.11102714017033577 | new best model!
epoch 13 | validation loss: 0.10960221290588379 | new best model!
epoch 14 | validation loss: 0.10821399465203285 | new best model!
epoch 15 | validation l

KeyboardInterrupt: 