In [9]:
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import KFold
from torch.utils.data import DataLoader, TensorDataset
from copy import deepcopy
import itertools
from PriceData import DataProcessing
from LSTM import LSTM

### Model parameters

In [10]:
# Hyperparameters
NUM_EPOCHS      = 1000
LEARNING_RATE   = 0.0001

HIDDEN_SIZE     = 32            # Dimensionality of hidden units
NUM_FC_FEATURES = 128           # Number of output features, first FF layer
NUM_LSTM_LAYERS = 2             # Number of LSTM Layers

SEQ_LENGTH      = 60            # Length of inputs sequences
BATCH_SIZE      = 16

# Regular parameters
input_size  = 1      # Number of input features
output_size = 1      # Number of output features

criterion = nn.MSELoss()    # Set MSE as loss function

nr_folds = 10               # Number of folds in K-fold cross validation
kf = KFold(n_splits = nr_folds)

In [11]:
learning_rates  = [0.01, 0.001, 0.0001, 0.00001]
batch_sizes     = [32, 64, 128, 256, 512]
hidden_sizes    = [16, 32, 48, 60]
fc_features     = [64, 96, 128, 160]
num_lstms       = [2,4,6,8,10]
seq_lengths     = [30, 60, 90, 120]

HP_combinations = list(itertools.product(seq_lengths, batch_sizes, learning_rates,
                                         fc_features, hidden_sizes, num_lstms))


### Data Processing

In [12]:
data_process = DataProcessing(seq_length=SEQ_LENGTH, batch_size=BATCH_SIZE)
X_train, y_train, X_test, y_test = data_process.get_process_data()

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


### LSTM model

In [13]:
# Set device to CUDA
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Training and Testing

In [14]:
## Struct to store model info
class ModelInfo():
    def __init__(self):
        self.inputs     = []
        self.labels     = []
        self.outputs    = []
        self.loss       = float(0)
        self.params     = None
        self.lstm       = None
        self.train_loader  = None
        self.val_loader    = None
        self.input_scaler  = None
        self.output_scaler = None

# Store best model of every fold
results = {i:ModelInfo() for i in range(nr_folds)}
for i in range(nr_folds): results[i].loss = np.inf

In [15]:
class Hyperparams():
    def __init__(self, learning_rate, hidden_size, num_fc, num_lstm):
        self.learning_rate  = learning_rate
        self.hidden_size    = hidden_size
        self.num_fc         = num_fc
        self.num_lstm       = num_lstm

        self.avg_val_error  = float(0.0)


In [16]:
for fold, (train_indices, val_indices) in enumerate(kf.split(X_train)):
    print(f'----------\nFOLD {fold}')

    train_loader, val_loader = data_process.create_fold_sets(train_indices, val_indices)

    results[fold].input_scaler  = data_process.input_scaler
    results[fold].output_scaler = data_process.output_scaler

    hp_results = []
    for learning_rate, hidden_size, num_fc, num_lstm in HP_combinations:
        hp_results.append(Hyperparams(learning_rate, hidden_size, num_fc, num_lstm))

        # Create LSTM object and move it to the GPU
        lstm = LSTM(output_size, input_size, hidden_size, num_lstm, num_fc, device).to(device)

        # Initialize optimizer
        optimizer = torch.optim.Adam(lstm.parameters(), lr=LEARNING_RATE)

        epochs_wo_improvement = 0
        for epoch in range(NUM_EPOCHS):
            if epochs_wo_improvement > 50:
                print('early stopping!')
                break

            for i, data in enumerate(train_loader, 0):
                inputs, targets = data
                targets = targets.reshape(targets.shape[0],1)

                inputs  = inputs.to(device)
                targets = targets.to(device)

                # Zero the gradients
                optimizer.zero_grad()

                # Forward pass
                outputs = lstm.forward(inputs)
                # Compute loss
                loss = criterion(outputs, targets)
                # Backward pass
                loss.backward()
                # Parameter update
                optimizer.step()

            val_results = ModelInfo()
            val_results.loss = 0
            val_results.params = deepcopy(lstm.state_dict())
            val_results.model  = deepcopy(lstm)
            with torch.no_grad():
                for i, val_data in enumerate(val_loader, 0):
                    val_inputs, val_targets = val_data
                    val_targets = val_targets.reshape(val_targets.shape[0], 1)

                    val_inputs  = val_inputs.to(device)
                    val_targets = val_targets.to(device)

                    val_outputs = lstm.forward(val_inputs)
                    val_loss = criterion(val_outputs, val_targets)

                    for i in range(val_inputs.shape[0]):
                        single_input = val_inputs[i].flatten()
                        single_label = val_targets[i]
                        single_output = val_outputs[i]

                        val_results.inputs.append(single_input)
                        val_results.labels.append(single_label)
                        val_results.outputs.append(single_output)

                    val_results.loss += float(val_loss.item())
            val_results.loss /= len(val_loader)

            if results[fold].loss > val_results.loss:
                val_results.train_loader  = deepcopy(train_loader)
                val_results.val_loader    = deepcopy(val_loader)
                val_results.input_scaler  = deepcopy(results[fold].input_scaler)
                val_results.output_scaler = deepcopy(results[fold].output_scaler)
                results[fold]             = deepcopy(val_results)

                print(f'epoch {epoch} | validation loss: {val_results.loss} | new best model!')
                epochs_wo_improvement = 0
            else:
                print(f'epoch {epoch} | validation loss: {val_results.loss}')
                epochs_wo_improvement += 1

        X_test_norm, y_test_norm, _, _ = data_process.normalize(X_test, y_test, results[fold].input_scaler, results[fold].output_scaler, fit=0)

        lstm_test = results[fold].model

        lstm_test.load_state_dict(results[fold].params)

        # Set the model to evaluation mode
        lstm_test.eval()

        X_test_norm = X_test_norm.to(device)
        test_output = lstm_test.forward(X_test_norm).to(device)

        test_output_np = test_output.cpu().data.numpy()
        test_labels_np = y_test_norm.data.numpy()
        test_labels_np = test_labels_np.reshape(test_labels_np.shape[0], 1)

        X_plot = results[fold].input_scaler.inverse_transform(test_output_np)
        y_plot = results[fold].output_scaler.inverse_transform(test_labels_np)

        errors = np.abs(X_plot - y_plot)
        avg_error = np.mean(errors)
        print(f'Average test error: {avg_error}\n')

        hp_results[-1].avg_val_error

    best_avg_val_loss = np.inf
    best_config       = None
    for hp_result in hp_results:
        if hp_result.avg_val_loss < best_avg_val_loss:
            best_config = hp_result
            best_avg_val_loss = hp_result.avg_val_loss
    print('----------')
    print(f'Best config:')
    print(f'    learning rate       : {hp_result.learning_rate}')
    print(f'    hidden size         : {hp_result.hidden_size}')
    print(f'    num of fc features  : {hp_result.hidden_size}')
    print(f'    num of lstm layers  : {hp_result.hidden_size}')
    print(f'    AVERAGE VAL ERROR   : {hp_result.avg_val_loss}')



----------
FOLD 0
epoch 0 | validation loss: 0.001761812313426552 | new best model!
epoch 1 | validation loss: 0.018288477889395187
epoch 2 | validation loss: 0.06086118120167937
epoch 3 | validation loss: 0.07532273924776486
epoch 4 | validation loss: 0.0525768439152411
epoch 5 | validation loss: 0.019067051487841775
epoch 6 | validation loss: 0.01105225134441363
epoch 7 | validation loss: 0.007463814491139991
epoch 8 | validation loss: 0.0048502348363399506
epoch 9 | validation loss: 0.003779321805008554
epoch 10 | validation loss: 0.0030623741227567996
epoch 11 | validation loss: 0.002455197337050257
epoch 12 | validation loss: 0.0019417295822807188
epoch 13 | validation loss: 0.0015008760663996717 | new best model!
epoch 14 | validation loss: 0.0011887966533582325 | new best model!
epoch 15 | validation loss: 0.0008943323916485367 | new best model!
epoch 16 | validation loss: 0.0011996896085163047
epoch 17 | validation loss: 0.000772604310701094 | new best model!
epoch 18 | validat

  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


epoch 0 | validation loss: 0.03747114871761629
epoch 1 | validation loss: 0.07045771128364972
epoch 2 | validation loss: 0.08580687801752772
epoch 3 | validation loss: 0.05321381241083145
epoch 4 | validation loss: 0.024651128532631055
epoch 5 | validation loss: 0.013367949851921626
epoch 6 | validation loss: 0.008483010926283896
epoch 7 | validation loss: 0.006090152468199709
epoch 8 | validation loss: 0.005334101376190249
epoch 9 | validation loss: 0.0039941875137239026
epoch 10 | validation loss: 0.00358812884535707
epoch 11 | validation loss: 0.003482550328564165
epoch 12 | validation loss: 0.0027151019686633454
epoch 13 | validation loss: 0.0027636323294635595
epoch 14 | validation loss: 0.002299312754725439
epoch 15 | validation loss: 0.0018600386387593712
epoch 16 | validation loss: 0.0020581679912408746
epoch 17 | validation loss: 0.0016941077005217917
epoch 18 | validation loss: 0.0016667777339794806
epoch 19 | validation loss: 0.0014784201604405617
epoch 20 | validation loss:

  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


epoch 0 | validation loss: 0.07240971603563853
epoch 1 | validation loss: 0.08577950884188924
epoch 2 | validation loss: 0.08394483849406242
epoch 3 | validation loss: 0.06456244417599269
epoch 4 | validation loss: 0.02674811559596232
epoch 5 | validation loss: 0.011649663809553854
epoch 6 | validation loss: 0.004411954285127909
epoch 7 | validation loss: 0.0029602793199176502
epoch 8 | validation loss: 0.0018518849371633092
epoch 9 | validation loss: 0.0021329005477517577
epoch 10 | validation loss: 0.0012304186852166563
epoch 11 | validation loss: 0.0011569405963070625
epoch 12 | validation loss: 0.0018618420498179539
epoch 13 | validation loss: 0.000931308970653585
epoch 14 | validation loss: 0.0009011529436975252
epoch 15 | validation loss: 0.0008778388426955124
epoch 16 | validation loss: 0.0009333158180067715
epoch 17 | validation loss: 0.0009493267420371662
epoch 18 | validation loss: 0.0010251514109508467
epoch 19 | validation loss: 0.0011516671636075313
epoch 20 | validation l

KeyboardInterrupt: 

----

In [None]:
hp_results = []
for seq_length, batch_size, lr, fnn_feats, h_size, num_lstms in HP_combinations:
    data_process = DataProcessing(seq_length, batch_size)
    data_process.get_process_data()

    train_loader, test_loader = data_process.create_fold_sets()

    hp_results.append(Hyperparams(learning_rate, hidden_size, num_fc, num_lstm))

    # Create LSTM object and move it to the GPU
    lstm = LSTM(output_size, input_size, hidden_size, num_lstm, num_fc, device).to(device)

    # Initialize optimizer
    optimizer = torch.optim.Adam(lstm.parameters(), lr=LEARNING_RATE)

    epochs_wo_improvement = 0
    for epoch in range(NUM_EPOCHS):
        if epochs_wo_improvement > 50:
            print('early stopping!')
            break

        for i, data in enumerate(train_loader, 0):
            inputs, targets = data
            targets = targets.reshape(targets.shape[0],1)

            inputs  = inputs.to(device)
            targets = targets.to(device)

            # Zero the gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = lstm.forward(inputs)
            # Compute loss
            loss = criterion(outputs, targets)
            # Backward pass
            loss.backward()
            # Parameter update
            optimizer.step()

        val_results = ModelInfo()
        val_results.loss = 0
        val_results.params = deepcopy(lstm.state_dict())
        val_results.model  = deepcopy(lstm)
        with torch.no_grad():
            for i, val_data in enumerate(val_loader, 0):
                val_inputs, val_targets = val_data
                val_targets = val_targets.reshape(val_targets.shape[0], 1)

                val_inputs  = val_inputs.to(device)
                val_targets = val_targets.to(device)

                val_outputs = lstm.forward(val_inputs)
                val_loss = criterion(val_outputs, val_targets)

                for i in range(val_inputs.shape[0]):
                    single_input = val_inputs[i].flatten()
                    single_label = val_targets[i]
                    single_output = val_outputs[i]

                    val_results.inputs.append(single_input)
                    val_results.labels.append(single_label)
                    val_results.outputs.append(single_output)

                val_results.loss += float(val_loss.item())
        val_results.loss /= len(val_loader)

        if results[fold].loss > val_results.loss:
            val_results.train_loader  = deepcopy(train_loader)
            val_results.val_loader    = deepcopy(val_loader)
            val_results.input_scaler  = deepcopy(results[fold].input_scaler)
            val_results.output_scaler = deepcopy(results[fold].output_scaler)
            results[fold]             = deepcopy(val_results)

            print(f'epoch {epoch} | validation loss: {val_results.loss} | new best model!')
            epochs_wo_improvement = 0
        else:
            print(f'epoch {epoch} | validation loss: {val_results.loss}')
            epochs_wo_improvement += 1

    X_test_norm, y_test_norm, _, _ = data_process.normalize(X_test, y_test, results[fold].input_scaler, results[fold].output_scaler, fit=0)

    lstm_test = results[fold].model

    lstm_test.load_state_dict(results[fold].params)

    # Set the model to evaluation mode
    lstm_test.eval()

    X_test_norm = X_test_norm.to(device)
    test_output = lstm_test.forward(X_test_norm).to(device)

    test_output_np = test_output.cpu().data.numpy()
    test_labels_np = y_test_norm.data.numpy()
    test_labels_np = test_labels_np.reshape(test_labels_np.shape[0], 1)

    X_plot = results[fold].input_scaler.inverse_transform(test_output_np)
    y_plot = results[fold].output_scaler.inverse_transform(test_labels_np)

    errors = np.abs(X_plot - y_plot)
    avg_error = np.mean(errors)
    print(f'Average test error: {avg_error}\n')

    hp_results[-1].avg_val_error

best_avg_val_loss = np.inf
best_config       = None
for hp_result in hp_results:
    if hp_result.avg_val_loss < best_avg_val_loss:
        best_config = hp_result
        best_avg_val_loss = hp_result.avg_val_loss
print('----------')
print(f'Best config:')
print(f'    learning rate       : {hp_result.learning_rate}')
print(f'    hidden size         : {hp_result.hidden_size}')
print(f'    num of fc features  : {hp_result.hidden_size}')
print(f'    num of lstm layers  : {hp_result.hidden_size}')
print(f'    AVERAGE VAL ERROR   : {hp_result.avg_val_loss}')

