In [6]:
import os
import pandas as pd
import torch
from torch.utils.data import Subset
import numpy as np

import tensorflow as tf
import keras
from sklearn.preprocessing import MinMaxScaler

from utils.TSDataset import TimeSeriesDataset
from utils.TSDataset import data_load
from utils.plot import *
from utils.split_train_val_test import *
from utils.compute_metric import compute_metrics, append_score

from torch.utils.data import DataLoader

import matplotlib as mpl
import matplotlib.pyplot as plt


In [7]:
project_dir = os.path.dirname(os.path.abspath('__file__'))
project_dir

data_path = os.path.join(project_dir, 'data', 'processed', 'BTC-USD-sample.csv')

# Load data
data1, x_scaler1, y_scaler1 = data_load(data_path, x_scaler='minmax', y_scaler='minmax')

# Multi-Step Prediction

In [8]:
record = []

In [51]:
forecast_horizons = [1, 10, 30, 60]
train_df, test_df = split_train_val_test(data1, train_frac=0.7)
scores = []
target_col = 'y'

test_target = test_df[target_col].values

for h in forecast_horizons:
    naive_y_true = []  # will hold the true future values for every forecasting window
    naive_y_pred = []  # will hold the corresponding naive predictions
    
    for i in range(len(test_target) - h):
        # True values: for instance, at time step 0, this gets indices 1 to h (i.e., 1:11 when h=10)
        y_true = test_target[i + 1: i + h + 1]
        # Naive predictions: create an array of length h filled with the value at time step i
        y_pred = np.full((h,), test_target[i])
        
        naive_y_true.append(y_true)
        naive_y_pred.append(y_pred)
    
    naive_y_true = y_scaler1.inverse_transform(naive_y_true)
    naive_y_pred = y_scaler1.inverse_transform(naive_y_pred)
    mse_naive, mae_naive, huber_naive = compute_metrics(naive_y_true, naive_y_pred)
    
    # Print out the performance for this forecast horizon
    record = {
        'model': 'Naive',
        'h-step Forecast': h,
        'mse': mse_naive,
        'mae': mae_naive,
        'huber': huber_naive
    }
    append_score(scores, record)
for i in scores:
    print(i)

{'model': 'Naive', 'h-step Forecast': 1, 'mse': 1160.3737790527032, 'mae': 25.10380253502343, 'huber': np.float32(24.65289)}
{'model': 'Naive', 'h-step Forecast': 10, 'mse': 5275.25768559061, 'mae': 54.060173825503384, 'huber': np.float32(53.572163)}
{'model': 'Naive', 'h-step Forecast': 30, 'mse': 13377.803699702952, 'mae': 85.68954353741499, 'huber': np.float32(85.194984)}
{'model': 'Naive', 'h-step Forecast': 60, 'mse': 26082.668655180554, 'mae': 116.48661504629631, 'huber': np.float32(115.99019)}


In [19]:
window_size = 50
target_col = 'y'
feature_cols = ['x1', 'x2', 'x3', 'x4', 'x5']
return_index = True

In [20]:
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

for h in forecast_horizons:
    train_df, test_df = split_train_val_test(data1, train_frac=0.7)
    train_set = TimeSeriesDataset(dataframe=train_df, window_size=window_size, forecast_horizon=h, feature_cols=feature_cols, target_col=target_col, return_index=return_index)
    test_set = TimeSeriesDataset(dataframe=test_df, window_size=window_size, forecast_horizon=h, feature_cols=feature_cols, target_col=target_col, return_index=return_index)

    X_train, y_train, x_dates, y_dates = train_set.X_seq, train_set.y_seq, train_set.x_dates, train_set.y_dates
    X_test, y_test, x_dates_test, y_dates_test = test_set.X_seq, test_set.y_seq, test_set.x_dates, test_set.y_dates

    dense_model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=(window_size, len(feature_cols))),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(h)
    ])

    dense_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    history_dense = dense_model.fit(X_train, y_train, epochs=50,
                                    validation_data=(X_test, y_test), verbose=0)

    #plot_learning_curves(history_dense.history)
    y_pred = dense_model.predict(X_test)
    y_pred = y_scaler1.inverse_transform(y_pred.reshape(-1, 1))
    y_true = y_scaler1.inverse_transform(y_test.reshape(-1, 1))
    
    mse_dense, mae_dense, huber_dense = compute_metrics(y_true, y_pred)
    
    record = {
        'model': 'Dense',
        'h-step Forecast': h,
        'mse': mse_dense,
        'mae': mae_dense,
        'huber': huber_dense
    }
    append_score(scores, record)

[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 504us/step
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 492us/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 490us/step
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 488us/step


In [21]:
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

for h in forecast_horizons:
    train_df, test_df = split_train_val_test(data1, train_frac=0.7)
    train_set = TimeSeriesDataset(dataframe=train_df, window_size=window_size, forecast_horizon=h, feature_cols=feature_cols, target_col=target_col, return_index=return_index)
    test_set = TimeSeriesDataset(dataframe=test_df, window_size=window_size, forecast_horizon=h, feature_cols=feature_cols, target_col=target_col, return_index=return_index)

    X_train, y_train, x_dates, y_dates = train_set.X_seq, train_set.y_seq, train_set.x_dates, train_set.y_dates
    X_test, y_test, x_dates_test, y_dates_test = test_set.X_seq, test_set.y_seq, test_set.x_dates, test_set.y_dates

    rnn_model = tf.keras.Sequential([
    tf.keras.layers.SimpleRNN(units=20, input_shape=(window_size, len(feature_cols))),
    tf.keras.layers.Dense(h)
    ])

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    rnn_model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

    history_rnn = rnn_model.fit(X_train, y_train, epochs=20,
                                validation_data=(X_test, y_test), verbose=0)

    y_pred = rnn_model.predict(X_test)
    y_pred = y_scaler1.inverse_transform(y_pred.reshape(-1, 1))
    y_true = y_scaler1.inverse_transform(y_test.reshape(-1, 1))
    
    mse_rnn, mae_rnn, huber_rnn = compute_metrics(y_true, y_pred)
    
    record = {
        'model': 'Simple RNN',
        'h-step Forecast': h,
        'mse': mse_rnn,
        'mae': mae_rnn,
        'huber': huber_rnn
    }
    scores.append(record)
    #plot_learning_curves(history_rnn.history)

  super().__init__(**kwargs)


[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


  super().__init__(**kwargs)


[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


  super().__init__(**kwargs)


[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


  super().__init__(**kwargs)


[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [22]:
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

for h in forecast_horizons:
    train_df, test_df = split_train_val_test(data1, train_frac=0.7)
    train_set = TimeSeriesDataset(dataframe=train_df, window_size=window_size, forecast_horizon=h, feature_cols=feature_cols, target_col=target_col, return_index=return_index)
    test_set = TimeSeriesDataset(dataframe=test_df, window_size=window_size, forecast_horizon=h, feature_cols=feature_cols, target_col=target_col, return_index=return_index)

    X_train, y_train, x_dates, y_dates = train_set.X_seq, train_set.y_seq, train_set.x_dates, train_set.y_dates
    X_test, y_test, x_dates_test, y_dates_test = test_set.X_seq, test_set.y_seq, test_set.x_dates, test_set.y_dates

    rnn_model = tf.keras.Sequential([
    tf.keras.layers.SimpleRNN(units=20, return_sequences=True, input_shape=(window_size, len(feature_cols))),
    tf.keras.layers.SimpleRNN(units=20, return_sequences=False),
    tf.keras.layers.Dense(h)
    ])

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    rnn_model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

    history_rnn = rnn_model.fit(X_train, y_train, epochs=20,
                                validation_data=(X_test, y_test), verbose=0)

    y_pred = rnn_model.predict(X_test)
    y_pred = y_scaler1.inverse_transform(y_pred.reshape(-1, 1))
    y_true = y_scaler1.inverse_transform(y_test.reshape(-1, 1))
    
    mse_rnn, mae_rnn, huber_rnn = compute_metrics(y_true, y_pred)
    
    record = {
        'model': 'Deep RNN',
        'h-step Forecast': h,
        'mse': mse_rnn,
        'mae': mae_rnn,
        'huber': huber_rnn
    }
    append_score(scores, record)
    #plot_learning_curves(history_rnn.history)

  super().__init__(**kwargs)


[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


  super().__init__(**kwargs)


[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


  super().__init__(**kwargs)


[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


  super().__init__(**kwargs)


[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


In [23]:
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

for h in forecast_horizons:
    train_df, test_df = split_train_val_test(data1, train_frac=0.7)
    train_set = TimeSeriesDataset(dataframe=train_df, window_size=window_size, forecast_horizon=h, feature_cols=feature_cols, target_col=target_col, return_index=return_index)
    test_set = TimeSeriesDataset(dataframe=test_df, window_size=window_size, forecast_horizon=h, feature_cols=feature_cols, target_col=target_col, return_index=return_index)

    X_train, y_train, x_dates, y_dates = train_set.X_seq, train_set.y_seq, train_set.x_dates, train_set.y_dates
    X_test, y_test, x_dates_test, y_dates_test = test_set.X_seq, test_set.y_seq, test_set.x_dates, test_set.y_dates

    rnn_model = tf.keras.Sequential([
    tf.keras.layers.SimpleRNN(units=20, return_sequences=True, input_shape=(window_size, len(feature_cols))),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.SimpleRNN(units=20, return_sequences=False),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(h)
    ])

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    rnn_model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

    history_rnn = rnn_model.fit(X_train, y_train, epochs=20,
                                validation_data=(X_test, y_test), verbose=0)

    y_pred = rnn_model.predict(X_test)
    y_pred = y_scaler1.inverse_transform(y_pred.reshape(-1, 1))
    y_true = y_scaler1.inverse_transform(y_test.reshape(-1, 1))
    
    mse_rnn, mae_rnn, huber_rnn = compute_metrics(y_true, y_pred)
    
    record = {
        'model': 'RNN Batch Normalization',
        'h-step Forecast': h,
        'mse': mse_rnn,
        'mae': mae_rnn,
        'huber': huber_rnn
    }
    append_score(scores, record)
    #plot_learning_curves(history_rnn.history)

  super().__init__(**kwargs)


[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


  super().__init__(**kwargs)


[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


  super().__init__(**kwargs)


[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


  super().__init__(**kwargs)


[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


In [24]:
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

for h in forecast_horizons:
    train_df, test_df = split_train_val_test(data1, train_frac=0.7)
    train_set = TimeSeriesDataset(dataframe=train_df, window_size=window_size, forecast_horizon=h, feature_cols=feature_cols, target_col=target_col, return_index=return_index)
    test_set = TimeSeriesDataset(dataframe=test_df, window_size=window_size, forecast_horizon=h, feature_cols=feature_cols, target_col=target_col, return_index=return_index)

    X_train, y_train, x_dates, y_dates = train_set.X_seq, train_set.y_seq, train_set.x_dates, train_set.y_dates
    X_test, y_test, x_dates_test, y_dates_test = test_set.X_seq, test_set.y_seq, test_set.x_dates, test_set.y_dates

    rnn_model = tf.keras.Sequential([
    tf.keras.layers.LSTM(units=20, return_sequences=True, input_shape=(window_size, len(feature_cols))),
    tf.keras.layers.LSTM(units=20, return_sequences=False),
    tf.keras.layers.Dense(h)
    ])

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    rnn_model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

    history_rnn = rnn_model.fit(X_train, y_train, epochs=20,
                                validation_data=(X_test, y_test), verbose=0)

    y_pred = rnn_model.predict(X_test)
    y_pred = y_scaler1.inverse_transform(y_pred.reshape(-1, 1))
    y_true = y_scaler1.inverse_transform(y_test.reshape(-1, 1))
    
    mse_rnn, mae_rnn, huber_rnn = compute_metrics(y_true, y_pred)
    
    record = {
        'model': 'LSTM',
        'h-step Forecast': h,
        'mse': mse_rnn,
        'mae': mae_rnn,
        'huber': huber_rnn
    }
    append_score(scores, record)
    #plot_learning_curves(history_rnn.history)

  super().__init__(**kwargs)


[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


  super().__init__(**kwargs)


[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


  super().__init__(**kwargs)


[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


  super().__init__(**kwargs)


[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


In [None]:
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)
forecast_horizons = [10]
for h in forecast_horizons:
    train_df, test_df = split_train_val_test(data1, train_frac=0.7)
    train_set = TimeSeriesDataset(dataframe=train_df, window_size=window_size, forecast_horizon=h, feature_cols=feature_cols, target_col=target_col, return_index=return_index)
    test_set = TimeSeriesDataset(dataframe=test_df, window_size=window_size, forecast_horizon=h, feature_cols=feature_cols, target_col=target_col, return_index=return_index)

    X_train, y_train, x_dates, y_dates = train_set.X_seq, train_set.y_seq, train_set.x_dates, train_set.y_dates
    X_test, y_test, x_dates_test, y_dates_test = test_set.X_seq, test_set.y_seq, test_set.x_dates, test_set.y_dates

    rnn_model = tf.keras.Sequential([
    tf.keras.layers.GRU(units=20, return_sequences=True, input_shape=(window_size, len(feature_cols))),
    tf.keras.layers.GRU(units=20, return_sequences=False),
    tf.keras.layers.Dense(h)
    ])

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    rnn_model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

    history_rnn = rnn_model.fit(X_train, y_train, epochs=3,
                                validation_data=(X_test, y_test), verbose=0)

    y_pred = rnn_model.predict(X_test)

    y_pred = y_scaler1.inverse_transform(y_pred.reshape(-1, 1))
    y_true = y_scaler1.inverse_transform(y_test.reshape(-1, 1))
    
    mse_rnn, mae_rnn, huber_rnn = compute_metrics(y_true, y_pred)
    print(mse_rnn)
    print(mae_rnn)
    print(huber_rnn)
    
    record = {
        'model': 'GRU',
        'h-step Forecast': h,
        'mse': mse_rnn,
        'mae': mae_rnn,
        'huber': huber_rnn
    }
    append_score(scores, record)
    #plot_learning_curves(history_rnn.history)

  super().__init__(**kwargs)


[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
(1441, 10)
[0.22915536 0.21745737 0.2389725  0.22488208 0.21692193 0.23346631
 0.23073803 0.25786266 0.22497006 0.22984926]
[12781.507]
[12812.77999632]
8071.985366673137
70.98088142615924
70.48248


In [49]:
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

forecast_horizons = [1, 10, 30, 60]
for h in forecast_horizons:
    train_df, test_df = split_train_val_test(data1, train_frac=0.7)
    train_set = TimeSeriesDataset(dataframe=train_df, window_size=window_size, forecast_horizon=h, feature_cols=feature_cols, target_col=target_col, return_index=return_index)
    test_set = TimeSeriesDataset(dataframe=test_df, window_size=window_size, forecast_horizon=h, feature_cols=feature_cols, target_col=target_col, return_index=return_index)

    X_train, y_train, x_dates, y_dates = train_set.X_seq, train_set.y_seq, train_set.x_dates, train_set.y_dates
    X_test, y_test, x_dates_test, y_dates_test = test_set.X_seq, test_set.y_seq, test_set.x_dates, test_set.y_dates

   
    cnn_model = keras.models.Sequential([
    keras.layers.Input(shape=(window_size, len(feature_cols))),
    keras.layers.Conv1D(filters=16, kernel_size=2, padding="valid"),
    keras.layers.LSTM(32, return_sequences=True),
    keras.layers.LSTM(32, return_sequences=False),
    keras.layers.Dense(h)
    ])

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.004717880792838919)
    cnn_model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

    history_rnn = cnn_model.fit(X_train, y_train, epochs=20,
                                validation_data=(X_test, y_test), verbose=0)

    y_pred = cnn_model.predict(X_test)
    y_pred = y_scaler1.inverse_transform(y_pred.reshape(-1, 1))
    y_true = y_scaler1.inverse_transform(y_test.reshape(-1, 1))
    
    mse_rnn, mae_rnn, huber_rnn = compute_metrics(y_true, y_pred)
    
    record = {
        'model': 'CNN-LSTM',
        'h-step Forecast': h,
        'mse': mse_rnn,
        'mae': mae_rnn,
        'huber': huber_rnn
    }
    append_score(cnn_lstm_scores, record)
    #plot_learning_curves(history_rnn.history)

[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
{'model': 'CNN-LSTM', 'h-step Forecast': 1, 'mse': 1776.0033139586972, 'mae': 32.65602552087393, 'huber': np.float32(32.15984)}
{'model': 'CNN-LSTM', 'h-step Forecast': 10, 'mse': 6122.235423251777, 'mae': 58.971645334105524, 'huber': np.float32(58.473972)}
{'model': 'CNN-LSTM', 'h-step Forecast': 30, 'mse': 15892.558204614965, 'mae': 93.31983408544221, 'huber': np.float32(92.82109)}
{'model': 'CNN-LSTM', 'h-step Forecast': 60, 'mse': 26614.687826459573, 'mae': 117.01620187787084, 'huber': np.float32(116.51741)}
