In [1]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from statsmodels.tsa.api import VAR

import DataGeneration
from Dataset import DataSet
from InterrelationBlocks import CompanionMatrixBlock
from MultiBlock import MultiTrendBlock, MultiSeasonalityBlock
from NBeats import Stack, N_BEATS
# Fix the NumPy and TensorFlow global seeds before any data or model is created
np.random.seed(42)
tf.random.set_seed(42)

In [2]:
# Number of series per simulation and the VAR lag order; both are reused by
# every experiment cell below.
num_variables, var_lag = 3, 4
# The literals 20 and 1000 are passed positionally; presumably they are the
# number of simulations and the length of each series -- TODO confirm against
# DataGeneration.generate_simulations.
data = DataGeneration.generate_simulations(20, num_variables, 1000, var_lag)

# Companion DGP Test

- How does it react with different DGPs?
- Does it recognize if there is no VAR component or what the VAR component is if there is one?
=> Save the companion matrix of the trained CompanionMatrixBlock to compare to the actual VAR companion matrix AND the VAR
=> Save the companion matrix of the basic VAR model to compare to the actual VAR companion matrix AND the CompanionMatrixBlock

1. Test on all simulations
2. Parameters will be the lag of the DGP
3. Forecast 1 ahead

In [3]:
# Experiment: train a stand-alone CompanionMatrixBlock on every data
# composition of every simulation, fit a statsmodels VAR on the same training
# split, and collect both coefficient estimates together with the companion
# matrix stored in the simulation's arguments.
companion_all = []

for sim in range(len(data)):
    simulation = data[sim]
    for composition in simulation['data'].keys():
        print('simulation:', sim, 'data composition:', composition, end=' -> ')

        # One-step-ahead forecast, looking back exactly as many steps as the lag order
        horizon, back_horizon = 1, var_lag
        dataset = DataSet(simulation['data'][composition].values)
        # 0.8 / 0.1 are presumably the train / validation fractions -- TODO confirm in DataSet
        dataset.preprocessing(horizon, back_horizon, 0.8, 0.1)

        # N-BEATS model consisting of a single stack with a single companion block
        comp = CompanionMatrixBlock(
            num_variables, horizon, back_horizon, var_lag,
            regularizer=None, trainable_backcast=False,
        )
        model = N_BEATS([Stack([comp])])
        model.build(input_shape=(None, back_horizon, num_variables))
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), loss='mse')

        # Stop once the validation loss has not improved for 20 epochs.
        # NOTE(review): restore_best_weights is left at its default, so the
        # stored matrix and forecast come from the last epoch, not the best one.
        early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20, mode='min')
        history = model.fit(
            dataset.X_train, dataset.y_train,
            epochs=1000, batch_size=128,
            validation_data=(dataset.X_val, dataset.y_val),
            callbacks=[early_stopping], verbose=0,
        )
        forecast = model.predict(dataset.X_test, verbose=0)
        epochs_run = len(history.history['loss'])
        print(epochs_run, 'epochs', end=' - ')

        # Reference VAR with the same lag order, fitted on the training split
        var = VAR(dataset.train).fit(back_horizon)
        # Skip the first parameter row (the intercept under statsmodels'
        # default trend='c') and transpose the remaining lag coefficients
        var_companion = var.params[1:].T

        # One record per (simulation, composition)
        companion_all.append({
            'sim': sim,
            'composition': composition,
            'nbeats': comp._companion_horizon.numpy(),
            'var': var_companion,
            'epochs': epochs_run,
            'forecast': forecast,
            'real': simulation['arguments']['companion_matrix'],
            'error': forecast.flatten() - dataset.y_test.flatten(),
        })

simulation: 0 data composition: ir -> 265 epochs - simulation: 0 data composition: t_ir -> 408 epochs - simulation: 0 data composition: s_ir -> 93 epochs - simulation: 0 data composition: t_s_ir -> 399 epochs - simulation: 0 data composition: t_s -> 39 epochs - simulation: 0 data composition: rw_ir -> 162 epochs - simulation: 0 data composition: t_rw_ir -> 174 epochs - simulation: 0 data composition: s_rw_ir -> 214 epochs - simulation: 0 data composition: t_s_rw_ir -> 256 epochs - simulation: 0 data composition: rw -> 353 epochs - simulation: 1 data composition: ir -> 128 epochs - simulation: 1 data composition: t_ir -> 36 epochs - simulation: 1 data composition: s_ir -> 85 epochs - simulation: 1 data composition: t_s_ir -> 310 epochs - simulation: 1 data composition: t_s -> 37 epochs - simulation: 1 data composition: rw_ir -> 248 epochs - simulation: 1 data composition: t_rw_ir -> 300 epochs - simulation: 1 data composition: s_rw_ir -> 165 epochs - simulation: 1 data composition: t_s_

In [4]:
# Persist the stand-alone companion experiment as JSON.
# Create the output folder first: to_json raises if 'results/' is missing, and
# by this point the long training cell above has already been run.
os.makedirs('results', exist_ok=True)
pd.DataFrame(companion_all).to_json('results/companion_all.json')

# Companion forecast horizon test
- How does it react if you increase forecast horizon?
=> Save the companion matrix of the trained CompanionMatrixBlock to compare to the actual VAR companion matrix AND the VAR
=> Save the companion matrix of the basic VAR model to compare to the actual VAR companion matrix AND the CompanionMatrixBlock

1. Test on only IR data
2. Parameters will be the lag of the DGP
3. Horizon 1, 5, 10, 20, 50

In [5]:
# Experiment: on the 'ir' composition only, train a stand-alone
# CompanionMatrixBlock for several forecast horizons and compare its
# coefficients with a statsmodels VAR fitted on the same training split.
horizon_ir = []

for sim in range(len(data)):
    simulation = data[sim]
    for horizon in (1, 5, 10, 20, 50):
        print('simulation:', sim,'horizon:' ,horizon, end=' -> ')

        # The look-back window always equals the lag order; only the horizon varies
        back_horizon = var_lag
        dataset = DataSet(simulation['data']['ir'].values)
        # 0.8 / 0.1 are presumably the train / validation fractions -- TODO confirm in DataSet
        dataset.preprocessing(horizon, back_horizon, 0.8, 0.1)

        # N-BEATS model consisting of a single stack with a single companion block
        comp = CompanionMatrixBlock(
            num_variables, horizon, back_horizon, back_horizon,
            regularizer=None, trainable_backcast=False,
        )
        model = N_BEATS([Stack([comp])])
        model.build(input_shape=(None, back_horizon, num_variables))
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), loss='mse')

        # Stop once the validation loss has not improved for 20 epochs
        early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20, mode='min')
        history = model.fit(
            dataset.X_train, dataset.y_train,
            epochs=1000, batch_size=128,
            validation_data=(dataset.X_val, dataset.y_val),
            callbacks=[early_stopping], verbose=0,
        )
        forecast = model.predict(dataset.X_test, verbose=0)
        epochs_run = len(history.history['loss'])
        print(epochs_run, 'epochs', end=' - ')

        # Reference VAR with the same lag order, fitted on the training split
        var = VAR(dataset.train).fit(back_horizon)
        # Skip the first parameter row (the intercept under statsmodels'
        # default trend='c') and transpose the remaining lag coefficients
        var_companion = var.params[1:].T

        # One record per (simulation, horizon)
        horizon_ir.append({
            'sim': sim,
            'composition': 'ir',
            'horizon': horizon,
            'nbeats': comp._companion_horizon.numpy(),
            'var': var_companion,
            'epochs': epochs_run,
            'forecast': forecast,
            'real': simulation['arguments']['companion_matrix'],
            'error': forecast.flatten() - dataset.y_test.flatten(),
        })

111 epochs - simulation: 1 horizon: 5 -> 154 epochs - simulation: 1 horizon: 10 -> 152 epochs - simulation: 1 horizon: 20 -> 169 epochs - simulation: 1 horizon: 50 -> 1000 epochs - simulation: 2 horizon: 1 -> 293 epochs - simulation: 2 horizon: 5 -> 146 epochs - simulation: 2 horizon: 10 -> 198 epochs - simulation: 2 horizon: 20 -> 104 epochs - simulation: 2 horizon: 50 -> 1000 epochs - simulation: 3 horizon: 1 -> 62 epochs - simulation: 3 horizon: 5 -> 100 epochs - simulation: 3 horizon: 10 -> 367 epochs - simulation: 3 horizon: 20 -> 353 epochs - simulation: 3 horizon: 50 -> 1000 epochs - simulation: 4 horizon: 1 -> 112 epochs - simulation: 4 horizon: 5 -> 155 epochs - simulation: 4 horizon: 10 -> 371 epochs - simulation: 4 horizon: 20 -> 808 epochs - simulation: 4 horizon: 50 -> 1000 epochs - simulation: 5 horizon: 1 -> 102 epochs - simulation: 5 horizon: 5 -> 179 epochs - simulation: 5 horizon: 10 -> 140 epochs - simulation: 5 horizon: 20 -> 161 epochs - simulation: 5 horizon: 50 -

In [6]:
# Persist the forecast-horizon experiment as JSON.
# Create the output folder first: to_json raises if 'results/' is missing, and
# by this point the long training cell above has already been run.
os.makedirs('results', exist_ok=True)
pd.DataFrame(horizon_ir).to_json('results/horizon_ir.json')

# Misspecified lag test
- How does it react if misspecified?
=> Save the companion matrix of the trained CompanionMatrixBlock to compare to the actual VAR companion matrix AND the VAR
=> Save the companion matrix of the basic VAR model to compare to the actual VAR companion matrix AND the CompanionMatrixBlock

1. Test on only IR data
2. Lag to use: real lag/2, real lag, real lag*2, real lag*4
3. Forecast 1 ahead


In [7]:
# Experiment: on the 'ir' composition only, train a stand-alone
# CompanionMatrixBlock with a deliberately wrong lag order (half, equal to,
# double and four times the true lag) and compare its coefficients with a
# statsmodels VAR fitted with the same lag order.
misspecified_ir = []

for sim in range(len(data)):
    # Integer division keeps every candidate lag an int, so the progress log
    # prints "2" rather than "2.0" and no cast is needed further down.
    for lag in [var_lag // 2, var_lag, var_lag * 2, var_lag * 4]:
        print('simulation:', sim,'lag:' ,lag, end=' -> ')
        # Prepare data
        dataset = DataSet(data[sim]['data']['ir'].values)
        back_horizon = lag
        horizon = 1
        # Use the same `horizon` variable the block receives instead of a second literal.
        # 0.8 / 0.1 are presumably the train / validation fractions -- TODO confirm in DataSet
        dataset.preprocessing(horizon, back_horizon, 0.8, 0.1)
        # Create model: the block's lag order is the (possibly misspecified) look-back window
        comp = CompanionMatrixBlock(num_variables, horizon, back_horizon, back_horizon, regularizer=None, trainable_backcast=False)
        model = N_BEATS([Stack([comp])])
        # Train N-BEATS
        model.build(input_shape=(None, back_horizon, num_variables))
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), loss='mse')
        # Stop once the validation loss has not improved for 20 epochs
        early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20, mode='min')
        history = model.fit(dataset.X_train, dataset.y_train, epochs=1000, batch_size=128, validation_data=(dataset.X_val, dataset.y_val), callbacks=[early_stopping], verbose=0)
        forecast = model.predict(dataset.X_test, verbose=0)
        print(len(history.history['loss']), 'epochs', end=' - ')
        # Train VAR with the same (possibly misspecified) lag order
        var = VAR(dataset.train)
        var = var.fit(back_horizon)
        # Skip the first parameter row (the intercept under statsmodels'
        # default trend='c') and transpose the remaining lag coefficients
        var_companion = var.params[1:].T
        # Save results
        misspecified_ir.append({
            'sim': sim,
            'composition': 'ir',
            'lags_included': back_horizon,
            'nbeats': comp._companion_horizon.numpy(),
            'var': var_companion,
            'epochs': len(history.history['loss']),
            'forecast': forecast,
            'real': data[sim]['arguments']['companion_matrix'],
            'error': forecast.flatten() - dataset.y_test.flatten(),
        })

simulation: 0 lag: 2.0 -> 204 epochs - simulation: 0 lag: 4 -> 124 epochs - simulation: 0 lag: 8 -> 90 epochs - simulation: 0 lag: 16 -> 120 epochs - simulation: 1 lag: 2.0 -> 77 epochs - simulation: 1 lag: 4 -> 158 epochs - simulation: 1 lag: 8 -> 112 epochs - simulation: 1 lag: 16 -> 91 epochs - simulation: 2 lag: 2.0 -> 172 epochs - simulation: 2 lag: 4 -> 208 epochs - simulation: 2 lag: 8 -> 211 epochs - simulation: 2 lag: 16 -> 160 epochs - simulation: 3 lag: 2.0 -> 86 epochs - simulation: 3 lag: 4 -> 85 epochs - simulation: 3 lag: 8 -> 179 epochs - simulation: 3 lag: 16 -> 79 epochs - simulation: 4 lag: 2.0 -> 77 epochs - simulation: 4 lag: 4 -> 92 epochs - simulation: 4 lag: 8 -> 72 epochs - simulation: 4 lag: 16 -> 79 epochs - simulation: 5 lag: 2.0 -> 121 epochs - simulation: 5 lag: 4 -> 102 epochs - simulation: 5 lag: 8 -> 117 epochs - simulation: 5 lag: 16 -> 92 epochs - simulation: 6 lag: 2.0 -> 81 epochs - simulation: 6 lag: 4 -> 127 epochs - simulation: 6 lag: 8 -> 87 epo

In [8]:
# Persist the misspecified-lag experiment as JSON.
# Create the output folder first: to_json raises if 'results/' is missing, and
# by this point the long training cell above has already been run.
os.makedirs('results', exist_ok=True)
pd.DataFrame(misspecified_ir).to_json('results/misspecified_ir.json')

# Full Model
- Can the model determine which components are not present? (Basically, does it turn blocks on and off? E.g. if there is no trend, does the trend stack basically shut off?)
- Does the correlation block integrate with the N-Beats architecture?
=> Save the companion matrix of the trained CompanionMatrixBlock to compare to the actual VAR companion matrix
=> Save the input of the correlation block to check if stationary/compare to actual IR component
1. Test on all
2. Use real lag
3. Forecast 1 ahead
4. Use base hyperparameters

In [9]:
# Experiment: full model (trend stack + seasonality stack + companion block)
# on every data composition of every simulation.
# NOTE(review): the list name `all` shadows the Python builtin; it is kept
# because later cells read it under this name.
all = []

for sim in range(len(data)):
    simulation = data[sim]
    for composition in simulation['data'].keys():
        print('simulation:', sim, 'data composition:', composition, end=' -> ')

        # One-step-ahead forecast from a 100-step look-back window
        horizon, back_horizon = 1, 100
        dataset = DataSet(simulation['data'][composition].values)
        # 0.8 / 0.1 are presumably the train / validation fractions -- TODO confirm in DataSet
        dataset.preprocessing(horizon, back_horizon, 0.8, 0.1)

        # Each stack lists the SAME block object three times.
        # NOTE(review): confirm that sharing one block instance across the
        # three positions of a stack is intended.
        trend_block = MultiTrendBlock(num_variables, horizon, back_horizon)
        trend = Stack([trend_block] * 3)
        seasonal_block = MultiSeasonalityBlock(num_variables, horizon, back_horizon)
        seasonal = Stack([seasonal_block] * 3)
        comp = CompanionMatrixBlock(
            num_variables, horizon, back_horizon, var_lag,
            regularizer=None, trainable_backcast=False,
        )
        model = N_BEATS([trend, seasonal, Stack([comp])])
        model.build(input_shape=(None, back_horizon, num_variables))
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), loss='mse')

        # Stop once the validation loss has not improved for 20 epochs
        early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20, mode='min')
        history = model.fit(
            dataset.X_train, dataset.y_train,
            epochs=1000, batch_size=128,
            validation_data=(dataset.X_val, dataset.y_val),
            callbacks=[early_stopping], verbose=0,
        )
        forecast = model.predict(dataset.X_test, verbose=0)
        epochs_run = len(history.history['loss'])
        print(epochs_run, 'epochs', end=' - ')
        # Direct call on the test inputs; the return value is discarded.
        # Presumably this is what fills comp._past_input read below -- TODO confirm.
        model(dataset.X_test)

        # Reference VAR with the true lag order, fitted on the training split
        var = VAR(dataset.train).fit(var_lag)
        # Skip the first parameter row (the intercept under statsmodels'
        # default trend='c') and transpose the remaining lag coefficients
        var_companion = var.params[1:].T

        # One record per (simulation, composition)
        all.append({
            'sim': sim,
            'composition': composition,
            'nbeats': comp._companion_horizon.numpy(),
            'var': var_companion,
            'epochs': epochs_run,
            'input': comp._past_input.numpy(),
            'forecast': forecast,
            'real': simulation['arguments']['companion_matrix'],
            'error': forecast.flatten() - dataset.y_test.flatten(),
        })

simulation: 0 data composition: ir -> 112 epochs - simulation: 0 data composition: t_ir -> 20 epochs - simulation: 0 data composition: s_ir -> 309 epochs - simulation: 0 data composition: t_s_ir -> 20 epochs - simulation: 0 data composition: t_s -> 20 epochs - simulation: 0 data composition: rw_ir -> 35 epochs - simulation: 0 data composition: t_rw_ir -> 20 epochs - simulation: 0 data composition: s_rw_ir -> 20 epochs - simulation: 0 data composition: t_s_rw_ir -> 20 epochs - simulation: 0 data composition: rw -> 20 epochs - simulation: 1 data composition: ir -> 165 epochs - simulation: 1 data composition: t_ir -> 20 epochs - simulation: 1 data composition: s_ir -> 78 epochs - simulation: 1 data composition: t_s_ir -> 20 epochs - simulation: 1 data composition: t_s -> 20 epochs - simulation: 1 data composition: rw_ir -> 20 epochs - simulation: 1 data composition: t_rw_ir -> 20 epochs - simulation: 1 data composition: s_rw_ir -> 20 epochs - simulation: 1 data composition: t_s_rw_ir -> 2

In [10]:
# Persist the full-model experiment as JSON (`all` is the results list built
# in the previous cell, not the builtin).
# Create the output folder first: to_json raises if 'results/' is missing, and
# by this point the long training cell above has already been run.
os.makedirs('results', exist_ok=True)
pd.DataFrame(all).to_json('results/all.json')

In [11]:
# Experiment: trend stack + companion block (no seasonality stack) on every
# data composition of every simulation.
trends = []

for sim in range(len(data)):
    simulation = data[sim]
    for composition in simulation['data'].keys():
        print('simulation:', sim, 'data composition:', composition, end=' -> ')

        # One-step-ahead forecast from a 100-step look-back window
        horizon, back_horizon = 1, 100
        dataset = DataSet(simulation['data'][composition].values)
        # 0.8 / 0.1 are presumably the train / validation fractions -- TODO confirm in DataSet
        dataset.preprocessing(horizon, back_horizon, 0.8, 0.1)

        # The trend stack lists the SAME block object three times.
        # NOTE(review): confirm that sharing one block instance is intended.
        trend_block = MultiTrendBlock(num_variables, horizon, back_horizon)
        trend = Stack([trend_block] * 3)
        comp = CompanionMatrixBlock(
            num_variables, horizon, back_horizon, var_lag,
            regularizer=None, trainable_backcast=False,
        )
        model = N_BEATS([trend, Stack([comp])])
        model.build(input_shape=(None, back_horizon, num_variables))
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), loss='mse')

        # Stop once the validation loss has not improved for 20 epochs
        early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20, mode='min')
        history = model.fit(
            dataset.X_train, dataset.y_train,
            epochs=1000, batch_size=128,
            validation_data=(dataset.X_val, dataset.y_val),
            callbacks=[early_stopping], verbose=0,
        )
        forecast = model.predict(dataset.X_test, verbose=0)
        epochs_run = len(history.history['loss'])
        print(epochs_run, 'epochs', end=' - ')
        # Direct call on the test inputs; the return value is discarded.
        # Presumably this is what fills comp._past_input read below -- TODO confirm.
        model(dataset.X_test)

        # Reference VAR with the true lag order, fitted on the training split
        var = VAR(dataset.train).fit(var_lag)
        # Skip the first parameter row (the intercept under statsmodels'
        # default trend='c') and transpose the remaining lag coefficients
        var_companion = var.params[1:].T

        # One record per (simulation, composition)
        trends.append({
            'sim': sim,
            'composition': composition,
            'nbeats': comp._companion_horizon.numpy(),
            'var': var_companion,
            'epochs': epochs_run,
            'input': comp._past_input.numpy(),
            'forecast': forecast,
            'real': simulation['arguments']['companion_matrix'],
            'error': forecast.flatten() - dataset.y_test.flatten(),
        })

simulation: 0 data composition: ir -> 169 epochs - simulation: 0 data composition: t_ir -> 40 epochs - simulation: 0 data composition: s_ir -> 618 epochs - simulation: 0 data composition: t_s_ir -> 105 epochs - simulation: 0 data composition: t_s -> 76 epochs - simulation: 0 data composition: rw_ir -> 43 epochs - simulation: 0 data composition: t_rw_ir -> 114 epochs - simulation: 0 data composition: s_rw_ir -> 43 epochs - simulation: 0 data composition: t_s_rw_ir -> 34 epochs - simulation: 0 data composition: rw -> 22 epochs - simulation: 1 data composition: ir -> 76 epochs - simulation: 1 data composition: t_ir -> 51 epochs - simulation: 1 data composition: s_ir -> 62 epochs - simulation: 1 data composition: t_s_ir -> 65 epochs - simulation: 1 data composition: t_s -> 47 epochs - simulation: 1 data composition: rw_ir -> 115 epochs - simulation: 1 data composition: t_rw_ir -> 42 epochs - simulation: 1 data composition: s_rw_ir -> 68 epochs - simulation: 1 data composition: t_s_rw_ir ->

In [12]:
# Persist the trend + companion experiment as JSON.
# Create the output folder first: to_json raises if 'results/' is missing, and
# by this point the long training cell above has already been run.
os.makedirs('results', exist_ok=True)
pd.DataFrame(trends).to_json('results/trend.json')

In [13]:
# Experiment: seasonality stack + companion block (no trend stack) on every
# data composition of every simulation.
seasonals = []

for sim in range(len(data)):
    simulation = data[sim]
    for composition in simulation['data'].keys():
        print('simulation:', sim, 'data composition:', composition, end=' -> ')

        # One-step-ahead forecast from a 100-step look-back window
        horizon, back_horizon = 1, 100
        dataset = DataSet(simulation['data'][composition].values)
        # 0.8 / 0.1 are presumably the train / validation fractions -- TODO confirm in DataSet
        dataset.preprocessing(horizon, back_horizon, 0.8, 0.1)

        # The seasonality stack lists the SAME block object three times.
        # NOTE(review): confirm that sharing one block instance is intended.
        seasonal_block = MultiSeasonalityBlock(num_variables, horizon, back_horizon)
        seasonal = Stack([seasonal_block] * 3)
        comp = CompanionMatrixBlock(
            num_variables, horizon, back_horizon, var_lag,
            regularizer=None, trainable_backcast=False,
        )
        model = N_BEATS([seasonal, Stack([comp])])
        model.build(input_shape=(None, back_horizon, num_variables))
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), loss='mse')

        # Stop once the validation loss has not improved for 20 epochs
        early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20, mode='min')
        history = model.fit(
            dataset.X_train, dataset.y_train,
            epochs=1000, batch_size=128,
            validation_data=(dataset.X_val, dataset.y_val),
            callbacks=[early_stopping], verbose=0,
        )
        forecast = model.predict(dataset.X_test, verbose=0)
        epochs_run = len(history.history['loss'])
        print(epochs_run, 'epochs', end=' - ')
        # Direct call on the test inputs; the return value is discarded.
        # Presumably this is what fills comp._past_input read below -- TODO confirm.
        model(dataset.X_test)

        # Reference VAR with the true lag order, fitted on the training split
        var = VAR(dataset.train).fit(var_lag)
        # Skip the first parameter row (the intercept under statsmodels'
        # default trend='c') and transpose the remaining lag coefficients
        var_companion = var.params[1:].T

        # One record per (simulation, composition)
        seasonals.append({
            'sim': sim,
            'composition': composition,
            'nbeats': comp._companion_horizon.numpy(),
            'var': var_companion,
            'epochs': epochs_run,
            'input': comp._past_input.numpy(),
            'forecast': forecast,
            'real': simulation['arguments']['companion_matrix'],
            'error': forecast.flatten() - dataset.y_test.flatten(),
        })

simulation: 0 data composition: ir -> 48 epochs - simulation: 0 data composition: t_ir -> 100 epochs - simulation: 0 data composition: s_ir -> 232 epochs - simulation: 0 data composition: t_s_ir -> 160 epochs - simulation: 0 data composition: t_s -> 51 epochs - simulation: 0 data composition: rw_ir -> 67 epochs - simulation: 0 data composition: t_rw_ir -> 97 epochs - simulation: 0 data composition: s_rw_ir -> 71 epochs - simulation: 0 data composition: t_s_rw_ir -> 89 epochs - simulation: 0 data composition: rw -> 107 epochs - simulation: 1 data composition: ir -> 96 epochs - simulation: 1 data composition: t_ir -> 24 epochs - simulation: 1 data composition: s_ir -> 168 epochs - simulation: 1 data composition: t_s_ir -> 34 epochs - simulation: 1 data composition: t_s -> 187 epochs - simulation: 1 data composition: rw_ir -> 89 epochs - simulation: 1 data composition: t_rw_ir -> 116 epochs - simulation: 1 data composition: s_rw_ir -> 369 epochs - simulation: 1 data composition: t_s_rw_ir

In [14]:
# Persist the seasonality + companion experiment as JSON.
# Create the output folder first: to_json raises if 'results/' is missing, and
# by this point the long training cell above has already been run.
os.makedirs('results', exist_ok=True)
pd.DataFrame(seasonals).to_json('results/seasonal.json')

In [15]:
# Gather every experiment's record list under the name used for its JSON file
results = {
    'seasonal': seasonals,
    'trend': trends,
    'all': all,  # the full-model results list, not the builtin
    'companion_all': companion_all,
    'horizon_ir': horizon_ir,
    'misspecified_ir': misspecified_ir,
}

In [16]:
# Convert every collected record list to a DataFrame (stored back into
# `results` under the same key) and write it to results/<key>.json.
# These are the same file names the individual save cells above write to.
# Create the output folder first so the write cannot fail on a missing directory.
os.makedirs('results', exist_ok=True)
# Snapshot the items so the dict can be updated safely while looping
for key, records in list(results.items()):
    frame = pd.DataFrame(records)
    results[key] = frame
    frame.to_json('results/' + key + '.json')