# Ensemble

Before running, make sure you have downloaded the zip file for each model, extracted the model from it, and saved it in a directory named "model". Models can be accessed on [Google Drive](https://drive.google.com/drive/folders/1ldH5LbvOHNMP_eBM74szzdETlycwVdjW?usp=sharing).

In [1]:
import utils 
from tensorflow import keras
import pathlib
from  tensorflow.keras.metrics import mean_squared_error
import numpy as np


2022-11-07 11:44:53.467992: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


# LSTM

**Prepare data**

In [2]:
# Load the cleaned master dataset and cut it into train / validation / test frames.
df = utils.load_and_clean_master_dataset()
df_train, df_val, df_test = utils.split_train_test_val(df)

# Separate inputs (X) from targets (y) for each split, in train/val/test order.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    utils.split_Xy(part) for part in (df_train, df_val, df_test)
)

# The scaler returned by the train=True call is reused for the other two splits.
# NOTE(review): this is the only model section in the notebook that normalizes
# its inputs - confirm that matches how each model was trained.
X_train, scaler = utils.normalize(X_train, train=True)
X_val = utils.normalize(X_val, train=False, scaler=scaler)[0]
X_test = utils.normalize(X_test, train=False, scaler=scaler)[0]

# Keep only the first 8 entries of utils.cols as input columns.
lstm_cols = utils.cols[:8]
X_train, X_val, X_test = (
    split[lstm_cols] for split in (X_train, X_val, X_test)
)

# Convert every split with utils.to_batch
# (presumably builds the sliding-window sequences the LSTM consumes - TODO confirm).
X_train, y_train = utils.to_batch(X_train, y_train)
X_val, y_val = utils.to_batch(X_val, y_val)
X_test, y_test = utils.to_batch(X_test, y_test)

**Reload model**

In [4]:
# Load the five saved LSTM ensemble members (lstm/model_0 ... lstm/model_4).
# Paths are resolved against the current working directory; Path('.').parent is
# still '.', so the former `.parent` hop was a no-op.
crnt_dir = pathlib.Path('.').resolve()
lstm_models = [
    keras.models.load_model(crnt_dir / 'lstm' / f'model_{i}')
    for i in range(5)
]


2022-11-07 11:47:13.502968: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


**Evaluate model**

In [5]:
# Run every LSTM ensemble member on the batched test inputs; calling the model
# directly returns a tensor per member.
y_pred_lstms = [model(X_test) for model in lstm_models]

# Standalone test MSE of the first member. Its prediction is already in the
# list above, so reuse it instead of running a second forward pass.
y_pred_lstm = y_pred_lstms[0]
mean_squared_error(y_test.to_numpy(), y_pred_lstm.numpy().reshape(-1))

<tf.Tensor: shape=(), dtype=float32, numpy=0.0032882523>

# Simple NN

**Prepare data**

In [6]:
# Load the cleaned master dataset and cut it into train / validation / test frames.
df = utils.load_and_clean_master_dataset()
df_train, df_val, df_test = utils.split_train_test_val(df)

# Separate inputs (X) from targets (y) for each split, in train/val/test order.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    utils.split_Xy(part) for part in (df_train, df_val, df_test)
)

# Keep only the first 5 entries of utils.cols as input columns.
# No normalization or batching is applied in this section.
nn_cols = utils.cols[:5]
X_train, X_val, X_test = (
    split[nn_cols] for split in (X_train, X_val, X_test)
)


**Reload model**

In [7]:
# Load the five saved dense-network ensemble members (DNN/model_0 ... DNN/model_4).
# Paths are resolved against the current working directory; Path('.').parent is
# still '.', so the former `.parent` hop was a no-op.
crnt_dir = pathlib.Path('.').resolve()
nn_models = [
    keras.models.load_model(crnt_dir / 'DNN' / f'model_{i}')
    for i in range(5)
]

**Evaluate model**

In [8]:
# Take into account the sliding window of the other models: the first rows of
# the test split are skipped so these predictions line up with theirs.
# NOTE(review): 20 presumably equals the window length used by utils.to_batch -
# TODO confirm.
WINDOW_OFFSET = 20

# Slice into new names instead of overwriting X_test / y_test, so re-running
# this cell does not drop another WINDOW_OFFSET rows each time.
X_test_nn, y_test_nn = X_test[WINDOW_OFFSET:], y_test[WINDOW_OFFSET:]

# One 1-D prediction vector per ensemble member (first output column of predict).
y_pred_nns = [model.predict(X_test_nn)[:, 0] for model in nn_models]

# Standalone test MSE of the first member; reuse its prediction from the list
# above rather than calling predict a second time.
y_pred_nn = y_pred_nns[0]
mean_squared_error(y_test_nn.to_numpy(), y_pred_nn.reshape(-1))



<tf.Tensor: shape=(), dtype=float32, numpy=0.0007316234>

# Transformer

**Prepare data**

In [9]:
# Load the cleaned master dataset and cut it into train / validation / test frames.
df = utils.load_and_clean_master_dataset()
df_train, df_val, df_test = utils.split_train_test_val(df)

# Separate inputs (X) from targets (y) for each split, in train/val/test order.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    utils.split_Xy(part) for part in (df_train, df_val, df_test)
)

# Keep only the first 5 entries of utils.cols as input columns (no normalization here).
transformer_cols = utils.cols[:5]
X_train, X_val, X_test = (
    split[transformer_cols] for split in (X_train, X_val, X_test)
)

# Convert every split with utils.to_batch
# (presumably builds the sliding-window sequences the model consumes - TODO confirm).
X_train, y_train = utils.to_batch(X_train, y_train)
X_val, y_val = utils.to_batch(X_val, y_val)
X_test, y_test = utils.to_batch(X_test, y_test)

**Reload model**

In [10]:
# Load the two saved transformer models (transformer/model_1 and transformer/model_2).
# Paths are resolved against the current working directory; Path('.').parent is
# still '.', so the former `.parent` hop was a no-op.
crnt_dir = pathlib.Path('.').resolve()
transformer_models = [
    keras.models.load_model(crnt_dir / 'transformer' / f'model_{i+1}')
    for i in range(2)
]

# transformer_models[0] is transformer/model_1; reuse it for the standalone
# evaluation instead of loading the same saved model from disk a second time.
transformer_model = transformer_models[0]

**Evaluate model**

In [11]:

# Predictions that feed the final ensemble.
# NOTE(review): the [1:] slice leaves transformer_models[0] out of the ensemble,
# while the standalone MSE below is computed with `transformer_model` - confirm
# that excluding the first transformer is intentional.
y_pred_transformers = [member(X_test) for member in transformer_models[1:]]

# Standalone test MSE of `transformer_model`.
y_pred_transformer = transformer_model(X_test)
mean_squared_error(
    y_test.to_numpy(),
    y_pred_transformer.numpy().reshape(-1),
)

<tf.Tensor: shape=(), dtype=float32, numpy=0.0011829048>

# Ensemble

In [12]:
# Give each dense-network prediction a trailing axis of length 1
# (presumably to match the shape of the LSTM / transformer outputs - TODO confirm).
y_pred_nns = [np.reshape(pred, (-1, 1)) for pred in y_pred_nns]

# The ensemble prediction is the element-wise mean over every member's output.
all_member_preds = [*y_pred_nns, *y_pred_lstms, *y_pred_transformers]
y_pred = np.mean(all_member_preds, axis=0)

# Test MSE of the averaged prediction against the current y_test.
ensemble_mse = mean_squared_error(y_test.to_numpy(), y_pred.reshape(-1))

In [13]:
# Report the test MSE of the averaged (ensemble) prediction.
print(f'Final (ensemble) MSE is {ensemble_mse}')

Final (ensemble) MSE is 0.0005525783053599298


In [28]:
# Relative gap between the smallest and largest ensemble prediction along axis 0.
# NOTE(review): this uses the global min and max regardless of their order, so it
# is not a peak-to-trough drawdown - confirm this is the intended metric.
pred_low, pred_high = y_pred.min(axis=0), y_pred.max(axis=0)
drawdown = (pred_low - pred_high) / pred_high
print(f"Mean Drawdown: {np.mean(drawdown)}")

# Absolute value of the mean signed error of the ensemble prediction.
residuals = y_pred - np.expand_dims(y_test, axis=-1)
bias = np.abs(np.mean(residuals))
print(f"Mean Bias: {np.mean(bias)}")

# Variance across ensemble members at each position, averaged over all positions.
member_preds = [*y_pred_nns, *y_pred_lstms, *y_pred_transformers]
variance = np.mean(np.var(member_preds, axis=0))
print(f"variance: {variance}")


Mean Drawdown: -0.10428985953330994
Mean Bias: 0.006440697781402273
variance: 0.0010006490629166365


In [29]:
# Display the raw drawdown array computed in the diagnostics cell.
drawdown


array([-0.10428986], dtype=float32)

In [17]:
# Mean of the ensemble prediction over all entries of y_pred.
np.mean(y_pred)

1.3126215