Train a StackedSCINet to forecast BTC/USD price time seires

Imports

In [1]:
%load_ext tensorboard

In [1]:
import os
from datetime import datetime
from matplotlib import pyplot as plt
from ta.momentum import AwesomeOscillatorIndicator
from ta.trend import ADXIndicator
from ta.volatility import AverageTrueRange
from ta.volume import AccDistIndexIndicator
from ta import add_all_ta_features
from ta.utils import dropna
from tensorflow.python.keras.callbacks import EarlyStopping
from SCINet import make_simple_scinet, make_simple_stacked_scinet, StackedSCINetLoss
from gtda.time_series import SlidingWindow, Stationarizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
import tensorflow as tf
from livelossplot import PlotLossesKeras

ModuleNotFoundError: No module named 'livelossplot'

Parameters

In [3]:
lag_length, horizon = 32, 2
learning_rate = 5e-3
h, kernel_size, L, K = 4, 5, 3, 3
kernel_regularizer = tf.keras.regularizers.L1L2(0.1, 0.1)

In [4]:
# Load stocks data
data = pd.read_csv(os.path.join('crypto_data', 'Bitstamp_BTCUSD_1h.csv')).set_index('date')
data = data.drop(columns=['unix', 'symbol', 'Volume BTC'])
prices_cols = data.columns

# Clean NaN values
data = dropna(data)

# Add ta features filling NaN values
data['ao'] = AwesomeOscillatorIndicator(high=data['high'], low=data['low'], fillna=True).awesome_oscillator()
data['adi'] = AccDistIndexIndicator(high=data['high'], low=data['low'], close=data['close'], volume=data['Volume USD'],
                                    fillna=True).acc_dist_index()
data['atr'] = AverageTrueRange(high=data['high'], low=data['low'], close=data['close'],
                               fillna=True).average_true_range()

adx_indicator = ADXIndicator(high=data['high'], low=data['low'], close=data['close'], fillna=True)
data['adx'] = adx_indicator.adx()
data['adx_pos'] = adx_indicator.adx_pos()
data['adx_neg'] = adx_indicator.adx_neg()

data = data[27:]
indicator_cols = data.columns[len(prices_cols):]

  dip[idx] = 100 * (self._dip[idx] / value)
  din[idx] = 100 * (self._din[idx] / value)


Splitting, preprocessing and target preperation

In [5]:
# Split data -- train:val:test == 6:2:2
train_cutoff, val_cutoff = int(len(data) * 0.6), int(len(data) * 0.8)
train_data, val_data, test_data = data[:train_cutoff], data[train_cutoff:val_cutoff], data[val_cutoff:]

stationariser = Stationarizer()
train_data[1:][prices_cols] = stationariser.fit_transform(train_data[prices_cols])
val_data[1:][prices_cols] = stationariser.transform(val_data[prices_cols])
test_data[1:][prices_cols] = stationariser.transform(test_data[prices_cols])

train_data, val_data, test_data = train_data[1:], val_data[1:], test_data[1:]

scaler = StandardScaler()
train_data = scaler.fit_transform(train_data)
val_data = scaler.transform(val_data)
test_data = scaler.transform(test_data)

# Segment into train/val/test examples (may use stride < size for train_data only but may cause data leak)
windows = SlidingWindow(size=lag_length + horizon, stride=2)
train_data = windows.fit_transform(train_data)
windows = SlidingWindow(size=lag_length + horizon, stride=lag_length + horizon)
val_data = windows.fit_transform(val_data)
test_data = windows.transform(test_data)

# Split all time series segments into x and y
X_train, y_train = train_data[:, :-horizon, :], train_data[:, -horizon:, :]
X_val, y_val = val_data[:, :-horizon, :], val_data[:, -horizon:, :]
X_test, y_test = test_data[:, :-horizon, :], test_data[:, -horizon:, :]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)


Set output directories

In [8]:
# Create output directory
output_dir = os.path.join('saved_models_with_logs', 'BTC-USD', datetime.now().strftime('%HH%MM %d-%b-%Y'))
os.makedirs(output_dir, exist_ok=True)

log_dir = os.path.join(output_dir, 'logs')
diagram_path = os.path.join(output_dir, 'model_diagram.png')

Train model

In [9]:
# Proceed with SCINet
model = make_simple_stacked_scinet(X_train.shape, horizon=horizon, K=K, L=L, h=h, kernel_size=kernel_size,
                                   learning_rate=learning_rate, kernel_regularizer=kernel_regularizer,
                                   diagram_path=diagram_path)
early_stopping = EarlyStopping(monitor='val_loss', patience=5, min_delta=0, verbose=1, restore_best_weights=True)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

history = model.fit(X_train, y_train, validation_data=(X_val, y_val), batch_size=32, epochs=500,
                    callbacks=[early_stopping, tensorboard_callback])

# Save model and training history
model.save(output_dir)
pd.DataFrame(history.history).to_csv(os.path.join(output_dir, 'train_history.csv'))

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
lookback_window (InputLayer)    [(None, 32, 11)]     0                                            
__________________________________________________________________________________________________
stacked_scinet (StackedSCINet)  (3, None, 2, 11)     434478      lookback_window[0][0]            
__________________________________________________________________________________________________
tf.__operators__.getitem (Slici (None, 2, 11)        0           stacked_scinet[0][0]             
__________________________________________________________________________________________________
outputs (Identity)              (None, 2, 11)        0           tf.__operators__.getitem[0][0]   
______________________________________________________________________________________________



INFO:tensorflow:Assets written to: saved_models_with_logs\BTC-USD\21H17M 04-Jan-2022\assets


INFO:tensorflow:Assets written to: saved_models_with_logs\BTC-USD\21H17M 04-Jan-2022\assets


Plot errors and loss

In [10]:
# Plot some metrics
plt.plot(history.history['outputs_mae'])
plt.plot(history.history['val_outputs_mae'])
plt.title('model error')
plt.ylabel('mean absolute error')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper right')
plt.savefig(os.path.join(output_dir, 'outputs_mae.png'))
plt.clf()

# Plot loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper right')
plt.savefig(os.path.join(output_dir, 'loss.png'))
plt.clf()

<Figure size 432x288 with 0 Axes>

Evaluate

In [11]:
# Reconstruct/load model
# output_dir = 'saved_models_with_logs/BTC-USD/18H11M 04-Jan-2022'
model = tf.keras.models.load_model(output_dir, custom_objects={'StackedSCINetLoss': StackedSCINetLoss()})

# Evaluate
scores = model.evaluate(X_test, y_test)
print(f'scores: {scores}')

# Predict
y_pred, _ = model.predict(X_test, batch_size=1)
print(np.mean(np.abs((y_pred - y_test))))  # manual mae for sanity check

y_pred = scaler.inverse_transform(y_pred.reshape(-1, 11))
y_test = scaler.inverse_transform(y_test.reshape(-1, 11))
columns = pd.MultiIndex.from_product([['test', 'pred'], data.columns])
df = pd.DataFrame(np.concatenate((y_test, y_pred), axis=1), columns=columns)

scores: [140.72720336914062, 123.14376068115234, 0.3363029956817627, 0.2625609338283539]
0.2625609452025969
