In [1]:
from warnings import filterwarnings

filterwarnings("ignore")
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten
from tensorflow.keras.layers import ConvLSTM2D

from modules.data_fetcher import download_historical_data

2023-04-10 11:52:31.170932: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-10 11:52:32.257049: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-04-10 11:52:32.257152: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-04-10 11:52:34.183053: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-

In [2]:
# Per-ticker daily frames with engineered feature columns, keyed by ticker symbol.
data = {}

tickers = ["BTC-USDT", "ETH-USDT", "XRP-USDT", "LTC-USDT", "BCH-USDT", "EOS-USDT"]

for ticker in tickers:
    # .copy() so the feature columns below are written to an independent frame
    # instead of a slice of whatever download_historical_data returned.
    # NOTE(review): the label slice presumes a sorted, date-like index — confirm.
    df = download_historical_data(ticker, "1day").loc["2020-11-20":].copy()

    # Zero prices/volumes are mapped to 1.0 so the logarithm is defined (log(1) == 0).
    df["Log_Close"] = np.log(df.Close.replace(0.0, 1.0))
    df["Returns"] = df.Close.pct_change()
    # A log return is the first difference of the log price. The previous
    # pct_change() of Log_Close was the relative change of the logarithm, which
    # is not a return and divides by zero whenever Log_Close is 0.
    df["Log_Returns"] = df.Log_Close.diff()
    df["Log_Volume"] = np.log(np.abs(df.Volume.replace(0.0, 1.0)))
    df["Vol20"] = df.Log_Returns.rolling(20).std()
    # NOTE(review): the first positional argument of ewm() is `com`, so this is
    # a centre-of-mass-20 average, not span=20; kept as-is, only made explicit.
    df["EMA20"] = df.Close.ewm(com=20).mean()

    # Drop the warm-up rows that the differencing / rolling windows leave as NaN.
    data[ticker] = df.dropna()


In [3]:
# Forward-fill any remaining gaps so the scaler never sees NaN.
for ticker in data:
    data[ticker] = data[ticker].ffill()

# Chronological split: shuffle=False keeps the last 20% of rows as the test set.
train_data = {}
test_data = {}
for ticker in data:
    train_data[ticker], test_data[ticker] = train_test_split(
        data[ticker], test_size=0.2, shuffle=False
    )

# Standardise every ticker with its OWN scaler, fitted on that ticker's training
# rows only. Re-fitting one shared scaler inside the loop left it holding the
# statistics of the last ticker, which were then applied to every test set.
scalers = {}
for ticker in train_data:
    scaler = StandardScaler()
    train_data[ticker] = pd.DataFrame(
        scaler.fit_transform(train_data[ticker]),
        columns=train_data[ticker].columns,
        index=train_data[ticker].index,
    )
    scalers[ticker] = scaler

# Apply each ticker's training-set statistics to its held-out rows.
for ticker in test_data:
    test_data[ticker] = pd.DataFrame(
        scalers[ticker].transform(test_data[ticker]),
        columns=test_data[ticker].columns,
        index=test_data[ticker].index,
    )


In [4]:
# Number of rows ahead at which the future close is compared with the current one.
TARGET_HORIZON = 5


def _with_direction_target(frame, horizon=TARGET_HORIZON):
    """Return a copy of ``frame`` with a binary ``Target`` column.

    ``Target`` is 1 when ``Close`` ``horizon`` rows later is greater than the
    current ``Close`` and 0 otherwise. The trailing rows that have no future
    close are dropped: ``shift(-horizon)`` leaves NaN there, the comparison with
    NaN is False, and those rows would otherwise be silently labelled 0.
    """
    future_close = frame["Close"].shift(-horizon)
    labelled = frame.assign(Target=(future_close > frame["Close"]).astype(int))
    return labelled[future_close.notna()]


# NOTE: each run of this cell trims another `TARGET_HORIZON` rows from the
# frames, so re-run the cell that builds train_data/test_data first.
for ticker in train_data:
    train_data[ticker] = _with_direction_target(train_data[ticker])
for ticker in test_data:
    test_data[ticker] = _with_direction_target(test_data[ticker])


In [5]:
# Keep only the first few tickers so the example models below train quickly.
n_ExampleSymbols = 2

train_data = {
    symbol: train_data[symbol] for symbol in list(train_data)[:n_ExampleSymbols]
}
test_data = {
    symbol: test_data[symbol] for symbol in list(test_data)[:n_ExampleSymbols]
}

## LSTM


In [13]:
def build_lstm_model(n_features):
    """Build and compile a two-layer LSTM binary classifier.

    Each sample is presented as a sequence of ``n_features`` steps with one
    value per step (input shape ``(n_features, 1)``).

    Args:
        n_features: Number of feature columns per sample.

    Returns:
        A compiled ``Sequential`` model with a single sigmoid output, trained
        with binary cross-entropy / Adam and reporting accuracy.
    """
    model = Sequential()
    # return_sequences=True so the second LSTM receives the full sequence.
    model.add(LSTM(32, input_shape=(n_features, 1), return_sequences=True))
    # Only the first layer of a Sequential model needs input_shape; the
    # redundant copy that used to be passed here has been removed.
    model.add(LSTM(16, dropout=0.2))
    model.add(Dense(8, activation="relu"))
    model.add(Dense(1, activation="sigmoid"))
    model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
    return model


# One trained LSTM classifier per ticker, keyed by ticker symbol.
lstm_models = {}
for ticker in train_data:
    X_train = train_data[ticker].drop(["Target"], axis=1)
    y_train = train_data[ticker]["Target"]
    n_features = X_train.shape[1]
    # (samples, n_features) -> (samples, n_features, 1): the layout build_lstm_model expects.
    X_train = X_train.values.reshape((X_train.shape[0], n_features, 1))

    lstm_models[ticker] = build_lstm_model(n_features)
    # `use_multiprocessing` was dropped: Keras documents it as applying to
    # generator / Sequence input only, so it did nothing for in-memory arrays,
    # and newer Keras releases no longer accept the argument.
    lstm_models[ticker].fit(
        X_train,
        y_train,
        epochs=50,
        verbose=1,
        # Keras takes the validation rows from the end of the arrays, before shuffling.
        validation_split=0.2,
        shuffle=True,
        callbacks=[
            # Stop once validation accuracy has not improved for 8 epochs and
            # roll back to the best weights seen.
            tf.keras.callbacks.EarlyStopping(
                monitor="val_accuracy",
                verbose=1,
                patience=8,
                mode="max",
                restore_best_weights=True,
            )
        ],
    )


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 9: early stopping
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 9: early stopping


## Conv1D


In [7]:
from keras.layers import Conv1D, MaxPooling1D, Flatten


def build_cnn_model(n_features):
    """Assemble and compile a small 1-D convolutional binary classifier.

    The network expects inputs of shape ``(n_features, 1)`` and stacks a
    64-filter width-2 convolution, max pooling, a flatten step, a 50-unit ReLU
    layer and a single sigmoid output.

    Args:
        n_features: Number of feature columns per sample.

    Returns:
        The compiled ``Sequential`` model (binary cross-entropy, Adam, accuracy).
    """
    stack = [
        Conv1D(
            filters=64, kernel_size=2, activation="relu", input_shape=(n_features, 1)
        ),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(50, activation="relu"),
        Dense(1, activation="sigmoid"),
    ]
    classifier = Sequential(stack)
    classifier.compile(
        loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"]
    )
    return classifier


# One trained Conv1D classifier per ticker, keyed by ticker symbol.
cnn_models = {}
for ticker in train_data:
    X_train = train_data[ticker].drop(["Target"], axis=1)
    y_train = train_data[ticker]["Target"]
    n_features = X_train.shape[1]
    # (samples, n_features) -> (samples, n_features, 1): the layout build_cnn_model expects.
    X_train = X_train.values.reshape((X_train.shape[0], n_features, 1))

    cnn_models[ticker] = build_cnn_model(n_features)
    # `use_multiprocessing` was dropped: Keras documents it as applying to
    # generator / Sequence input only, so it did nothing for in-memory arrays,
    # and newer Keras releases no longer accept the argument.
    cnn_models[ticker].fit(
        X_train,
        y_train,
        epochs=100,
        verbose=1,
        # Keras takes the validation rows from the end of the arrays, before shuffling.
        validation_split=0.2,
        shuffle=True,
        callbacks=[
            # Stop once validation accuracy has not improved for 8 epochs and
            # roll back to the best weights seen.
            tf.keras.callbacks.EarlyStopping(
                monitor="val_accuracy",
                verbose=1,
                patience=8,
                mode="max",
                restore_best_weights=True,
            )
        ],
    )

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 5: early stopping
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 11: early stopping


## ConvLSTM2D


In [8]:
from keras.layers import ConvLSTM2D


def build_convlstm_model(n_features):
    """Assemble and compile a ConvLSTM2D-based binary classifier.

    Inputs have shape ``(1, 1, n_features, 1)``. A 64-filter ConvLSTM2D layer
    with a ``(1, 2)`` kernel is followed by a flatten step, a 50-unit ReLU
    layer and a single sigmoid output.

    Args:
        n_features: Number of feature columns per sample.

    Returns:
        The compiled ``Sequential`` model (binary cross-entropy, Adam, accuracy).
    """
    stack = [
        ConvLSTM2D(
            filters=64,
            kernel_size=(1, 2),
            activation="relu",
            input_shape=(1, 1, n_features, 1),
        ),
        Flatten(),
        Dense(50, activation="relu"),
        Dense(1, activation="sigmoid"),
    ]
    classifier = Sequential(stack)
    classifier.compile(
        loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"]
    )
    return classifier


# One trained ConvLSTM2D classifier per ticker, keyed by ticker symbol.
convlstm_models = {}
for ticker in train_data:
    X_train = train_data[ticker].drop(["Target"], axis=1)
    y_train = train_data[ticker]["Target"]
    n_features = X_train.shape[1]
    # (samples, n_features) -> (samples, 1, 1, n_features, 1): the layout
    # build_convlstm_model expects.
    X_train = X_train.values.reshape((X_train.shape[0], 1, 1, n_features, 1))

    convlstm_models[ticker] = build_convlstm_model(n_features)
    # `use_multiprocessing` was dropped: Keras documents it as applying to
    # generator / Sequence input only, so it did nothing for in-memory arrays,
    # and newer Keras releases no longer accept the argument.
    convlstm_models[ticker].fit(
        X_train,
        y_train,
        epochs=100,
        verbose=1,
        # Keras takes the validation rows from the end of the arrays, before shuffling.
        validation_split=0.2,
        shuffle=True,
        callbacks=[
            # Stop once validation accuracy has not improved for 8 epochs and
            # roll back to the best weights seen.
            tf.keras.callbacks.EarlyStopping(
                monitor="val_accuracy",
                verbose=1,
                patience=8,
                mode="max",
                restore_best_weights=True,
            )
        ],
    )

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 5: early stopping
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 5: early stopping


In [None]:
import numpy as np
from sklearn.metrics import roc_auc_score, accuracy_score, recall_score, f1_score

def evaluate(model, X_test, y_test):
    """Score a fitted binary classifier on held-out data.

    Args:
        model: Object whose ``predict(X_test)`` returns one positive-class
            score per sample (e.g. an ``(n, 1)`` sigmoid output).
        X_test: Model-ready input array.
        y_test: True 0/1 labels, one per sample.

    Returns:
        Tuple ``(auc_score, precision, recall, f1)``. AUC is computed from the
        raw scores; the other three from labels thresholded at 0.5.
    """
    # Local import: precision_score is not among the metrics imported by this cell.
    from sklearn.metrics import precision_score

    # Flatten to one score per sample so thresholding is a plain vector operation.
    scores = np.asarray(model.predict(X_test)).ravel()
    auc_score = roc_auc_score(y_test, scores)
    y_pred = (scores > 0.5).astype(int)
    # This value is returned and reported as "precision", so compute precision;
    # it was previously produced by accuracy_score. zero_division=0 yields 0.0
    # when the model predicts no positives at all.
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    return auc_score, precision, recall, f1

def _score_models(models, to_model_input):
    """Run ``evaluate`` for every ticker in ``test_data`` with one model family.

    Args:
        models: Mapping of ticker -> fitted model.
        to_model_input: Callable that reshapes a ``(samples, n_features)``
            array into the layout that model family was trained on.

    Returns:
        Four lists (AUCs, precisions, recalls, F1s), one entry per ticker, in
        ``test_data`` iteration order.
    """
    aucs, precisions, recalls, f1s = [], [], [], []
    for ticker, frame in test_data.items():
        features = frame.drop(['Target'], axis=1).values
        labels = frame['Target']
        auc, precision, recall, f1 = evaluate(
            models[ticker], to_model_input(features), labels
        )
        aucs.append(auc)
        precisions.append(precision)
        recalls.append(recall)
        f1s.append(f1)
    return aucs, precisions, recalls, f1s


def _as_feature_sequence(features):
    """Reshape (samples, n_features) -> (samples, n_features, 1) for the LSTM / CNN models."""
    return features.reshape((features.shape[0], features.shape[1], 1))


def _as_convlstm_input(features):
    """Reshape (samples, n_features) -> (samples, 1, 1, n_features, 1) for the ConvLSTM models."""
    return features.reshape((features.shape[0], 1, 1, features.shape[1], 1))


lstm_aucs, lstm_precisions, lstm_recalls, lstm_f1s = _score_models(
    lstm_models, _as_feature_sequence
)
cnn_aucs, cnn_precisions, cnn_recalls, cnn_f1s = _score_models(
    cnn_models, _as_feature_sequence
)
convlstm_aucs, convlstm_precisions, convlstm_recalls, convlstm_f1s = _score_models(
    convlstm_models, _as_convlstm_input
)

# Average over the per-ticker lists. The second figure used to be taken from the
# scalar left over from the last loop iteration (e.g. `lstm_precision`), so it
# reflected only the final ticker instead of the mean across tickers.
print('LSTM ROC AUC Score: {:.2f}, Precision: {:.2f}%, F1 Score: {:.2f}'.format(np.mean(lstm_aucs), np.mean(lstm_precisions) * 100, np.mean(lstm_f1s)))
print('CNN ROC AUC Score: {:.2f}, Precision: {:.2f}%, F1 Score: {:.2f}'.format(np.mean(cnn_aucs),  np.mean(cnn_precisions) * 100, np.mean(cnn_f1s)))
print('ConvLSTM ROC AUC Score: {:.2f},  Precision: {:.2f}%, F1 Score: {:.2f}'.format(np.mean(convlstm_aucs), np.mean(convlstm_precisions) * 100, np.mean(convlstm_f1s)))