In [1]:
import warnings

warnings.filterwarnings("ignore")

from concurrent.futures import ThreadPoolExecutor, as_completed, ProcessPoolExecutor
import numpy as np
import pandas as pd

# ML/DL
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split

import plotly.graph_objects as go
from plotly.subplots import make_subplots
from random import randint
import matplotlib.pyplot as plt

from quant_invest_lab.data_provider import download_crypto_historical_data

2023-03-13 13:59:36.932157: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-13 13:59:37.774931: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-03-13 13:59:37.775009: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-03-13 13:59:42.722827: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-

In [23]:
from scipy.signal import savgol_filter

# Windowing configuration (all values are counts of time steps).
INPUT_STEPS = 30  # Look-back window: number of past steps in each model input
COVERAGE_STEPS = 7  # Trailing input steps that are repeated at the start of each target
OUTPUT_STEPS = 14  # Forecast horizon: number of steps after the input window

# Length of one target window: the overlap plus the forecast horizon.
OUTPUT_SIZE = COVERAGE_STEPS + OUTPUT_STEPS
BATCH_SIZE = 64  # Mini-batch size passed to model.fit
LABEL_COL = "Close_denoised"  # Target column; it is created in get_currency

# Symbols whose history is downloaded (one get_currency call per entry).
# NOTE(review): symbols look like BASE-QUOTE trading pairs and include
# leveraged tokens (e.g. *3L / *3S) — confirm these belong in the training set.
currencies = [
    "BTC-USDT",
    "ETH-USDT",
    "ADA-USDT",
    "AAVE-USDT",
    "ALGO-USDT",
    "EGLD-USDT",
    "LINK-USDT",
    "DOT-USDT",
    "SOL-USDT",
    "UNI-USDT",
    "XRP-USDT",
    "AVAX-USDT",
    "AXS-USDT",
    "NEAR-USDT",
    "MATIC-USDT",
    "NKN-USDT",
    "LOOM-BTC",
    "GEM-USDT",
    "CUSD-USDT",
    "LTC3L-USDT",
    "OAS-USDT",
    "NEO-BTC",
    "KNC-USDT",
    "GRIN-BTC",
    "LYM-USDT",
    "NIM-ETH",
    "SUTER-BTC",
    "NULS-BTC",
    "HAI-USDT",
    "MITX-USDT",
    "PDEX-USDT",
    "HTR-BTC",
    "FLAME-USDT",
    "EPX-USDT",
    "PUNDIX-BTC",
    "AOG-USDT",
    "VET-BTC",
    "ATOM3L-USDT",
    "TWT-BTC",
    "AGLD-USDT",
    "KAI-ETH",
    "YLD-USDT",
    "CPC-ETH",
    "COTI-BTC",
    "CWS-USDT",
    "PEEL-BTC",
    "SENSO-USDT",
    "ALICE-USDT",
    "UNIC-USDT",
    "SYS-BTC",
    "IHC-USDT",
    "PNT-BTC",
    "BULL-USDT",
    "BOSON-ETH",
    "ROOBEE-BTC",
    "SWASH-USDT",
    "XEC-USDT",
    "BCH-USDC",
    "ATOM3S-USDT",
    "CHSB-BTC",
    "LINA-USDT",
    "ETC-USDT",
    "RUNE-USDT",
    "MPLX-USDT",
    "TRAC-BTC",
    "HYDRA-USDT",
    "ASTR-USDT",
    "WBTC-ETH",
    "CSIX-ETH",
    "BCH-USDT",
    "NGM-USDT",
    "JAR-USDT",
    "TONE-USDT",
    "POSI-USDT",
    "XDB-BTC",
    "EOS3L-USDT",
    "HYVE-BTC",
    "RUNE-USDC",
    "ETC-USDC",
    "KAVA-USDT",
]
print(f"Total currencies : {len(currencies)}")


def get_currency(symbol: str):
    """Download one symbol's 12-hour history and derive the model columns.

    Adds a ``Returns`` column (percentage change of ``Close``), removes rows
    containing NaN, drops the ``Timestamp`` column and adds ``Close_denoised``,
    a Savitzky-Golay smoothing of ``Close`` (window 30, polynomial order 2,
    ``mode="nearest"``). The resulting shape is printed for progress tracking.

    NOTE(review): the smoothing runs over the whole series at once, so a
    smoothed value may depend on later rows — confirm this is acceptable for
    a forecasting target.

    Args:
        symbol: Symbol forwarded to ``download_crypto_historical_data``.

    Returns:
        The prepared frame with any remaining NaN rows removed.
    """
    candles = download_crypto_historical_data(symbol, "12hour")

    # pct_change leaves a NaN in the first row; it is removed right away.
    candles["Returns"] = candles["Close"].pct_change()
    candles.dropna(inplace=True)
    candles.drop(columns=["Timestamp"], inplace=True)

    smoothed_close = savgol_filter(
        candles["Close"].values,
        window_length=30,
        polyorder=2,
        mode="nearest",
    )
    candles["Close_denoised"] = smoothed_close

    print(f"{symbol} : {candles.shape}")
    return candles.dropna()


def normalize_and_split_sequence(
    input_dataframe: pd.DataFrame,
    look_back: int = 60,
    forecast_horizon: int = 20,
    coverage: int = 20,
    column_labels: str | list[str] = "Close",
):
    features_scaled = StandardScaler().fit_transform(
        input_dataframe.drop(columns=[column_labels])
    )
    labels_scaled = MinMaxScaler().fit_transform(
        input_dataframe[column_labels].values.reshape(-1, 1)
    )

    X, y = [], []
    for i in range(len(input_dataframe)):
        lag_end = i + look_back
        forecast_end = lag_end + forecast_horizon
        if forecast_end > len(input_dataframe):
            break
        X.append(features_scaled[i:lag_end])
        y.append(labels_scaled[lag_end - coverage : forecast_end])
    res = {"X": np.array(X), "y": np.array(y)}
    return res


def main(symbol: str):
    """Download one symbol and turn it into windowed, scaled training arrays.

    Chains ``get_currency`` and ``normalize_and_split_sequence`` using the
    notebook-level window constants, with ``LABEL_COL`` as the target column.

    Args:
        symbol: Symbol to download.

    Returns:
        The ``{"X": ..., "y": ...}`` dict produced by
        ``normalize_and_split_sequence``.
    """
    prepared_frame = get_currency(symbol)
    return normalize_and_split_sequence(
        prepared_frame,
        look_back=INPUT_STEPS,
        forecast_horizon=OUTPUT_STEPS,
        coverage=COVERAGE_STEPS,
        column_labels=LABEL_COL,
    )

Total currencies : 80


In [24]:
# Build the (X, y) windows of every symbol in parallel worker processes.
with ProcessPoolExecutor(max_workers=10) as executor:
    processes = [(cur, executor.submit(main, cur)) for cur in currencies]

# Leaving the `with` block waits for all tasks, so every future is already
# finished here. Reading them in submission order keeps the row order of X / y
# identical from run to run.
x_n_y = []
for cur, task in processes:
    try:
        result = task.result()
    except Exception as exc:
        # Re-raise with the symbol name so a failing download is easy to locate.
        raise RuntimeError(f"Building sequences failed for {cur}") from exc
    if len(result["X"]) == 0:
        # A series too short for one window cannot be stacked with the others.
        print(f"{cur} : not enough rows for a single window, skipped")
        continue
    x_n_y.append(result)

print("Splitting done !")

X = np.vstack([res["X"] for res in x_n_y])
y = np.vstack([res["y"] for res in x_n_y])
X.shape, y.shape

UNI-USDT : (1815, 8)SOL-USDT : (1173, 8)

ALGO-USDT : (2723, 8)AAVE-USDT : (1745, 8)
BTC-USDT : (3795, 8)

ADA-USDT : (2697, 8)DOT-USDT : (1868, 8)LINK-USDT : (1871, 8)
ETH-USDT : (3795, 8)
EGLD-USDT : (1091, 8)


XRP-USDT : (3035, 8)
AVAX-USDT : (1477, 8)
AXS-USDT : (1211, 8)
NEAR-USDT : (1225, 8)
MATIC-USDT : (1339, 8)
NKN-USDT : (1089, 8)
CUSD-USDT : (1341, 8)
KNC-USDT : (643, 8)
OAS-USDT : (183, 8)
GEM-USDT : (307, 8)
LTC3L-USDT : (1297, 8)
LYM-USDT : (3246, 8)
GRIN-BTC : (3018, 8)
LOOM-BTC : (3565, 8)
NEO-BTC : (3793, 8)
FLAME-USDT : (1030, 8)
HAI-USDT : (1381, 8)
EPX-USDT : (637, 8)HTR-BTC : (1483, 8)PUNDIX-BTC : (1407, 8)

NIM-ETH : (2767, 8)

PDEX-USDT : (1392, 8)
MITX-USDT : (1505, 8)
SUTER-BTC : (2183, 8)
NULS-BTC : (3574, 8)
TWT-BTC : (963, 8)
CWS-USDT : (1351, 8)
ATOM3L-USDT : (1041, 8)AGLD-USDT : (1031, 8)
YLD-USDT : (1022, 8)

AOG-USDT : (328, 8)
COTI-BTC : (2756, 8)
KAI-ETH : (1887, 8)
CPC-ETH : (3089, 8)
VET-BTC : (3369, 8)
SYS-BTC : (639, 8)
IHC-USDT : (553, 8)PEEL-BTC

# Recurrent Neural Networks


In [4]:
# Hold out 33% of the windows for evaluation; the fixed seed makes the split
# reproducible across kernel restarts.
# NOTE(review): consecutive windows of one symbol overlap almost entirely, so a
# random split places near-duplicate windows in both train and test — consider
# a chronological split for an honest evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)


## GRU


In [5]:
# Single-layer GRU encoder followed by a dense head that emits the whole
# target window at once; the output is reshaped to (OUTPUT_SIZE, 1).
gru_model = tf.keras.Sequential(
    [
        tf.keras.layers.GRU(
            32,
            return_sequences=False,  # only the final hidden state feeds the head
        ),
        tf.keras.layers.Dense(
            OUTPUT_SIZE * 1, kernel_initializer=tf.initializers.zeros()
        ),
        tf.keras.layers.Reshape([OUTPUT_SIZE, 1]),
    ]
)

gru_model.compile(
    loss=tf.keras.losses.MeanSquaredError(),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=[tf.keras.metrics.MeanAbsoluteError()],
)

# `use_multiprocessing` is intentionally not passed: it only applies to
# generator / Sequence inputs, not to in-memory arrays, and newer Keras
# versions no longer accept it in `fit`.
history = gru_model.fit(
    X_train,
    y_train,
    epochs=10,
    validation_split=0.2,
    batch_size=BATCH_SIZE,
    shuffle=True,
    callbacks=[
        # Stop once val_loss has not improved for 4 epochs and roll back to
        # the best weights seen.
        tf.keras.callbacks.EarlyStopping(
            monitor="val_loss",
            verbose=1,
            patience=4,
            mode="min",
            restore_best_weights=True,
        )
    ],
)

2023-03-13 13:59:55.668942: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-03-13 13:59:55.669745: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2023-03-13 13:59:55.669843: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (DESKTOP-154PLAM): /proc/driver/nvidia/version does not exist
2023-03-13 13:59:55.673267: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [6]:
# Forward pass of the trained GRU on the held-out windows. The model is called
# directly, so all of X_test goes through in one call rather than via `predict`.
pred_gru = gru_model(X_test)

## LSTM


In [None]:
# Single-layer LSTM encoder followed by a dense head that emits the whole
# target window at once; the output is reshaped to (OUTPUT_SIZE, 1).
lstm_model = tf.keras.Sequential(
    [
        tf.keras.layers.LSTM(
            32,
            return_sequences=False,  # only the final hidden state feeds the head
        ),
        tf.keras.layers.Dense(
            OUTPUT_SIZE * 1, kernel_initializer=tf.initializers.zeros()
        ),
        tf.keras.layers.Reshape([OUTPUT_SIZE, 1]),
    ]
)

lstm_model.compile(
    loss=tf.keras.losses.MeanSquaredError(),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=[tf.keras.metrics.MeanAbsoluteError()],
)

# `use_multiprocessing` is intentionally not passed: it only applies to
# generator / Sequence inputs, not to in-memory arrays, and newer Keras
# versions no longer accept it in `fit`.
history = lstm_model.fit(
    X_train,
    y_train,
    epochs=10,
    validation_split=0.2,
    batch_size=BATCH_SIZE,
    shuffle=True,
    callbacks=[
        # Stop once val_loss has not improved for 4 epochs and roll back to
        # the best weights seen.
        tf.keras.callbacks.EarlyStopping(
            monitor="val_loss",
            verbose=1,
            patience=4,
            mode="min",
            restore_best_weights=True,
        )
    ],
)

In [None]:
# Forward pass of the trained LSTM on the held-out windows.
pred_lstm = lstm_model(X_test)

In [22]:
# Pick one random test window and overlay both models' predictions on the target.
ind = randint(0, len(y_test) - 1)
print(ind)

fig = make_subplots(
    rows=1,
    cols=1,
    # Trailing comma matters: subplot_titles expects a sequence of titles, and
    # a bare string would be indexed character by character.
    subplot_titles=("Comparing",),
    shared_xaxes=True,
)

steps = list(range(len(y_test[ind])))
curves = {
    "GRU Prediction": np.array(pred_gru[ind]),
    "LSTM Prediction": np.array(pred_lstm[ind]),
    LABEL_COL: y_test[ind],
}
for curve_name, values in curves.items():
    fig.add_trace(
        go.Scatter(
            name=curve_name,
            x=steps,
            y=values.reshape(OUTPUT_SIZE),
        ),
        row=1,
        col=1,
    )

# Marker at the first target step that lies beyond the input window. A vline
# spans the full plot height, so it needs no hand-computed y-bounds and does
# not depend on which curves are drawn.
fig.add_vline(
    x=COVERAGE_STEPS,
    line={"color": "Black"},
    row=1,
    col=1,
)

fig.update_layout(
    xaxis_rangeslider_visible=False,
    showlegend=True,
    title_text="Comparing Prediction and real data",
)
fig.show()

4254
