# Import


In [2]:
import warnings

warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
from datetime import datetime
from scipy.signal import savgol_filter

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier

from keras.layers import (
    Input,
    Dense,
    Conv1D,
    MaxPooling1D,
    UpSampling1D,
    BatchNormalization,
    LSTM,
    RepeatVector,
    ConvLSTM1D,
    Flatten,
)
from keras.models import Model
from keras.models import model_from_json
from keras import regularizers
import keras
import tensorflow as tf

# Early-stopping callback shared by every `fit` call in this notebook: it
# watches `val_loss`, requires an improvement of at least `min_delta`, gives up
# after `patience` epochs without one, and restores the best weights seen.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    mode="auto",
    baseline=None,
    min_delta=1e-3,
    patience=8,
    restore_best_weights=True,
    verbose=1,
)

from quant_invest_lab.data_provider import download_crypto_historical_data
from modules.indicators import compute_lagging_regression

2023-05-18 15:19:00.427286: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-18 15:19:01.084302: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-05-18 15:19:01.084326: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-05-18 15:19:05.790530: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-

# Get data


In [3]:
symbol = "BTC-USDT"

# Keep the 3000 most recent hourly candles and add the close-to-close return.
# The whole preparation is one chain ending in `.copy()`, so `df_BTC` is an
# independent frame: later column assignments never write into a slice of the
# downloader's result (the original assigned onto an `.iloc` slice, which is a
# chained-assignment hazard that `warnings.filterwarnings("ignore")` hides).
df_BTC = (
    download_crypto_historical_data(symbol, "1hour")
    .iloc[-3000:]
    .assign(Return=lambda frame: frame["Close"].pct_change())
    # The first row of the slice has no previous close, so its return is NaN.
    .dropna()
    .copy()
)

# Savitzky-Golay smoothing of the close: 25-sample window, 2nd-order polynomial.
# NOTE(review): the smoothing window spans samples on both sides of each point,
# so `Close_filtered` at time t presumably also depends on later closes —
# confirm this is acceptable wherever the column is used as a target.
df_BTC["Close_filtered"] = savgol_filter(
    df_BTC["Close"].to_numpy(), window_length=25, polyorder=2, mode="nearest"
)
print(df_BTC.shape)
df_BTC.head()

(2999, 10)


Unnamed: 0_level_0,Timestamp,Open,Close,High,Low,Amount,Volume,Returns,Return,Close_filtered
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-01-13 16:00:00,1673622000.0,18998.9,19260.5,19309.5,18993.4,1160.606223,22247670.0,0.013769,0.013769,19228.152618
2023-01-13 17:00:00,1673626000.0,19260.6,19131.2,19271.6,19064.0,427.925713,8189067.0,-0.006713,-0.006713,19303.40856
2023-01-13 18:00:00,1673629000.0,19131.2,19256.2,19392.2,19106.7,485.314231,9359598.0,0.006534,0.006534,19400.605391
2023-01-13 19:00:00,1673633000.0,19256.2,19292.8,19384.4,19256.1,249.70499,4822401.0,0.001901,0.001901,19506.841217
2023-01-13 20:00:00,1673636000.0,19292.8,19360.6,19369.0,19247.7,222.083237,4286924.0,0.003514,0.003514,19630.632425


# Trying to denoise.


## Normalize data


In [None]:
# WINDOW: number of consecutive samples in every model input/target window.
# INDEXING_WINDOW: shift, in rows, between an input window and its target.
WINDOW = 50
INDEXING_WINDOW = 1

# Z-score each price series with its own full-sample mean and standard
# deviation (pandas' default sample standard deviation).
df_BTC["Close_filtered_norm"] = (
    df_BTC["Close_filtered"]
    .sub(df_BTC["Close_filtered"].mean())
    .div(df_BTC["Close_filtered"].std())
)
df_BTC["Close_norm"] = (
    df_BTC["Close"].sub(df_BTC["Close"].mean()).div(df_BTC["Close"].std())
)

## Build windowed samples and train/test split


In [None]:
# Build the supervised pairs for the denoiser:
#   X[i] = normalized raw close,      rows i                   .. i + WINDOW - 1
#   y[i] = normalized filtered close, rows i + INDEXING_WINDOW .. i + INDEXING_WINDOW + WINDOW - 1
# Both arrays have shape (n_samples, 1, WINDOW).

# Pull the columns out once so every window is a plain NumPy slice.
close_norm = df_BTC["Close_norm"].to_numpy()
close_filtered_norm = df_BTC["Close_filtered_norm"].to_numpy()

# Same sample count as the original `[:-INDEXING_WINDOW]` / `[INDEXING_WINDOW:]`
# trimming for a positive shift, but also valid for INDEXING_WINDOW == 0, where
# `[:-0]` would have silently produced an empty X.
n_samples = len(df_BTC) - WINDOW - INDEXING_WINDOW
if n_samples <= 0:
    raise ValueError(
        f"Not enough rows ({len(df_BTC)}) for WINDOW={WINDOW} "
        f"and INDEXING_WINDOW={INDEXING_WINDOW}"
    )

X = np.stack([close_norm[i : i + WINDOW] for i in range(n_samples)])[
    :, np.newaxis, :
]
y = np.stack(
    [
        close_filtered_norm[i + INDEXING_WINDOW : i + INDEXING_WINDOW + WINDOW]
        for i in range(n_samples)
    ]
)[:, np.newaxis, :]

# Chronological split. Consecutive windows overlap in all but one sample, so a
# shuffled split would put near-duplicates of every test window into the
# training set (and was unseeded, hence not reproducible).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, shuffle=False
)

In [None]:
# Sanity plot: the first input window, the input window INDEXING_WINDOW rows
# later, and the first target window, all mapped back to price units.
fig, ax_left = plt.subplots(1, figsize=(25, 8))

# Each series must be de-normalized with the statistics it was normalized with:
# X is built from `Close_norm` (raw-close mean/std), y from
# `Close_filtered_norm` (filtered-close mean/std). The original used the
# filtered statistics for X as well, which put the inputs on the wrong scale.
close_mean, close_std = df_BTC.Close.mean(), df_BTC.Close.std()
filtered_mean, filtered_std = (
    df_BTC.Close_filtered.mean(),
    df_BTC.Close_filtered.std(),
)

ax_left.plot(
    list(range(len(X[0][0]))),
    X[0][0] * close_std + close_mean,
    color="green",
    label="X1",
)
ax_left.plot(
    [i + INDEXING_WINDOW for i in range(len(X[INDEXING_WINDOW][0]))],
    X[INDEXING_WINDOW][0] * close_std + close_mean,
    color="green",
    label="X2",
    linestyle="--",
)
# y[0] starts INDEXING_WINDOW rows after X[0], hence the shifted x-axis; the
# axis length is taken from the same sample that is plotted.
ax_left.plot(
    [i + INDEXING_WINDOW for i in range(len(y[0][0]))],
    y[0][0] * filtered_std + filtered_mean,
    color="blue",
    label="y",
)
ax_left.legend(loc="upper left")
print(X_train.shape)

### Dense-only autoencoder


In [None]:
# Fully connected denoiser: WINDOW -> 10 -> 30 -> WINDOW, applied to inputs of
# shape (1, WINDOW). `encoder` exposes the 10-unit bottleneck on its own.
input_window = Input(shape=(1, WINDOW))
encoded = Dense(units=10, activation="relu")(input_window)
inter_1 = Dense(units=30, activation="relu")(encoded)
decoded = Dense(units=WINDOW, activation="linear")(inter_1)

autoencoder = Model(inputs=input_window, outputs=decoded)
encoder = Model(inputs=input_window, outputs=encoded)

autoencoder.summary()
autoencoder.compile(optimizer="adam", loss="mse")

# Fit on the full window set; Keras holds out the last 20% of the samples for
# validation, and `early_stop` monitors that validation loss.
history = autoencoder.fit(
    x=X,
    y=y,
    batch_size=1024,
    epochs=200,
    shuffle=True,
    validation_split=0.2,
    callbacks=[early_stop],
)

### With LSTM


In [None]:
# LSTM front-end followed by the dense stack with batch normalization between
# the dense layers. This cell rebinds `autoencoder`, `encoder` and `history`.
# NOTE(review): with Input(shape=(1, WINDOW)) the LSTM presumably sees a single
# timestep carrying WINDOW features — confirm that is the intended layout.
input_window = Input(shape=(1, WINDOW))
encoded = LSTM(
    units=4,
    kernel_initializer="he_uniform",
    return_sequences=True,
    name="encoder_1",
)(input_window)
encoded_2 = Dense(units=10, activation="relu")(encoded)
norm_1 = BatchNormalization()(encoded_2)
inter_1 = Dense(units=30, activation="relu")(norm_1)
norm_2 = BatchNormalization()(inter_1)
decoded = Dense(units=WINDOW, activation="linear")(norm_2)

autoencoder = Model(inputs=input_window, outputs=decoded)
# The encoder stops at the LSTM output, before the dense layers.
encoder = Model(inputs=input_window, outputs=encoded)

autoencoder.summary()
autoencoder.compile(optimizer="adam", loss=tf.keras.losses.MeanSquaredError())

history = autoencoder.fit(
    x=X,
    y=y,
    batch_size=1024,
    epochs=200,
    shuffle=True,
    validation_split=0.2,
    callbacks=[early_stop],
)

In [None]:
# Run the most recently trained autoencoder over every input window, then print
# the input and output sample counts on separate lines as a quick size check.
predictions = autoencoder.predict(X)
print(len(X), len(predictions), sep="\n")

In [None]:
from random import randint

# Draw one sample at random. The draw is negated, so a non-zero value addresses
# the arrays from the end; 0 still selects the first sample.
index = -randint(0, len(y) - 1)

fig, ax_left = plt.subplots(1, figsize=(25, 8))

# Map both windows back to price units with the filtered-close statistics.
filtered_std = df_BTC.Close_filtered.std()
filtered_mean = df_BTC.Close_filtered.mean()
target_window = y[index][0] * filtered_std + filtered_mean
predicted_window = predictions[index][0] * filtered_std + filtered_mean

ax_left.plot(
    list(range(len(target_window))),
    target_window,
    color="green",
    label="Y",
)
# The prediction is passed through the same Savitzky-Golay filter (25, 2)
# before plotting, so the blue curve is the smoothed model output, not the raw one.
ax_left.plot(
    list(range(len(predicted_window))),
    savgol_filter(predicted_window, 25, 2, mode="nearest"),
    color="blue",
    label="Y pred",
)
ax_left.legend(loc="upper left")