In [1]:
from copy import deepcopy
from datetime import date, datetime, timedelta
from functools import partial
from importlib import reload
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.io as pio
import tensorflow as tf
from datapoints import assets
from query_datasets import get_data
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import layers
from tensorflow.keras.activations import sigmoid, tanh
from tensorflow.keras.metrics import Accuracy, Precision, Recall
from tensorflow.keras.models import Model
from tools import dataframe, training, wandb_api
from tqdm import tqdm
from wandb.keras import WandbCallback

log_wandb = True
repo_path = Path().resolve().parent
pio.renderers.default = "browser"


2022-05-25 23:34:17.126498: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-05-25 23:34:17.126577: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
if log_wandb:
    import wandb

    wandb_api.login()
    run = wandb.init(
        project="crypto_prediction",
        group="Adaboost LSTM",
        job_type="test",
    )
    config = wandb.config

else:
    config = {}

config["job_type"] = run.job_type if "run" in locals() else "test"
config["train_val_test_split"] = [0.66, 1-0.66, 0]
config["interval"] = "1d"
config["timesteps"] = 8
config["lag"] = 1
config["ago"] = 3000
config["batch_size"] = 64
config["learning_rate"] = 0.0003

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmatiasetcheverry[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/matias/.netrc
2022-05-25 23:34:26.004461: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-05-25 23:34:26.004566: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [4]:
import pickle

root_path = Path().resolve() / "tmp"
pickle.load(open(root_path / "adaboostlstmall.pkl", 'rb'))

array([  896.23,   914.  ,   914.  , ..., 40667.07, 46416.45, 52427.8 ])

In [3]:
interesting_tickers = [
    # "XRP",
    "EOS",
    # "NEO",
    # "ALGO",
    # "SNX",
    # "ETH",
    # "AAVE",
    # "BNB",
    # "BTC",
    # "DOT",
    # "XTZ",
    # "TRX",
    # "ADA",
    # "MATIC",
    # "DOGE",
    # "KLAY",
    # "AVAX",
    # "GRT",
    # "SAND",
    # "SOL",
    # "MANA",
    # "ATOM",
    # "VET",
    # "OMG",
]


In [4]:
def compute_features(data, timesteps=8, lag=5):
    features = data.copy(deep=True)

    for i in range(timesteps):
        for col in data.columns:
            features[f"{col}_{i}"] = features[col] #.shift(i)
            # features[f"open_{i}"] = features.loc[:, "Open"].shift(i).pct_change()
            # features[f"high_{i}"] = features.loc[:, "High"].shift(i).pct_change()
            # features[f"low_{i}"] = features.loc[:, "Low"].shift(i).pct_change()
            # features[f"close_{i}"] = features.loc[:, "Close"].shift(i).pct_change()
            # features[f"volume_{i}"] = features.loc[:, "Volume"].shift(i).pct_change()

    labels = features["Close"].shift(-lag)  # - features["Open"].shift(-1)
    features = features.drop(labels=data.columns, axis=1)

    scaler = MinMaxScaler()
    features = features.replace(
        to_replace=[np.inf, -np.inf, float("inf"), float("inf")],
        value=0,
    ).dropna()
    df_scaled = scaler.fit_transform(features)
    df_scaled = pd.DataFrame(df_scaled, columns=features.columns, index=features.index)
    return df_scaled, labels


def create_asset(
    ticker,
    interval,
    beginning_date,
    ending_date,
    compute_features=lambda x: x,
):

    klines = get_data.download_klines(
        ticker,
        interval,
        beginning_date=beginning_date,
        ending_date=ending_date,
        directory=Path().resolve() / "tmp",
    )
    trends = get_data.download_trends(
        ticker,
        interval,
        beginning_date=beginning_date,
        ending_date=ending_date,
        directory=Path().resolve() / "tmp",
    )
    blockchain_infos = get_data.download_blockchain(
        "BTC",
        interval,
        beginning_date=beginning_date,
        ending_date=ending_date,
        directory=Path().resolve() / "tmp",
    )
    santiment = get_data.download_santiment(
        "BTC",
        interval,
        beginning_date=beginning_date,
        ending_date=ending_date,
        directory=Path().resolve() / "tmp",
    )
    data = pd.concat([klines, blockchain_infos, trends, santiment], axis=1).astype("float32")
    data = data.replace(
        to_replace=[np.inf, -np.inf, float("inf"), float("inf")],
        value=0,
    )

    features, labels = compute_features(data)

    return assets.TrainAsset(
        ticker=ticker,
        df=data,
        labels=labels,
        features=features,
        interval=interval,
        compute_features=compute_features,
    )


class DataModule:
    def __init__(
        self,
        config,
        compute_features=None,
        inputs=None,
        save_klines=True,
    ):
        self.config = config
        self.compute_features = compute_features
        self.inputs = inputs
        self.save_klines = save_klines

        self.setup()

    def setup(self):
        self.train_datapoints = []
        for input in self.inputs:
            dp = create_asset(
                **input,
                interval=self.config["interval"],
                compute_features=self.compute_features,
            )
            if dp == []:
                continue
            dp.df = dp.df.dropna()
            dp.labels = dp.labels.dropna()
            dp._features = dp._features.dropna()

            common_index = dp.df.index.intersection(dp.labels.index)
            common_index = common_index.intersection(dp._features.index)

            dp.df = dp.df.loc[common_index]
            dp.labels = dp.labels.loc[common_index]
            dp._features = dp._features.loc[common_index]

            train_dp = assets.TrainAsset(
                ticker=input["ticker"],
                df=dp.df,
                labels=dp.labels,
                features=dp._features,
                interval=self.config["interval"],
                compute_features=self.compute_features,
            )
            if not train_dp.isempty:
                self.train_datapoints.append(train_dp)
            else:
                print(
                    f"{dp.ticker} is empty from {input['beginning_date']} to {input['ending_date']}."
                )

    def clean_datapoints(self, datapoints):
        return datapoints

    def concat_and_shuffle(self, features, labels):
        assert len(features) == len(labels)
        _features = np.concatenate(features, axis=0)
        _labels = np.concatenate(labels, axis=0)
        assert len(_features) == len(_labels)
        p = np.random.permutation(len(_features))
        return _features[p], _labels[p]

    def nest_train_test_val_split(
        self, datapoints, offset, train_size, val_size, test_size=0
    ):
        train_features = []
        train_labels = []
        val_features = []
        val_labels = []
        test_datapoints = {}
        for dp in datapoints:
            train_beginning = offset
            train_ending = train_beginning + train_size
            val_beginning = train_ending
            val_ending = val_beginning + val_size

            test_beginning = val_beginning
            test_ending = val_ending
            # test_beginning = val_ending
            # test_ending = test_beginning + test_size

            train_features.append(dp._features[train_beginning:train_ending])
            train_labels.append(dp.labels[train_beginning:train_ending])
            val_features.append(dp._features[val_beginning:val_ending])
            val_labels.append(dp.labels[val_beginning:val_ending])

            test_datapoints[dp.ticker] = assets.TrainAsset(
                ticker=dp.ticker,
                df=dp.df.iloc[test_beginning:test_ending],
                labels=dp.labels.iloc[test_beginning:test_ending],
                features=dp._features.iloc[test_beginning:test_ending],
                interval=dp.interval,
                compute_features=dp.compute_features,
            )

        return (
            self.concat_and_shuffle(train_features, train_labels),
            self.concat_and_shuffle(val_features, val_labels),
            test_datapoints,
        )

    def _init_train_val_data(self, train_datapoints):
        train_datapoints = self.clean_datapoints(train_datapoints)
        if self.config["train_val_test_split"][0] > 1:
            train_size = int(self.config["train_val_test_split"][0])
        else:
            train_size = int(
                len(train_datapoints[0].df) * self.config["train_val_test_split"][0]
            )
        if self.config["train_val_test_split"][1] > 1:
            val_size = int(self.config["train_val_test_split"][1])
        else:
            val_size = int(
                len(train_datapoints[0].df) * self.config["train_val_test_split"][1]
            )
        if self.config["train_val_test_split"][2] > 1:
            test_size = int(self.config["train_val_test_split"][2])
        else:
            test_size = int(
                len(train_datapoints[0].df) * self.config["train_val_test_split"][2]
            )
        print(f"train_size: {train_size}, val_size: {val_size}, test_size: {test_size}")
        max_offset = max(
            len(train_datapoints[0].df) - (train_size + val_size + test_size), 1
        )
        train_datasets = []
        val_datasets = []
        test_datapoints = []
        for offset in range(0, max_offset, val_size + test_size):
            train_dataset, val_dataset, test_datapoint = self.nest_train_test_val_split(
                train_datapoints, offset, train_size, val_size, test_size
            )
            train_datasets.append(train_dataset)
            val_datasets.append(val_dataset)
            test_datapoints.append(test_datapoint)
        return train_datasets, val_datasets, test_datapoints


config["job_type"] = run.job_type if "run" in locals() else "test"
config["train_val_test_split"] = [0.66, 1-0.66, 0]
config["interval"] = "1d"
config["timesteps"] = 8
config["lag"] = 1
config["ago"] = 3000
config["batch_size"] = 64
config["learning_rate"] = 0.0003

inputs = [
    {
        "ticker": ticker,
        "beginning_date": datetime.combine(date.today(), datetime.min.time())
        - dataframe.convert_to_timedelta(config["interval"], ago=config["ago"]),
        "ending_date": datetime(
            2022, 5, 21
        ),  # datetime.combine(date.today(), datetime.min.time()),
    }
    for ticker in interesting_tickers
]

dm = DataModule(
    config,
    partial(compute_features, timesteps=config["timesteps"], lag=config["lag"]),
    inputs,
    save_klines=True,
)
train_datasets, val_datasets, test_datapoints = dm._init_train_val_data(
    dm.train_datapoints
)
print(f"Length training dataset: {len(train_datasets)}")
print(f"Length validation dataset: {len(train_datasets)}")
print(f"Length test dataset: {len(train_datasets)}")

print(f"Shape training sample: {train_datasets[0][0].shape}")
print(f"Shape training sample: {train_datasets[0][0].shape[1] / config['timesteps']}")

print(f"Shape validation sample: {val_datasets[0][0].shape}")
config["input_size"] = train_datasets[0][0].shape[1]
assert (
    config["input_size"] // config["timesteps"]
    == config["input_size"] / config["timesteps"]
)


train_size: 1089, val_size: 561, test_size: 0
Length training dataset: 1
Length validation dataset: 1
Length test dataset: 1
Shape training sample: (1089, 152)
Shape training sample: 19.0
Shape validation sample: (561, 152)



DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider using pd.concat instead.  To get a de-fragmented frame, use `newframe = frame.copy()`



In [5]:
class LSTMModel(Model):
    def __init__(self, config):
        super().__init__()
        self.config = config

        self.model = self.build_model()

        self.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=config["learning_rate"]),
            loss="mse",
            metrics=["mae", "mse"],
        )

    def build_model(self):
        inputs = layers.Input(
            shape=(self.config["input_size"]),
        )
        outputs = layers.Reshape(
            (
                self.config["timesteps"],
                -1,
            )
        )(inputs)
        outputs = layers.LSTM(512, activation=layers.ReLU(), return_sequences=True)(
            outputs
        )
        outputs = layers.LSTM(256, activation=layers.ReLU(), return_sequences=True)(
            outputs
        )
        outputs = layers.LSTM(128, activation=layers.ReLU(), dropout=0.3)(outputs)
        outputs = layers.Flatten()(outputs)
        # outputs = layers.Dense(128, activation=layers.ReLU())(outputs)
        outputs = layers.Dense(1, activation=None)(outputs)
        return Model(inputs=inputs, outputs=outputs, name="model")

    def call(self, klines):
        return self.model(klines)


def metrics_precision(targets, predictions):
    bool_predictions = tf.math.greater(predictions, 0)
    bool_targets = tf.math.greater(targets, 0)
    tp = tf.math.reduce_sum(tf.cast(bool_predictions[bool_targets], tf.float32))
    return tp / tf.math.reduce_sum(tf.cast(bool_predictions, tf.float32))




In [6]:
checkpoint_path = Path(run.dir) if "run" in locals() else Path("model/")
script_path = Path(run.dir) if "run" in locals() else Path(".")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    # filepath=checkpoint_path / "model.{epoch:02d}-val_loss-{val_loss:.2f}",
    filepath=checkpoint_path / "best_model",
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    save_format="tf",
)
script_callback = training.ScriptCheckpoint(dirpath=script_path, datamodule=dm)

train_dataset = train_datasets[0]
val_dataset = val_datasets[0]

train_data = tf.data.Dataset.from_tensor_slices(
    (train_dataset[0], train_dataset[1])
).shuffle(len(train_dataset[0]), reshuffle_each_iteration=True, seed=23).batch(
    config["batch_size"],
    drop_remainder=False,
    num_parallel_calls=tf.data.AUTOTUNE,
)
val_data = tf.data.Dataset.from_tensor_slices(
    (val_dataset[0], val_dataset[1])
).shuffle(len(val_dataset[0]), reshuffle_each_iteration=True, seed=23).batch(
    config["batch_size"],
    drop_remainder=False,
    num_parallel_calls=tf.data.AUTOTUNE,
)

model = LSTMModel(config)
model.fit(
    train_data,
    validation_data=val_data,
    epochs=400,
    callbacks=[checkpoint_callback, script_callback, WandbCallback()],
)


2022-05-25 20:44:04.269071: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-05-25 20:44:04.269115: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-05-25 20:44:04.269132: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (Matias): /proc/driver/nvidia/version does not exist
2022-05-25 20:44:04.269459: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/400



INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets
[34m[1mwandb[0m: [32m[41mERROR[0m Can't save model, h5py returned error: Saving the model to HDF5 format requires the model to be a Functional model or a Sequential model. It does not work for subclassed models, because such models are defined via the body of a Python method, which isn't safely serializable. Consider saving to the Tensorflow SavedModel format (by setting save_format="tf") or using `save_weights`.


Epoch 2/400
Epoch 3/400



INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400



INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


Epoch 8/400



INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


Epoch 9/400
Epoch 10/400
Epoch 11/400



INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


Epoch 12/400



INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


Epoch 13/400
Epoch 14/400
Epoch 15/400



INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400



INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400



INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400
Epoch 56/400
Epoch 57/400
Epoch 58/400
Epoch 59/400
Epoch 60/400
Epoch 61/400
Epoch 62/400
Epoch 63/400
Epoch 64/400
Epoch 65/400



INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


Epoch 66/400
Epoch 67/400
Epoch 68/400
Epoch 69/400
Epoch 70/400
Epoch 71/400
Epoch 72/400



INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


Epoch 73/400



INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


Epoch 74/400
Epoch 75/400



INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


INFO:tensorflow:Assets written to: /home/matias/crypto_prediction/ada_lstm/wandb/run-20220525_204401-31ronsbi/files/best_model/assets


Epoch 76/400
Epoch 77/400
Epoch 78/400
Epoch 79/400

KeyboardInterrupt: 

In [7]:
classifiers = []
for train_dataset, val_dataset in zip(train_datasets, val_datasets):
    train_data = tf.data.Dataset.from_tensor_slices(
        (train_dataset[0], train_dataset[1])
    ).shuffle(len(train_dataset[0]), reshuffle_each_iteration=True, seed=23).batch(
        config["batch_size"],
        drop_remainder=True,
        num_parallel_calls=tf.data.AUTOTUNE,
    )
    val_data = tf.data.Dataset.from_tensor_slices(
        (val_dataset[0], val_dataset[1])
    ).shuffle(len(val_dataset[0]), reshuffle_each_iteration=True, seed=23).batch(
        config["batch_size"],
        drop_remainder=True,
        num_parallel_calls=tf.data.AUTOTUNE,
    )

    model = LSTMModel(config)
    model.fit(
        train_data,
        validation_data=val_data,
        epochs=125,
        # steps_per_epoch=3,
    )
    classifiers.append(model)


Epoch 1/125
Epoch 2/125
Epoch 3/125
Epoch 4/125
Epoch 5/125
Epoch 6/125
Epoch 7/125
Epoch 8/125
Epoch 9/125
Epoch 10/125
Epoch 11/125
Epoch 12/125
Epoch 13/125
Epoch 14/125
Epoch 15/125
Epoch 16/125
Epoch 17/125
Epoch 18/125
Epoch 19/125
Epoch 20/125
Epoch 21/125
Epoch 22/125
Epoch 23/125
Epoch 24/125
Epoch 25/125
Epoch 26/125
Epoch 27/125
Epoch 28/125
Epoch 29/125
Epoch 30/125
Epoch 31/125
Epoch 32/125
Epoch 33/125
Epoch 34/125
Epoch 35/125
Epoch 36/125
Epoch 37/125
Epoch 38/125
Epoch 39/125
Epoch 40/125
Epoch 41/125
Epoch 42/125
Epoch 43/125
Epoch 44/125
Epoch 45/125
Epoch 46/125
Epoch 47/125
Epoch 48/125
Epoch 49/125
Epoch 50/125
Epoch 51/125
Epoch 52/125
Epoch 53/125
Epoch 54/125
Epoch 55/125
Epoch 56/125
Epoch 57/125
Epoch 58/125
Epoch 59/125
Epoch 60/125
Epoch 61/125
Epoch 62/125
Epoch 63/125
Epoch 64/125
Epoch 65/125
Epoch 66/125
Epoch 67/125
Epoch 68/125
Epoch 69/125
Epoch 70/125
Epoch 71/125
Epoch 72/125
Epoch 73/125
Epoch 74/125
Epoch 75/125
Epoch 76/125
Epoch 77/125
Epoch 78

KeyboardInterrupt: 

In [None]:
model.save("best_model")



INFO:tensorflow:Assets written to: best_model/assets


INFO:tensorflow:Assets written to: best_model/assets


In [11]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
import keras

dp = test_datapoints[0]["EOS"]
x = tf.data.Dataset.from_tensor_slices(
    dp.features
).batch(
    config["batch_size"],
    drop_remainder=False,
    num_parallel_calls=tf.data.AUTOTUNE,
)
dp.predictions = np.squeeze(model.predict(x, batch_size=1))
print(dp.predictions.shape)
labels = dp.labels.to_numpy()

mse = mean_squared_error(labels, dp.predictions)
mae = mean_absolute_error(labels, dp.predictions)

bool_predictions = dp.predictions > 0
bool_labels = labels > 0
dp.precision, dp.recall, dp.accuracy = training.precision_recall_accuracy_metrics(bool_predictions, bool_labels)
print(f"MSE: {mse}")
print(f"MAE: {mae}")
print(f"PRECISION: {dp.precision}")
print(f"RECALL: {dp.recall}")
print(f"ACCURACY: {dp.accuracy}")

(561,)
MSE: 0.6992290019989014
MAE: 0.7071053385734558
PRECISION: 1.0
RECALL: 1.0
ACCURACY: 1.0


In [19]:
for index, (classifier, test_datapoint) in enumerate(zip(classifiers, test_datapoints)):
    if index == 0:
        base_datapoints = {ticker: deepcopy(dp) for ticker, dp in test_datapoint.items()}
        for ticker in base_datapoints.keys():
            x = tf.data.Dataset.from_tensor_slices(
                base_datapoints[ticker].features
            ).batch(
                config["batch_size"],
                drop_remainder=False,
                num_parallel_calls=tf.data.AUTOTUNE,
            )
            base_datapoints[ticker].predictions = classifier.predict(
                x
            )
            
            # base_datapoints[ticker].probabilities = classifier.predict_proba(base_datapoints[ticker].features)[:, 1]
    else:
        for ticker, dp in test_datapoint.items():
            base_datapoints[ticker].df = pd.concat(
                [base_datapoints[ticker].df, dp.df]
            )
            base_datapoints[ticker].labels = pd.concat(
                [base_datapoints[ticker].labels, dp.labels]
            )
            base_datapoints[ticker]._features = pd.concat(
                (base_datapoints[ticker]._features, dp._features)
            )
            x = tf.data.Dataset.from_tensor_slices(
                dp.features
            ).batch(
                config["batch_size"],
                drop_remainder=False,
                num_parallel_calls=tf.data.AUTOTUNE,
            )
            base_datapoints[ticker].predictions = np.concatenate(
                [
                    base_datapoints[ticker].predictions,
                    classifier.predict(
                x
            ),
                ]
            )
            # base_datapoints[dp.ticker].probabilities = np.concatenate(
            #     [
            #         base_datapoints[dp.ticker].probabilities,
            #         classifier.predict_proba(dp.features)[:, 1],
            #     ]
            # )

base_precision = 0
base_recall = 0
base_accuracy = 0
for ticker, dp in base_datapoints.items():
    bool_predictions = dp.predictions > 0
    bool_labels = dp.labels > 0
    (
        base_datapoints[ticker].precision,
        base_datapoints[ticker].recall,
        base_datapoints[ticker].accuracy,
    ) = training.precision_recall_accuracy_metrics(bool_predictions, bool_labels)

    base_precision += base_datapoints[ticker].precision
    base_recall += base_datapoints[ticker].recall
    base_accuracy += base_datapoints[ticker].accuracy

    print(
        f"{ticker}: {base_datapoints[ticker].precision} \t {base_datapoints[ticker].recall} \t {base_datapoints[ticker].accuracy}"
    )
print("AVERAGE")
print(base_precision/len(base_datapoints.values()), base_recall/len(base_datapoints.values()), base_accuracy/len(base_datapoints.values()))


EOS: 1.0 	 1.0 	 1.0
AVERAGE
1.0 1.0 1.0


In [25]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

m = 1

fig = make_subplots(
    rows=m,
    cols=1,
    subplot_titles=[dp.ticker for dp in base_datapoints.values()],
    horizontal_spacing=0.0001,
    vertical_spacing=0.1,
    shared_xaxes=True,
)

for index, (ticker, dp) in enumerate(base_datapoints.items()):
    if index >= m:
        break
    predictions = dp.predictions
    labels = dp.labels
    df = dp.df

    fig.add_trace(
        go.Scatter(
            x=labels.index,
            y=labels,
            # showlegend=False,
            line=dict(color="black", width=1),
            name=f"{dp.ticker} close",
        ),
        row=index + 1,
        col=1,
    )
    # next_close =  df["Open"].shift(-1) + np.squeeze(predictions)

    fig.add_trace(
        go.Scatter(
            # x=df.index[1:],
            # y=np.squeeze(predictions)[:-1],
            x=labels.index, 
            y=np.squeeze(predictions),
            # showlegend=False,
            line=dict(color="red", width=1),
            name=f"{dp.ticker} predictions",
        ),
        row=index + 1,
        col=1,
    )

fig.update_layout(height=450*m, width=1000, margin=dict(l=10, r=20, t=30, b=10))
fig.show()


In [None]:
test_datapoints[0]["BTC"].df.index

DatetimeIndex(['2016-12-04 00:00:00+00:00', '2016-12-05 00:00:00+00:00',
               '2016-12-06 00:00:00+00:00', '2016-12-07 00:00:00+00:00',
               '2016-12-08 00:00:00+00:00', '2016-12-09 00:00:00+00:00',
               '2016-12-10 00:00:00+00:00', '2016-12-11 00:00:00+00:00',
               '2016-12-12 00:00:00+00:00', '2016-12-13 00:00:00+00:00',
               ...
               '2017-06-12 00:00:00+00:00', '2017-06-13 00:00:00+00:00',
               '2017-06-14 00:00:00+00:00', '2017-06-15 00:00:00+00:00',
               '2017-06-16 00:00:00+00:00', '2017-06-17 00:00:00+00:00',
               '2017-06-18 00:00:00+00:00', '2017-06-19 00:00:00+00:00',
               '2017-06-20 00:00:00+00:00', '2017-06-21 00:00:00+00:00'],
              dtype='datetime64[ns, UTC]', name='Datetime', length=200, freq=None)

In [None]:
from datetime import datetime, timedelta
import vectorbt as vbt

vbt.settings.portfolio["fees"] = 0.001
vbt.settings.portfolio["slippage"] = 0.0025
data = vbt.Data.from_data(
    {dp.ticker: dp.df for dp in base_datapoints.values()},
    download_kwargs={},
)

# probabilities = vbt.Data.from_data(
#     {dp.ticker: dp.probabilities for dp in base_datapoints.values()},
#     download_kwargs={},
# )
# predictions = pd.DataFrame(
#     probabilities.get().values.argsort(axis=1) > 20,
#     columns=probabilities.get().columns,
#     index=data.get("Open").index,
# )
predictions = vbt.Data.from_data(
    {dp.ticker: (dp.predictions) & (dp.df["Low"].shift(-1) > dp.df["Open"]) for dp in base_datapoints.values()},
    download_kwargs={},
).get().set_index(data.get("Open").index)



In [None]:
# def apply_rf(*args, **kwargs):
#     proba = kwargs["proba"]
#     price = np.squeeze(np.stack(args[1:], axis=1))
#     length = price.shape[0]
#     try:
#         probabilities = rf.predict_proba(price)
#         direction = np.argmax(probabilities > proba, axis=1)
#     except ValueError:
#         direction = np.zeros(length)
#     return direction


# def plot_trix(trix, signal, column=None, fig=None):
#     fig = trix.vbt.plot(fig=fig)
#     fig = signal.vbt.plot(fig=fig)


# RF = vbt.IndicatorFactory(
#     input_names=list(data.data.values())[0].columns[6:],
#     output_names=["direction"],
#     # subplots=dict(
#     #     plot_outputs=dict(
#     #         plot_func=plot_trix,
#     #         resolve_trix=True,
#     #         resolve_signal=True,
#     #     )
#     # ),
# ).from_apply_func(
#     apply_rf,
#     proba=0.5,
# )
# direction = RF.run(
#     *data.get()[6:],
#     run_unique=True,
#     short_name="entries",
#     per_column=True,
#     pass_col=True
# )
# trend_ma = vbt.MA.run(data.get("Close"), window=50, ewm=True, run_unique=True)

ohlcstcx = vbt.OHLCSTCX.run(
    entries=predictions,
    open=data.get("Open"),
    high=data.get("High"),
    low=data.get("Low"),
    close=data.get("Close"),
    sl_stop=1,
    sl_trail=True,
    tp_stop=1,
)

exits = ~predictions | ohlcstcx.exits
entries =  ohlcstcx.entries


pf = vbt.Portfolio.from_signals(
    data.get("Open"),
    entries=entries,
    exits=exits,
    freq=timedelta(hours=1),
)
total_return = pf.total_return()
total_return.sort_values(ascending=False)
# total_return, total_return[total_return != 0].mean(), total_return[
#     total_return != 0
# ].median()

ohlcstcx_sl_stop  ohlcstcx_sl_trail  ohlcstcx_tp_stop  symbol
1                 True               1                 SNX       884.713758
                                                       AAVE      217.688148
                                                       GRT       208.297247
                                                       AVAX      206.120314
                                                       VET       114.720291
                                                       MANA       98.681459
                                                       SAND       98.457012
                                                       SOL        97.924417
                                                       OMG        96.581529
                                                       ATOM       62.458016
                                                       XTZ        56.012370
                                                       NEO        49.037215
                          

In [None]:
pf.loc[total_return.sort_values(ascending=False).index[-1]].stats()

Start                         2022-01-23 13:00:00+00:00
End                           2022-05-11 20:00:00+00:00
Period                                108 days 08:00:00
Start Value                                       100.0
End Value                                    358.076433
Total Return [%]                             258.076433
Benchmark Return [%]                         -17.962077
Max Gross Exposure [%]                            100.0
Total Fees Paid                              167.214081
Max Drawdown [%]                               4.077526
Max Drawdown Duration                  12 days 09:00:00
Total Trades                                        365
Total Closed Trades                                 365
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                  49.863014
Best Trade [%]                                 7.533664
Worst Trade [%]                               -0

In [None]:
pf.loc[total_return.sort_values(ascending=False).index[1]].stats()

Start                         2022-01-23 13:00:00+00:00
End                           2022-05-11 20:00:00+00:00
Period                                108 days 08:00:00
Start Value                                       100.0
End Value                                  21868.814841
Total Return [%]                           21768.814841
Benchmark Return [%]                         -47.073791
Max Gross Exposure [%]                            100.0
Total Fees Paid                             2833.322626
Max Drawdown [%]                               0.811995
Max Drawdown Duration                   1 days 08:00:00
Total Trades                                        394
Total Closed Trades                                 394
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                  85.532995
Best Trade [%]                                12.103071
Worst Trade [%]                               -0