In [62]:
import pickle
import sys
from dataclasses import dataclass
from datetime import datetime, timedelta
from importlib import reload
from pathlib import Path

import get_data
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import pytz
import talib
import tensorflow as tf
import yfinance as yf
from datasets import assets
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from tensorflow.keras import layers
from tensorflow.keras.activations import sigmoid, tanh
from tensorflow.keras.models import Model
from tools import inspect_code, plotting, training, wandb_api
from torch.utils.data import DataLoader, Dataset, TensorDataset
from tqdm import tqdm
from wandb.keras import WandbCallback

import wandb
from utils import DataModule, _concatenate_indicators

repo_path = Path().resolve().parent
log_wandb = True
tf.config.list_physical_devices("CPU")[0].device_type


'CPU'

In [63]:
if log_wandb:
    import wandb

    wandb_api.login()
    run = wandb.init(
        project="crypto-prediction",
        group="Initial Gan",
        job_type="test",
    )


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/matias/.netrc
2022-04-16 23:54:54.014055: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-04-16 23:54:54.014128: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [64]:
config = {}
config["job_type"] = run.job_type if "run" in locals() else "test"
config["train_val_test_split"] = [0, 0, 1]
config["interval"] = "1d"
config["n_estimator"] = 1
config["max_depth"] = 1

dm = DataModule(
    config=config,
    compute_metrics=_concatenate_indicators,
    csv_file=repo_path / "DATE.csv",
)
dm.setup()


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

In [57]:
interesting_tickers = [
    "XRP",
    "EOS",
    "BTC",
    "NEO",
    "ALGO",
    "SNX",
    "ETH",
    "AAVE",
    "BNB",
    "BTC",
    "DOT",
    "XTZ",
    "TRX",
    "XRP",
    "ETH",
    "ADA",
    "MATIC",
    "DOGE",
    "KLAY",
    "AVAX",
    "GRT",
    "SAND",
    "SOL",
    "BNB",
    "MANA",
    "ATOM",
    "VET",
    "AVAX",
    "ATOM",
    "FLOKI",
    "MANA",
    "OMG",
    "LUNA",
]


In [60]:
max_precision = 0

n_estimators = [2000]
max_depths = [10]
intervals = ["1d"]
forests = []

inputs = [
    {
        "ticker": ticker,
        "beginning_date": datetime.now() - timedelta(days=1600),
        "ending_date": datetime.now() - timedelta(days=400),
    }
    for ticker in interesting_tickers
]

for interval in intervals:
    for n_row, n_estimator in enumerate(n_estimators):
        for n_col, max_depth in enumerate(max_depths):
            if log_wandb:
                import wandb

                wandb_api.login()
                run = wandb.init(
                    project="crypto-prediction",
                    group="Initial Gan",
                    job_type="test",
                )

            if log_wandb:
                config = wandb.config
            else:
                config = {}

            config["job_type"] = run.job_type if "run" in locals() else "test"
            config["train_val_test_split"] = [0.7, 0.15, 0.15]
            config["interval"] = interval
            config["n_estimator"] = n_estimator
            config["max_depth"] = max_depth

            dm = DataModule(
                config=config,
                compute_metrics=_concatenate_indicators,
                inputs = inputs
            )
            dm.setup()
            train_dataset = dm.train_dataset
            val_dataset = dm.val_dataset

            rf = RandomForestClassifier(
                n_estimators=config["n_estimator"], max_depth=config["max_depth"]
            )
            rf.fit(train_dataset[0], train_dataset[1])
            predictions = rf.predict(val_dataset[0])
            predictions_proba = rf.predict_proba(val_dataset[0])
            recall = recall_score(
                val_dataset[1].reshape(-1, 1),
                predictions.reshape(-1, 1),
            )
            precision = precision_score(
                val_dataset[1].reshape(-1, 1),
                predictions.reshape(-1, 1),
            )
            accuracy = accuracy_score(
                val_dataset[1].reshape(-1, 1),
                predictions.reshape(-1, 1),
            )
            print(
                "interval",
                interval,
                "n_estimator:",
                config["n_estimator"],
                "max_depth:",
                config["max_depth"],
            )
            print("Precision:", precision)
            print("Recall:", recall)
            print("Accuracy:", accuracy)
            print("----------------------------------------")
            wandb.log({"precision": precision, "recall": recall, "accuracy": accuracy})

            dir_path = run.dir
            filename_datamodule = Path(dir_path) / "datamodule_script.txt"
            with open(filename_datamodule, "w") as file:
                file.write(inspect_code.get_class_code(type(dm)))

            wandb.sklearn.plot_roc(val_dataset[1], predictions_proba, ["down", "up"])
            wandb.sklearn.plot_precision_recall(
                val_dataset[1], predictions_proba, ["down", "up"]
            )

            if precision > max_precision:
                print("Saving model")
                # max_precision = precision
                with open("rf.pkl", "wb") as file:
                    pickle.dump(rf, file)
                wandb.save("rf.pkl")
                Path("rf.pkl").unlink()
            run.finish()

            del dm
            del rf







VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

2022-04-16 23:41:03.065155: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-04-16 23:41:03.065201: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

VBox(children=(Label(value='0.098 MB of 0.099 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.990915…

0,1
accuracy,▁
precision,▁
recall,▁

0,1
accuracy,0.75327
precision,0.76044
recall,0.74157
