## Imports

In [2]:
import pandas as pd

from sklearn import preprocessing
from collections import deque
import numpy as np
import random
import time

import tensorflow as tf
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.regularizers import l1
from tensorflow.keras.regularizers import l2
# pt CuDNNLSTM look at this https://stackoverflow.com/questions/60468385/is-there-cudnnlstm-or-cudnngru-alternative-in-tensorflow-2-0

from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

The next steps are as follow:
### 1. create the main dataframe
### 2. load the data

In [3]:
main_df = pd.DataFrame()

ratios = ["BCHUSDT-data", "BTCUSDT-data", "ETHUSDT-data", "LTCUSDT-data"]
# ratios = ["BCH-USD", "BTC-USD", "ETH-USD", "LTC-USD"]

for ratio in ratios:
    dataset = pd.read_csv(
        f"crypto_data/{ratio}.csv")
    # we only need the close and volume
    dataset.rename(
        columns={"close": f"{ratio}-close", "volume": f"{ratio}-volume"}, inplace=True
    )
    # print(dataset.head())

    # set the time as the index
    dataset.set_index("time", inplace=True)

    # drop the ones we don't need
    dataset.drop(["low", "high", "open"], axis=1, inplace=True)

    # merge all the columns on the index
    if main_df.empty:
        main_df = dataset
    else:
        main_df = main_df.join(dataset)

In [4]:
main_df.head()

Unnamed: 0_level_0,BCHUSDT-data-close,BCHUSDT-data-volume,BTCUSDT-data-close,BTCUSDT-data-volume,ETHUSDT-data-close,ETHUSDT-data-volume,LTCUSDT-data-close,LTCUSDT-data-volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1577836800,203.95,409.1,7180.97,202.94,128.91,1883.58,41.19,1355.11
1577837700,203.95,158.42,7178.45,128.24,128.78,3686.07,41.2,649.58
1577838600,204.54,245.08,7179.56,83.49,128.86,1141.18,41.26,478.91
1577839500,204.43,60.74,7177.02,97.14,128.87,1058.34,41.28,344.47
1577840400,205.48,547.23,7190.86,103.52,129.13,1369.77,41.38,613.06


In [5]:
main_df.columns.values

array(['BCHUSDT-data-close', 'BCHUSDT-data-volume', 'BTCUSDT-data-close',
       'BTCUSDT-data-volume', 'ETHUSDT-data-close', 'ETHUSDT-data-volume',
       'LTCUSDT-data-close', 'LTCUSDT-data-volume'], dtype=object)

## Define the parameters of the dataset.

In [6]:
SEQ_LEN = 16
FUTURE_PRED = 4
COIN = ratios[1] #BTC-USDT
VAL_PCT = 0.15

## Create a classifier that specifies when to buy/sell


In [7]:
def classify(current, future):
    if float(current) <= float(future):
        return 1 # when the price is higher we buy
    else:
        return 0 # when the price is lower we sell

## Let's do some feature engineering
- add a 'future' column
- add a 'target' column

In [8]:
main_df["future"] = main_df[f"{COIN}-close"].shift(-FUTURE_PRED)
main_df[[f'{COIN}-close', 'future']].head()

Unnamed: 0_level_0,BTCUSDT-data-close,future
time,Unnamed: 1_level_1,Unnamed: 2_level_1
1577836800,7180.97,7190.86
1577837700,7178.45,7212.1
1577838600,7179.56,7211.87
1577839500,7177.02,7216.27
1577840400,7190.86,7211.97


In [9]:
main_df["target"] = list(
    map(classify, main_df[f"{COIN}-close"], main_df["future"])
)
main_df.head()

Unnamed: 0_level_0,BCHUSDT-data-close,BCHUSDT-data-volume,BTCUSDT-data-close,BTCUSDT-data-volume,ETHUSDT-data-close,ETHUSDT-data-volume,LTCUSDT-data-close,LTCUSDT-data-volume,future,target
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1577836800,203.95,409.1,7180.97,202.94,128.91,1883.58,41.19,1355.11,7190.86,1
1577837700,203.95,158.42,7178.45,128.24,128.78,3686.07,41.2,649.58,7212.1,1
1577838600,204.54,245.08,7179.56,83.49,128.86,1141.18,41.26,478.91,7211.87,1
1577839500,204.43,60.74,7177.02,97.14,128.87,1058.34,41.28,344.47,7216.27,1
1577840400,205.48,547.23,7190.86,103.52,129.13,1369.77,41.38,613.06,7211.97,1


## Split the data into training and validation sets

In [10]:
times = main_df.index.values
last_x_pct = main_df.index.values[-int(VAL_PCT * len(times))]

validation_main_df = main_df[main_df.index >= last_x_pct]
main_df = main_df[main_df.index < last_x_pct]

main_df.loc[main_df.index == last_x_pct] #the splitting place

Unnamed: 0_level_0,BCHUSDT-data-close,BCHUSDT-data-volume,BTCUSDT-data-close,BTCUSDT-data-volume,ETHUSDT-data-close,ETHUSDT-data-volume,LTCUSDT-data-close,LTCUSDT-data-volume,future,target
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1


In [11]:
main_df.describe()

Unnamed: 0,BCHUSDT-data-close,BCHUSDT-data-volume,BTCUSDT-data-close,BTCUSDT-data-volume,ETHUSDT-data-close,ETHUSDT-data-volume,LTCUSDT-data-close,LTCUSDT-data-volume,future,target
count,40332.0,40332.0,40332.0,40332.0,40332.0,40332.0,40332.0,40332.0,40332.0,40332.0
mean,299.436652,2330.671071,14784.185081,777.312977,452.283792,10022.202875,70.050401,10253.371268,14788.519364,0.519191
std,100.622552,3234.73533,10716.512124,821.647219,415.871782,10997.464335,41.504259,13392.2036,10722.280384,0.499638
min,132.54,0.0,3882.22,0.0,88.35,0.0,24.51,0.0,3882.22,0.0
25%,234.54,683.16,9083.0875,354.2475,207.9,3990.375,44.51,2709.15,9083.875,0.0
50%,260.805,1283.61,10258.815,546.875,321.9,6741.49,54.22,5602.44,10259.57,1.0
75%,329.21,2644.5525,15687.43,885.19,459.6025,11686.9225,74.39,12395.805,15700.28,1.0
max,983.68,66794.94,58261.48,20821.08,2976.38,194198.97,271.37,232497.38,58261.48,1.0


In [12]:
missing_target_values_pct = main_df['future'].isna().sum() * 100 /len(main_df['future'])
print(f'{missing_target_values_pct:0.3}%')

0.0%


There are 6% missing values in the target column alone.

In [13]:
main_df.isna().sum()

BCHUSDT-data-close     0
BCHUSDT-data-volume    0
BTCUSDT-data-close     0
BTCUSDT-data-volume    0
ETHUSDT-data-close     0
ETHUSDT-data-volume    0
LTCUSDT-data-close     0
LTCUSDT-data-volume    0
future                 0
target                 0
dtype: int64

In [14]:
main_df.shape

(40332, 10)

## We now remove/fill in the missing values with values that we believe to be correct.

In [15]:
main_df.fillna(method="ffill", inplace=True)
validation_main_df.fillna(method="ffill", inplace=True)

main_df.dropna(inplace=True)
validation_main_df.dropna(inplace=True)
main_df.isna().sum(), validation_main_df.isna().sum()

(BCHUSDT-data-close     0
 BCHUSDT-data-volume    0
 BTCUSDT-data-close     0
 BTCUSDT-data-volume    0
 ETHUSDT-data-close     0
 ETHUSDT-data-volume    0
 LTCUSDT-data-close     0
 LTCUSDT-data-volume    0
 future                 0
 target                 0
 dtype: int64,
 BCHUSDT-data-close     0
 BCHUSDT-data-volume    0
 BTCUSDT-data-close     0
 BTCUSDT-data-volume    0
 ETHUSDT-data-close     0
 ETHUSDT-data-volume    0
 LTCUSDT-data-close     0
 LTCUSDT-data-volume    0
 future                 0
 target                 0
 dtype: int64)

### The data is still not ready for processing. There is one last step required: preprocessing.
# Normalizing, Scaling and Balancing the dataset

The steps required are: dropping the future column, creating pct_change so that the data is clearer and normalizare + scale the data.

# SEQ_LEN
With a normal dataset, we would randomize the order in which the data is so that the model doesn't overfit; instead it learns on random data.

We can't do that here cuz the data would be too random and the model can't learn. Instead, what we'll do is take sequences of predefined lengths then shuffle those sequences. It's a really smart thing to do. We get continuous 1 hour (60 mins) periods and then shuffle them around.

In [16]:
def preprocessing_df(df):
    df.drop("future", axis=1)  # we drop the future column

    # use pct change to figure the change in price
    for col in df.columns:
        if col != "target":
            df[col] = df[col].pct_change()
            df.dropna(inplace=True)
            
            df.replace([np.inf, -np.inf], np.nan, inplace=True)
            
            
            df.dropna(inplace=True)

            # normalize the data 0 -> 1 NOTE: normalization is a form of scaling
            df[col] = preprocessing.scale(df[col])
    df.dropna(inplace=True)
    sequencial_data = []
    prev_days = deque(maxlen=SEQ_LEN)
    for row in df.values:
        prev_days.append([x for x in row[:-1]])
        if len(prev_days) == SEQ_LEN:
            #separate the label/target
            sequencial_data.append([np.array(prev_days), row[-1]])

    random.shuffle(sequencial_data)

    """
    the last step in our preprocessing will be to balance the learning set
    """
    buys = []
    sells = []

    for seq, target in sequencial_data:
        if target == 0: # you need to sell
            sells.append([seq, target])
        elif target == 1: # you need to buy
            buys.append([seq, target])

    random.shuffle(buys)
    random.shuffle(sells)

    # how do we balance? Simple. we remove the excess

    lower_nr = min(len(buys), len(sells))

    buys = buys[:lower_nr]
    sells = sells[:lower_nr]

    sequencial_data = buys + sells

    random.shuffle(sequencial_data)

    # split into x and Y
    x = []
    Y = []

    for seq, target in sequencial_data:
        x.append(seq)
        Y.append(target)
    return np.array(x), np.array(Y)

In [17]:
train_x, train_Y = preprocessing_df(main_df)
test_x, test_Y = preprocessing_df(validation_main_df)

In [18]:
print(f"Training data: {len(train_x)}, Validation data: {len(test_x)}")
print(f" Training set Sells: {(train_Y.astype('int32') == 0).sum()}, Buys: {(train_Y.astype('int32') == 1).sum()}")
print(f" Validation set Sells: {(test_Y.astype('int32') == 0).sum()}, Buys: {(test_Y.astype('int32') == 1).sum()}")

Training data: 38766, Validation data: 6888
 Training set Sells: 19383, Buys: 19383
 Validation set Sells: 3444, Buys: 3444


As we can see, the total of entries is 68.010 + 17.120 = 85130 (after filling/dropping the missing values) and the ratio is 80/20.

# Now that the preprocessing is complete, we can go and train the model.

In [19]:
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

model = Sequential()
model.add(LSTM(64, kernel_regularizer=l2(1e-6), recurrent_regularizer=l2(1e-6), bias_regularizer=l2(1e-6), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(64, kernel_regularizer=l2(1e-6), recurrent_regularizer=l2(1e-6), bias_regularizer=l2(1e-6), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(64, kernel_regularizer=l2(1e-6), recurrent_regularizer=l2(1e-6), bias_regularizer=l2(1e-6)))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(10, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(2, activation='softmax'))

# Learning rate Finder

In [20]:
from matplotlib import pyplot as plt
import math
from keras.callbacks import LambdaCallback
import keras.backend as K

class LRFinder:
    """
    Plots the change of the loss function of a Keras model when the learning rate is exponentially increasing.
    See for details:
    https://towardsdatascience.com/estimating-optimal-learning-rate-for-a-deep-neural-network-ce32f2556ce0
    """
    def __init__(self, model):
        self.model = model
        self.losses = []
        self.lrs = []
        self.best_loss = 1e9

    def on_batch_end(self, batch, logs):
        # Log the learning rate
        lr = K.get_value(self.model.optimizer.lr)
        self.lrs.append(lr)

        # Log the loss
        loss = logs['loss']
        self.losses.append(loss)

        # Check whether the loss got too large or NaN
        if math.isnan(loss) or loss > self.best_loss * 4:
            self.model.stop_training = True
            return

        if loss < self.best_loss:
            self.best_loss = loss

        # Increase the learning rate for the next batch
        lr *= self.lr_mult
        K.set_value(self.model.optimizer.lr, lr)

    def find(self, x_train, y_train, start_lr, end_lr, batch_size=64, epochs=1):
        num_batches = epochs * x_train.shape[0] / batch_size
        self.lr_mult = (end_lr / start_lr) ** (1 / num_batches)

        # Remember the original learning rate
        original_lr = K.get_value(self.model.optimizer.lr)

        # Set the initial learning rate
        K.set_value(self.model.optimizer.lr, start_lr)

        callback = LambdaCallback(on_batch_end=lambda batch, logs: self.on_batch_end(batch, logs))

        self.model.fit(x_train, y_train,
                        batch_size=batch_size, epochs=epochs,
                        callbacks=[tensorboard, callback])

        # Restore the original learning rate
        K.set_value(self.model.optimizer.lr, original_lr)

    def plot_loss(self, n_skip_beginning=10, n_skip_end=5):
        """
        Plots the loss.
        Parameters:
            n_skip_beginning - number of batches to skip on the left.
            n_skip_end - number of batches to skip on the right.
        """
        plt.ylabel("loss")
        plt.xlabel("learning rate (log scale)")
        plt.plot(self.lrs[n_skip_beginning:-n_skip_end], self.losses[n_skip_beginning:-n_skip_end])
        plt.xscale('log')

    def plot_loss_change(self, sma=1, n_skip_beginning=10, n_skip_end=5, y_lim=(-0.01, 0.01)):
        """
        Plots rate of change of the loss function.
        Parameters:
            sma - number of batches for simple moving average to smooth out the curve.
            n_skip_beginning - number of batches to skip on the left.
            n_skip_end - number of batches to skip on the right.
            y_lim - limits for the y axis.
        """
        assert sma >= 1
        derivatives = [0] * sma
        for i in range(sma, len(self.lrs)):
            derivative = (self.losses[i] - self.losses[i - sma]) / sma
            derivatives.append(derivative)

        plt.ylabel("rate of loss change")
        plt.xlabel("learning rate (log scale)")
        plt.plot(self.lrs[n_skip_beginning:-n_skip_end], derivatives[n_skip_beginning:-n_skip_end])
        plt.xscale('log')
        plt.ylim(y_lim)

In [21]:
opt = Adam(learning_rate=1e-4, decay=1e-7)

# opt = Adam(learning_rate=0.01)
# tf.summary.scalar('learning rate', data=1e-10)
lr = ExponentialDecay(initial_learning_rate=1e-2, decay_steps=10, decay_rate=.1)
# opt = SGD(learning_rate=learning_rate)
LOSS='sparse_categorical_crossentropy'

# learning_rate = tf.compat.v1.train.exponential_decay(1e-10, global_step=global_step, decay_steps=100, decay_rate=1.3)

model.compile(loss=LOSS, optimizer=opt, metrics=['accuracy'])

## Hyperparameters, tensorboard and checkpoint

In [22]:
EPOCHS = 10
BATCH_SIZE = 32
# NAME = f"{COIN}-{SEQ_LEN}-{FUTURE_PRED}-VALID-PCT-{VAL_PCT}-LOSS-{LOSS}-OPT-{'Adam'}-{int(time.time())}"
# NAME = f"{COIN}-{SEQ_LEN}-{FUTURE_PRED}-VALID-PCT-model-without-normalization-LOSS-{LOSS}-OPT-{'SGD'}-{int(time.time())}"
# NAME = f"LRFinder-{COIN}-{SEQ_LEN}-{FUTURE_PRED}-{int(time.time())}"
NAME = f"Experiment-Binance-{SEQ_LEN}-{FUTURE_PRED}"

In [23]:
tensorboard = TensorBoard(log_dir=f'logs/{NAME}')
filepath = "RNN_Final-{epoch:02d}-{val_accuracy:.3f}"
checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')) # saves only the best ones

# Fitting the model

In [24]:
history = model.fit(train_x, train_Y, 
        batch_size=BATCH_SIZE, 
        epochs=EPOCHS,
        validation_data=(test_x, test_Y),
        # callbacks=[tensorboard, checkpoint])
        callbacks=[tensorboard])


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [25]:
"""
lr_finder = LRFinder(model)
lr_finder.find(train_x,train_Y, start_lr=1e-10, end_lr=100, batch_size=128, epochs=20)
lr_finder.plot_loss(n_skip_beginning=20, n_skip_end=5)
plt.show()
"""

'\nlr_finder = LRFinder(model)\nlr_finder.find(train_x,train_Y, start_lr=1e-10, end_lr=100, batch_size=128, epochs=20)\nlr_finder.plot_loss(n_skip_beginning=20, n_skip_end=5)\nplt.show()\n'

# Score and saving the model

In [26]:
# Scoring
score = model.evaluate(test_x, test_Y, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Saving
model.save(f"models/{NAME}")

Test loss: 0.06746745854616165
Test accuracy: 0.9759001135826111




INFO:tensorflow:Assets written to: models/Experiment-Binance-16-4\assets


INFO:tensorflow:Assets written to: models/Experiment-Binance-16-4\assets


In [27]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 16, 64)            18944     
_________________________________________________________________
dropout (Dropout)            (None, 16, 64)            0         
_________________________________________________________________
batch_normalization (BatchNo (None, 16, 64)            256       
_________________________________________________________________
lstm_1 (LSTM)                (None, 16, 64)            33024     
_________________________________________________________________
dropout_1 (Dropout)          (None, 16, 64)            0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 16, 64)            256       
_________________________________________________________________
lstm_2 (LSTM)                (None, 64)                3

In [28]:
predicted = model.predict(test_x)
expected = test_Y
expected[0]
predicted[0]

array([0.10787908, 0.89212096], dtype=float32)

In [29]:
from sklearn.metrics import confusion_matrix


predicted_final = np.argmax(predicted, axis=1)
confusion_matrix(expected, predicted_final)

array([[3348,   96],
       [  70, 3374]], dtype=int64)

In [30]:
(test_x==1).sum()

0

## According to the documentation, C1,0 is the value of the false negatives and C0,1 is the value of the false positives.

## The top row contains the actual class and the column contains the predicted class.
<pre>
predicted\ Actual|          |
class     \ class|  1(buy)  | 0(sell)   
___________\_____|__________|_________
1(buy)           |   8701   |    1
_________________|__________|_________
0(sell)          |   3968   |   4734
_________________|__________|_________
</pre>      


## Interpreting the model we understand that the model bought when it should have sold 1 time and sold ~4000 times when it should have bought.

In [31]:
%matplotlib inline
from fastai.vision.all import *
from fastbook import *

plot_function(torch.relu)

ModuleNotFoundError: No module named 'fastai'