In [22]:
import random
import time
from collections import deque
import numpy as np
import pandas as pd
from sklearn import preprocessing
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, CuDNNLSTM, BatchNormalization
from keras.callbacks import TensorBoard, ModelCheckpoint

In [32]:
# --- Data source -----------------------------------------------------------
# Base name of the CSV file (without extension) that holds the price history.
DATA_FILE_NAME = 'BTC-USD'
# Column names assigned to the CSV when it is read (passed as `names=`).
DATA_VALUES = ['time', 'low', 'high', 'open', 'close', 'volume']
# Columns used as model inputs. The 0th entry is also the column the
# up/down target is derived from, so keep 'close' first.
VALUES_TO_LEARN_FROM = ['close', 'volume']
# Set to True when the CSV rows are in reverse order; the frame is then
# flipped before use.
DATA_REVERSED = False

# --- Model / training hyper-parameters --------------------------------------
SEQ_LEN = 60                # number of consecutive rows in one input sequence
FUTURE_PERIOD_PREDICT = 3   # how many rows ahead the target looks
EPOCHS = 15
BATCH_SIZE = 64
VALIDATION_SIZE = 0.05      # fraction of rows held out for validation
MODEL_NAME = f"{DATA_FILE_NAME}-{SEQ_LEN}-{FUTURE_PERIOD_PREDICT}-{int(time.time())}"

# --- Reproducibility --------------------------------------------------------
# The preprocessing shuffles samples with `random.shuffle`; seeding here makes
# a Restart & Run All produce the same sample order every time.
# NOTE(review): TensorFlow's own RNG (weight init, dropout) is not seeded here
# because the call differs between TF versions - add it if full determinism
# is required.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

In [33]:
def targeting(current, future):
    """Return 1 when `future` is strictly greater than `current`, else 0.

    Both arguments are converted with `float()` before comparing, so numeric
    strings are accepted. A NaN on either side compares as "not greater" and
    therefore yields 0.
    """
    return 1 if float(future) > float(current) else 0


def preprocessing_df(df):
    """Turn a feature/target frame into balanced, shuffled training sequences.

    Steps, in order:
      1. Every column except 'target' is replaced by its percentage change
         and then standardised with `preprocessing.scale`. Rows containing
         NaN are dropped after each percentage-change step.
      2. A sliding window of SEQ_LEN consecutive rows is built; each full
         window is paired with the last-column value of its final row,
         which is used as the label.
      3. Samples are split by label (1 = buy, 0 = sell), both classes are
         truncated to the size of the smaller one, and the result is shuffled.

    Note: `df` is modified in place.

    Returns:
        (X, y): X is a numpy array stacking the sequences, y is a plain
        Python list with the matching labels.
    """
    feature_columns = [name for name in df.columns if name != 'target']
    for name in feature_columns:
        df[name] = df[name].pct_change()
        # pct_change leaves a NaN in the first remaining row; drop it before scaling.
        df.dropna(inplace=True)
        df[name] = preprocessing.scale(df[name].values)

    df.dropna(inplace=True)

    # Rolling window over the rows; the deque discards the oldest row itself.
    window = deque(maxlen=SEQ_LEN)
    samples = []
    for row in df.values:
        window.append(list(row[:-1]))
        if len(window) < SEQ_LEN:
            continue
        samples.append([np.array(window), row[-1]])

    random.shuffle(samples)

    # Separate the two classes so they can be balanced.
    sells = [sample for sample in samples if sample[1] == 0]
    buys = [sample for sample in samples if sample[1] == 1]

    random.shuffle(buys)
    random.shuffle(sells)

    # Keep the same number of samples from each class.
    per_class = min(len(buys), len(sells))
    balanced = buys[:per_class] + sells[:per_class]
    random.shuffle(balanced)

    X = [sequence for sequence, _ in balanced]
    y = [label for _, label in balanced]

    return np.array(X), y

In [34]:
# Never truncate the column list when a frame is printed.
pd.options.display.max_columns = None

# The CSV is read with the column names from DATA_VALUES applied to it.
data = pd.read_csv(
    f'data/{DATA_FILE_NAME}.csv',
    names=DATA_VALUES,
)
print(data.head())

         time          low         high         open        close    volume
0  1528968660  6489.549805  6489.560059  6489.560059  6489.549805  0.587100
1  1528968720  6487.370117  6489.560059  6489.549805  6487.379883  7.706374
2  1528968780  6479.410156  6487.370117  6487.370117  6479.410156  3.088252
3  1528968840  6479.410156  6479.419922  6479.419922  6479.410156  1.404100
4  1528968900  6475.930176  6479.979980  6479.410156  6479.979980  0.753000


In [35]:
# Work on a copy holding only the columns the model learns from, so the
# raw `data` frame stays untouched.
df = data[VALUES_TO_LEARN_FROM].copy()
if DATA_REVERSED:
    # Flip the row order and renumber the index from 0.
    df = df[::-1]
    df = df.reset_index(drop=True)

# Forward-fill gaps, then drop whatever is still missing (leading rows that
# have no earlier value to fill from).
# `DataFrame.ffill` replaces the deprecated `fillna(method='ffill')`.
df.ffill(inplace=True)
df.dropna(inplace=True)
print(df.head())

         close    volume
0  6489.549805  0.587100
1  6487.379883  7.706374
2  6479.410156  3.088252
3  6479.410156  1.404100
4  6479.979980  0.753000


In [36]:
# Price FUTURE_PERIOD_PREDICT rows ahead of each row, taken from the 0th
# learning column.
df['future'] = df[VALUES_TO_LEARN_FROM[0]].shift(-FUTURE_PERIOD_PREDICT)
# 1 when the future price is higher than the current one, otherwise 0.
df['target'] = list(map(targeting, df[VALUES_TO_LEARN_FROM[0]], df['future']))

# The last FUTURE_PERIOD_PREDICT rows have no future price (NaN after the
# shift); `targeting` labels them 0 even though the outcome is unknown.
# Remove them *before* the helper column is dropped so they cannot end up
# in the training data as false "sell" samples.
df.dropna(subset=['future'], inplace=True)
# Keyword form: the positional axis argument is rejected by pandas 2.x.
df = df.drop(columns='future')

df.dropna(inplace=True)
print(df.head(10))

         close    volume  target
0  6489.549805  0.587100       0
1  6487.379883  7.706374       0
2  6479.410156  3.088252       1
3  6479.410156  1.404100       0
4  6479.979980  0.753000       1
5  6480.000000  1.490900       0
6  6477.220215  2.731950       1
7  6480.000000  2.174240       0
8  6479.990234  0.903100       0
9  6478.660156  3.258786       1


In [37]:
# Chronological split: rows whose index label is at or below the threshold
# go to training, everything after it is held out for validation.
split_index = int(len(df.index) * (1 - VALIDATION_SIZE))
in_training = df.index <= split_index

train_df = df[in_training].copy()
validation_df = df[~in_training].copy()

# Each part is scaled, windowed and class-balanced independently.
train_x, train_y = preprocessing_df(train_df)
validation_x, validation_y = preprocessing_df(validation_df)

print("Training data: {}, Validation data: {}".format(len(train_x), len(validation_x)))
print("Dont buys: {}, Buys: {}".format(train_y.count(0), train_y.count(1)))
print("VALIDATION - Dont buys: {}, Buys: {}".format(validation_y.count(0), validation_y.count(1)))

Training data: 83162, Validation data: 4484
Dont buys: 41581, Buys: 41581
VALIDATION - Dont buys: 2242, Buys: 2242


In [38]:
# CuDNNLSTM needs tensorflow-gpu. Without a GPU build, swap every CuDNNLSTM
# below for the regular LSTM layer (same arguments, slower training).
model = Sequential([
    # Recurrent block 1 - returns the full sequence for the next LSTM.
    CuDNNLSTM(128, input_shape=train_x.shape[1:], return_sequences=True),
    Dropout(0.2),
    BatchNormalization(),

    # Recurrent block 2 - still sequence-to-sequence.
    CuDNNLSTM(128, return_sequences=True),
    Dropout(0.1),
    BatchNormalization(),

    # Recurrent block 3 - only the final output is passed on.
    CuDNNLSTM(128),
    Dropout(0.2),
    BatchNormalization(),

    # Dense head ending in a 2-way softmax (class 0 / class 1).
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(2, activation='softmax'),
])

# Sparse categorical cross-entropy: labels are integer class ids, not one-hot.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [39]:
# Write TensorBoard logs to a run-specific directory.
tensorboard = TensorBoard(log_dir="logs/{}".format(MODEL_NAME))

# The {epoch}/{val_acc} placeholders are left for Keras to fill in when it
# writes a checkpoint; only the outer "models/{}.model" template is
# formatted here.
filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"
# The options belong to ModelCheckpoint, not to str.format(): previously the
# closing parenthesis was misplaced, so they were silently ignored and the
# callback ran with its defaults instead of keeping only the best val_acc.
checkpoint = ModelCheckpoint(
    "models/{}.model".format(filepath),
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
    mode='max',
)

In [43]:
# preprocessing_df returns the labels as plain Python lists. Convert them to
# arrays before handing them to Keras: newer Keras/TensorFlow versions reject
# a list of labels next to a NumPy input, and the conversion is harmless on
# older ones.
train_labels = np.asarray(train_y)
validation_labels = np.asarray(validation_y)

history = model.fit(
    train_x, train_labels,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, validation_labels),
    callbacks=[tensorboard, checkpoint],
)

# NOTE: these figures are computed on the validation split - there is no
# separate test set in this notebook.
score = model.evaluate(validation_x, validation_labels, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Persist the model as it is after the last epoch.
model.save("models/{}".format(MODEL_NAME))

Train on 83162 samples, validate on 4484 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Test loss: 0.8653643688487751
Test accuracy: 0.5312221231043711
