In [4]:
import pandas as pd
from collections import deque
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

COIN = 'bitcoin'
TIME_SERIES_LENGTH = 3



In [5]:
df = pd.read_csv('../../data/processed_dataset.csv')

df['future'] = df[f"{COIN}_usd"].shift(-1)
df.dropna(inplace=True)

df.head()

Unnamed: 0,timestamp,bitcoin_usd,litecoin_usd,ripple_usd,ethereum_usd,China_confirmed,Italy_confirmed,Iran_confirmed,Spain_confirmed,Germany_confirmed,...,Iran_deaths,Spain_deaths,Germany_deaths,USA_deaths,France_deaths,S. Korea_deaths,Switzerland_deaths,UK_deaths,Portugal_deaths,future
0,2015-08-07,-0.003926,-0.002833,-0.002345,-0.004322,-0.003728,-0.001811,-0.003121,-0.004607,-0.002966,...,-0.005153,-0.005052,-0.003839,-0.001671,-0.00683,-0.002031,-0.002957,-0.004202,-0.004502,-0.494012
1,2015-08-08,-0.102065,-0.064364,0.018817,-0.494012,-0.003728,-0.001811,-0.003121,-0.004607,-0.002966,...,-0.005153,-0.005052,-0.003839,-0.001671,-0.00683,-0.002031,-0.002957,-0.004202,-0.004502,-0.45084
2,2015-08-10,0.017317,0.002074,0.016987,-0.45084,-0.003728,-0.001811,-0.003121,-0.004607,-0.002966,...,-0.005153,-0.005052,-0.003839,-0.001671,-0.00683,-0.002031,-0.002957,-0.004202,-0.004502,0.505988
3,2015-08-11,0.030931,0.0428,-0.009108,0.505988,-0.003728,-0.001811,-0.003121,-0.004607,-0.002966,...,-0.005153,-0.005052,-0.003839,-0.001671,-0.00683,-0.002031,-0.002957,-0.004202,-0.004502,0.159471
4,2015-08-12,-0.015589,-0.017605,-0.022307,0.159471,-0.003728,-0.001811,-0.003121,-0.004607,-0.002966,...,-0.005153,-0.005052,-0.003839,-0.001671,-0.00683,-0.002031,-0.002957,-0.004202,-0.004502,0.413853


In [6]:
# Values for our target column
def price_variation(present, future):
    if float(future) > float(present):
        return 1
    else:
        return 0
    
def preprocess_df(df):
    df = df.drop(['future'], axis=1)

    sequential_data = []
    prev_days = deque(maxlen=TIME_SERIES_LENGTH)
    for i, row in df.iterrows():
        prev_days.append([row[c] for c in df.drop(['target', 'timestamp'], axis=1).columns])
        if len(prev_days) == TIME_SERIES_LENGTH:
            sequential_data.append([np.array(prev_days), row['target']])
    random.shuffle(sequential_data)

    # balance to fifty fifty
    buys = []
    sells = []
    for seq, target in sequential_data:
        if target > 0:
            buys.append([seq, target])
        else:
            sells.append([seq, target])

    lower = min(len(buys), len(sells))

    buys = buys[:lower]
    sells = sells[:lower]

    sequential_data = buys + sells
    random.shuffle(sequential_data)
    xs = []
    ys = []
    for seq, target in sequential_data:
        xs.append(seq)
        ys.append(target)
    
    return np.array(xs), np.array(ys)
    
df['target'] = list(map(price_variation, df[f"{COIN}_usd"], df["future"]))

times = sorted(df.timestamp.values)
last_5pct = times[-21]

x, y = preprocess_df(df)

nsamples = y.shape[0]
sample_indices = random.sample(list(range(nsamples)), int(nsamples*0.05))
train_indices = [x for x in range(nsamples) if x not in sample_indices]

train_x = x[train_indices]
train_y = y[train_indices]
valid_x = x[sample_indices]
valid_y = y[sample_indices]

In [7]:
EPOCHS = 100
BATCH_SIZE = 128
LEARNING_RATE = 0.001

number = train_x.shape[1] * train_x.shape[2]

NAME = f"{COIN}-SL={TIME_SERIES_LENGTH}-LR={LEARNING_RATE:.3f}-BATCH={BATCH_SIZE:03d}"

model = Sequential()
model.add(LSTM(number, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(number, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(int(number/2), input_shape=(train_x.shape[1:])))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(int(number/8), activation="relu"))

model.add(Dense(2, activation="softmax"))

opt = tf.keras.optimizers.Adam(lr=LEARNING_RATE)
model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

tensorboard = TensorBoard(log_dir=f'logs/{NAME}')
filepath = "RNN_Final-{epoch:03d}-{val_accuracy:.3f}"
checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max'))

history = model.fit(train_x, train_y,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_data=(valid_x, valid_y),
                    callbacks=[tensorboard, checkpoint])


Train on 1600 samples, validate on 84 samples
Epoch 1/100
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: models/RNN_Final-001-0.417.model/assets
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100

KeyError: 'val_accuracy'