In [28]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from collections import deque
import random
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, CuDNNLSTM, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard


In [2]:
# Sanity-check a single raw file. Column names are supplied explicitly via
# `names`, so pandas does not take them from the file.
df_test = pd.read_csv(
    'crypto_data/LTC-USD.csv',
    names=['time', 'low', 'high', 'open', 'close', 'volume'],
)
df_test.head()

Unnamed: 0,time,low,high,open,close,volume
0,1528968660,96.580002,96.589996,96.589996,96.580002,9.6472
1,1528968720,96.449997,96.669998,96.589996,96.660004,314.387024
2,1528968780,96.470001,96.57,96.57,96.57,77.129799
3,1528968840,96.449997,96.57,96.57,96.5,7.216067
4,1528968900,96.279999,96.540001,96.5,96.389999,524.539978


In [33]:
SEQ_LEN = 60  # rows of history per training sequence (deque maxlen in preprocess_df)
FUTURE_PERIOD_PREDICT = 3  # how many rows ahead the "future" close is taken from
RATIO_TO_PREDICT = "BCH-USD"  # pair whose "<pair>_close" column is being classified
# NOTE(review): the saved outputs of the later preview cells show LTC-USD_close,
# so they appear to come from a run with a different value here — restart the
# kernel and run all cells to bring code and outputs back in sync.
EPOCHS = 10  # passed to model.fit
BATCH_SIZE = 64  # passed to model.fit

# Run label (pair, sequence length, unix timestamp); used as the TensorBoard log sub-directory.
NAME = f"{RATIO_TO_PREDICT}-{SEQ_LEN}-SEQ-{(int(time.time()))}"

def classify(current, future):
    """Label one row: 1 if the future price is strictly above the current one, else 0.

    Both arguments are passed through float(), so numeric strings are accepted.
    A NaN on either side makes the comparison False and therefore yields 0.
    """
    price_rises = float(future) > float(current)
    return int(price_rises)


In [4]:
# Build one wide frame indexed by timestamp, holding "<pair>_close" and
# "<pair>_volume" columns for every pair listed in `ratios`.
main_df = pd.DataFrame()

ratios = ["BTC-USD","LTC-USD","ETH-USD","BCH-USD"]

for ratio in ratios:
    dataset = f"crypto_data/{ratio}.csv"
    df = pd.read_csv(
        dataset,
        names=['time','low','high','open',f'{ratio}_close',f'{ratio}_volume'],
        index_col=["time"],
    )
    # Only the close and volume columns are carried forward.
    df = df.loc[:, [f'{ratio}_close', f'{ratio}_volume']]
    # The first pair seeds the frame; later pairs are joined onto its index
    # (DataFrame.join defaults to a left join, so the first pair's timestamps win).
    main_df = df if len(main_df) == 0 else main_df.join(df)

print(main_df.columns)
print(main_df.head())

Index(['BTC-USD_close', 'BTC-USD_volume', 'LTC-USD_close', 'LTC-USD_volume',
       'ETH-USD_close', 'ETH-USD_volume', 'BCH-USD_close', 'BCH-USD_volume'],
      dtype='object')
            BTC-USD_close  BTC-USD_volume  LTC-USD_close  LTC-USD_volume  \
time                                                                       
1528968660    6489.549805        0.587100      96.580002        9.647200   
1528968720    6487.379883        7.706374      96.660004      314.387024   
1528968780    6479.410156        3.088252      96.570000       77.129799   
1528968840    6479.410156        1.404100      96.500000        7.216067   
1528968900    6479.979980        0.753000      96.389999      524.539978   

            ETH-USD_close  ETH-USD_volume  BCH-USD_close  BCH-USD_volume  
time                                                                      
1528968660            NaN             NaN     871.719971        5.675361  
1528968720      486.01001       26.019083     870.859985       26

In [5]:
# How many rows of the merged frame lack an ETH close (True) versus have one (False).
main_df['ETH-USD_close'].isna().value_counts()

False    97422
True       302
Name: ETH-USD_close, dtype: int64

In [7]:
# Look-ahead column: every row receives the target pair's close from
# FUTURE_PERIOD_PREDICT rows later (a negative shift pulls later values up).
# The final FUTURE_PERIOD_PREDICT rows have nothing to pull from and become NaN.
main_df['future'] = main_df[f"{RATIO_TO_PREDICT}_close"].shift(periods=-FUTURE_PERIOD_PREDICT)

In [8]:
# Eyeball the current close next to its look-ahead value.
main_df.loc[:, [f"{RATIO_TO_PREDICT}_close", "future"]].head()

Unnamed: 0_level_0,LTC-USD_close,future
time,Unnamed: 1_level_1,Unnamed: 2_level_1
1528968660,96.580002,96.5
1528968720,96.660004,96.389999
1528968780,96.57,96.519997
1528968840,96.5,96.440002
1528968900,96.389999,96.470001


In [9]:
# Binary label per row: 1 when the look-ahead close is above the current close.
# NOTE(review): rows whose "future" is NaN (the tail of the frame) are labelled 0
# by classify(), because comparisons against NaN are False.
main_df['target'] = [
    classify(current_close, future_close)
    for current_close, future_close in zip(
        main_df[f"{RATIO_TO_PREDICT}_close"], main_df["future"]
    )
]


In [10]:
# Spot-check that the label agrees with the close/future pair on each row.
main_df.loc[:, [f"{RATIO_TO_PREDICT}_close", "future", "target"]].head()

Unnamed: 0_level_0,LTC-USD_close,future,target
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1528968660,96.580002,96.5,0
1528968720,96.660004,96.389999,0
1528968780,96.57,96.519997,0
1528968840,96.5,96.440002,0
1528968900,96.389999,96.470001,1


In [11]:
# Index values (timestamps) in ascending order; used to pick the train/validation cut-off.
times = sorted(main_df.index.values)

In [12]:
# Timestamp located 5% of the way in from the end of the sorted index.
# (If int(0.05*len(times)) is 0 the index is -0, i.e. the very first timestamp.)
last_5pct = times[-int(0.05*len(times))]
last_5pct # threshold for separating training and validation data

1534922100

In [13]:
# Chronological hold-out: rows at or after the cut-off go to validation,
# everything earlier stays in main_df for training.
# Caution: main_df is overwritten here, so re-running this cell on its own
# leaves validation_main_df empty — re-run from the data-loading cell instead.
validation_main_df = main_df.loc[main_df.index >= last_5pct]
main_df = main_df.loc[main_df.index < last_5pct]

In [15]:
# (rows, columns) of the training portion that remains after the split.
main_df.shape

(92838, 10)

In [16]:
def preprocess_df(df):
    """Turn a merged price frame into class-balanced (sequence, label) data.

    Steps:
      1. Drop the "future" helper column.
      2. For every column except "target": replace the values with their
         percent change, drop rows that became NaN/inf, then standardize the
         column with ``preprocessing.scale``.
      3. Slide a window of SEQ_LEN consecutive rows over the frame; each full
         window becomes one sample, labelled with the target of its last row.
      4. Balance the two classes by truncating the larger one, then shuffle.

    Args:
        df: DataFrame containing a "future" column and a "target" column.
            "target" must be the last column once "future" is removed,
            because features and label are split by position.

    Returns:
        (X, y): X is a numpy array of the stacked sequences; y is a plain
        Python list of the matching targets (callers use ``list.count`` on it).

    Reads the module-level constant SEQ_LEN and the global ``random`` state,
    so the sample order differs between runs unless ``random`` is seeded.
    """
    # Keyword form: the positional ``axis`` argument was removed in pandas 2.0.
    df = df.drop(columns="future")

    for col in df.columns:
        if col == "target":
            continue  # the label itself is never normalized
        # Percent change puts coins with very different price levels on a
        # comparable footing; we care about movement, not absolute value.
        df[col] = df[col].pct_change()
        # A change away from an exact 0 (e.g. a zero-volume row) yields +/-inf,
        # which preprocessing.scale rejects; treat it as missing instead.
        df[col] = df[col].replace([np.inf, -np.inf], np.nan)
        # Drops every row holding a NaN in *any* column, so each pass removes
        # at least the leading row created by pct_change.
        df.dropna(inplace=True)
        # Standardize to zero mean / unit variance (this is not 0..1 scaling).
        df[col] = preprocessing.scale(df[col].values)

    df.dropna(inplace=True)  # final safety net

    sequential_data = []  # [sequence, target] pairs
    # deque with maxlen evicts the oldest row automatically as new ones arrive.
    window = deque(maxlen=SEQ_LEN)

    for row in df.values:
        window.append(list(row[:-1]))  # features only; the target is last
        if len(window) == SEQ_LEN:  # emit only complete windows
            sequential_data.append([np.array(window), row[-1]])

    random.shuffle(sequential_data)

    # Split by class so both can be cut to the same size.
    buys = [sample for sample in sequential_data if sample[1] == 1]
    sells = [sample for sample in sequential_data if sample[1] == 0]

    random.shuffle(buys)
    random.shuffle(sells)

    lower = min(len(buys), len(sells))  # size of the smaller class

    sequential_data = buys[:lower] + sells[:lower]
    # Shuffle again so the model does not see one class followed by the other.
    random.shuffle(sequential_data)

    X = [seq for seq, _ in sequential_data]
    y = [target for _, target in sequential_data]

    return np.array(X), y


In [18]:
# Build the sequence datasets; the training and validation frames are each
# passed through preprocess_df separately.
train_x, train_y = preprocess_df(main_df)
validation_x, validation_y = preprocess_df(validation_main_df)

# Report sizes and per-class counts (preprocess_df truncates both classes to
# the smaller one, so the two counts in each set should match).
print(f"train data: {len(train_x)} validation: {len(validation_x)}")
print(f"Dont buys: {train_y.count(0)}, buys: {train_y.count(1)}")
print(f"VALIDATION Dont buys: {validation_y.count(0)}, buys: {validation_y.count(1)}")


train data: 69188 validation: 3062
Dont buys: 34594, buys: 34594
VALIDATION Dont buys: 1531, buys: 1531


In [34]:
# TF1-style session config requesting at most ~1/3 of GPU memory for this
# process. `sess` is not referenced again in this cell.
gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.33)
sess=tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

# Three stacked CuDNN LSTM layers; the first two return full sequences so the
# next recurrent layer receives one vector per time step.
model = Sequential()
model.add(CuDNNLSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(CuDNNLSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(CuDNNLSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

# Two-way softmax paired with sparse categorical cross-entropy (integer labels).
model.add(Dense(2, activation='softmax'))


opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)

# Compile model
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)

tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))

# The {epoch}/{val_acc} placeholders are left intact by str.format below and
# are filled in by ModelCheckpoint at save time.
filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"
# Fix: the options are now arguments of ModelCheckpoint. Previously they sat
# inside the str.format(...) call, where they were silently ignored, so a
# checkpoint was written every epoch using the default monitor.
checkpoint = ModelCheckpoint(
    "{}.model".format(filepath),
    monitor='val_acc',
    verbose=1,
    save_best_only=True,  # keep only checkpoints that improve val_acc
    mode='max',
)

# Train model
# Labels are converted to arrays: preprocess_df returns plain lists, which
# newer Keras versions do not accept as targets.
history = model.fit(
    train_x, np.asarray(train_y),
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, np.asarray(validation_y)),
    callbacks=[tensorboard, checkpoint],
)



Train on 69188 samples, validate on 3062 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
