In [11]:
import pandas as pd
import numpy as np
import time
import random
import tensorflow as tf
from collections import deque
from sklearn import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, CuDNNLSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import ModelCheckpoint, ModelCheckpoint

In [12]:
# Number of consecutive rows fed to the network as one input sequence.
SEQ_LEN = 60
# How many rows ahead the "future" close price is taken from when labelling.
FUTURE_PERIOD_PREDICT = 3
# NOTE(review): this says "LTC-USD", but the loading cell reads BTC-USD.csv and
# hard-codes "BTC-USD" column names; the constant is not referenced anywhere
# else in the visible notebook — confirm which pair is intended.
RATIO_TO_PREDICT = "LTC-USD"
# Training hyper-parameters passed to model.fit.
EPOCHS = 10  
BATCH_SIZE = 64  
# Run identifier (window length, horizon, unix timestamp) used for the TensorBoard log dir.
NAME = f"{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"

In [13]:
def classify(current, future):
    """Label a price pair: 1 if the future price is strictly higher, else 0.

    Both arguments are passed through ``float()`` before the comparison, so
    numeric strings are accepted. Equal prices, and any comparison involving
    NaN, yield 0.
    """
    went_up = float(future) > float(current)
    return 1 if went_up else 0

In [14]:
def preprocess_df(df):
    """Turn a labelled price frame into balanced, shuffled training sequences.

    Steps:
      1. Drop the "future" helper column (it would leak the label).
      2. Replace every non-"target" column by its percent change, discard the
         rows that became NaN/inf, then standardise each column with
         ``preprocessing.scale``.
      3. Slide a window of ``SEQ_LEN`` consecutive rows over the frame; every
         full window is paired with the "target" value of its last row.
      4. Down-sample the larger class so targets 0 and 1 are equally frequent,
         then shuffle.

    Args:
        df: DataFrame containing a "future" column, a "target" column and one
            or more numeric feature columns. The caller's frame is not
            modified.

    Returns:
        tuple ``(X, y)`` where ``X`` is an ndarray stacking the windows
        (samples, SEQ_LEN, n_features) and ``y`` is a plain Python list of the
        matching targets (kept as a list because callers use ``y.count``).
    """
    # Keyword form: the positional ``axis`` argument was removed in pandas 2.0.
    df = df.drop(columns="future")

    feature_cols = [col for col in df.columns if col != "target"]

    for col in feature_cols:
        df[col] = df[col].pct_change()

    # pct_change yields NaN on the first row and +/-inf after a zero value.
    # Drop those rows once, for all columns together, *before* scaling so the
    # scaling statistics are computed on exactly the rows that are kept.
    df = df.replace([np.inf, -np.inf], np.nan).dropna().copy()

    for col in feature_cols:
        df[col] = preprocessing.scale(df[col].values)

    # Make the "features..., target" column order explicit; the row slicing
    # below relies on the target being the last value of each row.
    df = df[feature_cols + ["target"]]

    sequential_data = []
    prev_days = deque(maxlen=SEQ_LEN)  # rolling window; old rows fall off the left

    for row in df.values:
        prev_days.append(list(row[:-1]))
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days), row[-1]])

    # Split by class, then trim both classes to the size of the smaller one.
    buys = [pair for pair in sequential_data if pair[1] == 1]
    sells = [pair for pair in sequential_data if pair[1] == 0]

    random.shuffle(buys)
    random.shuffle(sells)

    lower = min(len(buys), len(sells))

    sequential_data = buys[:lower] + sells[:lower]
    # Shuffle again so the model does not see all buys followed by all sells.
    random.shuffle(sequential_data)

    X = [seq for seq, _ in sequential_data]
    y = [target for _, target in sequential_data]

    return np.array(X), y

In [15]:
# Load the BTC-USD CSV and keep only the close price and volume columns.
# NOTE(review): machine-specific absolute path; adjust for your environment.
dataset = 'C:\\Users\\Deepak\\Downloads\\BTC-USD.csv'
# Column names are supplied explicitly via `names=`, so every line of the file
# is read as data.
df = pd.read_csv(dataset, names=['time', 'low', 'high', 'open', 'close', 'volume'])

df = df.rename(columns={"close": "BTC-USD_close", "volume": "BTC-USD_volume"})
df = df.set_index("time")
df = df[["BTC-USD_close", "BTC-USD_volume"]]

# Forward-fill gaps, then drop any rows that are still incomplete.
# `.ffill()` replaces the deprecated `fillna(method="ffill")`, and chaining
# gives `main_df` its own frame instead of mutating a column slice in place.
main_df = df.ffill().dropna()

In [16]:
# Show the last 10 rows of the loaded close/volume frame.
main_df.tail(10)

Unnamed: 0_level_0,BTC-USD_close,BTC-USD_volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1
1535214660,6707.799805,1.780853
1535214720,6708.100098,1.401337
1535214780,6708.379883,0.975295
1535214840,6710.089844,1.293573
1535214900,6712.990234,2.330975
1535214960,6713.140137,0.769891
1535215020,6714.52002,1.002652
1535215080,6714.52002,1.021925
1535215140,6715.0,3.645508
1535215200,6715.0,0.51356


In [17]:
# Look FUTURE_PERIOD_PREDICT rows ahead for the close price to predict, then
# label each row by comparing that future close with the current close.
main_df['future'] = main_df['BTC-USD_close'].shift(-FUTURE_PERIOD_PREDICT)
main_df['target'] = [
    classify(now, later)
    for now, later in zip(main_df['BTC-USD_close'], main_df['future'])
]

# The final rows have no future price (NaN after the shift); discard them.
main_df.dropna(inplace=True)

In [18]:
# Show the first 10 rows, now including the 'future' and 'target' columns.
main_df.head(10)

Unnamed: 0_level_0,BTC-USD_close,BTC-USD_volume,future,target
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1528968660,6489.549805,0.5871,6479.410156,0
1528968720,6487.379883,7.706374,6479.97998,0
1528968780,6479.410156,3.088252,6480.0,1
1528968840,6479.410156,1.4041,6477.220215,0
1528968900,6479.97998,0.753,6480.0,1
1528968960,6480.0,1.4909,6479.990234,0
1528969020,6477.220215,2.73195,6478.660156,1
1528969080,6480.0,2.17424,6478.660156,0
1528969140,6479.990234,0.9031,6479.339844,0
1528969200,6478.660156,3.258786,6479.350098,1


In [19]:
# Show the last 10 rows of the labelled frame.
main_df.tail(10)

Unnamed: 0_level_0,BTC-USD_close,BTC-USD_volume,future,target
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1535214480,6707.759766,2.444304,6707.799805,1
1535214540,6705.740234,3.068317,6708.100098,1
1535214600,6705.740234,2.210403,6708.379883,1
1535214660,6707.799805,1.780853,6710.089844,1
1535214720,6708.100098,1.401337,6712.990234,1
1535214780,6708.379883,0.975295,6713.140137,1
1535214840,6710.089844,1.293573,6714.52002,1
1535214900,6712.990234,2.330975,6714.52002,1
1535214960,6713.140137,0.769891,6715.0,1
1535215020,6714.52002,1.002652,6715.0,1


In [8]:
# (rows, columns) of main_df.
main_df.shape

(97721, 4)

In [20]:
# Hold out the rows with the largest 5% of index values (the latest
# timestamps) for validation.
times = sorted(main_df.index.values)
last_5pct = times[-int(0.05*len(times))]

# Keep `main_df` intact and give the training slice its own name, so that
# re-running this cell does not keep shrinking the training set.
validation_main_df = main_df[main_df.index >= last_5pct]
train_main_df = main_df[main_df.index < last_5pct]

train_x, train_y = preprocess_df(train_main_df)
validation_x, validation_y = preprocess_df(validation_main_df)

print(f"train data: {len(train_x)} validation: {len(validation_x)}")
print(f"Dont buys: {train_y.count(0)}, buys: {train_y.count(1)}")
print(f"VALIDATION Dont buys: {validation_y.count(0)}, buys: {validation_y.count(1)}")

train data: 83162 validation: 4490
Dont buys: 41581, buys: 41581
VALIDATION Dont buys: 2245, buys: 2245


In [21]:
# Shape of the training inputs: (samples, SEQ_LEN, features).
train_x.shape

(83162, 60, 2)

In [23]:
# Number of training labels; preprocess_df builds one label per sequence in train_x.
len(train_y)

83162

In [26]:
# First training sequence. It already has shape (SEQ_LEN, features), so no
# reshape with hard-coded dimensions is needed.
train_x[0]

array([[-1.44440090e-01, -6.04341078e-02],
       [-4.65569405e-01,  2.67103743e-02],
       [ 1.52601514e-01, -8.68198380e-02],
       [-8.04611852e-04, -8.13999305e-02],
       [-8.04611852e-04,  4.86606293e-02],
       [ 1.20500314e+00,  4.10319938e-01],
       [ 1.31833600e+00, -8.71533504e-02],
       [ 8.88360706e-01, -8.94675489e-02],
       [ 1.21927302e+00, -8.34809093e-02],
       [ 2.25936798e-01, -2.88823035e-02],
       [-5.10482072e-01, -7.55002816e-02],
       [-7.74408206e-01, -6.58628488e-02],
       [ 3.06608679e-01, -4.24677849e-02],
       [ 1.13676748e+00, -7.15580567e-02],
       [ 6.38932674e-01, -3.39260938e-02],
       [ 1.45334854e-01, -8.43975611e-02],
       [ 1.52741173e-02, -7.98343236e-02],
       [ 6.14418237e-01, -5.16882718e-02],
       [ 1.38954306e+00,  8.93674546e-02],
       [ 2.00841974e+00, -8.65040424e-02],
       [ 2.81986757e+00,  2.48399678e-01],
       [-2.13444788e+00, -8.80165177e-02],
       [-4.05544480e-01, -8.93758368e-02],
       [-2.

In [24]:
# Label paired with the first training sequence.
train_y[0]

1.0

In [9]:
# Three stacked CuDNNLSTM layers, each followed by dropout and batch
# normalisation, then a small dense head with a 2-way softmax output.
model = Sequential()

model.add(CuDNNLSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(CuDNNLSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

# Last recurrent layer returns only its final output, feeding the dense head.
model.add(CuDNNLSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(2, activation='softmax'))

opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)

# Sparse loss: labels are class indices (0/1), not one-hot vectors.
model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))

# Checkpoint file name template; Keras fills in the epoch and metric values.
# NOTE(review): 'val_acc' must match the metric key Keras logs; newer
# TensorFlow releases log 'val_accuracy' instead — confirm for the installed
# version.
filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"
# The options belong to ModelCheckpoint; previously a misplaced parenthesis
# handed them to str.format, where they were silently ignored.
checkpoint = ModelCheckpoint(
    "{}.model".format(filepath),
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
    mode='max',
)

# Labels come back from preprocess_df as Python lists; hand Keras arrays.
# Both callbacks are passed to fit so that logging and checkpointing happen.
history = model.fit(
    train_x, np.asarray(train_y),
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, np.asarray(validation_y)),
    callbacks=[tensorboard, checkpoint],
)

score = model.evaluate(validation_x, np.asarray(validation_y), verbose=0)

print('Test loss:', score[0])
print('Test accuracy:', score[1])

Train on 83162 samples, validate on 4490 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10