In [107]:
import pandas as pd
import os
import tensorflow as tf
from sklearn import preprocessing 
import numpy as np
import random
from collections import deque
import time
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, CuDNNLSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

In [63]:
tf.__version__

'1.9.0'

In [123]:
SEQ_LEN=60
FUTURE_PERIOD_PREDICT=3
RATIO_TO_PREDICT='LTC-USD'
EPOCHS=10
BATCH_SIZE=64
NAME=f"{RATIO_TO_PREDICT}-{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"

In [65]:
def classify(current,future):
    if float(future)>float(current):
        return 1
    else:
        return 0
    

In [86]:
def preprocess_df(df):
    df=df.drop('LTC-USD_future',1)
    
    for col in df.columns:
        if col!='LTC-USD_target':
            # normalizaing
            df[col]=df[col].pct_change()
            df.dropna(inplace=True)
            # scaling
            df[col]=preprocessing.scale(df[col].values)
            
    df.dropna(inplace=True)
    
    sequential_data=[]
    prev_days=deque(maxlen=SEQ_LEN)
    for i in df.values:
        prev_days.append([n for n in i[:-1]])
        if len(prev_days)==SEQ_LEN:
            sequential_data.append([np.array(prev_days),i[-1]])
            
    random.shuffle(sequential_data)
    
    # balancing data
    buys=[]
    sells=[]

    for seq, target in sequential_data:
        if target==0:
            sells.append([seq,target])
        if target==1:
            buys.append([seq,target])

    random.shuffle(buys)
    random.shuffle(sells)
    
    lower=min(len(buys),len(sells))
    
    buys= buys[:lower]
    sells=sells[:lower]
    
    sequential_data=buys+sells
    
    random.shuffle(sequential_data)
    
    X=[]
    y=[]
    
    for seq, target in sequential_data:
        X.append(seq)
        y.append(target)
        
    return np.array(X), y

In [67]:
BCH=pd.read_csv(r'C:\Users\Gaya\Desktop\RNN\crypto_data\BCH-USD.csv',names=['time','low','high','open','close','volume'])
BTC=pd.read_csv(r'C:\Users\Gaya\Desktop\RNN\crypto_data\BTC-USD.csv',names=['time','low','high','open','close','volume'])
ETH=pd.read_csv(r'C:\Users\Gaya\Desktop\RNN\crypto_data\ETH-USD.csv',names=['time','low','high','open','close','volume'])
LTC=pd.read_csv(r'C:\Users\Gaya\Desktop\RNN\crypto_data\LTC-USD.csv',names=['time','low','high','open','close','volume'])

In [68]:
BCH.head()

Unnamed: 0,time,low,high,open,close,volume
0,1528968660,871.650024,871.72998,871.650024,871.719971,5.675361
1,1528968720,870.859985,871.719971,871.719971,870.859985,26.856577
2,1528968780,870.099976,871.090027,871.090027,870.099976,1.1243
3,1528968840,868.830017,870.950012,868.830017,870.789978,1.749862
4,1528968900,870.0,870.0,870.0,870.0,1.6805


In [69]:
main_df=pd.DataFrame()

In [70]:
os.chdir(r'C:\Users\Gaya\Desktop\RNN\crypto_data')

In [71]:
ratios=os.listdir()

In [72]:
for ratio in ratios:
    df=pd.read_csv(ratio,names=['time','low','high','open','close','volume'])
    df.rename(columns={'close': f"{ratio[:7]}_close","volume": f"{ratio[:7]}_volume"},inplace=True)
    df.set_index('time',inplace=True)
    df=df[['{}_close'.format(ratio[:7]),'{}_volume'.format(ratio[:7])]]
    if len(main_df)==0:
        main_df=df
        
    else:
        main_df=main_df.join(df)
        
print(main_df.head())

            BCH-USD_close  BCH-USD_volume  BTC-USD_close  BTC-USD_volume  \
time                                                                       
1528968660     871.719971        5.675361    6489.549805        0.587100   
1528968720     870.859985       26.856577    6487.379883        7.706374   
1528968780     870.099976        1.124300    6479.410156        3.088252   
1528968840     870.789978        1.749862    6479.410156        1.404100   
1528968900     870.000000        1.680500    6479.979980        0.753000   

            ETH-USD_close  ETH-USD_volume  LTC-USD_close  LTC-USD_volume  
time                                                                      
1528968660            NaN             NaN      96.580002        9.647200  
1528968720      486.01001       26.019083      96.660004      314.387024  
1528968780      486.00000        8.449400      96.570000       77.129799  
1528968840      485.75000       26.994646      96.500000        7.216067  
1528968900      4

In [73]:
main_df[f'{RATIO_TO_PREDICT}_future']=main_df[f'{RATIO_TO_PREDICT}_close'].shift(-FUTURE_PERIOD_PREDICT)

In [74]:
main_df

Unnamed: 0_level_0,BCH-USD_close,BCH-USD_volume,BTC-USD_close,BTC-USD_volume,ETH-USD_close,ETH-USD_volume,LTC-USD_close,LTC-USD_volume,LTC-USD_future
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1528968660,871.719971,5.675361,6489.549805,0.587100,,,96.580002,9.647200,96.500000
1528968720,870.859985,26.856577,6487.379883,7.706374,486.010010,26.019083,96.660004,314.387024,96.389999
1528968780,870.099976,1.124300,6479.410156,3.088252,486.000000,8.449400,96.570000,77.129799,96.519997
1528968840,870.789978,1.749862,6479.410156,1.404100,485.750000,26.994646,96.500000,7.216067,96.440002
1528968900,870.000000,1.680500,6479.979980,0.753000,486.000000,77.355759,96.389999,524.539978,96.470001
1528968960,869.989990,1.669014,6480.000000,1.490900,486.000000,7.503300,96.519997,16.991997,96.400002
1528969020,869.450012,0.865200,6477.220215,2.731950,485.989990,85.877251,96.440002,95.524078,96.400002
1528969080,869.989990,23.534929,6480.000000,2.174240,485.989990,160.915192,96.470001,175.205307,96.400002
1528969140,870.000000,2.300000,6479.990234,0.903100,485.989990,61.371887,96.400002,43.652802,96.400002
1528969200,870.320007,9.255514,6478.660156,3.258786,485.989990,42.687656,96.400002,8.160000,96.400002


In [75]:
main_df[f'{RATIO_TO_PREDICT}_target']=list(map(classify,main_df[f'{RATIO_TO_PREDICT}_close'],main_df[f'{RATIO_TO_PREDICT}_future']))

In [76]:
main_df.head()

Unnamed: 0_level_0,BCH-USD_close,BCH-USD_volume,BTC-USD_close,BTC-USD_volume,ETH-USD_close,ETH-USD_volume,LTC-USD_close,LTC-USD_volume,LTC-USD_future,LTC-USD_target
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1528968660,871.719971,5.675361,6489.549805,0.5871,,,96.580002,9.6472,96.5,0
1528968720,870.859985,26.856577,6487.379883,7.706374,486.01001,26.019083,96.660004,314.387024,96.389999,0
1528968780,870.099976,1.1243,6479.410156,3.088252,486.0,8.4494,96.57,77.129799,96.519997,0
1528968840,870.789978,1.749862,6479.410156,1.4041,485.75,26.994646,96.5,7.216067,96.440002,0
1528968900,870.0,1.6805,6479.97998,0.753,486.0,77.355759,96.389999,524.539978,96.470001,1


In [77]:
times=sorted(main_df.index.values)

In [78]:
times

[1528968660,
 1528968720,
 1528968780,
 1528968840,
 1528968900,
 1528968960,
 1528969020,
 1528969080,
 1528969140,
 1528969200,
 1528969260,
 1528969320,
 1528969380,
 1528969440,
 1528969500,
 1528969560,
 1528969620,
 1528969680,
 1528969740,
 1528969800,
 1528969860,
 1528969920,
 1528969980,
 1528970040,
 1528970100,
 1528970160,
 1528970220,
 1528970280,
 1528970340,
 1528970400,
 1528970460,
 1528970520,
 1528970580,
 1528970640,
 1528970700,
 1528970760,
 1528970820,
 1528970880,
 1528970940,
 1528971000,
 1528971060,
 1528971120,
 1528971180,
 1528971240,
 1528971300,
 1528971360,
 1528971420,
 1528971480,
 1528971540,
 1528971600,
 1528971660,
 1528971720,
 1528971780,
 1528971840,
 1528971900,
 1528971960,
 1528972020,
 1528972080,
 1528972140,
 1528972200,
 1528972260,
 1528972320,
 1528972380,
 1528972440,
 1528972500,
 1528972560,
 1528972620,
 1528972680,
 1528972740,
 1528972800,
 1528972860,
 1528972920,
 1528972980,
 1528973100,
 1528973160,
 1528973220,
 1528973280,

In [79]:
last_5p=times[-int(0.05*len(times))]

In [80]:
last_5p

1534879920

In [81]:
validation_main_df=main_df[(main_df.index>=last_5p)]

In [82]:
main_df=main_df[(main_df.index<last_5p)]

In [87]:
train_x,train_y= preprocess_df(main_df)
validation_x,validation_y= preprocess_df(validation_main_df)

In [88]:
print(f"train data: {len(train_x)} validation: {len(validation_x)}")
print(f"Dont buys: {train_y.count(0)}, buys: {train_y.count(1)}")
print(f"VALIDATION Dont buys: {validation_y.count(0)}, buys: {validation_y.count(1)}")

train data: 69400 validation: 3688
Dont buys: 34700, buys: 34700
VALIDATION Dont buys: 1844, buys: 1844


# Modeling

In [98]:
model=Sequential()
model.add(CuDNNLSTM(128,input_shape=(train_x.shape[1:]),return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(CuDNNLSTM(128,input_shape=(train_x.shape[1:]),return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(CuDNNLSTM(128,input_shape=(train_x.shape[1:])))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32,activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(2,activation='softmax'))

In [99]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
cu_dnnlstm (CuDNNLSTM)       (None, 60, 128)           70656     
_________________________________________________________________
dropout (Dropout)            (None, 60, 128)           0         
_________________________________________________________________
batch_normalization (BatchNo (None, 60, 128)           512       
_________________________________________________________________
cu_dnnlstm_1 (CuDNNLSTM)     (None, 60, 128)           132096    
_________________________________________________________________
dropout_1 (Dropout)          (None, 60, 128)           0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 60, 128)           512       
_________________________________________________________________
cu_dnnlstm_2 (CuDNNLSTM)     (None, 128)               132096    
__________

In [100]:
opt=tf.keras.optimizers.Adam(lr=0.001,decay=1e-6)

In [102]:
model.compile(loss='sparse_categorical_crossentropy',
             optimizer=opt,
             metrics=['accuracy'])

In [125]:
tensorboard=TensorBoard(log_dir=f'logs/{NAME}')

In [126]:
filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"  # unique file name that will include the epoch and the validation acc for that epoch
checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')) # saves only the best ones

In [None]:
os.mkdir('models')
os.listdir()

In [140]:
start = time.time()
history=model.fit(train_x,train_y, 
                  batch_size=BATCH_SIZE,
                  epochs=EPOCHS,
                  validation_data=(validation_x,validation_y),
                  callbacks=[tensorboard,checkpoint])
end = time.time()
print ("Took %f ms" % ((end - start) * 1000.0))

Train on 69400 samples, validate on 3688 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Took 2631297.280550 ms
