In [87]:
import pandas as pd
import numpy as np
import os
from sklearn import preprocessing
from collections import deque
import random
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
#df=pd.read_csv('crypto_data/LTC-USD.csv',names=['time','low','high','open','close','volume'])

In [88]:
#df.head()
def preprocess_df(df, seq_len=None):
    """Turn a labelled price frame into balanced, shuffled sequence arrays.

    Processing steps:
      1. Drop the 'future' column (it was only needed to build 'target').
      2. Replace every feature column by its percent change, then standardise
         it with ``preprocessing.scale``.
      3. Slide a window of ``seq_len`` consecutive rows over the frame; each
         full window is paired with the 'target' of its last row.
      4. Keep an equal number of target==1 ("buy") and target==0 ("sell")
         windows and shuffle them.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'future' and 'target' columns; every other column is
        treated as a feature. The caller's frame is not modified.
    seq_len : int, optional
        Window length. Defaults to the module-level ``SEQ_LEN``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, Y)`` where ``X`` has shape (n_samples, seq_len, n_features) and
        ``Y`` holds the matching labels taken from the 'target' column.
    """
    if seq_len is None:
        seq_len = SEQ_LEN

    # Keyword form: the positional axis argument (`drop('future', 1)`) is
    # rejected by pandas 2.x.
    df = df.drop(columns='future')
    feature_cols = [col for col in df.columns if col != 'target']

    for col in feature_cols:
        df[col] = df[col].pct_change()
        # pct_change yields +/-inf when the previous value is 0 (e.g. a minute
        # with zero volume); scale() raises on infinities, so treat them like
        # missing values and drop those rows.
        df = df.replace([np.inf, -np.inf], np.nan).dropna()
        df[col] = preprocessing.scale(df[col].values)
    df = df.dropna()

    # Select features and labels by column name instead of relying on
    # 'target' happening to be the last column.
    feature_rows = df[feature_cols].values
    targets = df['target'].values

    sequential_data = []
    window = deque(maxlen=seq_len)  # rolling buffer of the latest seq_len rows
    for row, target in zip(feature_rows, targets):
        window.append(row)
        if len(window) == seq_len:
            sequential_data.append((np.array(window), target))

    buys = [pair for pair in sequential_data if pair[1] == 1]
    sells = [pair for pair in sequential_data if pair[1] == 0]
    random.shuffle(buys)
    random.shuffle(sells)

    # Balance the classes by truncating the larger one.
    lower = min(len(buys), len(sells))
    balanced = buys[:lower] + sells[:lower]
    # Shuffle again so the two classes are interleaved rather than stacked.
    random.shuffle(balanced)

    if not balanced:
        # Keep the documented 3-D shape even when no window survives, so
        # callers can still read X.shape[1:].
        return np.empty((0, seq_len, len(feature_cols))), np.empty((0,))

    X = np.array([seq for seq, _ in balanced])
    Y = np.array([target for _, target in balanced])
    return X, Y
        
    
                

In [89]:
# --- Run configuration -------------------------------------------------------
# The data has one row per minute, so SEQ_LEN rows of history are used to
# predict the price direction FUTURE_PERIOD_PREDICT minutes ahead.
SEQ_LEN = 60                    # rows (minutes) of history per input sequence
FUTURE_PERIOD_PREDICT = 3       # rows (minutes) ahead that the label refers to
RATIO_TO_PREDICT = 'LTC-USD'    # coin whose close price is being predicted
EPOCHS = 10
BATCH_SIZE = 64
# Run identifier: embeds the key settings plus the current Unix time.
NAME = f'{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}'
def classify(current, future):
    """Return 1 when `future` is strictly greater than `current`, else 0.

    Both arguments are converted with float() before comparing, so numeric
    strings are accepted. Equal values, and any comparison involving NaN,
    yield 0.
    """
    return int(float(future) > float(current))
        

In [90]:
# Load every coin's CSV and collect its close/volume columns in one frame,
# aligned on the 'time' index.
main_df = pd.DataFrame()
ratios = ['BTC-USD', 'LTC-USD', 'ETH-USD', 'BCH-USD']
for ratio in ratios:
    dataset = f'crypto_data/{ratio}.csv'
    # Column names are supplied via names=, so pandas reads the first line of
    # each file as data rather than as a header.
    df = pd.read_csv(dataset, names=['time', 'low', 'high', 'open', 'close', 'volume'])
    # Prefix the kept columns with the coin name so they stay distinct after
    # the join; built as a chain instead of mutating the frame in place.
    df = (
        df.rename(columns={'close': f'{ratio}_close', 'volume': f'{ratio}_volume'})
          .set_index('time')
          [[f'{ratio}_close', f'{ratio}_volume']]
    )
    main_df = df if main_df.empty else main_df.join(df)

# join() is a left join on the first coin's timestamps, so any minute missing
# from a later coin's file shows up as NaN. Carry the last known value forward
# and drop leading rows that still have no value; otherwise the NaNs flow into
# the label computation, where NaN comparisons silently evaluate to False.
main_df = main_df.ffill().dropna()
print(main_df.columns)

Index(['BTC-USD_close', 'BTC-USD_volume', 'LTC-USD_close', 'LTC-USD_volume',
       'ETH-USD_close', 'ETH-USD_volume', 'BCH-USD_close', 'BCH-USD_volume'],
      dtype='object')


In [91]:
# Label every row: 'future' is the predicted coin's close price
# FUTURE_PERIOD_PREDICT rows later, and 'target' is 1 when that future price
# is higher than the current one (see classify).
close_col = f'{RATIO_TO_PREDICT}_close'
main_df['future'] = main_df[close_col].shift(-FUTURE_PERIOD_PREDICT)
# The last FUTURE_PERIOD_PREDICT rows have no future price (NaN after the
# shift). classify() compares against NaN, which is always False, so those
# rows would be labelled 0 without any evidence. Drop rows lacking either
# price before labelling. .copy() gives an independent frame so the column
# assignment below does not write into a derived slice.
# Note: re-running this cell on its own trims another FUTURE_PERIOD_PREDICT
# rows; re-run the loading cell first for a clean result.
main_df = main_df.dropna(subset=[close_col, 'future']).copy()
main_df['target'] = list(map(classify, main_df[close_col], main_df['future']))
print(main_df[[close_col, 'future', 'target']].head(10))

            LTC-USD_close     future  target
time                                        
1528968660      96.580002  96.500000       0
1528968720      96.660004  96.389999       0
1528968780      96.570000  96.519997       0
1528968840      96.500000  96.440002       0
1528968900      96.389999  96.470001       1
1528968960      96.519997  96.400002       0
1528969020      96.440002  96.400002       0
1528969080      96.470001  96.400002       0
1528969140      96.400002  96.400002       0
1528969200      96.400002  96.400002       0


In [92]:
# Pick the timestamp that separates the most recent ~5% of rows (validation)
# from the rest (training).
times = sorted(main_df.index.values)
# int(0.05 * n) is 0 when there are fewer than 20 rows, and times[-0] is the
# FIRST timestamp, which would put every row in validation and leave the
# training set empty. Always hold out at least one row.
n_validation = max(1, int(0.05 * len(times)))
last_5pct = times[-n_validation]
# Only the predicted coin's close price plus the label columns are kept here;
# the other coins' columns are not passed on to the split below.
mdf = main_df[[f'{RATIO_TO_PREDICT}_close', 'future', 'target']]

In [93]:
# Chronological hold-out: rows at or after the cut-off timestamp form the
# validation frame, everything earlier is used for training.
# Note: main_df is rebound to the training subset here.
is_validation = mdf.index >= last_5pct
is_training = mdf.index < last_5pct
val_main_df = mdf.loc[is_validation]
main_df = mdf.loc[is_training]

In [94]:
# Convert both frames into (sequences, labels) arrays. Training is processed
# before validation, so the calls to the `random` module happen in the same
# order as before.
train_x, train_y = preprocess_df(df=main_df)
val_x, val_y = preprocess_df(df=val_main_df)

In [None]:
# Three stacked LSTM layers (each followed by dropout and batch
# normalisation), then a per-timestep Dense layer whose outputs are flattened
# and fed to a 2-way softmax.
model = Sequential()
model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

# input_shape only matters on the first layer of a Sequential model; later
# layers take their input shape from the layer before them.
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
# The last LSTM still returns a full sequence, so flatten it before the
# classification layer.
model.add(tf.keras.layers.Flatten())
model.add(Dense(2, activation='softmax'))

# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases reject.
opt = tf.keras.optimizers.Adam(learning_rate=0.01)

# Labels are integer class ids (0/1), hence the sparse loss.
model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

tboard_log_dir = os.path.join("logs", NAME)
tensorboard = TensorBoard(log_dir=tboard_log_dir)
# NOTE(review): `filepath` is not used anywhere in this cell; kept in case
# another cell reads it.
filepath = "RNN_Final-{epoch:02d}-{val_loss:.3f}"
# Build the checkpoint path with os.path.join: plain string concatenation
# glued the file name onto the directory name with no separator.
os.makedirs(tboard_log_dir, exist_ok=True)
checkpoint = ModelCheckpoint(
    os.path.join(tboard_log_dir, 'ep{epoch:03d}-val_loss{val_loss:.3f}.h5'),
    monitor='val_loss',
    save_best_only=True,
    # Lower validation loss is better. With mode='max' the callback only saved
    # when val_loss went UP, i.e. it kept the worst model.
    mode='min',
)
history = model.fit(
    train_x, train_y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(val_x, val_y),
    callbacks=[tensorboard, checkpoint],
)


Train on 76380 samples, validate on 3714 samples
Epoch 1/10

In [80]:
# Display the training array's shape as (samples, sequence length, features).
# NOTE(review): the recorded output below appears to come from an earlier
# kernel state (this cell's execution count precedes the cells above, and its
# sample count differs from the training log) - re-run after Restart & Run All.
train_x.shape

(69188, 60, 8)