In [57]:
import pandas as pd
from sklearn import preprocessing
from collections import deque
import numpy as np
import random
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, CuDNNLSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

  from ._conv import register_converters as _register_converters


In [30]:
# Peek at one raw CSV. Column names are passed explicitly via `names=`.
csv_columns = ["time", "low", "high", "open", "close", "volume"]
df = pd.read_csv(
    "/home/anmol/Desktop/crypto_data/LTC-USD.csv",
    names=csv_columns,
)
print(df.head())

         time        low       high       open      close      volume
0  1528968660  96.580002  96.589996  96.589996  96.580002    9.647200
1  1528968720  96.449997  96.669998  96.589996  96.660004  314.387024
2  1528968780  96.470001  96.570000  96.570000  96.570000   77.129799
3  1528968840  96.449997  96.570000  96.570000  96.500000    7.216067
4  1528968900  96.279999  96.540001  96.500000  96.389999  524.539978


In [31]:
# Assemble one frame holding a "<pair>_close" / "<pair>_volume" column pair
# for every trading pair, indexed by the "time" column.
main_df = pd.DataFrame()
ratios = ["BTC-USD", "LTC-USD", "ETH-USD", "BCH-USD"]
for ratio in ratios:
    dataset = f"/home/anmol/Desktop/crypto_data/{ratio}.csv"
    close_col = f"{ratio}_close"
    volume_col = f"{ratio}_volume"

    # Read, prefix the two columns we keep with the pair name, index by time.
    df = (
        pd.read_csv(dataset, names=["time", "low", "high", "open", "close", "volume"])
        .rename(columns={"close": close_col, "volume": volume_col})
        .set_index("time")
    )[[close_col, volume_col]]

    # The first pair seeds the frame; later pairs are joined onto its index
    # (DataFrame.join defaults to a left join).
    main_df = df if len(main_df) == 0 else main_df.join(df)

print(main_df.head())

            BTC-USD_close  BTC-USD_volume  LTC-USD_close  LTC-USD_volume  \
time                                                                       
1528968660    6489.549805        0.587100      96.580002        9.647200   
1528968720    6487.379883        7.706374      96.660004      314.387024   
1528968780    6479.410156        3.088252      96.570000       77.129799   
1528968840    6479.410156        1.404100      96.500000        7.216067   
1528968900    6479.979980        0.753000      96.389999      524.539978   

            ETH-USD_close  ETH-USD_volume  BCH-USD_close  BCH-USD_volume  
time                                                                      
1528968660            NaN             NaN     871.719971        5.675361  
1528968720      486.01001       26.019083     870.859985       26.856577  
1528968780      486.00000        8.449400     870.099976        1.124300  
1528968840      485.75000       26.994646     870.789978        1.749862  
1528968900      4

In [32]:
# Number of consecutive rows in each input sequence (used as the deque
# maxlen inside preprocess_df).
SEQ_LEN=60
# How many rows ahead the "future" price is read from (used as the shift
# distance when the "future" column is built).
FUTURE_PERIOD_PREDICT=3
# Trading pair whose "<pair>_close" column is the prediction target.
RATIO_TO_PREDICT="LTC-USD"

def classify(current,future):
    """Return 1 if `future` is strictly greater than `current`, else 0.

    Both arguments are converted with float() before comparing, so numeric
    strings are accepted. Equal values, and any comparison involving NaN,
    yield 0.
    """
    return int(float(future) > float(current))

In [33]:
# "future" holds the target pair's close price FUTURE_PERIOD_PREDICT rows
# later; a negative shift pulls later rows up to the current row.
target_close_col = f"{RATIO_TO_PREDICT}_close"
future_close = main_df[target_close_col].shift(-FUTURE_PERIOD_PREDICT)
main_df['future'] = future_close
print(main_df.head())

            BTC-USD_close  BTC-USD_volume  LTC-USD_close  LTC-USD_volume  \
time                                                                       
1528968660    6489.549805        0.587100      96.580002        9.647200   
1528968720    6487.379883        7.706374      96.660004      314.387024   
1528968780    6479.410156        3.088252      96.570000       77.129799   
1528968840    6479.410156        1.404100      96.500000        7.216067   
1528968900    6479.979980        0.753000      96.389999      524.539978   

            ETH-USD_close  ETH-USD_volume  BCH-USD_close  BCH-USD_volume  \
time                                                                       
1528968660            NaN             NaN     871.719971        5.675361   
1528968720      486.01001       26.019083     870.859985       26.856577   
1528968780      486.00000        8.449400     870.099976        1.124300   
1528968840      485.75000       26.994646     870.789978        1.749862   
1528968900 

In [34]:
# Side-by-side check of the current close and the shifted "future" close.
preview_columns = [f"{RATIO_TO_PREDICT}_close", "future"]
print(main_df[preview_columns].head())

            LTC-USD_close     future
time                                
1528968660      96.580002  96.500000
1528968720      96.660004  96.389999
1528968780      96.570000  96.519997
1528968840      96.500000  96.440002
1528968900      96.389999  96.470001


In [35]:
# shift(-FUTURE_PERIOD_PREDICT) leaves NaN in "future" for the trailing rows
# (and wherever the later close is missing). classify() would label those
# rows 0, because any comparison with NaN is False, even though no future
# price exists. Drop them so every label is backed by a real future price.
main_df = main_df.dropna(subset=["future"]).copy()

# target = 1 when the future close is strictly above the current close, else 0.
main_df['target'] = list(map(classify, main_df[f"{RATIO_TO_PREDICT}_close"], main_df["future"]))
print(main_df[[f"{RATIO_TO_PREDICT}_close", "future", "target"]].head(15))

            LTC-USD_close     future  target
time                                        
1528968660      96.580002  96.500000       0
1528968720      96.660004  96.389999       0
1528968780      96.570000  96.519997       0
1528968840      96.500000  96.440002       0
1528968900      96.389999  96.470001       1
1528968960      96.519997  96.400002       0
1528969020      96.440002  96.400002       0
1528969080      96.470001  96.400002       0
1528969140      96.400002  96.400002       0
1528969200      96.400002  96.400002       0
1528969260      96.400002  96.449997       1
1528969320      96.400002  96.419998       1
1528969380      96.400002  96.400002       0
1528969440      96.449997  96.419998       0
1528969500      96.419998  96.570000       1


In [38]:
# Chronological hold-out: rows at or after the cut-off timestamp become the
# validation set, everything earlier stays in main_df for training.
# NOTE: main_df is overwritten here, so re-running this cell on its own
# shrinks the training frame again; re-run from the data-loading cell instead.
times = sorted(main_df.index.values)

# At least one timestamp goes to validation. Without the max(), fewer than 20
# rows give int(0.05 * len) == 0, and times[-0] is times[0], which would put
# every row in validation and leave the training frame empty.
n_validation = max(1, int(0.05 * len(times)))
last_5pct = times[-n_validation]  # reuse the sorted list; no second sort needed

validation_main_df = main_df[main_df.index >= last_5pct]
main_df = main_df[main_df.index < last_5pct]

In [51]:
def preprocess_df(df):
    """Turn a price/volume frame into balanced, shuffled (sequence, label) data.

    Steps:
      1. Drop the helper column "future".
      2. Replace every column except "target" with its percent change,
         then standardise it with sklearn's ``preprocessing.scale``.
      3. Slide a window of ``SEQ_LEN`` rows over the frame; each full window
         (all columns but the last) is paired with the last column of the
         window's final row.
      4. Balance the classes by truncating the larger one, then shuffle.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain "future" and "target" columns; "target" has to be the
        last column once "future" is removed, because the label is read from
        the last position of each row.

    Returns
    -------
    (numpy.ndarray, list)
        Array of sequences, and the matching labels as a plain Python list
        (callers rely on ``list.count``).
    """
    # Keyword form: the positional axis argument (`df.drop('future', 1)`)
    # is rejected by pandas >= 2.0.
    df = df.drop(columns='future')

    for col in df.columns:
        if col != "target":
            df[col] = df[col].pct_change()
            # pct_change yields +/-inf when the previous value is 0 (e.g. a
            # row with zero volume). dropna() ignores inf, so convert it to
            # NaN first to keep non-finite values out of scale().
            df[col] = df[col].replace([np.inf, -np.inf], np.nan)
            df.dropna(inplace=True)
            df[col] = preprocessing.scale(df[col].values)
    df.dropna(inplace=True)

    sequential_data = []
    # Bounded deque: once SEQ_LEN rows are held, appending evicts the oldest.
    prev_days = deque(maxlen=SEQ_LEN)
    for row in df.values:
        prev_days.append(list(row[:-1]))  # features only; last value is the label
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days), row[-1]])
    random.shuffle(sequential_data)

    # Split by class so both classes can be cut to the same size.
    buys = []
    sells = []
    for seq, target in sequential_data:
        if target == 0:
            sells.append([seq, target])
        elif target == 1:
            buys.append([seq, target])
    random.shuffle(buys)
    random.shuffle(sells)

    lower = min(len(buys), len(sells))
    buys = buys[:lower]
    sells = sells[:lower]

    # Re-mix so the classes are not grouped back-to-back.
    sequential_data = buys + sells
    random.shuffle(sequential_data)

    x = []
    y = []
    for seq, target in sequential_data:
        x.append(seq)
        y.append(target)

    return np.array(x), y

In [52]:
# Build balanced, shuffled sequence/label sets for the training frame and
# for the held-out validation frame; each frame is processed independently.
train_x,train_y=preprocess_df(main_df)
validation_x,validation_y=preprocess_df(validation_main_df)


In [54]:
# Sanity check on set sizes and class balance. The label sets are plain
# Python lists (as returned by preprocess_df), hence list.count().
print(f"Train data: {len(train_x)} validation: {len(validation_x)}")
print(f"Dont buys:{train_y.count(0)} buys: {train_y.count(1)}")
print(f"validation dont buys: {validation_y.count(0)} buys: {validation_y.count(1)}")

Train data: 65962 validation: 3174
Dont buys:32981 buys: 32981
validation dont buys: 1587 buys: 1587


In [56]:
# Number of passes over the training data and the mini-batch size, both
# passed to model.fit.
EPOCHS=10
BATCH_SIZE=64
# Run label built from the sequence length, prediction horizon and the
# current Unix time; used for the TensorBoard log directory name.
NAME= f"{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"

In [64]:
# Three stacked LSTM layers, a small dense head, and a 2-unit softmax output
# (one unit per class: target 0 / target 1).
model=Sequential()
# Only the first layer needs input_shape; it is taken from the training
# array with the sample axis dropped.
model.add(LSTM(128,input_shape=(train_x.shape[1:]),return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

# return_sequences=True so the next LSTM receives the full sequence.
model.add(LSTM(128,return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

# Last recurrent layer returns only its final output for the dense head.
model.add(LSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation="relu"))
model.add(Dropout(0.2))

model.add(Dense(2, activation="softmax"))

opt=tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)

# Sparse loss: labels are integer class ids (0/1), not one-hot vectors.
model.compile(loss='sparse_categorical_crossentropy',
             optimizer=opt,
             metrics=['accuracy'])

tensorboard= TensorBoard(log_dir=f'log/{NAME}')

filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"  # unique file name that will include the epoch and the validation acc for that epoch
# Fix: the closing parenthesis of .format() used to sit at the very end, so
# monitor/verbose/save_best_only/mode were passed to str.format (which
# ignores unused keyword arguments) and never reached ModelCheckpoint.
# NOTE(review): 'val_acc' is the metric key this notebook's TF version is
# assumed to log; newer TF releases log 'val_accuracy' instead -- confirm
# against the installed version.
checkpoint = ModelCheckpoint("models/{}.model".format(filepath), monitor='val_acc', verbose=1, save_best_only=True, mode='max') # saves only the best ones

# preprocess_df returns labels as Python lists; convert them to arrays so
# fit() receives array-like targets regardless of Keras version.
history=model.fit(
    train_x,np.asarray(train_y),
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x,np.asarray(validation_y)),
    callbacks=[tensorboard,checkpoint],
)

Train on 65962 samples, validate on 3174 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
