In [171]:
import pandas as pd
import numpy as np
import os
from sklearn import preprocessing
from collections import deque
import random
import time
#importing tensorflow stuff
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout,LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

In [172]:
df=pd.read_csv('crypto_data/LTC-USD.csv',names=['time','low','high','open','close','volume'])
print(df.head(5))

         time        low       high       open      close      volume
0  1528968660  96.580002  96.589996  96.589996  96.580002    9.647200
1  1528968720  96.449997  96.669998  96.589996  96.660004  314.387024
2  1528968780  96.470001  96.570000  96.570000  96.570000   77.129799
3  1528968840  96.449997  96.570000  96.570000  96.500000    7.216067
4  1528968900  96.279999  96.540001  96.500000  96.389999  524.539978


In [182]:
SEQ_LEN = 60
FUTURE_PERIOD_PREDICT = 10 #predicting 3 minutes ahead
RATIO_TO_PREDICT = 'ETH-USD'
EPOCHS = 10
BATCH_SIZE = 64
NAME = f"{RATIO_TO_PREDICT}-{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"




#Classify function
def classify(current,future):
    if float(future) > float(current):
        return 1
    else:
        return 0
    
#pre process function
def preprocess_df(df):
    df = df.drop('future',1)
    for col in df.columns:
        if col != 'target':
            df[col] = df[col].pct_change()
            df.dropna(inplace=True)
            df[col] = preprocessing.scale(df[col].values)
    df.dropna(inplace=True)
    
    sequential_data=[]
    prev_days = deque(maxlen = SEQ_LEN)
    #what the above function does
    #it first makes a sequence of 60 as stated above, then as new values come in older values will pop out
    for i in df.values:
        prev_days.append([n for n in i[:-1]])
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days),i[-1]])
    random.shuffle(sequential_data)
    #balancing data
    buys = []
    sells = []
    
    for seq,target in sequential_data:
        if target == 0:
            sells.append([seq,target])
        elif target == 1:
            buys.append([seq,target])
    
    random.shuffle(buys)
    random.shuffle(sells)
    
    lower = min(len(buys),len(sells))
    
    buys = buys[:lower]
    sells = sells[:lower]
    
    sequential_data = buys+sells
    random.shuffle(sequential_data)
    
    X = []
    y = []
    
    for seq,target in sequential_data:
        X.append(seq)
        y.append(target)
    
    return np.array(X),y
    
    
    
#Making a dataset easier to read
main_df = pd.DataFrame()
ratios = ['BTC-USD','LTC-USD','ETH-USD','BCH-USD']
for ratio in ratios:
    dataset=f'crypto_data/{ratio}.csv'
    df = pd.read_csv(dataset,names=['time','low','high','open','close','volume'])
    #print(df.head(5))
    df.rename(columns = {'close':f'{ratio}_close','volume':f'{ratio}_volume'},inplace=True)
    df.set_index('time',inplace=True)
    df = df[[f'{ratio}_close',f'{ratio}_volume']]
    #print(df.head(5))
    if len(main_df) == 0:
        main_df = df
    else:
        main_df = main_df.join(df)

        
        
main_df['future'] = main_df[f'{RATIO_TO_PREDICT}_close'].shift(-FUTURE_PERIOD_PREDICT)
print(main_df[[f'{RATIO_TO_PREDICT}_close','future']].head())

            ETH-USD_close  future
time                             
1528968660            NaN   486.0
1528968720      486.01001   486.0
1528968780      486.00000   486.0
1528968840      485.75000   486.0
1528968900      486.00000   486.0


In [174]:
main_df['target'] = list(map(classify,main_df[f'{RATIO_TO_PREDICT}_close'],main_df['future']))
print(main_df[[f'{RATIO_TO_PREDICT}_close','future','target']].head(10))

            ETH-USD_close     future  target
time                                        
1528968660            NaN  485.75000       0
1528968720      486.01001  486.00000       0
1528968780      486.00000  486.00000       0
1528968840      485.75000  485.98999       1
1528968900      486.00000  485.98999       0
1528968960      486.00000  485.98999       0
1528969020      485.98999  485.98999       0
1528969080      485.98999  486.00000       1
1528969140      485.98999  486.00000       1
1528969200      485.98999  486.00000       1


In [175]:
#classify function works!
#next step
#build sequences
#balance data
#normalize data
#scale data

In [176]:
times = sorted(main_df.index.values)
last_5pct = times[-int(0.05*len(times))]
#print(last_5pct) #looking to split validation data and training data
#now going to split training data and validation data

validation_main_df = main_df[(main_df.index >= last_5pct)]
main_df = main_df[(main_df.index < last_5pct)]

In [178]:
#preprocess_df(main_df)
train_x,train_y = preprocess_df(main_df)
validation_x,validation_y = preprocess_df(validation_main_df)

train_x = np.asarray(train_x)
train_y = np.asarray(train_y)
validation_x = np.asarray(validation_x)
validation_y = np.asarray(validation_y)


  df = df.drop('future',1)
  df = df.drop('future',1)


In [179]:
model = Sequential()
model.add(LSTM(128,input_shape=(train_x.shape[1:]),return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model = Sequential()
model.add(LSTM(128,input_shape=(train_x.shape[1:]),return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model = Sequential()
model.add(LSTM(128,input_shape=(train_x.shape[1:])))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32,activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(2,activation='softmax'))

opt = tf.keras.optimizers.Adam(learning_rate=0.001,decay=1e-6)
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

In [180]:
tensorboard = TensorBoard(log_dir=f'logs/{NAME}')
filepath = 'RNN_Final-{epoch:02d}-{val_acc:.3f}'
checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max'))

In [181]:
history = model.fit(
    train_x, train_y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, validation_y),
    callbacks=[tensorboard, checkpoint],
)

Epoch 1/10

KeyError: 'Failed to format this callback filepath: "models/RNN_Final-{epoch:02d}-{val_acc:.3f}.model". Reason: \'val_acc\''