<a href="https://colab.research.google.com/github/I3ryI3e/fantastic-octo-barnacle/blob/master/RNNCrypto.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import requests        # for making http requests to binance
import json            # for parsing what binance sends back to us
import pandas as pd    # for storing and manipulating the data we get back
import numpy as np     # numerical python
from sklearn import preprocessing  #helps in the preprocessing function
from collections import deque
import matplotlib.pyplot as plt # for charts and such
import random  
import datetime as dt  # for dealing with times
import time
import tensorflow as tf
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, CuDNNLSTM, BatchNormalization
import os

#Fetch data from how many days ago?
DAYS_AGO = 4
#1 Day = 86400000 miliseconds
DAY_TO_MILISECONDS = 86400000
#Days to miliseconds
DAYS_IN_MILISECONDS = DAYS_AGO * DAY_TO_MILISECONDS
#Number of samples (Max 1000)
LIMIT = 1000
#1 Minute = 60000 mili
MIN_EQUALS_MILS= 60000
# SEQ_LEN = How much hours are we going to give to the NN so it can predict 
SEQ_LEN = 60
# What Pair are we going to predict
RATIO_TO_PREDICT = 'ETHUSDT'
#How many hours ahead are we going to predict(if '1' we are going to predict if the price is going to be higher in the next hour)
FUTURE_PERIOD_PREDICT = 3
#Validation sample split
VALIDATION_PERCENTAGE = 0.05
TRAINING_DATA = 1-VALIDATION_PERCENTAGE
EPOCHS = 10
BATCH_SIZE = 64
#Name for the models
NAME = f"{SEQ_LEN}-SEQ--{FUTURE_PERIOD_PREDICT}--PRED--{RATIO_TO_PREDICT}--RATIO--{int(time.time())}"


In [0]:
#Connect to binance API and get the data in each pair we want information on
# INPUTS: symbol = The pair(ex= LTC-USDT) // Interval = 1h(1 hour intervals)
# OUTPUTS: DataFrame with the data 

def get_bars(symbol, startTime ,interval = '1m'):
 root_url = 'https://api.binance.com/api/v1/klines'
 url = root_url + '?symbol=' + symbol + '&interval=' + interval + '&startTime=' + str(startTime) + '&limit=' + str(LIMIT) 
 data = json.loads(requests.get(url).text)
 df = pd.DataFrame(data)
 df.columns = ['open_time',
               'open_price', 'high', 'low', 'close_price', 'volume',
               'close_time', 'qav', 'num_trades',
               'taker_base_vol', 'taker_quote_vol', 'ignore']
 df=df.drop(columns=['close_time', 'qav', 'num_trades',
               'taker_base_vol', 'taker_quote_vol', 'ignore'])
 return df

In [0]:
#Does the targets for the network.
#INPUTS: Current price; Future Price
#OUTPUT: 1 if the Future Price is higher than the current price, 0 otherwise

def classify(current, future):
  if float(future) > float(current):
    return 1
  else:
    return 0

In [0]:
def preprocess(df):
  df = df.drop('future', 1)
  pd.set_option('use_inf_as_na', True)
  for col in df.columns:
    if col != 'target':
      df[col]=df[col].pct_change()
      df.dropna(inplace=True)
      df[col] = preprocessing.scale(df[col].values)
      
  df.dropna(inplace=True)
  sequential_data = []
  prev_days= deque(maxlen=SEQ_LEN)
  #Making the sequeces // appending values until we get to SEQ_LEN and add it to sequential_data
  #that's going to be a list of sequences
  for i in df.values:
      prev_days.append([n for n in i[:-1]])
      if len(prev_days) == SEQ_LEN:
        sequential_data.append([np.array(prev_days),i[-1]])
  random.shuffle(sequential_data)
  
  buys = []
  sells = []
  
  for seq, target in sequential_data:
    if target == 0:
      sells.append([seq,target])
    elif target == 1:
      buys.append([seq,target])
      
  random.shuffle(buys)
  random.shuffle(sells)
   
  lower = min(len(buys), len(sells))
  
  buys = buys[:lower]
  sells = sells[:lower]
  
  sequential_data = buys+sells
  
  random.shuffle(sequential_data)
  
  X = []
  Y = []
  
  for seq, target in sequential_data:
    X.append(seq)
    Y.append(target)
  
  return np.array(X),Y
  
  

In [0]:
#Preparing all the DataFrame to work with

root_url = 'https://api.binance.com/api/v1/time'
now = json.loads(requests.get(root_url).text)
now = now.get('serverTime')
earlier5days = now - DAYS_IN_MILISECONDS
btcusdt = pd.DataFrame()
ethusdt = pd.DataFrame()
ltcusdt = pd.DataFrame()
xrpusdt = pd.DataFrame()

for x in range(earlier5days, now, (LIMIT*MIN_EQUALS_MILS)):
  btcusdt = pd.concat([btcusdt,get_bars('BTCUSDT',x)])
  ethusdt = pd.concat([ethusdt,get_bars('ETHUSDT',x)])
  ltcusdt = pd.concat([ltcusdt,get_bars('LTCUSDT',x)])
  xrpusdt = pd.concat([xrpusdt,get_bars('XRPUSDT',x)])

main_df = pd.DataFrame(ethusdt)
main_df= pd.merge(main_df,btcusdt,on='open_time',how='left',suffixes=('_ETHUSDT','_BTCUSDT')).fillna(method='ffill')

ltcusdtColumNames=ltcusdt.columns.tolist()
renamedNamesLTC=[]
for name in ltcusdtColumNames:
  renamedNamesLTC.append(name+"_LTCUSDT")
      
renamedNamesLTC=dict(zip(ltcusdtColumNames, renamedNamesLTC))
ltcusdt=ltcusdt.rename(index=str, columns=renamedNamesLTC)

xrpusdtColumNames=xrpusdt.columns.tolist()
renamedNamesXRP=[]
for name in xrpusdtColumNames:
  renamedNamesXRP.append(name+"_XRPUSDT")
      
renamedNamesXRP=dict(zip(xrpusdtColumNames, renamedNamesXRP))
xrpusdt=xrpusdt.rename(index=str, columns=renamedNamesXRP)


main_df= pd.concat([main_df.reset_index(drop=True),ltcusdt.reset_index(drop=True)], axis=1)
main_df= pd.concat([main_df.reset_index(drop=True),xrpusdt.reset_index(drop=True)], axis=1)
main_df.index = [dt.datetime.fromtimestamp(x/1000.0) for x in main_df.open_time]
main_df=main_df.drop(columns=['open_time','open_time_LTCUSDT','open_time_XRPUSDT'])


main_df['future'] = main_df[f'close_price_{RATIO_TO_PREDICT}'].shift(-FUTURE_PERIOD_PREDICT)
main_df['target'] = list(map(classify, main_df[f'close_price_{RATIO_TO_PREDICT}'], main_df['future']))



In [0]:
#Separate Validation data -- Last VALIDATION_PERCENTAGE of the data

times = sorted(main_df.index.values)
last_Xpct = times[-int(VALIDATION_PERCENTAGE*len(times))]

validation_data = main_df[(main_df.index >= last_Xpct)]
main_df= main_df[(main_df.index < last_Xpct)]

In [7]:
for col in main_df.columns:
  main_df[col]= pd.to_numeric(main_df[col])
for col in validation_data.columns:
  validation_data[col] = pd.to_numeric(validation_data[col])
  
train_x,train_y = preprocess(main_df)
validation_x, validation_y = preprocess(validation_data)

print(f"train data: {len(train_x)} validation: {len(validation_x)}")
print(f"Dont buys: {train_y.count(0)}, buys: {train_y.count(1)}")
print(f"VALIDATION Dont buys: {validation_y.count(0)}, buys: {validation_y.count(1)}")

model = Sequential()
model.add(CuDNNLSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())  #normalizes activation outputs, same reason you want to normalize your input data.

model.add(CuDNNLSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(CuDNNLSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(2, activation='softmax'))

opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)

# Compile model
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)
if not os.path.exists('logs'):
    os.makedirs('logs')
  
tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))

if not os.path.exists('models'):
    os.makedirs('models')

filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"  # unique file name that will include the epoch and the validation acc for that epoch
checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')) # saves only the best ones

history = model.fit(
    train_x, train_y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, validation_y),
    callbacks=[tensorboard, checkpoint],
)

# Score model
score = model.evaluate(validation_x, validation_y, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
# Save model
model.save("models/{}".format(NAME))

train data: 4998 validation: 198
Dont buys: 2499, buys: 2499
VALIDATION Dont buys: 99, buys: 99
Train on 4998 samples, validate on 198 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.800954980681641
Test accuracy: 0.44949494979598303
