<a href="https://colab.research.google.com/github/I3ryI3e/fantastic-octo-barnacle/blob/master/RNNCrypto.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import requests        # for making http requests to binance
import json            # for parsing what binance sends back to us
import pandas as pd    # for storing and manipulating the data we get back
import numpy as np     # numerical python
from sklearn import preprocessing  #helps in the preprocessing function
from collections import deque
import matplotlib.pyplot as plt # for charts and such
import random
    
import datetime as dt  # for dealing with times

# SEQ_LEN = How much hours are we going to give to the NN so it can predict 
SEQ_LEN = 48
# What Pair are we going to predict
RATIO_TO_PREDICT = 'ETHUSDT'
#How many hours ahead are we going to predict(if '1' we are going to predict if the price is going to be higher in the next hour)
FUTURE_PERIOD_PREDICT = 1
#Validation sample split
VALIDATION_PERCENTAGE = 0.05
TRAINING_DATA = 1-VALIDATION_PERCENTAGE


In [0]:
#Connect to binance API and get the data in each pair we want information on
# INPUTS: symbol = The pair(ex= LTC-USDT) // Interval = 1h(1 hour intervals)
# OUTPUTS: DataFrame with the data 

def get_bars(symbol, interval = '1h'):
 root_url = 'https://api.binance.com/api/v1/klines'
 url = root_url + '?symbol=' + symbol + '&interval=' + interval
 data = json.loads(requests.get(url).text)
 df = pd.DataFrame(data)
 df.columns = ['open_time',
               'open_price', 'high', 'low', 'close_price', 'volume',
               'close_time', 'qav', 'num_trades',
               'taker_base_vol', 'taker_quote_vol', 'ignore']
 df=df.drop(columns=['close_time', 'qav', 'num_trades',
               'taker_base_vol', 'taker_quote_vol', 'ignore'])

 df.index = [dt.datetime.fromtimestamp(x/1000.0) for x in df.open_time]
 return df

In [0]:
#Does the targets for the network.
#INPUTS: Current price; Future Price
#OUTPUT: 1 if the Future Price is higher than the current price, 0 otherwise

def classify(current, future):
  if float(future) > float(current):
    return 1
  else:
    return 0

In [0]:
def preprocess(df):
  df = df.drop('future', 1)
  print(df.head(5))
  for col in df.columns:
    if col != 'target':
      df[col]=df[col].astype(float).pct_change()
      df.dropna(inplace=True)
      df[col] = preprocessing.scale(df[col].values)
      
  df.dropna(inplace=True)
  
  sequential_data = []
  prev_days= deque(maxlen=SEQ_LEN)
  #Making the sequeces // appending values until we get to SEQ_LEN and add it to sequential_data
  #that's going to be a list of sequences
  for i in df.values:
      prev_days.append([n for n in i[:-1]])
      if len(prev_days) == SEQ_LEN:
        sequential_data.append([np.array(prev_days),i[-1]])
  print(df.head())      
  random.shuffle(sequential_data)

In [5]:
#Preparing all the DataFrame to work with

btcusdt = get_bars('BTCUSDT')
ethusdt = get_bars('ETHUSDT')
ltcusdt = get_bars('LTCUSDT')
xrpusdt = get_bars('XRPUSDT')

main_df = pd.DataFrame(ethusdt)

main_df= pd.merge(main_df,btcusdt,on='open_time',how='left',suffixes=('_ETHUSDT','_BTCUSDT')).fillna(method='ffill')

ltcusdtColumNames=ltcusdt.columns.tolist()
renamedNamesLTC=[]
for name in ltcusdtColumNames:
  renamedNamesLTC.append(name+"_LTCUSDT")
      
renamedNamesLTC=dict(zip(ltcusdtColumNames, renamedNamesLTC))
ltcusdt=ltcusdt.rename(index=str, columns=renamedNamesLTC)

xrpusdtColumNames=xrpusdt.columns.tolist()
renamedNamesXRP=[]
for name in xrpusdtColumNames:
  renamedNamesXRP.append(name+"_XRPUSDT")
      
renamedNamesXRP=dict(zip(xrpusdtColumNames, renamedNamesXRP))
xrpusdt=xrpusdt.rename(index=str, columns=renamedNamesXRP)


main_df= pd.concat([main_df.reset_index(drop=True),ltcusdt.reset_index(drop=True)], axis=1)
main_df= pd.concat([main_df.reset_index(drop=True),xrpusdt.reset_index(drop=True)], axis=1)


main_df['future'] = main_df[f'close_price_{RATIO_TO_PREDICT}'].shift(-FUTURE_PERIOD_PREDICT)
main_df['target'] = list(map(classify, main_df[f'close_price_{RATIO_TO_PREDICT}'], main_df['future']))

print(main_df[[f'close_price_{RATIO_TO_PREDICT}','future','target']].head(20))


   close_price_ETHUSDT        future  target
0         201.54000000  201.98000000       1
1         201.98000000  202.21000000       1
2         202.21000000  201.74000000       0
3         201.74000000  201.81000000       1
4         201.81000000  202.08000000       1
5         202.08000000  202.58000000       1
6         202.58000000  202.50000000       0
7         202.50000000  202.46000000       0
8         202.46000000  202.47000000       1
9         202.47000000  202.35000000       0
10        202.35000000  202.09000000       0
11        202.09000000  201.90000000       0
12        201.90000000  201.47000000       0
13        201.47000000  201.30000000       0
14        201.30000000  201.57000000       1
15        201.57000000  201.31000000       0
16        201.31000000  201.29000000       0
17        201.29000000  201.07000000       0
18        201.07000000  201.24000000       1
19        201.24000000  199.95000000       0


In [6]:
#Separate Validation data -- Last VALIDATION_PERCENTAGE of the data

validation_data = main_df.loc[(len(main_df)-(len(main_df)*VALIDATION_PERCENTAGE)):len(main_df)]
main_df= main_df[:(-int((len(main_df)*VALIDATION_PERCENTAGE)))]

print(len(main_df))
print(len(validation_data))
print(main_df.head())
print(validation_data.head())

475
25
       open_time open_price_ETHUSDT  high_ETHUSDT   low_ETHUSDT  \
0  1541163600000       200.72000000  202.27000000  200.68000000   
1  1541167200000       201.54000000  202.17000000  201.23000000   
2  1541170800000       202.06000000  203.74000000  201.67000000   
3  1541174400000       202.22000000  203.12000000  201.60000000   
4  1541178000000       201.78000000  203.13000000  201.57000000   

  close_price_ETHUSDT volume_ETHUSDT open_price_BTCUSDT   high_BTCUSDT  \
0        201.54000000  9500.56966000      6415.96000000  6442.00000000   
1        201.98000000  4885.57363000      6424.08000000  6448.00000000   
2        202.21000000  8203.44684000      6440.42000000  6447.94000000   
3        201.74000000  6225.26925000      6424.80000000  6433.84000000   
4        201.81000000  5136.43482000      6421.46000000  6437.62000000   

     low_BTCUSDT close_price_BTCUSDT  ...   close_price_LTCUSDT  \
0  6414.38000000       6424.91000000  ...           51.58000000   
1  6419.050

In [22]:
preprocess(main_df)
#train_x,train_y = preprocess_df(main_df)
#validation_x, validation_y = preprocess_df(validation_data)


       open_time open_price_ETHUSDT  high_ETHUSDT   low_ETHUSDT  \
0  1541163600000       200.72000000  202.27000000  200.68000000   
1  1541167200000       201.54000000  202.17000000  201.23000000   
2  1541170800000       202.06000000  203.74000000  201.67000000   
3  1541174400000       202.22000000  203.12000000  201.60000000   
4  1541178000000       201.78000000  203.13000000  201.57000000   

  close_price_ETHUSDT volume_ETHUSDT open_price_BTCUSDT   high_BTCUSDT  \
0        201.54000000  9500.56966000      6415.96000000  6442.00000000   
1        201.98000000  4885.57363000      6424.08000000  6448.00000000   
2        202.21000000  8203.44684000      6440.42000000  6447.94000000   
3        201.74000000  6225.26925000      6424.80000000  6433.84000000   
4        201.81000000  5136.43482000      6421.46000000  6437.62000000   

     low_BTCUSDT close_price_BTCUSDT  ...    low_LTCUSDT  close_price_LTCUSDT  \
0  6414.38000000       6424.91000000  ...    51.19000000          51.58