In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import time

In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Use seaborn's 'whitegrid' style for the plots in this notebook.
sns.set_style(style='whitegrid')

In [3]:
import binance_framework as bf

In [None]:
# Trading pairs to load. BCH is deliberately left out because it has many
# null values.
currencies = ['BTCUSDT', 'ETHUSDT', 'LTCUSDT', 'XRPUSDT']
# Alternative source: a previously saved CSV instead of the helper call below.
#coins = pd.read_csv('bitcoin_datasets/interval_30min.csv')
# Fetch the data through the project helper.
# NOTE(review): presumably the first argument is a number of days/batches and
# the result holds one set of columns per pair at 5-minute resolution --
# confirm against binance_framework.
coins = bf.get_data_by_intervals(10, currencies, bf.INTERVAL_5MINUTE)
coins.info()

In [None]:
# NOTE(review): presumably reports gaps between consecutive timestamps for
# each pair -- confirm against binance_framework.
bf.show_time_skips(coins, currencies)

In [None]:
# The return value is not used, so this presumably edits `coins` in place
# (keeping a single timestamp column) -- confirm against binance_framework.
bf.keep_one_timestamp(coins,currencies)

In [None]:
#coins.drop('Unnamed: 0', axis=1, inplace=True)
# Return value unused here as well; presumably removes the per-pair "ignore"
# columns from `coins` in place -- confirm against binance_framework.
bf.remove_ignore_columns(coins,currencies)
# Index the rows by the 'Timestamp' column so later cells can split on it.
coins.set_index('Timestamp',inplace=True)

In [None]:
coins.head()

In [None]:
#plt.figure(figsize=(16,8))
#coins['BTCUSDT Close'].iloc[:720].plot()

In [None]:
# Number of consecutive rows in one input window.
# NOTE(review): the previous note here said "2 days and 4 hours", which does
# not match 100 rows at the 5-minute interval requested when loading the data
# (that would be roughly 8h20m) -- confirm the intended window length.
SEQUENCE_LEN = 100
FUTURE_PERIOD_PREDICT = 5  # how many rows ahead the target looks
RATIO_TO_PREDICT = 'BTCUSDT'  # pair whose price move is the prediction target

In [None]:
def classify(current_price, future_price, threshold=0.0125):
    """Label a row 1 when the future price clears a relative margin, else 0.

    Args:
        current_price: Price at the current row.
        future_price: Price observed FUTURE_PERIOD_PREDICT rows later. May be
            NaN for rows without a future observation.
        threshold: Minimum relative rise over `current_price` that counts as
            a positive. Defaults to 0.0125, i.e. 1.25%.

    Returns:
        1 if `future_price` is strictly greater than
        `current_price + current_price * threshold`; otherwise 0. A NaN
        `future_price` never compares greater, so it also yields 0.
    """
    if future_price > (current_price + current_price * threshold):
        return 1
    return 0

In [None]:
coins['Future Value'] = coins['BTCUSDT High'].shift(-FUTURE_PERIOD_PREDICT)
#coins['Future Value'] = 0.0
#coins.info()

In [None]:
coins['Target'] = list(map(classify, coins['BTCUSDT Close'], coins['Future Value']))

In [None]:
coins.dropna(inplace=True)

In [None]:
coins.info()

In [None]:
coins[ coins['Target'] ==0].count()[0]

In [None]:
times = coins.index.values
last_10pct = times[-int(0.1*len(times))]

In [None]:
last_10pct

In [None]:
coins.index[-1]

In [None]:
validation_data = coins[ (coins.index >= last_10pct)]

In [None]:
coins = coins[ (coins.index < last_10pct)]
coins.drop('Future Value', axis=1, inplace=True)

In [None]:
from sklearn import preprocessing

In [None]:
for col in coins.columns:
    if col != 'Target':
        coins[col] = coins[col].pct_change()
        coins.dropna(inplace=True)
        coins[col] = preprocessing.scale(coins[col].values)

In [None]:
coins.dropna(inplace=True)

In [None]:
from collections import deque

In [None]:
sequential_data = []
prev_days = deque(maxlen=SEQUENCE_LEN)

In [None]:
for i in coins.values:
    prev_days.append([n for n in i[:-1]])
    if len(prev_days) == SEQUENCE_LEN:
        sequential_data.append([np.array(prev_days), i[-1]])

In [None]:
import random

In [None]:
random.shuffle(sequential_data)

In [None]:
sequential_data

In [None]:
buys =[]
sells = []

In [None]:
for seq,target in sequential_data:
    if target == 0:
        sells.append([seq, target])
    elif target ==1:
        buys.append([seq,target])

In [None]:
random.shuffle(buys)
random.shuffle(sells)

In [None]:
lower = min(len(buys), len(sells))

In [None]:
buys = buys[:lower]
sells = sells[:lower]

In [None]:
sequential_data = buys+sells

In [None]:
random.shuffle(sequential_data)

In [None]:
X_train =[]
y_train = []

In [None]:
for seq,target in sequential_data:
    X_train.append(seq)
    y_train.append(target)

In [None]:
len(sells)

In [None]:
np.array(X_train)

In [None]:
# Settings for the function-based pipeline and the training run below. These
# rebind the values assigned earlier in the notebook.
# NOTE(review): by this point earlier cells have already split `coins` and
# replaced its feature columns with scaled percentage changes, so the cells
# below operate on that transformed frame unless the data is reloaded first --
# confirm this is intended.
SEQUENCE_LEN = 60             # rows per input window
FUTURE_PERIOD_PREDICT = 3     # rows ahead used to build the target
RATIO_TO_PREDICT = 'ETHUSDT'  # pair whose price move is predicted
EPOCHS = 20
BATCH_SIZE = 256
# Run name, made unique by the current timestamp.
NAME = 'SEQ-{}-FUTPRED-{}-PRED-{}'.format(SEQUENCE_LEN, FUTURE_PERIOD_PREDICT, time.time())

In [None]:
def classify(current_price, future_price):
    """Return 1 when the future price is strictly above the current price.

    No percentage margin is applied: any rise, however small, counts as 1.
    Every comparison that is not true (equal, lower, or a NaN future price)
    yields 0.
    """
    return 1 if future_price > current_price else 0

In [None]:
coins['Future Value'] = coins[RATIO_TO_PREDICT+' High'].shift(-FUTURE_PERIOD_PREDICT)

In [None]:
coins['Target'] = list(map(classify, coins[RATIO_TO_PREDICT+' Close'], coins['Future Value']))

In [None]:
coins.dropna(inplace=True)

In [None]:
# Number of rows labelled 0. Counting the boolean mask directly avoids the
# positional `[0]` lookup on a label-indexed Series.
(coins['Target'] == 0).sum()

In [None]:
times = coins.index.values
last_10pct = times[-int(0.1*len(times))]

In [None]:
last_10pct

In [None]:
validation_data = coins[ (coins.index >= last_10pct)]

In [None]:
coins = coins[ (coins.index < last_10pct)]

In [None]:
from sklearn import preprocessing

In [None]:
def preprocess_data(coins):
    """Turn a labelled price frame into balanced, shuffled model inputs.

    The frame passed in is left unmodified. Steps:
      1. Drop the look-ahead 'Future Value' column if it is present.
      2. Replace every feature column with its percentage change, discard the
         rows that become NaN or infinite, then standardise each column.
      3. Build sliding windows of SEQUENCE_LEN consecutive rows, each labelled
         with the 'Target' of its last row.
      4. Trim the larger class so both classes have the same number of
         samples, then shuffle.

    Args:
        coins: DataFrame of numeric feature columns plus a 'Target' column
            holding 0/1 labels.

    Returns:
        Tuple (X, y) of numpy arrays. With at least one sample, X has shape
        (n_samples, SEQUENCE_LEN, n_features) and y holds the matching labels.
    """
    # drop() returns a new frame, so the caller's data is never touched.
    frame = coins.drop(columns='Future Value', errors='ignore')
    feature_cols = [col for col in frame.columns if col != 'Target']

    # Compute all percentage changes first and drop invalid rows once;
    # dropping inside a per-column loop would lose one row per column.
    frame[feature_cols] = frame[feature_cols].pct_change()
    # pct_change leaves NaN in the first row and +/-inf after a zero value.
    frame = frame.replace([np.inf, -np.inf], np.nan).dropna()
    for col in feature_cols:
        frame[col] = preprocessing.scale(frame[col].values)
    frame = frame.dropna()

    # Slide a fixed-size window over the rows; emit a sample once it is full.
    window = deque(maxlen=SEQUENCE_LEN)
    sequential_data = []
    for features, target in zip(frame[feature_cols].values, frame['Target'].values):
        window.append(features)
        if len(window) == SEQUENCE_LEN:
            sequential_data.append([np.array(window), target])

    buys = [sample for sample in sequential_data if sample[1] == 1]
    sells = [sample for sample in sequential_data if sample[1] == 0]
    random.shuffle(buys)
    random.shuffle(sells)

    # Trim both classes to the size of the smaller one to balance them.
    lower = min(len(buys), len(sells))
    sequential_data = buys[:lower] + sells[:lower]
    random.shuffle(sequential_data)

    X = [seq for seq, _ in sequential_data]
    y = [target for _, target in sequential_data]
    return np.array(X), np.array(y)

In [None]:
# Build model inputs from each split with a separate call, so each split is
# scaled and balanced on its own rows only.
X_train, y_train = preprocess_data(coins)
X_test, y_test = preprocess_data(validation_data)

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass

import tensorflow as tf

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping

In [None]:
# Three stacked LSTM layers followed by a small dense head.
model = Sequential()

# Only the first layer needs the input shape; X_train.shape[1:] is the shape
# of a single sample.
model.add(LSTM(256, input_shape=(X_train.shape[1:]), return_sequences=True))
model.add(Dropout(0.5))
model.add(BatchNormalization())

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.5))
model.add(BatchNormalization())

# Last recurrent layer returns only its final output for the dense head.
model.add(LSTM(128))
model.add(Dropout(0.5))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

# Two softmax outputs, paired with the sparse (integer-label) loss below.
model.add(Dense(2, activation='softmax'))

# Replaces the deprecated `lr=` / `decay=` optimizer arguments. The schedule
# reproduces the old time-based decay: lr = 0.001 / (1 + 1e-6 * step).
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001, decay_steps=1, decay_rate=1e-6)
opt = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

model.compile(loss='sparse_categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

In [None]:
tensorboard = TensorBoard(log_dir='logs/{}'.format(NAME))
early_stopping = EarlyStopping(monitor='val_loss', patience=10, mode='min')

In [None]:
filepath = "RNN_Final-{epoch:02d}--{val_acc:.3f}"
checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max'))

In [None]:
history = model.fit(
    X_train, y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_test,y_test),
    callbacks= [tensorboard]
)

In [None]:
# Tabulate the metrics recorded during training and plot them.
training_log = model.history.history
losses = pd.DataFrame(training_log)
losses.plot()