In [None]:
import pandas as pd
from binance.client import Client
from collections import deque
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.compat.v1.keras.layers import CuDNNLSTM
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import ModelCheckpoint, ModelCheckpoint
import time
from datetime import datetime
from sklearn import preprocessing

In [None]:
# --- Run configuration -------------------------------------------------------
# Number of consecutive rows fed to the RNN as one input sequence.
SEQ_LEN = 60
# How many rows ahead the label looks when comparing against the current close.
FUTURE_PERIOD_PREDICT = 3
# Ticker whose close price is being predicted.
RATIO_TO_PREDICT = "BTC"
# Full passes over the training set.
EPOCHS = 10
# Samples per gradient update.
BATCH_SIZE = 64
# Run identifier (used for log and model paths); the timestamp keeps runs distinct.
NAME = "{}-{}-SEQ-{}-PRED-{}".format(
    RATIO_TO_PREDICT, SEQ_LEN, FUTURE_PERIOD_PREDICT, int(time.time())
)

In [None]:
def classify(current, future):
    """Label a row as buy (1) or don't-buy (0).

    Both arguments are converted with ``float()`` before comparison.

    Args:
        current: Price at the current row.
        future: Price at the later row being compared against.

    Returns:
        1 if ``future`` is strictly greater than ``current``, otherwise 0
        (ties and NaN comparisons therefore yield 0).
    """
    return int(float(future) > float(current))

In [None]:
def preprocess_df(df):
    """Convert a labelled price/volume frame into balanced, shuffled sequences.

    Steps:
      1. Drop the "future" column so it is not used as a feature.
      2. Replace every non-object column except "target" with its percent
         change, map +/-inf to NaN, and standardise the remaining finite
         values with ``preprocessing.scale``.
      3. Drop every row that still contains NaN.
      4. Slide a window of ``SEQ_LEN`` rows over the frame; each full window
         becomes one sample, labelled with the last column of its final row.
      5. Down-sample the larger class so labels 0 and 1 are equally
         represented, then shuffle.

    Args:
        df: DataFrame containing a "future" column and a "target" column.
            The label is read from the LAST column once "future" has been
            dropped, so "target" must be the final column. The caller's
            frame is not modified.

    Returns:
        Tuple ``(X, y)``. ``X`` is a NumPy array of the stacked windows
        (samples x SEQ_LEN x features). ``y`` is a plain Python list of the
        matching labels; it stays a list because callers use ``y.count``.
    """
    # Keyword form: the positional axis argument is rejected by pandas >= 2.0.
    df = df.drop(columns="future")

    for col in df.columns:
        if col == "target" or df[col].dtype == "object":
            continue  # only numeric feature columns are normalised
        # Assign results back instead of using chained ``inplace`` calls on
        # ``df[col]``, which are not guaranteed to write through to ``df``.
        pct = df[col].pct_change().replace([np.inf, -np.inf], np.nan)
        finite = pct.notna()
        if finite.any():
            # Scale only the finite values; NaN rows are removed below.
            pct.loc[finite] = preprocessing.scale(pct.loc[finite].values)
        df[col] = pct

    df = df.dropna()

    sequential_data = []
    window = deque(maxlen=SEQ_LEN)  # oldest row falls out once the window is full

    for row in df.values:
        window.append(list(row[:-1]))  # every column except the label
        if len(window) == SEQ_LEN:
            sequential_data.append([np.array(window), row[-1]])

    random.shuffle(sequential_data)

    buys = []
    sells = []
    for seq, target in sequential_data:
        if target == 0:
            sells.append([seq, target])
        elif target == 1:
            buys.append([seq, target])

    random.shuffle(buys)
    random.shuffle(sells)

    # Truncate both classes to the size of the smaller one.
    lower = min(len(buys), len(sells))
    sequential_data = buys[:lower] + sells[:lower]
    random.shuffle(sequential_data)  # avoid all buys followed by all sells

    X = [seq for seq, _ in sequential_data]
    y = [target for _, target in sequential_data]

    return np.array(X), y


In [None]:
main_df = pd.DataFrame()  # start empty; each coin's columns are joined onto it

ratios = ["BTC", "ETH"]
for ratio in ratios:

    print(ratio)
    dataset = f'data/{ratio}_2021_1y.csv'  # path to this coin's CSV
    columns = ['OpenTime',
            f'{ratio}-USD_Open',
            f'{ratio}-USD_High',
            f'{ratio}-USD_Low',
            f'{ratio}-USD_Close',
            f'{ratio}-USD_volume']
    df = pd.read_csv(dataset)
    df = df[columns]

    print(df.head())  # preview only; printing the whole frame floods the output

    # .copy() makes the filtered frame independent, so the column assignment
    # below does not write to a slice of the frame read from disk.
    df = df[df['OpenTime'].notnull()].copy()

    # Keep the text before the first '.', then parse the whole column in one
    # vectorised call.
    df['OpenTime'] = pd.to_datetime(
        df['OpenTime'].str.split('.').str[0],
        format='%Y-%m-%d %H:%M:%S',
    )

    df.set_index("OpenTime", inplace=True)  # set time as index so we can join them on this shared time
    df = df[[f'{ratio}-USD_Close', f"{ratio}-USD_volume"]]  # ignore the other columns besides price and volume

    if len(main_df) == 0:  # first coin: nothing to join onto yet
        main_df = df
    else:  # later coins: join on the shared time index
        main_df = main_df.join(df)

In [None]:
# If there are gaps in the data, carry the previously known values forward,
# then drop rows that are still incomplete (e.g. leading rows with nothing to
# carry forward). ``ffill()`` replaces the deprecated ``fillna(method="ffill")``.
main_df = main_df.ffill().dropna()
#print(main_df.head(50))  # print DF

In [None]:
# Coerce the price/volume columns of EVERY coin to numeric. Iterating over
# `ratios` avoids depending on whatever value the loading loop's variable
# happened to hold last (which would convert only one coin's columns).
for coin in ratios:
    for field in ('Close', 'volume'):
        column = f'{coin}-USD_{field}'
        main_df[column] = pd.to_numeric(main_df[column], errors='coerce')

# Close price FUTURE_PERIOD_PREDICT rows ahead; the final rows have no future
# value and therefore hold NaN.
main_df['future'] = main_df[f'{RATIO_TO_PREDICT}-USD_Close'].shift(-FUTURE_PERIOD_PREDICT)
#print(main_df['future'].to_string())
# 1 when the future close is higher than the current close, else 0.
main_df['target'] = list(map(classify, main_df[f'{RATIO_TO_PREDICT}-USD_Close'], main_df['future']))
print(main_df.dtypes)
print(main_df['target'].value_counts())  # class balance instead of dumping every row

In [None]:
# Discard, in place, every row that has a NaN in any column.
main_df.dropna(axis=0, how="any", inplace=True)

In [None]:
times = sorted(main_df.index.values)
if not times:
    raise ValueError("main_df is empty; cannot compute a validation split")

# int() truncates to 0 when there are fewer than 20 rows, and times[-0] is the
# FIRST timestamp, which would send every row to validation. Keep at least one.
n_validation = max(1, int(0.05 * len(times)))
# First timestamp of the validation window (the last ~5% of the index).
last_5pct = times[-n_validation]

In [None]:
# Rows at or after the threshold form the validation set; earlier rows stay
# in main_df for training. main_df is rebound here, so running this cell a
# second time would leave val_main_df empty.
row_times = main_df.index
val_main_df = main_df[row_times >= last_5pct]
main_df = main_df[row_times < last_5pct]

In [None]:
# Show the training frame, then convert each split into (sequences, labels).
print(main_df)
train_x, train_y = preprocess_df(df=main_df)
val_x, val_y = preprocess_df(df=val_main_df)

In [None]:
# Sample counts and class balance per split. The labels are still Python
# lists at this point, so list.count is available.
n_train, n_val = len(train_x), len(val_x)
train_dont_buys, train_buys = train_y.count(0), train_y.count(1)
val_dont_buys, val_buys = val_y.count(0), val_y.count(1)

print(f"train data: {n_train} validation: {n_val}")
print(f"Dont buys: {train_dont_buys}, buys: {train_buys}")
print(f"VALIDATION Dont buys: {val_dont_buys}, buys: {val_buys}")

In [None]:
# First recurrent block. The input shape is everything after the sample axis
# of train_x, and the LSTM emits its full output sequence.
model = Sequential()
first_block = (
    LSTM(128, input_shape=train_x.shape[1:], return_sequences=True),
    Dropout(0.2),
    BatchNormalization(),
)
for layer in first_block:
    model.add(layer)

In [None]:
# Second recurrent block: still returns the full sequence; lighter dropout.
second_block = (
    LSTM(128, return_sequences=True),
    Dropout(0.1),
    BatchNormalization(),
)
for layer in second_block:
    model.add(layer)

In [None]:
# Third recurrent block: return_sequences is left at its default here.
third_block = (
    LSTM(128),
    Dropout(0.2),
    BatchNormalization(),
)
for layer in third_block:
    model.add(layer)

In [None]:
# Fully connected layer followed by dropout.
for layer in (Dense(32, activation='relu'), Dropout(0.2)):
    model.add(layer)

In [None]:
# Output layer: two softmax units, one per class label (0 and 1).
output_layer = Dense(2, activation='softmax')
model.add(output_layer)

In [None]:
# Adam from the legacy optimizer namespace, configured with a learning rate
# and a `decay` value.
opt = tf.keras.optimizers.legacy.Adam(
    decay=1e-6,
    learning_rate=0.001,
)

In [None]:
# Integer class labels with a softmax output -> sparse categorical
# cross-entropy; accuracy is tracked as the reported metric.
model.compile(
    optimizer=opt,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Each run logs into its own sub-directory of logs/, named after NAME.
run_log_dir = "logs/{}".format(NAME)
tensorboard = TensorBoard(log_dir=run_log_dir)

In [None]:
# File name template: Keras fills in the epoch number and that epoch's
# validation accuracy when it writes a checkpoint.
filepath = "RNN_Final-{epoch:02d}-{val_accuracy:.3f}"
# The options must be passed to ModelCheckpoint, not to str.format (where they
# were silently ignored). The monitored name must match the logged metric,
# which is 'val_accuracy' because the model is compiled with
# metrics=['accuracy'].
checkpoint = ModelCheckpoint(
    "models/{}.model".format(filepath),
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,  # only write a file when val_accuracy improves
    mode='max',
)

In [None]:
# Convert every split to NumPy arrays before handing them to model.fit
# (the labels are Python lists up to this point).
train_x, train_y = np.asarray(train_x), np.asarray(train_y)
val_x, val_y = np.asarray(val_x), np.asarray(val_y)

fit_options = dict(
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(val_x, val_y),
    callbacks=[tensorboard, checkpoint],
)
history = model.fit(train_x, train_y, **fit_options)

In [None]:
score = model.evaluate(val_x, val_y, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
# Save model
model.save("models/{}".format(NAME))
%tensorboard --logdir logs/fit # LAUNCH VALIDATION METRICS HERE