## Import Libraries

In [None]:
import json
from datetime import datetime
from os import path, mkdir

import numpy as np
import pandas as pd
import requests
import ta
from sklearn import preprocessing
import gc


## Data Processing

In [None]:
class PrepareData:
    """Download, cache, pre-process and window Binance kline data.

    Raw downloads are cached as ``<raw_data_dir>/binance_<symbol>_<interval>.json``
    and processed frames as
    ``<raw_data_dir>/<processed_data_dir>/<symbol>_<interval>.json``.
    """

    def __init__(self, raw_data_dir, processed_data_dir, end_time):
        """Remember where data lives and where the series is cut off.

        :param raw_data_dir: directory holding the raw kline cache files
        :param processed_data_dir: sub-directory of ``raw_data_dir`` for processed frames
        :param end_time: cut-off time as a ``'%d.%m.%Y %H:%M:%S'`` string
        """
        self.raw_data_dir = raw_data_dir
        self.processed_data_dir = processed_data_dir
        self.end_time = end_time

    def extract_data(self, symbol, interval):
        """Return the raw klines for ``symbol``/``interval`` plus the cache file name.

        The cache file is used when present. Otherwise the klines are downloaded
        page by page, starting 24h after the first available candle, until
        ``self.end_time`` is reached or the exchange has no newer candles, and
        the result is written to the cache.

        :param symbol: trading pair as used in the request URL
        :param interval: kline interval as used in the request URL
        :return: ``(DataFrame of raw kline rows, cache file name)``
        :raises requests.HTTPError: when the API answers with an error status
        """
        fname = 'binance_' + symbol + '_' + interval + '.json'
        fpath = self.raw_data_dir + '/' + fname

        if not path.isdir(self.raw_data_dir):
            mkdir(self.raw_data_dir)

        if path.isfile(fpath):
            print('Retrieving from file...')
            # read from disk into a pandas dataframe
            return pd.read_json(fpath), fname

        print('Downloading data for {}, interval {}...'.format(symbol, interval))

        # Binance API url; limit is max 500 records, max 1200 requests/minute
        base_url = ('https://api.binance.com/api/v1/klines?symbol=' + symbol
                    + '&interval=' + interval + '&startTime=')

        def fetch(start_time):
            # a timeout keeps a stalled connection from hanging the notebook and
            # an HTTP error surfaces as such instead of as a confusing KeyError
            response = requests.get(base_url + str(start_time), timeout=30)
            response.raise_for_status()
            return json.loads(response.text)

        # first available timestamp + 24h, since at listing on the exchange the
        # price varies wildly
        first_timestamp = fetch(0)[0][0]
        day_in_millis = 86400000
        json_data = fetch(first_timestamp + day_in_millis)

        # each new page starts at the last timestamp of the previous one
        start_time = json_data[-1][0]
        end_time = self.convert_date(self.end_time, to_timestamp=True)
        while start_time < end_time:
            # the first element equals the last one of the previous page
            newer = fetch(start_time)[1:]
            if not newer:
                # nothing newer than start_time is available; without this
                # guard start_time would never advance and the loop never end
                break
            json_data.extend(newer)
            start_time = json_data[-1][0]

        # save to disk
        with open(fpath, 'w') as f:
            json.dump(json_data, f, sort_keys=True, indent=4, ensure_ascii=False)

        return pd.DataFrame(json_data), fname

    def process_data(self, data, fname):
        """Reduce raw klines to ``Open Time``/``Close``/``Volume`` plus TA indicators.

        The processed frame is cached on disk; when the cache file already
        exists it is returned and ``data`` is ignored.

        :param data: raw kline frame as returned by :meth:`extract_data`
        :param fname: raw cache file name (``binance_<symbol>_<interval>.json``)
        :return: processed DataFrame
        """
        fname = fname.split('binance_')[1]
        folder_path = self.raw_data_dir + '/' + self.processed_data_dir
        file_path = folder_path + '/' + fname

        if not path.isdir(folder_path):
            mkdir(folder_path)

        if path.isfile(file_path):
            print('Retrieving from file...')
            return pd.read_json(file_path, orient='records')

        # remove any rows with null values (returns a new frame, input untouched)
        df = data.dropna()

        # from binance-api-docs: https://github.com/binance-exchange/binance-official-api-docs/blob/master/rest-api.md
        df.columns = ['Open Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Close Time',
                      'Quote Asset Volume', 'Number of trades', 'Taker buy base asset volume',
                      'Taker buy quote asset volume', 'Ignore']

        # keep only what the models use; Quote Asset Volume is volume in base currency = BTC
        df = df[['Open Time', 'Close', 'Quote Asset Volume']]
        df = df.rename(columns={'Quote Asset Volume': 'Volume'})

        # a fresh download may carry prices as strings while the cached file is
        # parsed to numbers; cast so the rolling indicators work on both paths
        df = df.astype({'Close': 'float64', 'Volume': 'float64'})

        # remove rows after end time and sort by ascending date, so that the
        # last point is the one closest to end time
        end_time = self.convert_date(self.end_time, to_timestamp=True)
        df = df[df['Open Time'] <= end_time].sort_values(by='Open Time')

        # calculate TA indicators
        print('Calculating TA indicators...')
        df = self.calculate_ta(df)

        # remove first 200 elements (MA_200 has no full window there)
        df = df[200:]

        # save to disk in json records format
        with open(file_path, 'w') as f:
            f.write(df.to_json(orient='records'))

        return df

    def load_processed_data(self, fname):
        """Read ``<raw_data_dir>/<processed_data_dir>/<fname>.json`` into a DataFrame."""
        folder_path = self.raw_data_dir + '/' + self.processed_data_dir
        file_path = folder_path + '/' + fname + '.json'
        return pd.read_json(file_path, orient='records')

    def calculate_ta(self, data):
        """Add the technical-analysis columns to ``data`` in place and return it.

        Columns written: ``MA_50``, ``MA_200``, ``EMA``, ``MACD``, ``RSI``,
        ``BB_H`` and ``BB_L``, all derived from the ``Close`` column.
        """

        def moving_average(data_col, n):
            # rows without a full window are filled with 0 instead of NaN
            return data_col.rolling(window=n).mean().fillna(0)

        df = data
        close = df['Close']

        # Trend Indicators
        # Moving Average (MA)
        df['MA_50'] = moving_average(close, 50)
        df['MA_200'] = moving_average(close, 200)

        # Exponential Moving Average (EMA)
        df['EMA'] = ta.ema_slow(df['Close'], n_slow=20, fillna=True)

        # Moving Average Convergence Divergence (MACD)
        df['MACD'] = ta.macd_diff(df['Close'], n_fast=12, n_slow=26, n_sign=9, fillna=True)

        # Momentum Indicators
        # Relative Strength Index (RSI)
        df['RSI'] = ta.rsi(df['Close'], n=14, fillna=True)

        # Volatility Indicators
        # Bollinger Bands (BB)
        df['BB_H'] = ta.bollinger_hband_indicator(df['Close'], n=20, ndev=2, fillna=True)
        df['BB_L'] = ta.bollinger_lband_indicator(df['Close'], n=20, ndev=2, fillna=True)

        return df

    def drop_col(self, df, name='Open Time'):
        """Drop column ``name`` from ``df`` in place."""
        df.drop([name], axis=1, inplace=True)

    def convert_date(self, val, to_timestamp):
        """Convert between date strings and millisecond timestamps.

        :param val: ``'%d.%m.%Y %H:%M:%S'`` string when ``to_timestamp`` is true,
            otherwise millisecond timestamp(s)
        :param to_timestamp: direction of the conversion
        :return: ``int`` milliseconds, or the result of ``pd.to_datetime``
        """
        if to_timestamp:
            # NOTE(review): the parsed datetime is naive, so timestamp() reads it
            # as local time -- confirm this matches the exchange timestamps.
            dt = datetime.strptime(val, '%d.%m.%Y %H:%M:%S')
            return int(dt.timestamp() * 1000)

        # time units are milliseconds
        return pd.to_datetime(val, unit='ms')

    def check_if_nan(self, data):
        """Forward-fill null values of ``data`` in place.

        Nulls at the very start of a column have no predecessor and stay null.
        """
        if data.isnull().values.any():
            # fillna(method='ffill') is deprecated in pandas; ffill() is the
            # equivalent spelling
            data.ffill(inplace=True)

    def create_inputs_minmax(self, data, x_win_size=50, y_win_size=1):
        """Build min-max scaled sliding windows for supervised training.

        :param data: processed frame including ``Open Time``, ``Close`` and ``RSI``
        :param x_win_size: number of rows per input window
        :param y_win_size: number of ``Close`` values following each window
        :return: ``(x_inputs, y_inputs, scaler)``; ``y_inputs`` is flattened to 1-D
        """
        # can store 2x in memory compared to float64
        tmp_data = data.astype('float32')

        self.drop_col(tmp_data, name='Open Time')
        self.check_if_nan(tmp_data)

        # BB_H, BB_L are in the range [0,1]
        # RSI is oscillator [0, 100] --> [0,1]
        tmp_data['RSI'] = tmp_data['RSI'] / 100

        # MaxMin Scaling
        scaler = self.maxmin_normalise(tmp_data)

        x_inputs = []
        y_inputs = []
        for i in range(len(tmp_data) - x_win_size - y_win_size + 1):
            # e.g. x[0:50] y[50:51]
            split = i + x_win_size
            x_inputs.append(np.array(tmp_data.iloc[i:split]))
            y_inputs.append(np.array(tmp_data['Close'].iloc[split:split + y_win_size]))

        x_inputs = np.array(x_inputs)
        # reshape for plotting (_,)
        y_inputs = np.reshape(np.array(y_inputs), (-1,))
        print('Shape X:', np.shape(x_inputs), 'Shape Y:', np.shape(y_inputs))

        return x_inputs, y_inputs, scaler

    def create_inputs_zero_base(self, data, x_win_size=50, y_win_size=1):
        """Build sliding windows normalised against the first row of each window.

        :param data: processed frame including ``Open Time``, ``Close`` and ``RSI``
        :param x_win_size: number of rows per input window
        :param y_win_size: number of rows after the window; the target is the
            normalised ``Close`` of the last of them
        :return: ``(x_inputs, y_inputs, close_bases)`` where ``close_bases`` holds
            the first ``Close`` of every window for inverse scaling
        """
        # can store 2x in memory compared to float64
        tmp_data = data.astype('float32')

        self.drop_col(tmp_data, name='Open Time')
        self.check_if_nan(tmp_data)

        # BB_H, BB_L are in the range [0,1]
        # RSI is oscillator [0, 100]
        # -- Scale to [0, 2], then shift to [-1, 1] range
        tmp_data['RSI'] = ((tmp_data['RSI'] / 100) * 2) - 1

        x_inputs = []
        y_inputs = []
        close_bases = []
        span = x_win_size + y_win_size
        for i in range(len(tmp_data) - span + 1):
            # work on a copy to preserve the original data
            window_data = tmp_data.iloc[i:i + span].copy()
            window_data, close_base = self.zero_base_normalise(window_data)

            # x[0:50] y[50:51]
            x_inputs.append(np.array(window_data.iloc[:x_win_size]))
            y_inputs.append(window_data['Close'].iloc[-1])
            close_bases.append(close_base)

        x_inputs = np.array(x_inputs)
        y_inputs = np.array(y_inputs)
        close_bases = np.array(close_bases)

        print('Shape X:', np.shape(x_inputs), 'Shape Y:', np.shape(y_inputs))
        return x_inputs, y_inputs, close_bases

    def create_inputs_reinforcement(self, data, x_win_size=10):
        """Build min-max scaled windows and entry prices for the DQN agent.

        :param data: processed frame including ``Open Time``, ``Close`` and ``RSI``
        :param x_win_size: number of rows per state window
        :return: ``(inputs, closing_prices, scaler)`` where ``closing_prices[i]``
            is the scaled ``Close`` of the first row of window ``i``
        """
        # can store 2x in memory compared to float64
        tmp_data = data.astype('float32')

        self.drop_col(tmp_data, name='Open Time')
        self.check_if_nan(tmp_data)

        # BB_H, BB_L are in the range [0,1]
        # RSI is oscillator [0, 100] --> [0,1]
        tmp_data['RSI'] = tmp_data['RSI'] / 100

        # MaxMin Scaling
        scaler = self.maxmin_normalise(tmp_data)

        # The minimum Close scales to 0, which can cause a division by 0 in the
        # reward computation; replace it with a neighbouring value.  The row is
        # located by position (idxmin alone yields an index label) and written
        # through a single .iloc call so the frame itself is updated.
        if len(tmp_data) > 1:
            close_ix = tmp_data.columns.get_loc('Close')
            min_pos = tmp_data['Close'].reset_index(drop=True).idxmin()
            # previous element, or the next one when the minimum is the first
            # row (position -1 would wrap around to the end of the series)
            neighbour = min_pos - 1 if min_pos > 0 else min_pos + 1
            tmp_data.iloc[min_pos, close_ix] = tmp_data.iloc[neighbour, close_ix]

        # no splitting into train and validation subsets
        inputs = []
        closing_prices = []
        for i in range(len(tmp_data) - x_win_size + 1):
            window = tmp_data.iloc[i:i + x_win_size]
            # price of the window (1st element) at which trade is executed
            closing_prices.append(window['Close'].iloc[0])
            inputs.append(np.array(window))

        inputs = np.array(inputs)
        closing_prices = np.array(closing_prices)

        print('Shape Inputs:', np.shape(inputs))
        return inputs, closing_prices, scaler

    def maxmin_normalise(self, data):
        """Min-max scale ``data`` to [0, 1] in place and return the fitted scaler.

        The scaler is fitted on every column so it can later inverse-transform a
        full feature row, but ``BB_H``, ``BB_L`` and ``RSI`` keep their incoming
        values in ``data``.
        """
        # BB_H, BB_L, RSI are good -- remember them so they can be restored
        preserved = {name: data[name].copy()
                     for name in data.columns if name in ['BB_H', 'BB_L', 'RSI']}

        # rescale to [0, 1]; fit on a private writable copy and write the result
        # back explicitly, because data.values is not guaranteed to be a
        # writable view of the frame
        scaler = preprocessing.MinMaxScaler(feature_range=(0, 1), copy=False)
        scaled = scaler.fit_transform(data.values.copy())
        data.iloc[:, :] = scaled

        # replace BB_H, BB_L and RSI with original values
        for name, original in preserved.items():
            data[name] = original

        return scaler

    def zero_base_normalise(self, data):
        """Normalise each column of ``data`` against its first row, in place.

        ``BB_H``, ``BB_L`` and ``RSI`` are left untouched.

        :return: ``(data, close_base)`` where ``close_base`` is the first
            ``Close`` value before normalisation (needed for inverse scaling)
        """
        # closing price base for inverse scaling
        close_base = data.loc[:, 'Close'].iloc[0]

        # BB_H, BB_L, RSI are good
        norm_cols = [name for name in data.columns if name not in ['BB_H', 'BB_L', 'RSI']]
        for col in norm_cols:
            # normalise against the 1st element for each window
            tmp_base = data.loc[:, col].iloc[0]
            data.loc[:, col] = (data.loc[:, col] / tmp_base) - 1

        return data, close_base

    def split_train_test(self, df, train_set_size=0.8):
        """Split ``df`` into a leading train part and a trailing test part.

        :param train_set_size: fraction of rows that goes into the train part
        :return: ``(train_set, test_set)``
        """
        cut = int(train_set_size * len(df))
        return df[:cut], df[cut:]


## Plotting 

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import plotly.graph_objs as go
import plotly.offline as py
from plotly import tools as pytls


def plotly_layout(plot_title, ty1, ty2):
    """Return the plotly layout for the stacked two-panel charts.

    :param plot_title: figure title
    :param ty1: title of the lower y axis (bottom 30% of the figure)
    :param ty2: title of the upper y axis (top 60% of the figure)
    """
    font_family = 'Courier New, monospace'

    # lower panel
    lower_axis = dict(
        domain=[0, 0.3],
        title=ty1,
        titlefont=dict(family=font_family, size=16),
        hoverformat='.4f',
        tickformat='.f',
    )
    # upper panel
    upper_axis = dict(
        domain=[0.4, 1],
        title=ty2,
        titlefont=dict(family=font_family, size=16),
        hoverformat='.8f',
        tickformat='.6f',
    )

    return go.Layout(
        title=plot_title,
        titlefont=dict(family=font_family),
        legend=dict(orientation='h'),
        xaxis=dict(type='date'),
        yaxis=lower_axis,
        yaxis2=upper_axis,
    )


def plotly_individual(data, tag):
    """Show closing price and volume of one chart as an interactive plotly figure.

    :param data: frame with ``Open Time``, ``Close`` and ``Volume`` columns
    :param tag: chart label; its first three characters name the traces
    """
    py.init_notebook_mode(connected=True)

    times = data['Open Time']
    short_name = tag[:3]

    trace_price = go.Scatter(x=times, y=data['Close'], name=short_name)
    trace_volume = go.Scatter(x=times, y=data['Volume'], xaxis='x', yaxis='y2', name=short_name)

    # price goes to the second subplot row, volume to the first
    fig = pytls.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.01)
    fig.append_trace(trace_price, 2, 1)
    fig.append_trace(trace_volume, 1, 1)

    layout = plotly_layout(tag, 'Volume [BTC]', 'Closing Price [BTC]')
    py.iplot(go.Figure(fig, layout=layout), filename=tag)


def plotly_all(data, labels, normalise=False):
    """Overlay closing price and volume of several charts in one plotly figure.

    :param data: mapping from chart label to a frame with ``Open Time``,
        ``Close`` and ``Volume`` columns
    :param labels: chart labels to draw; the first three characters of each
        label name its traces
    :param normalise: min-max scale every series with ``plot_scale`` first
    """
    py.init_notebook_mode(connected=True)

    fig = pytls.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.01)

    # titles do not depend on the chart, so they are chosen once
    if normalise:
        plot_title = 'Comparison of Normalised Closing Prices and Volume of EOS, TRX and ONT'
        ty1 = 'Volume'
        ty2 = 'Closing Price'
    else:
        plot_title = 'Comparison of Closing Prices and Volume of EOS, TRX and ONT'
        ty1 = 'Volume [BTC]'
        ty2 = 'Closing Price [BTC]'

    colours = ['#1F77B4', '#B2182B', '#FF7F0E']
    for i, key in enumerate(labels):
        chart = data[key]
        price_data = chart['Close']
        vol_data = chart['Volume']
        open_times = chart['Open Time']

        if normalise:
            price_data = plot_scale(price_data)
            vol_data = plot_scale(vol_data)

        # cycle through the palette so more than three charts do not raise IndexError
        colour = colours[i % len(colours)]

        tmp_price = go.Scatter(
            x=open_times,
            y=price_data,
            name=key[:3],
            line=dict(color=colour)
        )
        fig.append_trace(tmp_price, 2, 1)

        # volume shares the colour of its price trace, so one legend entry suffices
        tmp_vol = go.Scatter(
            x=open_times,
            y=vol_data,
            xaxis='x',
            yaxis='y2',
            name=key[:3],
            line=dict(color=colour),
            showlegend=False
        )
        fig.append_trace(tmp_vol, 1, 1)

    layout = plotly_layout(plot_title, ty1, ty2)

    fig = go.Figure(fig, layout=layout)
    py.iplot(fig, filename='all-together')


def plot_individual(data, tag):
    """Plot closing price (top) and volume (bottom) of one chart with matplotlib.

    :param data: frame with ``Open Time`` (milliseconds), ``Close`` and ``Volume``
    :param tag: chart label used as the figure title
    """
    fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, gridspec_kw={'height_ratios': [3, 1]})
    fig.suptitle(tag, fontsize=16)

    # Converted directly: PrepareData cannot be constructed without its three
    # arguments, and the conversion needs none of its state.
    date_col = pd.to_datetime(data['Open Time'], unit='ms')

    ax1.plot(date_col, data['Close'])
    ax1.grid(True)
    ax1.set_ylabel('Closing Price [BTC]', fontsize=12)
    # the x axis is shared, so tick labels are only shown on the lower panel
    ax1.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    ax1.locator_params(axis='y', nbins=12)
    ax1.yaxis.set_major_formatter(ticker.FormatStrFormatter('%.6f'))

    ax2.plot(date_col, data['Volume'])
    ax2.grid(True)
    ax2.set_ylabel('Volume [BTC]', fontsize=12)
    ax2.locator_params(axis='y', nbins=4)

    fig.autofmt_xdate()
    fig.tight_layout()
    # leave room for the suptitle after tight_layout
    fig.subplots_adjust(top=0.90)
    plt.show()


def plot_all(data):
    """Plot the normalised closing prices of EOS, TRX and ONT (30m) together.

    :param data: mapping containing the ``EOSBTC_30m``, ``TRXBTC_30m`` and
        ``ONTBTC_30m`` frames, each with ``Open Time`` (milliseconds) and ``Close``
    """
    # Timestamps are converted directly: PrepareData cannot be constructed
    # without its three arguments, and the conversion needs none of its state.
    for key, colour in (('EOSBTC_30m', 'r'), ('TRXBTC_30m', 'b'), ('ONTBTC_30m', 'g')):
        chart = data[key]
        dates = pd.to_datetime(chart['Open Time'], unit='ms')
        # every line carries a label so that plt.legend() has entries to show
        plt.plot(dates, plot_scale(chart['Close']), colour, label=key[:3])

    plt.grid(True)
    plt.ylabel('Normalised Closing Price', fontsize=12)
    plt.legend()

    plt.show()


def plot_scale(data_col):
    """Min-max scale ``data_col`` so that its values lie between 0 and 1."""
    lowest = min(data_col)
    value_range = max(data_col) - lowest
    return (data_col - lowest) / value_range


def plotly_results_layout(plot_title, x_label, y_label):
    """Return the single-panel plotly layout used by the result plots.

    :param plot_title: figure title
    :param x_label: title of the x axis
    :param y_label: title of the y axis
    """
    font_family = 'Courier New, monospace'

    x_axis = dict(
        title=x_label,
        titlefont=dict(family=font_family, size=16),
    )
    y_axis = dict(
        title=y_label,
        titlefont=dict(family=font_family, size=16),
        hoverformat='.8f',
        tickformat='.6f',
    )

    return go.Layout(
        title=plot_title,
        titlefont=dict(family=font_family),
        legend=dict(orientation='h'),
        xaxis=x_axis,
        yaxis=y_axis,
    )


def plotly_loss(history, tag):
    """Plot training and validation loss per epoch.

    :param history: object exposing ``epoch`` and a ``history`` mapping with
        ``'loss'`` and ``'val_loss'`` entries
    :param tag: text appended to the plot title
    """
    py.init_notebook_mode(connected=True)

    epochs_axis = history.epoch
    losses = history.history

    traces = [
        go.Scatter(x=epochs_axis, y=losses['loss'], name='Loss'),
        go.Scatter(x=epochs_axis, y=losses['val_loss'], name='Validation Loss'),
    ]
    layout = plotly_results_layout('Model Loss {}'.format(tag), 'Number of Epochs', 'Loss')

    py.iplot(dict(data=traces, layout=layout), filename='model-loss')


def plotly_prediction(predict_df, target, tag):
    """Plot predicted against actual values over time.

    :param predict_df: frame with ``Open Time`` and ``Results`` (predictions)
    :param target: actual values, aligned with ``predict_df``
    :param tag: text appended to the plot title
    """
    py.init_notebook_mode(connected=True)

    times = predict_df['Open Time']
    traces = [
        go.Scatter(x=times, y=target, name='Actual'),
        go.Scatter(x=times, y=predict_df['Results'], name='Predicted'),
    ]

    layout = plotly_results_layout('Prediction {}'.format(tag), 'Date', 'Price')
    # process timestamps as date
    layout.update(xaxis=dict(type='date'))

    py.iplot(dict(data=traces, layout=layout), filename='prediction-results')


## ML Model

In [None]:
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.recurrent import LSTM
from keras.models import Sequential, load_model


class Model:
    """Builds the LSTM network described by the ``model`` section of ``config.json``."""

    def __init__(self):
        """Read the network hyper-parameters from ``config.json`` in the working directory."""
        # the context manager closes the file handle as soon as it is parsed
        with open('config.json') as config_file:
            model_cfg = json.load(config_file)['model']

        self.neurons = model_cfg['neurons']  # number of hidden units in the LSTM layer
        self.activation_function = model_cfg['activation_function']
        self.loss_function = model_cfg['loss_function']  # loss function for calculating the gradient
        self.optimizer = model_cfg['optimizer']  # optimizer for applying gradient descent
        self.dropout = model_cfg['dropout']  # dropout rate used after each LSTM layer to avoid overfitting
        self.model_dir = model_cfg['models_dir']
        self.dqn_dir = model_cfg['dqn_dir']

    def build_network(self, shape, output_size, reinforcement):
        """Stack LSTM -> Dropout -> Dense -> Activation and compile the model.

        :param shape: shape of the input array, ``(samples, window_len, features)``;
            only the last two entries are used
        :param output_size: number of output units, 1 (price) or 3 (hold, long, short)
        :param reinforcement: use a linear output activation instead of the
            configured activation function
        :return: the compiled model
        """
        # start stacking layers
        model = Sequential()
        model.add(LSTM(
            self.neurons,
            input_shape=(shape[1], shape[2]),  # Shape X (1105, 50, 9), Shape Y (1105, )
            activation=self.activation_function
        ))
        model.add(Dropout(self.dropout))

        model.add(Dense(units=output_size))  # 1 (price) or 3 (hold, long, short)
        output_activation = 'linear' if reinforcement else self.activation_function
        model.add(Activation(output_activation))

        model.compile(loss=self.loss_function, optimizer=self.optimizer)
        model.summary()

        return model


def save_network(model, model_name, reinforcement):
    """Save ``model`` as ``<dir>/<model_name>.h5``.

    The directory is the configured ``dqn_dir`` for reinforcement models and
    ``models_dir`` otherwise; it is created when missing.

    :param model: object exposing ``save(filename)``
    :param model_name: file name without extension
    :param reinforcement: select the DQN directory instead of the model directory
    """
    # the context manager closes the file handle as soon as it is parsed
    with open('config.json') as config_file:
        model_cfg = json.load(config_file)['model']
    model_dir = model_cfg['models_dir']
    dqn_dir = model_cfg['dqn_dir']

    fpath = dqn_dir if reinforcement else model_dir
    if not path.isdir(fpath):
        mkdir(fpath)

    filename = fpath + '/' + model_name + '.h5'
    model.save(filename)


def load_network(model_name):
    """Load ``<models_dir>/<model_name>.h5``.

    :param model_name: file name without extension
    :return: the loaded model, or ``None`` (after printing a notice) when the
        file does not exist
    """
    # the context manager closes the file handle as soon as it is parsed
    with open('config.json') as config_file:
        model_dir = json.load(config_file)['model']['models_dir']

    filename = model_dir + '/' + model_name + '.h5'
    if not path.isfile(filename):
        print('Model {} does not exist...'.format(model_name))
        return None

    return load_model(filename)


## Main LSTM

In [None]:
# load configuration file (the context manager closes the handle once parsed)
with open('config.json') as config_file:
    configs = json.load(config_file)

symbols = configs['binance']['symbols']
intervals = configs['binance']['intervals']  # 1d does not give enough data
end_time = configs['binance']['end_time']
x_window_size = configs['data']['x_window_size']
y_window_size = configs['data']['y_window_size']
input_scaling = configs['data']['input_scaling']
train_set_size = configs['data']['train_set_size']
raw_data_dir = configs['data']['raw_data_dir']
processed_data_dir = configs['data']['processed_data_dir']
batch_size = configs['model']['batch_size']  # default 32
epochs = configs['model']['epochs']

p = PrepareData(raw_data_dir, processed_data_dir, end_time)

# Extract and Process Data: one processed frame per symbol/interval pair,
# stored under the key '<symbol>_<interval>'
df_col = {}
for sym in symbols:
    for t in intervals:
        print('\nGetting data for {}, interval {}'.format(sym, t))
        df, fname = p.extract_data(sym, t)

        print('\nProcessing data for {}, interval {}'.format(sym, t))
        df_col[sym + '_' + t] = p.process_data(df, fname)

In [None]:
def plot_raw_data(df_col):
    """Draw the EOS, TRX and ONT 30m charts with matplotlib, then with plotly.

    :param df_col: mapping from chart label to processed frame
    """
    labels = ['EOSBTC_30m', 'TRXBTC_30m', 'ONTBTC_30m']

    # Matplotlib plot
    for key in labels:
        plot_individual(df_col[key], key)
    plot_all(df_col)

    # Plotly plot
    for key in labels:
        plotly_individual(df_col[key], key)
    plotly_all(df_col, labels, normalise=True)
    
    
# plot_raw_data(df_col)

In [None]:
# Work with "label" chart from here onward
label = configs['data']['chart']
chart = df_col[label]

# Split to training and testing data
train_set, test_set = p.split_train_test(chart, train_set_size=train_set_size)

# The training pair holds the input data and labels for supervised learning;
# the testing pair is the validation set used to check if the NN is overfitting.
if input_scaling != 'minmax':
    print('Generating training inputs and lables (X_train, Y_train)...')
    X_train, Y_train, train_bases = p.create_inputs_zero_base(
        train_set, x_win_size=x_window_size, y_win_size=y_window_size)

    print('Generating testing inputs and lables (X_test, Y_test)...')
    X_test, Y_test, test_bases = p.create_inputs_zero_base(
        test_set, x_win_size=x_window_size, y_win_size=y_window_size)

    model_name = '{}_e{}_zerobase'.format(label, epochs)

else:
    print('Generating training inputs and lables (X_train, Y_train)...')
    X_train, Y_train, train_scaler = p.create_inputs_minmax(
        train_set, x_win_size=x_window_size, y_win_size=y_window_size)

    print('Generating testing inputs and lables (X_test, Y_test)...')
    X_test, Y_test, test_scaler = p.create_inputs_minmax(
        test_set, x_win_size=x_window_size, y_win_size=y_window_size)

    model_name = '{}_e{}_maxmin'.format(label, epochs)


## Train the LSTM

In [None]:
 # clean up the memory
gc.collect()

# fixed numpy seed for reproducibility
np.random.seed(202)

# create model architecture (single output unit: the price)
lstm = Model()
lstm_model = lstm.build_network(shape=X_train.shape, output_size=1, reinforcement=False)

# train, validating against the test windows after every epoch
history = lstm_model.fit(
    X_train,
    Y_train,
    epochs=epochs,
    shuffle=True,
    verbose=2,
    batch_size=batch_size,
    validation_data=(X_test, Y_test),
)

# persist the trained network, then show how the loss developed
save_network(lstm_model, model_name, reinforcement=False)
plotly_loss(history, label)

In [None]:
def one_step_prediction(X_test, Y_test, chart, label, lstm_model):
    """Predict every test window, plot the result against ``Y_test`` and return it.

    :param X_test: input windows passed to ``lstm_model.predict``
    :param Y_test: actual values plotted next to the predictions
    :param chart: frame whose last ``len(X_test)`` ``Open Time`` values date the plot
    :param label: text appended to the plot title
    :param lstm_model: model exposing ``predict``
    :return: the raw predictions
    """
    predictions = lstm_model.predict(X_test, verbose=2)

    # keep the dates of the last len(X_test) rows for plotting, re-indexed
    # from 0 so the predictions line up with them
    n_samples = X_test.shape[0]
    dates = chart['Open Time'].iloc[-n_samples:].reset_index(drop=True)
    predict_df = dates.to_frame()
    predict_df['Results'] = pd.DataFrame(predictions)

    # plot prediction results
    plotly_prediction(predict_df, Y_test, label)

    return predictions


# single_step_p = one_step_prediction(X_test, Y_test, chart, label, lstm_model)


In [None]:
def multi_step_prediction(lstm_model, p, inputs, x_window_size, y_window_size, input_scaling):
    """Predict 50 steps ahead, feeding every prediction back in as a new row.

    :param lstm_model: model exposing ``predict``
    :param p: ``PrepareData`` instance used for windowing and indicators
    :param inputs: processed frame; the first 250 rows seed the prediction and
        the following 50 ``Close`` values are printed for comparison
    :param x_window_size: rows per input window
    :param y_window_size: rows per target window
    :param input_scaling: ``'minmax'`` (``'maxmin'`` is accepted as well) selects
        min-max scaling, anything else zero-base scaling
    :return: list of the predicted closing prices in original units
    """
    # 200 needed for MA_200 + 50 for window length
    history_size = 250
    n_steps = 50

    # The input-generation cell spells the option 'minmax' while this function
    # used to test 'maxmin' only, so a min-max trained model was fed zero-base
    # windows; both spellings now select min-max scaling.
    use_minmax = input_scaling in ('minmax', 'maxmin')

    test_set = inputs[:history_size].copy()
    # need to reset the index to start from 0
    test_set.index = range(len(test_set))

    predictions = []
    for i in range(n_steps):
        print('Iteration:', i + 1)

        if use_minmax:
            X_test, Y_test, scaler = p.create_inputs_minmax(
                test_set, x_win_size=x_window_size, y_win_size=y_window_size)
        else:
            X_test, Y_test, close_bases = p.create_inputs_zero_base(
                test_set, x_win_size=x_window_size, y_win_size=y_window_size)

        # only do prediction on the last window
        last = np.reshape(X_test[-1], (1, X_test.shape[1], X_test.shape[2]))
        out = lstm_model.predict(last, verbose=2)

        # perform inverse scaling
        if use_minmax:
            # The scaler was fitted on the features without 'Open Time', so the
            # Close position has to be looked up among those columns.
            feature_cols = [col for col in test_set.columns if col != 'Open Time']
            close_ix = feature_cols.index('Close')
            # ndarray with same number of features required for inverse scaling
            tmp_nd = np.zeros((len(out), X_test.shape[2]))
            tmp_nd[:, close_ix] = np.ravel(out)
            out_inv = scaler.inverse_transform(tmp_nd)[:, close_ix]
        else:
            out_inv = (out + 1) * close_bases[-1]

        # take the single value out of the array (float() on a non-0-d array is deprecated)
        out_inv = float(np.ravel(out_inv)[0])

        predictions.append(out_inv)
        print('Predicted:', out_inv, 'True:', inputs.loc[:, 'Close'].iloc[i + history_size])

        # add output back to the initial df; DataFrame.append was removed in
        # pandas 2, concat with a one-row frame is the equivalent
        new_row = pd.DataFrame([{'Close': out_inv}])
        test_set = pd.concat([test_set, new_row], ignore_index=True)

        # calculate indicators
        test_set = p.calculate_ta(test_set)

    return predictions


# multi_step_p = multi_step_prediction(lstm_model, p, test_set, x_window_size, y_window_size, input_scaling)


## Evaluate

In [None]:
from sklearn.metrics import mean_squared_error
from math import sqrt

def evaluate():
    """Print the test RMSE of the trained LSTM and the MAE of three saved networks.

    Works on the notebook globals ``lstm_model``, ``configs``, ``X_train``,
    ``Y_train``, ``X_test`` and ``Y_test``.
    """
    # NOTE(review): `lstm_out` was never defined anywhere; it is taken to be the
    # trained network's predictions for X_test -- confirm.
    lstm_out = lstm_model.predict(X_test)
    rmse = sqrt(mean_squared_error(lstm_out, Y_test))
    print('Test RMSE: %.3f' % rmse)

    # `loss_function` and `optimizer` were undefined names as well; these are
    # the same config entries the Model class compiles with
    loss_function = configs['model']['loss_function']
    optimizer = configs['model']['optimizer']

    # load and score some other networks
    for loaded_label in ('EOSBTC_30m_150', 'EOSBTC_4h_100', 'EOSBTC_1h_150'):
        loaded_model = load_network(loaded_label)
        if loaded_model is None:
            # load_network already reported the missing file
            continue

        # NOTE(review): the original comment said "test data" but the code
        # scores on the training windows; kept as coded -- confirm the intent.
        loaded_model.compile(loss=loss_function, optimizer=optimizer, metrics=['mae'])
        score = loaded_model.evaluate(X_train, Y_train, verbose=0)
        print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))
    

# evaluate()


## Reinforcement Learning

In [None]:
import random
from collections import deque


class DQNAgent:
    """Deep Q-learning trading agent with actions 0 = hold, 1 = long, 2 = short."""

    def __init__(self, state_shape):
        """Read the ``reinforcement`` section of ``config.json`` and build the Q-network.

        :param state_shape: shape of the full input array, ``(_, window_len, features)``
        """
        # the context manager closes the file handle as soon as it is parsed
        with open('config.json') as config_file:
            rl_cfg = json.load(config_file)['reinforcement']

        self.action_size = rl_cfg['action_size']  # hold, long, short
        self.gamma = rl_cfg['gamma']  # reward discount rate
        self.epsilon = rl_cfg['epsilon']  # exploration rate
        self.epsilon_min = rl_cfg['epsilon_min']
        self.epsilon_decay = rl_cfg['epsilon_decay']
        self.batch_size = rl_cfg['batch_size']

        self.memory = deque(maxlen=2000)  # deque automatically removes oldest memory once the maxlen is reached
        self.state_shape = state_shape[1:]  # (_, window_len, features)
        self.inventory = []  # entry price of the open trade, if any

        self.model = Model().build_network(shape=state_shape, output_size=self.action_size, reinforcement=True)
        self.history = 0  # replaced by the result of the latest fit() call

    def compute_action(self, state, evaluation):
        """Pick an action for ``state``.

        :param state: input passed to the network's ``predict``
        :param evaluation: when true, never explore randomly
        :return: index of the chosen action
        """
        # get random action based on epsilon for exploration
        if not evaluation and np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)

        # one row of values per state: [[_, _, _]]
        options = self.model.predict(state)

        # index of the max value in this state
        return np.argmax(options[0])

    def calculate_reward(self, action, price, long, short):
        """Apply ``action`` at ``price`` and return the realised reward.

        A reward is only produced when an open trade is reversed; it is the
        relative profit of the closed trade. Opening the first trade, holding,
        or repeating the current direction yields 0.

        :param action: 0 = hold, 1 = buy/long, 2 = sell/short
        :param price: price at which the action is executed
        :param long: whether a long trade is currently open
        :param short: whether a short trade is currently open
        :return: ``(reward, long, short)`` with the updated position flags
        """
        reward = 0

        if action == 1:  # buy

            # if new trade
            if not long and not short:
                self.inventory.append(price)

            # if already short
            elif short:
                # close short, open long
                sold_price = self.inventory.pop()
                # short is inverse; reward is percentage
                reward = (sold_price - price) / sold_price
                self.inventory.append(price)

            # set new trade
            long = True
            short = False

        elif action == 2:  # sell

            # if new trade
            if not long and not short:
                self.inventory.append(price)

            # if already long
            elif long:
                # close long, open short
                bought_price = self.inventory.pop()
                reward = (price - bought_price) / bought_price
                self.inventory.append(price)

            # set new trade
            long = False
            short = True

        return reward, long, short

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def experience_replay(self, batch_size):
        """Train the network on a random minibatch of remembered transitions.

        At most ``batch_size`` transitions are used; with an empty memory the
        call does nothing. The exploration rate decays after every replay.
        """
        if not self.memory:
            # nothing to learn from yet
            return

        # fill minibatch with random states from the memory; sampling more
        # items than are stored would raise ValueError
        minibatch = random.sample(self.memory, min(batch_size, len(self.memory)))

        for state, action, reward, next_state, done in minibatch:
            target = reward

            if not done:
                # discounted value of the best action in the next state
                tmp = np.amax(self.model.predict(next_state)[0])
                target = reward + self.gamma * tmp

            # current estimates for this state: [[_, _, _]]
            target_f = self.model.predict(state)
            target_f[0][action] = target
            # train the model towards updated prediction and save loss history
            self.history = self.model.fit(state, target_f, epochs=1, verbose=0)

        # update exploration/exploitation rate
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay


In [None]:
import time


def main():
    """Train the DQN agent on the processed chart named in ``config.json``.

    Reads the data directories, chart label, train-set size, window size and
    episode count from ``config.json``, builds the reinforcement inputs from
    the training split and runs the episode loop. On every time step the
    agent computes an action, ``agent.calculate_reward`` returns the reward
    and the updated long/short flags, the transition is stored and, once the
    memory holds more than ``agent.batch_size`` transitions, a minibatch is
    replayed.

    An episode ends when the running score drops below zero or the last time
    step is reached. The network is saved once after every episode except
    the first.
    """
    # load configuration file; the context manager closes the handle
    with open('config.json') as config_file:
        configs = json.load(config_file)

    raw_data_dir = configs['data']['raw_data_dir']
    processed_data_dir = configs['data']['processed_data_dir']
    end_time = configs['binance']['end_time']
    train_set_size = configs['data']['train_set_size']
    label = configs['data']['chart']

    window_size = configs['reinforcement']['window_size']

    p = PrepareData(raw_data_dir, processed_data_dir, end_time)

    df = p.load_processed_data(label)
    # only the training split is used here
    train_set, _test_set = p.split_train_test(df, train_set_size=train_set_size)

    print('Generating inputs ...')
    inputs, closing_prices, _scaler = p.create_inputs_reinforcement(train_set, x_win_size=window_size)

    agent = DQNAgent(np.shape(inputs))
    evaluation = False  # switch on at testing stage
    # TODO: account for a trading commission (e.g. 0.025) in the reward

    episodes = configs['reinforcement']['episodes']
    save_interval = 1  # save the network every N episodes (never after episode 0)
    rewards = []
    losses = []
    elapsed_times = []

    time_steps = len(inputs)
    for e in range(episodes):
        start_time = time.time()

        # reset the environment to the first window
        state = np.reshape(inputs[0], (1, agent.state_shape[0], agent.state_shape[1]))  # e.g. (1, 10, 9)
        total_reward = 100  # in percentage
        long = False
        short = False
        action = 0
        agent.inventory = []

        # maybe clear memory every episode

        # sparse rewards, based on exited trade - next, implement them on a daily basis

        for t in range(time_steps):
            print('Step {}/{}'.format(t, time_steps))
            print('Agent\'s inventory:', agent.inventory, 'Action:', action, 'Total reward:', total_reward)

            # compute action based on current state
            action = agent.compute_action(state, evaluation)

            # calculate reward
            reward, long, short = agent.calculate_reward(action, closing_prices[t], long, short)
            total_reward = total_reward + reward

            # the episode is over when the score is wiped out or the data is exhausted
            done = total_reward < 0 or t == time_steps - 1

            if done:
                episode_end = time.time()

                if total_reward < 0:
                    print('REKT')

                # agent.history is assigned by experience_replay; guard the case
                # where the episode ends before any replay has run
                fit_history = getattr(agent, 'history', None)
                loss = fit_history.history if fit_history is not None else None

                rewards.append(total_reward)
                losses.append(loss)
                elapsed_times.append(episode_end - start_time)
                print("--------------------------------")
                print("Episode: {}/{}, score: {}, training loss: {}".format(e, episodes, total_reward, loss))
                print("--------------------------------")
                break

            # get next state (t + 1 is in range: the last step breaks out above)
            next_state = np.reshape(inputs[t + 1], state.shape)

            # store in memory
            # NOTE(review): done is always False here because terminal steps break
            # out above, so terminal transitions never reach the replay memory
            agent.remember(state, action, reward, next_state, done)

            # recall sometimes
            if len(agent.memory) > agent.batch_size:
                agent.experience_replay(agent.batch_size)

            state = next_state

        # save once per episode rather than on every time step: the weights only
        # change inside experience_replay, so the file written here matches what
        # a save after the final replay of the episode would contain
        if e != 0 and e % save_interval == 0:
            save_network(agent.model, 'model_ep{}'.format(e), reinforcement=True)


# start training only when this code is run directly (script or notebook cell), not on import
if __name__ == '__main__':
    main()
