## Import Libraries

In [1]:
import json
from datetime import datetime
from os import path, mkdir

import numpy as np
import pandas as pd
import requests
import ta
from sklearn import preprocessing
import gc


## Data Processing

In [2]:
class PrepareData:

    def __init__(self):
        pass

    def extract_data(self, symbol, interval, end_time):

        # Binance API url
        root_binance_url = 'https://api.binance.com/api/v1/klines'
        symbol_url = '?symbol='
        interval_url = '&interval='
        start_time_url = '&startTime='
        # limit is max 500 records, max 1200 requests/minute

        # check is data file exists
        raw_data_dir = 'datasets'
        fname = 'binance_' + symbol + '_' + interval + '.json'

        if not path.isdir(raw_data_dir):
            mkdir(raw_data_dir)

        if not path.isfile(raw_data_dir + '/' + fname):
            print('Downloading data for {}, interval {}...'.format(symbol, interval))

            # check for first available timestamp and add 24h since at listing on exchange price vary wildly
            url = root_binance_url + symbol_url + symbol + interval_url + interval + start_time_url + '0'
            first_timestamp = json.loads(requests.get(url).text)[0][0]  # first timestamp in json
            day_in_millis = 86400000
            actual_timestamp = first_timestamp + day_in_millis

            url = root_binance_url + symbol_url + symbol + interval_url + interval + start_time_url + str(
                actual_timestamp)
            json_data = json.loads(requests.get(url).text)

            # new start time is the previous end timestamp, 500 is the limit/max
            start_time = json_data[-1][0]
            end_time = self.convert_date(end_time, to_timestamp=True)
            while start_time < end_time:
                url = root_binance_url + symbol_url + symbol + interval_url + interval + start_time_url + str(
                    start_time)
                data_new = json.loads(requests.get(url).text)
                # omit the first element as it is equal to the last on the previous list
                json_data = json_data + data_new[1:]
                start_time = json_data[-1][0]

            # save to disk
            with open(raw_data_dir + '/' + fname, 'w') as f:
                json.dump(json_data, f, sort_keys=True, indent=4, ensure_ascii=False)

            # return dataframe
            df = pd.DataFrame(json_data)
            return df, fname

        else:
            print('Retrieving from file...')
            # read from disk into a pandas dataframe
            file_path = raw_data_dir + '/' + fname
            df = pd.read_json(file_path)
            return df, fname

    def process_data(self, data, fname, end_time):

        fpath = 'datasets/processed'
        fname = fname.split('binance_')[1]
        df = data

        if not path.isdir(fpath):
            mkdir(fpath)

        if not path.isfile(fpath + '/' + fname):
            # remove any rows with null values
            df = df.dropna()

            # from binance-api-docs: https://github.com/binance-exchange/binance-official-api-docs/blob/master/rest-api.md
            col_names = ['Open Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Close Time', 'Quote Asset Volume',
                         'Number of trades', 'Taker buy base asset volume', 'Taker buy quote asset volume', 'Ignore']
            df.columns = col_names
            df.drop(df.columns[8:], axis=1, inplace=True)
            # Drop unnecessary columns
            col_drop_names = ['Open', 'High', 'Low', 'Volume', 'Close Time']
            df.drop(col_drop_names, axis=1, inplace=True)

            # Quote Asset Volume is volume in base currency = BTC
            df.rename(columns={'Quote Asset Volume': 'Volume'}, inplace=True)

            # remove rows after end time
            end_time = self.convert_date(end_time, to_timestamp=True)
            df = df[df['Open Time'] <= end_time]

            # sort by ascending date, so that last point represents 15th June 2018
            df = df.sort_values(by='Open Time')

            # calculate TA indicators
            print('Calculating TA indicators...')
            df = self.calculate_ta(df)

            # remove first 200 elements (MA_200 is nan)
            df = df[200:]

            # save to disk
            with open(fpath + '/' + fname, 'w') as f:
                # get df to json format
                out = df.to_json(orient='records')
                f.write(out)

        else:
            print('Retrieving from file...')
            df = pd.read_json(fpath + '/' + fname, orient='records')

        return df

    def calculate_ta(self, data):

        def moving_average(data_col, n):
            ma = data_col.rolling(window=n).mean()
            ma.fillna(0, inplace=True)
            return ma

        df = data

        # Trend Indicators
        # Moving Average (MA)
        # df['MA_10'] = moving_average(df['Close'], 10)
        df['MA_50'] = moving_average(df['Close'], 50)
        df['MA_200'] = moving_average(df['Close'], 200)

        # Exponential Moving Average (EMA)
        df['EMA'] = ta.ema_slow(df['Close'], n_slow=20, fillna=True)

        # Moving Average Convergence Divergence (MACD)
        df['MACD'] = ta.macd_diff(df['Close'], n_fast=12, n_slow=26, n_sign=9, fillna=True)

        # Momentum Indicators
        # Relative Strength Index (RSI)
        df['RSI'] = ta.rsi(df['Close'], n=14, fillna=True)

        # Volatility Indicators
        # Calc volatility manually
        # df['Volatility'] = (df['High'] - df['Low']) / df['Open']

        # Bollinger Bands (BB)
        df['BB_H'] = ta.bollinger_hband_indicator(df['Close'], n=20, ndev=2, fillna=True)
        df['BB_L'] = ta.bollinger_lband_indicator(df['Close'], n=20, ndev=2, fillna=True)

        # Scaling between -1 and 1 (if crosses 1, else -1)
        # df['BB_H'].replace(0, -1, inplace=True)
        # df['BB_L'].replace(0, -1, inplace=True)

        return df

    def drop_col(self, df, name='Open Time'):
        df.drop([name], axis=1, inplace=True)

    def convert_date(self, val, to_timestamp):
        if to_timestamp:
            dt = datetime.strptime(val, '%d.%m.%Y %H:%M:%S')
            millis_time = dt.timestamp() * 1000
            return int(millis_time)
        else:
            # time units are milliseconds
            date_col = pd.to_datetime(val, unit='ms')
            return date_col

    def check_if_nan(self, data):

        if data.isnull().values.any():
            null_cols = data.columns[data.isnull().any()]
            data[null_cols].isnull().sum()

            print('Dataset contains null values')
            print(data[data.isnull().any(axis=1)][null_cols].head())

            data.dropna(inplace=True)

    def create_inputs_minmax(self, data, x_win_size=50, y_win_size=1):
        # can store 2x in memory compared to float64
        tmp_data = data.astype('float32')

        self.drop_col(tmp_data, name='Open Time')
        self.check_if_nan(tmp_data)

        # BB_H, BB_L are in the range [0,1]
        # RSI is oscillator [0, 100] --> [0,1]
        tmp_data[['RSI']] = tmp_data[['RSI']] / 100

        # MaxMin Scaling
        scaler = self.maxmin_normalise(tmp_data)

        # for col in tmp_data.columns.values:
        #     print('{} -- First:'.format(col), tmp_data[col][0], 'Max:', tmp_data[col].max(), 'Min:', tmp_data[col].min())

        x_inputs = []
        y_inputs = []
        i = 0
        while (i + x_win_size + y_win_size) < len(tmp_data):
            # e.g. x[0:50] y[50:51]
            x_win_data = tmp_data[i: i + x_win_size]
            y_win_data = tmp_data['Close'][i + x_win_size: i + x_win_size + y_win_size]

            # to numpy array
            x_win_arr = np.array(x_win_data)
            y_win_arr = np.array(y_win_data)
            x_inputs.append(x_win_arr)
            y_inputs.append(y_win_arr)

            i = i + 1

        x_inputs = np.array(x_inputs)
        y_inputs = np.array(y_inputs)

        print('Shape X:', np.shape(x_inputs), 'Shape Y:', np.shape(y_inputs))

        return x_inputs, y_inputs, scaler

    def create_inputs_zero_base(self, data, x_win_size=50, y_win_size=1):
        # can store 2x in memory compared to float64
        tmp_data = data.astype('float32')

        self.drop_col(tmp_data, name='Open Time')
        self.check_if_nan(tmp_data)

        # BB_H, BB_L are in the range [0,1]
        # RSI is oscillator [0, 100]
        # -- Scale to [0, 2], then shift to [-1, 1] range
        tmp_data[['RSI']] = ((tmp_data[['RSI']] / 100) * 2) - 1

        # for col in tmp_data.columns.values:
        #     print('{} -- First:'.format(col), tmp_data[col][0], 'Max:', tmp_data[col].max(), 'Min:', tmp_data[col].min())

        x_inputs = []
        y_inputs = []
        i = 0
        while (i + x_win_size + y_win_size) < len(tmp_data):
            # create a copy to preserve original data
            window_data = tmp_data[i: (i + x_win_size + y_win_size)].copy()
            window_data = self.zero_base_normalise(window_data)

            # x[0:50] y[50:51]
            x_win_data = window_data[: x_win_size]
            y_win_data = window_data['Close'].iloc[-1]

            # change to numpy array
            x_win_arr = np.array(x_win_data)
            x_inputs.append(x_win_arr)
            y_inputs.append(y_win_data)

            i = i + 1

        x_inputs = np.array(x_inputs)
        y_inputs = np.array(y_inputs)

        print('Shape X:', np.shape(x_inputs), 'Shape Y:', np.shape(y_inputs))

        return x_inputs, y_inputs

    def maxmin_normalise(self, data):

        # BB_H, BB_L, RSI are good
        norm_cols = [name for name in data.columns if name not in ['BB_H', 'BB_L', 'RSI']]

        # rescale to [0, 1] across norm_cols
        scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
        for col in norm_cols:
            data[[col]] = scaler.fit_transform(data[[col]])

        return scaler

    def zero_base_normalise(self, data):

        # BB_H, BB_L, RSI are good
        norm_cols = [name for name in data.columns if name not in ['BB_H', 'BB_L', 'RSI']]
        for col in norm_cols:
            # normalise against the 1st element for each window
            tmp_base = data.loc[:, col].iloc[0]
            data.loc[:, col] = (data.loc[:, col] / tmp_base) - 1

        return data

    def split_train_test(self, df, training_size=0.8):
        train_set = df[:int(training_size * len(df))]
        test_set = df[int(training_size * len(df)):]
        return train_set, test_set
    

## Plotting 

In [3]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import plotly.graph_objs as go
import plotly.offline as py
from plotly import tools as pytls

# Plotly requires version 2.7.0
from plotly import __version__
print('Plotly version:', __version__)


def plotly_layout(plot_title, ty1, ty2):
    layout = go.Layout(
        title=plot_title,
        titlefont=dict(
            family='Courier New, monospace'
        ),
        legend=dict(orientation='h'),
        xaxis=dict(type='date'),
        yaxis=dict(
            domain=[0, 0.3],
            title=ty1,
            titlefont=dict(
                family='Courier New, monospace',
                size=16
            ),
            hoverformat='.4f',
            tickformat='.f'
        ),
        yaxis2=dict(
            domain=[0.4, 1],
            title=ty2,
            titlefont=dict(
                family='Courier New, monospace',
                size=16
            ),
            hoverformat='.8f',
            tickformat='.6f'
        )
    )

    return layout


def plotly_individual(data, tag):
    py.init_notebook_mode(connected=True)

    trace_price = go.Scatter(
        x=data['Open Time'],
        y=data['Close'],
        name=tag[:3]
    )

    trace_volume = go.Scatter(
        x=data['Open Time'],
        y=data['Volume'],
        xaxis='x',
        yaxis='y2',
        name=tag[:3]
    )

    ty1 = 'Volume [BTC]'
    ty2 = 'Closing Price [BTC]'
    layout = plotly_layout(tag, ty1, ty2)

    fig = pytls.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.01)
    fig.append_trace(trace_price, 2, 1)
    fig.append_trace(trace_volume, 1, 1)

    fig = go.Figure(fig, layout=layout)
    py.iplot(fig, filename=tag)


def plotly_all(data, labels, normalise=False):
    py.init_notebook_mode(connected=True)

    fig = pytls.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.01)
    plot_title, ty1, ty2 = '', '', ''

    colours = ['#1F77B4', '#B2182B', '#FF7F0E']
    for i, key in enumerate(labels):
        price_data = data[key]['Close']
        vol_data = data[key]['Volume']
        open_times = data[key]['Open Time']
        plot_title = 'Comparison of Closing Prices and Volume of EOS, TRX and ONT'
        ty1 = 'Volume [BTC]'
        ty2 = 'Closing Price [BTC]'

        if normalise:
            price_data = plot_scale(price_data)
            vol_data = plot_scale(vol_data)
            plot_title = 'Comparison of Normalised Closing Prices and Volume of EOS, TRX and ONT'
            ty1 = ty1[:-6]
            ty2 = ty2[:-6]

        tmp_price = go.Scatter(
            x=open_times,
            y=price_data,
            name=key[:3],
            line=dict(color=colours[i])
        )
        fig.append_trace(tmp_price, 2, 1)

        tmp_vol = go.Scatter(
            x=open_times,
            y=vol_data,
            xaxis='x',
            yaxis='y2',
            name=key[:3],
            line=dict(color=colours[i]),
            showlegend=False
        )
        fig.append_trace(tmp_vol, 1, 1)

    layout = plotly_layout(plot_title, ty1, ty2)

    fig = go.Figure(fig, layout=layout)
    py.iplot(fig, filename='all-together')


def plot_individual(data, tag):
    p = PrepareData()

    fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, gridspec_kw={'height_ratios': [3, 1]})
    fig.suptitle(tag, fontsize=16)

    date_col = p.convert_date(data['Open Time'], False)
    ax1.plot(date_col, data['Close'])
    ax1.grid(True)
    ax1.set_ylabel('Closing Price [BTC]', fontsize=12)
    ax1.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    ax1.locator_params(axis='y', nbins=12)
    ax1.yaxis.set_major_formatter(ticker.FormatStrFormatter('%.6f'))

    ax2.plot(date_col, data['Volume'])
    ax2.grid(True)
    ax2.set_ylabel('Volume [BTC]', fontsize=12)
    ax2.locator_params(axis='y', nbins=4)

    fig.autofmt_xdate()
    fig.tight_layout()
    fig.subplots_adjust(top=0.90)
    plt.show()


def plot_all(data):
    p = PrepareData()

    eos = data['EOSBTC_30m']
    eos_np = plot_scale(eos['Close'])
    trx = data['TRXBTC_30m']
    trx_np = plot_scale(trx['Close'])
    ont = data['ONTBTC_30m']
    ont_np = plot_scale(ont['Close'])
    date_eos = p.convert_date(eos['Open Time'], False)
    date_trx = p.convert_date(trx['Open Time'], False)
    date_ont = p.convert_date(ont['Open Time'], False)
    plt.plot(date_eos, eos_np, 'r', date_trx, trx_np, 'b', date_ont, ont_np, 'g')

    plt.grid(True)
    plt.ylabel('Normalised Closing Price', fontsize=12)
    plt.legend()

    plt.show()


def plot_scale(data_col):
    # min-max scaling (values between 0 and 1)
    scaled = (data_col - min(data_col)) / (max(data_col) - min(data_col))
    return scaled


Plotly version: 2.7.0


## LSTM Model

In [4]:
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.recurrent import LSTM
from keras.models import Sequential, load_model

neurons = 50  # number of hidden units in the LSTM layer
output_size = 1  # one output for price prediction
activation_function = 'tanh'  # activation function for LSTM and Dense layer
loss_function = 'mse'  # loss function for calculating the gradient, in this case Mean Squared Error
optimizer = 'adam'  # optimizer for applying gradient decent
dropout = 0.2  # dropout rate used after each LSTM layer to avoid overfitting


def build_network(inputs):
    # start stacking layers
    model = Sequential()

    model.add(LSTM(
        neurons,
#         return_sequences=True,
        input_shape=(inputs.shape[1], inputs.shape[2]),  # Shape X (1105, 50, 9), Shape Y (1105, )
        activation=activation_function
    ))
    model.add(Dropout(dropout))

#     model.add(LSTM(neurons, return_sequences=True, activation=activation_function))
#     model.add(Dropout(dropout))
    
#     model.add(LSTM(neurons, activation=activation_function))
#     model.add(Dropout(dropout))

    model.add(Dense(units=output_size))
    model.add(Activation(activation_function))

    model.compile(loss=loss_function, optimizer=optimizer)
#     model.compile(loss=loss_function, optimizer=optimizer, metrics=['mae'])
    model.summary()

    return model


def save_network(model, model_name):
    model_dir = 'models'
    if not path.isdir(model_dir):
        mkdir(model_dir)

    model.save(model_dir + '/' + model_name + '.h5')


def load_network(model_name):
    model_dir = 'models'
    if not path.isfile(model_dir + '/' + model_name + '.h5'):
        print('Model {} does not exist...'.format(model_name))
    else:
        print('Model {} loaded'.format(model_name))
        model = load_model(model_dir + '/' + model_name + '.h5')
              
        return model

Using TensorFlow backend.


## Plot Results

In [18]:
def plotly_results_layout(plot_title, x_label, y_label):
    layout = go.Layout(
        title=plot_title,
        titlefont=dict(
            family='Courier New, monospace'
        ),
        legend=dict(orientation='h'),
        xaxis=dict(
            title=x_label,
            titlefont=dict(
                family='Courier New, monospace',
                size=16
            )
        ),
        yaxis=dict(
            title=y_label,
            titlefont=dict(
                family='Courier New, monospace',
                size=16
            ),
            hoverformat='.8f',
            tickformat='.6f'
        )
    )

    return layout


def plotly_loss(history, tag):
    py.init_notebook_mode(connected=True)

    # Plot Loss
    plot_title = 'Model Loss {}'.format(tag)
    ty1 = 'Number of Epochs'
    ty2 = 'Loss'

    trace_loss = go.Scatter(
        x=history.epoch,
        y=history.history['loss'],
        name='Loss'
    )

    trace_val_loss = go.Scatter(
        x=history.epoch,
        y=history.history['val_loss'],
        name='Validation Loss'
    )

    layout = plotly_results_layout(plot_title, ty1, ty2)
    data = [trace_loss, trace_val_loss]

    fig = dict(data=data, layout=layout)
    py.iplot(fig, filename='model-loss')


def plotly_prediction(predict_df, target, tag):
    py.init_notebook_mode(connected=True)

    # Plot Loss
    plot_title = 'Prediction {}'.format(tag)
    ty1 = 'Date'
    ty2 = 'Price'

    trace_target = go.Scatter(
        x=predict_df['Open Time'],
        y=target,
        name='Actual'
    )

    trace_predicted = go.Scatter(
        x=predict_df['Open Time'],
        y=predict_df['Results'],
        name='Predicted'
    )

    layout = plotly_results_layout(plot_title, ty1, ty2)
    # process timestamps as date
    layout.update(xaxis=dict(type='date'))

    data = [trace_target, trace_predicted]

    fig = dict(data=data, layout=layout)
    py.iplot(fig, filename='prediction-results')


## Main

In [6]:
symbols = ['EOSBTC', 'TRXBTC', 'ONTBTC']
intervals = ['5m', '15m', '30m', '1h', '4h']  # 1d does not give enough data
end_time = '15.07.2018 00:00:00'
x_window_size = 50
y_window_size = 1
input_scaling = 'aminmax'
# batch_size = 128 (default 32)
epochs = 50

p = PrepareData()

# Extract and Process Data
df_col = {}
for sym in symbols:
    for t in intervals:
        print('\nGetting data for {}, interval {}'.format(sym, t))
        df, fname = p.extract_data(sym, t, end_time)

        print('\nProcessing data for {}, interval {}'.format(sym, t))
        df_col[sym + '_' + t] = p.process_data(df, fname, end_time)
        


Getting data for EOSBTC, interval 5m
Retrieving from file...

Processing data for EOSBTC, interval 5m
Retrieving from file...

Getting data for EOSBTC, interval 15m
Retrieving from file...

Processing data for EOSBTC, interval 15m
Retrieving from file...

Getting data for EOSBTC, interval 30m
Retrieving from file...

Processing data for EOSBTC, interval 30m
Retrieving from file...

Getting data for EOSBTC, interval 1h
Retrieving from file...

Processing data for EOSBTC, interval 1h
Retrieving from file...

Getting data for EOSBTC, interval 4h
Retrieving from file...

Processing data for EOSBTC, interval 4h
Retrieving from file...

Getting data for TRXBTC, interval 5m
Retrieving from file...

Processing data for TRXBTC, interval 5m
Retrieving from file...

Getting data for TRXBTC, interval 15m
Retrieving from file...

Processing data for TRXBTC, interval 15m
Retrieving from file...

Getting data for TRXBTC, interval 30m
Retrieving from file...

Processing data for TRXBTC, interval 30m


In [15]:
# Plot Raw Data

# Matplotlib plot
# key = 'EOSBTC_30m'
# plot_individual(df_col[key], key)
# key = 'TRXBTC_30m'
# plot_individual(df_col[key], key)
# key = 'ONTBTC_30m'
# plot_individual(df_col[key], key)
#
# plot.plot_all(df_col)

# Plotly plot
# key = 'EOSBTC_30m'
# plotly_individual(df_col[key], key)
# key = 'TRXBTC_30m'
# plotly_individual(df_col[key], key)
# key = 'ONTBTC_30m'
# plotly_individual(df_col[key], key)

# labels = ['EOSBTC_30m', 'TRXBTC_30m', 'ONTBTC_30m']
# plotly_all(df_col, labels, normalise=True)


Only use EOSBTC_30m from here on

In [8]:
label = 'EOSBTC_1h'
eos = df_col[label]

# Split to training and testing data
train_set, test_set = p.split_train_test(eos)

if input_scaling == 'minmax':
    # creates training inputs and outputs (input data and labels for supervised learning)
    print('Generating training inputs and lables (X_train, Y_train)...')
    X_train, Y_train, train_scaler = p.create_inputs_minmax(train_set, x_win_size=x_window_size, y_win_size=y_window_size)
    # creates testing inputs and outputs (validation set to check if the NN is overfitting)
    print('Generating testing inputs and lables (X_test, Y_test)...')
    X_test, Y_test, test_scaler = p.create_inputs_minmax(test_set, x_win_size=x_window_size, y_win_size=y_window_size)
else:
    # creates training inputs and outputs (input data and labels for supervised learning)
    print('Generating training inputs and lables (X_train, Y_train)...')
    X_train, Y_train = p.create_inputs_zero_base(train_set, x_win_size=x_window_size, y_win_size=y_window_size)
    # creates testing inputs and outputs (validation set to check if the NN is overfitting)
    print('Generating testing inputs and lables (X_test, Y_test)...')
    X_test, Y_test = p.create_inputs_zero_base(test_set, x_win_size=x_window_size, y_win_size=y_window_size)

Generating training inputs and lables (X_train, Y_train)...
Shape X: (5041, 50, 9) Shape Y: (5041,)
Generating testing inputs and lables (X_test, Y_test)...
Shape X: (1223, 50, 9) Shape Y: (1223,)


Train the LSTM

In [9]:
# clean up the memory
gc.collect()

# fix random seed for reproducibility
np.random.seed(202)

# create model architecture
lstm_model = build_network(inputs=X_train)

# train the LSTM =)

# verbose: 0 = silent, 1 = progress bar, 2 = one line per epoch
# batch_size will default to 32
history = lstm_model.fit(X_train, Y_train, epochs=epochs, shuffle=True, verbose=2, validation_data=(X_test, Y_test))

# save the model to a file
model_name = label + '_' + str(epochs)
save_network(lstm_model, model_name)

lstm_out = lstm_model.predict(X_test, verbose=1)


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 50)                12000     
_________________________________________________________________
dropout_1 (Dropout)          (None, 50)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 51        
_________________________________________________________________
activation_1 (Activation)    (None, 1)                 0         
Total params: 12,051
Trainable params: 12,051
Non-trainable params: 0
_________________________________________________________________
Train on 5041 samples, validate on 1223 samples
Epoch 1/50
 - 6s - loss: 0.0335 - val_loss: 0.0048
Epoch 2/50
 - 5s - loss: 0.0127 - val_loss: 0.0015
Epoch 3/50
 - 5s - loss: 0.0071 - val_loss: 0.0018
Epoch 4/50
 - 5s - loss: 0.0052 - val_loss: 0.0012
Epoch 5/50
 - 5s -

Plot results

In [10]:
# keep date for plotting
size_test_set = X_test.shape[0]
predict_df = pd.DataFrame(eos['Open Time'].iloc[-size_test_set:])
# need to reset the index to start from 0
predict_df.index = range(len(predict_df))

predict_df['Results'] = pd.DataFrame(lstm_out)
print(predict_df.head())


       Open Time   Results
0  1527145200000 -0.063420
1  1527148800000 -0.062115
2  1527152400000 -0.069479
3  1527156000000 -0.076598
4  1527159600000 -0.058317


In [19]:
# plot model loss
plotly_loss(history, label)

# plot prediction results
plotly_prediction(predict_df, Y_test, label)


Evaluate

In [12]:
from sklearn.metrics import mean_squared_error
from math import sqrt
rmse = sqrt(mean_squared_error(lstm_out, Y_test))
print('Test RMSE: %.3f' % rmse)


# load and plot some other networks
loaded_label = 'EOSBTC_30m_150'
loaded_model = load_network(loaded_label)

# evaluate loaded model on test data
loaded_model.compile(loss=loss_function, optimizer=optimizer, metrics=['mae'])
score = loaded_model.evaluate(X_train, Y_train, verbose=0)
print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))

# load and plot some other networks
loaded_label = 'EOSBTC_4h_100'
loaded_model = load_network(loaded_label)

# evaluate loaded model on test data
loaded_model.compile(loss=loss_function, optimizer=optimizer, metrics=['mae'])
score = loaded_model.evaluate(X_train, Y_train, verbose=0)
print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))

# load and plot some other networks
loaded_label = 'EOSBTC_1h_150'
loaded_model = load_network(loaded_label)

# evaluate loaded model on test data
loaded_model.compile(loss=loss_function, optimizer=optimizer, metrics=['mae'])
score = loaded_model.evaluate(X_train, Y_train, verbose=0)
print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))


Test RMSE: 0.020
Model EOSBTC_30m_150 loaded
mean_absolute_error: 52.44%
Model EOSBTC_4h_100 loaded
mean_absolute_error: 23.79%
Model EOSBTC_1h_150 loaded
mean_absolute_error: 49.56%


In [20]:
# try out the loop prediction lol

