## Import Data

In [None]:
%matplotlib inline

import json
from datetime import datetime
from os import path

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd
import plotly.graph_objs as go
import plotly.offline as py
import requests
import ta
from plotly import tools as pytls

Binance exchange API config

In [None]:
# Binance klines (candlestick) endpoint and the query-string fragments for
# its parameters.
root_binance_url = 'https://api.binance.com/api/v1/klines'
symbol_url = '?symbol='
interval_url = '&interval='
start_time_url = '&startTime='
end_time_url = '&endTime='
# API limits noted by the author: at most 500 records per request and
# at most 1200 requests per minute.

# Trading pairs to download; every pair listed here is quoted in BTC.
symbols = ['EOSBTC', 'TRXBTC', 'ONTBTC']
# Candle widths to download. Intervals noted by the author as available:
# 1m, 3m, 5m, 15m, 30m, 1h, 2h, 4h, ...
intervals = ['5m', '15m', '30m', '1h', '4h', '1d']
# Cut-off for the data set, written as 'DD.MM.YYYY HH:MM:SS'.
end_date_time = '20.06.2018 00:00:00'

Function for converting datetime into timestamp and vice versa

In [None]:
def convert_date(val, to_timestamp):
    """Convert between date strings and millisecond epoch timestamps.

    Both directions treat times as UTC, so a value converted one way and
    back again is unchanged regardless of the machine's local timezone.

    Args:
        val: When ``to_timestamp`` is true, a string formatted as
            ``'DD.MM.YYYY HH:MM:SS'``. Otherwise a millisecond epoch value
            (or a collection of them, e.g. a pandas Series).
        to_timestamp: Direction of the conversion.

    Returns:
        An ``int`` number of milliseconds since the epoch when
        ``to_timestamp`` is true; otherwise the result of
        ``pd.to_datetime(val, unit='ms')``.

    Raises:
        ValueError: If the string does not match the expected format.
    """
    if to_timestamp:
        # Function-scope import: the file only imports `datetime` itself.
        from datetime import timezone

        parsed = datetime.strptime(val, '%d.%m.%Y %H:%M:%S')
        # A naive datetime's .timestamp() uses the local timezone, which
        # would disagree with the UTC interpretation used in the other
        # branch. Pin the value to UTC explicitly.
        utc_seconds = parsed.replace(tzinfo=timezone.utc).timestamp()
        return int(utc_seconds * 1000)

    # Time units are milliseconds.
    return pd.to_datetime(val, unit='ms')

Download data up to the configured end time and save it to a .json file named after
- the symbol (e.g. EOS, TRX, ONT)
- the price interval (e.g. 5m, 1h, 1d)

In [None]:
def _fetch_klines(symbol, interval, start_time):
    """Request one page of klines beginning at ``start_time`` (ms epoch)."""
    response = requests.get(
        root_binance_url,
        params={'symbol': symbol, 'interval': interval, 'startTime': start_time},
        timeout=30,
    )
    # Surface HTTP errors (bad symbol, rate limiting, ...) immediately
    # instead of failing later while indexing an error payload.
    response.raise_for_status()
    return response.json()


def get_data(symbol, interval):
    """Download kline data for one symbol/interval and cache it as JSON.

    The data is written to ``binance_<symbol>_<interval>.json`` in the
    current working directory. If that file already exists nothing is
    downloaded.

    Pages are requested starting from timestamp 0 and then repeatedly from
    the open time of the last row received, until that open time reaches
    the configured ``end_date_time`` or the API has no newer rows.

    Args:
        symbol: Trading pair, e.g. ``'EOSBTC'``.
        interval: Candle width, e.g. ``'30m'``.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        ValueError: If the first response contains no rows.
    """
    # Check if the data file exists.
    fname = 'binance_' + symbol + '_' + interval + '.json'
    if path.isfile(fname):
        print('Data for {}, interval {} already exists...'.format(symbol, interval))
        return

    print('Downloading data for {}, interval {}'.format(symbol, interval))
    # Start from the first available timestamp.
    data = _fetch_klines(symbol, interval, 0)
    if not isinstance(data, list) or not data:
        raise ValueError(
            'No kline data returned for {}, interval {}'.format(symbol, interval))

    end_time = convert_date(end_date_time, True)
    # Each following page starts at the open time of the last row we hold.
    start_time = data[-1][0]
    while start_time < end_time:
        # Omit the first element: it repeats the last row of the previous page.
        new_rows = _fetch_klines(symbol, interval, start_time)[1:]
        if not new_rows:
            # Nothing newer is available; without this guard start_time
            # would never advance and the loop would spin forever.
            break
        data.extend(new_rows)
        start_time = data[-1][0]

    # Save it to file.
    with open(fname, 'w', encoding='utf-8') as f:
        json.dump(data, f, sort_keys=True, indent=4, ensure_ascii=False)
        
        

In [None]:
# Download (or reuse the cached file for) every configured symbol/interval
# combination.
for sym in symbols:
    for t in intervals:
        print('\nGetting data for {}, interval {}'.format(sym, t))
        get_data(sym, t)

## Process Data

Remove rows with null values, rows after the specified end time, and unnecessary columns.

In [None]:
def process_data(fname):
    """Load a saved klines JSON file and reduce it to time, close and volume.

    Args:
        fname: Path of the JSON file to read.

    Returns:
        A DataFrame with the columns ``'Open Time'``, ``'Close'`` and
        ``'Volume'``, limited to rows whose open time is not after the
        configured ``end_date_time`` and sorted by ascending open time.
    """
    # Load and discard any row that contains a null value.
    klines = pd.read_json(fname).dropna()

    # Field order from the binance-api-docs:
    # https://github.com/binance-exchange/binance-official-api-docs/blob/master/rest-api.md
    klines.columns = [
        'Open Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Close Time',
        'Quote Asset Volume', 'Number of trades', 'Taker buy base asset volume',
        'Taker buy quote asset volume', 'Ignore',
    ]

    # Keep only what is needed. 'Quote Asset Volume' is the volume expressed
    # in the quote asset (BTC for the *BTC pairs configured in this file) and
    # takes over the 'Volume' name.
    wanted = klines[['Open Time', 'Close', 'Quote Asset Volume']]
    wanted = wanted.rename(columns={'Quote Asset Volume': 'Volume'})

    # Drop rows after the end time.
    cutoff = convert_date(end_date_time, to_timestamp=True)
    in_range = wanted[wanted['Open Time'] <= cutoff]

    # Ascending by date, so the last row is the latest candle kept.
    return in_range.sort_values(by='Open Time')

Normalise values with min-max scaling or z-score

In [None]:
def normalise_data(data, minmax=True):
    """Normalise values with min-max scaling or a z-score.

    Uses the container's own vectorised ``min``/``max``/``mean``/``std``
    methods. For a pandas Series these skip NaN entries, so one missing
    value no longer makes the whole result NaN.

    Args:
        data: A pandas Series (or other object offering ``min``, ``max``,
            ``mean`` and ``std`` and element-wise arithmetic).
        minmax: If true, scale values into the range 0..1; otherwise
            compute the z-score (values are not limited to 0..1).

    Returns:
        The normalised values, same shape as ``data``. If all values are
        equal (zero range or zero standard deviation) the result is all
        zeros rather than the NaN produced by dividing by zero.
    """
    if minmax:
        lowest = data.min()
        span = data.max() - lowest
        if span == 0:
            # Constant input: avoid 0 / 0 and map every value to 0.
            span = 1
        return (data - lowest) / span

    spread = data.std()
    if spread == 0:
        # Constant input: every value equals the mean.
        spread = 1
    return (data - data.mean()) / spread

In [None]:
# Processed DataFrames keyed by '<symbol>_<interval>', e.g. 'EOSBTC_30m'.
df_col = {}
for sym in symbols:
    for t in intervals:
        # Makes sure the cached JSON file exists before it is processed.
        print('\nGetting data for {}, interval {}'.format(sym, t))
        get_data(sym, t)

        print('\nProcessing data for {}, interval {}'.format(sym, t))
        # Same file name pattern that get_data writes to.
        filename = 'binance_' + sym + '_' + t + '.json'

        df_col[sym + '_' + t] = process_data(filename)

## Plotting with Matplotlib

Functions for visualisation with Matplotlib

In [None]:
def plot_individual(data, tag):
    """Plot closing price and volume of one data set with Matplotlib.

    Two stacked panels share the x axis: closing price in the taller upper
    panel, volume in the lower one.

    Args:
        data: DataFrame with 'Open Time' (ms epoch), 'Close' and 'Volume'.
        tag: Text used as the figure title.
    """
    fig, (price_ax, volume_ax) = plt.subplots(
        2, 1, sharex=True, gridspec_kw={'height_ratios': [3, 1]})
    fig.suptitle(tag, fontsize=16)

    timestamps = convert_date(data['Open Time'], False)
    price_ax.plot(timestamps, data['Close'])
    volume_ax.plot(timestamps, data['Volume'])

    # Settings common to both panels: grid, y label, number of y ticks.
    panels = (
        (price_ax, 'Closing Price [BTC]', 12),
        (volume_ax, 'Volume [BTC]', 4),
    )
    for axis, y_label, y_bins in panels:
        axis.grid(True)
        axis.set_ylabel(y_label, fontsize=12)
        axis.locator_params(axis='y', nbins=y_bins)

    # Price panel only: hide x ticks/labels and show six decimal places.
    price_ax.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    price_ax.yaxis.set_major_formatter(ticker.FormatStrFormatter('%.6f'))

    fig.autofmt_xdate()
    fig.tight_layout()
    # Applied after tight_layout so the suptitle keeps its room.
    fig.subplots_adjust(top=0.90)
    plt.show()


def plot_all(data, keys=('EOSBTC_30m', 'TRXBTC_30m', 'ONTBTC_30m')):
    """Plot min-max normalised closing prices of several data sets together.

    Args:
        data: Mapping from key (e.g. ``'EOSBTC_30m'``) to a DataFrame with
            'Open Time' (ms epoch) and 'Close' columns.
        keys: Keys of ``data`` to draw. Defaults to the three 30-minute
            series this function originally hard-coded.
    """
    # Same colour order as before: red, blue, green; reused if more than
    # three series are requested.
    colours = ('r', 'b', 'g')
    for i, key in enumerate(keys):
        frame = data[key]
        plt.plot(
            convert_date(frame['Open Time'], False),
            normalise_data(frame['Close'], minmax=True),
            colours[i % len(colours)],
            # Without a label plt.legend() has nothing to show.
            label=key[:3],
        )

    plt.grid(True)
    plt.ylabel('Normalised Closing Price', fontsize=12)
    plt.legend()

    plt.show()

In [None]:
# Price/volume figure for each 30-minute series.
key = 'EOSBTC_30m'
plot_individual(df_col[key], key)
key = 'TRXBTC_30m'
plot_individual(df_col[key], key)
key = 'ONTBTC_30m'
plot_individual(df_col[key], key)

# Normalised closing prices of all three series in a single figure.
plot_all(df_col)

## Plotting with Plotly

Functions for visualisations using the Plotly library

In [None]:
def plotly_layout(plot_title, ty1, ty2):
    """Build the Plotly layout shared by the price/volume figures.

    Args:
        plot_title: Figure title.
        ty1: Title of ``yaxis`` (domain 0 to 0.3).
        ty2: Title of ``yaxis2`` (domain 0.4 to 1).

    Returns:
        A ``go.Layout`` with a horizontal legend, a date x axis and the
        two y axes described above.
    """
    font_family = 'Courier New, monospace'

    def y_axis(domain, title, hover_fmt, tick_fmt):
        # Both y axes differ only in domain, title and number formats.
        return dict(
            domain=domain,
            title=title,
            titlefont=dict(family=font_family, size=16),
            hoverformat=hover_fmt,
            tickformat=tick_fmt,
        )

    first_axis = y_axis([0, 0.3], ty1, '.4f', '.f')
    second_axis = y_axis([0.4, 1], ty2, '.8f', '.6f')

    return go.Layout(
        title=plot_title,
        titlefont=dict(family=font_family),
        legend=dict(orientation='h'),
        xaxis=dict(type='date'),
        yaxis=first_axis,
        yaxis2=second_axis,
    )


def plotly_individual(data, tag):
    """Show closing price and volume of one data set as a Plotly figure.

    Args:
        data: DataFrame with 'Open Time', 'Close' and 'Volume' columns.
        tag: Figure title and ``filename`` passed to ``iplot``; its first
            three characters name the traces.
    """
    py.init_notebook_mode(connected=True)

    short_name = tag[:3]
    open_times = data['Open Time']

    price_trace = go.Scatter(x=open_times, y=data['Close'], name=short_name)
    volume_trace = go.Scatter(
        x=open_times,
        y=data['Volume'],
        xaxis='x',
        yaxis='y2',
        name=short_name,
    )

    # Two stacked subplots on a shared x axis: price is appended to grid
    # row 2 and volume to grid row 1, price first.
    figure = pytls.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.01)
    figure.append_trace(price_trace, 2, 1)
    figure.append_trace(volume_trace, 1, 1)

    layout = plotly_layout(tag, 'Volume [BTC]', 'Closing Price [BTC]')
    figure = go.Figure(figure, layout=layout)
    py.iplot(figure, filename=tag)


def plotly_all(data, labels, normalise=False):
    """Show closing price and volume of several data sets in one Plotly figure.

    Args:
        data: Mapping from key (e.g. ``'EOSBTC_30m'``) to a DataFrame with
            'Open Time', 'Close' and 'Volume' columns.
        labels: Keys of ``data`` to draw; the first three characters of
            each key name its traces.
        normalise: If true, price and volume are min-max normalised and
            the titles drop the ``[BTC]`` unit.
    """
    py.init_notebook_mode(connected=True)

    fig = pytls.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.01)

    # Titles depend only on `normalise`, so decide them once up front.
    if normalise:
        plot_title = 'Comparison of Normalised Closing Prices and Volume of EOS, TRX and ONT'
        ty1 = 'Volume'
        ty2 = 'Closing Price'
    else:
        plot_title = 'Comparison of Closing Prices and Volume of EOS, TRX and ONT'
        ty1 = 'Volume [BTC]'
        ty2 = 'Closing Price [BTC]'

    colours = ['#1F77B4', '#B2182B', '#FF7F0E']
    for i, key in enumerate(labels):
        frame = data[key]
        price_data = frame['Close']
        vol_data = frame['Volume']
        open_times = frame['Open Time']

        if normalise:
            price_data = normalise_data(price_data, minmax=True)
            vol_data = normalise_data(vol_data, minmax=True)

        # Wrap around the palette so more than three labels do not raise
        # an IndexError.
        colour = colours[i % len(colours)]

        tmp_price = go.Scatter(
            x=open_times,
            y=price_data,
            name=key[:3],
            line=dict(color=colour)
        )
        fig.append_trace(tmp_price, 2, 1)

        tmp_vol = go.Scatter(
            x=open_times,
            y=vol_data,
            xaxis='x',
            yaxis='y2',
            name=key[:3],
            line=dict(color=colour),
            # The price trace with the same name and colour already
            # provides the legend entry.
            showlegend=False
        )
        fig.append_trace(tmp_vol, 1, 1)

    layout = plotly_layout(plot_title, ty1, ty2)

    fig = go.Figure(fig, layout=layout)
    py.iplot(fig, filename='all-together')

In [None]:
# Interactive price/volume figure for each 30-minute series.
key = 'EOSBTC_30m'
plotly_individual(df_col[key], key)
key = 'TRXBTC_30m'
plotly_individual(df_col[key], key)
key = 'ONTBTC_30m'
plotly_individual(df_col[key], key)

# All three series in one figure, min-max normalised so they share a scale.
labels = ['EOSBTC_30m', 'TRXBTC_30m', 'ONTBTC_30m']
plotly_all(df_col, labels, normalise=True)

## Calculate TA Indicators

In [None]:
def calculate_ta(df):
    """Add technical-analysis indicator columns derived from 'Close'.

    The columns are added to ``df`` in place, in this order: 'MA_50',
    'MA_200', 'EMA', 'MACD', 'RSI', 'BB_H', 'BB_L'.

    Args:
        df: DataFrame with a 'Close' column.

    Returns:
        The same DataFrame object, now carrying the indicator columns.
    """
    close = df['Close']

    # --- Trend indicators ---
    # Simple moving averages (a 10-period variant was sketched by the
    # author but is not computed).
    for column, window in (('MA_50', 50), ('MA_200', 200)):
        df[column] = close.rolling(window=window).mean()

    # Exponential moving average over 20 periods.
    df['EMA'] = ta.ema_slow(close, n_slow=20, fillna=True)

    # Moving Average Convergence Divergence (MACD), difference series.
    df['MACD'] = ta.macd_diff(close, n_fast=12, n_slow=26, n_sign=9, fillna=True)

    # --- Momentum indicators ---
    # Relative Strength Index (RSI).
    df['RSI'] = ta.rsi(close, n=14, fillna=True)

    # --- Volatility indicators ---
    # A manual (High - Low) / Open volatility was sketched by the author
    # but is not computed.
    # Bollinger Bands (BB), high/low band indicator series.
    # NOTE(review): the "_indicator" variants presumably return band-crossing
    # flags rather than the band values themselves; confirm against the ta docs.
    df['BB_H'] = ta.bollinger_hband_indicator(close, n=20, ndev=2, fillna=True)
    df['BB_L'] = ta.bollinger_lband_indicator(close, n=20, ndev=2, fillna=True)

    return df

In [None]:
# Add the indicator columns to every processed data set. Keys have the
# form '<symbol>_<interval>': key[:3] is the first three characters of the
# symbol and key.split('_')[1] is the interval.
for key in df_col:
    print('\nCalculating TA indicators for {}, interval {}'.format(key[:3], key.split('_')[1]))
    # Only existing keys are reassigned, so the dict does not change size
    # while it is being iterated.
    df_col[key] = calculate_ta(df_col[key])