In [1]:
import pandas as pd
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import numpy as np
import datetime

# Seed NumPy's global random generator so that any random draws are repeatable across runs.
np.random.seed(42)

In [2]:
from math import pi
from bokeh.io import output_notebook
from bokeh.plotting import figure, show, output_file

# Direct Bokeh output to the notebook so that show() renders figures inline.
output_notebook()

In [3]:
# Matplotlib helpers for plotting loss/accuracy curves

def prepare_standardplot(title, xlabel):
    """Create a side-by-side figure for a loss curve and an accuracy curve.

    The left axes is labelled 'categorical cross entropy' and uses a
    logarithmic y scale; the right axes is labelled 'accuracy [% correct]'.
    Both axes share the same x-axis label.

    Parameters
    ----------
    title : str
        Figure-level title (passed to ``fig.suptitle``).
    xlabel : str
        Label applied to the x axis of both subplots.

    Returns
    -------
    tuple
        ``(fig, left_axes, right_axes)``.
    """
    fig, axes = plt.subplots(1, 2)
    loss_ax, accuracy_ax = axes
    fig.suptitle(title)

    labelled_axes = (
        (loss_ax, 'categorical cross entropy'),
        (accuracy_ax, 'accuracy [% correct]'),
    )
    for axis, ylabel in labelled_axes:
        axis.set_ylabel(ylabel)
        axis.set_xlabel(xlabel)

    # Only the loss panel is drawn on a log scale.
    loss_ax.set_yscale('log')
    return fig, loss_ax, accuracy_ax

def finalize_standardplot(fig, ax1, ax2):
    """Add legends where needed and tidy the layout of a two-panel figure.

    A legend is drawn on an axes only if it has at least one labelled artist.
    The layout is then tightened and the top margin is reduced to 0.9 so the
    figure-level title is not overlapped by the subplots.

    Parameters
    ----------
    fig : matplotlib.figure.Figure
        The figure that owns ``ax1`` and ``ax2``.
    ax1, ax2 : matplotlib.axes.Axes
        The two axes to finalize.

    Returns
    -------
    None
    """
    for axis in (ax1, ax2):
        handles, labels = axis.get_legend_handles_labels()
        if labels:
            axis.legend(handles, labels)
    fig.tight_layout()
    # Adjust the figure that was passed in, not whichever figure pyplot
    # currently considers "current" (they differ once several figures exist).
    fig.subplots_adjust(top=0.9)

def plot_history(history, title):
    """Plot the per-epoch training loss and binary accuracy of a training run.

    Parameters
    ----------
    history : object
        Must expose a ``history`` mapping with the keys ``'loss'`` and
        ``'binary_accuracy'`` (presumably a Keras ``History`` object -- TODO confirm).
    title : str
        Title of the resulting figure.

    Returns
    -------
    The figure holding the loss panel (left) and the accuracy panel (right).
    """
    fig, loss_ax, accuracy_ax = prepare_standardplot(title, 'epoch')
    panels = ((loss_ax, 'loss'), (accuracy_ax, 'binary_accuracy'))
    for axis, metric_name in panels:
        axis.plot(history.history[metric_name], label="training")
    finalize_standardplot(fig, loss_ax, accuracy_ax)
    return fig

In [4]:
def aggregate_market_values(dataframe, aggregation_period, unix_time=True):
    """
    Aggregate market rows into fixed-width time buckets.

    Expected columns:
    timestamp / open / high / low / close / volume btc / volume currency / weighted price
    (``Timestamp``, ``Open``, ``High``, ``Low``, ``Close``, ``Volume_(BTC)``,
    ``Volume_(Currency)``, ``Weighted_Price``).

    Rows are bucketed by ``Timestamp // (aggregation_period * 60)``. Within a
    bucket, ``Open`` is the first value, ``High`` the maximum, ``Low`` the
    minimum, ``Close`` the last value, both volume columns are summed and
    ``Weighted_Price`` is the plain mean of the per-row values. "First" and
    "last" follow the row order of ``dataframe``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Input rows; never modified.
    aggregation_period : int
        Bucket width in minutes; must be positive.
    unix_time : bool, default True
        If True, ``Timestamp`` holds epoch seconds and the result does too.
        If False, ``Timestamp`` holds datetimes; they are converted to epoch
        seconds for bucketing and the result is converted back to datetimes.

    Returns
    -------
    pandas.DataFrame
        One row per bucket, with ``Timestamp`` set to the start of the bucket
        followed by the aggregated columns.

    Raises
    ------
    ValueError
        If ``aggregation_period`` is not positive.
    """
    if aggregation_period <= 0:
        raise ValueError("aggregation_period must be a positive number of minutes")

    # Kept local so the function does not depend on a constant that is defined
    # in a different notebook cell.
    seconds_per_minute = 60
    aggregation_factor = aggregation_period * seconds_per_minute

    data = dataframe.copy()

    if not unix_time:
        # Resolution-independent conversion to whole epoch seconds (works for
        # ns/us/ms/s datetime columns alike, unlike an int64 cast // 10**9).
        timestamps = data['Timestamp']
        epoch = pd.Timestamp(0, tz=timestamps.dt.tz)
        data['Timestamp'] = (timestamps - epoch) // pd.Timedelta(seconds=1)

    # String aggregation names avoid the FutureWarning newer pandas versions
    # emit when NumPy callables such as np.max are passed to .agg().
    bucket_ids = data['Timestamp'] // aggregation_factor
    data = data.groupby(bucket_ids).agg({
        'Open': 'first',
        'High': 'max',
        'Low': 'min',
        'Close': 'last',
        'Volume_(BTC)': 'sum',
        'Volume_(Currency)': 'sum',
        'Weighted_Price': 'mean',
    }).reset_index()

    # Turn the bucket id back into the bucket's start time in epoch seconds.
    data['Timestamp'] = data['Timestamp'] * aggregation_factor

    if not unix_time:
        data['Timestamp'] = pd.to_datetime(data['Timestamp'], unit='s')

    return data
    

In [5]:
# Number of seconds in one minute (unit-conversion constant).
SECONDS_IN_MINUTE = 60

In [6]:
# Load the two raw inputs. `date_parser=True` was dropped from the second call:
# the option expects a callable (not a bool) and is deprecated in pandas 2.x;
# timestamps are parsed explicitly below instead.
df_raw_part1 = pd.read_csv('Data/bitstampUSD_1-min_data_2012-01-01_to_2018-01-08.csv')
df_raw_part2 = pd.read_csv('Data/bitstampUSD_30-min_data_january.csv')

# Aggregate first part of data into chunks of 30 mins, second part already aggregated
df_p1 = aggregate_market_values(df_raw_part1, 30)
df_p1 = df_p1.assign(Timestamp=pd.to_datetime(df_p1['Timestamp'], unit='s'))

# Build a new frame rather than aliasing df_raw_part2, so the raw frame is left
# exactly as it was read and re-running this cell gives the same result.
df_p2 = df_raw_part2.assign(Timestamp=pd.to_datetime(df_raw_part2['Timestamp']))

# Stack both parts into one continuously indexed 30-minute series.
df_raw = pd.concat([df_p1, df_p2]).reset_index(drop=True)

df_raw.head()

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,2011-12-31 07:30:00,4.39,4.39,4.39,4.39,3.644647,16.0,4.39
1,2011-12-31 08:00:00,4.39,4.39,4.39,4.39,13.667426,60.000001,4.39
2,2011-12-31 08:30:00,4.39,4.39,4.39,4.39,13.667426,60.000001,4.39
3,2011-12-31 09:00:00,4.39,4.39,4.39,4.39,13.667426,60.000001,4.39
4,2011-12-31 09:30:00,4.39,4.39,4.39,4.39,13.667426,60.000001,4.39


In [7]:
def RSI(RS):
    """Convert a relative-strength ratio into a relative strength index.

    Computes ``100 - 100 / (1 + RS)``: ``RS = 0`` maps to 0, ``RS = 1`` to 50,
    and the result approaches 100 as ``RS`` grows.

    Parameters
    ----------
    RS : number
        Relative strength (in this notebook: average gain / average loss).

    Returns
    -------
    The index value for ``RS``.
    """
    denominator = 1 + RS
    return 100 - 100 / denominator

In [8]:
# Discard the open/high/low prices; the remaining steps only use the other columns.
df_drop = df_raw.drop(columns=['Open', 'High', 'Low'])
df_drop.head(2)

Unnamed: 0,Timestamp,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,2011-12-31 07:30:00,4.39,3.644647,16.0,4.39
1,2011-12-31 08:00:00,4.39,13.667426,60.000001,4.39


In [9]:
# Rows per day: the series has one row every 30 minutes, i.e. 2 per hour * 24 hours.
day_sample = 2*24

In [10]:
# delta = close now minus the close `day_sample` rows earlier; rows containing
# any missing value (including the first `day_sample` rows, which have no
# earlier close to compare with) are dropped.
df_delta = df_drop.assign(delta=df_drop['Close'].diff(day_sample)).dropna()
df_delta.head(3)

Unnamed: 0,Timestamp,Close,Volume_(BTC),Volume_(Currency),Weighted_Price,delta
48,2012-01-01 07:30:00,4.58,45.06,206.3748,4.58,0.19
49,2012-01-01 08:00:00,4.58,45.06,206.3748,4.58,0.19
50,2012-01-01 08:30:00,4.58,45.06,206.3748,4.58,0.19


In [11]:
# Split each change into a non-negative gain and a non-negative loss magnitude:
# gain = delta where it is positive (else 0), loss = |delta| where it is
# negative (else 0). clip/abs replaces the boolean-mask selection followed by a
# frame-wide fillna, which would also have zeroed missing values in unrelated
# columns and produced -0.0 losses for unchanged prices.
df_gain_loss = df_delta.dropna().assign(
    gain=lambda frame: frame['delta'].clip(lower=0),
    loss=lambda frame: frame['delta'].clip(upper=0).abs(),
)
df_gain_loss.head(3)

Unnamed: 0,Timestamp,Close,Volume_(BTC),Volume_(Currency),Weighted_Price,delta,gain,loss
48,2012-01-01 07:30:00,4.58,45.06,206.3748,4.58,0.19,0.19,0.0
49,2012-01-01 08:00:00,4.58,45.06,206.3748,4.58,0.19,0.19,0.0
50,2012-01-01 08:30:00,4.58,45.06,206.3748,4.58,0.19,0.19,0.0


In [12]:
# Rows in 14 days at one row per 30 minutes: 2 per hour * 24 hours * 14 days.
num_sample_14_days = 2*24*14

In [13]:
# Simple moving averages of gain and loss over a 14-day window; rows without a
# full window (the first num_sample_14_days - 1) come out as NaN and are dropped.
df_avg = df_gain_loss.assign(
    avg_gain=df_gain_loss['gain'].rolling(num_sample_14_days).mean(),
    avg_loss=df_gain_loss['loss'].rolling(num_sample_14_days).mean(),
).dropna()
df_avg.head()

Unnamed: 0,Timestamp,Close,Volume_(BTC),Volume_(Currency),Weighted_Price,delta,gain,loss,avg_gain,avg_loss
719,2012-01-15 07:00:00,6.3,6.377001,40.175104,6.3,-0.1,0.0,0.1,0.280759,0.148929
720,2012-01-15 07:30:00,6.3,6.377001,40.175104,6.3,-0.1,0.0,0.1,0.280476,0.149077
721,2012-01-15 08:00:00,6.3,6.377001,40.175104,6.3,-0.1,0.0,0.1,0.280193,0.149226
722,2012-01-15 08:30:00,6.3,6.377001,40.175104,6.3,-0.1,0.0,0.1,0.279911,0.149375
723,2012-01-15 09:00:00,6.3,6.377001,40.175104,6.3,-0.1,0.0,0.1,0.279628,0.149524


In [14]:
# Relative strength = average gain / average loss. Rows where the ratio is NaN
# (0 / 0) are dropped; an infinite ratio (zero average loss) is kept.
df_rs = df_avg.assign(RS=df_avg['avg_gain'] / df_avg['avg_loss']).dropna()
df_rs.head(2)

Unnamed: 0,Timestamp,Close,Volume_(BTC),Volume_(Currency),Weighted_Price,delta,gain,loss,avg_gain,avg_loss,RS
719,2012-01-15 07:00:00,6.3,6.377001,40.175104,6.3,-0.1,0.0,0.1,0.280759,0.148929,1.885192
720,2012-01-15 07:30:00,6.3,6.377001,40.175104,6.3,-0.1,0.0,0.1,0.280476,0.149077,1.881413


In [15]:
# RSI is plain arithmetic, so it is applied to the whole RS column at once
# instead of calling it row by row through apply(lambda ...).
df_rsi = df_rs.assign(RSI=RSI(df_rs['RS']))
df_rsi.head(10)

Unnamed: 0,Timestamp,Close,Volume_(BTC),Volume_(Currency),Weighted_Price,delta,gain,loss,avg_gain,avg_loss,RS,RSI
719,2012-01-15 07:00:00,6.3,6.377001,40.175104,6.3,-0.1,0.0,0.1,0.280759,0.148929,1.885192,65.34026
720,2012-01-15 07:30:00,6.3,6.377001,40.175104,6.3,-0.1,0.0,0.1,0.280476,0.149077,1.881413,65.294811
721,2012-01-15 08:00:00,6.3,6.377001,40.175104,6.3,-0.1,0.0,0.1,0.280193,0.149226,1.877643,65.249333
722,2012-01-15 08:30:00,6.3,6.377001,40.175104,6.3,-0.1,0.0,0.1,0.279911,0.149375,1.873879,65.203827
723,2012-01-15 09:00:00,6.3,6.377001,40.175104,6.3,-0.1,0.0,0.1,0.279628,0.149524,1.870123,65.158293
724,2012-01-15 09:30:00,6.3,6.377001,40.175104,6.3,-0.1,0.0,0.1,0.279345,0.149673,1.866375,65.11273
725,2012-01-15 10:00:00,7.15,32.436674,227.405179,6.441667,0.75,0.75,0.0,0.280179,0.149673,1.871943,65.180364
726,2012-01-15 10:30:00,7.15,30.038851,214.777782,7.15,0.75,0.75,0.0,0.281012,0.149673,1.87751,65.247737
727,2012-01-15 11:00:00,7.15,30.038851,214.777782,7.15,0.75,0.75,0.0,0.281845,0.149673,1.883078,65.314849
728,2012-01-15 11:30:00,7.15,30.038851,214.777782,7.15,0.75,0.75,0.0,0.282679,0.149673,1.888646,65.381703


In [16]:
# Comma-separated list of Bokeh toolbar tools enabled on the figures created by plot_df.
TOOLS = "pan,wheel_zoom,box_zoom,reset,save"

In [17]:
def plot_df(df, column, title=None):
    """Show an interactive Bokeh line chart of one column against ``Timestamp``.

    The selected column is drawn in red over a datetime x axis, together with
    a black horizontal reference line at y = 0 that spans from the first to
    the last timestamp of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``Timestamp`` column and ``column``; must not be empty.
    column : str
        Name of the column to plot; also used as its legend entry.
    title : str, optional
        Figure title. Defaults to ``column`` (previously every chart carried a
        hard-coded title left over from an unrelated example).

    Returns
    -------
    None
        The figure is displayed via ``show``.
    """
    if title is None:
        title = column

    # NOTE(review): `plot_width` and `legend` are the keyword names of older
    # Bokeh releases; newer releases use `width` and `legend_label` -- confirm
    # against the installed Bokeh version before changing.
    p = figure(x_axis_type="datetime", tools=TOOLS, plot_width=990, title=title)
    p.xaxis.major_label_orientation = pi/4
    p.grid.grid_line_alpha = 0.3

    timestamps = df['Timestamp']
    p.line(timestamps, df[column], line_color='#ff0000', line_width=2, line_alpha=1, legend=column)
    # Horizontal reference line at zero across the full time range.
    p.line([timestamps.iloc[0], timestamps.iloc[-1]],
           [0, 0], line_color='#000000', line_width=2, line_alpha=1, legend="0")

    show(p)

In [18]:
# Plot the computed RSI column over time.
plot_df(df_rsi, 'RSI')