In [1]:
import pandas as pd
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import numpy as np
import datetime

# Seed NumPy's global random number generator so random draws repeat between runs.
np.random.seed(42)

In [2]:
from math import pi
from bokeh.io import output_notebook
from bokeh.plotting import figure, show, output_file

# Have Bokeh render its plots inline in the notebook output cells.
output_notebook()

In [3]:
# Matplotlib helpers for plotting training curves (loss and accuracy per epoch)

def prepare_standardplot(title, xlabel):
    """Create the standard two-panel figure used for training curves.

    The left axis is labelled for the loss and uses a logarithmic y scale;
    the right axis is labelled for the accuracy. Both share ``xlabel``.

    Parameters
    ----------
    title : str
        Figure-level title (set via ``suptitle``).
    xlabel : str
        Label applied to the x axis of both panels.

    Returns
    -------
    tuple
        ``(fig, ax1, ax2)`` -- the figure, the loss axis and the accuracy axis.
    """
    canvas, panels = plt.subplots(1, 2)
    loss_axis, accuracy_axis = panels
    canvas.suptitle(title)

    y_labels = ('categorical cross entropy', 'accuracy [% correct]')
    for panel, y_label in zip((loss_axis, accuracy_axis), y_labels):
        panel.set_ylabel(y_label)
        panel.set_xlabel(xlabel)

    # Only the loss panel is drawn on a log scale.
    loss_axis.set_yscale('log')
    return canvas, loss_axis, accuracy_axis

def finalize_standardplot(fig, ax1, ax2):
    """Add legends where needed and tidy the layout of a two-panel figure.

    Parameters
    ----------
    fig : figure object
        The figure owning ``ax1`` and ``ax2`` (as returned by
        ``prepare_standardplot``).
    ax1, ax2 : axes objects
        The two panels; each gets a legend only if it has labelled artists.

    Returns
    -------
    None
        The figure is modified in place.
    """
    for axis in (ax1, ax2):
        handles, labels = axis.get_legend_handles_labels()
        # An axis without labelled artists gets no legend at all.
        if labels:
            axis.legend(handles, labels)

    fig.tight_layout()
    # Leave headroom for the suptitle. This is applied to `fig` itself:
    # plt.subplots_adjust would act on pyplot's *current* figure, which is not
    # guaranteed to be the figure passed in.
    fig.subplots_adjust(top=0.9)

def plot_history(history, title):
    """Plot the training loss and binary accuracy stored in a history object.

    Parameters
    ----------
    history : object
        Must expose a ``history`` mapping with the keys ``'loss'`` and
        ``'binary_accuracy'`` (sequences with one value per epoch).
    title : str
        Title of the resulting figure.

    Returns
    -------
    The figure created by ``prepare_standardplot``.
    """
    canvas, loss_axis, accuracy_axis = prepare_standardplot(title, 'epoch')
    curves = history.history
    # Loss goes on the left panel, accuracy on the right one.
    for panel, key in ((loss_axis, 'loss'), (accuracy_axis, 'binary_accuracy')):
        panel.plot(curves[key], label="training")
    finalize_standardplot(canvas, loss_axis, accuracy_axis)
    return canvas

In [4]:
def aggregate_market_values(dataframe, aggregation_period, unix_time=True):
    """Aggregate OHLCV rows into buckets of ``aggregation_period`` minutes.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns ``Timestamp``, ``Open``, ``High``, ``Low``,
        ``Close``, ``Volume_(BTC)``, ``Volume_(Currency)`` and
        ``Weighted_Price``. The frame is not modified.
    aggregation_period : number
        Bucket length in minutes; must be positive. It is converted to
        seconds with the module-level ``SECONDS_IN_MINUTE`` constant.
    unix_time : bool, default True
        ``True``: ``Timestamp`` holds Unix seconds and is returned as such.
        ``False``: ``Timestamp`` holds datetimes and is returned as datetimes.

    Returns
    -------
    pandas.DataFrame
        One row per non-empty bucket with the columns ``Timestamp`` (start of
        the bucket), ``Open`` (first), ``High`` (max), ``Low`` (min),
        ``Close`` (last), both volume columns (sum) and ``Weighted_Price``
        (plain mean of the rows in the bucket).

    Raises
    ------
    ValueError
        If the resulting bucket length is not positive.
    """
    aggregation_factor = aggregation_period * SECONDS_IN_MINUTE
    if aggregation_factor <= 0:
        raise ValueError("aggregation_period must be positive")

    stamps = dataframe['Timestamp']
    if not unix_time:
        # Convert datetimes to whole seconds since the Unix epoch. Subtracting
        # the epoch works for any datetime resolution, unlike casting to int64
        # and dividing by 10**9, which is only right for nanosecond data.
        if stamps.dt.tz is not None:
            stamps = stamps.dt.tz_convert('UTC').dt.tz_localize(None)
        stamps = (stamps - pd.Timestamp('1970-01-01')) // pd.Timedelta(seconds=1)

    # Bucket number of every row; named so reset_index() yields a 'Timestamp' column.
    buckets = (stamps // aggregation_factor).rename('Timestamp')

    # String aggregator names select pandas' built-in group reductions.
    data = dataframe.groupby(buckets).agg({
        'Open': 'first',
        'High': 'max',
        'Low': 'min',
        'Close': 'last',
        'Volume_(BTC)': 'sum',
        'Volume_(Currency)': 'sum',
        'Weighted_Price': 'mean',
    }).reset_index()

    # Turn the bucket number back into the bucket's start time in seconds.
    data['Timestamp'] = data['Timestamp'] * aggregation_factor

    if not unix_time:
        data['Timestamp'] = pd.to_datetime(data['Timestamp'], unit='s')

    return data
    

In [5]:
# Seconds per minute; aggregate_market_values multiplies its period (in minutes) by this.
SECONDS_IN_MINUTE = 60

In [6]:
# Two input files: a 1-minute history and a January extract in 30-minute rows
# (going by the file names).
df_raw_part1 = pd.read_csv('Data/bitstampUSD_1-min_data_2012-01-01_to_2018-01-08.csv')
# No date-parsing argument is passed here: Timestamp is converted explicitly below.
df_raw_part2 = pd.read_csv('Data/bitstampUSD_30-min_data_january.csv')

# Aggregate first part of data into chunks of 30 mins, second part already aggregated
df_p1 = aggregate_market_values(df_raw_part1, 30)
# The aggregated Timestamp is in Unix seconds; turn it into datetimes.
df_p1.Timestamp = pd.to_datetime(df_p1.Timestamp, unit='s')

# NOTE: df_p2 is the same object as df_raw_part2, so the conversion below also
# changes df_raw_part2.Timestamp.
df_p2 = df_raw_part2
df_p2.Timestamp = pd.to_datetime(df_p2.Timestamp)

# Stack both parts and renumber the rows from 0.
df_raw = pd.concat([df_p1, df_p2]).reset_index(drop=True)

df_raw.head()

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,2011-12-31 07:30:00,4.39,4.39,4.39,4.39,3.644647,16.0,4.39
1,2011-12-31 08:00:00,4.39,4.39,4.39,4.39,13.667426,60.000001,4.39
2,2011-12-31 08:30:00,4.39,4.39,4.39,4.39,13.667426,60.000001,4.39
3,2011-12-31 09:00:00,4.39,4.39,4.39,4.39,13.667426,60.000001,4.39
4,2011-12-31 09:30:00,4.39,4.39,4.39,4.39,13.667426,60.000001,4.39


In [7]:
# Bucket length in minutes, and how many such buckets make up one day.
window_aggregation = 30
day_window = (24 * 60) // window_aggregation

# df_raw.Timestamp holds datetimes at this point, hence unix_time=False.
df = aggregate_market_values(
    dataframe=df_raw.copy(),
    aggregation_period=window_aggregation,
    unix_time=False,
)

In [8]:
# Discard the Open/High/Low columns; the original df is left untouched.
df_drop = df.drop(labels=['Open', 'High', 'Low'], axis='columns')
df_drop.head(2)

Unnamed: 0,Timestamp,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,2011-12-31 07:30:00,4.39,3.644647,16.0,4.39
1,2011-12-31 08:00:00,4.39,13.667426,60.000001,4.39


In [9]:
# Window lengths in rows for 12 and 26 days, at 2 * 24 rows per day
# (consistent with the 30-minute aggregation used above).
sample_12_days, sample_26_days = (days * 2 * 24 for days in (12, 26))

In [10]:
# Simple moving averages of the weighted price over the 12- and 26-day windows.
# The leading rows, where a window is not yet full (NaN), are dropped, as is
# any other row containing a NaN.
df_mov_avg = df_drop.assign(
    MA_12=df_drop['Weighted_Price'].rolling(sample_12_days).mean(),
    MA_26=df_drop['Weighted_Price'].rolling(sample_26_days).mean(),
).dropna()
df_mov_avg.head(2)

Unnamed: 0,Timestamp,Close,Volume_(BTC),Volume_(Currency),Weighted_Price,MA_12,MA_26
1247,2012-01-26 07:00:00,6.31,6.0,37.86,6.31,6.642791,6.324139
1248,2012-01-26 07:30:00,6.31,6.0,37.86,6.31,6.642564,6.325678


In [20]:
def EMA(close, last_EMA, time_periods):
    """Advance an exponential moving average by one sample.

    Parameters
    ----------
    close : number
        The newest observation.
    last_EMA : number
        The EMA value being updated.
    time_periods : number
        Span ``N`` of the average; the smoothing factor is ``2 / (N + 1)``.

    Returns
    -------
    number
        ``last_EMA`` moved towards ``close`` by the smoothing factor.
    """
    smoothing = 2 / (time_periods + 1)
    correction = (close - last_EMA) * smoothing
    return correction + last_EMA

In [75]:
df_shift = df_mov_avg.copy()

# Offset, relative to the current row, of the EMA value fed back into the
# recursion (-1 = the previous row). The signal-line cell further down reads
# this variable as well.
last_sample = -1

# Extract the closes once: df_shift.iloc[i]['Close'] inside the loop would
# build a full row Series on every iteration.
closes = df_shift['Close'].values

# Each EMA starts from the simple moving average of the first row.
EMA_12 = [df_shift['MA_12'].iloc[0]]
EMA_26 = [df_shift['MA_26'].iloc[0]]

for i in range(1, len(closes)):
    previous = i + last_sample
    if previous < 0:
        # No earlier EMA value exists for this offset yet: store the -1 placeholder.
        EMA_12.append(-1)
        EMA_26.append(-1)
    else:
        # Same spans as the MA_12 / MA_26 rolling windows.
        EMA_12.append(EMA(closes[i], EMA_12[previous], sample_12_days))
        EMA_26.append(EMA(closes[i], EMA_26[previous], sample_26_days))

df_shift['EMA_12'] = EMA_12
df_shift['EMA_26'] = EMA_26
df_shift.head(2)

Unnamed: 0,Timestamp,Close,Volume_(BTC),Volume_(Currency),Weighted_Price,MA_12,MA_26,EMA_12,EMA_26
1247,2012-01-26 07:00:00,6.31,6.0,37.86,6.31,6.642791,6.324139,6.642791,6.324139
1248,2012-01-26 07:30:00,6.31,6.0,37.86,6.31,6.642564,6.325678,6.641638,6.324117


In [76]:
# NOTE(review): plot_df is defined in a later cell of this notebook, so on a
# fresh kernel this call only works once that cell has been run.
plot_df(df=df_shift, column='EMA_12')

In [89]:
# MACD line = 12-day EMA minus 26-day EMA, plus a 9-row moving average of it.
# NOTE(review): this window is 9 rows, while the signal-line EMA computed
# later uses 9*24*2 rows -- confirm the mismatch is intended.
df_macd = df_shift.copy()
df_macd['macd_line'] = df_macd['EMA_12'].sub(df_macd['EMA_26'])
df_macd['MA_macd'] = df_macd['macd_line'].rolling(window=9).mean()
# Drops the first rows, where the 9-row window is not yet full.
df_macd = df_macd.dropna()

In [91]:
# The signal line starts from the moving average of the MACD line in the first row.
EMA_MACD = [df_macd['MA_macd'].iloc[0]]

# Extract the MACD values once instead of building a row Series per iteration
# with df_macd.iloc[i]['macd_line'].
macd_values = df_macd['macd_line'].values
# Span of the signal-line EMA in rows.
signal_periods = 9*24*2

for i in range(1, len(macd_values)):
    # last_sample is the feedback offset set in the EMA cell above (-1 = previous row).
    previous = i + last_sample
    if previous < 0:
        # No earlier EMA value exists for this offset yet: store the -1 placeholder.
        EMA_MACD.append(-1)
    else:
        EMA_MACD.append(EMA(macd_values[i], EMA_MACD[previous], signal_periods))

In [97]:
# Attach the signal line; the histogram is the MACD line minus the signal line.
df_macd['signal_line'] = EMA_MACD
df_macd['macd_histogram'] = df_macd['macd_line'].sub(df_macd['signal_line'])
df_macd.head(2)

Unnamed: 0,Timestamp,Close,Volume_(BTC),Volume_(Currency),Weighted_Price,MA_12,MA_26,EMA_12,EMA_26,macd_line,MA_macd,signal_line,macd_histogram
1255,2012-01-26 11:00:00,6.31,6.0,37.86,6.31,6.640974,6.336447,6.633674,6.323959,0.309715,0.314165,0.314165,-0.00445
1256,2012-01-26 11:30:00,6.31,6.0,37.86,6.31,6.640747,6.337986,6.632552,6.323937,0.308615,0.31305,0.31414,-0.005524


In [81]:
# Toolbar tools that plot_df passes to Bokeh's figure(tools=...).
TOOLS = "pan,wheel_zoom,box_zoom,reset,save"

In [101]:
def plot_df(df, column, title=None):
    """Show an interactive Bokeh line plot of ``df[column]`` over ``df.Timestamp``.

    A black horizontal line at y = 0, spanning the first to the last
    timestamp, is drawn as a reference.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``Timestamp`` column and ``column``; must not be empty.
    column : str
        Name of the column to plot; also used as its legend entry.
    title : str, optional
        Plot title. Defaults to the column name.

    Returns
    -------
    None
        The plot is displayed with ``show``.
    """
    if title is None:
        title = str(column)

    # NOTE(review): newer Bokeh releases renamed plot_width -> width and
    # legend -> legend_label; adjust if the installed Bokeh rejects these.
    p = figure(x_axis_type="datetime", tools=TOOLS, plot_width=990, title=title)
    p.xaxis.major_label_orientation = pi/4
    p.grid.grid_line_alpha = 0.3

    timestamps = df['Timestamp']
    p.line(timestamps, df[column], line_color='#ff0000', line_width=2, line_alpha=1, legend=column)

    # Zero reference line across the whole time range.
    first_stamp, last_stamp = timestamps.iloc[0], timestamps.iloc[-1]
    p.line([first_stamp, last_stamp], [0, 0],
           line_color='#000000', line_width=2, line_alpha=1, legend="0")

    show(p)

In [102]:
# Plot the MACD histogram (MACD line minus signal line).
plot_df(df=df_macd, column='macd_histogram')