In [1]:
import pandas as pd
from IPython.display import display, HTML
from keras.models import Sequential
from keras.layers import Dense,Dropout,BatchNormalization
import matplotlib.pyplot as plt
import numpy as np
import datetime

np.random.seed(42)

Using TensorFlow backend.


In [2]:
from math import pi
from bokeh.io import output_notebook
from bokeh.plotting import figure, show, output_file

output_notebook()

In [3]:
# Force CPU usage
import tensorflow as tf
from keras import backend as K

num_cores = 8

config = tf.ConfigProto(intra_op_parallelism_threads=num_cores,\
        inter_op_parallelism_threads=num_cores, allow_soft_placement=True,\
        device_count = {'CPU' : 1, 'GPU' : 0})
session = tf.Session(config=config)
K.set_session(session)

In [4]:
# For plot

def prepare_standardplot(title, xlabel):
    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.suptitle(title)
    ax1.set_ylabel('categorical cross entropy')
    ax1.set_xlabel(xlabel)
    ax1.set_yscale('log')
    ax2.set_ylabel('accuracy [% correct]')
    ax2.set_xlabel(xlabel)
    return fig, ax1, ax2

def finalize_standardplot(fig, ax1, ax2):
    ax1handles, ax1labels = ax1.get_legend_handles_labels()
    if len(ax1labels) > 0:
        ax1.legend(ax1handles, ax1labels)
    ax2handles, ax2labels = ax2.get_legend_handles_labels()
    if len(ax2labels) > 0:
        ax2.legend(ax2handles, ax2labels)
    fig.tight_layout()
    plt.subplots_adjust(top=0.9)

def plot_history(history, title):
    fig, ax1, ax2 = prepare_standardplot(title, 'epoch')
    ax1.plot(history.history['loss'], label = "training")
    ax2.plot(history.history['binary_accuracy'], label = "training")
    finalize_standardplot(fig, ax1, ax2)
    return fig

In [5]:
df_raw = pd.read_csv('Data/bitstampUSD_1-min_data_2012-01-01_to_2018-01-08.csv')

In [6]:
display(df_raw.head())

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,1325317920,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
1,1325317980,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
2,1325318040,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
3,1325318100,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
4,1325318160,4.39,4.39,4.39,4.39,0.455581,2.0,4.39


In [7]:
df = df_raw.drop('Timestamp',1)
df[['Open','High','Low','Close']] = df[['Open','High','Low','Close']].apply(lambda x: np.log(x))
df = (df-df.mean())/df.std()

In [8]:
df.head()

Unnamed: 0,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,-2.075365,-2.075469,-2.075199,-2.075289,-0.297232,-0.18443,-0.424385
1,-2.075365,-2.075469,-2.075199,-2.075289,-0.297232,-0.18443,-0.424385
2,-2.075365,-2.075469,-2.075199,-2.075289,-0.297232,-0.18443,-0.424385
3,-2.075365,-2.075469,-2.075199,-2.075289,-0.297232,-0.18443,-0.424385
4,-2.075365,-2.075469,-2.075199,-2.075289,-0.297232,-0.18443,-0.424385


In [9]:
df.describe()

Unnamed: 0,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
count,3161057.0,3161057.0,3161057.0,3161057.0,3161057.0,3161057.0,3161057.0
mean,3.015734e-13,4.370263e-13,3.686399e-13,6.005367e-13,-5.074202e-14,8.689802e-16,-3.917382e-14
std,1.0,1.0,1.0,1.0,1.0,1.0,1.0
min,-2.150525,-2.150622,-2.634462,-2.634494,-0.3099179,-0.1844678,-0.4246458
25%,-0.393267,-0.3934676,-0.3933023,-0.3932387,-0.2974788,-0.1832231,-0.3772309
50%,0.2458171,0.2458036,0.2457736,0.2458136,-0.2521185,-0.1776162,-0.2588227
75%,0.5338638,0.5340192,0.533808,0.5339054,-0.06396828,-0.1345711,-0.1350566
max,2.302785,2.302313,2.302864,2.302737,162.6979,96.36451,8.272274


In [10]:
def add_MA_n_days_age(num_days):
    num_days_str = str(num_days)
    
    df[['Open_W_MA_'+num_days_str,'High_W_MA_'+num_days_str,'Low_W_MA_'+num_days_str,'Close_W_MA_'+num_days_str]] = df[['Open','High','Low','Close']].rolling(window=day_window * num_days).mean()
    df[['Open_MA_'+num_days_str,'High_MA_'+num_days_str,'Low_MA_'+num_days_str,'Close_MA_'+num_days_str]] = df[['Open','High','Low','Close']].rolling(window=day_window * num_days).mean()
    df[['Open_EMA_'+num_days_str,'High_EMA_'+num_days_str,'Low_EMA_'+num_days_str,'Close_EMA_'+num_days_str]] = df[['Open','High','Low','Close']].ewm(span=day_window * num_days).mean()

    df[['Open_MAX_'+num_days_str,'High_MAX_'+num_days_str,'Low_MAX_'+num_days_str,'Close_MAX_'+num_days_str]] = df[['Open','High','Low','Close']].rolling(window=day_window * num_days).max()
    df[['Open_MIN_'+num_days_str,'High_MIN_'+num_days_str,'Low_MIN_'+num_days_str,'Close_MIN_'+num_days_str]] = df[['Open','High','Low','Close']].rolling(window=day_window * num_days).min()

    df['Open_TENKAN_'+num_days_str] = 0.5 * (df['Open_MIN_'+num_days_str] + df['Open_MAX_'+num_days_str])
    df['High_TENKAN_'+num_days_str] = 0.5 * (df['High_MIN_'+num_days_str] + df['High_MAX_'+num_days_str])
    df['Low_TENKAN_'+num_days_str] = 0.5 * (df['Low_MIN_'+num_days_str] + df['Low_MAX_'+num_days_str])
    df['Close_TENKAN_'+num_days_str] = 0.5 * (df['Close_MIN_'+num_days_str] + df['Close_MAX_'+num_days_str])

def add_prices_n_days_ago(num_days):
    num_days_str = str(num_days)
    df[['Open_S_'+num_days_str,'High_S_'+num_days_str,'Low_S_'+num_days_str,'Close_S_'+num_days_str]] = df[['Open','High','Low','Close']].shift(day_window * num_days)

In [11]:
day_window = 60 * 24

df[['Open_W','High_W','Low_W','Close_W']] = df[['Open','High','Low','Close']].divide(df['Volume_(BTC)'],axis = 0)

add_prices_n_days_ago(1)
add_prices_n_days_ago(2)
add_prices_n_days_ago(3)

add_MA_n_days_age(5)
add_MA_n_days_age(10)
add_MA_n_days_age(15)

plusieurs y gains selon différents temps + vote

In [12]:
mean = (df['Open'] + df['Close']) * 0.5
res = mean.shift(day_window)

df['y'] = ((mean - res) > 0).astype(int)

In [13]:
display(df.sample(10))

Unnamed: 0,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price,Open_W,High_W,Low_W,...,Close_MAX_15,Open_MIN_15,High_MIN_15,Low_MIN_15,Close_MIN_15,Open_TENKAN_15,High_TENKAN_15,Low_TENKAN_15,Close_TENKAN_15,y
28593,-1.893913,-1.894032,-1.893733,-1.893842,0.5018,-0.181067,-0.423575,-3.774235,-3.774472,-3.773878,...,-1.804793,-1.956087,-1.9562,-1.955912,-1.956014,-1.880474,-1.880594,-1.880294,-1.880403,0
2278155,0.33251,0.3322,0.331751,0.332296,-0.247487,-0.16567,-0.228561,-1.343544,-1.342293,-1.340477,...,0.358198,0.31337,0.313123,0.313657,0.313378,0.335775,0.335584,0.336005,0.335788,0
1855185,0.104927,0.104799,0.105258,0.105103,-0.257139,-0.174195,-0.298483,-0.408054,-0.407557,-0.409342,...,0.153242,0.034635,0.034371,0.034898,0.03463,0.09399,0.094473,0.093919,0.093936,0
1975332,0.01846,0.019898,0.018209,0.018455,6.413254,0.92469,-0.317968,0.002878,0.003103,0.002839,...,0.02583,-0.017528,-0.017808,-0.018051,-0.017898,0.004048,0.003861,0.003939,0.003966,1
2799366,0.929671,0.929314,0.930065,0.929666,-0.301252,-0.17625,0.196505,-3.086028,-3.084843,-3.087335,...,0.935945,0.829257,0.828905,0.829643,0.829251,0.882233,0.882249,0.882266,0.882598,1
681392,-0.549072,-0.544833,-0.550629,-0.544942,0.923724,-0.116075,-0.389913,-0.594411,-0.589822,-0.596097,...,0.048714,-0.810614,-0.808634,-0.863136,-0.808486,-0.380958,-0.380111,-0.407055,-0.379886,1
2075807,0.343181,0.342871,0.343531,0.343189,-0.285591,-0.176988,-0.224373,-1.201655,-1.200568,-1.202877,...,0.355732,0.20279,0.20355,0.203128,0.202906,0.279285,0.279565,0.279602,0.279319,1
1511248,0.272267,0.272015,0.272558,0.272329,-0.255896,-0.169973,-0.250092,-1.063974,-1.062989,-1.065113,...,0.340215,0.155737,0.156243,0.152939,0.154571,0.247972,0.24807,0.246748,0.247393,0
1248788,0.333894,0.333584,0.334242,0.333901,-0.260675,-0.169594,-0.227943,-1.280882,-1.279694,-1.282219,...,0.353019,0.301013,0.300928,0.300443,0.300105,0.326889,0.327184,0.326723,0.326562,0
644555,-0.633232,-0.633459,-0.633032,-0.633271,0.159043,-0.162358,-0.39536,-3.981525,-3.982952,-3.980264,...,-0.584249,-0.974272,-0.97447,-0.979348,-0.979552,-0.779277,-0.779492,-0.781676,-0.7819,0


In [14]:
df = df.dropna(how ='any')

y = df.y
df = (df-df.mean())/df.std()
df.y = y
df = df.reset_index()
df = df.drop('index',1)
display(df.describe())

Unnamed: 0,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price,Open_W,High_W,Low_W,...,Close_MAX_15,Open_MIN_15,High_MIN_15,Low_MIN_15,Close_MIN_15,Open_TENKAN_15,High_TENKAN_15,Low_TENKAN_15,Close_TENKAN_15,y
count,3139458.0,3139458.0,3139458.0,3139458.0,3139458.0,3139458.0,3139458.0,3139458.0,3139458.0,3139458.0,...,3139458.0,3139458.0,3139458.0,3139458.0,3139458.0,3139458.0,3139458.0,3139458.0,3139458.0,3139458.0
mean,-3.392586e-13,-5.906626e-13,5.790729e-14,-4.446534e-13,6.607257e-14,1.27272e-14,-7.354502e-14,-2.902058e-17,-1.760952e-16,1.528909e-16,...,-3.545786e-13,-2.427295e-12,7.563479e-14,4.981674e-13,-7.594128e-13,1.256374e-12,-4.448784e-12,-3.648817e-12,-1.063144e-12,0.5553561
std,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.4969263
min,-2.183942,-2.184021,-2.67243,-2.672439,-0.3100156,-0.1851188,-0.4263604,-325.1624,-325.0301,-325.2853,...,-2.080054,-2.102651,-2.103383,-2.514825,-2.516158,-2.06334,-2.064458,-2.045646,-2.046634,0.0
25%,-0.3881069,-0.3878531,-0.3884049,-0.3882137,-0.29771,-0.1838289,-0.3769825,-0.00163569,-0.001634168,-0.001635967,...,-0.3817015,-0.4439232,-0.4454905,-0.4261698,-0.4277904,-0.4057692,-0.4057031,-0.4081473,-0.4090605,0.0
50%,0.2377383,0.2377454,0.2376952,0.2377903,-0.2522784,-0.1781013,-0.2600888,0.0001490599,0.000150112,0.0001486858,...,0.2438378,0.2741989,0.2751972,0.2802574,0.2797808,0.2623724,0.2623112,0.2668872,0.2659238,1.0
75%,0.5276694,0.5277287,0.5276388,0.5276713,-0.06478471,-0.134529,-0.1364458,0.002967752,0.002967989,0.002967562,...,0.5382577,0.5675958,0.5660385,0.5780274,0.5762685,0.5394798,0.5385775,0.5390893,0.5383868,1.0
max,2.31108,2.310591,2.31117,2.311028,162.1828,96.04464,8.246152,1272.633,1272.879,1272.564,...,2.253494,2.19302,2.191008,2.164114,2.16599,2.227555,2.226237,2.231059,2.231849,1.0


In [23]:
df_plot = df_raw[-100:].copy()

df_plot['Date'] = pd.to_datetime(df_plot.Timestamp, unit='s')

display(df_plot.head())

inc = df_plot.Close >= df_plot.Open
dec = df_plot.Open > df_plot.Close
barWidth = 0.66 * 60*1000 # one minute in ms

TOOLS = "pan,wheel_zoom,box_zoom,reset,save"

p = figure(x_axis_type="datetime", tools=TOOLS, plot_width=990, title = "MSFT Candlestick")
p.xaxis.major_label_orientation = pi/4
p.grid.grid_line_alpha=0.3

p.segment(df_plot.Date, df_plot.High, df_plot.Date, df_plot.Low, color="black")
p.vbar(df_plot.Date[inc], barWidth, df_plot.Open[inc], df_plot.Close[inc], fill_color="#48D922", line_color="black")
p.vbar(df_plot.Date[dec], barWidth, df_plot.Open[dec], df_plot.Close[dec], fill_color="#FF2828", line_color="black")

show(p)

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price,Date
3160957,1515363660,16375.13,16398.43,16360.36,16398.43,8.982078,147272.68338,16396.281347,2018-01-07 22:21:00
3160958,1515363720,16398.42,16398.43,16367.19,16395.0,2.206147,36169.085754,16394.686249,2018-01-07 22:22:00
3160959,1515363780,16392.16,16394.99,16364.73,16394.96,9.091977,148916.82138,16378.925977,2018-01-07 22:23:00
3160960,1515363840,16394.97,16394.97,16367.63,16367.63,1.02644,16826.387556,16392.955996,2018-01-07 22:24:00
3160961,1515363900,16367.87,16395.22,16367.87,16395.22,0.358543,5877.339419,16392.280112,2018-01-07 22:25:00


In [None]:
length_test2=int(0.8*len(df.index))
window = day_window*30 * 2

index = df.index
index= index[0:length_test2]

indexList = [index[i:min(i + window,len(index))] for i in range(0, len(index), window)]
trainIndex = []
testIndex = []

for i in range(len(indexList)):
    if i%9 != 0:
        trainIndex += indexList[i].tolist()
    else:
        testIndex  += indexList[i].tolist()
        
print(len(trainIndex+testIndex) == len(index))
print(len(indexList))

In [None]:
train=df.iloc[trainIndex,:]
test1=df.iloc[testIndex,:]
test2=df.drop(train.index).drop(test1.index)

In [None]:
train_x = train.drop('y',1)
train_y = train['y']
test_x1 = test1.drop('y',1)
test_y1 = test1['y']
test_x2 = test2.drop('y',1)
test_y2 = test2['y']

model = Sequential()
model.add(Dense(128, input_dim=95, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['binary_accuracy'])

history = model.fit(train_x, train_y, epochs = 1, verbose=1)
scores1 = model.evaluate(test_x1, test_y1, verbose=0)
scores2 = model.evaluate(test_x2, test_y2, verbose=0)

model.save('my_model2.h5')

In [None]:
plot_history(history,"test")

In [None]:
print(scores1)
print(scores2)

In [None]:
from keras.models import load_model
 
    

with tf.device('/cpu:0'):

    model = load_model('my_model.h5')
    scores = model.evaluate(test_x, test_y, verbose=0)
    print(scores)
    
    del model
