In [1]:
from config import CONFIG
from utils import series_to_supervised

CONFIG

{'columns': ['Close', 'Volume', 'Low', 'High'],
 'csv_src_file': 'BTC_ETH',
 'filename': 'BTC_ETH_lstm_i288_o144_Close_Volume_Low_High',
 'folder': {'data': 'data/', 'weights': 'weights/'},
 'input_size': 288,
 'lstm_hidden_size': 200,
 'name': 'lstm',
 'output_size': 144,
 'pair': 'BTC_ETH',
 'period': 30}

In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from matplotlib import pyplot
 
# Location of the raw price CSV: <data folder> + <source name> + ".csv"
dfp = CONFIG['folder']['data'] + CONFIG['csv_src_file'] + '.csv'

# Price columns that will be fed to the model
columns = CONFIG['columns']
dataset = pd.read_csv(dfp)

# Keep only the rows recorded after the cutoff timestamp (epoch seconds).
# Handy cutoffs: 1498867200 = July 2017, 1514764800 = start of 2018,
# 1519862400 = March 2018.
dataset = dataset[dataset['Timestamp'] > 1519862400]
dataset.head()

Unnamed: 0,Close,Timestamp,High,Low,Open,Volume
269507,0.082828,1519862700,0.082856,0.082729,0.082729,4.151247
269508,0.082609,1519863000,0.082828,0.082606,0.082828,5.551513
269509,0.082552,1519863300,0.082673,0.082547,0.082609,2.327443
269510,0.08246,1519863600,0.082625,0.082419,0.082552,1.519736
269511,0.082455,1519863900,0.08246,0.082418,0.082455,0.552411


In [3]:
# Raw numpy matrix of the selected price columns, one row per candle
values = dataset[columns].values

In [4]:
# Window sizes used to frame the series for learning:
# n_lag steps of history go in, n_out steps of forecast come out.
n_lag, n_out = CONFIG['input_size'], CONFIG['output_size']
# One feature per selected price column
n_features = len(columns)
(n_lag, n_features, n_out)

(288, 4, 144)

In [5]:
# Rescale every feature column into the [0, 1] range.
# NOTE(review): the scaler is fitted on all rows of `values`, i.e. also on the
# rows that later land in the test split -- confirm this is acceptable.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(values)
scaled = scaler.transform(values)

In [6]:
# Frame as supervised learning: series_to_supervised (imported from utils) is
# given the scaled matrix together with the lag and forecast window sizes.
reframed = series_to_supervised(scaled, n_lag, n_out)
# Preview the first 10 framed rows
reframed.head(10)

Unnamed: 0,var1(t-288),var2(t-288),var3(t-288),var4(t-288),var1(t-287),var2(t-287),var3(t-287),var4(t-287),var1(t-286),var2(t-286),...,var3(t+141),var4(t+141),var1(t+142),var2(t+142),var3(t+142),var4(t+142),var1(t+143),var2(t+143),var3(t+143),var4(t+143)
288,0.911499,0.005383,0.909966,0.88822,0.904896,0.007199,0.906295,0.887362,0.903185,0.003018,...,0.782275,0.76532,0.795766,0.003916,0.787687,0.769936,0.78131,0.008893,0.783815,0.765188
289,0.904896,0.007199,0.906295,0.887362,0.903185,0.003018,0.904533,0.882607,0.900404,0.001971,...,0.787687,0.769936,0.78131,0.008893,0.783815,0.765188,0.785669,0.005755,0.783381,0.759371
290,0.903185,0.003018,0.904533,0.882607,0.900404,0.001971,0.900683,0.881134,0.900251,0.000716,...,0.783815,0.765188,0.785669,0.005755,0.783381,0.759371,0.783557,0.005765,0.783872,0.759353
291,0.900404,0.001971,0.900683,0.881134,0.900251,0.000716,0.900682,0.876073,0.89995,0.001567,...,0.783381,0.759371,0.783557,0.005765,0.783872,0.759353,0.785995,0.007049,0.785897,0.759686
292,0.900251,0.000716,0.900682,0.876073,0.89995,0.001567,0.899785,0.879122,0.900536,0.001585,...,0.783872,0.759353,0.785995,0.007049,0.785897,0.759686,0.792811,0.006897,0.788341,0.766715
293,0.89995,0.001567,0.899785,0.879122,0.900536,0.001585,0.89959,0.876207,0.898062,0.002247,...,0.785897,0.759686,0.792811,0.006897,0.788341,0.766715,0.786108,0.005211,0.786491,0.766776
294,0.900536,0.001585,0.89959,0.876207,0.898062,0.002247,0.89959,0.877975,0.90053,0.001073,...,0.788341,0.766715,0.786108,0.005211,0.786491,0.766776,0.780843,0.008527,0.781111,0.759659
295,0.898062,0.002247,0.89959,0.877975,0.90053,0.001073,0.89959,0.876218,0.898052,0.00099,...,0.786491,0.766776,0.780843,0.008527,0.781111,0.759659,0.780722,0.002326,0.781258,0.754445
296,0.90053,0.001073,0.89959,0.876218,0.898052,0.00099,0.89959,0.877294,0.900246,0.001661,...,0.781111,0.759659,0.780722,0.002326,0.781258,0.754445,0.778586,0.001087,0.781111,0.754322
297,0.898052,0.00099,0.89959,0.877294,0.900246,0.001661,0.89959,0.876158,0.900536,0.000313,...,0.781258,0.754445,0.778586,0.001087,0.781111,0.754322,0.780722,0.001369,0.781113,0.754322


In [7]:
# Drop the forecast columns we don't want to predict.
# Only the close value is estimated, so "Close" has to be the first entry of
# the column list in the config file.

# Width the frame has once this cell has been applied: every lagged input
# column plus a single (close) column per forecast step.
reduced_width = n_lag * n_features + n_out

cols_to_drop = []
# Guard against re-running the cell: dropping a second time would remove
# further columns and silently corrupt the targets sliced off later.
if reframed.shape[1] != reduced_width:
    n_cols = reframed.shape[1]
    # Walk the forecast steps backwards from the last column; within each
    # step's group of n_features columns keep the first one, drop the rest.
    cols_to_drop = [n_cols - (i * n_features + j)
                    for i in range(n_out)
                    for j in range(1, n_features)]
    # Rebind instead of mutating in place so the cell stays re-runnable.
    reframed = reframed.drop(reframed.columns[cols_to_drop], axis=1)

reframed.head()

Unnamed: 0,var1(t-288),var2(t-288),var3(t-288),var4(t-288),var1(t-287),var2(t-287),var3(t-287),var4(t-287),var1(t-286),var2(t-286),...,var1(t+134),var1(t+135),var1(t+136),var1(t+137),var1(t+138),var1(t+139),var1(t+140),var1(t+141),var1(t+142),var1(t+143)
288,0.911499,0.005383,0.909966,0.88822,0.904896,0.007199,0.906295,0.887362,0.903185,0.003018,...,0.798666,0.80387,0.798064,0.798062,0.793931,0.791549,0.784497,0.791533,0.795766,0.78131
289,0.904896,0.007199,0.906295,0.887362,0.903185,0.003018,0.904533,0.882607,0.900404,0.001971,...,0.80387,0.798064,0.798062,0.793931,0.791549,0.784497,0.791533,0.795766,0.78131,0.785669
290,0.903185,0.003018,0.904533,0.882607,0.900404,0.001971,0.900683,0.881134,0.900251,0.000716,...,0.798064,0.798062,0.793931,0.791549,0.784497,0.791533,0.795766,0.78131,0.785669,0.783557
291,0.900404,0.001971,0.900683,0.881134,0.900251,0.000716,0.900682,0.876073,0.89995,0.001567,...,0.798062,0.793931,0.791549,0.784497,0.791533,0.795766,0.78131,0.785669,0.783557,0.785995
292,0.900251,0.000716,0.900682,0.876073,0.89995,0.001567,0.899785,0.879122,0.900536,0.001585,...,0.793931,0.791549,0.784497,0.791533,0.795766,0.78131,0.785669,0.783557,0.785995,0.792811


In [8]:
reframed_values = reframed.values
# Chronological split: the first 80% of the rows train the model, the
# remaining rows are held out for testing.
training_size = int(0.8* reframed_values.shape[0])
train, test = reframed_values[:training_size, :], reframed_values[training_size:, :]

In [9]:
# Number of input columns per row: every feature at every lagged step
n_obs = n_lag * n_features

# Only the close value is estimated, so "Close" has to be the first entry of
# the column list in the config file.
n_outputs = n_out * n_features


def _to_xy(rows):
    """Split framed rows into 3D inputs and 3D targets.

    Inputs are the first n_obs columns shaped [samples, timesteps, features];
    targets are the last n_out columns shaped [samples, timesteps, 1].
    """
    inputs = rows[:, :n_obs].reshape((rows.shape[0], n_lag, n_features))
    targets = rows[:, -n_out:].reshape(-1, n_out, 1)
    return inputs, targets


train_x, train_y = _to_xy(train)
test_x, test_y = _to_xy(test)

print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)

((23320, 288, 4), (23320, 144, 1), (5831, 288, 4), (5831, 144, 1))


In [10]:
# Both artefacts share the prefix <weights folder><configured filename>
_weights_prefix = CONFIG['folder']['weights'] + CONFIG['filename']
# Model architecture (JSON) and trained weights (HDF5) destinations
model_name = _weights_prefix + '_model' + '.json'
model_weights_name = _weights_prefix + '_model_weights' + '.h5'

In [11]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Input
from keras.layers import LSTM, CuDNNLSTM, GRU,CuDNNGRU
from keras.layers import Conv1D, AveragePooling1D, MaxPooling1D
from keras.layers import Dropout, Flatten
from keras.layers import Activation, BatchNormalization
from keras.layers import TimeDistributed
from keras.layers import Bidirectional
from keras.layers import RepeatVector
from keras.callbacks import ModelCheckpoint

units = CONFIG['lstm_hidden_size']
dropout = .1

# Design network: a bidirectional LSTM encodes the input window into a single
# vector, which is repeated once per forecast step and decoded by a second
# LSTM that emits one value per step.
model = Sequential()
model.add(Bidirectional(LSTM(units), input_shape=(train_x.shape[1], train_x.shape[2])))
model.add(Dropout(dropout))

model.add(RepeatVector(n_out))

# Floor division keeps the layer width an integer; plain "/" yields a float
# under true division, which is not a valid number of units.
model.add(LSTM(units // 2, return_sequences=True))
model.add(Dropout(dropout))

# Only the close value is estimated, hence a single output per step;
# otherwise use n_outputs instead of 1:
# Dense(n_outputs, ...
model.add(TimeDistributed(Dense(1, activation='relu')))

model.compile(loss='mse', optimizer='adam')

# Store the model architecture: serialize it to JSON at model_name
model_json = model.to_json()
with open(model_name, "w") as json_file:
    json_file.write(model_json)

model.summary()

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_1 (Bidirection (None, 400)               328000    
_________________________________________________________________
dropout_1 (Dropout)          (None, 400)               0         
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 144, 400)          0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 144, 100)          200400    
_________________________________________________________________
dropout_2 (Dropout)          (None, 144, 100)          0         
_________________________________________________________________
time_distributed_1 (TimeDist (None, 144, 1)            101       
Total params: 528,501
Trainable params: 528,501
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Number of epochs requested from model.fit
epochs=100
# Batch size passed to model.fit
batch_size=512

In [None]:
# Fit network.
# The checkpoint rewrites model_weights_name only when val_loss improves, so
# the file always holds the best weights seen so far. save_best_only must be
# a real boolean: any non-empty string (even 'false') would count as true.
checkpoint = ModelCheckpoint(model_weights_name, monitor='val_loss', verbose=1,
                             save_best_only=True, save_weights_only=True)

# shuffle=False keeps the samples in their chronological order; the test
# split doubles as the validation data monitored by the checkpoint.
history = model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size,
                    validation_data=(test_x, test_y), verbose=1, shuffle=False,
                    callbacks=[checkpoint])

Train on 23320 samples, validate on 5831 samples
Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.00979, saving model to weights/BTC_ETH_lstm_i288_o144_Close_Volume_Low_High_model_weights.h5
Epoch 2/100

Epoch 00002: val_loss improved from 0.00979 to 0.00700, saving model to weights/BTC_ETH_lstm_i288_o144_Close_Volume_Low_High_model_weights.h5
Epoch 3/100

Epoch 00003: val_loss improved from 0.00700 to 0.00339, saving model to weights/BTC_ETH_lstm_i288_o144_Close_Volume_Low_High_model_weights.h5
Epoch 4/100

In [None]:
# Load the best weights (the file the training checkpoint saved to
# model_weights_name)
model.load_weights(model_weights_name)
# Compile again with the same loss and optimizer as at training time
model.compile(loss='mse', optimizer='adam')
# Loss of the restored model on the test split
model.evaluate(test_x, test_y)

In [None]:
from matplotlib import pyplot

In [None]:
# Training vs. validation loss per epoch, as recorded by model.fit
for history_key, curve_label in (('loss', 'train'), ('val_loss', 'test')):
    pyplot.plot(history.history[history_key], label=curve_label)
pyplot.legend()
pyplot.show()

In [None]:
# Test the prediction of test data: run the model over every test window
y = model.predict(test_x)

In [None]:
def _plot_against_original(original, modelled):
    """Draw the original series and the model output on one figure."""
    pyplot.plot(original, label='original')
    pyplot.plot(modelled, label='model')
    pyplot.legend()
    pyplot.show()


# First forecast step of every test window: ground truth (a) vs. model (b)
a = test_y[:, 0]
b = y[:, 0]
# b extended with the complete multi-step forecast of the last test window
c = np.concatenate((b, y[-1]), axis=0)

# Show how the model fits the test data
_plot_against_original(a[:100], b[:100])

# Show how the model predicts data: the tail of the series, where c runs past
# the end of the known values
pos = int(a.shape[0] - n_out * 4)
_plot_against_original(a[pos:], c[pos:])


In [None]:
#Prediction on public data!
period = CONFIG['period']
import time
import urllib2
import json

# Download a live bitcoin price data set
def dl_X(now = None, points = n_lag, period = period, pair=CONFIG['pair']):
    """Download recent chart data for `pair` from the Poloniex public API.

    Parameters
    ----------
    now : float or None
        Epoch seconds marking the newest moment wanted; the current time is
        used when omitted.
    points : int
        Number of candles to keep (the most recent ones).
    period : int
        Sent to the API as `period`; also used to align the end of the
        window and to size it (`points * period` seconds).
    pair : str
        Currency pair sent to the API.

    Returns
    -------
    pandas.DataFrame
        Columns Close, Timestamp, High, Low, Open, Volume, holding at most
        `points` rows.

    Raises
    ------
    ValueError
        If the API answers with something other than a list of candles.
    """
    if now is None:
        now = time.time()
    # Align the end of the window on a period boundary
    end = now - now % period
    start = end - points*period
    url = "https://poloniex.com/public?command=returnChartData&currencyPair=%s&start=%d&end=%d&period=%d" % (pair, start, end, period)
    openUrl = urllib2.urlopen(url)
    try:
        r = openUrl.read()
    finally:
        # Release the connection even when reading fails
        openUrl.close()
    payload = json.loads(r.decode())
    if not isinstance(payload, list):
        # Anything but a list of candles cannot be sliced or framed below
        raise ValueError("Unexpected response from %s: %r" % (url, payload))
    # Keep the requested number of most recent candles
    d = payload[-points:]
    df = pd.DataFrame(d)
    # Reorder the API fields and rename them to the column names of the
    # training data set
    original_columns=[u'close', u'date', u'high', u'low', u'open',u'volume']
    new_columns = ['Close','Timestamp','High','Low','Open','Volume']
    df = df.loc[:,original_columns]
    df.columns = new_columns

    return df

In [None]:
def predict(when=None):
    """Forecast from freshly downloaded data ending at `when`.

    Downloads the input window with dl_X, scales it with the already fitted
    scaler and feeds it to the model as a single sample.

    Returns the scaled input rows and the model output for that sample.
    """
    live_frame = dl_X(when)
    live_scaled = scaler.transform(live_frame.loc[:, columns].values)
    # A batch holding exactly one sample: [1, timesteps, features]
    model_input = live_scaled.reshape((1, n_lag, n_features))
    print(model_input.shape)
    forecast = model.predict(model_input)
    return live_scaled, forecast

In [None]:
# Forecast from "now" and from several earlier moments, each expressed as a
# number of periods back in time
for t in (0, 100, 200, 300, 500, 1000, 2000):
    rt_x, prediction = predict(time.time() - t * period)

    # First feature column of the scaled input window
    current = rt_x[:, 0]
    # Forecast of the only sample in the batch
    prediction = prediction[0]

    pyplot.plot(current, label='current')

    # Pad with NaN over the input window so the forecast is drawn right
    # after the known values
    padding = np.full_like(current, np.nan)
    predictPlot = np.concatenate((np.ravel(padding), np.ravel(prediction)), axis=0)

    pyplot.plot(predictPlot, label='prediction')
    pyplot.legend()
    pyplot.show()

In [None]:
# Prediction on live data: forecast once per period, forever
starttime = time.time()
while True:
    # Timestamp of the period boundary the forecast is made from
    now = time.time()
    end = now - now % period
    stamp = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime(end))
    print(stamp)

    rt_x, prediction = predict()

    # First feature column of the scaled input window
    current = rt_x[:, 0]
    # Forecast of the only sample in the batch
    prediction = prediction[0]

    pyplot.plot(current, label='current')

    # Pad with NaN over the input window so the forecast is drawn right
    # after the known values
    padding = np.full_like(current, np.nan)
    predictPlot = np.concatenate((np.ravel(padding), np.ravel(prediction)), axis=0)

    pyplot.plot(predictPlot, label='prediction')
    pyplot.legend()
    pyplot.show()

    # Sleep until the next multiple of `period` counted from starttime, so
    # the time spent predicting and plotting does not accumulate as drift
    elapsed = time.time() - starttime
    time.sleep(period - (elapsed % period))