In [1]:
from config import CONFIG
from utils import series_to_supervised

# Show the loaded configuration dictionary as this cell's output
CONFIG

{'pair': 'BTC_ETH',
 'period': 300,
 'input_size': 120,
 'output_size': 10,
 'lstm_hidden_size': 300,
 'columns': ['Close', 'Volume', 'Low', 'High'],
 'csv_src_file': 'BTC_ETH',
 'name': 'lstm',
 'folder': {'data': 'data/', 'weights': 'weights/'},
 'filename': 'BTC_ETH_lstm_i120_o10_Close_Volume_Low_High'}

In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from matplotlib import pyplot
 
# Path of the CSV file holding the raw price history
dfp = CONFIG['folder']['data'] + CONFIG['csv_src_file'] + '.csv'

# Price columns that are fed to the model
columns = CONFIG['columns']
dataset = pd.read_csv(dfp)

# Keep only rows after March 2018 (1519862400).
# Other cut-offs: 2018 -> 1514764800, July 2017 -> 1498867200
dataset = dataset.loc[dataset['Timestamp'] > 1519862400]
dataset.head()

Unnamed: 0,Close,Timestamp,High,Low,Open,Volume
269507,0.082828,1519862700,0.082856,0.082729,0.082729,4.151247
269508,0.082609,1519863000,0.082828,0.082606,0.082828,5.551513
269509,0.082552,1519863300,0.082673,0.082547,0.082609,2.327443
269510,0.08246,1519863600,0.082625,0.082419,0.082552,1.519736
269511,0.082455,1519863900,0.08246,0.082418,0.082455,0.552411


In [3]:
# Feature matrix restricted to the configured price columns, in config order
values = dataset[columns].values

In [4]:
# Windowing parameters used to prepare the dataset for learning:
# past steps fed to the model, future steps predicted, features per step
n_lag, n_out = CONFIG['input_size'], CONFIG['output_size']
n_features = len(columns)
(n_lag, n_features, n_out)

(120, 4, 10)

In [5]:
from sklearn.preprocessing import StandardScaler
# Scale every feature column into the [0, 1] range.
# NOTE(review): the scaler is fitted on all rows of `values`, including the
# rows that the later train/test split assigns to the test set - confirm
# that this leakage is acceptable.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit(values).transform(values)

In [6]:
# frame as supervised learning: as the column names in the displayed head
# show, each row holds the lagged inputs (t-n_lag ... t-1) for every feature
# followed by the values for the steps t ... t+n_out-1
reframed = series_to_supervised(scaled, n_lag, n_out)
reframed.head(10)

Unnamed: 0,var1(t-120),var2(t-120),var3(t-120),var4(t-120),var1(t-119),var2(t-119),var3(t-119),var4(t-119),var1(t-118),var2(t-118),...,var3(t+7),var4(t+7),var1(t+8),var2(t+8),var3(t+8),var4(t+8),var1(t+9),var2(t+9),var3(t+9),var4(t+9)
120,0.911499,0.005383,0.909966,0.88822,0.904896,0.007199,0.906295,0.887362,0.903185,0.003018,...,0.867957,0.846472,0.871304,0.001382,0.869497,0.846472,0.868639,0.000815,0.869478,0.84647
121,0.904896,0.007199,0.906295,0.887362,0.903185,0.003018,0.904533,0.882607,0.900404,0.001971,...,0.869497,0.846472,0.868639,0.000815,0.869478,0.84647,0.867833,0.001149,0.869477,0.8457
122,0.903185,0.003018,0.904533,0.882607,0.900404,0.001971,0.900683,0.881134,0.900251,0.000716,...,0.869478,0.84647,0.867833,0.001149,0.869477,0.8457,0.859604,0.034157,0.85901,0.844171
123,0.900404,0.001971,0.900683,0.881134,0.900251,0.000716,0.900682,0.876073,0.89995,0.001567,...,0.869477,0.8457,0.859604,0.034157,0.85901,0.844171,0.864068,0.005451,0.859038,0.839505
124,0.900251,0.000716,0.900682,0.876073,0.89995,0.001567,0.899785,0.879122,0.900536,0.001585,...,0.85901,0.844171,0.864068,0.005451,0.859038,0.839505,0.862337,0.008494,0.860819,0.841716
125,0.89995,0.001567,0.899785,0.879122,0.900536,0.001585,0.89959,0.876207,0.898062,0.002247,...,0.859038,0.839505,0.862337,0.008494,0.860819,0.841716,0.868591,0.003031,0.86404,0.844476
126,0.900536,0.001585,0.89959,0.876207,0.898062,0.002247,0.89959,0.877975,0.90053,0.001073,...,0.860819,0.841716,0.868591,0.003031,0.86404,0.844476,0.867355,0.000561,0.865738,0.842903
127,0.898062,0.002247,0.89959,0.877975,0.90053,0.001073,0.89959,0.876218,0.898052,0.00099,...,0.86404,0.844476,0.867355,0.000561,0.865738,0.842903,0.867084,0.000588,0.865738,0.844322
128,0.90053,0.001073,0.89959,0.876218,0.898052,0.00099,0.89959,0.877294,0.900246,0.001661,...,0.865738,0.842903,0.867084,0.000588,0.865738,0.844322,0.868439,0.002234,0.866695,0.84371
129,0.898052,0.00099,0.89959,0.877294,0.900246,0.001661,0.89959,0.876158,0.900536,0.000313,...,0.865738,0.844322,0.868439,0.002234,0.866695,0.84371,0.869157,0.003422,0.86694,0.844475


In [7]:
# Drop the target columns we don't want to predict.
# We're only concerned with estimating the close value, so Close must be
# first in the list of columns in the config file.

# Positions of every non-Close column inside the n_out target steps, which
# occupy the last n_out * n_features columns of the freshly framed data.
cols_to_drop = [
    reframed.shape[1] - (i * n_features + j)
    for i in range(n_out)
    for j in range(1, n_features)
]

# Make the cell safe to re-run: once the targets are reduced to Close only the
# frame is n_lag * n_features + n_out columns wide, and dropping by position a
# second time would silently remove input/target columns that must stay.
already_reduced = bool(cols_to_drop) and reframed.shape[1] == n_lag * n_features + n_out
if not already_reduced:
    reframed = reframed.drop(reframed.columns[cols_to_drop], axis=1)

reframed.head()

Unnamed: 0,var1(t-120),var2(t-120),var3(t-120),var4(t-120),var1(t-119),var2(t-119),var3(t-119),var4(t-119),var1(t-118),var2(t-118),...,var1(t),var1(t+1),var1(t+2),var1(t+3),var1(t+4),var1(t+5),var1(t+6),var1(t+7),var1(t+8),var1(t+9)
120,0.911499,0.005383,0.909966,0.88822,0.904896,0.007199,0.906295,0.887362,0.903185,0.003018,...,0.873974,0.867204,0.868109,0.864457,0.869646,0.869194,0.871154,0.871306,0.871304,0.868639
121,0.904896,0.007199,0.906295,0.887362,0.903185,0.003018,0.904533,0.882607,0.900404,0.001971,...,0.867204,0.868109,0.864457,0.869646,0.869194,0.871154,0.871306,0.871304,0.868639,0.867833
122,0.903185,0.003018,0.904533,0.882607,0.900404,0.001971,0.900683,0.881134,0.900251,0.000716,...,0.868109,0.864457,0.869646,0.869194,0.871154,0.871306,0.871304,0.868639,0.867833,0.859604
123,0.900404,0.001971,0.900683,0.881134,0.900251,0.000716,0.900682,0.876073,0.89995,0.001567,...,0.864457,0.869646,0.869194,0.871154,0.871306,0.871304,0.868639,0.867833,0.859604,0.864068
124,0.900251,0.000716,0.900682,0.876073,0.89995,0.001567,0.899785,0.879122,0.900536,0.001585,...,0.869646,0.869194,0.871154,0.871306,0.871304,0.868639,0.867833,0.859604,0.864068,0.862337


In [8]:
# Split into train and test sets: the first 80% of the rows train the model,
# the remaining rows are held out
reframed_values = reframed.values
training_size = int(0.8 * reframed_values.shape[0])
train, test = reframed_values[:training_size], reframed_values[training_size:]

In [9]:
# Number of input values per sample (every lagged feature, flattened)
n_obs = n_lag * n_features

# Only the close value is estimated, so Close must be the first entry of the
# column list in the config file.
n_outputs = n_out * n_features

# Inputs are the first n_obs columns, targets the last n_out columns
train_x, test_x = train[:, :n_obs], test[:, :n_obs]
train_y, test_y = train[:, -n_out:], test[:, -n_out:]

# Inputs become 3D: [samples, timesteps, features]
train_x = train_x.reshape((len(train_x), n_lag, n_features))
test_x = test_x.reshape((len(test_x), n_lag, n_features))

# Targets become 3D as well: [samples, timesteps, 1]
train_y = train_y.reshape((-1, n_out, 1))
test_y = test_y.reshape((-1, n_out, 1))

print(*(part.shape for part in (train_x, train_y, test_x, test_y)))

(23562, 120, 4) (23562, 10, 1) (5891, 120, 4) (5891, 10, 1)


In [10]:
# Target files inside the weights folder: architecture as JSON, weights as HDF5
model_name = CONFIG['folder']['weights'] + CONFIG['filename'] + '_model.json'
model_weights_name = CONFIG['folder']['weights'] + CONFIG['filename'] + '_model_weights.h5'

In [11]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Input
from keras.layers import LSTM, CuDNNLSTM, GRU,CuDNNGRU
from keras.layers import Conv1D, AveragePooling1D, MaxPooling1D
from keras.layers import Dropout, Flatten
from keras.layers import Activation, BatchNormalization
from keras.layers import TimeDistributed
from keras.layers import Bidirectional
from keras.layers import RepeatVector
from keras.callbacks import ModelCheckpoint

# Hyper-parameters of the network
units = CONFIG['lstm_hidden_size']
dropout = .6

# Design the network: a bidirectional LSTM summarises the input window, the
# summary is repeated once per output step and passed through a second LSTM
# that returns one vector per step.
model = Sequential([
    Bidirectional(LSTM(units), input_shape=train_x.shape[1:]),
    Dropout(dropout),
    RepeatVector(n_out),
    LSTM(int(units), return_sequences=True),
    Dropout(dropout),
    Dense(units=CONFIG['output_size']),
    Activation('linear'),
    # Only the close value is estimated; to predict every feature use
    # Dense(n_outputs, ...) instead of Dense(1, ...)
    TimeDistributed(Dense(1, activation='relu')),
])

model.compile(optimizer='adam', loss='mae')

# Store the architecture: serialize the model to JSON
model_json = model.to_json()
with open(model_name, "w") as json_file:
    json_file.write(model_json)

model.summary()

Using TensorFlow backend.


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_1 (Bidirection (None, 600)               732000    
_________________________________________________________________
dropout_1 (Dropout)          (None, 600)               0         
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 10, 600)           0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 10, 300)           1081200   
_________________________________________________________________
dropout_2 (Dropout)          (None, 10, 300)           0         
_________________________________________________________________
dense_1 (Dense)              (None, 10, 10)            3010      
_________________________________________________________________
activation_1 (Activation)    (None, 10, 10)            0         
__________

In [12]:
# Training schedule: passes over the training set and samples per batch
epochs, batch_size = 50, 1000

In [None]:
# fit network
# The checkpoint keeps only the weights of the epoch with the lowest
# validation loss. save_best_only has to be the boolean True: any non-empty
# string (even 'false') is truthy and would switch it on by accident.
checkpoint = ModelCheckpoint(model_weights_name, monitor='val_loss', verbose=1,
                             save_best_only=True, save_weights_only=True)
history = model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size,
                    validation_data=(test_x, test_y), verbose=1, shuffle=True,
                    callbacks=[checkpoint])

Train on 23562 samples, validate on 5891 samples
Epoch 1/50

Epoch 00001: val_loss improved from inf to 0.04447, saving model to weights/BTC_ETH_lstm_i120_o10_Close_Volume_Low_High_model_weights.h5
Epoch 2/50

Epoch 00002: val_loss improved from 0.04447 to 0.02334, saving model to weights/BTC_ETH_lstm_i120_o10_Close_Volume_Low_High_model_weights.h5
Epoch 3/50

Epoch 00003: val_loss improved from 0.02334 to 0.01241, saving model to weights/BTC_ETH_lstm_i120_o10_Close_Volume_Low_High_model_weights.h5
Epoch 4/50

Epoch 00004: val_loss improved from 0.01241 to 0.00995, saving model to weights/BTC_ETH_lstm_i120_o10_Close_Volume_Low_High_model_weights.h5
Epoch 5/50

Epoch 00005: val_loss did not improve from 0.00995
Epoch 6/50

Epoch 00006: val_loss did not improve from 0.00995
Epoch 7/50

Epoch 00007: val_loss did not improve from 0.00995
Epoch 8/50

Epoch 00008: val_loss did not improve from 0.00995
Epoch 9/50

Epoch 00009: val_loss improved from 0.00995 to 0.00835, saving model to weights

In [None]:
# Restore the checkpointed best weights and score them on the test set
model.load_weights(model_weights_name)
model.compile(optimizer='adam', loss='mae')
model.evaluate(x=test_x, y=test_y)

In [None]:
from matplotlib import pyplot

In [None]:
# Plot the training and validation loss recorded for every epoch
for history_key, curve_label in (('loss', 'train'), ('val_loss', 'test')):
    pyplot.plot(history.history[history_key], label=curve_label)
pyplot.legend()
pyplot.show()

In [None]:
# Run the model over every test window
y = model.predict(x=test_x)

In [None]:
# First step of every test window: ground truth (a) and model output (b)
a = test_y[:,0]
b = y[:,0]
# Extend the first-step predictions with the remaining steps of the last
# window's forecast. y[-1][0] is already the final element of b, so it is
# skipped; appending all of y[-1] would plot that point twice and shift the
# forecast tail by one step.
c = np.append(b, y[-1][1:], axis=0)

# Show how the model fits the test data
pyplot.plot(a[:100], label='original')
pyplot.plot(b[:100], label='model')
pyplot.legend()
pyplot.show()

# Show how the model predicts data: the last 4 * n_out known points plus the
# forecast steps that reach beyond the end of the test set
pos = int(a.shape[0]-n_out*4)
pyplot.plot(a[pos:], label='original')
pyplot.plot(c[pos:], label='model')
pyplot.legend()
pyplot.show()


In [None]:
#Prediction on public data!
# Chart period from the config; below it is combined with time.time() values
# to align request timestamps and to size the requested window
period = CONFIG['period']
import time
from urllib.request import urlopen
import json

# Download a live bitcoin price data set
def dl_X(now = None, points = n_lag, period = period, pair=CONFIG['pair']):
    """Download recent chart data for ``pair`` from the Poloniex public API.

    Parameters
    ----------
    now : float, optional
        Unix timestamp the requested window ends at; defaults to the current
        time. It is rounded down to a multiple of ``period``.
    points : int
        Number of chart points to request and keep. The default is the value
        ``n_lag`` had when this function was defined.
    period : int
        Length of one chart point, in the same unit as ``now``. The default is
        the value of the global ``period`` when this function was defined.
    pair : str
        Currency pair sent as ``currencyPair``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Close, Timestamp, High, Low, Open, Volume`` holding at most
        the last ``points`` entries of the response.
    """
    if now is None:
        now = time.time()
    end = now - now % period
    start = end - points*period
    url = "https://poloniex.com/public?command=returnChartData&currencyPair=%s&start=%d&end=%d&period=%d" % (pair, start, end, period)
    # The context manager closes the connection even when read() raises
    with urlopen(url) as response:
        payload = response.read()
    # Trim to the number of points the caller asked for (not the global n_lag)
    d = json.loads(payload.decode())[-points:]
    original_columns=[u'close', u'date', u'high', u'low', u'open',u'volume']
    new_columns = ['Close','Timestamp','High','Low','Open','Volume']
    # Fix the column order first, then rename to the training data's headers
    df = pd.DataFrame(d).loc[:,original_columns]
    df.columns = new_columns

    return df

In [None]:
def predict(when=None):
    """Download the window ending at ``when`` and run the model on it.

    ``when`` is forwarded to ``dl_X``. The downloaded rows are restricted to
    the configured columns and scaled with the already fitted ``scaler``.

    Returns a tuple ``(scaled_window, forecast)``: the scaled 2D window and
    the output of ``model.predict`` for that window as a batch of one sample.
    """
    downloaded = dl_X(when)
    window = downloaded.loc[:, columns].values
    rt_scaled = scaler.transform(window)
    # The model expects a batch axis in front: [1, timesteps, features]
    batch = rt_scaled.reshape((1, n_lag, n_features))
    print (batch.shape)
    forecast = model.predict(batch)
    return rt_scaled, forecast

In [None]:
# Forecast from "now" and from several offsets (counted in periods) in the past
for t in (0, 100, 200, 300, 500, 1000, 2000):
    rt_x, prediction = predict(time.time() - t * period)

    # First feature column of the scaled window and the single forecast of the batch
    current, prediction = rt_x[:, 0], prediction[0]
    pyplot.plot(current, label='current')

    # Pad with NaN so the forecast is drawn right after the observed window
    predictPlot = np.concatenate([np.full_like(current, np.nan), np.ravel(prediction)])

    pyplot.plot(predictPlot, label='prediction')
    pyplot.legend()
    pyplot.show()

In [None]:
#Prediction on live data!
# Runs until the kernel is interrupted: one forecast and one plot per period
starttime = time.time()
while True:
    now = time.time()
    end = now - now % period
    print (time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime(end)))
    rt_x, prediction = predict()

    # First feature column of the scaled window and the single forecast of the batch
    current, prediction = rt_x[:, 0], prediction[0]
    pyplot.plot(current, label='current')

    # Pad with NaN so the forecast is drawn right after the observed window
    predictPlot = np.concatenate([np.full_like(current, np.nan), np.ravel(prediction)])

    pyplot.plot(predictPlot, label='prediction')
    pyplot.legend()
    pyplot.show()

    # Sleep until the next multiple of `period` counted from starttime
    elapsed = time.time() - starttime
    time.sleep(period - (elapsed % period))