In [1]:
import numpy as np
import pandas as pd
import plotly
# Plotly credentials are read from the environment so the API key is never
# stored in the notebook. NOTE(review): the key that used to be hardcoded on
# this line has been exposed and should be revoked/regenerated.
import os
plotly.tools.set_credentials_file(
    username=os.environ.get('PLOTLY_USERNAME', 'david.hubbard'),
    api_key=os.environ['PLOTLY_API_KEY'])  # KeyError if the variable is not set
import copy
import plotly.plotly as py
import plotly.graph_objs as go
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Conv1D, Dense, Dropout, LSTM, GRU
from keras.callbacks import EarlyStopping
from sklearn.metrics import mean_squared_error


Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.

Using TensorFlow backend.


In [2]:
# Read the raw CSV into `df` (the file name suggests hourly BTC quotes).
# NOTE(review): absolute, machine-specific path — the notebook only runs where
# this exact file exists; consider a configurable data directory.
dfp = r'C:\Users\david\Documents\BitCoin Time Series Data Analysis\bitcoin_hourly.csv'
df = pd.read_csv(dfp)

In [3]:
# Start (d0) and end (d1) of the date range; used below to slice the daily
# series with `daily[d0:d1]`.
d0 = datetime(2016, 12, 1)
d1 = datetime(2018, 6, 27)

In [4]:
# Build the daily mean close-price table from the raw rows.
# Index a *new* frame by timestamp instead of aliasing `df` and overwriting its
# index, so `df` stays as loaded and the cell can be re-run safely.
hourly = df.set_index(pd.to_datetime(df['Timestamp']))
# Average numeric columns only: the original 'Timestamp' column is still part
# of the frame and cannot be averaged (recent pandas raises instead of
# silently dropping it).
daily = hourly.select_dtypes(include='number').resample('D').mean()
# Keep only the study period d0..d1.
daily = daily.loc[d0:d1]
# Two columns (day, mean close) on a plain 0..n-1 integer index.
daily_df = pd.DataFrame({'Timestamps': daily.index, 'Price': daily['Close']}).reset_index(drop=True)

In [5]:
# Line chart of the daily price series built above.
trace = go.Scatter(x=daily_df['Timestamps'], y=daily_df['Price'], mode='lines', name='Price')

layout = {
    'title': 'BTC Daily Price',
    'xaxis': {'title': 'Time'},
    'yaxis': {'title': 'Price (USD)'},
}
data = [trace]
fig = {'data': data, 'layout': layout}

# Hand the figure to plotly for display.
py.iplot(fig, filename='BTC_Daily_price')

In [6]:
class Preprocesser:
    """Prepare a time series for windowed multi-step forecasting.

    Each sample pairs ``N`` consecutive rows (input) with the following ``K``
    rows (target); consecutive samples start ``K`` rows apart.
    """

    def __init__(self, N, K, alpha):
        """Store the windowing configuration.

        Args:
            N: number of rows in every input window.
            K: number of rows in every target window, also the stride
                between the starts of consecutive windows.
            alpha: fraction used by ``test_train_split`` to size the
                training partition.
        """
        self.K = K
        self.N = N
        self.alpha = alpha

    def trim_data(self, df):
        """Drop leading rows so ``len(df) - 2 * N`` is a multiple of ``K``.

        Args:
            df: pandas DataFrame (or Series) ordered in time.

        Returns:
            ``df`` unchanged when ``K == 1``; otherwise ``df`` without its
            first ``(len(df) - 2 * N) % K`` rows.
        """
        if self.K == 1:
            return df

        surplus = (df.shape[0] - 2 * self.N) % self.K
        # Positional slice rather than ``df.tail(-surplus)``: when nothing has
        # to be dropped that call degenerates to ``tail(0)`` and returns an
        # EMPTY frame, wiping out the whole data set.
        return df.iloc[surplus:]

    def test_train_split(self, df):
        """Split ``df`` chronologically into a training head and a test tail.

        The training part has ``N + K * int(alpha * (len(df) - 2 * N) / K)``
        rows, i.e. ``N`` rows plus a whole number of ``K``-row blocks.

        Args:
            df: pandas DataFrame (or Series) ordered in time.

        Returns:
            ``(train_df, test_df)``; together they contain every row of ``df``
            exactly once, in the original order.
        """
        n_rows = df.shape[0]
        n_blocks = int((self.alpha * (n_rows - 2 * self.N)) / self.K)
        train_l = self.N + self.K * n_blocks
        # ``iloc`` instead of ``head``/``tail``: ``df.tail(-0)`` would return an
        # empty frame and silently lose every row when ``train_l`` is 0.
        return df.iloc[:train_l], df.iloc[train_l:]

    def create_sliding_window(self, A):
        """Cut ``A`` into (input, target) windows.

        Args:
            A: 3-D numpy array indexed by time on axis 0. The notebook passes
                arrays shaped ``(rows, 1, features)``; the reshape below relies
                on axis 1 having length 1.

        Returns:
            ``(X, y)`` with ``X`` of shape ``(windows, N, features)`` and ``y``
            of shape ``(windows, K, features)``. Window ``i`` starts at row
            ``i * K``.

        Raises:
            AssertionError: if ``A.shape[0] - N`` is not a multiple of ``K``.
        """
        assert (A.shape[0] - self.N) % self.K == 0, \
            'number of rows minus N must be a multiple of K'

        span = self.N + self.K
        n_target_rows = A.shape[0] - self.N
        # One row of indices per window: start offset + 0..span-1.
        starts = np.arange(0, n_target_rows, self.K).reshape(-1, 1)
        index_grid = starts + np.arange(span)
        windows = A[index_grid].reshape(-1, span, A.shape[2])

        return windows[:, :self.N], windows[:, self.N:]

In [7]:
# N: rows per input window; K: rows per target window and stride between
# windows; alpha: fraction that sizes the training partition
# (see Preprocesser.create_sliding_window / test_train_split).
N, K = 16, 10
alpha = 0.8
preprocessor = Preprocesser(N, K, alpha)

In [8]:
# Trim leading rows so the series length fits the N/K windowing.
# NOTE(review): `daily_df` is overwritten with its own trimmed version, so
# re-running this cell feeds the already-trimmed frame back into trim_data.
daily_df = preprocessor.trim_data(daily_df)

# Timestamps as a Series; prices kept as a one-column DataFrame.
time_stamps = daily_df['Timestamps']
daily_price = daily_df.loc[:, ['Price']]

In [9]:
# Chronological split of the price frame into train and test parts.
train_data, test_data = preprocessor.test_train_split(daily_price)
# Extra names bound to the same DataFrames (plain assignment, no copy).
original_train_data = train_data
original_test_data = test_data

In [10]:
# Convert to numpy and insert a singleton middle axis: (rows, 1, columns).
# `train_data` / `test_data` are re-bound from DataFrames to arrays here.
train_data = np.array(train_data)[:,None,:]
test_data = np.array(test_data)[:,None,:]
# Separately built arrays of the same unscaled values; the scaling cell below
# only assigns into train_data / test_data.
original_train_data_array = np.array(original_train_data)[:,None,:]
original_test_data_array = np.array(original_test_data)[:,None,:]

In [11]:
# Fit the scaler on the training prices only, then apply the same fitted
# transform to the test prices; both arrays are overwritten in place.
scaler = MinMaxScaler()

train_data[:,0] = scaler.fit_transform(train_data[:,0].reshape(-1,1))
test_data[:,0] = scaler.transform(test_data[:,0].reshape(-1,1))

In [12]:
# Cut the scaled series into (N-row input, K-row target) windows.
X_train, y_train = preprocessor.create_sliding_window(train_data)
X_test, y_test = preprocessor.create_sliding_window(test_data)

In [13]:
# The first N test rows only serve as input for the first window, so the
# rows that get a prediction are the last (test rows - N) of the series.
n_predicted_rows = test_data.shape[0] - N
pred_times = time_stamps.tail(n_predicted_rows)
true_prices = daily_price['Price'].tail(n_predicted_rows)
# Unscaled ground truth aligned with the prediction timestamps.
true_prices_df = pd.DataFrame({'Timestamps': pred_times, 'Price': true_prices})

In [14]:
class Tester:
    """Build, compile and train the forecasting networks with shared settings.

    NOTE(review): every ``apply_*`` method passes ``(X_test, y_test)`` as
    Keras ``validation_data`` and early-stops on ``val_loss``, so the test set
    influences when training stops; a separate validation split would avoid
    that.
    """

    def __init__(self, epochs, batch_size, activation, loss, optimer, patience):
        """Store the training configuration.

        Args:
            epochs: maximum number of training epochs.
            batch_size: mini-batch size passed to ``fit``.
            activation: activation of each model's output layer.
            loss: loss identifier passed to ``compile``.
            optimer: optimizer identifier passed to ``compile`` (sic).
            patience: ``patience`` of the ``EarlyStopping`` callback.
        """
        self.epochs = epochs
        self.batch_size = batch_size
        self.activation = activation
        self.loss = loss
        self.optimer = optimer
        self.patience = patience

    def _compile_and_fit(self, model, X_train, y_train, X_test, y_test):
        """Compile ``model`` with the stored settings, train it, return the fit history."""
        model.compile(loss=self.loss, optimizer=self.optimer)
        stopper = EarlyStopping(monitor='val_loss', patience=self.patience, verbose=1)
        return model.fit(X_train, y_train,
                         verbose=1,
                         batch_size=self.batch_size,
                         validation_data=(X_test, y_test),
                         epochs=self.epochs,
                         callbacks=[stopper])

    def apply_CNN_model(self, X_train, y_train, X_test, y_test):
        """Train a stack of four 1-D convolutions.

        Args:
            X_train, X_test: inputs indexed ``(sample, step, feature)``.
            y_train, y_test: targets; they must match the shape produced by
                the last convolution.

        Returns:
            ``(history, model)`` as returned by ``fit`` and the trained model.
        """
        step_size = X_train.shape[1]
        no_of_features = X_train.shape[2]

        CNN_model = Sequential()
        # Kernel sizes 3, 3, 2, 2 at stride 1. NOTE(review): with unpadded
        # convolutions this shortens the sequence by 6 steps in total, which
        # is presumably why the notebook pairs N=16 with K=10 — confirm
        # before changing N or K.
        CNN_model.add(Conv1D(input_shape=(step_size, no_of_features), activation='relu', strides=1, filters=8, kernel_size=3))
        CNN_model.add(Conv1D(activation='relu', strides=1, filters=8, kernel_size=3))
        CNN_model.add(Conv1D(activation='relu', strides=1, filters=4, kernel_size=2))
        CNN_model.add(Conv1D(activation=self.activation, strides=1, filters=no_of_features, kernel_size=2))

        CNN_history = self._compile_and_fit(CNN_model, X_train, y_train, X_test, y_test)

        return CNN_history, CNN_model

    def apply_LSTM_model(self, X_train, y_train, X_test, y_test, units):
        """Train an LSTM followed by a dense output layer.

        Args:
            X_train, X_test: inputs indexed ``(sample, step, feature)``.
            y_train, y_test: 2-D targets indexed ``(sample, horizon)``.
            units: number of LSTM units.

        Returns:
            ``(history, model)``.
        """
        step_size = X_train.shape[1]
        no_of_features = X_train.shape[2]
        # Size the output from the targets instead of the notebook-global K,
        # so the method works for any horizon and without that global.
        output_size = y_train.shape[1]

        LSTM_model = Sequential()
        LSTM_model.add(LSTM(units=units, input_shape=(step_size, no_of_features), return_sequences=False))
        LSTM_model.add(Dense(activation=self.activation, units=output_size))

        LSTM_history = self._compile_and_fit(LSTM_model, X_train, y_train, X_test, y_test)

        return LSTM_history, LSTM_model

    def apply_GRU_model(self, X_train, y_train, X_test, y_test, units):
        """Train a GRU with dropout followed by a dense output layer.

        Args:
            X_train, X_test: inputs indexed ``(sample, step, feature)``.
            y_train, y_test: 2-D targets indexed ``(sample, horizon)``.
            units: number of GRU units.

        Returns:
            ``(history, model)``.
        """
        step_size = X_train.shape[1]
        no_of_features = X_train.shape[2]
        # Same reasoning as in apply_LSTM_model: no dependence on global K.
        output_size = y_train.shape[1]

        GRU_model = Sequential()
        GRU_model.add(GRU(units=units, input_shape=(step_size, no_of_features), return_sequences=False))
        GRU_model.add(Dropout(0.5))
        GRU_model.add(Dense(units=output_size, activation=self.activation))

        # Compiled with self.loss (via the helper); the previous code read a
        # module-level `loss` variable and ignored the configured one.
        GRU_history = self._compile_and_fit(GRU_model, X_train, y_train, X_test, y_test)

        return GRU_history, GRU_model

In [15]:
class Evaluator:
    """Plotting, forecasting and scoring helpers for the trained models."""

    def _line_figure(self, traces, title, x_title, y_title):
        """Build a plotly figure dict with one line per ``(name, x, y)`` entry of ``traces``."""
        data = [go.Scatter(x=x, y=y, mode='lines', name=name) for name, x, y in traces]
        layout = dict(title=title, xaxis=dict(title=x_title), yaxis=dict(title=y_title))
        return dict(data=data, layout=layout)

    def plot_loss(self, history, model_name):
        """Figure of training and validation loss per epoch.

        Args:
            history: object whose ``history`` dict has ``'loss'`` and
                ``'val_loss'`` lists (as returned by the training methods).
            model_name: prefix of the figure title.

        Returns:
            Figure dict with ``'data'`` and ``'layout'`` keys.
        """
        loss = history.history['loss']
        val_loss = history.history['val_loss']
        traces = [
            ('Loss', np.arange(0, len(loss)), loss),
            ('Validation Loss', np.arange(0, len(val_loss)), val_loss),
        ]
        # Title fixed: the chart shows loss curves, it used to be labelled
        # "<model> Accuracy".
        return self._line_figure(traces, model_name + ' Loss', 'Epochs', 'Loss')

    def forward_predict(self, X_test, model, K):
        """Forecast recursively, feeding predictions back in as inputs.

        Starts from the first test window only. At step ``i`` the model sees
        elements ``i*K .. n_steps + i*K`` of the running series (seed window
        followed by everything predicted so far) and its output is appended
        to that series.

        Args:
            X_test: array ``(windows, n_steps, 1)``; only ``X_test[0]`` is
                used as data, ``windows`` sets the number of steps.
            model: object with ``predict(window)`` accepting a
                ``(1, n_steps, 1)`` array.
            K: number of values the model emits per step.

        Returns:
            Array of shape ``(1, total_predicted, 1)`` with all predictions
            in order.

        Raises:
            ValueError: if ``X_test`` is not 3-D with a single feature; the
                running series is one-dimensional and cannot carry more.
        """
        if X_test.ndim != 3 or X_test.shape[2] != 1:
            raise ValueError('forward_predict expects X_test shaped (windows, steps, 1)')

        n_steps = X_test.shape[1]
        series = np.asarray(X_test[0]).reshape(-1)

        for i in range(X_test.shape[0]):
            window = series[i * K: n_steps + i * K].reshape(1, -1, 1)
            step = np.asarray(model.predict(window)).reshape(-1)
            series = np.concatenate([series, step])

        # Drop the seed window; keep only what the model produced.
        return series[n_steps:].reshape(1, -1, 1)

    def invert_preds(self, predictions, pred_times, scaler, original_data_array):
        """Map scaled predictions back to prices and attach timestamps.

        Args:
            predictions: array of scaled predictions (any shape); flattened
                in row-major order.
            pred_times: timestamps, one per flattened prediction.
            scaler: scaler object providing ``fit`` and
                ``inverse_transform``.
            original_data_array: unscaled array whose ``[:, 0]`` slice is
                used to fit the scaling.

        Returns:
            DataFrame with columns ``'Timestamps'`` and ``'Price'``.
        """
        # Fit a private copy: refitting the caller's scaler in place would
        # silently change the scaling of every later transform made with it.
        fitted = copy.deepcopy(scaler)
        fitted.fit(original_data_array[:, 0].reshape(-1, 1))

        # One column in, one column out: matches how the scaler was fitted.
        scaled_column = np.asarray(predictions).reshape(-1, 1)
        predictions_inverted = fitted.inverse_transform(scaled_column).reshape(-1)

        return pd.DataFrame({'Timestamps': pred_times, 'Price': predictions_inverted})

    def plot_preds(self, preds, true, times, model_name):
        """Figure comparing actual and predicted prices over ``times``.

        Returns:
            Figure dict with ``'data'`` and ``'layout'`` keys.
        """
        traces = [('Actual', times, true), ('Predicted', times, preds)]
        return self._line_figure(traces, model_name + ' Predictions', 'Time', 'Price (USD)')

    def get_mse(self, predictions, true_values, model_name):
        """Compute the mean squared error, print it as ``'<model_name>: <mse>'`` and return it."""
        mse = mean_squared_error(predictions, true_values)
        print(model_name + ': ' + str(mse))

        return mse

In [16]:
# Training settings shared by all models (passed to Tester below).
epochs = 100       # upper bound on epochs; early stopping may end sooner
batch_size = 1
loss = 'mse'
optimer = 'adam'   # (sic) optimizer name
patience = 30      # EarlyStopping patience on val_loss

evaluator = Evaluator()

In [17]:
# Train the CNN with a 'tanh' output activation.
# NOTE(review): X_test / y_test are handed over as validation data, so early
# stopping is driven by the test set.
CNN_tester = Tester(epochs, batch_size, 'tanh', loss, optimer, patience)
CNN_history, CNN_model = CNN_tester.apply_CNN_model(X_train, y_train, X_test, y_test)

Train on 43 samples, validate on 11 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 00032: early stopping


In [18]:
# Training vs. validation loss curves of the CNN run.
fig = evaluator.plot_loss(CNN_history, 'CNN Model')
py.iplot(fig, filename='CNN_Model_Accuracy')

In [19]:
# "Rolling" forecast: every test window is predicted from its own true inputs.
CNN_rolling_predictions = CNN_model.predict(X_test)

In [20]:
# Drop the trailing feature axis, undo the scaling and attach timestamps.
CNN_rolling_predictions_df = evaluator.invert_preds(CNN_rolling_predictions[:,:,0], pred_times, scaler, original_train_data_array)

In [21]:
# Actual vs. rolling-forecast prices for the CNN.
fig = evaluator.plot_preds(CNN_rolling_predictions_df.Price, true_prices_df.Price, pred_times, 'CNN Rolling Model')
py.iplot(fig, filename='CNN_Rolling_Model_Predictions')

In [22]:
# Recursive forecast: starts from the first test window and feeds the model's
# own predictions back in as inputs.
CNN_predictions = evaluator.forward_predict(X_test, CNN_model, K)

In [23]:
# Undo the scaling of the recursive forecast and attach timestamps.
CNN_predictions_df = evaluator.invert_preds(CNN_predictions[:,:,0], pred_times, scaler, original_train_data_array)

In [24]:
# Actual vs. recursively forecast prices for the CNN.
fig = evaluator.plot_preds(CNN_predictions_df.Price, true_prices_df.Price, pred_times, 'CNN Model')
py.iplot(fig, filename='CNN_Model_Predictions')

In [25]:
# MSE of the recursive CNN forecast, in unscaled price units squared.
CNN_MSE = evaluator.get_mse(CNN_predictions_df['Price'], true_prices_df['Price'], 'CNN Model')

CNN Model: 19191045.582516957


In [26]:
# MSE of the CNN rolling forecast. The printed label used to read
# 'GRU Rolling Model' (copy-paste slip), which mislabelled this result.
CNN_rolling_MSE = evaluator.get_mse(CNN_rolling_predictions_df['Price'], true_prices, 'CNN Rolling Model')

GRU Rolling Model: 2543297.3613845273


In [27]:
# Train the LSTM ('relu' output activation). The targets are passed without
# their trailing feature axis (y[:, :, 0]).
# NOTE(review): the test windows are used as validation data here as well.
units = 128
RNN_tester = Tester(epochs, batch_size, 'relu', loss, optimer, patience)
LSTM_history, LSTM_model = RNN_tester.apply_LSTM_model(X_train, y_train[:,:,0], X_test, y_test[:,:,0], units)

Train on 43 samples, validate on 11 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epo

In [28]:
# Training vs. validation loss curves of the LSTM run.
fig = evaluator.plot_loss(LSTM_history, 'LSTM Model')
py.iplot(fig, filename='LSTM_Model_Accuracy')

In [29]:
# Rolling forecast: each test window predicted from its true inputs.
LSTM_rolling_predictions = LSTM_model.predict(X_test)

In [30]:
# Undo the scaling and attach timestamps (the predictions are passed as
# returned by predict, without slicing off an axis).
LSTM_rolling_predictions_df = evaluator.invert_preds(LSTM_rolling_predictions, pred_times, scaler, original_train_data_array)

In [31]:
# Actual vs. rolling-forecast prices for the LSTM.
fig = evaluator.plot_preds(LSTM_rolling_predictions_df.Price, true_prices_df.Price, pred_times, 'LSTM Rolling Model')
py.iplot(fig, filename='LSTM_Rolling_Model_Predictions')

In [32]:
# Recursive forecast seeded with the first test window.
LSTM_predictions = evaluator.forward_predict(X_test, LSTM_model, K)

In [33]:
# Undo the scaling of the recursive forecast and attach timestamps.
LSTM_predictions_df = evaluator.invert_preds(LSTM_predictions[:,:,0], pred_times, scaler, original_train_data_array)

In [34]:
# Actual vs. recursively forecast prices for the LSTM.
fig = evaluator.plot_preds(LSTM_predictions_df.Price, true_prices_df.Price, pred_times, 'LSTM Model')
py.iplot(fig, filename='LSTM_Model_Predictions')

In [35]:
# MSE of the recursive LSTM forecast, in unscaled price units squared.
LSTM_MSE = evaluator.get_mse(LSTM_predictions_df['Price'], true_prices_df['Price'], 'LSTM Model')

LSTM Model: 17829731.333964113


In [36]:
# MSE of the LSTM rolling forecast.
LSTM_rolling_MSE = evaluator.get_mse(LSTM_rolling_predictions_df['Price'], true_prices, 'LSTM Rolling Model')

LSTM Rolling Model: 1953750.9093364673


In [39]:
# Train the GRU with the same Tester instance (and settings) as the LSTM.
# NOTE(review): the execution counter skips 37-38 before this cell, so the
# saved outputs may depend on cells that are no longer in the notebook;
# verify with Restart & Run All.
units = 128
GRU_history, GRU_model = RNN_tester.apply_GRU_model(X_train, y_train[:,:,0], X_test, y_test[:,:,0], units)

Train on 43 samples, validate on 11 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epo

In [40]:
# Training vs. validation loss curves of the GRU run.
fig = evaluator.plot_loss(GRU_history, 'GRU Model')
py.iplot(fig, filename='GRU_Model_Accuracy')

In [41]:
# Rolling forecast: each test window predicted from its true inputs.
GRU_rolling_predictions = GRU_model.predict(X_test)

In [42]:
# Undo the scaling and attach timestamps.
GRU_rolling_predictions_df = evaluator.invert_preds(GRU_rolling_predictions, pred_times, scaler, original_train_data_array)

In [43]:
# Actual vs. rolling-forecast prices for the GRU.
fig = evaluator.plot_preds(GRU_rolling_predictions_df.Price, true_prices_df.Price, pred_times, 'GRU Rolling Model')
py.iplot(fig, filename='GRU_Rolling_Model_Predictions')

In [44]:
# Recursive forecast seeded with the first test window.
GRU_predictions = evaluator.forward_predict(X_test, GRU_model, K)

In [45]:
# Undo the scaling of the recursive forecast and attach timestamps.
GRU_predictions_df = evaluator.invert_preds(GRU_predictions[:,:,0], pred_times, scaler, original_train_data_array)

In [46]:
# Actual vs. recursively forecast prices for the GRU.
fig = evaluator.plot_preds(GRU_predictions_df.Price, true_prices_df.Price, pred_times, 'GRU Model')
py.iplot(fig, filename='GRU_Model_Predictions')

In [47]:
# MSE of the recursive GRU forecast, in unscaled price units squared.
GRU_MSE = evaluator.get_mse(GRU_predictions_df['Price'], true_prices_df['Price'], 'GRU Model')

GRU Model: 34177960.31915288


In [48]:
# MSE of the GRU rolling forecast.
GRU_rolling_MSE = evaluator.get_mse(GRU_rolling_predictions_df['Price'], true_prices, 'GRU rolling Model')

GRU rolling Model: 1810696.9539232485
