In [None]:
import numpy as np
import pandas as pd
#I will use plotly to graph any results etc.
import plotly.offline as py
import plotly.graph_objs as go
py.init_notebook_mode(connected=True)
#preprocessing packages
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
#Keras features that I will use
from keras.models import Sequential
from keras.layers import Conv1D, Dense, LSTM, GRU
from keras.callbacks import EarlyStopping
#MSE is used to compare performance of models
from sklearn.metrics import mean_squared_error

In [None]:
#loading the dataset into the notebook
# dfp is a path relative to the notebook's working directory
# (presumably hourly price records, judging by the file name -- confirm against the data source)
dfp = '../input/bitcoin_hourly.csv'
df = pd.read_csv(dfp)

In [None]:
#we choose the start and end dates from the dataset that we want to consider
# d0 and d1 are used further down as the bounds of a label slice on the daily datetime index
d0 = datetime(2016, 12, 1)
d1 = datetime(2018, 6, 27)

In [None]:
#we resample the data by taking the average daily price
# Work on a copy so the raw `df` keeps its original index and this cell can be re-run safely.
hourly = df.copy()
hourly.index = pd.to_datetime(hourly['Timestamp'])
# Average numeric columns only: text columns (such as a string 'Timestamp') cannot be averaged
# and make newer pandas versions raise a TypeError inside .mean().
daily = hourly.select_dtypes(include='number').resample('D').mean()
# Keep only the days between d0 and d1.
daily = daily[d0:d1]
# Two-column frame (timestamp, mean daily close) re-indexed 0..n-1 so rows can be addressed by position.
daily_df = pd.DataFrame({'Timestamps':daily.index, 'Price':daily['Close']})
daily_df = daily_df.set_index(np.arange(daily_df.shape[0]))

In [None]:
#let's take a look at the price data we're considering
# a single line trace: daily timestamps on x, mean daily price on y
trace = go.Scatter(x=daily_df['Timestamps'], y=daily_df['Price'], mode='lines', name='Price')

data = [trace]
layout = {
    'title': 'BTC Daily Price',
    'xaxis': {'title': 'Time'},
    'yaxis': {'title': 'Price (USD)'},
}
fig = {'data': data, 'layout': layout}

py.iplot(fig, filename='BTC_Daily_price')

In [None]:
#Here we define our Preprocessor class. To prepare the data for a CNN, the data needs to be stacked nicely in arrays;
#in our case 1D arrays. For the purpose of the data fitting in appropriate arrays, we trim the data, removing a small
#number of initial values. The models will learn by having inputs of the size of N consecutive days, and attempting
#to predict the next K. The value alpha determines the approximate proportion of the data outputs that will form
#the training set. The sliding window function transforms the data into 2D arrays with rows being the required inputs and outputs.

class Preprocessor:
    """Trims, splits and windows a series for N-steps-in / K-steps-out forecasting.

    N     -- number of consecutive rows that form one model input
    K     -- number of following rows the model has to predict
    alpha -- approximate proportion of the prediction targets assigned to the training set
    """

    def __init__(self, N, K, alpha):

        self.K = K
        self.N = N
        self.alpha = alpha

    def trim_data(self, df):
        """Drop the oldest rows so that (len(df) - 2 * N) is a multiple of K.

        The 2 * N accounts for one input-only window of N rows at the start of
        the training part and another at the start of the test part; everything
        else must divide into whole K-row target blocks.

        Returns df itself when nothing has to be removed.
        """
        r = (df.shape[0] - 2 * self.N) % self.K

        if r == 0:
            return df

        # Positional slice instead of df.tail(-r): tail(-0) is tail(0) and would
        # silently return an EMPTY frame whenever the length already fits.
        return df.iloc[r:]

    def test_train_split(self, df):
        """Split df chronologically into (train_df, test_df).

        The training part holds N rows plus a whole number of K-row blocks, the
        number of blocks being the integer part of alpha * (len(df) - 2 * N) / K.
        The test part is every remaining row.
        """
        l = df.shape[0]
        train_l = self.N + self.K * int((self.alpha * (l - 2 * self.N)) / self.K)

        # iloc keeps the split correct even if train_l is 0 (tail(-0) would return nothing).
        train_df = df.iloc[:train_l]
        test_df = df.iloc[train_l:]

        return train_df, test_df

    def create_sliding_window(self, A):
        """Cut A into windows of N input rows followed by K target rows.

        A is indexed along its first axis and must be 3-D; the notebook passes
        arrays of shape (rows, 1, features). Consecutive windows start K rows
        apart, so targets do not overlap while inputs may.

        Returns (X, y) with shapes (windows, N, features) and (windows, K, features).
        Raises AssertionError if (rows - N) is not a multiple of K.
        """
        assert (A.shape[0] - self.N) % self.K == 0

        m = A.shape[0] - self.N
        # Row i of the index matrix is [i*K, i*K + 1, ..., i*K + N + K - 1].
        starts = np.arange(0, m, self.K).reshape(-1, 1)
        I = np.arange(self.N + self.K) + starts
        # Fancy indexing adds the window axis; the reshape folds away the length-1 middle axis of A.
        B = A[I].reshape(-1, self.N + self.K, A.shape[2])
        X = B[:, :self.N]
        y = B[:, self.N:]

        return X, y

In [None]:
#We make an instance of our Preprocessor class choosing N to be 16 and K to be 10. Roughly 80% of our data is used to train on.
# NOTE: the Conv1D stack in Tester.apply_CNN_model (kernel sizes 3, 3, 2, 2) shortens its input by
# 6 time steps, so the CNN output only matches the targets when N - K == 6.

N, K = 16, 10
alpha = 0.8

preprocessor = Preprocessor(N, K, alpha)

In [None]:
# Drop the oldest rows so the series length fits the N-in / K-out windowing.
daily_df = preprocessor.trim_data(daily_df)

# Timestamps are kept aside for plotting; the models only see the single 'Price' column.
# The list selector ['Price'] keeps daily_price a one-column DataFrame rather than a Series.
time_stamps = daily_df['Timestamps']
daily_price = daily_df.loc[:, ['Price']]

In [None]:
# Chronological split: the head of the series is used for training, the remaining rows for testing.
train_data, test_data = preprocessor.test_train_split(daily_price)
# Keep handles on the unscaled frames; train_data / test_data are rebound to arrays and scaled in later cells.
original_train_data = train_data
original_test_data = test_data

In [None]:
#we need our data in the form of NumPy arrays to be used in the Keras model
#we keep track of the original data
# [:, None, :] inserts a length-1 middle axis, giving arrays of shape (rows, 1, columns).
# Each np.array(...) call builds a separate array, so the "original" arrays are not touched
# when train_data / test_data are scaled in place in the next cell.
train_data = np.array(train_data)[:,None,:]
test_data = np.array(test_data)[:,None,:]
original_train_data_array = np.array(original_train_data)[:,None,:]
original_test_data_array = np.array(original_test_data)[:,None,:]

In [None]:
#it is important to scale the data when using a deep learning model
#the scaler is derived only from the training data to avoid any minor data leakage
scaler = MinMaxScaler()

# The scaled values are written back into train_data / test_data in place.
# The test rows reuse the range learned from the training rows (transform, not fit_transform).
train_data[:,0] = scaler.fit_transform(train_data[:,0].reshape(-1,1))
test_data[:,0] = scaler.transform(test_data[:,0].reshape(-1,1))

In [None]:
#the X values are sequential rows of price data and the y values are corresponding rows of the following K days of prices
# X_* has shape (windows, N, 1) and y_* has shape (windows, K, 1); consecutive windows start K days apart
X_train, y_train = preprocessor.create_sliding_window(train_data)
X_test, y_test = preprocessor.create_sliding_window(test_data)

In [None]:
#we make a dataframe of the true prices to compare against our predictions later
# the first N test rows are input-only, so only the last (test rows - N) days are ever predicted
pred_times = time_stamps.tail(test_data.shape[0] - N)
true_prices = daily_price['Price'].tail(test_data.shape[0] - N)
true_prices_df = pd.DataFrame({'Timestamps': pred_times, 'Price': true_prices})

In [None]:
#We design a Tester class which makes it easy to create an instance of a CNN, LSTM or GRU model with different hyperparameters.
#Due to the nature of a CNN being very rigid with respect to the input and output size and the method in which it reduces the
#input size, we must explicitly define the structure of the CNN with respect to the values of N and K.

class Tester:
    """Builds, compiles and trains the CNN, LSTM and GRU forecasters with one shared set of hyperparameters.

    epochs, batch_size -- passed to model.fit
    activation         -- activation of the last layer of whichever model is built
    loss, optimer      -- passed to model.compile as loss and optimizer
    patience           -- passed to EarlyStopping
    """

    # The CNN's four Conv1D layers use kernel sizes 3, 3, 2, 2 with stride 1 and Keras'
    # default 'valid' padding, which removes 2 + 2 + 1 + 1 = 6 time steps from the input.
    _CNN_STEPS_REMOVED = 6

    def __init__(self, epochs, batch_size, activation, loss, optimer, patience):

        self.epochs = epochs
        self.batch_size = batch_size
        self.activation = activation
        self.loss = loss
        self.optimer = optimer
        self.patience = patience

    def _compile_and_fit(self, model, X_train, y_train, X_test, y_test, **fit_kwargs):
        """Compile model with the configured loss/optimizer, train it and return the fit history."""
        model.compile(loss=self.loss, optimizer=self.optimer)

        # Early stopping watches the training loss; X_test / y_test are only passed
        # as validation data so that val_loss is recorded for every epoch.
        stopper = EarlyStopping(monitor='loss', patience=self.patience, verbose=1)

        return model.fit(X_train, y_train,
                         batch_size=self.batch_size,
                         validation_data=(X_test, y_test),
                         epochs=self.epochs,
                         callbacks=[stopper],
                         **fit_kwargs)

    def _apply_recurrent_model(self, layer_cls, X_train, y_train, X_test, y_test, units):
        """Train one recurrent layer (layer_cls is LSTM or GRU) followed by a Dense output layer."""
        step_size = X_train.shape[1]
        no_of_features = X_train.shape[2]
        # Size the output from the targets themselves rather than from a notebook-level K.
        output_size = y_train.shape[1]

        model = Sequential()
        model.add(layer_cls(units=units, input_shape=(step_size, no_of_features), return_sequences=False))
        model.add(Dense(units=output_size, activation=self.activation))

        history = self._compile_and_fit(model, X_train, y_train, X_test, y_test)

        return history, model

    def apply_CNN_model(self, X_train, y_train, X_test, y_test):
        """Train the fixed four-layer Conv1D model.

        X_* must be (samples, steps, features) and y_* (samples, steps - 6, features).
        Returns (history, model).
        Raises ValueError if the target length cannot be produced by the fixed layer stack.
        """
        step_size = X_train.shape[1]
        no_of_features = X_train.shape[2]

        # Fail before any model is built, with a message that names the actual constraint.
        output_steps = step_size - self._CNN_STEPS_REMOVED
        if y_train.shape[1] != output_steps:
            raise ValueError(
                'The CNN maps ' + str(step_size) + ' input steps to ' + str(output_steps) +
                ' output steps, but y_train has ' + str(y_train.shape[1]) +
                ' steps per sample; the input must be exactly ' + str(self._CNN_STEPS_REMOVED) +
                ' steps longer than the target.')

        CNN_model = Sequential()

        CNN_model.add(Conv1D(input_shape=(step_size,no_of_features), activation='relu', strides=1, filters=8, kernel_size=3))
        CNN_model.add(Conv1D(activation='relu', strides=1, filters=8, kernel_size=3))
        CNN_model.add(Conv1D(activation='relu', strides=1, filters=4, kernel_size=2))
        # The last layer brings the channel count back to the number of features and
        # is the only layer that uses the configurable activation.
        CNN_model.add(Conv1D(activation=self.activation, strides=1, filters=no_of_features, kernel_size=2))

        CNN_history = self._compile_and_fit(CNN_model, X_train, y_train, X_test, y_test, verbose=1)

        return CNN_history, CNN_model

    def apply_LSTM_model(self, X_train, y_train, X_test, y_test, units):
        """Train an LSTM(units) -> Dense model; y_* must be 2-D (samples, outputs). Returns (history, model)."""
        return self._apply_recurrent_model(LSTM, X_train, y_train, X_test, y_test, units)

    def apply_GRU_model(self, X_train, y_train, X_test, y_test, units):
        """Train a GRU(units) -> Dense model; y_* must be 2-D (samples, outputs). Returns (history, model).

        Compiles with self.loss, like the other two models.
        """
        return self._apply_recurrent_model(GRU, X_train, y_train, X_test, y_test, units)

In [None]:
#The Evaluator class has methods which allow visualisations of loss and predictions. There are two important methods in the class;
#forward_predict and invert_preds. The former is important as I would like to consider two methods for forecasting. The first seeks
#to view the model as learning to be able to predict K days in the future and would work by, firstly learning weights from the
#training data, then predicting the next K days, receiving N more days of information, then predicting the next K days
#(without updating its weights). The second method uses the forward_predict function and feeds the predictions that
#a given model makes back into the model to make further predictions. In this way, the model can predict as many days
#into the future as desired. The invert_preds function uses the inverse transformation from the earlier scaler so we
#can view the predictions on the same scale as the original data.

class Evaluator:
    """Plotting, forecasting and scoring helpers shared by all models."""

    def plot_loss(self, history, model_name):
        """Return a plotly figure dict with training and validation loss per epoch.

        history is the object returned by model.fit; its .history dict must hold
        'loss' and 'val_loss'.
        """
        trace1 = go.Scatter(
        x = np.arange(0, len(history.history['loss'])),
        y = history.history['loss'],
        mode = 'lines',
        name = 'Loss')

        trace2 = go.Scatter(
        x = np.arange(0, len(history.history['val_loss'])),
        y = history.history['val_loss'],
        mode = 'lines',
        name = 'Validation Loss')

        data = [trace1, trace2]
        layout = dict(title = model_name + ' Accuracy', xaxis = dict(title = 'Epochs'), yaxis = dict(title = 'Loss'))
        fig = dict(data=data, layout=layout)

        return fig

    def forward_predict(self, X_test, model, K):
        """Forecast recursively, feeding the model its own predictions.

        Only the first window of X_test is real data. Step i predicts from the
        slice [i*K, i*K + window length) of everything seen or predicted so far,
        and the prediction is appended to that history. One step is made per row
        of X_test.

        Assumes a single feature and a model returning K values per call.
        Returns an array of shape (1, X_test.shape[0] * K, 1).
        """
        window_len = X_test.shape[1]

        # Flat running history: the seed window followed by every prediction made so far.
        history = np.asarray(X_test[0]).reshape(-1)

        for i in range(X_test.shape[0]):
            start = i * K
            window = history[start:start + window_len].reshape(1, window_len, 1)
            step = np.asarray(model.predict(window)).reshape(-1)
            history = np.concatenate([history, step])

        # Drop the seed window and restore the (1, steps, 1) layout callers index with [:, :, 0].
        return history[window_len:].reshape(1, -1, 1)

    def invert_preds(self, predictions, pred_times, scaler, original_data_array):
        """Map scaled predictions back to the original scale.

        predictions may have any shape; it is flattened in row-major order.
        Returns a DataFrame with columns 'Timestamps' (pred_times) and 'Price'.

        NOTE: scaler is re-fitted in place on original_data_array[:, 0], so the
        caller's scaler object is modified.
        """
        scaler.fit(original_data_array[:,0].reshape(-1,1))

        # The scaler is fitted on one feature, so hand it a single column instead of
        # relying on an (n, K) matrix happening to broadcast against one min/scale value.
        column = np.asarray(predictions).reshape(-1, 1)
        predictions_inverted = np.asarray(scaler.inverse_transform(column)).reshape(-1)

        predictions_df = pd.DataFrame()
        predictions_df['Timestamps'] = pred_times
        predictions_df['Price'] = predictions_inverted

        return predictions_df

    def plot_preds(self, preds, true, times, model_name):
        """Return a plotly figure dict with actual and predicted prices over the same times."""
        trace1 = go.Scatter(
        x = times,
        y = true,
        mode = 'lines',
        name = 'Actual')

        trace2 = go.Scatter(
        x = times,
        y = preds,
        mode = 'lines',
        name = 'Predicted')

        data = [trace1, trace2]
        layout = dict(title = model_name + ' Predictions', xaxis = dict(title = 'Time'), yaxis = dict(title = 'Price (USD)'))
        fig = dict(data=data, layout=layout)

        return fig

    def get_mse(self, predictions, true_values, model_name):
        """Print and return the mean squared error between predictions and true_values."""
        # sklearn's signature is (y_true, y_pred); MSE is symmetric, so the value is unchanged.
        mse = mean_squared_error(true_values, predictions)
        print(model_name + ': ' + str(mse))

        return mse

In [None]:
#here we make choices on a few hyperparameters
epochs = 100       # upper bound on training epochs; early stopping can end training sooner
batch_size = 1     # passed to model.fit as batch_size
loss = 'mse'       # passed to model.compile as loss
optimer = 'adam'   # passed to model.compile as optimizer
patience = 30      # passed to EarlyStopping as patience

evaluator = Evaluator()

In [None]:
#for the CNN model I found using a tanh activation in the final layer was necessary to give improved results
# The test windows are passed as validation data, so a validation loss is recorded for every epoch.
CNN_tester = Tester(epochs, batch_size, 'tanh', loss, optimer, patience)
CNN_history, CNN_model = CNN_tester.apply_CNN_model(X_train, y_train, X_test, y_test)

In [None]:
#here we plot the validation loss against the loss on the training set
#ideally we would want both values to go down over time with validation loss being slightly higher than loss
# note: plot_loss titles the figure '<model name> Accuracy', but both curves are loss values
fig = evaluator.plot_loss(CNN_history, 'CNN Model')
py.iplot(fig, filename='CNN_Model_Accuracy')

In [None]:
#First we generate our rolling predictions where new data is given to the model to make further predictions on.



In [None]:
# One prediction per test window; every window here is built from real (scaled) prices.
CNN_rolling_predictions = CNN_model.predict(X_test)

In [None]:
# [:,:,0] drops the trailing feature axis of the CNN output before the scaling is undone.
CNN_rolling_predictions_df = evaluator.invert_preds(CNN_rolling_predictions[:,:,0], pred_times, scaler, original_train_data_array)

In [None]:
# Actual vs. rolling-forecast prices over the predicted dates.
fig = evaluator.plot_preds(CNN_rolling_predictions_df.Price, true_prices_df.Price, pred_times, 'CNN Rolling Model')
py.iplot(fig, filename='CNN_Rolling_Model_Predictions')

In [None]:
#Next we generate our predictions generated by feeding predictions back into the model.
# Only the first test window is real data; every later input is taken from the model's own earlier output.

CNN_predictions = evaluator.forward_predict(X_test, CNN_model, K)

In [None]:
# Back to the original price scale, paired with the predicted dates.
CNN_predictions_df = evaluator.invert_preds(CNN_predictions[:,:,0], pred_times, scaler, original_train_data_array)

In [None]:
# Actual vs. recursively forecast prices over the predicted dates.
fig = evaluator.plot_preds(CNN_predictions_df.Price, true_prices_df.Price, pred_times, 'CNN Model')
py.iplot(fig, filename='CNN_Model_Predictions')

In [None]:
#We print the MSE for both methods.
# Both errors are computed on inverse-transformed prices, not on the scaled values the model was trained on.

CNN_MSE = evaluator.get_mse(CNN_predictions_df['Price'], true_prices_df['Price'], 'CNN Model')

In [None]:
# true_prices is the same Series that was stored as true_prices_df['Price'].
CNN_rolling_MSE = evaluator.get_mse(CNN_rolling_predictions_df['Price'], true_prices, 'CNN Rolling Model')

In [None]:
#We now do the same for the RNN models. I have chosen 128 units in both models so that the run time is not too high.

units = 128
# The RNN models end in a Dense layer that outputs a flat vector per sample,
# so the trailing feature axis of the targets is dropped with [:,:,0].
RNN_tester = Tester(epochs, batch_size, 'relu', loss, optimer, patience)
LSTM_history, LSTM_model = RNN_tester.apply_LSTM_model(X_train, y_train[:,:,0], X_test, y_test[:,:,0], units)

In [None]:
# Training loss and validation loss per epoch for the LSTM.
fig = evaluator.plot_loss(LSTM_history, 'LSTM Model')
py.iplot(fig, filename='LSTM_Model_Accuracy')

In [None]:
# One prediction per test window; every window here is built from real (scaled) prices.
LSTM_rolling_predictions = LSTM_model.predict(X_test)

In [None]:
# The Dense output is already 2-D, so no feature axis has to be dropped before the scaling is undone.
LSTM_rolling_predictions_df = evaluator.invert_preds(LSTM_rolling_predictions, pred_times, scaler, original_train_data_array)

In [None]:
# Actual vs. rolling-forecast prices over the predicted dates.
fig = evaluator.plot_preds(LSTM_rolling_predictions_df.Price, true_prices_df.Price, pred_times, 'LSTM Rolling Model')
py.iplot(fig, filename='LSTM_Rolling_Model_Predictions')

In [None]:
# Recursive forecast: only the first test window is real data, later inputs come from earlier predictions.
LSTM_predictions = evaluator.forward_predict(X_test, LSTM_model, K)

In [None]:
# forward_predict returns a 3-D array, hence the [:,:,0] before the scaling is undone.
LSTM_predictions_df = evaluator.invert_preds(LSTM_predictions[:,:,0], pred_times, scaler, original_train_data_array)

In [None]:
# Actual vs. recursively forecast prices over the predicted dates.
fig = evaluator.plot_preds(LSTM_predictions_df.Price, true_prices_df.Price, pred_times, 'LSTM Model')
py.iplot(fig, filename='LSTM_Model_Predictions')

In [None]:
# MSE of the recursive LSTM forecast, on the original price scale.
LSTM_MSE = evaluator.get_mse(LSTM_predictions_df['Price'], true_prices_df['Price'], 'LSTM Model')

In [None]:
# MSE of the rolling LSTM forecast; true_prices is the same Series as true_prices_df['Price'].
LSTM_rolling_MSE = evaluator.get_mse(LSTM_rolling_predictions_df['Price'], true_prices, 'LSTM Rolling Model')

In [None]:
# Same Tester instance (and therefore the same hyperparameters) as the LSTM run; targets again lose their feature axis.
units = 128
GRU_history, GRU_model = RNN_tester.apply_GRU_model(X_train, y_train[:,:,0], X_test, y_test[:,:,0], units)

In [None]:
# Training loss and validation loss per epoch for the GRU.
fig = evaluator.plot_loss(GRU_history, 'GRU Model')
py.iplot(fig, filename='GRU_Model_Accuracy')

In [None]:
# One prediction per test window; every window here is built from real (scaled) prices.
GRU_rolling_predictions = GRU_model.predict(X_test)

In [None]:
# The Dense output is already 2-D, so no feature axis has to be dropped before the scaling is undone.
GRU_rolling_predictions_df = evaluator.invert_preds(GRU_rolling_predictions, pred_times, scaler, original_train_data_array)

In [None]:
# Actual vs. rolling-forecast prices over the predicted dates.
fig = evaluator.plot_preds(GRU_rolling_predictions_df.Price, true_prices_df.Price, pred_times, 'GRU Rolling Model')
py.iplot(fig, filename='GRU_Rolling_Model_Predictions')

In [None]:
# Recursive forecast: only the first test window is real data, later inputs come from earlier predictions.
GRU_predictions = evaluator.forward_predict(X_test, GRU_model, K)

In [None]:
# forward_predict returns a 3-D array, hence the [:,:,0] before the scaling is undone.
GRU_predictions_df = evaluator.invert_preds(GRU_predictions[:,:,0], pred_times, scaler, original_train_data_array)

In [None]:
# Actual vs. recursively forecast prices over the predicted dates.
fig = evaluator.plot_preds(GRU_predictions_df.Price, true_prices_df.Price, pred_times, 'GRU Model')
py.iplot(fig, filename='GRU_Model_Predictions')

In [None]:
# MSE of the recursive GRU forecast, on the original price scale.
GRU_MSE = evaluator.get_mse(GRU_predictions_df['Price'], true_prices_df['Price'], 'GRU Model')

In [None]:
# MSE of the rolling GRU forecast; true_prices is the same Series as true_prices_df['Price'].
GRU_rolling_MSE = evaluator.get_mse(GRU_rolling_predictions_df['Price'], true_prices, 'GRU rolling Model')