In [9]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import time
import numpy
import tensorflow as tf
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from sklearn.metrics import fbeta_score, make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM


def read_in_data(file="tweets_with_BTC_prices.csv"):
    """
    Reads in our collected BTC csv into a pandas data frame and removes all columns other than the close price

    The csv is read with explicit column names, so a header line in the file is not consumed as a header
    and comes back as the first data row of the returned frame.

    :param - file: path of the csv to read, defaults to the collected tweets/BTC price file
    :returns - Pandas data frame containing only the close column, indexed by the Date column
    """

    column_names = ["Date", "time", "open", "high", "low", "close", "volume", "adj_close", "tweets"]
    data_frame = pd.read_csv(file, names=column_names)

    # set_index returns a new frame here, so the freshly read frame is never mutated in place
    return data_frame.set_index("Date")[["close"]]

In [10]:
def normalize_data(scaler, close_list, train_fraction=0.80):
    """
    This section reshapes our value list into a single column, separates it into training and testing sets
    (80:20 by default) and scales both sets with the given scaler.

    The scaler is fitted on the training rows only and then applied to the testing rows, so no information
    about the held-out prices leaks into training. Scaled test values can therefore fall outside the
    scaler's fitted range when the test prices exceed the training minimum/maximum.

    :param - scaler: object exposing fit_transform and transform; it is fitted by this call
    :param - close_list: sequence of close prices in time order
    :param - train_fraction: share of the rows (taken from the start) used for training
    :returns - train, test as 2D arrays with a single column
    """

    values = np.array(close_list).reshape(-1, 1)

    train_split = int(len(values) * train_fraction)
    train_values = values[:train_split]
    test_values = values[train_split:]

    # Fit on the training portion only; the test portion reuses the fitted parameters
    train = scaler.fit_transform(train_values)
    # An empty test portion (e.g. train_fraction=1.0) is passed through untouched
    test = scaler.transform(test_values) if len(test_values) else test_values

    return train, test

In [11]:
def create_train_test_set(data, lookahead):
    """
    This method takes in either our train or test set and creates a time lag of the predefined lookahead value
    Each X entry is a window of lookahead consecutive values and the matching Y entry is the value that
    directly follows that window

    :param - data: sequence of values in time order
    :param - lookahead: number of consecutive values in each input window
    :returns - X, Y as numpy arrays; both are empty when data has no more than lookahead entries
    """

    # The last window must leave one value after it to act as the target, so valid window
    # starts run from 0 to len(data) - lookahead - 1 inclusive.
    window_count = len(data) - lookahead

    X = [data[start:start + lookahead] for start in range(window_count)]
    Y = [data[start + lookahead] for start in range(window_count)]
    return np.array(X), np.array(Y)
    

In [12]:
def create_lstm_model():
    """
    Builds and compiles the generic LSTM regressor structure that is handed to grid search

    Three stacked LSTM layers of 50 units over inputs of shape (7,1) feed a single Dense output unit.
    The model is compiled with mean squared error loss and the adam optimizer.

    :param - 
    :returns - model
    """
    stacked_layers = [
        LSTM(50, return_sequences=True, input_shape=(7, 1)),
        LSTM(50, return_sequences=True),
        LSTM(50),
        Dense(1),
    ]

    model = Sequential(stacked_layers)
    model.compile(optimizer='adam', loss='mean_squared_error')

    return model

def run_grid_search(X_train, y_train):
    """
    Runs grid search over batch size and epoch count on our generic LSTM regressor structure

    :param - X_train, y_train
    :returns - the fitted GridSearchCV object
    """
    regressor = KerasRegressor(build_fn=create_lstm_model)
    search_space = dict(batch_size=[16], epochs=[8])

    grid_results = GridSearchCV(
        estimator=regressor,
        param_grid=search_space,
        scoring='neg_mean_squared_error',
        cv=2,
    )
    grid_results.fit(X_train, y_train)

    return grid_results

In [13]:
def run_optimal_lstm_model(X_train, y_train, X_test, y_test, epochs, batchsize, optimizer,
                           checkpoint_path="C:/Users/niall/Desktop/Saved_model/cp.ckpt"):
    """
    This method builds and fits our LSTM model to our training data while evaluating score on the test data
    The method then returns our model once it has completed training

    :param - X_train, y_train: training windows and the target value that follows each window
    :param - X_test, y_test: held-out windows and targets, used as validation data during fitting
    :param - epochs, batchsize, optimizer: training settings passed to compile and fit
    :param - checkpoint_path: where the weights checkpoint is written; the default is the original
             machine-specific location, pass a different path when running elsewhere
    :returns - model
    """

    # Take the per-sample shape from the data so the network follows whatever window length was used
    # assumes X_train is shaped (samples, timesteps, features)
    input_shape = tuple(np.shape(X_train)[1:])

    model = Sequential()
    model.add(LSTM(50, return_sequences=True, input_shape=input_shape))
    model.add(LSTM(50, return_sequences=True))
    model.add(LSTM(50))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer=optimizer)

    cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path, save_weights_only=True, verbose=1)

    # The targets are y_train; fitting against X_train would make the model reproduce its own inputs
    model.fit(
        X_train,
        y_train,
        epochs=epochs,
        validation_data=(X_test, y_test),
        batch_size=batchsize,
        verbose=2,
        callbacks=[cp_callback],
    )

    return model

In [14]:
def forecast_model(df, model, scaler, close_list, X_train, y_train, X_test=None, real_points=288):
    """
    In this method we forecast using our model on the train and test inputs
    We then plot the inverse scaled values onto a graph with the real BTC price

    :param - df: data frame holding the real prices in its close column
    :param - model: fitted model exposing predict
    :param - scaler: fitted scaler used to map predictions back to prices
    :param - close_list, y_train: not used by this method, kept so existing calls keep working
    :param - X_train: training inputs to predict on
    :param - X_test: test inputs to predict on; when omitted the notebook level X_test is used
    :param - real_points: number of leading rows of df plotted as the real price line
    :returns - nothing, the plot is shown
    """

    if X_test is None:
        # Calls that do not pass the test inputs keep the previous behaviour of reading the
        # notebook level X_test variable.
        try:
            X_test = globals()["X_test"]
        except KeyError:
            raise NameError("name 'X_test' is not defined") from None

    train_predict = scaler.inverse_transform(model.predict(X_train))
    test_predict = scaler.inverse_transform(model.predict(X_test))

    # Train predictions first, then test predictions, as one continuous line
    forecast_movement = np.concatenate((train_predict, test_predict))
    real_movement = df[["close"]][:real_points].values

    plt.figure(figsize=(16,8))
    plt.plot(real_movement, color = 'green', label = 'Real Prices')
    plt.plot(forecast_movement,  color = 'purple', label = 'Predicted Prices')
    plt.xlabel('Trades')
    plt.ylabel('Price in USD')
    plt.legend()
    plt.show()


In [15]:
# Shared settings: the window length fed to the model and the scaler used on the prices
lookahead = 7
scaler = MinMaxScaler(feature_range=(0,1))

# Load the close prices and drop the first row (read_in_data supplies its own column names,
# so the first row is presumably the csv header line), then make sure the closes are floats
df = read_in_data().iloc[1:, :]
df['close'] = df['close'].astype(float)

# Close prices as a series on a fresh positional index
close_list = df['close'].reset_index(drop=True)


In [None]:
# Scale the closes and split them into the training and testing portions
train, test = normalize_data(scaler, close_list)

# Build the lagged windows and their targets for each portion
X_train, y_train = create_train_test_set(train, lookahead)
X_test, y_test = create_train_test_set(test, lookahead)

# Give the inputs an explicit (samples, window, 1) layout before they reach the LSTM layers
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

grid_results = run_grid_search(X_train, y_train)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8


In [None]:
# Report the winning grid search settings and reuse them for the final training run
print("optimal parameters are:", grid_results.best_params_)

optimizer = 'adam'
batchsize = grid_results.best_params_['batch_size']
epochs = grid_results.best_params_['epochs']

best_model = run_optimal_lstm_model(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    epochs=epochs,
    batchsize=batchsize,
    optimizer=optimizer,
)

In [None]:
# Plot the model's predictions against the real close prices
forecast_model(
    df=df,
    model=best_model,
    scaler=scaler,
    close_list=close_list,
    X_train=X_train,
    y_train=y_train,
)