In [52]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras_tuner.tuners import RandomSearch
from tensorflow import keras
from tensorflow.keras.utils import plot_model
from matplotlib import pyplot as plt
from tensorflow.keras.models import Sequential 
from tensorflow.keras import layers
from tensorflow.keras.optimizers import RMSprop
import os
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error , mean_absolute_error , mean_absolute_percentage_error
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.layers import LSTM , Dense , Dropout , GRU , Concatenate , Input , Conv1D , InputLayer
from keras.layers.normalization import BatchNormalization
from tensorflow.keras import models
from tensorflow.keras.callbacks import EarlyStopping
import math

<Strong> Grabbing the data from stored files (originally pulled from Binance) </Strong>

In [53]:
# Load the hourly BTC candles from the CSV stored next to this notebook.
# (An alternative location used previously was "../../cryptoData/BTC_1h_data.csv".)
df_btc = pd.read_csv("BTC_1h_data.csv")

In [54]:
# Display the loaded frame to check the columns and the covered date range.
df_btc

Unnamed: 0,date,open,high,low,close,volume
0,2017-08-17 04:00:00.000,4261.48,4313.62,4261.32,4308.83,47.181009
1,2017-08-17 05:00:00.000,4308.83,4328.69,4291.37,4315.32,23.234916
2,2017-08-17 06:00:00.000,4330.29,4345.45,4309.37,4324.35,7.229691
3,2017-08-17 07:00:00.000,4316.62,4349.99,4287.41,4349.99,4.443249
4,2017-08-17 08:00:00.000,4333.32,4377.85,4333.32,4360.69,0.972807
...,...,...,...,...,...,...
39216,2022-02-11 10:00:00,43492.63,43569.67,43233.67,43443.77,1305.626920
39217,2022-02-11 11:00:00,43443.78,43523.42,43320.00,43357.53,744.524180
39218,2022-02-11 12:00:00,43357.53,43700.78,43253.31,43570.77,1317.546940
39219,2022-02-11 13:00:00,43570.77,43826.00,43503.01,43765.14,1564.251800


In [55]:
# Keep a handle on the column labels of the raw frame.
df_headers = df_btc.columns

<Strong> Grabbing the closing price (univariate) </Strong>

In [56]:
# Keep only the closing price as a 1-D float array.
# The column is selected by name rather than by position: the frame also
# carries the CSV's unnamed index column, so a positional index is off by one
# relative to the OHLCV order (position 4 is `low`, not `close`).
btc_data = df_btc["close"].to_numpy(dtype=float)

btc_data[-1]

43534.54

<Strong> Scaling the data  </Strong>

In [57]:
# Split sizes in percent. Whatever is left after the training and validation
# parts is used as the test set by `splitting_train_test`.
percTrain = 70
percVal = 20 

In [58]:
# MinMaxScaler expects a 2-D array, so turn the price series into one column.
reshaped_data = btc_data.reshape(-1, 1)

# Size of the training prefix, in whole-percent steps of the series length.
onePercent = len(btc_data) // 100
numberTraining = percTrain * onePercent

# Fit on the training prefix only (fitting on later data would leak
# validation/test information), then scale the complete series.
scaler = MinMaxScaler().fit(reshaped_data[:numberTraining])
scaled_btc = scaler.transform(reshaped_data)

<Strong> Creating the matrix in sliding-window form </Strong>

In [59]:
def sliding_window(elements, window_size):
    """Turn a sequence into overlapping windows plus the value that follows each.

    Window ``i`` is ``elements[i:i + window_size]`` and its target is
    ``elements[i + window_size]``.

    Parameters
    ----------
    elements : array-like
        Sequence of observations, ordered in time along the first axis.
    window_size : int
        Number of consecutive observations per window; must be at least 1.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``(data, targets)`` with ``len(elements) - window_size`` rows each.
        When the input is too short to form a single window/target pair, both
        arrays are empty (zero rows) so callers can still unpack the result.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    elements = np.asarray(elements)
    n_windows = len(elements) - window_size

    if n_windows <= 0:
        # Always return the (data, targets) pair, even when there is nothing
        # to window, instead of handing the raw input back to the caller.
        trailing_shape = elements.shape[1:]
        empty_data = np.empty((0, window_size) + trailing_shape, dtype=elements.dtype)
        empty_targets = np.empty((0,) + trailing_shape, dtype=elements.dtype)
        return empty_data, empty_targets

    data = np.array([elements[i:i + window_size] for i in range(n_windows)])
    # The targets are simply the series shifted by one window length.
    targets = elements[window_size:].copy()

    return data, targets

In [60]:
# Using 24 datapoints to predict the 25th

window_length = 24
# Number of input variables per time step (only the closing price is used).
features = 1

# `sliding_window` returns a (windows, targets) pair built from the scaled series.
sliding_winda_btc = sliding_window(scaled_btc , window_length)

<Strong> Splitting the data after we create Sliding Window matrix (more data) </Strong>

In [61]:
# Splitting the data after creating the sliding window data
def splitting_train_test(data):
    """Split a (windows, targets) pair into train, validation and test parts.

    ``data[0]`` holds the windows and ``data[1]`` the targets. The parts are
    taken in order, without shuffling. Their sizes come from the module-level
    ``percTrain`` and ``percVal`` percentages, applied in steps of
    ``len(targets) // 100``; everything after the validation part is the test
    part.

    Returns three ``(windows, targets)`` tuples: training, validation, test.
    """
    windows, targets = data[0], data[1]

    step = len(targets) // 100
    train_end = step * percTrain
    val_end = train_end + step * percVal

    parts = (
        slice(None, train_end),      # training
        slice(train_end, val_end),   # validation
        slice(val_end, None),        # test: whatever remains
    )

    return tuple((windows[part], targets[part]) for part in parts)

In [62]:
# Split the sliding-window data into train / validation / test parts.
# Each part is a (windows, targets) tuple.
btc_train , btc_val , btc_test = splitting_train_test(sliding_winda_btc)

print(btc_train[0].shape)

(27370, 24, 1)


<Strong> Creating a function to create the model. </Strong>

<p> Keras Tuner for hyper-param tuning </p>

In [63]:
# Hyperparameters 

# Dense layer neurons
# Learning rate 
# Dropout 
# Batch Size

In [64]:
def build(hp):
    """Build and compile the two-branch recurrent model for one tuner trial.

    The input of shape ``(window_length, features)`` feeds two parallel
    branches: a stacked LSTM (30 -> dropout -> 50) and a single GRU (30 ->
    dropout). Each branch ends in a LeakyReLU dense layer followed by batch
    normalisation. The branch outputs are concatenated and mapped to a single
    value by a final dense layer.

    Tuned hyperparameters (all ``hp.Choice``): the width of each branch's
    dense layer, the dropout rate of each branch, and the Adam learning rate.

    Parameters
    ----------
    hp : keras_tuner.HyperParameters
        Hyperparameter container supplied by the tuner.

    Returns
    -------
    tf.keras.Model
        Model compiled with Adam and mean-squared-error loss.
    """
    inputs = Input(shape=(window_length, features))

    # The hyperparameter names are kept unchanged so previously stored tuner
    # results stay readable.
    lstm_dense_units = hp.Choice('dense units_1', values=[64, 128, 256])
    gru_dense_units = hp.Choice('dense units_2', values=[64, 128, 256])
    lstm_dropout = hp.Choice('dropout_1', values=[0.01, 0.03, 0.05, 0.08, 0.1])
    gru_dropout = hp.Choice('dropout_2', values=[0.01, 0.03, 0.05, 0.08, 0.1])
    learning_rate = hp.Choice('learning_rate', values=[0.002, 0.004, 0.006, 0.008, 0.01])

    # LSTM branch
    x = LSTM(30, return_sequences=True)(inputs)
    x = Dropout(rate=lstm_dropout)(x)
    x = LSTM(50)(x)
    x = Dense(units=lstm_dense_units, activation=layers.LeakyReLU(alpha=0.01))(x)
    # The normalisation layer has to be *called* on the branch tensor; binding
    # the bare layer object would throw the branch away. The tensorflow.keras
    # implementation is used so every layer comes from the same Keras package.
    x = layers.BatchNormalization()(x)

    # GRU branch (the input shape is already fixed by `inputs`)
    y = GRU(30)(inputs)
    y = Dropout(rate=gru_dropout)(y)
    y = Dense(units=gru_dense_units, activation=layers.LeakyReLU(alpha=0.01))(y)
    y = layers.BatchNormalization()(y)

    # Merge both branches and regress the next (scaled) value.
    final = Concatenate()([x, y])
    final = Dense(1)(final)

    model = tf.keras.Model(inputs=inputs, outputs=final)

    opt = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=opt, loss='mse')

    return model

In [65]:
# Random search over the `build` search space, ranked by validation loss.
# A fixed seed makes the sequence of sampled hyperparameter combinations
# repeatable across runs (weight initialisation is still random).
# Note: KerasTuner reuses results already stored under
# `directory/project_name` unless `overwrite=True` is passed.
tuner = RandomSearch(
    build,
    objective="val_loss",
    max_trials=50,
    executions_per_trial=1,
    seed=42,
    directory='my_dir2',
    project_name='baseline2',
)

In [66]:
# Print the hyperparameters registered by `build` and their candidate values.
tuner.search_space_summary()

Search space summary
Default search space size: 5
dense units_1 (Choice)
{'default': 64, 'conditions': [], 'values': [64, 128, 256], 'ordered': True}
dense units_2 (Choice)
{'default': 64, 'conditions': [], 'values': [64, 128, 256], 'ordered': True}
dropout_1 (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.03, 0.05, 0.08, 0.1], 'ordered': True}
dropout_2 (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.03, 0.05, 0.08, 0.1], 'ordered': True}
learning_rate (Choice)
{'default': 0.002, 'conditions': [], 'values': [0.002, 0.004, 0.006, 0.008, 0.01], 'ordered': True}


In [67]:
# Run the search: each trial trains for at most 300 epochs and is stopped
# early once val_loss has not improved for 30 consecutive epochs.
tuner.search(
    btc_train[0],
    btc_train[1],
    validation_data=btc_val,
    epochs=300,
    batch_size=512,
    callbacks=[EarlyStopping(monitor='val_loss', patience=30)],
)


Trial 50 Complete [00h 01m 06s]
val_loss: 0.0010300858411937952

Best val_loss So Far: 0.0006858810083940625
Total elapsed time: 00h 30m 41s
INFO:tensorflow:Oracle triggered exit


In [68]:
# Hyperparameter values of the best trial found by the search.
tuner.get_best_hyperparameters()[0].values

{'dense units_1': 128,
 'dense units_2': 128,
 'dropout_1': 0.03,
 'dropout_2': 0.03,
 'learning_rate': 0.008}

In [34]:
# Summary of the best trials.
# NOTE(review): the output saved under this cell has an older execution count
# than the surrounding cells and reports a different results directory than
# the tuner configured in this notebook. Re-run this cell before relying on it.
tuner.results_summary()

Results summary
Results in my_dir/baseline
Showing 10 best trials
Objective(name='val_loss', direction='min')
Trial summary
Hyperparameters:
dense units_1: 128
dense units_2: 64
dropout_1: 0.03
dropout_2: 0.01
learning_rate: 0.004
Score: 0.0008121904393192381
Trial summary
Hyperparameters:
dense units_1: 128
dense units_2: 128
dropout_1: 0.05
dropout_2: 0.03
learning_rate: 0.008
Score: 0.0009859047131612897
Trial summary
Hyperparameters:
dense units_1: 64
dense units_2: 64
dropout_1: 0.03
dropout_2: 0.08
learning_rate: 0.008
Score: 0.0011869834270328283
Trial summary
Hyperparameters:
dense units_1: 64
dense units_2: 64
dropout_1: 0.05
dropout_2: 0.08
learning_rate: 0.002
Score: 0.0014578334521502256
Trial summary
Hyperparameters:
dense units_1: 128
dense units_2: 128
dropout_1: 0.1
dropout_2: 0.05
learning_rate: 0.008
Score: 0.0016157910285983235
Trial summary
Hyperparameters:
dense units_1: 64
dense units_2: 128
dropout_1: 0.1
dropout_2: 0.03
learning_rate: 0.01
Score: 0.0017103207646