<Strong> Develop a TCN model that overfits, then add regularization, then tune the hyper-parameters. </Strong>

In [40]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from keras_tuner.tuners import RandomSearch
from IPython.display import display, Image
from tensorflow import keras
from tensorflow.keras.utils import plot_model
from matplotlib import pyplot as plt
from tensorflow.keras.models import Sequential 
from tensorflow.keras import layers
from tensorflow.keras.optimizers import RMSprop
import os
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error , mean_absolute_error , mean_absolute_percentage_error
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.layers import LSTM , Dense , Dropout , GRU , Concatenate , Flatten , Input , Conv1D , InputLayer , MaxPooling1D
from tensorflow.keras import models
from tensorflow.keras.callbacks import EarlyStopping
import math

In [41]:
# Raw BTC table; the file is looked up relative to the notebook's working directory.
BTC_CSV_PATH = "BTC_1h_data.csv"
df_btc = pd.read_csv(BTC_CSV_PATH)

<Strong> Grabbing the closing price (univariate) </Strong>

In [42]:
# Positional column 4 is taken as the closing price, giving a 1-D float series.
CLOSE_COLUMN = 4
btc_data = df_btc.to_numpy()[:, CLOSE_COLUMN].astype(float)

<Strong> Scaling the data  </Strong>

In [43]:
# Percentages of the samples assigned to training and validation;
# whatever is left after both goes to the test set.
percTrain, percVal = 70, 20

In [44]:
# Size of the training portion, counted in whole-percent steps of the series length.
onePercent = len(btc_data) // 100
numberTraining = percTrain * onePercent

# Column vector: one row per time step, a single feature column.
reshaped_data = btc_data.reshape(-1, 1)

# Fit the scaler on the training portion only (fitting on the full series would
# leak validation/test statistics), then scale the whole series with it.
scaler = MinMaxScaler().fit(reshaped_data[:numberTraining])
scaled_btc = scaler.transform(reshaped_data)

<Strong> Hyper-parameters </Strong>

In [45]:
# Hyper-parameters that depend on the number of layers:
#   - dilation_rate
#   - window_length
#   - kernel_size
#
# The remaining hyper-parameters:
#   - learning_rate
#   - dense layer size
#   - filters
#   - batch_size

features = 1          # univariate input
kernel_size = 2
dilation_rate = 2
window_length = 72    # number of time steps in each input window

<Strong> Creating the matrix in sliding-window form </Strong>

In [46]:
def sliding_window(elements, window_size):
    """Turn a sequence into overlapping input windows and next-step targets.

    Window ``i`` is ``elements[i : i + window_size]`` and its target is the
    element that immediately follows it, ``elements[i + window_size]``.

    Parameters
    ----------
    elements : array-like
        Sequence to slice along its first axis.
    window_size : int
        Number of consecutive elements in each window; must be at least 1.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``(data, targets)``. ``data`` has shape ``(n, window_size, ...)`` and
        ``targets`` has shape ``(n, ...)``, where
        ``n = max(len(elements) - window_size, 0)``. When the input is too
        short to form a single window both arrays are empty, so the result
        can always be unpacked as a pair.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")

    elements = np.asarray(elements)
    n_windows = max(len(elements) - window_size, 0)

    # Row i of the index matrix is [i, i + 1, ..., i + window_size - 1], so a
    # single indexing operation gathers every window without a Python loop.
    starts = np.arange(n_windows)[:, None]
    offsets = np.arange(window_size)[None, :]
    data = elements[starts + offsets]

    # The target of window i sits right after it, at position i + window_size.
    targets = elements[window_size:window_size + n_windows]

    return data, targets

# Windowed form of the scaled series, used by the train/val/test split below.
sliding_winda_btc = sliding_window(elements=scaled_btc, window_size=window_length)

<Strong> Splitting the data into train, validation and test sets </Strong>

In [47]:
# Splitting the data after creating the sliding window data
def splitting_train_test(data, perc_train=None, perc_val=None):
    """Split windowed data, in order, into training, validation and test parts.

    The split sizes are computed in whole-percent steps of the number of
    targets (``len(data[1]) // 100``); everything after the validation part
    becomes the test part.

    Parameters
    ----------
    data : tuple
        ``(windows, targets)`` pair of equally long, sliceable sequences.
    perc_train : int, optional
        Percentage of samples used for training. Defaults to the
        module-level ``percTrain``.
    perc_val : int, optional
        Percentage of samples used for validation. Defaults to the
        module-level ``percVal``.

    Returns
    -------
    tuple
        ``(trainingData, validationData, testData)``, each of which is a
        ``(windows, targets)`` tuple.

    Raises
    ------
    ValueError
        If a percentage is negative or the two percentages exceed 100.
    """
    # Fall back to the notebook-level configuration when not given explicitly.
    if perc_train is None:
        perc_train = percTrain
    if perc_val is None:
        perc_val = percVal

    if perc_train < 0 or perc_val < 0 or perc_train + perc_val > 100:
        raise ValueError("perc_train and perc_val must be non-negative and sum to at most 100")

    windows, targets = data[0], data[1]
    onePercent = len(targets) // 100

    train_end = onePercent * perc_train
    val_end = train_end + onePercent * perc_val

    def _part(start, stop):
        # Same slice applied to windows and targets keeps them aligned.
        return windows[start:stop], targets[start:stop]

    # Returning tuples of (sliding-window, target_values)
    return _part(0, train_end), _part(train_end, val_end), _part(val_end, None)

# Each of the three results is a (windows, targets) tuple.
btc_train, btc_val, btc_test = splitting_train_test(data=sliding_winda_btc)


<Strong> Computing the number of layers needed for a given dilation_rate, kernel_size and window_size. </Strong>

In [48]:
def getLayers(dilation_rate , window_size , kernel_size):
    """Find the fewest dilated conv layers whose receptive field spans the window.

    The calculation assumes layer ``i`` (counting from 0) uses dilation
    ``dilation_rate ** i``. With ``n`` such layers of kernel size ``k`` and
    dilation base ``b`` the receptive field is::

        1 + (k - 1) * (b**n - 1) / (b - 1)

    It is accumulated here layer by layer in exact integer arithmetic, which
    avoids floating-point logarithm rounding and the division by zero that a
    closed form hits for ``k == 1`` or ``b == 1``.

    Parameters
    ----------
    dilation_rate : int
        Base ``b`` by which the dilation grows from one layer to the next.
    window_size : int
        Number of input time steps the receptive field has to cover.
    kernel_size : int
        Kernel size ``k`` of every convolution.

    Returns
    -------
    tuple or bool
        ``(layers, dilation_rate, kernel_size)`` when full coverage is
        possible (``layers`` is 0 for a window of a single step), otherwise
        ``False``.

    Raises
    ------
    ValueError
        If any argument is smaller than 1.
    """
    if dilation_rate < 1 or window_size < 1 or kernel_size < 1:
        raise ValueError("dilation_rate, window_size and kernel_size must all be >= 1")

    # kernel_size < dilation_rate leaves gaps between the taps of successive
    # layers; a kernel of size 1 never widens the receptive field at all.
    if (kernel_size < dilation_rate) or (kernel_size == 1 and window_size > 1):
        print("not going to have full coverage")
        return False

    n_layers = 0
    receptive_field = 1
    dilation = 1

    # Each extra layer widens the receptive field by (kernel_size - 1) * dilation.
    while receptive_field < window_size:
        receptive_field += (kernel_size - 1) * dilation
        dilation *= dilation_rate
        n_layers += 1

    print("layers =" , n_layers , "kernel size =" , kernel_size , "dilation rate =" , dilation_rate )
    return n_layers , dilation_rate , kernel_size

# Example: layers needed to cover a 24-step window.
getLayers(dilation_rate=2, window_size=24, kernel_size=2)


layers = 5 kernel size = 2 dilation rate = 2


(5, 2, 2)

In [49]:
def createModelFiveLayers(hp):
    """Build and compile the five-layer dilated causal Conv1D model for the tuner.

    Parameters
    ----------
    hp
        Hyper-parameter object supplied by keras-tuner. Choices for the
        learning rate, dense-layer width, number of filters and dropout rate
        are drawn from it.

    Returns
    -------
    The compiled ``Sequential`` model (Adam optimizer, MSE loss).
    """
    # Search space; the choices are registered in this order.
    learning_rate = hp.Choice('learning_rate', values=[0.002, 0.004, 0.006, 0.008])
    dense_units = hp.Choice('dense_layer', values=[16, 32, 64])
    n_filters = hp.Choice('filters', values=[8, 16, 32])
    dropout_rate = hp.Choice('dropout', values=[0.0, 0.05, 0.1])

    network = models.Sequential()

    # Five causal convolutions with kernel size 2 and dilations 1, 2, 4, 8, 16.
    # Only the first layer declares the input shape.
    for position, dilation in enumerate((1, 2, 4, 8, 16)):
        first_layer_args = {'input_shape': (window_length, 1)} if position == 0 else {}
        network.add(
            Conv1D(
                filters=n_filters,
                kernel_size=2,
                activation='relu',
                padding='causal',
                dilation_rate=dilation,
                **first_layer_args,
            )
        )

    # Regression head: flatten all time steps, dropout, one hidden dense layer,
    # then a single linear output.
    network.add(Flatten())
    network.add(Dropout(dropout_rate))
    network.add(Dense(dense_units, activation='relu'))
    network.add(Dense(1))

    network.summary()

    network.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')

    return network

In [50]:
# Random search over the hyper-parameters declared in createModelFiveLayers,
# with validation loss as the objective.
tuner_settings = {
    "objective": "val_loss",
    "max_trials": 25,
    "executions_per_trial": 1,
    "directory": 'tcn',
    "project_name": 'tcn 5 layers 72 window',
}
tuner = RandomSearch(createModelFiveLayers, **tuner_settings)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_10 (Conv1D)          (None, 72, 8)             24        
                                                                 
 conv1d_11 (Conv1D)          (None, 72, 8)             136       
                                                                 
 conv1d_12 (Conv1D)          (None, 72, 8)             136       
                                                                 
 conv1d_13 (Conv1D)          (None, 72, 8)             136       
                                                                 
 conv1d_14 (Conv1D)          (None, 72, 8)             136       
                                                                 
 flatten_2 (Flatten)         (None, 576)               0         
                                                                 
 dropout_2 (Dropout)         (None, 576)              

In [51]:
# Early stopping watches val_loss with a patience of 30 epochs.
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=30)

# btc_train is a (windows, targets) tuple.
train_windows, train_targets = btc_train

tuner.search(
    train_windows,
    train_targets,
    epochs=300,
    batch_size=512,
    validation_data=btc_val,
    callbacks=[stop_early],
)


Trial 17 Complete [00h 01m 49s]
val_loss: 0.001506519503891468

Best val_loss So Far: 0.000559048552531749
Total elapsed time: 00h 56m 12s

Search: Running Trial #18

Hyperparameter    |Value             |Best Value So Far 
learning_rate     |0.002             |0.004             
dense_layer       |16                |16                
filters           |32                |32                
dropout           |0.1               |0                 

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 72, 32)            96        
                                                                 
 conv1d_1 (Conv1D)           (None, 72, 32)            2080      
                                                                 
 conv1d_2 (Conv1D)           (None, 72, 32)            2080      
                                                                 

KeyboardInterrupt: 

In [None]:
# Show the tuner's summary of the search results.
tuner.results_summary()