<Strong> Develop a TCN model that overfits , then do some regularization , then do some tuning. </Strong>

In [58]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras_tuner.tuners import RandomSearch
from tensorflow.keras.utils import plot_model
from matplotlib import pyplot as plt
from tensorflow.keras.models import Sequential 
from tensorflow.keras import layers
from tensorflow.keras.optimizers import RMSprop , Adam
import os
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error , mean_absolute_error , mean_absolute_percentage_error
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.layers import LSTM , Dense , Dropout , GRU , Concatenate , Input , Conv1D , InputLayer , MaxPooling1D
from tensorflow.keras import models
from tensorflow.keras.callbacks import EarlyStopping
import math

In [59]:
#df_btc = pd.read_csv("../../cryptoData/BTC_1h_data.csv")
df_btc = pd.read_csv("BTC_1h_data.csv")

<Strong> Grabbing the closing price (univariate) </Strong>

In [60]:
# Removing everything but the closing price
btc_data = df_btc.values[:, 4 ,].astype(float)

btc_data[-1]

43534.54

<Strong> Scaling the data  </Strong>

In [61]:
percTrain = 70
percVal = 20 

In [62]:
scaler = MinMaxScaler()
    
onePercent = len(btc_data) // 100
numberTraining = onePercent * percTrain

reshaped_data = btc_data.reshape(-1,1)

#Just scaling on training data otherwise it would be leakage
scaler.fit(reshaped_data[:numberTraining])
scaled_btc = scaler.transform(reshaped_data)

<Strong> Creating Matrix in Sliding window form <Strong>

In [63]:
def sliding_window(elements, window_size):
    
    data = [] 
    targets = []
    
    if len(elements) <= window_size:
        return elements
    
    for i in range(len(elements) - window_size ):
        
        data.append(elements[i:i+window_size])
        targets.append(elements[i+window_size])
        
    return np.array(data) , np.array(targets)

In [64]:
# Using 24 datapoints to predict the 25th

window_length = 24
dilation_rate = 2
kernel_size = 2

features = 1

sliding_winda_btc = sliding_window(scaled_btc , window_length)

<Strong> Splitting the data into train , val , test </Strong>

In [65]:
# Splitting the data after creating the sliding window data
def splitting_train_test(data):
        
    onePercent = len(data[1]) // 100
    
    numberTraining = onePercent * percTrain
    numberValidation = onePercent * percVal
    
    trainingData = data[0][:numberTraining] , data[1][:numberTraining]
    validationData = data[0][numberTraining : numberTraining + numberValidation] , data[1][numberTraining : numberTraining + numberValidation]
    testData = data[0][numberTraining + numberValidation:] , data[1][numberTraining + numberValidation:] 
    
    #Returning tuples of (sliding-window , target_values)
    return trainingData , validationData , testData

In [66]:
btc_train , btc_val , btc_test = splitting_train_test(sliding_winda_btc)

print(btc_train[0].shape)

(27370, 24, 1)


In [67]:
# Hyper Parameters To Overfit 

# Add Layers 
# Train longer


<Strong> The number of layers while changing dilation_base , kernel_size or window_size. </Strong>

In [68]:
def getLayers(dilation_rate , window_size , kernel_size):
    
    top = (dilation_rate - 1) * (window_size - 1)
    bottom = (kernel_size - 1) 
    
    division = (top / bottom) + 1 
    log = math.ceil(math.log(division , dilation_rate))
    
    
    # This inequality must hold true for full coverage
    
    first = 1 + (kernel_size - 1)
    second = (dilation_rate ** log ) - 1
    third = dilation_rate - 1
    
    inequality = (second / third) * first
    
    if ( (kernel_size < dilation_rate) or (inequality < window_size) ):
        print("not going to have full coverage")

    return log , dilation_rate , kernel_size

In [69]:
num_layers = getLayers( dilation_rate , window_length , kernel_size)

In [70]:

def createModel(model_params):
    
    num_layers = model_params[0]
    dilation = model_params[1]
    kernel_size = model_params[2]
    
    model1 = models.Sequential()
    
    # Casual adds padding to the start of input sequence
    model1.add(Conv1D(filters=32, kernel_size=kernel_size, activation='relu', input_shape=(window_length, features), dilation_rate=1 , padding = 'causal'))

    #minus one for the base conv1d layer ^
    for i in range (num_layers - 1):
        
        # For full coverage we need 4 layers 
        model1.add(Conv1D(filters=32, kernel_size=kernel_size, activation='relu' , padding = 'causal', dilation_rate= dilation**i))

    model1.add(Dense(128, activation='relu'))

    model1.add(Dense(1))

    model1.summary()


In [74]:
createModel(getLayers(dilation_rate , window_length , kernel_size))

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_29 (Conv1D)          (None, 24, 32)            96        
                                                                 
 conv1d_30 (Conv1D)          (None, 24, 32)            2080      
                                                                 
 conv1d_31 (Conv1D)          (None, 24, 32)            2080      
                                                                 
 conv1d_32 (Conv1D)          (None, 24, 32)            2080      
                                                                 
 conv1d_33 (Conv1D)          (None, 24, 32)            2080      
                                                                 
 dense_9 (Dense)             (None, 24, 128)           4224      
                                                                 
 dense_10 (Dense)            (None, 24, 1)            

In [72]:
# Full Converage = 1 + (kernel_size-1) . dilation_base^n - 1 / dilation_base - 1 >= l
# Number of required layers = log (dilation_base) ( ((input_length - 1) . (dilation_base - 1) / (k - 1) ) + 1 )
# Padding for each layer dilation_base ^ layers below our current . (kernel_size - 1) below = input

# For full coverage we need 4 layers (we are using a base of 2)


model1 = models.Sequential()

# Casual adds padding to the start of input sequence
model1.add(Conv1D(filters=32, kernel_size=2, activation='relu', input_shape=(window_length, features), dilation_rate=1 , padding = 'causal'))

#model1.add(MaxPooling1D( pool_size = 2 , padding='same'  , strides= 1))

model1.add(Conv1D(filters=32, kernel_size=2, activation='relu' , padding = 'causal', dilation_rate=2))

#model1.add(MaxPooling1D( pool_size = 2 , padding='same'  , strides= 1))
# For full coverage we need 4 layers 
model1.add(Conv1D(filters=32, kernel_size=2, activation='relu' , padding = 'causal' , dilation_rate=4))

#model1.add(MaxPooling1D( pool_size = 2 , padding='same'  , strides= 1))
# For full coverage we need 4 layers 
model1.add(Conv1D(filters=32, kernel_size=2, activation='relu' , padding = 'causal', dilation_rate= 8))

# For full coverage we need 4 layers 
model1.add(Conv1D(filters=32, kernel_size=2, activation='relu' , padding = 'causal', dilation_rate= 16))

model1.add(Dense(128) , activation='relu')

model1.add(Dense(1))

model1.summary()

TypeError: add() got an unexpected keyword argument 'activation'

In [38]:
opt = Adam(0.008)
model1.compile(optimizer=opt , loss = 'mse')

In [39]:
history = model1.fit(btc_train[0] , btc_train[1] , validation_data = btc_val  , batch_size = 512  , epochs =300 , verbose = 1 )

num_epochs = earlyStopping.stopped_epoch

Epoch 1/300
Epoch 2/300

KeyboardInterrupt: 