

# Training and tuning the hyperparameters of the proposed model


### Mount Google Drive

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the paths used by the following
# cells (/content/drive/MyDrive/...) live under this mount point.
drive.mount('/content/drive')

Add Google Drive to the Python path and change the working directory to the project's `models` folder. If your project lives elsewhere, replace `'/content/drive/MyDrive/y3_project/models'` with `'<root_project_folder>/models'`. This cell also installs the required libraries.

In [None]:
import sys
sys.path.append('/content/drive/MyDrive/')
%cd '/content/drive/MyDrive/y3_project/models'
!pip install tensorflow-text keras-tuner yfinance

### Learning rate grid search

In [None]:
## Learning rate grid search

import tensorflow as tf
# Let TensorFlow grow GPU memory on demand instead of reserving it all up
# front. Iterating over the device list (rather than indexing [0]) keeps the
# cell runnable on a CPU-only runtime, where the list is empty, and applies
# the same setting to every visible GPU.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, enable=True)

# Imported after the GPU configuration above, as in the original cell order.
from model import Model

# Number of leading grid combinations to skip. NOTE(review): this looks like
# a manual resume mechanism for an interrupted run (best_loss below is seeded
# with a previously observed value) -- set to 0 to run the full grid.
SKIP_FIRST_COMBINATIONS = 17

count = 0
# Best loss seen so far; the trailing tuple is the (lr, beta_1, beta_2) that
# produced it.
best_loss = 1.0716042518615723 # (0.001, 0.9, 0.9991)
# Each axis is "centre value +/- one small step". The first learning rate was
# 0.0099 in the original, which breaks that pattern and is ~10x the centre;
# it is corrected here to 0.0009. With SKIP_FIRST_COMBINATIONS = 17 every
# combination using that value is skipped anyway.
for lr in [0.0009, 0.001, 0.0011]:
    for beta_1 in [0.89, 0.9, 0.91]:
        for beta_2 in [0.9989, 0.999, 0.9991]:
            count += 1
            if count <= SKIP_FIRST_COMBINATIONS:
                continue

            print(f"\nTesting {[lr, beta_1, beta_2]}")

            # A fresh model per combination so no weights carry over
            # between trials.
            model = Model(seq_length=512, seq_num=1,
                            aggregator_size=(2, 256), albert_dense_size=(2, 512),
                            gru_shape=(2, 256), gru_dense_size=(2, 512),
                            encode_len=512, classifier_size=(1, 512),
                            drop_rate=0.01, gru_drop_rate=0.0001,
                            regularizer=tf.keras.regularizers.L2(0.0001),
                            batch_size=128)

            model.train(100, lr=lr, beta_1=beta_1, beta_2=beta_2)
            result = model.evaluate()

            print("\n-------------------------------")
            print(f"Using {lr, beta_1, beta_2}, {result} achieved")
            # The first entry of the evaluation result is used as the loss.
            loss = result[0]
            if loss < best_loss:
                best_loss = loss
                print(f"!!!New best loss of {loss} found with {lr, beta_1, beta_2}!!!")
            print("--------------------------------\n")



### Hyperband hyperparameter optimization

In [None]:
import keras_tuner as kt
from tensorflow import keras
import tensorflow as tf

from model import Model

def model_builder(hp, return_obj=False):
    """Build and compile one candidate network for the Keras Tuner search.

    Every architectural size, both dropout rates and the L2 regularisation
    strength are drawn from `hp`; the sequence settings and batch size are
    fixed.

    Args:
        hp: Keras Tuner hyperparameter container used to declare and sample
            the search space (`hp.Int` / `hp.Float`).
        return_obj: When True, return the project `Model` wrapper instead of
            the underlying Keras model. The wrapped Keras model is compiled
            in both cases. Defaults to False, which is what the tuner needs.

    Returns:
        The compiled Keras model (`model_obj.model`), or the `Model` wrapper
        itself when `return_obj` is True.
    """
    agg_depth = hp.Int('agg_depth', min_value=1, max_value=4, step=1)
    agg_width = hp.Int('agg_width', min_value=64, max_value=384, step=64)
    albert_depth = hp.Int('albert_depth', min_value=2, max_value=3, step=1)
    # NOTE(review): the key 'abert_width' is misspelled. It is left unchanged
    # on purpose: the tuner identifies hyperparameters by name, so renaming
    # it would presumably no longer match trials already saved under
    # 'tuning/raw_data_final' -- confirm before fixing.
    albert_width = hp.Int('abert_width', min_value=640, max_value=1536, step=256)
    gru_depth = hp.Int('gru_depth', min_value=1, max_value=2, step=1)
    gru_width = hp.Int('gru_width', min_value=64, max_value=256, step=32)
    # step was 2, which over the range 1..2 leaves 1 as the only reachable
    # value; step=1 makes the declared maximum of 2 actually searchable.
    gru_dense_depth = hp.Int('gru_dense_depth', min_value=1, max_value=2, step=1)
    gru_dense_width = hp.Int('gru_dense_width', min_value=256, max_value=768, step=128)
    encode_len = hp.Int('encode_len', min_value=64, max_value=1024, step=64)
    classifier_depth = hp.Int('classifier_depth', min_value=1, max_value=3, step=1)
    classifier_width = hp.Int('classifier_width', min_value=256, max_value=1024, step=128)
    drop_rate = hp.Float('drop_rate', min_value=0.001, max_value=0.5)
    gru_drop_rate = hp.Float('gru_drop_rate', min_value=0.001, max_value=0.3)
    regularize_rate = hp.Float('regularize', min_value=0.00001, max_value=0.0001)

    model_obj = Model(raw_data=True, seq_length=512, seq_num=1,
                aggregator_size=(agg_depth, agg_width),
                albert_dense_size=(albert_depth, albert_width),
                gru_shape=(gru_depth, gru_width),
                gru_dense_size=(gru_dense_depth, gru_dense_width),
                encode_len=encode_len, classifier_size=(classifier_depth, classifier_width),
                drop_rate=drop_rate, gru_drop_rate=gru_drop_rate,
                regularizer=tf.keras.regularizers.L2(regularize_rate),
                batch_size=128)
    model = model_obj.model
    # Adam with default learning rate and beta_1; only beta_2 is overridden.
    model.compile(keras.optimizers.Adam(beta_2=0.9991), loss='categorical_crossentropy', metrics=['accuracy', 'mse'])

    # Previously `return_obj` was accepted but ignored.
    if return_obj:
        return model_obj
    return model

# Model instance used in this cell only as the source of the dataset and the
# batch size; the networks that are actually tuned come from model_builder.
ds_model = Model(raw_data=True, seq_length=512, seq_num=1,
                aggregator_size=(2, 256), albert_dense_size=(2, 512),
                gru_shape=(2, 256), gru_dense_size=(2, 512),
                encode_len=512, classifier_size=(1, 512),
                drop_rate=0.01, gru_drop_rate=0.0001,
                regularizer=tf.keras.regularizers.L2(0.0001),
                batch_size=128)

# Shuffle once with a fixed order (reshuffle_each_iteration=False) so that the
# take/skip split below is intended to select the same examples on every pass.
train_dataset = ds_model.get_dataset().shuffle(20000, reshuffle_each_iteration=False)
# First 512 examples form the validation set.
validation_dataset = train_dataset.take(512).batch(ds_model.batch_size)
# Remaining examples: reshuffled every epoch, capped at 5000 per epoch, then
# batched. prefetch is applied last (it used to sit before shuffle/take/batch)
# so that ready-made batches are buffered while the model trains; this does
# not change which elements are produced or their order.
train_dataset = (
    train_dataset.skip(512)
    .shuffle(10000, reshuffle_each_iteration=True)
    .take(5000)
    .batch(ds_model.batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

# Hyperband search maximising validation accuracy; results are stored under
# ./tuning/raw_data_final.
tuner = kt.Hyperband(model_builder, objective='val_accuracy', max_epochs=50, factor=3, hyperband_iterations=2, directory='tuning', project_name="raw_data_final")
# Abort a trial once val_accuracy has not improved for 3 consecutive epochs.
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=3)
tuner.search(train_dataset, epochs=50, validation_data=validation_dataset, callbacks=[stop_early])
tuner.results_summary()

### Train the model

In [None]:
%cd '/content/drive/MyDrive/y3_project/models'

import tensorflow as tf

from model import Model

# Hyperparameters of the final model, collected in one place so they can be
# read (and logged) independently of the constructor call.
final_hyperparameters = dict(
    raw_data=True,
    seq_length=512,
    seq_num=1,
    aggregator_size=(3, 128),
    albert_dense_size=(2, 850),
    gru_shape=(1, 96),
    gru_dense_size=(1, 480),
    encode_len=536,
    classifier_size=(2, 320),
    drop_rate=0.186,
    gru_drop_rate=0.277,
    regularizer=tf.keras.regularizers.L2(4e-05),
    batch_size=128,
    checkpoint_file="./model_checkpoint",
)
model = Model(**final_hyperparameters)

# Train with the optimizer defaults of Model.train, then evaluate.
model.train(100)
# Optional architecture diagram:
# tf.keras.utils.plot_model(model.model, expand_nested=True, show_layer_names=False, to_file="model.png")
result = model.evaluate()