In [31]:
from transformer_hyperparam import get_compiled_transformer
import os
import pandas as pd
from copy import deepcopy
import numpy as np
import random
from tensorflow import keras

class ASLDataGenerator(keras.utils.Sequence):

    def __init__(self, dataset_path: str, max_phrase_length: int, max_sign_length: int, batch_size: int, train: bool = True):
        super().__init__()
        self.ds_path = dataset_path
        self.files = os.listdir(dataset_path)
        self.max_phrase_length = max_phrase_length
        self.max_sign_length = max_sign_length
        self.unwanted_columns = ['sequence_id', 'frame', 'participant_id', 'phrase']
        self.train = train
        self.batch_size = batch_size

    def __len__(self):
        return len(self.files)//self.batch_size

    def shuffle(self):
        random.shuffle(self.files)

    def on_epoch_end(self):
        if self.train:
            self.shuffle()

    def __getitem__(self, idx: int):
        batch_signs = []
        batch_contexts = []
        batch_phrases = []
        for i in range(idx, idx+self.batch_size):
            df_phrase = pd.read_pickle(self.ds_path + '/' + self.files[idx])
            phrase = df_phrase.iloc[0].phrase
            if len(phrase) > self.max_phrase_length:
                print('Houston we have a problem!')

            while len(phrase) < self.max_phrase_length:
                phrase.append(59)
            context = deepcopy(phrase)
            context.insert(0, 60)
            context = context[:-1]

            df_phrase = df_phrase.drop(self.unwanted_columns, axis=1)
            signs = df_phrase.to_numpy(copy=True)

            if signs.shape[0] > self.max_sign_length:
                print('Hooooouuuuusteeeeeen!')
            signs = np.pad(signs, [(0, self.max_sign_length-signs.shape[0]), (0,0)], 'edge')
            batch_signs.append(signs)
            batch_contexts.append(context)
            batch_phrases.append(phrase)

        batch_signs = np.array(batch_signs)
        batch_contexts = np.array(batch_contexts)
        batch_phrases = np.array(batch_phrases)

        return [batch_signs, batch_contexts], batch_phrases

In [32]:
MAX_PHRASE_LENGTH = 100
MAX_SIGN_LENGTH = 900
BATCH_SIZE=32

data_gen_train = ASLDataGenerator('./dataset complete/preprocessed_files_data_generator/train_ones', MAX_PHRASE_LENGTH, MAX_SIGN_LENGTH, batch_size=BATCH_SIZE)
data_gen_train.shuffle()

data_gen_test = ASLDataGenerator('./dataset complete/preprocessed_files_data_generator/test_ones', MAX_PHRASE_LENGTH, MAX_SIGN_LENGTH, train=False, batch_size=BATCH_SIZE)

data_gen_val = ASLDataGenerator('./dataset complete/preprocessed_files_data_generator/val_ones', MAX_PHRASE_LENGTH, MAX_SIGN_LENGTH, train=False, batch_size=BATCH_SIZE)

In [33]:
import optuna
import json

epochs = 5

def objective(trial):
    #starting from:
    # d_model = trial.suggest_int('d_model', 20, 256)
    #num_layers = trial.suggest_int('num_layers', 1, 10)
    #num_heads = trial.suggest_int('num_heads', 2, 10)
    #ff_dim = trial.suggest_int('ff_dim', 32, 1024)
    #dropout_rate = trial.suggest_float('droupout_rate', 0., 0.6)

    d_model = trial.suggest_int('d_model', 20, 145) #
    num_layers = trial.suggest_int('num_layers', 1, 2)
    num_heads = trial.suggest_int('num_heads', 2, 4)
    ff_dim = trial.suggest_int('ff_dim', 32, 400)
    dropout_rate = trial.suggest_float('droupout_rate', 0., 0.28)

    with open ("./dataset complete/character_to_prediction_index.json", "r") as f:
        characters = json.load(f)

    output_vocab_size = len(characters) + 2

    #print('Got Values')

    transformer = get_compiled_transformer(
        d_model=d_model,
        num_layers=num_layers,
        num_heads=num_heads,
        ff_dim=ff_dim,
        dropout_rate=dropout_rate,
        output_vocab_size=output_vocab_size)

    #print('Got Transformer')

    callbacks = [
        optuna.integration.TFKerasPruningCallback(trial, 'val_loss'),
    ]
    # print('Training....')
    history = transformer.fit(data_gen_train, epochs=epochs, batch_size=BATCH_SIZE, validation_data=data_gen_val, callbacks=callbacks)
    #print('Training complete!')

    eval_results = transformer.evaluate(data_gen_test, batch_size = BATCH_SIZE)
    print(f'Result:{float(eval_results[0]) }')

    return float(eval_results[0])

In [34]:
study = optuna.create_study(direction='minimize', pruner=optuna.pruners.SuccessiveHalvingPruner(), sampler=optuna.samplers.TPESampler())

#100
study.optimize(objective, n_trials=50)

pruned_trials = study.get_trials(states=[optuna.trial.TrialState.PRUNED])
complete_trials = study.get_trials(states=[optuna.trial.TrialState.COMPLETE])

print('Study statistics:')
print('   Number of finished Trials: ', len(study.trials))
print('   Number of pruned Trials: ', len(pruned_trials))
print('   Number of complete Trials: ', len(complete_trials))

print('Best Trial: ')
trial = study.best_trial

print('  Masked Loss: ', trial.value)

print('   Params: ')
for key, value in trial.params.items():
    print('   {}: {}'.format(key, value))

[I 2023-07-11 09:09:32,096] A new study created in memory with name: no-name-337031c5-7846-4e01-bbef-47da830462be


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2023-07-11 09:24:37,266] Trial 0 finished with value: 2.0121541023254395 and parameters: {'d_model': 130, 'num_layers': 1, 'num_heads': 2, 'ff_dim': 381, 'droupout_rate': 0.11111424528918497}. Best is trial 0 with value: 2.0121541023254395.


Result:2.0121541023254395
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2023-07-11 09:48:16,457] Trial 1 finished with value: 2.3174233436584473 and parameters: {'d_model': 40, 'num_layers': 2, 'num_heads': 4, 'ff_dim': 73, 'droupout_rate': 0.21955007933431644}. Best is trial 0 with value: 2.0121541023254395.


Result:2.3174233436584473
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2023-07-11 10:00:17,242] Trial 2 finished with value: 2.1362009048461914 and parameters: {'d_model': 58, 'num_layers': 1, 'num_heads': 3, 'ff_dim': 351, 'droupout_rate': 0.13513821255214498}. Best is trial 0 with value: 2.0121541023254395.


Result:2.1362009048461914
Epoch 1/5
Epoch 2/5

[I 2023-07-11 10:05:42,929] Trial 3 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 10:09:59,184] Trial 4 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

[I 2023-07-11 10:23:40,909] Trial 5 pruned. Trial was pruned at epoch 4.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 10:29:09,050] Trial 6 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 10:33:32,988] Trial 7 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 10:48:10,981] Trial 8 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2023-07-11 10:59:58,109] Trial 9 finished with value: 2.040076494216919 and parameters: {'d_model': 116, 'num_layers': 1, 'num_heads': 2, 'ff_dim': 250, 'droupout_rate': 0.10024168631270144}. Best is trial 0 with value: 2.0121541023254395.


Result:2.040076494216919
Epoch 1/5
Epoch 2/5

[I 2023-07-11 11:14:44,358] Trial 10 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 11:19:33,122] Trial 11 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 11:24:16,552] Trial 12 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 11:29:42,639] Trial 13 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 11:37:00,270] Trial 14 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

[I 2023-07-11 11:52:00,357] Trial 15 pruned. Trial was pruned at epoch 4.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 11:56:12,932] Trial 16 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 12:14:07,718] Trial 17 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

[I 2023-07-11 12:25:15,304] Trial 18 pruned. Trial was pruned at epoch 4.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 12:30:17,378] Trial 19 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 12:42:34,568] Trial 20 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

[I 2023-07-11 12:54:16,859] Trial 21 pruned. Trial was pruned at epoch 4.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 12:58:32,799] Trial 22 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2023-07-11 13:11:53,526] Trial 23 finished with value: 2.0958447456359863 and parameters: {'d_model': 75, 'num_layers': 1, 'num_heads': 3, 'ff_dim': 303, 'droupout_rate': 0.08323261905486412}. Best is trial 0 with value: 2.0121541023254395.


Result:2.0958447456359863
Epoch 1/5
Epoch 2/5

[I 2023-07-11 13:18:02,606] Trial 24 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 13:22:29,376] Trial 25 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 13:28:11,037] Trial 26 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2023-07-11 13:40:33,749] Trial 27 finished with value: 2.038632392883301 and parameters: {'d_model': 124, 'num_layers': 1, 'num_heads': 2, 'ff_dim': 399, 'droupout_rate': 0.1023889144392246}. Best is trial 0 with value: 2.0121541023254395.


Result:2.038632392883301
Epoch 1/5
Epoch 2/5

[I 2023-07-11 13:45:24,315] Trial 28 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 13:56:04,902] Trial 29 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 14:00:42,798] Trial 30 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

[I 2023-07-11 14:17:44,801] Trial 31 pruned. Trial was pruned at epoch 4.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 14:22:02,518] Trial 32 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

[I 2023-07-11 14:32:56,162] Trial 33 pruned. Trial was pruned at epoch 4.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 14:37:25,092] Trial 34 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 14:43:36,254] Trial 35 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 14:49:18,591] Trial 36 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 14:55:10,614] Trial 37 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 14:59:26,840] Trial 38 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 15:07:25,803] Trial 39 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 15:13:06,485] Trial 40 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

[I 2023-07-11 15:24:49,527] Trial 41 pruned. Trial was pruned at epoch 4.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

[I 2023-07-11 15:36:02,945] Trial 42 pruned. Trial was pruned at epoch 4.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

[I 2023-07-11 15:47:46,518] Trial 43 pruned. Trial was pruned at epoch 4.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 15:52:35,478] Trial 44 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 15:57:02,034] Trial 45 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 16:01:25,200] Trial 46 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 16:12:11,591] Trial 47 pruned. Trial was pruned at epoch 1.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

[I 2023-07-11 16:27:48,148] Trial 48 pruned. Trial was pruned at epoch 4.


Epoch 1/5
Epoch 2/5

[I 2023-07-11 16:34:51,520] Trial 49 pruned. Trial was pruned at epoch 1.


Study statistics:
   Number of finished Trials:  50
   Number of pruned Trials:  44
   Number of complete Trials:  6
Best Trial: 
  Masked Loss:  2.0121541023254395
   Params: 
   d_model: 130
   num_layers: 1
   num_heads: 2
   ff_dim: 381
   droupout_rate: 0.11111424528918497
