In [None]:
# Upper bound on training epochs per experiment (EarlyStopping may end a run sooner).
EPOCH: int = 64
# Number of sequences per batch for the train/val/test generators.
BATCH_SIZE: int = 64

In [None]:
import os
os.environ['KERAS_BACKEND'] = "torch"
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
# TEST: force CPU
# os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
from keras.models import Sequential
from keras.layers import Dense, LSTM, GRU
from keras.optimizers import Adam
import torch
import keras
import numpy as np
import matplotlib.pyplot as plt
import time

from config import config_parser
from mp_util_legacy import preprocess_keypoints_multiple

In [None]:
class KeypointDataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` serving batches of keypoint sequences loaded from disk.

    ``keypoint_path`` must contain one sub-directory per label. Every file in a
    label directory is loaded with ``np.load`` and reshaped to
    ``(seq_max_len, -1)``. A label is encoded as its position in the sorted
    list of label directory names (``all_labels``).

    Args:
        keypoint_path: Root directory holding one sub-directory per label.
        seq_max_len: Number of rows each loaded array is reshaped to.
        batch_size: Samples per batch. A trailing partial batch is dropped.
        shuffle: If True, the sample order is reshuffled at every epoch end.
        transform: If True, every sequence is randomly augmented through
            ``preprocess_keypoints_multiple`` when it is loaded.
        **kwargs: Forwarded unchanged to the Keras base-class constructor.
    """

    def __init__(self, keypoint_path, seq_max_len, batch_size=32, shuffle=False, transform=False, **kwargs):
        # Keras 3 expects dataset subclasses to initialise the base class;
        # skipping this call triggers a warning on every fit/evaluate.
        super().__init__(**kwargs)

        self.keypoint_path = keypoint_path
        self.seq_max_len = seq_max_len
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.transform = transform

        # Only directories are labels; stray files (e.g. ``.DS_Store``) would
        # otherwise make the per-label ``os.listdir`` below raise.
        self.all_labels = sorted(
            entry for entry in os.listdir(keypoint_path)
            if os.path.isdir(os.path.join(keypoint_path, entry))
        )
        # Constant-time label -> class-index lookup used when building batches.
        self._label_to_index = {label: i for i, label in enumerate(self.all_labels)}

        self.file_list = []
        self.labels = []
        for label in self.all_labels:
            # ``os.listdir`` order is OS-dependent; sorting makes the seeded
            # shuffle reproducible across machines and file systems.
            for file in sorted(os.listdir(os.path.join(keypoint_path, label))):
                self.file_list.append(file)
                self.labels.append(label)

        # Start at -1 so the first ``on_epoch_end`` call lands on epoch 0.
        self.epoch_count = -1
        self.on_epoch_end()

    def reset_epoch(self):
        """Rewind the epoch counter so the next run replays the same seeded order."""
        self.epoch_count = -1
        self.on_epoch_end()

    def on_epoch_end(self):
        """Advance the epoch counter, reseed NumPy and rebuild the sample order.

        NOTE(review): this reseeds the *global* NumPy RNG, so it also affects
        any other code drawing from ``np.random`` (including the augmentation
        parameters drawn in ``preprocess_keypoints``).
        """
        self.epoch_count += 1
        np.random.seed(self.epoch_count * 42)
        self.indexes = np.arange(len(self.file_list))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __len__(self):
        """Return the number of full batches; leftover samples are not served."""
        return int(np.floor(len(self.file_list) / self.batch_size))

    def preprocess_keypoints(self, keypoints):
        """Apply one randomly parameterised augmentation to ``keypoints``.

        Draws an integer angle in ``[-20, 20)``, x/y translations in
        ``[-0.4, 0.4)`` and a scale in ``[0.6, 1.2)`` from the global NumPy RNG
        and forwards them to ``preprocess_keypoints_multiple``.
        """
        angle = np.random.randint(-20, 20)
        tx = np.random.uniform(-0.4, 0.4)
        ty = np.random.uniform(-0.4, 0.4)
        scale = np.random.uniform(0.6, 1.2)
        return preprocess_keypoints_multiple(keypoints, angle=angle, tx=tx, ty=ty, scale=scale)

    def __data_generation(self, indexes):
        """Load, optionally augment and reshape the samples at ``indexes``.

        Returns:
            ``(X, Y)`` where ``X`` stacks the reshaped sequences and ``Y`` holds
            the integer class index of each sample.
        """
        sequences = []
        label_ids = []

        for index in indexes:
            label = self.labels[index]
            path = os.path.join(self.keypoint_path, label, self.file_list[index])
            # Memory-map the file; ``np.array`` below copies it into the batch.
            sequence = np.load(path, mmap_mode="r")
            if self.transform:
                sequence = self.preprocess_keypoints(sequence)
            sequences.append(sequence.reshape(self.seq_max_len, -1))
            label_ids.append(self._label_to_index[label])

        X = np.array(sequences)
        Y = np.array(label_ids)
        return X, Y

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(X, Y)`` tuple."""
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        X, Y = self.__data_generation(indexes)
        return X, Y

In [None]:
# Settings shared by all three dataset splits, named once instead of repeated.
DATASET_ROOT = "../dataset/split"
SEQ_MAX_LEN = 30  # rows each loaded keypoint array is reshaped to

# Training split: reshuffled every epoch and randomly augmented.
training_generator = KeypointDataGenerator(
    f"{DATASET_ROOT}/train",
    seq_max_len=SEQ_MAX_LEN,
    batch_size=BATCH_SIZE,
    shuffle=True,
    transform=True,
)
# Validation split: fixed order, no augmentation.
validation_generator = KeypointDataGenerator(
    f"{DATASET_ROOT}/val",
    seq_max_len=SEQ_MAX_LEN,
    batch_size=BATCH_SIZE,
    shuffle=False,
    transform=False,
)
# Test split: fixed order, no augmentation.
test_generator = KeypointDataGenerator(
    f"{DATASET_ROOT}/test",
    seq_max_len=SEQ_MAX_LEN,
    batch_size=BATCH_SIZE,
    shuffle=False,
    transform=False,
)

In [None]:
def train_model(model, learning_rate, epochs):
    """Compile, fit, plot and evaluate ``model`` on the notebook's generators.

    The model is trained with Adam and sparse categorical cross-entropy on
    ``training_generator``, validated on ``validation_generator`` with early
    stopping on ``val_loss`` (patience 5, best weights restored), and finally
    evaluated on ``test_generator``. Accuracy and loss curves are plotted.

    Args:
        model: An uncompiled Keras model.
        learning_rate: Learning rate handed to the Adam optimizer.
        epochs: Maximum number of epochs to train for.

    Returns:
        ``(train_acc, val_acc, test_acc, seconds)``. ``train_acc`` and
        ``val_acc`` are the values of the *last* epoch that ran, while
        ``test_acc`` is measured after early stopping has finished.
        ``seconds`` is the wall-clock duration of the whole call.
    """
    started = time.time()

    # Rewind every generator so each experiment sees the same seeded order.
    for generator in (training_generator, validation_generator, test_generator):
        generator.reset_epoch()

    model.summary()
    model.compile(
        optimizer=Adam(learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    early_stopping = keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True,
    )
    history = model.fit(
        training_generator,
        validation_data=validation_generator,
        epochs=epochs,
        verbose=1,
        callbacks=[early_stopping],
    )
    curves = history.history

    def show_curves(metric, title, ylabel):
        # Draw the training and validation series of one metric in one figure.
        plt.plot(curves[metric])
        plt.plot(curves['val_' + metric])
        plt.title(title)
        plt.ylabel(ylabel)
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Val'], loc='upper left')
        plt.show()

    show_curves('accuracy', 'Model accuracy', 'Accuracy')
    show_curves('loss', 'Model loss', 'Loss')

    # Score the model on the held-out test generator.
    test_loss, test_acc = model.evaluate(test_generator)
    print('Test loss: {}'.format(test_loss))
    print('Test accuracy: {}'.format(test_acc))

    elapsed = time.time() - started

    return curves['accuracy'][-1], curves['val_accuracy'][-1], test_acc, elapsed

In [None]:
# Peek at the first training batch to learn the per-sample input shape
# (the leading batch dimension is stripped).
X_shape = training_generator[0][0].shape[1:]
# Number of classes, i.e. the size of the softmax output layer.
Y_shape = len(training_generator.all_labels)
print(X_shape, Y_shape)
# Maps experiment name -> tuple returned by train_model.
results = {}

# Test 1: LSTM vs GRU (Single Layer)

In [None]:
# Single-layer LSTM (128 units) followed by a softmax classifier.
# The input shape is declared with an explicit Input layer: passing
# `input_shape=` to the first layer is deprecated in Keras 3.
model_test1 = Sequential([
    keras.Input(shape=X_shape),
    LSTM(128, return_sequences=False),
    Dense(Y_shape, activation='softmax')
])
results["LSTM_single_layer"] = train_model(model_test1, 0.0001, EPOCH)

In [None]:
# Single-layer GRU (128 units) followed by a softmax classifier.
# The input shape is declared with an explicit Input layer: passing
# `input_shape=` to the first layer is deprecated in Keras 3.
model_test2 = Sequential([
    keras.Input(shape=X_shape),
    GRU(128, return_sequences=False),
    Dense(Y_shape, activation='softmax')
])
results["GRU_single_layer"] = train_model(model_test2, 0.0001, EPOCH)

# Test 2: LSTM vs GRU (Multi Layer)

In [None]:
# Two stacked LSTM layers (128 units each); the first returns the full
# sequence so the second can consume it.
# The input shape is declared with an explicit Input layer: passing
# `input_shape=` to the first layer is deprecated in Keras 3.
model_test3 = Sequential([
    keras.Input(shape=X_shape),
    LSTM(128, return_sequences=True),
    LSTM(128, return_sequences=False),
    Dense(Y_shape, activation='softmax')
])
results["LSTM_double_layer"] = train_model(model_test3, 0.0001, EPOCH)

In [None]:
# Two stacked GRU layers (128 units each); the first returns the full
# sequence so the second can consume it.
# The input shape is declared with an explicit Input layer: passing
# `input_shape=` to the first layer is deprecated in Keras 3.
model_test4 = Sequential([
    keras.Input(shape=X_shape),
    GRU(128, return_sequences=True),
    GRU(128, return_sequences=False),
    Dense(Y_shape, activation='softmax')
])
results["GRU_double_layer"] = train_model(model_test4, 0.0001, EPOCH)

In [None]:
# One line per experiment: its name followed by the tuple train_model returned.
for entry in results.items():
    print(*entry)

Now that we know the single-layer LSTM performs best, we test different learning rates.

# Test 3: Learning rates

Note: baseline from Test 1 (single-layer LSTM, 128 units) with `lr=0.0001`: test accuracy 0.91796875

In [None]:
# Note: the laptop broke at this point, so the results of Tests 1 and 2 are
# re-created by hand from the recorded output.
# Tuple layout: (train_acc, val_acc, test_acc, seconds).
_recorded_results = {
    "LSTM_single_layer": (0.8634868264198303, 0.921875, 0.91796875, 1516.46448636055),
    "GRU_single_layer": (0.6940789222717285, 0.765625, 0.75390625, 1714.1956386566162),
    "LSTM_double_layer": (0.7985197305679321, 0.84765625, 0.87890625, 1329.3484818935394),
    "GRU_double_layer": (0.7080591917037964, 0.69921875, 0.734375, 1714.6088211536407),
}
# Results computed in this session take precedence; the recorded values only
# fill in experiments that were not re-run, so a full re-run is not clobbered.
results = {**_recorded_results, **globals().get("results", {})}

In [None]:
# Single-layer LSTM (128 units) trained with learning rate 1e-5.
# The input shape is declared with an explicit Input layer: passing
# `input_shape=` to the first layer is deprecated in Keras 3.
model_test5 = Sequential([
    keras.Input(shape=X_shape),
    LSTM(128, return_sequences=False),
    Dense(Y_shape, activation='softmax')
])
results["LSTM_single_layer_lr_0.00001"] = train_model(model_test5, 0.00001, EPOCH)
print("LSTM_single_layer_lr_0.00001", results["LSTM_single_layer_lr_0.00001"])

In [None]:
# Single-layer LSTM (128 units) trained with learning rate 1e-3.
# The input shape is declared with an explicit Input layer: passing
# `input_shape=` to the first layer is deprecated in Keras 3.
model_test6 = Sequential([
    keras.Input(shape=X_shape),
    LSTM(128, return_sequences=False),
    Dense(Y_shape, activation='softmax')
])
results["LSTM_single_layer_lr_0.001"] = train_model(model_test6, 0.001, EPOCH)

In [None]:
# Single-layer LSTM (128 units) trained with learning rate 1e-2.
# The input shape is declared with an explicit Input layer: passing
# `input_shape=` to the first layer is deprecated in Keras 3.
model_test7 = Sequential([
    keras.Input(shape=X_shape),
    LSTM(128, return_sequences=False),
    Dense(Y_shape, activation='softmax')
])
results["LSTM_single_layer_lr_0.01"] = train_model(model_test7, 0.01, EPOCH)

In [None]:
# Single-layer LSTM (128 units) trained with learning rate 1e-1.
# The input shape is declared with an explicit Input layer: passing
# `input_shape=` to the first layer is deprecated in Keras 3.
model_test8 = Sequential([
    keras.Input(shape=X_shape),
    LSTM(128, return_sequences=False),
    Dense(Y_shape, activation='softmax')
])
results["LSTM_single_layer_lr_0.1"] = train_model(model_test8, 0.1, EPOCH)

In [None]:
# Dump the results gathered so far: experiment name, then its metrics tuple.
for entry in results.items():
    print(*entry)

In [None]:
# Checkpoint: results of Tests 1-3 as recorded from an earlier run.
# Tuple layout: (train_acc, val_acc, test_acc, seconds).
_recorded_results = {
    "LSTM_single_layer": (0.8634868264198303, 0.921875, 0.91796875, 1516.46448636055),
    "GRU_single_layer": (0.6940789222717285, 0.765625, 0.75390625, 1714.1956386566162),
    "LSTM_double_layer": (0.7985197305679321, 0.84765625, 0.87890625, 1329.3484818935394),
    "GRU_double_layer": (0.7080591917037964, 0.69921875, 0.734375, 1714.6088211536407),
    "LSTM_single_layer_lr_0.00001": (0.45106908679008484, 0.5078125, 0.5, 958.5009093284607),
    "LSTM_single_layer_lr_0.001": (0.7569901347160339, 0.796875, 0.86328125, 772.0105435848236),
    "LSTM_single_layer_lr_0.01": (0.7405427694320679, 0.8515625, 0.8125, 967.8330719470978),
    "LSTM_single_layer_lr_0.1": (0.18832236528396606, 0.23828125, 0.2265625, 637.1956248283386),
}
# Results computed in this session take precedence; the recorded values only
# fill in experiments that were not re-run, so a full re-run is not clobbered.
results = {**_recorded_results, **globals().get("results", {})}

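From the above, we choose `lr=0.0001`: it gave the highest test accuracy (0.918, versus 0.500 for `0.00001`, 0.863 for `0.001`, 0.8125 for `0.01` and 0.227 for `0.1`).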

# Test 4: LSTM Unit count

Note: baseline with 128 units and `lr=0.0001`: test accuracy 0.91796875

In [None]:
# Single-layer LSTM with 64 units.
# The input shape is declared with an explicit Input layer: passing
# `input_shape=` to the first layer is deprecated in Keras 3.
model_test9 = Sequential([
    keras.Input(shape=X_shape),
    LSTM(64, return_sequences=False),
    Dense(Y_shape, activation='softmax')
])
results["LSTM_single_layer_64"] = train_model(model_test9, 0.0001, EPOCH)

In [None]:
# Single-layer LSTM with 256 units.
# The input shape is declared with an explicit Input layer: passing
# `input_shape=` to the first layer is deprecated in Keras 3.
model_test10 = Sequential([
    keras.Input(shape=X_shape),
    LSTM(256, return_sequences=False),
    Dense(Y_shape, activation='softmax')
])
results["LSTM_single_layer_256"] = train_model(model_test10, 0.0001, EPOCH)

In [None]:
# Single-layer LSTM with 512 units.
# The input shape is declared with an explicit Input layer: passing
# `input_shape=` to the first layer is deprecated in Keras 3.
model_test11 = Sequential([
    keras.Input(shape=X_shape),
    LSTM(512, return_sequences=False),
    Dense(Y_shape, activation='softmax')
])
results["LSTM_single_layer_512"] = train_model(model_test11, 0.0001, EPOCH)

In [None]:
# Single-layer LSTM with 1024 units.
# The input shape is declared with an explicit Input layer: passing
# `input_shape=` to the first layer is deprecated in Keras 3.
model_test12 = Sequential([
    keras.Input(shape=X_shape),
    LSTM(1024, return_sequences=False),
    Dense(Y_shape, activation='softmax')
])
results["LSTM_single_layer_1024"] = train_model(model_test12, 0.0001, EPOCH)

In [None]:
# Single-layer LSTM with 2048 units.
# The input shape is declared with an explicit Input layer: passing
# `input_shape=` to the first layer is deprecated in Keras 3.
model_test13 = Sequential([
    keras.Input(shape=X_shape),
    LSTM(2048, return_sequences=False),
    Dense(Y_shape, activation='softmax')
])
results["LSTM_single_layer_2048"] = train_model(model_test13, 0.0001, EPOCH)

# Summary

In [None]:
# Final summary: every experiment name followed by its metrics tuple.
for entry in results.items():
    print(*entry)

In [None]:
# Checkpoint: results of Tests 1-4 as recorded from an earlier run.
# Tuple layout: (train_acc, val_acc, test_acc, seconds).
_recorded_results = {
    "LSTM_single_layer": (0.8634868264198303, 0.921875, 0.91796875, 1516.46448636055),
    "GRU_single_layer": (0.6940789222717285, 0.765625, 0.75390625, 1714.1956386566162),
    "LSTM_double_layer": (0.7985197305679321, 0.84765625, 0.87890625, 1329.3484818935394),
    "GRU_double_layer": (0.7080591917037964, 0.69921875, 0.734375, 1714.6088211536407),
    "LSTM_single_layer_lr_0.00001": (0.45106908679008484, 0.5078125, 0.5, 958.5009093284607),
    "LSTM_single_layer_lr_0.001": (0.7569901347160339, 0.796875, 0.86328125, 772.0105435848236),
    "LSTM_single_layer_lr_0.01": (0.7405427694320679, 0.8515625, 0.8125, 967.8330719470978),
    "LSTM_single_layer_lr_0.1": (0.18832236528396606, 0.23828125, 0.2265625, 637.1956248283386),
    "LSTM_single_layer_64": (0.7582237124443054, 0.7734375, 0.7734375, 980.6074757575989),
    "LSTM_single_layer_256": (0.8955591917037964, 0.8984375, 0.92578125, 930.5116012096405),
    "LSTM_single_layer_512": (0.8289473652839661, 0.859375, 0.8828125, 546.1381299495697),
    "LSTM_single_layer_1024": (0.7615131735801697, 0.7578125, 0.83984375, 408.09040093421936),
    "LSTM_single_layer_2048": (0.8252466917037964, 0.86328125, 0.8984375, 540.9860851764679),
}
# Results computed in this session take precedence; the recorded values only
# fill in experiments that were not re-run, so a full re-run is not clobbered.
results = {**_recorded_results, **globals().get("results", {})}

# Verify

In [None]:
# Verify that a single layer still outperforms two stacked layers at 256 units.
# The input shape is declared with an explicit Input layer: passing
# `input_shape=` to the first layer is deprecated in Keras 3.
model_verify = Sequential([
    keras.Input(shape=X_shape),
    LSTM(256, return_sequences=True),
    LSTM(256, return_sequences=False),
    Dense(Y_shape, activation='softmax')
])
print(train_model(model_verify, 0.0001, EPOCH))

In [None]:
# Verify that learning rate 0.0001 is still the right choice at 256 units by
# also training with 0.001 (this cell) and 0.00001 (next cell).
# The input shape is declared with an explicit Input layer: passing
# `input_shape=` to the first layer is deprecated in Keras 3.
model_verify2 = Sequential([
    keras.Input(shape=X_shape),
    LSTM(256, return_sequences=False),
    Dense(Y_shape, activation='softmax')
])
print(train_model(model_verify2, 0.001, EPOCH))

In [None]:
# Learning-rate check at 256 units, lower side: train with 0.00001.
# The input shape is declared with an explicit Input layer: passing
# `input_shape=` to the first layer is deprecated in Keras 3.
model_verify3 = Sequential([
    keras.Input(shape=X_shape),
    LSTM(256, return_sequences=False),
    Dense(Y_shape, activation='softmax')
])
print(train_model(model_verify3, 0.00001, EPOCH))

# Conclusion

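From the above experiments, we get
- LSTM is **way** better than GRU (single layer, 128 units: test accuracy 0.918 vs. 0.754)
- A single layer is better than two stacked layers (LSTM, 128 units: test accuracy 0.918 vs. 0.879)
- Learning rate: `lr=0.0001`
- Units: `256` (test accuracy 0.926, the highest among the tested sizes from 64 to 2048)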