In [None]:
from sklearn.model_selection import train_test_split # type: ignore
from tensorflow.keras.utils import to_categorical # type: ignore

import datetime
import os
import numpy as np # type: ignore
import tensorflow as tf # type: ignore

In [None]:
# directory that holds the saved keypoint data (numpy arrays); the loading
# loop reads DATA_PATH/<action>/<video index>/<frame index>.npy
DATA_PATH = "../datasets"

# sign actions to be detected
ACTIONS = np.array(
    [
        "hello",
        "thanks",
        "i-love-you",
        "see-you-later",
        "I",
        "Father",
        "Mother",
        "Yes",
        "No",
        "Help",
        "Please",
        "Want",
        "What",
        "Again",
        "Eat",
        "Milk",
        "More",
        "Go To",
        "Bathroom",
        "Fine",
        "Like",
        "Learn",
        "Sign",
        "Done",
    ]
)

# NOTE: only the first 6 labels are used because only 6 labels have data for now
ACTIONS = ACTIONS[:6]

# Highest video index found under the first label. Only purely numeric entries
# are considered, so stray files such as ".DS_Store" or ".ipynb_checkpoints"
# no longer make the int conversion crash.
video_indices = [
    int(entry)
    for entry in os.listdir(os.path.join(DATA_PATH, ACTIONS[0]))
    if entry.isdecimal()
]
if not video_indices:
    raise ValueError(
        f"No numbered video directories found in {os.path.join(DATA_PATH, ACTIONS[0])}"
    )

# NOTE(review): this is the *highest index*, not a count. The loading loop
# iterates range(videos_per_label), i.e. indices 0..max-1, so the directory
# with the highest index is never read - confirm whether `+ 1` is intended.
videos_per_label = np.max(video_indices)

# number of frames read from every video (the length of one sequence)
# NOTE: this only controls how many frames are loaded per video
action_per_video = 30

In [None]:
# display the labels that are currently in use
ACTIONS

In [None]:
# map every label to its position in ACTIONS, for example:
# {'hello': 0, 'thanks': 1, 'i-love-you': 2}
labels_map = dict(zip(ACTIONS, range(len(ACTIONS))))

# accumulators: one list of frames and one label index per loaded video
sequences = []
labels = []

In [None]:
# display the label -> index mapping and the value of videos_per_label
labels_map, videos_per_label

In [None]:
# Load every video of every action in ACTIONS into memory.
#
# The accumulators are (re)created here so that re-running this cell rebuilds
# the dataset instead of appending a second copy of it.
sequences, labels = [], []

for action in ACTIONS:
    # one iteration per video (sequence) of the current action
    for sequence in range(videos_per_label):
        # window holding the frames of the current video in frame order; each
        # frame is read from DATA_PATH/<action>/<sequence>/<frame_num>.npy
        sequence_actions = [
            np.load(
                os.path.join(DATA_PATH, action, str(sequence), f"{frame_num}.npy")
            )
            for frame_num in range(action_per_video)
        ]

        # store the completed window together with its label index
        sequences.append(sequence_actions)
        labels.append(labels_map[action])

Convert the sequences and labels lists into NumPy arrays that are suitable for use as input (X) and output (y) in machine learning models, particularly for deep learning models.

In [None]:
# stack the per-video frame lists into a single array (axis 0 indexes videos)
X = np.array(sequences)

# convert the labels list to a one-hot encoded NumPy array; num_classes is
# pinned to the number of actions so the encoding width always matches the
# width of the model's softmax layer, whatever labels happen to be present
y = to_categorical(labels, num_classes=len(ACTIONS)).astype(int)

In [None]:
# split the dataset: 10% becomes the test set, the remaining 90% the training set.
# random_state makes the split reproducible across kernel restarts, and
# stratify keeps the class proportions equal in both parts so the small test
# set still covers every action.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
    stratify=np.argmax(y, axis=1),
)

The shapes of the datasets depend on the total number of sequences and the sequence length. For example, assuming 30 sequences are loaded for each of the 6 actions currently in use, we have:

    Total sequences = 30 sequences/action × 6 actions = 180 sequences

Given a test_size of 0.1, 10% of the data (18 sequences) will be in the test set, and the remaining 90% (162 sequences) will be in the training set.

In [None]:
# display the shapes of the train/test inputs and of the train/test labels
X_train.shape, X_test.shape, y_train.shape, y_test.shape

#### Modelling

In [None]:
from tensorflow.keras.models import Sequential, save_model, load_model  # type: ignore
from tensorflow.keras.layers import BatchNormalization, Conv1D, MaxPooling1D, Flatten, Dense, Dropout, LSTM, TimeDistributed, Reshape, Bidirectional  # type: ignore
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ReduceLROnPlateau  # type: ignore
from tensorflow.keras.regularizers import l2  # type: ignore
from tensorflow.keras.optimizers import Adam # type: ignore

In [None]:
# one sample is a window of 30 frames (the sequence length), and every frame
# is described by 1692 features
n_timesteps = 30
n_features = 1692
input_shape = (n_timesteps, n_features)

Topic regarding TimeDistributed

- https://stackoverflow.com/a/76796778/14182545
- https://medium.com/smileinnovation/how-to-work-with-time-distributed-data-in-a-neural-network-b8b39aa4ce00

    Another kind of layer that is the famous "LSTM" (or GRU). It is not mandatory but it will finalize the chronological resolution of inputs.

    Dense without TimeDistributed computes per batch.
    Dense wrapped in TimeDistributed computes per timestep.

#### Model Architecture CNN-LSTM_1L

In [None]:
# asl-action-cnn-lstm_1l-560k
tf.keras.backend.clear_session()

model = Sequential()

# data normalization on the raw input window
model.add(BatchNormalization(input_shape=input_shape))

# three Conv1D + MaxPooling1D stages with L2 regularization; the kernel size
# (3) and pool size (2) stay fixed while the filter count doubles per stage
for n_filters in (64, 128, 256):
    model.add(
        Conv1D(
            filters=n_filters,
            kernel_size=3,
            activation="relu",
            kernel_regularizer=l2(0.01),
        )
    )
    model.add(MaxPooling1D(pool_size=2))

# dense layer for feature extraction with L2 regularization
model.add(Dense(64, activation="relu", kernel_regularizer=l2(0.01)))
model.add(Dropout(0.5))

# bidirectional LSTM with L2 regularization; return_sequences=False means
# only the final output is passed on to the classifier head
recurrent_layer = LSTM(
    64, return_sequences=False, activation="relu", kernel_regularizer=l2(0.01)
)
model.add(Bidirectional(recurrent_layer))

# classification head: two Dense layers, each followed by dropout
# (rate 0.5, chosen to counter overfitting)
for n_units in (128, 64):
    model.add(Dense(n_units, activation="relu", kernel_regularizer=l2(0.01)))
    model.add(Dropout(0.5))

# one softmax output per action
model.add(Dense(ACTIONS.shape[0], activation="softmax"))

In [None]:
# print the layer-by-layer summary of the model built above
model.summary()

#### Setting Up

In [None]:
def create_log_dir(base_dir, use_time=False):
    """Create and return a fresh ``train-*`` log directory inside ``base_dir``.

    Args:
        base_dir: Directory that collects the log directories. It is created
            (including parents) when it does not exist yet.
        use_time: When True the directory is named ``train-<YYYYmmdd-HHMMSS>``.
            When False it is named ``train-<NNN>``, where ``NNN`` is one more
            than the highest existing numbered run (``train-001`` if none).

    Returns:
        The path of the newly created directory.

    Raises:
        FileExistsError: If the target directory already exists, e.g. two
            time-stamped calls within the same second.
    """
    import re

    # a missing base directory used to make os.listdir fail
    os.makedirs(base_dir, exist_ok=True)

    if use_time:
        current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        new_log_dir = os.path.join(base_dir, f"train-{current_time}")
    else:
        # Only purely numbered runs count. fullmatch skips time-stamped
        # directories ("train-20240101-120000"), which would otherwise be
        # misread as run number 20240101.
        numbered_run = re.compile(r"train-(\d+)")
        run_numbers = []
        for entry in os.listdir(base_dir):
            match = numbered_run.fullmatch(entry)
            if match and os.path.isdir(os.path.join(base_dir, entry)):
                run_numbers.append(int(match.group(1)))

        # compare numerically: as strings, "train-999" sorts after "train-1000"
        next_number = max(run_numbers, default=0) + 1
        new_log_dir = os.path.join(base_dir, f"train-{next_number:03d}")

    # create the new log directory
    os.makedirs(new_log_dir)
    print(f"Created new log directory: {new_log_dir}")

    return new_log_dir

In [None]:
from tensorflow.keras.callbacks import Callback  # type: ignore


class EarlyStoppingByLossVal(Callback):
    """Stop training once the monitored metric stays at or below a threshold.

    Training is stopped when ``monitor`` has been ``<= value`` for ``patience``
    consecutive epochs. Any epoch above the threshold resets the counter.

    Args:
        monitor: Key looked up in the per-epoch ``logs`` dictionary.
        value: Threshold the monitored metric has to reach.
        verbose: When greater than 0, a message is printed on stopping.
        patience: Number of consecutive epochs at or below ``value`` required
            before training is stopped.
    """

    def __init__(self, monitor="val_loss", value=0.001, verbose=0, patience=20):
        # super(Callback, self) skipped Callback.__init__ entirely; the
        # zero-argument form runs the base-class initialisation as intended
        super().__init__()
        self.monitor = monitor
        self.value = value
        self.verbose = verbose
        self.patience = patience
        self.wait = 0

    def on_train_begin(self, logs=None):
        """Reset the counter so the instance can be reused across fit() calls."""
        self.wait = 0

    def on_epoch_end(self, epoch, logs=None):
        """Update the counter and request a stop once ``patience`` is reached.

        Raises:
            ValueError: If ``monitor`` is not present in ``logs``.
        """
        # logs defaults to None in the signature; treat that as "no metrics"
        logs = logs or {}
        current = logs.get(self.monitor)
        if current is None:
            raise ValueError(f"Early stopping requires {self.monitor} available!")

        if current <= self.value:
            self.wait += 1
            if self.wait >= self.patience:
                if self.verbose > 0:
                    print(
                        f"Epoch {epoch}: early stopping threshold reached with {self.monitor} = {current}"
                    )
                self.model.stop_training = True
        else:
            self.wait = 0  # reset wait if the condition is not met

#### Compiling

In [None]:
# set the optimizer with an initial learning rate
optimizer = Adam(learning_rate=0.001)

# compile the model with the optimizer
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# EarlyStopping callback
#
# monitor              : watch `val_loss` during training.
# patience             : number of epochs to wait for an improvement of the
#                        monitored metric. `patience=40` means training stops
#                        when `val_loss` has not improved for 40 consecutive epochs.
# restore_best_weights : roll the model back to the weights of the best epoch
#                        when training is stopped.
early_stopping = EarlyStopping(monitor='val_loss', patience=40, restore_best_weights=True)

# threshold-based stop: ends training once `val_loss` has stayed at or below
# 0.0001 for 10 consecutive epochs
early_stopping_by_loss_val = EarlyStoppingByLossVal(monitor='val_loss', value=0.0001, verbose=1, patience=10)

# ReduceLROnPlateau callback
#
# monitor  : watch `val_loss` during training.
# factor   : multiplier applied to the learning rate. `factor=0.2` means the
#            learning rate is cut to a fifth when the metric stops improving.
# patience : number of epochs without improvement before the learning rate is
#            reduced; `patience=10` means 10 consecutive epochs.
# min_lr   : lower bound of the learning rate; it is never reduced below
#            `0.0001` (one tenth of the initial rate set above).
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=10, min_lr=0.0001)

#### Training

In [None]:
# TensorBoard callback; every run logs into its own time-stamped directory
log_dir = create_log_dir("../drive/logs/asl_action_6", use_time=True)
tensor_board_cb = TensorBoard(log_dir=log_dir)

# callbacks active for this run (trim the list to disable individual ones)
training_callbacks = [
    tensor_board_cb,
    early_stopping,
    early_stopping_by_loss_val,
    reduce_lr,
]

# train the model with the callbacks
# NOTE(review): the test split doubles as validation data here, so early
# stopping and LR scheduling are tuned on the same data that is evaluated
# later - consider a separate validation split.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=600,
    batch_size=64,
    callbacks=training_callbacks,
)

#### Saving The Trained Model

In [None]:
def get_next_filename(directory, base_name, extension):
    """Return the filename under which the next model should be stored.

    Only the default base name ``"asl-action-weight"`` is numbered: the result
    is ``<base_name>-<NNN>.<extension>``, where ``NNN`` is one more than the
    highest number already present in ``directory`` (``001`` when there is
    none, or when the directory does not exist yet). Any other base name is
    returned as ``<base_name>.<extension>`` without numbering.

    Args:
        directory: Directory that is scanned for previously saved files.
        base_name: Filename stem.
        extension: File extension without the leading dot.

    Returns:
        The filename (not the full path).
    """
    import re

    # custom names are never numbered, so the directory need not be read
    if base_name != "asl-action-weight":
        return f"{base_name}.{extension}"

    try:
        entries = os.listdir(directory)
    except FileNotFoundError:
        # nothing has been saved there yet
        entries = []

    # accept exactly "<base_name>-<digits>.<extension>"; names such as
    # "asl-action-weight-final.h5" used to crash the int() conversion
    numbered_file = re.compile(
        rf"{re.escape(base_name)}-(\d+)\.{re.escape(extension)}"
    )
    numbers = [
        int(match.group(1))
        for match in map(numbered_file.fullmatch, entries)
        if match
    ]

    # pad the next number with leading zeros to keep the 3-digit format
    next_number = max(numbers, default=0) + 1
    return f"{base_name}-{next_number:03d}.{extension}"


def model_save(
    model, directory="../models/legacy", base_name="asl-action-weight", extension="h5"
):
    """Save ``model`` into ``directory`` under the next available filename.

    Args:
        model: Object exposing ``save(path)``.
        directory: Target directory; created (including parents) if missing.
        base_name: Filename stem passed on to ``get_next_filename``.
        extension: File extension without the leading dot.
    """
    # create the target directory first so that neither the filename lookup
    # nor the save itself depends on it having been created beforehand
    os.makedirs(directory, exist_ok=True)

    next_filename = get_next_filename(directory, base_name, extension)
    model_path = os.path.join(directory, next_filename)

    model.save(model_path)

    print(f"Model saved as {next_filename}")

In [None]:
# store the trained model once per format: native Keras and legacy HDF5
# NOTE(review): base_name="..." looks like a placeholder - as written the
# files are saved as "....keras" / "....h5"; presumably one of the names
# listed below is meant.
for target_dir, file_extension in (
    ("../drive/models/keras", "keras"),
    ("../drive/models/legacy", "h5"),
):
    model_save(
        model, directory=target_dir, base_name="...", extension=file_extension
    )

# asl-action-cnn-lstm_1l-560k
# asl-action-cnn-no_lstm-540k

#### Evaluate

In [None]:
import itertools
import numpy as np # type: ignore
import matplotlib.pyplot as plt # type: ignore

from sklearn.metrics import confusion_matrix, accuracy_score # type: ignore

In [None]:
# turn one-hot targets and predicted probabilities into class indices
y_true = np.argmax(y_test, axis=1)
y_pred = np.argmax(model.predict(X_test), axis=1)

# compute confusion matrix; `labels` pins one row/column per action so the
# matrix keeps its full size (and stays aligned with the ACTIONS tick labels)
# even when a class is absent from both the test targets and the predictions
cm = confusion_matrix(y_true, y_pred, labels=np.arange(len(ACTIONS)))

In [None]:
# draw the confusion matrix as a heat map with one tick per action
plt.imshow(cm, interpolation="nearest", cmap=plt.cm.Blues)
plt.title("Confusion matrix")
plt.colorbar()
tick_positions = np.arange(len(ACTIONS))
plt.xticks(tick_positions, ACTIONS, rotation=45)
plt.yticks(tick_positions, ACTIONS)

# write the count into every cell; cells above half of the maximum count get
# white text, all others black
half_max = cm.max() / 2.0
for (row, col), count in np.ndenumerate(cm):
    text_color = "white" if count > half_max else "black"
    plt.text(col, row, f"{count:d}", horizontalalignment="center", color=text_color)

plt.ylabel("True label")
plt.xlabel("Predicted label")
plt.tight_layout()

# compute and print accuracy score
accuracy = accuracy_score(y_true, y_pred)
print("Accuracy Score:", accuracy)

plt.show()