# Transformer Model Pose Hands

## Preprocessing

In [None]:
import json
import os
from concurrent.futures import ThreadPoolExecutor

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm

In [None]:
# Mount Google Drive (Colab only) so the paths under /content/drive used below resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Constants
DATA_PATH = '/content/drive/MyDrive/Colab Notebooks/AAI-521/Final Project/Models/MediaPipe_NoFace'
NUM_FRAMES = 90  # every sequence is padded/truncated to this many frames
save_path = '/content/drive/MyDrive/Colab Notebooks/AAI-521/Final Project/final_DataSet'

# Load the top 100 glosses
top_100_path = '/content/drive/MyDrive/Colab Notebooks/AAI-521/Final Project/DataSet/gloss_counts_top_100.csv'
df = pd.read_csv(top_100_path)
# dict.fromkeys drops repeated glosses while keeping CSV order, so the class ids
# assigned below are contiguous (0 .. len(label_map) - 1) even if the CSV has duplicates.
top_100_classes = list(dict.fromkeys(df['Gloss'].tolist()))

# Gloss folders present on disk (sorted for a stable listing)
actions = sorted(os.listdir(DATA_PATH))

# Label map: gloss -> class id, following the order of the top-100 list.
# (A previous map built from `actions` was immediately overwritten, so it is no longer created.)
label_map = {label: idx for idx, label in enumerate(top_100_classes)}


In [None]:

def process_file(file_path):
    """Load one keypoint sequence, normalise it and force it to NUM_FRAMES frames.

    The gloss label is taken from the name of the file's parent directory.

    Args:
        file_path: Path to a ``.npy`` file holding a 2-D array (frames x features).

    Returns:
        A 2-D array with exactly ``NUM_FRAMES`` rows, scaled by the sequence's
        largest absolute value, or ``None`` when the label is not in
        ``top_100_classes`` or the file could not be processed.
    """
    try:
        # Get the gloss label from the file path
        label = os.path.basename(os.path.dirname(file_path))

        # Skip files whose labels are not in the top 100
        if label not in top_100_classes:
            return None

        # Load .npy file
        sequence = np.load(file_path)

        # Normalize keypoints by the largest absolute value in the sequence.
        scale = np.max(np.abs(sequence), axis=(0, 1), keepdims=True)
        # An all-zero sequence would otherwise divide 0/0 and fill the sample with
        # NaN (NumPy only warns, so the except below would not catch it).
        scale = np.where(scale == 0, 1.0, scale)
        sequence = sequence / scale

        # Pad (with zeros, at the end) or truncate to NUM_FRAMES
        num_frames = len(sequence)
        if num_frames > NUM_FRAMES:
            return sequence[:NUM_FRAMES]
        return np.pad(sequence, ((0, NUM_FRAMES - num_frames), (0, 0)), 'constant')
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return None  # Skip this file

# Count total files and prepare paths, filtering by top 100 glosses.
# os.listdir returns entries in arbitrary order, so both listings are sorted to keep
# the sample order (and therefore the seeded train/val/test split) reproducible.
all_files = [
    os.path.join(DATA_PATH, action, file)
    for action in sorted(os.listdir(DATA_PATH))
    if action in top_100_classes  # Only include glosses from the top 100
    for file in sorted(os.listdir(os.path.join(DATA_PATH, action)))
    if file.endswith('.npy')
]

# Process files in parallel (executor.map preserves the order of all_files)
with ThreadPoolExecutor() as executor:
    results = list(tqdm(executor.map(process_file, all_files), total=len(all_files), desc="Processing Files"))

# Keep each successfully processed sequence paired with its source file so that
# sequences and labels cannot drift out of alignment.
kept = [(file, seq) for file, seq in zip(all_files, results) if seq is not None]
sequences = [seq for _, seq in kept]
labels = [label_map[os.path.basename(os.path.dirname(file))] for file, _ in kept]

# Check if any label exceeds the number of classes
assert all(label < len(label_map) for label in labels), "Some labels are out of bounds!"

# Convert to NumPy arrays
X = np.array(sequences)  # Shape: (num_samples, NUM_FRAMES, num_features)
assert X.ndim == 3, f"Expected a 3-D array of sequences, got shape {X.shape}"
y = to_categorical(labels, num_classes=len(label_map))  # Shape: (num_samples, num_classes)

Processing Files: 100%|██████████| 1120/1120 [00:17<00:00, 62.80it/s]


In [None]:
# Sanity check: report the dimensions of the stacked sequence array.
print(f"X shape: {X.shape}")  # Expected: (num_samples, NUM_FRAMES, num_features)


X shape: (1120, 90, 258)


In [None]:
# Make sure the output folder exists, then write both arrays into it.
os.makedirs(save_path, exist_ok=True)

for file_name, array in (('X_mp_ph_100.npy', X), ('y_mp_ph_100.npy', y)):
    np.save(os.path.join(save_path, file_name), array)

print("X and y saved successfully!")

X and y saved successfully!


## Model

In [None]:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.utils.class_weight import compute_class_weight
import itertools
from tensorflow.keras.layers import MultiHeadAttention, Dense, Dropout, LayerNormalization, BatchNormalization
from tensorflow.keras.layers import Input, MultiHeadAttention, Dense, Dropout, LayerNormalization, BatchNormalization, GlobalAveragePooling1D
from tensorflow.keras.regularizers import l2  # Import l2 regularizer

In [None]:
# Reload the preprocessed feature and one-hot label arrays written by the preprocessing section.
save_path = '/content/drive/MyDrive/Colab Notebooks/AAI-521/Final Project/final_DataSet'

X_file = os.path.join(save_path, 'X_mp_ph_100.npy')
y_file = os.path.join(save_path, 'y_mp_ph_100.npy')
X = np.load(X_file)
y = np.load(y_file)

In [None]:
# Hold out 20% of the samples, then halve that hold-out into validation and test
# sets (80/10/10 overall). Both splits use the same fixed seed.
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
)

# Report the size of each partition
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_val shape: {X_val.shape}, y_val shape: {y_val.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

X_train shape: (896, 90, 258), y_train shape: (896, 100)
X_val shape: (112, 90, 258), y_val shape: (112, 100)
X_test shape: (112, 90, 258), y_test shape: (112, 100)


In [None]:
# Hyperparameter grid: three candidate values per hyperparameter (3**4 = 81 combinations).
param_grid = dict(
    num_blocks=[1, 2, 3],              # Number of transformer blocks
    ff_dim=[32, 64, 128],              # Feed-forward dimension (hidden layer size)
    batch_size=[16, 32, 64],           # Batch size
    learning_rate=[1e-6, 1e-5, 1e-4],  # Learning rate
)

In [None]:
def transformer_block(inputs, num_heads, ff_dim, dropout=0.1, l2_reg=1e-4):
    """Apply one encoder block: self-attention followed by a feed-forward sub-layer.

    Both sub-layers use dropout, a residual connection and layer normalisation; the
    feed-forward output is additionally batch-normalised and its Dense kernels carry
    L2 regularisation.

    Args:
        inputs: Tensor whose last axis is the feature dimension.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden feed-forward layer.
        dropout: Dropout rate used after attention and after the feed-forward layers.
        l2_reg: L2 penalty factor for the two Dense kernels.

    Returns:
        Tensor with the same feature dimension as ``inputs``.
    """
    model_dim = inputs.shape[-1]

    # Self-attention sub-layer (queries and values are both the block input)
    attn_out = MultiHeadAttention(num_heads=num_heads, key_dim=model_dim)(inputs, inputs)
    attn_out = Dropout(dropout)(attn_out)
    attn_out = LayerNormalization(epsilon=1e-6)(attn_out + inputs)

    # Position-wise feed-forward sub-layer, projected back to the input width
    hidden = Dense(ff_dim, activation="relu", kernel_regularizer=l2(l2_reg))(attn_out)
    projected = Dense(model_dim, kernel_regularizer=l2(l2_reg))(hidden)
    projected = Dropout(dropout)(projected)
    projected = BatchNormalization()(projected)

    return LayerNormalization(epsilon=1e-6)(projected + attn_out)

In [None]:
def add_positional_encoding(inputs):
    """Add a fixed sinusoidal positional encoding to ``inputs``.

    Column ``i`` of the encoding uses the rate ``10000 ** (2 * (i // 2) / dim)``;
    even columns take the sine and odd columns the cosine of ``position / rate``.

    Args:
        inputs: Tensor whose axis 1 is the sequence length and whose last axis is
            the feature size; both must be known statically.

    Returns:
        ``inputs`` plus the encoding (float32, broadcast over the batch axis).
    """
    seq_len, dim = inputs.shape[1], inputs.shape[-1]

    # One rate per feature column, computed once instead of once per position
    rates = np.array([np.power(10000, 2 * (i // 2) / dim) for i in range(dim)])
    positions = np.arange(seq_len, dtype=np.float64)
    angles = positions[:, np.newaxis] / rates[np.newaxis, :]

    angles[:, 0::2] = np.sin(angles[:, 0::2])  # even feature indices
    angles[:, 1::2] = np.cos(angles[:, 1::2])  # odd feature indices

    # Leading axis of size 1 lets the encoding broadcast across the batch
    encoding = tf.expand_dims(tf.constant(angles, dtype=tf.float32), axis=0)

    return inputs + encoding


In [None]:
def build_transformer_model(seq_len, num_features, num_classes, num_heads=4, ff_dim=128, num_blocks=3, dropout=0.1, learning_rate=1e-4):
    """Build and compile the transformer sequence classifier.

    Architecture: positional encoding -> ``num_blocks`` transformer blocks ->
    global average pooling over time -> Dense(512, relu) -> Dropout(0.5) -> softmax.

    Args:
        seq_len: Number of frames per input sequence.
        num_features: Number of features per frame.
        num_classes: Size of the softmax output.
        num_heads: Attention heads per transformer block.
        ff_dim: Hidden width of each block's feed-forward layer.
        num_blocks: Number of stacked transformer blocks.
        dropout: Dropout rate inside the transformer blocks.
        learning_rate: Initial learning rate of the exponential decay schedule.

    Returns:
        A compiled ``tf.keras`` model (Adam with gradient-norm clipping,
        categorical cross-entropy loss, categorical accuracy metric).
    """
    inputs = Input(shape=(seq_len, num_features))

    # Encoder stack
    encoded = add_positional_encoding(inputs)
    for _ in range(num_blocks):
        encoded = transformer_block(encoded, num_heads=num_heads, ff_dim=ff_dim, dropout=dropout)

    # Classification head
    pooled = GlobalAveragePooling1D()(encoded)
    head = Dense(512, activation="relu")(pooled)
    head = Dropout(0.5)(head)
    outputs = Dense(num_classes, activation="softmax")(head)

    # Learning rate is multiplied by 0.9 every 10000 optimizer steps
    optimizer = Adam(
        learning_rate=tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=learning_rate,
            decay_steps=10000,
            decay_rate=0.9,
        ),
        clipnorm=1.0,
    )

    model = tf.keras.models.Model(inputs, outputs)
    model.compile(
        optimizer=optimizer,
        loss="categorical_crossentropy",
        metrics=["categorical_accuracy"],
    )
    return model


In [None]:
# Exponentially decaying learning rate, starting at 1e-5
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-5, decay_steps=10000, decay_rate=0.9
)

# Adam optimizer with gradient-norm clipping
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule, clipnorm=1.0)

# Input dimensions and architecture settings
seq_len, num_features, num_classes = 90, 258, 100
num_heads, ff_dim, num_blocks = 4, 64, 2

# Build the model, then compile it again so it trains with the optimizer defined above
# (build_transformer_model compiles with its own optimizer).
transformer_model = build_transformer_model(
    seq_len, num_features, num_classes,
    num_heads=num_heads, ff_dim=ff_dim, num_blocks=num_blocks,
)
transformer_model.compile(
    optimizer=optimizer,
    loss="categorical_crossentropy",
    metrics=["categorical_accuracy"],
)
transformer_model.summary()


In [None]:
def grid_search(param_grid, X_train, y_train, X_val, y_val):
    """Train one model per hyperparameter combination and keep the best one.

    Every combination of ``num_blocks``, ``ff_dim``, ``batch_size`` and
    ``learning_rate`` is trained for up to 100 epochs with early stopping on
    ``val_loss``. Each run also checkpoints its best epoch to
    ``best_model_<num_blocks>_<ff_dim>_<batch_size>_<learning_rate>.keras`` in the
    current working directory.

    Args:
        param_grid: Dict with the keys ``num_blocks``, ``ff_dim``, ``batch_size``
            and ``learning_rate``, each mapping to a list of candidate values.
        X_train: Training sequences, shape (samples, frames, features).
        y_train: One-hot training labels, shape (samples, classes).
        X_val: Validation sequences.
        y_val: One-hot validation labels.

    Returns:
        ``(best_model, best_history, best_params)`` for the run with the lowest
        validation loss, where ``best_params`` is the tuple
        ``(num_blocks, ff_dim, batch_size, learning_rate)``. All three are ``None``
        if no run produced a validation loss below infinity.
    """
    best_val_loss = np.inf
    best_params = None
    best_model = None
    best_history = None

    # Take the model dimensions from the data instead of notebook globals, so the
    # function also works when the arrays were reloaded from disk.
    seq_len, num_features = X_train.shape[1], X_train.shape[2]
    num_classes = y_train.shape[1]

    # Class weights depend only on y_train, so compute them once for all runs.
    # compute_class_weight rejects classes that never occur in y, so it is called
    # with the classes actually present; absent classes keep a neutral weight of 1.0.
    y_train_ids = np.argmax(y_train, axis=1)
    present_classes = np.unique(y_train_ids)
    present_weights = compute_class_weight(
        class_weight="balanced",
        classes=present_classes,
        y=y_train_ids
    )
    class_weight_dict = {class_id: 1.0 for class_id in range(num_classes)}
    for class_id, weight in zip(present_classes, present_weights):
        class_weight_dict[int(class_id)] = float(weight)

    # Look the lists up by key so the unpacking below does not depend on dict order
    combinations = itertools.product(
        param_grid['num_blocks'],
        param_grid['ff_dim'],
        param_grid['batch_size'],
        param_grid['learning_rate'],
    )

    # Grid search over parameters
    for params in combinations:
        num_blocks, ff_dim, batch_size, learning_rate = params

        print(f"Training with params: num_blocks={num_blocks}, ff_dim={ff_dim}, batch_size={batch_size}, learning_rate={learning_rate}")

        # Build and train the model with the current set of hyperparameters
        model = build_transformer_model(seq_len=seq_len, num_features=num_features, num_classes=num_classes,
                                        num_blocks=num_blocks, ff_dim=ff_dim, learning_rate=learning_rate)

        early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
        model_checkpoint = ModelCheckpoint(f"best_model_{num_blocks}_{ff_dim}_{batch_size}_{learning_rate}.keras",
                                           save_best_only=True, monitor='val_loss', verbose=1)

        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=100,
            batch_size=batch_size,
            class_weight=class_weight_dict,
            callbacks=[early_stopping, model_checkpoint],
            verbose=0
        )

        # Check validation loss
        val_loss = min(history.history['val_loss'])
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_params = params
            best_model = model
            best_history = history
            print(f"New Best Model Found: val_loss={val_loss}, params={params}")

    return best_model, best_history, best_params

In [None]:
# Perform grid search
best_model, best_history, best_params = grid_search(param_grid, X_train, y_train, X_val, y_val)

# grid_search returns None values when no run improved on an infinite validation
# loss (e.g. an empty grid or only NaN losses); fail here with a clear message
# instead of an AttributeError on best_model.save below.
if best_model is None:
    raise RuntimeError("Grid search returned no model; check the parameter grid and the validation losses.")

# Save the best model and parameters
final_best_model_path = '/content/drive/MyDrive/Colab Notebooks/AAI-521/Final Project/best_mod'
os.makedirs(final_best_model_path, exist_ok=True)

# Save the best model
best_model.save(os.path.join(final_best_model_path, 'best_model_overall_ph.keras'))

# Save the best hyperparameters (json is imported in the notebook's import cell).
# best_params is the tuple (num_blocks, ff_dim, batch_size, learning_rate).
best_params_path = os.path.join(final_best_model_path, 'best_model_params_ph.json')
best_params_record = dict(zip(('num_blocks', 'ff_dim', 'batch_size', 'learning_rate'), best_params))
# float() guarantees a JSON-serialisable native number for the best validation loss
best_params_record['validation_loss'] = float(min(best_history.history['val_loss']))
with open(best_params_path, 'w') as f:
    json.dump(best_params_record, f)

print(f"Best model and parameters saved to: {final_best_model_path}")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

Epoch 79: val_loss improved from 4.68049 to 4.67992, saving model to best_model_2_128_32_1e-06.keras

Epoch 80: val_loss improved from 4.67992 to 4.67943, saving model to best_model_2_128_32_1e-06.keras

Epoch 81: val_loss improved from 4.67943 to 4.67859, saving model to best_model_2_128_32_1e-06.keras

Epoch 82: val_loss improved from 4.67859 to 4.67844, saving model to best_model_2_128_32_1e-06.keras

Epoch 83: val_loss did not improve from 4.67844

Epoch 84: val_loss improved from 4.67844 to 4.67830, saving model to best_model_2_128_32_1e-06.keras

Epoch 85: val_loss improved from 4.67830 to 4.67818, saving model to best_model_2_128_32_1e-06.keras

Epoch 86: val_loss improved from 4.67818 to 4.67765, saving model to best_model_2_128_32_1e-06.keras

Epoch 87: val_loss did not improve from 4.67765

Epoch 88: val_loss improved from 4.67765 to 4.67713, saving model to best_model_2_128_32_1e-06.keras

Epoch 89: val_loss i

In [None]:
# Folder holding the saved best model and its hyperparameters
final_best_model_path = '/content/drive/MyDrive/Colab Notebooks/AAI-521/Final Project/best_mod'
best_model_path = os.path.join(final_best_model_path, 'best_model_overall_ph.keras')
best_params_path = os.path.join(final_best_model_path, 'best_model_params_ph.json')

# Restore the model from disk
best_model = tf.keras.models.load_model(best_model_path)

# Restore the hyperparameters recorded alongside it and show them
with open(best_params_path, 'r') as file:
    best_params = json.load(file)
print("Best Hyperparameters:", best_params)

# Score the restored model on the held-out test split
test_loss, test_accuracy = best_model.evaluate(X_test, y_test)

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

Best Hyperparameters: {'num_blocks': 3, 'ff_dim': 128, 'batch_size': 64, 'learning_rate': 0.0001, 'validation_loss': 3.671271800994873}
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 1s/step - categorical_accuracy: 0.0997 - loss: 3.8610
Test Loss: 3.8923280239105225
Test Accuracy: 0.0982142835855484
