In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
import json

In [2]:
DATA_DIR = "data"
TRAIN_CSV = os.path.join(DATA_DIR, "sign_mnist_train.csv")
TEST_CSV = os.path.join(DATA_DIR, "sign_mnist_test.csv")
MODEL_DIR = "model"
os.makedirs(MODEL_DIR, exist_ok=True)
MODEL_PATH = os.path.join(MODEL_DIR, "asl_lstm_model.h5")
LABEL_MAP_PATH = os.path.join(MODEL_DIR, "label_map.json")

In [3]:
def load_csv_as_xy(csv_path):
    """
    sign_mnist files: first column 'label', rest 784 pixels
    """
    df = pd.read_csv(csv_path)
    y = df['label'].values
    X = df.drop('label', axis=1).values
    return X, y

In [4]:
def images_to_sequences(X):
    """
    Convert flat 784 vectors into sequences of 28 timesteps x 28 features
    Shape: (N, 28, 28)
    """
    return X.reshape(-1, 28, 28).astype(np.float32)

In [5]:
print("Loading data...")
if not (os.path.exists(TRAIN_CSV) and os.path.exists(TEST_CSV)):
    raise FileNotFoundError("Please put sign_mnist_train.csv and sign_mnist_test.csv into the data/ directory.")

Loading data...


In [6]:
X_train_raw, y_train_raw = load_csv_as_xy(TRAIN_CSV)
X_test_raw,  y_test_raw  = load_csv_as_xy(TEST_CSV)

# Convert to sequences
X_train_seq = images_to_sequences(X_train_raw)
X_test_seq  = images_to_sequences(X_test_raw)

# Normalize (pixel values 0-255 -> 0-1)
X_train_seq /= 255.0
X_test_seq  /= 255.0

# Optionally split a validation set out of training
X_train, X_val, y_train, y_val = train_test_split(X_train_seq, y_train_raw, test_size=0.12, random_state=42, stratify=y_train_raw)

print("Shapes:", X_train.shape, X_val.shape, X_test_seq.shape)

Shapes: (24160, 28, 28) (3295, 28, 28) (7172, 28, 28)


In [7]:
# ------------- Label binarization -------------
# We'll one-hot encode labels for training
lb = LabelBinarizer()
lb.fit(y_train)  # fit on train labels

y_train_ohe = lb.transform(y_train)
y_val_ohe   = lb.transform(y_val)
y_test_ohe  = lb.transform(y_test_raw)

n_classes = y_train_ohe.shape[1]
print("Number of classes:", n_classes)

Number of classes: 24


In [8]:
# Save a label mapping so the Streamlit app can decode predictions
# The class indices correspond to the label values in the dataset (e.g., 0..23)
label_map = {int(cls): int(cls) for cls in lb.classes_}
# We'll create a human-readable mapping as well (common mapping in the Sign Language MNIST dataset
# maps indices to letters A-Y excluding J). But to avoid mismatch, we'll produce mapping from dataset class number -> letter guess.
# For convenience we create a letter list skipping 'J' which is usually absent.
letters = [chr(c) for c in range(ord('A'), ord('Z')+1) if chr(c) != 'J']
# Some datasets order numeric labels in increasing order corresponding to letters (A,B,C,... skipping J).
sorted_classes = sorted(list(lb.classes_))
class_to_letter = {}
for i, cls in enumerate(sorted_classes):
    # guard if classes exceed letters length
    letter = letters[i] if i < len(letters) else str(cls)
    class_to_letter[int(cls)] = letter

In [9]:
# Ensure all classes are plain Python ints
sorted_classes = [int(c) for c in sorted(lb.classes_)]

# Ensure keys in class_to_letter are str (JSON best practice)
class_to_letter = {str(int(k)): v for k, v in class_to_letter.items()}

with open(LABEL_MAP_PATH, "w") as f:
    json.dump({
        "classes": sorted_classes,
        "class_to_letter": class_to_letter
    }, f, indent=2)

In [None]:
# Build an LSTM Model
def build_lstm_model(input_shape=(28,28), num_classes=n_classes):
    model = Sequential()
    # First LSTM layer
    model.add(LSTM(128, input_shape=input_shape, return_sequences=True))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    # Second LSTM layer
    model.add(LSTM(64, return_sequences=False))
    model.add(BatchNormalization())
    model.add(Dropout(0.25))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax'))
    return model

In [11]:
model = build_lstm_model()
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

2025-09-06 02:54:18.475367: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2025-09-06 02:54:18.475746: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2025-09-06 02:54:18.475765: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2025-09-06 02:54:18.476670: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-09-06 02:54:18.477793: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 28, 128)           80384     
                                                                 
 batch_normalization (Batch  (None, 28, 128)           512       
 Normalization)                                                  
                                                                 
 dropout (Dropout)           (None, 28, 128)           0         
                                                                 
 lstm_1 (LSTM)               (None, 64)                49408     
                                                                 
 batch_normalization_1 (Bat  (None, 64)                256       
 chNormalization)                                                
                                                                 
 dropout_1 (Dropout)         (None, 64)                0

In [None]:
# Callbacks
checkpoint = ModelCheckpoint(MODEL_PATH, monitor='val_accuracy', save_best_only=True, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=4, min_lr=1e-6, verbose=1)
early = EarlyStopping(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True)

In [None]:
# Training
BATCH_SIZE = 128
EPOCHS = 1

history = model.fit(
    X_train, y_train_ohe,
    validation_data=(X_val, y_val_ohe),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[checkpoint, reduce_lr, early],
    verbose=2
)

2025-09-06 02:54:20.866743: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-09-06 02:54:21.329528: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-09-06 02:54:22.046401: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-09-06 02:54:22.873643: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-09-06 02:54:23.189543: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-09-06 02:54:29.395477: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-09-06 02:54:29.491294: I tensorflow/core/grappler/optimizers/cust


Epoch 1: val_accuracy improved from -inf to 0.35569, saving model to model/asl_lstm_model.h5
189/189 - 11s - loss: 2.1942 - accuracy: 0.3361 - val_loss: 2.2210 - val_accuracy: 0.3557 - lr: 0.0010 - 11s/epoch - 57ms/step


  saving_api.save_model(


In [14]:
# Evaluate on test set
print("Evaluating on test set...")
model.load_weights(MODEL_PATH)  # best model
test_loss, test_acc = model.evaluate(X_test_seq, y_test_ohe, verbose=2)
print(f"Test accuracy: {test_acc:.4f}")

print("Training complete. Model saved to", MODEL_PATH)

Evaluating on test set...
225/225 - 3s - loss: 2.2613 - accuracy: 0.3617 - 3s/epoch - 12ms/step
Test accuracy: 0.3617
Training complete. Model saved to model/asl_lstm_model.h5
