In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
import json

In [2]:
# Input locations: both sign_mnist CSV files are read from DATA_DIR.
DATA_DIR = "data"
TRAIN_CSV, TEST_CSV = (
    os.path.join(DATA_DIR, csv_name)
    for csv_name in ("sign_mnist_train.csv", "sign_mnist_test.csv")
)

# Output locations: the trained model and its label map are written under
# MODEL_DIR, which is created up front when it does not exist yet.
MODEL_DIR = "model"
os.makedirs(MODEL_DIR, exist_ok=True)
MODEL_PATH = os.path.join(MODEL_DIR, "asl_lstm_model.h5")
LABEL_MAP_PATH = os.path.join(MODEL_DIR, "label_map.json")

In [3]:
def load_csv_as_xy(csv_path):
    """
    Read a sign_mnist CSV and split it into features and labels.

    The file is expected to carry a 'label' column; every other column is
    treated as a pixel value (784 of them in the sign_mnist files).

    Parameters:
        csv_path: path of the CSV file to read.

    Returns:
        (X, y): X is a 2-D array holding all non-label columns, y is a 1-D
        array holding the 'label' column.

    Raises:
        KeyError: if the file has no 'label' column.
    """
    frame = pd.read_csv(csv_path)
    # pop() detaches the label column, leaving only the pixel columns behind.
    labels = frame.pop('label').to_numpy()
    pixels = frame.to_numpy()
    return pixels, labels

In [4]:
def images_to_sequences(X, timesteps=28, features=28):
    """
    Reshape flattened images into sequences for a recurrent model.

    Each image becomes `timesteps` steps of `features` values. With the
    defaults a flat 784 vector turns into 28 timesteps x 28 features.

    Parameters:
        X: NumPy array whose total size is a multiple of
           timesteps * features (e.g. shape (N, 784) for the defaults).
        timesteps: number of sequence steps per image; defaults to 28.
        features: number of values per step; defaults to 28.

    Returns:
        A new float32 array of shape (N, timesteps, features).

    Raises:
        ValueError: raised by NumPy when X cannot be reshaped to that layout.
    """
    return X.reshape(-1, timesteps, features).astype(np.float32)

In [5]:
print("Loading data...")
# Fail fast with a clear message when either CSV is absent.
if any(not os.path.exists(csv_file) for csv_file in (TRAIN_CSV, TEST_CSV)):
    raise FileNotFoundError("Please put sign_mnist_train.csv and sign_mnist_test.csv into the data/ directory.")

Loading data...


In [6]:
# Read both CSVs into (pixels, labels) pairs.
X_train_raw, y_train_raw = load_csv_as_xy(TRAIN_CSV)
X_test_raw, y_test_raw = load_csv_as_xy(TEST_CSV)

# Reshape into float32 sequences, then normalize by dividing every pixel by 255.
X_train_seq = images_to_sequences(X_train_raw) / 255.0
X_test_seq = images_to_sequences(X_test_raw) / 255.0

# Hold out 12% of the training rows for validation, stratified by label;
# the fixed random_state keeps the split identical between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_seq,
    y_train_raw,
    test_size=0.12,
    random_state=42,
    stratify=y_train_raw,
)

print("Shapes:", X_train.shape, X_val.shape, X_test_seq.shape)

Shapes: (24160, 28, 28) (3295, 28, 28) (7172, 28, 28)


In [7]:
# One hot encoding labels for training.
# The binarizer learns its class list from the training split only.
lb = LabelBinarizer().fit(y_train)

# Apply the same fitted encoder to all three label sets.
y_train_ohe, y_val_ohe, y_test_ohe = (
    lb.transform(labels) for labels in (y_train, y_val, y_test_raw)
)

# The width of the encoded training matrix is used as the class count.
n_classes = y_train_ohe.shape[1]
print("Number of classes:", n_classes)

Number of classes: 24


In [8]:
# Save a label mapping so Streamlit app can decode predictions
label_map = {int(label): int(label) for label in lb.classes_}

# Human-readable mapping: the uppercase alphabet with 'J' left out
letters = [chr(code) for code in range(ord('A'), ord('Z') + 1) if code != ord('J')]

# Letters are assigned by rank, not by value: the k-th smallest class label
# receives the k-th entry of `letters`. Should there ever be more classes than
# letters, the surplus classes fall back to their numeric label as a string.
sorted_classes = sorted(list(lb.classes_))
class_to_letter = {
    int(cls): (letters[rank] if rank < len(letters) else str(cls))
    for rank, cls in enumerate(sorted_classes)
}

In [9]:
# Cast the class labels to built-in ints, in ascending order
sorted_classes = list(map(int, sorted(lb.classes_)))

# Use string keys for the letter lookup
class_to_letter = {str(int(cls)): letter for cls, letter in class_to_letter.items()}

# Write the class list and the letter lookup next to the model file
with open(LABEL_MAP_PATH, "w") as f:
    json.dump(
        {
            "classes": sorted_classes,
            "class_to_letter": class_to_letter,
        },
        f,
        indent=2,
    )

In [10]:
# Build an LSTM Model
def build_lstm_model(input_shape=(28,28), num_classes=n_classes):
    """
    Assemble the (uncompiled) two-layer LSTM classifier.

    Parameters:
        input_shape: (timesteps, features) of one input sequence.
        num_classes: width of the final softmax layer; the default is the
            value `n_classes` had when this function was defined.

    Returns:
        A Keras Sequential model; compiling it is left to the caller.
    """
    # Two stacked LSTMs: the first emits the full sequence for the second,
    # which emits only its final output.
    recurrent_stack = [
        LSTM(128, input_shape=input_shape, return_sequences=True),
        BatchNormalization(),
        Dropout(0.3),
        LSTM(64, return_sequences=False),
        BatchNormalization(),
        Dropout(0.25),
    ]
    # Dense head ending in one softmax unit per class.
    classifier_head = [
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(num_classes, activation='softmax'),
    ]
    return Sequential(recurrent_stack + classifier_head)

In [11]:
# Seed Python, NumPy and TensorFlow before any weights are created so that
# weight initialisation and training repeat across Restart & Run All.
# 42 matches the random_state already used for the train/validation split.
# NOTE: some GPU kernels may still introduce small run-to-run differences.
tf.keras.utils.set_random_seed(42)

# Build the network and configure it for one-hot multi-class targets.
model = build_lstm_model()
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 28, 128)           80384     
                                                                 
 batch_normalization (Batch  (None, 28, 128)           512       
 Normalization)                                                  
                                                                 
 dropout (Dropout)           (None, 28, 128)           0         
                                                                 
 lstm_1 (LSTM)               (None, 64)                49408     
                                                                 
 batch_normalization_1 (Bat  (None, 64)                256       
 chNormalization)                                                
                                                                 
 dropout_1 (Dropout)         (None, 64)                0

In [12]:
# Callbacks
# Keep on disk only the model from the epoch with the best validation accuracy.
checkpoint = ModelCheckpoint(
    filepath=MODEL_PATH,
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)
# Halve the learning rate after 4 epochs without val_loss improvement, down to 1e-6.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=4,
    min_lr=1e-6,
    verbose=1,
)
# Stop after 10 epochs without val_loss improvement and restore the best weights.
early = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

In [13]:
# Training
BATCH_SIZE = 128
EPOCHS = 100  # upper bound; the EarlyStopping callback may end training sooner

history = model.fit(
    x=X_train,
    y=y_train_ohe,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val, y_val_ohe),
    callbacks=[checkpoint, reduce_lr, early],
    verbose=2,  # one log line per epoch
)

Epoch 1/100

Epoch 1: val_accuracy improved from -inf to 0.23733, saving model to model/asl_lstm_model.h5
189/189 - 8s - loss: 2.2133 - accuracy: 0.3190 - val_loss: 2.4598 - val_accuracy: 0.2373 - lr: 0.0010 - 8s/epoch - 40ms/step
Epoch 2/100


  saving_api.save_model(



Epoch 2: val_accuracy improved from 0.23733 to 0.58118, saving model to model/asl_lstm_model.h5
189/189 - 5s - loss: 1.1860 - accuracy: 0.5968 - val_loss: 1.2931 - val_accuracy: 0.5812 - lr: 0.0010 - 5s/epoch - 29ms/step
Epoch 3/100

Epoch 3: val_accuracy improved from 0.58118 to 0.77420, saving model to model/asl_lstm_model.h5
189/189 - 5s - loss: 0.7248 - accuracy: 0.7493 - val_loss: 0.6565 - val_accuracy: 0.7742 - lr: 0.0010 - 5s/epoch - 27ms/step
Epoch 4/100

Epoch 4: val_accuracy improved from 0.77420 to 0.93961, saving model to model/asl_lstm_model.h5
189/189 - 5s - loss: 0.4709 - accuracy: 0.8385 - val_loss: 0.1959 - val_accuracy: 0.9396 - lr: 0.0010 - 5s/epoch - 26ms/step
Epoch 5/100

Epoch 5: val_accuracy did not improve from 0.93961
189/189 - 5s - loss: 0.3319 - accuracy: 0.8841 - val_loss: 0.4988 - val_accuracy: 0.8528 - lr: 0.0010 - 5s/epoch - 26ms/step
Epoch 6/100

Epoch 6: val_accuracy did not improve from 0.93961
189/189 - 5s - loss: 0.2385 - accuracy: 0.9200 - val_loss

In [14]:
# Evaluate on test set
print("Evaluating on test set...")

# Swap in the weights stored at MODEL_PATH (the checkpoint file) before scoring.
model.load_weights(MODEL_PATH)  # best model
test_loss, test_acc = model.evaluate(x=X_test_seq, y=y_test_ohe, verbose=2)
print(f"Test accuracy: {test_acc:.4f}")

print("Training complete. Model saved to", MODEL_PATH)

Evaluating on test set...
225/225 - 2s - loss: 0.8464 - accuracy: 0.8430 - 2s/epoch - 10ms/step
Test accuracy: 0.8430
Training complete. Model saved to model/asl_lstm_model.h5
