# Sign Language Recognition Model Training
- 1001 Classes
- CPU Training
- Real-time Inference Ready

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import LSTM, Dense, Dropout, Masking
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Load the preprocessed landmark sequences and labels for both splits.
# NOTE(review): allow_pickle=True makes np.load unpickle arbitrary objects, which
# can execute code - only point these paths at files produced by a trusted pipeline.
X_train, y_train, X_val, y_val = (
    np.load(npy_path, allow_pickle=True)
    for npy_path in (
        'MS_train/processed_data/landmarks_train.npy',
        'MS_train/processed_data/labels_train.npy',
        'MS_train/processed_data/landmarks_val.npy',
        'MS_train/processed_data/labels_val.npy',
    )
)

In [None]:
# Data Analysis: number of frames in each training sequence.
seq_lengths = list(map(len, X_train))
print(f"Sequence Length Analysis:\n"
      f"- Average: {np.mean(seq_lengths):.1f}\n"
      f"- Max: {np.max(seq_lengths)}\n"
      f"- Min: {np.min(seq_lengths)}")

# Histogram of the frame counts, drawn through the explicit Axes interface.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(seq_lengths, bins=50)
ax.set_title('Sequence Length Distribution')
ax.set_xlabel('Number of Frames')
ax.set_ylabel('Count')
plt.show()

In [None]:
# Class Distribution Analysis: samples per label in the training split.
class_counts = pd.Series(y_train).value_counts()
print(f"Class Distribution:\n"
      f"- Most common class: {class_counts.idxmax()} ({class_counts.max()} samples)\n"
      f"- Least common class: {class_counts.idxmin()} ({class_counts.min()} samples)")

# One bar per class, in the order value_counts() returned them.
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(np.arange(len(class_counts)), class_counts.to_numpy())
ax.set_title('Class Distribution')
ax.set_xlabel('Class Index')
ax.set_ylabel('Sample Count')
plt.show()

In [None]:
# Preprocessing
# Fix the model's time dimension at the 95th-percentile training sequence length.
MAX_SEQ_LENGTH = int(np.percentile(seq_lengths, 95))  # Covers 95% of samples
print(f"Using sequence length: {MAX_SEQ_LENGTH}")

# Padding sequences: both splits share the same settings (zeros appended at the
# end; anything longer than MAX_SEQ_LENGTH is cut by pad_sequences' default
# truncation mode).
_pad_options = dict(maxlen=MAX_SEQ_LENGTH, padding='post', dtype='float32')
X_train_padded = pad_sequences(X_train, **_pad_options)
X_val_padded = pad_sequences(X_val, **_pad_options)

# Label encoding: the vocabulary is learned from the training labels only, so a
# validation label that never occurs in training makes transform() raise.
le = LabelEncoder().fit(y_train)
y_train_enc = le.transform(y_train)
y_val_enc = le.transform(y_val)

In [None]:
# Build CPU-optimized model: a single small LSTM followed by a dense classifier.
model = Sequential()
# Input is (MAX_SEQ_LENGTH, 126); timesteps whose features are all 0 are masked.
model.add(Masking(mask_value=0., input_shape=(MAX_SEQ_LENGTH, 126)))
# Only the final LSTM state is passed on (return_sequences=False).
model.add(LSTM(64, return_sequences=False, activation='tanh'))
model.add(Dropout(0.3))
model.add(Dense(128, activation='relu'))
# One softmax unit per class; labels are integer ids, hence the sparse loss below.
model.add(Dense(1001, activation='softmax'))

# Learning rate is spelled out explicitly so it is easy to tune.
optimizer = Adam(learning_rate=0.001)

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Training Configuration
batch_size = 32  # Reduced for CPU memory
epochs = 50

# Stop after 5 epochs without improvement and roll back to the best weights.
early_stopping = EarlyStopping(patience=5, restore_best_weights=True)
# Write a checkpoint only when the monitored metric improves.
checkpoint = ModelCheckpoint('best_model_cpu.h5', save_best_only=True)
callbacks = [early_stopping, checkpoint]

In [None]:
# Train on the padded training split, validating on the padded validation split.
history = model.fit(
    x=X_train_padded,
    y=y_train_enc,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    callbacks=callbacks,
    validation_data=(X_val_padded, y_val_enc),
)

In [None]:
# Save final assets
# Persist the trained network together with the label vocabulary; the saved
# classes array is what maps a predicted index back to its original label.
model.save('sign_language_model.h5')
np.save('label_encoder_classes.npy', le.classes_)

# Convert to TF.js format
# NOTE(review): this install is unpinned and runs every time the cell executes;
# consider pinning a version and moving it to a setup cell at the top.
%pip install tensorflowjs
# Reads the .h5 file written above and writes the converted model to tfjs_model.
!tensorflowjs_converter --input_format keras sign_language_model.h5 tfjs_model

In [None]:
# Frame-count statistics for each split, taken from the unpadded sequences.
train_lengths = list(map(len, X_train))
val_lengths = list(map(len, X_val))

print("Training Set:")
print(f"- Average sequence length: {np.mean(train_lengths):.1f}")
print(f"- 95th percentile: {np.percentile(train_lengths, 95)}")
print(f"- Max length: {np.max(train_lengths)}")

# The validation summary reports the mean and 95th percentile only.
print("\nValidation Set:")
print(f"- Average sequence length: {np.mean(val_lengths):.1f}")
print(f"- 95th percentile: {np.percentile(val_lengths, 95)}")

In [None]:
# Class Imbalance Analysis: per-class sample counts, most frequent first.
class_distribution = (
    pd.Series(y_train)
    .value_counts()
    .sort_values(ascending=False)
)
print("Class Balance Metrics:")
print(f"- Class count range: {class_distribution.min()} to {class_distribution.max()}")
print(f"- Median samples per class: {class_distribution.median()}")
print(f"- Classes with <10 samples: {(class_distribution < 10).sum()}")

# Long-tail visualization: sorted counts on a log-scaled y axis.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(class_distribution.values)
ax.set_title('Class Distribution (Sorted by Frequency)')
ax.set_xlabel('Class Index')
ax.set_ylabel('Number of Samples')
ax.set_yscale('log')
plt.show()

In [None]:
# Feature Distribution Analysis
# Merge the sample axis into the frame axis so every frame becomes one row.
# NOTE(review): this is the padded array, so the zeros added by padding are
# counted in every statistic and in the histogram below.
all_landmarks = X_train_padded.reshape((-1,) + X_train_padded.shape[2:])
print("Landmark Value Distribution:")
print(f"- X range: [{np.min(all_landmarks):.3f}, {np.max(all_landmarks):.3f}]")
print(f"- Mean: {np.mean(all_landmarks):.3f}")
print(f"- Std Dev: {np.std(all_landmarks):.3f}")

# Plot coordinate distributions over every value, with log-scaled counts.
fig, ax = plt.subplots(figsize=(12, 6))
ax.hist(all_landmarks.ravel(), bins=100, log=True)
ax.set_title('Landmark Coordinate Distribution')
ax.set_xlabel('Normalized Coordinate Value')
ax.set_ylabel('Frequency (log scale)')
plt.show()

## Prediction Visualization

In [None]:
# Sample Predictions
# Plot the predicted class probabilities for a few random validation clips.
test_samples = 5

# Sample without replacement so the same clip is never plotted twice, and never
# request more clips than the validation split holds.
n_shown = min(test_samples, len(X_val))
sample_indices = np.random.choice(len(X_val), n_shown, replace=False)

# One batched forward pass instead of one predict() call per clip.
predictions = model.predict(X_val_padded[sample_indices])
true_labels = le.inverse_transform(y_val_enc[sample_indices])
pred_labels = le.inverse_transform(np.argmax(predictions, axis=1))

plt.figure(figsize=(15, 10))
for i, (probs, true_label, pred_label) in enumerate(
        zip(predictions, true_labels, pred_labels)):
    plt.subplot(n_shown, 1, i + 1)
    # The bar count follows the model's output width rather than a hard-coded 1001.
    plt.bar(range(len(probs)), probs)
    plt.title(f'True: {true_label} | Pred: {pred_label}')
    plt.ylim(0, 1)
plt.tight_layout()
plt.show()

In [None]:
# Confusion Matrix (Top 50 Classes)
from sklearn.metrics import confusion_matrix
import seaborn as sns

# The 50 most frequent training classes, and their encoded ids (computed once).
top_classes = class_distribution.index[:50]
top_class_ids = le.transform(top_classes)

# Keep only validation samples whose true label is one of those classes.
mask = np.isin(y_val_enc, top_class_ids)
val_pred_ids = np.argmax(model.predict(X_val_padded[mask]), axis=1)

# `labels` fixes both the row/column order and the set of classes tabulated.
cm = confusion_matrix(y_val_enc[mask], val_pred_ids, labels=top_class_ids)

fig, ax = plt.subplots(figsize=(20, 20))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=top_classes,
            yticklabels=top_classes,
            ax=ax)
ax.set_title('Top 50 Classes Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

In [None]:
# Real-time Inference Simulation
def real_time_demo():
    """Classify signs live from the default webcam until 'q' is pressed.

    Landmark values from every frame with a detected hand are appended to a
    rolling buffer. Once the buffer holds MAX_SEQ_LENGTH * 126 values, the
    newest window is reshaped to (1, MAX_SEQ_LENGTH, 126), passed to `model`,
    and the decoded label is drawn on the frame.

    The loop also ends when the camera stops delivering frames. The camera,
    the MediaPipe Hands instance and the OpenCV windows are released even if
    an exception is raised.

    NOTE(review): `cv2`, `mp` and `extract_landmarks_from_frame` are not
    imported or defined in the visible part of this notebook; they must be in
    scope before this is called. The buffer logic assumes the helper returns
    126 values per frame in total (left + right) - confirm against the helper.
    """
    window_size = MAX_SEQ_LENGTH * 126  # flattened length of one model input

    cap = cv2.VideoCapture(0)
    try:
        with mp.solutions.hands.Hands() as hands:
            sequence = []
            while True:
                ret, frame = cap.read()
                if not ret:
                    # Camera unavailable or stream ended: `frame` is unusable.
                    break

                results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

                if results.multi_hand_landmarks:
                    left, right = extract_landmarks_from_frame(frame, hands)
                    sequence.extend(left)
                    sequence.extend(right)
                    # Keep only the newest window so the buffer cannot grow
                    # without bound during a long session.
                    del sequence[:-window_size]

                    if len(sequence) >= window_size:
                        # The buffer is exactly one window long here, so no
                        # padding is needed before reshaping.
                        input_data = np.asarray(sequence, dtype='float32')
                        prediction = model.predict(
                            input_data.reshape(1, MAX_SEQ_LENGTH, 126),
                            verbose=0,  # avoid one progress bar per frame
                        )
                        pred_class = le.inverse_transform([np.argmax(prediction)])[0]
                        cv2.putText(frame, pred_class, (50, 50),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)

                cv2.imshow('Sign Detection', frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        cap.release()
        cv2.destroyAllWindows()

# Uncomment to run demo
# real_time_demo()