In [None]:
import sys
import os
import cv2
import time
import json
import logging
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim

# mediapipe for hand tracking
import mediapipe as mp

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Setup banner: the same 30-character rule is printed above and below.
divider = "=" * 30
print(divider)
print(f"Device in use: {device}")
print("Setup Complete.")
print(divider)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import os

# Project folders live under the mounted Drive root.
BASE_DIR = "/content/drive/MyDrive"
DATA_DIR = BASE_DIR + "/data"
ASL_DIR = DATA_DIR + "/asl_alphabet"

# Create both folders up front; exist_ok makes re-running the cell harmless.
for folder in (DATA_DIR, ASL_DIR):
    os.makedirs(folder, exist_ok=True)

print("✔ Created directories:")
for folder in (DATA_DIR, ASL_DIR):
    print(folder)

# Instructions banner (text emitted exactly as before).
banner_rule = "============================================================"
print("\n" + banner_rule)
print(" HOW TO ATTACH DATASETS TO THIS NOTEBOOK")
print(banner_rule)
print("""
STEP 1 — Click 'Add Data' on the right sidebar.

STEP 2 — Search & attach the following datasets:

    ASL Alphabet Dataset (Grassknoted)
    https://www.kaggle.com/datasets/grassknoted/asl-alphabet

STEP 3 — After attaching all datasets, run the next cell to verify paths.
""")

In [None]:
def find_path(possible_paths):
    """Return the first entry of ``possible_paths`` that exists on disk.

    Args:
        possible_paths: Iterable of filesystem paths, tried in order.

    Returns:
        The first path for which ``os.path.exists`` is true, or ``None``
        when none of the candidates exist (including an empty iterable).
    """
    existing = (candidate for candidate in possible_paths if os.path.exists(candidate))
    return next(existing, None)

# ASL Alphabet dataset
# Candidate locations, checked in order (Google Colab paths).
asl_candidates = [
    "/content/drive/MyDrive/data/asl_alphabet",
    "/content/drive/MyDrive/data/asl_alphabet_train",
    "/content/asl_alphabet",
]

# A bare existence test is not enough here: the setup cell creates the first
# candidate with os.makedirs(), so it exists even when no data was ever copied
# into it. Only directories that actually contain entries count as "found".
asl_path = find_path([
    candidate for candidate in asl_candidates
    if os.path.isdir(candidate) and os.listdir(candidate)
])

print("============================================================")
print(" DATASET PATH CHECK RESULTS")
print("============================================================")
print(f"ASL Alphabet found: {asl_path}")

if asl_path:
    print("✔ ASL Alphabet correctly attached.")
else:
    print("❌ ASL Alphabet NOT FOUND — attach using 'Add Data'.")

In [None]:
# extract 3D Landmarks from ASL Images

import os
import json
import numpy as np
import mediapipe as mp
from tqdm import tqdm
import cv2


# Extraction settings.
# (The original trailing comment was mangled into `u# pdate ...`, which left a
# stray `u` token after the string literal and made the whole cell a SyntaxError.)
DATA_DIR = "data/asl_alphabet"  # update to dataset path
OUTPUT_DIR = "output"           # landmarks.npy, labels.npy and label_map.json are written here
MAX_IMAGES_PER_CLASS = 1000     # upper bound on images read from each class folder

# Create output directory
os.makedirs(OUTPUT_DIR, exist_ok=True)

# MediaPipe Hands detector configured for independent still images,
# reporting at most one hand per image.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5)

# Every sub-directory of DATA_DIR is one class; plain files are ignored.
class_folders = []
for entry in os.listdir(DATA_DIR):
    if os.path.isdir(os.path.join(DATA_DIR, entry)):
        class_folders.append(entry)
class_folders.sort()
print(f"Found {len(class_folders)} classes: {class_folders}")

# Class name -> integer label, numbered in sorted folder order.
label_map = dict(zip(class_folders, range(len(class_folders))))
print(f"Label map: {label_map}")

# Accumulators filled by the extraction loop.
landmarks_list, labels_list = [], []
no_hand_count = 0

# Process each class: run the hand detector on up to MAX_IMAGES_PER_CLASS images
# per folder and collect one flat 63-value landmark row per detected hand.
unreadable_count = 0  # files cv2.imread could not decode (previously skipped without a trace)

for class_name in tqdm(class_folders, desc="Processing classes"):
    class_path = os.path.join(DATA_DIR, class_name)

    # os.listdir() returns entries in arbitrary, filesystem-dependent order.
    # Sort BEFORE truncating so the same subset of images is selected on every
    # run and on every machine.
    images = sorted(
        f for f in os.listdir(class_path)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )[:MAX_IMAGES_PER_CLASS]

    print(f"Processing class '{class_name}': {len(images)} images")

    for img_name in images:
        img_path = os.path.join(class_path, img_name)

        # cv2.imread returns None instead of raising when a file cannot be read.
        img = cv2.imread(img_path)
        if img is None:
            unreadable_count += 1
            continue

        # OpenCV loads BGR; convert to RGB before handing the image to MediaPipe.
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = hands.process(img_rgb)

        if not results.multi_hand_landmarks:
            no_hand_count += 1
            print(f"  No hand detected: {img_name}")
            continue

        # Flatten the first detected hand to [x0, y0, z0, x1, y1, z1, ...]
        # (21 landmarks x 3 coordinates = 63 values).
        hand_landmarks = results.multi_hand_landmarks[0]
        landmark_values = [
            coord
            for landmark in hand_landmarks.landmark
            for coord in (landmark.x, landmark.y, landmark.z)
        ]

        landmarks_list.append(landmark_values)
        labels_list.append(label_map[class_name])

if unreadable_count:
    print(f"Skipped {unreadable_count} unreadable image file(s)")

# Convert to numpy arrays.
# reshape(-1, 63) keeps the documented (N, 63) layout even when no hand was
# detected at all; np.array([]) on its own would be 1-D with shape (0,).
landmarks_array = np.array(landmarks_list, dtype=np.float32).reshape(-1, 63)
labels_array = np.array(labels_list, dtype=np.int32)

if len(landmarks_array) == 0:
    print("⚠ No landmarks were extracted — check DATA_DIR and the image folders.")

# Verify shapes
print("\n=== Results ===")
print(f"Landmarks shape: {landmarks_array.shape} (expected: (N, 63))")
print(f"Labels shape: {labels_array.shape} (expected: (N,))")
print(f"Images with no hand detected: {no_hand_count}")

# Save outputs: the two arrays as .npy, the class-name -> index map as JSON.
np.save(os.path.join(OUTPUT_DIR, "landmarks.npy"), landmarks_array)
np.save(os.path.join(OUTPUT_DIR, "labels.npy"), labels_array)

with open(os.path.join(OUTPUT_DIR, "label_map.json"), "w") as f:
    json.dump(label_map, f, indent=2)

print(f"\n✔ Saved to {OUTPUT_DIR}:")
print("  - landmarks.npy")
print("  - labels.npy")
print("  - label_map.json")

In [None]:
# Normalize Skeletal Hand Data and Split into Train/Validation Sets

import os
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit

# Input folder (extraction results) and output folder (normalized arrays).
OUTPUT_DIR = "output"
NORMALIZED_DIR = "normalized"
os.makedirs(NORMALIZED_DIR, exist_ok=True)

# Read back the arrays saved by the extraction step (landmarks first, then labels).
landmarks, labels = (
    np.load(os.path.join(OUTPUT_DIR, file_name))
    for file_name in ("landmarks.npy", "labels.npy")
)

print(f"Loaded landmarks shape: {landmarks.shape}")
print(f"Loaded labels shape: {labels.shape}")

# View each flat 63-value row as 21 joints x 3 coordinates: (N, 63) -> (N, 21, 3).
N = len(landmarks)
landmarks_reshaped = np.reshape(landmarks, (N, 21, 3))
print(f"Reshaped landmarks: {landmarks_reshaped.shape}")

def normalize_hand(landmarks_21x3):
    """Center a hand on its wrist and scale it by its largest wrist-to-joint distance.

    Joint 0 is treated as the wrist. After normalization the wrist sits at the
    origin and the joint farthest from it lies at Euclidean distance 1.

    Args:
        landmarks_21x3: Array-like of shape ``(21, 3)`` for a single hand, or
            ``(..., 21, 3)`` for a batch of hands. Every hand in a batch is
            normalized independently.

    Returns:
        ``numpy.ndarray`` with the same shape as the input. A degenerate hand
        whose joints all coincide with the wrist (max distance 0) is only
        centered, never divided by zero.
    """
    points = np.asarray(landmarks_21x3)

    # Center: subtract the wrist (joint 0) from every joint. The slice keeps the
    # joint axis so the subtraction broadcasts for single hands and batches alike.
    centered = points - points[..., :1, :]

    # Scale: largest Euclidean distance from the wrist to any other joint,
    # kept with shape (..., 1, 1) so it broadcasts against `centered`.
    distances = np.linalg.norm(centered[..., 1:, :], axis=-1)
    max_distance = np.max(distances, axis=-1, keepdims=True)[..., np.newaxis]

    # Where the maximum is not strictly positive (all joints on the wrist, or
    # NaN input) divide by 1 instead, i.e. leave that hand centered but unscaled.
    safe_divisor = np.where(max_distance > 0, max_distance, 1)
    return centered / safe_divisor

# Apply the per-hand normalization to every sample.
landmarks_normalized = np.array(list(map(normalize_hand, landmarks_reshaped)))
print(f"Normalized landmarks shape: {landmarks_normalized.shape}")

# Single stratified shuffle split with 10% held out for validation (fixed seed).
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=42)
train_idx, val_idx = next(sss.split(landmarks_normalized, labels))

# Materialize the two subsets from the index arrays.
X_train, X_val = landmarks_normalized[train_idx], landmarks_normalized[val_idx]
y_train, y_val = labels[train_idx], labels[val_idx]

# Persist the full normalized data plus the split indices, so the split can be
# rebuilt later by indexing rather than by re-running the splitter.
for file_name, array in (
    ("landmarks_normalized.npy", landmarks_normalized),
    ("labels.npy", labels),
    ("train_indices.npy", train_idx),
    ("val_indices.npy", val_idx),
):
    np.save(os.path.join(NORMALIZED_DIR, file_name), array)

# Verification
print("\n=== Results ===")
print(f"Training set: {X_train.shape}, labels: {y_train.shape}")
print(f"Validation set: {X_val.shape}, labels: {y_val.shape}")
print("\nSample normalized landmark (first sample, first 3 joints):")
print(landmarks_normalized[0][:3])

In [None]:
# 1D-CNN Model with Residual Connections for ASL Classification

import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import matplotlib.pyplot as plt

# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

# Load normalized data and the saved train/validation index arrays
NORMALIZED_DIR = "normalized"
landmarks = np.load(os.path.join(NORMALIZED_DIR, "landmarks_normalized.npy"))
labels = np.load(os.path.join(NORMALIZED_DIR, "labels.npy"))
train_idx = np.load(os.path.join(NORMALIZED_DIR, "train_indices.npy"))
val_idx = np.load(os.path.join(NORMALIZED_DIR, "val_indices.npy"))

# Split data using saved indices
X_train = landmarks[train_idx]
X_val = landmarks[val_idx]
y_train = labels[train_idx]
y_val = labels[val_idx]

# The output layer must cover every label INDEX, not just the number of
# distinct labels present. The extraction step only keeps images in which a
# hand was detected, so a class can end up with zero samples; counting unique
# labels would then make the softmax too narrow and leave the highest label
# indices out of range for sparse categorical cross-entropy.
num_classes = int(labels.max()) + 1
print(f"Training: {X_train.shape}, Validation: {X_val.shape}")
print(f"Number of classes: {num_classes}")

# Residual Block
def residual_block(x, filters, kernel_size=3):
    """Two Conv1D + BatchNorm stages wrapped by an additive skip path.

    Args:
        x: Input Keras tensor; its last axis is the channel axis.
        filters: Number of output channels for both convolutions.
        kernel_size: Width of the two main-path convolutions.

    Returns:
        Keras tensor with ``filters`` channels: ReLU(main_path + skip).
    """
    skip = x

    # Main path, stage 1: conv -> batch norm -> ReLU
    out = layers.Conv1D(filters, kernel_size, padding='same')(x)
    out = layers.BatchNormalization()(out)
    out = layers.Activation('relu')(out)

    # Main path, stage 2: conv -> batch norm (activation is applied after the add)
    out = layers.Conv1D(filters, kernel_size, padding='same')(out)
    out = layers.BatchNormalization()(out)

    # Project the skip path with a 1x1 conv only when the channel counts differ,
    # so the two tensors can be summed.
    channels_differ = skip.shape[-1] != filters
    if channels_differ:
        skip = layers.Conv1D(filters, 1, padding='same')(skip)

    merged = layers.Add()([out, skip])
    return layers.Activation('relu')(merged)

# Build Model
inputs = layers.Input(shape=(21, 3))  # 21 joints, 3D coordinates

# Stem: conv -> batch norm -> ReLU
x = layers.Conv1D(64, 3, padding='same')(inputs)
x = layers.BatchNormalization()(x)
x = layers.Activation('relu')(x)

# Three residual stages; filter count and dropout rate both grow with depth.
for stage_filters, drop_rate in ((64, 0.3), (128, 0.4), (256, 0.5)):
    x = residual_block(x, stage_filters)
    x = layers.Dropout(drop_rate)(x)

# Classification head: average over the joint axis, then a softmax over classes.
x = layers.GlobalAveragePooling1D()(x)
outputs = layers.Dense(num_classes, activation='softmax')(x)

model = Model(inputs, outputs)

# Compile: integer labels, hence the sparse cross-entropy loss.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

# Callbacks:
#  - stop once validation accuracy has not improved for 10 epochs, restoring the best weights
#  - halve the learning rate after 3 epochs without validation-loss improvement (floor 1e-6)
early_stop = EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True,
)
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
)

# Train
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=64,
    callbacks=[early_stop, reduce_lr],
    verbose=1,
)

# Persist the trained model in the native Keras format.
model.save("asl_resnet_model.keras")
print("\n✔ Model saved to asl_resnet_model.keras")

# Plot training history: accuracy on the left panel, loss on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

# Both panels share the same layout, so draw them from one loop.
for panel, metric, title in ((ax1, 'accuracy', 'Accuracy'), (ax2, 'loss', 'Loss')):
    panel.plot(history.history[metric], label='Train')
    panel.plot(history.history[f'val_{metric}'], label='Validation')
    panel.set_title(title)
    panel.set_xlabel('Epoch')
    panel.legend()

plt.tight_layout()
plt.savefig("training_history.png", dpi=150)
plt.show()

print("✔ Training history saved to training_history.png")