In [None]:
# Install into the running kernel's environment (%pip, not !pip).
# `kagglehub` is the package the next cell actually imports; `kaggle` is kept
# so anything relying on the original install keeps working.
%pip install -q kaggle kagglehub




In [None]:
import kagglehub

# Fetch the "grassknoted/asl-alphabet" dataset through kagglehub and keep the
# returned local path; it is printed so the dataset location is visible.
path = kagglehub.dataset_download("grassknoted/asl-alphabet")
print(path)

Using Colab cache for faster access to the 'asl-alphabet' dataset.
/kaggle/input/asl-alphabet


In [None]:
# Pinned MediaPipe build; %pip guarantees the install targets the kernel's
# own environment rather than whatever `pip` is first on the shell PATH.
%pip install -q mediapipe==0.10.13

Collecting mediapipe==0.10.13
  Downloading mediapipe-0.10.13-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting protobuf<5,>=4.25.3 (from mediapipe==0.10.13)
  Downloading protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Downloading mediapipe-0.10.13-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (35.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m59.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl (294 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.9/294.9 kB[0m [31m22.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: protobuf, mediapipe
  Attempting uninstall: protobuf
    Found existing installation: protobuf 5.29.5
    Uninstalling protobuf-5.29.5:
      Successfully uninstalled protobuf-5.29.5
  Attempting uninstall: mediapipe
    Found existing installation: mediapipe 0.10

In [None]:
import cv2
import mediapipe as mp
import numpy as np
import os

# MediaPipe Hands detector configured for independent still images and at
# most one hand per image.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=1, static_image_mode=True)

# Length of one feature vector: (x, y, z) for each of the 21 hand landmarks.
F = 21 * 3

In [None]:
def extract_from_image(img_path):
    """Extract a flat hand-landmark feature vector from one image file.

    The image is read with OpenCV, converted from BGR to RGB and passed to
    the module-level MediaPipe ``hands`` detector.

    Args:
        img_path: Path of the image file to read.

    Returns:
        A float32 NumPy array of shape ``(F,)`` holding ``x, y, z`` for each
        landmark of the first detected hand, in landmark order. The vector is
        all zeros when no hand is detected.

    Raises:
        ValueError: If OpenCV cannot read or decode ``img_path``.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than inside cvtColor.
        raise ValueError(f"Could not read image: {img_path}")

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    res = hands.process(img)

    # Zero vector is the "no hand detected" result.
    kp = np.zeros((F,), dtype=np.float32)

    if res.multi_hand_landmarks:
        # max_num_hands is 1 in this notebook, so only the first hand is used.
        hand = res.multi_hand_landmarks[0]
        for i, lm in enumerate(hand.landmark):
            kp[i*3:i*3+3] = [lm.x, lm.y, lm.z]

    return kp

In [None]:
import shutil, os

if os.path.exists("data/train"):
    shutil.rmtree("data/train")

os.makedirs("data/train", exist_ok=True)

In [None]:
import os
import numpy as np
from tqdm import tqdm

# Build the training-image root from the directory kagglehub returned instead
# of hard-coding the mount point, so the cell also works where the dataset is
# downloaded to a different location.
BASE = os.path.join(path, "asl_alphabet_train", "asl_alphabet_train")
MAX_PER_CLASS = 100

sample_id = 0
skipped = 0   # images whose feature extraction raised
no_hand = 0   # images saved as an all-zero vector (no hand detected)

for letter in sorted(os.listdir(BASE)):
    letter_path = os.path.join(BASE, letter)

    # Only real classes: A–Z (single-character directory names)
    if not os.path.isdir(letter_path) or len(letter) != 1:
        continue

    print("Processing class:", letter)

    # os.listdir order is arbitrary; sort before slicing so the same
    # MAX_PER_CLASS images are selected on every run.
    images = sorted(os.listdir(letter_path))[:MAX_PER_CLASS]

    for img_name in tqdm(images, leave=False):
        img_path = os.path.join(letter_path, img_name)

        # Best-effort: a single unreadable image must not abort the run, but
        # it is reported and counted rather than silently dropped.
        try:
            kp = extract_from_image(img_path)
        except Exception as exc:
            skipped += 1
            tqdm.write(f"Skipping {img_path}: {exc}")
            continue

        if not kp.any():
            # NOTE(review): these samples carry no information about the sign;
            # they are still saved to keep the dataset contents unchanged.
            no_hand += 1

        # Disk writes are outside the try block on purpose: a failure here
        # should surface instead of leaving a feature file without a label.
        np.save(f"data/train/{sample_id}.npy", kp)
        with open(f"data/train/{sample_id}.txt", "w") as label_file:
            label_file.write(letter)

        sample_id += 1

print(
    f"Saved {sample_id} samples "
    f"({no_hand} without a detected hand), skipped {skipped} images"
)


Processing class: A




Processing class: B




Processing class: C




Processing class: D




Processing class: E




Processing class: F




Processing class: G




Processing class: H




Processing class: I




Processing class: J




Processing class: K




Processing class: L




Processing class: M




Processing class: N




Processing class: O




Processing class: P




Processing class: Q




Processing class: R




Processing class: S




Processing class: T




Processing class: U




Processing class: V




Processing class: W




Processing class: X




Processing class: Y




Processing class: Z




In [None]:
# Collect the distinct class labels: every "<id>.npy" feature file in
# data/train has a sibling "<id>.txt" file holding its label.
label_set = set()
for fname in os.listdir("data/train"):
    if not fname.endswith(".npy"):
        continue
    # Context manager closes each label file promptly instead of leaving the
    # handle to be reclaimed by the garbage collector.
    with open(f"data/train/{fname.replace('.npy','.txt')}") as label_file:
        label_set.add(label_file.read().strip())

labels = sorted(label_set)

# Bidirectional mapping between label strings and contiguous class indices.
label2idx = {c: i for i, c in enumerate(labels)}
idx2label = {i: c for c, i in label2idx.items()}

NUM_CLASSES = len(labels)

print("Labels:", labels)
print("NUM_CLASSES:", NUM_CLASSES)


Labels: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
NUM_CLASSES: 26


In [None]:
from sklearn.model_selection import train_test_split
import os

# os.listdir returns entries in arbitrary order; sort the ids so that the
# seeded split below really is reproducible from run to run.
all_ids = sorted(
    f.replace(".npy", "") for f in os.listdir("data/train") if f.endswith(".npy")
)

# Read each sample's label once (closing every file) to drive stratification.
all_labels = []
for i in all_ids:
    with open(f"data/train/{i}.txt") as label_file:
        all_labels.append(label_file.read().strip())

# 80/20 split that keeps the per-class proportions in both subsets.
train_ids, val_ids = train_test_split(
    all_ids,
    test_size=0.2,
    random_state=42,
    stratify=all_labels,
)

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader

class ASLDataset(Dataset):
    """Dataset of saved keypoint vectors and their class indices.

    Each sample ``sid`` is stored as ``<root>/<sid>.npy`` (feature vector)
    and ``<root>/<sid>.txt`` (label string).

    Args:
        root: Directory containing the ``.npy`` / ``.txt`` pairs.
        ids: Sequence of sample ids (file names without extension).
        label_map: Optional mapping from label string to class index. When
            omitted, the notebook-level ``label2idx`` is looked up at access
            time, which is the original behaviour.
    """

    def __init__(self, root, ids, label_map=None):
        self.root = root
        self.ids = ids
        self.label_map = label_map

    def __len__(self):
        """Return the number of samples."""
        return len(self.ids)

    def __getitem__(self, idx):
        """Return ``(features, class_index)`` for the sample at ``idx``.

        ``features`` is a float32 tensor built from the saved array; a
        ``KeyError`` is raised if the stored label is missing from the map.
        """
        sid = self.ids[idx]
        x = torch.tensor(
            np.load(f"{self.root}/{sid}.npy"),
            dtype=torch.float32
        )
        # Close the label file explicitly; this runs once per sample per
        # epoch, so leaked handles would accumulate quickly.
        with open(f"{self.root}/{sid}.txt") as label_file:
            y = label_file.read().strip()

        mapping = label2idx if self.label_map is None else self.label_map
        return x, mapping[y]

In [None]:
# Training batches are reshuffled every epoch.
train_loader = DataLoader(
    dataset=ASLDataset(root="data/train", ids=train_ids),
    batch_size=64,
    shuffle=True,
)

# Validation batches keep a fixed order.
val_loader = DataLoader(
    dataset=ASLDataset(root="data/train", ids=val_ids),
    batch_size=64,
    shuffle=False,
)

In [None]:
import torch.nn as nn

class ASLClassifier(nn.Module):
    """Small MLP that maps a keypoint vector to per-class logits.

    Layout: Linear(in_features, 256) -> BatchNorm1d -> ReLU -> Dropout(0.3)
    -> Linear(256, 128) -> ReLU -> Linear(128, num_classes).

    Args:
        in_features: Length of the input feature vector (default 63).
        num_classes: Number of output classes. When omitted, the
            notebook-level ``NUM_CLASSES`` is used, as in the original code.
    """

    def __init__(self, in_features=63, num_classes=None):
        super().__init__()
        if num_classes is None:
            # Backward-compatible default: read the global set by the
            # label-mapping cell.
            num_classes = NUM_CLASSES

        # Layer order is unchanged so previously saved state_dicts still load.
        self.net = nn.Sequential(
            nn.Linear(in_features, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(256, 128),
            nn.ReLU(),

            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        """Return raw logits of shape ``(batch, num_classes)`` for input ``x``."""
        return self.net(x)

In [None]:
import torch
# Use the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Seed torch's random number generators before the model is built so weight
# initialisation (and DataLoader shuffling, which draws from the same global
# generator) starts from the same state on every run. Same seed value as the
# train/validation split.
# NOTE(review): some GPU kernels may still be non-deterministic.
torch.manual_seed(42)

model = ASLClassifier().to(device)
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()

In [None]:
for epoch in range(20):
    # ---- training pass: update weights and track running accuracy ----
    model.train()
    total = correct = 0

    for x, y in train_loader:
        x = x.to(device)
        y = y.to(device)

        # Clear gradients left over from the previous step, then
        # forward / backward / update.
        opt.zero_grad()
        logits = model(x)
        loss = loss_fn(logits, y)
        loss.backward()
        opt.step()

        preds = logits.argmax(1)
        total += y.size(0)
        correct += (preds == y).sum().item()

    train_acc = correct / total

    # ---- validation pass: no gradient tracking, eval-mode layers ----
    model.eval()
    vtotal = vcorrect = 0
    with torch.no_grad():
        for x, y in val_loader:
            x = x.to(device)
            y = y.to(device)
            preds = model(x).argmax(1)
            vtotal += y.size(0)
            vcorrect += (preds == y).sum().item()

    val_acc = vcorrect / vtotal

    print(f"Epoch {epoch+1}: Train={train_acc:.3f}, Val={val_acc:.3f}")

Epoch 1: Train=0.229, Val=0.350
Epoch 2: Train=0.532, Val=0.598
Epoch 3: Train=0.662, Val=0.721
Epoch 4: Train=0.706, Val=0.687
Epoch 5: Train=0.721, Val=0.746
Epoch 6: Train=0.736, Val=0.760
Epoch 7: Train=0.747, Val=0.771
Epoch 8: Train=0.748, Val=0.777
Epoch 9: Train=0.741, Val=0.748
Epoch 10: Train=0.749, Val=0.783
Epoch 11: Train=0.755, Val=0.779
Epoch 12: Train=0.763, Val=0.771
Epoch 13: Train=0.763, Val=0.775
Epoch 14: Train=0.755, Val=0.767
Epoch 15: Train=0.754, Val=0.771
Epoch 16: Train=0.753, Val=0.769
Epoch 17: Train=0.762, Val=0.763
Epoch 18: Train=0.757, Val=0.765
Epoch 19: Train=0.763, Val=0.692
Epoch 20: Train=0.763, Val=0.748


In [None]:
import torch
import json

# Persist the trained weights (parameters and buffers only, not the class).
weights = model.state_dict()
torch.save(weights, "asl_mlp.pth")

# Store the label -> index mapping next to the weights so predictions can be
# decoded later.
with open("label_map.json", "w") as fh:
    fh.write(json.dumps(label2idx))

In [None]:
# onnxscript is needed by the torch.onnx exporter used below. Pinned to the
# version this notebook was last run with, and installed via %pip so it lands
# in the kernel's environment.
%pip install -q onnxscript==0.5.7


Collecting onnxscript
  Downloading onnxscript-0.5.7-py3-none-any.whl.metadata (13 kB)
Collecting onnx_ir<2,>=0.1.12 (from onnxscript)
  Downloading onnx_ir-0.1.15-py3-none-any.whl.metadata (3.2 kB)
Collecting onnx>=1.16 (from onnxscript)
  Downloading onnx-1.20.1-cp312-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (8.4 kB)
Downloading onnxscript-0.5.7-py3-none-any.whl (693 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m693.4/693.4 kB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading onnx-1.20.1-cp312-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (17.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.5/17.5 MB[0m [31m117.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading onnx_ir-0.1.15-py3-none-any.whl (148 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m148.7/148.7 kB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: onnx, onnx_ir, onnxscript
Successfully install

In [None]:
# Export in evaluation mode.
model.eval()

# Example input used for the export: one sample with 63 features.
dummy_input = torch.randn(1, 63).to(device)

with torch.no_grad():
    torch.onnx.export(
        model,
        dummy_input,
        "asl_mlp.onnx",
        opset_version=17,
        input_names=["keypoints"],
        output_names=["logits"],
        # Dimension 0 of both tensors is named "batch" and left dynamic.
        dynamic_axes={name: {0: "batch"} for name in ("keypoints", "logits")},
    )


  torch.onnx.export(
W0127 19:06:26.437000 1929 torch/onnx/_internal/exporter/_compat.py:114] Setting ONNX exporter to use operator set version 18 because the requested opset_version 17 is a lower version than we have implementations for. Automatic version conversion will be performed, which may not be successful at converting to the requested version. If version conversion is unsuccessful, the opset version of the exported model will be kept at 18. Please consider setting opset_version >=18 to leverage latest ONNX features


[torch.onnx] Obtain model graph for `ASLClassifier([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `ASLClassifier([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...




[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Applied 1 of general pattern rewrite rules.


In [None]:
from google.colab import files

# Trigger a browser download for each exported artifact, in this order.
for artifact in ("asl_mlp.onnx", "label_map.json", "asl_mlp.onnx.data"):
    files.download(artifact)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# List the working directory to check which artifacts were written.
!ls

asl_mlp.onnx  asl_mlp.onnx.data  asl_mlp.pth  data  label_map.json  sample_data
