**Actions We Want To Detect**

In [None]:
# Glosses (sign labels) to keep: the main loop skips every dataset entry whose "gloss" is not in this list.
actions = ['book', 'drink', 'hello', 'idea']

Import Dependencies

In [None]:
import os
import json
import cv2
import numpy as np
from pathlib import Path

from tqdm.notebook import tqdm

# Create Dataset

**WLASL (Word-Level American Sign Language) Video**

WLASL is the largest video dataset for Word-Level ASL recognition, featuring 2,000 common words in ASL.

Options:

1. Get From Kaggle

2. Download From URLs

In [None]:
# Video source selector used by the main loop: 1 reads '<video_id>.mp4' from '5/videos',
# 2 fetches each instance URL with download_video().
option = 1  # or 2

**Option 1: Get From Kaggle**

Gets all data (about 5GB)

In [None]:
import kagglehub

# Download latest version; `path` holds whatever kagglehub.dataset_download returns
# (presumably the local dataset directory — TODO confirm against the kagglehub docs).
path = kagglehub.dataset_download("risangbaskoro/wlasl-processed")

# Copy the downloaded dataset into /content/ (IPython substitutes {path} before running the shell command).
# NOTE(review): later cells read '5/WLASL_Sample.json' and '5/videos', so they appear to rely on the
# copied folder being named '5' and on the working directory being /content — confirm.
!cp -r {path} /content/

**Option 2: Download From URLs**

Customizable: downloads only the videos of the selected glosses from their source URLs.

In [None]:
# Install yt-dlp (imported as yt_dlp by the downloader) and ffmpeg.
# NOTE(review): ffmpeg is presumably required so yt-dlp can merge separate video/audio streams into mp4 — confirm.
!pip install yt-dlp
!apt update && apt install -y ffmpeg

In [None]:
import yt_dlp
import requests
from pathlib import Path
from urllib.parse import urlparse

**Video Download**

In [None]:
# Remove any existing 'videos' directory.
# NOTE(review): download_video() below defaults to save_dir='downloads', not 'videos' — confirm which
# directory this cell is meant to reset.
!rm -rf videos

In [None]:
def download_video(url, save_dir='downloads'):
    """Download a single video into ``save_dir`` and return its local path.

    YouTube links are handed to yt-dlp; direct ``.mp4``/``.mov`` links are
    streamed with ``requests``. Every other kind of URL is skipped.

    Args:
        url: Address of the video.
        save_dir: Directory that receives the file. It is created, including
            missing parent directories, when it does not exist yet.

    Returns:
        The path of the downloaded (or already present) file as a string, or
        ``None`` when the URL is unsupported or the download failed.
    """
    Path(save_dir).mkdir(parents=True, exist_ok=True)

    parsed_url = urlparse(url)
    host = parsed_url.netloc.lower()
    # YouTube URLs ("/watch?v=ID", "youtu.be/ID") carry no file extension, so
    # they have to be recognised before the extension check below rejects them.
    is_youtube = "youtube.com" in host or "youtu.be" in host

    # Lower-cased so that ".MP4" / ".MOV" are treated like their lowercase forms.
    ext = os.path.splitext(parsed_url.path)[-1].lower()
    filename = parsed_url.path.split("/")[-1]
    save_path = None

    if not is_youtube:
        if not filename or not ext:
            print(f"Skipping unknown format: {url}")
            return None

        save_path = os.path.join(save_dir, filename)

        # Skip existing files
        if os.path.exists(save_path):
            print(f"Already exists: {save_path}")
            return save_path

    try:
        if is_youtube:
            return _download_youtube(url, save_dir)
        if ext in ('.mp4', '.mov'):
            return _download_direct(url, save_path)
        if ext == '.swf':
            print(f"Skipping unsupported .swf: {url}")
            return None
        print(f"Unknown or unsupported format: {url}")
        return None
    except Exception as e:
        # Best effort: one broken link must not abort a whole batch of downloads.
        print(f"Failed: {url} | Error: {str(e)}")
        return None


def _download_youtube(url, save_dir):
    """Fetch a YouTube video with yt-dlp and return the path of the written file."""
    import yt_dlp

    ydl_opts = {
        'outtmpl': os.path.join(save_dir, '%(id)s.%(ext)s'),
        'quiet': True,
        'format': 'bestvideo+bestaudio/best',
        'merge_output_format': 'mp4',
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        downloaded = ydl.prepare_filename(info)
    print(f"YouTube downloaded: {url}")

    # When separate streams are merged the final container is mp4, which can
    # differ from the extension of the name yt-dlp prepared before merging.
    merged = os.path.splitext(downloaded)[0] + '.mp4'
    return merged if os.path.exists(merged) else downloaded


def _download_direct(url, save_path):
    """Stream a direct video link to ``save_path``; return the path, or None on a non-200 reply."""
    # Write to a temporary name first so an interrupted transfer never leaves a
    # truncated file that a later run would report as "Already exists".
    partial_path = save_path + '.part'
    try:
        with requests.get(url, stream=True, timeout=10) as r:
            if r.status_code != 200:
                print(f"HTTP {r.status_code}: {url}")
                return None
            with open(partial_path, 'wb') as f:
                for chunk in r.iter_content(1024 * 1024):
                    if chunk:
                        f.write(chunk)
        os.replace(partial_path, save_path)
    finally:
        if os.path.exists(partial_path):
            os.remove(partial_path)

    print(f"Downloaded: {save_path}")
    return save_path


Test

In [None]:
# Sample direct-download links used to try out download_video(); the inline
# comments name the gloss each group of clips shows.
video_urls = [
    # Idea
    "https://elementalaslconcepts.weebly.com/uploads/2/4/4/5/24454483/idea.mov",
    "https://media.spreadthesign.com/video/mp4/13/244655.mp4",
    "https://signstock.blob.core.windows.net/signschool/videos/db_uploads/SignSchool%20Idea%2C%20Imagine-3YjmVax6CBc.mp4",
    "https://signstock.blob.core.windows.net/signschool/videos/db_uploads/SignSchool%20Idea-HR8afgGna7A.mp4",
    "https://media.asldeafined.com/vocabulary/1468665115.4947.mp4",

    # Hello
    "https://signstock.blob.core.windows.net/signschool/videos/db_uploads/SignSchool%20Hello-6kvCOzxP9_A.mp4",
    "https://media.asldeafined.com/vocabulary/1468580623.2588.mp4",
    "https://www.handspeak.com/word/h/hello.mp4",
    "https://www.signingsavvy.com/signs/mp4/6/6353.mp4",
]

# No save_dir is passed, so files go to download_video's default ('downloads');
# the returned paths are not used here.
for url in video_urls:
    download_video(url)


**Get keypoints with MediaPipe**

**MediaPipe Setup**

In [None]:
# Install MediaPipe, which supplies the Holistic model set up in the cells below.
!pip install mediapipe

In [None]:
import mediapipe as mp

In [None]:
# Single shared Holistic instance; process_video() calls holistic.process() on every frame it reads.
# NOTE(review): static_image_mode=False presumably lets the model track landmarks across consecutive
# frames; since the same instance is reused for every video, confirm that carried-over state is acceptable.
mp_holistic = mp.solutions.holistic
holistic = mp_holistic.Holistic(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)

In [None]:
def extract_keypoints(results):
    """Flatten the landmarks of one frame into a single 1-D feature vector.

    Layout of the returned vector, in order:
      * pose:       33 landmarks x (x, y, z, visibility)
      * face:       468 landmarks x (x, y, z)
      * left hand:  21 landmarks x (x, y, z)
      * right hand: 21 landmarks x (x, y, z)

    A landmark group that is missing (falsy) on ``results`` is filled with
    zeros of the same size, so the layout stays fixed.
    """
    def _as_vector(group, count, fields):
        # Missing group -> zero block of the expected size.
        if not group:
            return np.zeros((count, len(fields))).flatten()
        rows = [[getattr(point, name) for name in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ('x', 'y', 'z')
    parts = [
        _as_vector(results.pose_landmarks, 33, xyz + ('visibility',)),
        _as_vector(results.face_landmarks, 468, xyz),
        _as_vector(results.left_hand_landmarks, 21, xyz),
        _as_vector(results.right_hand_landmarks, 21, xyz),
    ]
    return np.concatenate(parts)


**Process and Save Keypoints**

In [None]:
def process_video(video_path, start_frame=1, end_frame=None, bbox=None, fps=25):
    """Run the shared Holistic model over a frame range of a video.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        start_frame: Index of the first frame to read; passed to OpenCV as-is.
        end_frame: Exclusive stop index. ``None``, ``0`` or a negative value
            means "up to the frame count reported by OpenCV".
        bbox: Optional ``(x1, y1, x2, y2)`` crop applied to every frame.
        fps: Accepted for interface compatibility; currently unused.

    Returns:
        ``np.ndarray`` holding one ``extract_keypoints`` row per processed
        frame. The array is empty when the video cannot be opened or no frame
        could be processed.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Cannot open video: {video_path}")
        cap.release()
        # Same type as the success path, so callers can rely on len()/shape.
        return np.array([])

    keypoints = []
    empty_crop_reported = False
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        end_frame = end_frame if end_frame and end_frame > 0 else total_frames

        # Seek once and then read sequentially instead of seeking before every frame.
        # NOTE(review): start_frame is used directly as an OpenCV frame index, so the
        # default of 1 skips the very first frame — confirm against the indexing
        # convention of the dataset's frame_start field.
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

        for _ in range(start_frame, end_frame):
            ret, frame = cap.read()
            if not ret:
                break

            # Apply bounding box crop if provided
            if bbox:
                x1, y1, x2, y2 = bbox
                frame = frame[y1:y2, x1:x2]
                if frame.size == 0:
                    # A box outside the image gives an empty crop, which cvtColor rejects.
                    if not empty_crop_reported:
                        print(f"Empty crop for bbox {bbox}: {video_path}")
                        empty_crop_reported = True
                    continue

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = holistic.process(frame_rgb)
            keypoints.append(extract_keypoints(results))
    finally:
        # Release the capture even when frame processing raises.
        cap.release()

    return np.array(keypoints)

**Main Loop: Load JSON & Save Keypoints**

In [None]:
# Fail fast on an unsupported source selector: otherwise the loop below would hit an
# undefined (or stale, from an earlier run) `video_path` on its first instance.
if option not in (1, 2):
    raise ValueError(f"option must be 1 or 2, got {option!r}")

with open("5/WLASL_Sample.json") as f:
    data = json.load(f)

for gloss_entry in tqdm(data):
    gloss = gloss_entry["gloss"]

    # Skip glosses not in the action list
    if gloss not in actions:
        continue

    print(gloss)
    for inst in gloss_entry["instances"]:
        url = inst["url"]
        bbox = inst.get("bbox", None)
        start = inst.get("frame_start", 1)
        end = inst.get("frame_end", -1)
        fps = inst.get("fps", 25)
        video_id = inst["video_id"]

        if option == 1:
            # Pre-downloaded videos are looked up by id.
            path = '5/videos'
            video_path = os.path.join(path, f'{video_id}.mp4')
        else:
            # option == 2: fetch the clip from its source URL; skip it when that fails.
            video_path = download_video(url)
            if video_path is None:
                continue

        keypoints = process_video(video_path, start_frame=start, end_frame=end, bbox=bbox, fps=fps)
        # Nothing extracted (unreadable video or no frames): do not write an empty file.
        if len(keypoints) == 0:
            continue

        # One .npy file per video, grouped in a folder per gloss.
        save_dir = f"keypoints/{gloss}"
        Path(save_dir).mkdir(parents=True, exist_ok=True)
        np.save(f"{save_dir}/{video_id}.npy", keypoints)


Sample

In [None]:
# Inspect one saved keypoint file written by the main loop (gloss 'drink', video id 17720).
# NOTE(review): this displays the array itself — `.shape` may be a lighter sanity check.
np.load('keypoints/drink/17720.npy')

Download & Upload Data

In [None]:
# Pack the extracted keypoints into a single archive at /content/file.zip.
!zip -r /content/file.zip /content/keypoints/

In [None]:
# Unpack the archive into the current working directory.
# NOTE(review): the archive was built from the absolute path /content/keypoints/, so the extracted
# folder may end up nested (content/keypoints) instead of at ./keypoints, which later cells read — confirm.
!unzip /content/file.zip

# Preprocess Data and Create Labels and Features

In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Fixed sequence length: pad_sequences below is called with maxlen=MAX_FRAMES, so every
# clip is padded or truncated to this many frames.
MAX_FRAMES = 25

In [None]:
data_dir = 'keypoints'
X = []  # features: one (frames, features) array per video
y = []  # labels: class index per video
label_map = {}  # class name -> index

# Keep only class folders and sort them, so indices are contiguous (0..n-1) and the
# name -> index mapping is identical on every run; os.listdir() guarantees no order.
class_names = sorted(
    name for name in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, name))
)

for i, label in enumerate(class_names):
    label_path = os.path.join(data_dir, label)
    label_map[label] = i  # "book" -> 0, "drink" -> 1, etc.

    # Sorted so the sample order (and therefore the seeded train/test split) is reproducible.
    for file in sorted(os.listdir(label_path)):
        if not file.endswith('.npy'):
            continue
        keypoints = np.load(os.path.join(label_path, file))
        if keypoints.ndim != 2:  # expected shape: (frames, features)
            continue  # skip corrupted or empty files
        X.append(keypoints)
        y.append(i)

if not X:
    raise ValueError(f"No usable keypoint files found under '{data_dir}'")

# Pad all sequences to shape (MAX_FRAMES, num_features)
X = pad_sequences(X, maxlen=MAX_FRAMES, dtype='float32', padding='post', truncating='post')
y = np.array(y)

print("X shape:", X.shape)  # should be (num_samples, MAX_FRAMES, features)
print("y shape:", y.shape)


In [None]:
# Hold out 20% of the samples for testing; stratify=y keeps the class proportions
# in both parts and random_state pins the shuffle.
# Each part goes through np.array so all four results are NumPy arrays.
X_train, X_test, y_train, y_test = map(
    np.array,
    train_test_split(X, y, random_state=42, stratify=y, test_size=0.2),
)

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)


In [None]:
# Display the held-out labels (class indices) as a quick check of the split.
y_test

# Build Model



LSTM Neural Network