In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/wlasl-processed/nslt_2000.json
/kaggle/input/wlasl-processed/nslt_1000.json
/kaggle/input/wlasl-processed/WLASL_v0.3.json
/kaggle/input/wlasl-processed/wlasl_class_list.txt
/kaggle/input/wlasl-processed/nslt_300.json
/kaggle/input/wlasl-processed/missing.txt
/kaggle/input/wlasl-processed/nslt_100.json
/kaggle/input/wlasl-processed/videos/03238.mp4
/kaggle/input/wlasl-processed/videos/19110.mp4
/kaggle/input/wlasl-processed/videos/24027.mp4
/kaggle/input/wlasl-processed/videos/65091.mp4
/kaggle/input/wlasl-processed/videos/08595.mp4
/kaggle/input/wlasl-processed/videos/56178.mp4
/kaggle/input/wlasl-processed/videos/57934.mp4
/kaggle/input/wlasl-processed/videos/32478.mp4
/kaggle/input/wlasl-processed/videos/54425.mp4
/kaggle/input/wlasl-processed/videos/42670.mp4
/kaggle/input/wlasl-processed/videos/59051.mp4
/kaggle/input/wlasl-processed/videos/65405.mp4
/kaggle/input/wlasl-processed/videos/30278.mp4
/kaggle/input/wlasl-processed/videos/03318.mp4
/kaggle/input/wlasl-proce

In [2]:
import os
import json
import torch
import torch.nn as nn
from glob import glob
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import cv2
from tqdm import tqdm


In [3]:
# Define paths
video_dir = "/kaggle/input/wlasl-processed/videos"
json_path = "/kaggle/input/wlasl-processed/WLASL_v0.3.json"

# Load metadata and build mappings
def load_wlasl_metadata(json_path, video_dir):
    with open(json_path, 'r') as f:
        data = json.load(f)

    gloss_to_videos = {}
    all_videos = []

    for entry in data:
        gloss = entry["gloss"]
        for inst in entry["instances"]:
            video_id = inst["video_id"]
            video_path = os.path.join(video_dir, f"{video_id}.mp4")
            if os.path.exists(video_path):
                gloss_to_videos[video_path] = gloss
                all_videos.append(video_path)

    labels = sorted(list(set(gloss_to_videos.values())))
    label_to_index = {label: idx for idx, label in enumerate(labels)}
    return all_videos, gloss_to_videos, label_to_index

video_paths, label_map, label_to_index = load_wlasl_metadata(json_path, video_dir)


In [4]:
class WLASLDataset(Dataset):
    def __init__(self, video_paths, label_map, label_to_index, transform=None, max_frames=16):
        self.video_paths = video_paths
        self.label_map = label_map
        self.label_to_index = label_to_index
        self.transform = transform
        self.max_frames = max_frames

    def __len__(self):
        return len(self.video_paths)

    def __getitem__(self, idx):
        path = self.video_paths[idx]
        label_str = self.label_map[path]
        label = self.label_to_index[label_str]
        frames = self.load_video(path)
        return frames, torch.tensor(label).long()

    def load_video(self, path):
        cap = cv2.VideoCapture(path)
        frames = []
        while cap.isOpened() and len(frames) < self.max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.resize(frame, (224, 224))
            if self.transform:
                frame = self.transform(frame)
            frames.append(frame)
        cap.release()
        while len(frames) < self.max_frames:
            frames.append(frames[-1])
        return torch.stack(frames)


In [5]:
class SignModel(nn.Module):
    def __init__(self, num_classes):
        super(SignModel, self).__init__()
        self.cnn = models.resnet18(pretrained=True)
        self.cnn.fc = nn.Identity()
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=512, nhead=8), num_layers=2)
        self.classifier = nn.Linear(512, num_classes)

    def forward(self, x):
        B, T, C, H, W = x.shape
        x = x.view(B * T, C, H, W)
        feat = self.cnn(x)
        feat = feat.view(B, T, -1)
        feat = self.transformer(feat.permute(1, 0, 2))
        out = self.classifier(feat.mean(0))
        return out


In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor()
])

dataset = WLASLDataset(video_paths, label_map, label_to_index, transform)
loader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=2)

model = SignModel(num_classes=len(label_to_index)).to(device)
model = nn.DataParallel(model)  # Wrap model to use multiple GPUs
model = model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

for epoch in range(10):
    running_loss = 0.0
    for frames, label in tqdm(loader, desc=f"Epoch {epoch+1}"):
        frames, label = frames.to(device).float(), label.to(device)
        out = model(frames)
        loss = criterion(out, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch {epoch+1} | Loss: {running_loss / len(loader):.4f}")

torch.save(model.state_dict(), "sign_model.pt")
print("✅ Model saved to sign_model.pt")


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 158MB/s] 
Epoch 1: 100%|██████████| 2995/2995 [09:03<00:00,  5.51it/s]


Epoch 1 | Loss: 7.8350


Epoch 2: 100%|██████████| 2995/2995 [08:57<00:00,  5.57it/s]


Epoch 2 | Loss: 7.7114


Epoch 3: 100%|██████████| 2995/2995 [08:53<00:00,  5.61it/s]


Epoch 3 | Loss: 7.6616


Epoch 4: 100%|██████████| 2995/2995 [08:57<00:00,  5.58it/s]


Epoch 4 | Loss: 7.6404


Epoch 5: 100%|██████████| 2995/2995 [08:56<00:00,  5.58it/s]


Epoch 5 | Loss: 7.6253


Epoch 6: 100%|██████████| 2995/2995 [08:43<00:00,  5.72it/s]


Epoch 6 | Loss: 7.6144


Epoch 7: 100%|██████████| 2995/2995 [08:50<00:00,  5.65it/s]


Epoch 7 | Loss: 7.6067


Epoch 8: 100%|██████████| 2995/2995 [08:55<00:00,  5.59it/s]


Epoch 8 | Loss: 7.6010


Epoch 9: 100%|██████████| 2995/2995 [08:48<00:00,  5.67it/s]


Epoch 9 | Loss: 7.5969


Epoch 10: 100%|██████████| 2995/2995 [08:48<00:00,  5.66it/s]


Epoch 10 | Loss: 7.5919
✅ Model saved to sign_model.pt


In [None]:
torch.save(model.state_dict(), "sign_model.pth")
print("✅ Model saved to sign_model.pt")