ChatGPT was used in this assignment to help generate the code.

In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import random_split, DataLoader
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder
import os

In [27]:
# Seed the CPU generator and every CUDA generator so runs are repeatable,
# then select the GPU when one is available and fall back to the CPU otherwise.
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [32]:
import kagglehub

# Fetch the dataset through kagglehub; the returned value is the local
# directory that holds the downloaded files.
dataset_handle = "ammarsayedtaha/arabic-sign-language-dataset-2022"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/ammarsayedtaha/arabic-sign-language-dataset-2022?dataset_version_number=2...


100%|██████████| 808M/808M [00:11<00:00, 73.9MB/s] 

Extracting files...





Path to dataset files: /home/ikushbay/.cache/kagglehub/datasets/ammarsayedtaha/arabic-sign-language-dataset-2022/versions/2


In [93]:
# Dataset location: keep the directory returned by kagglehub.dataset_download
# in the download cell instead of overwriting it with a machine-specific
# absolute path, so the notebook also runs on other machines. Set the
# ARSL_DATASET_DIR environment variable to point at an existing local copy.
path = os.environ.get("ARSL_DATASET_DIR", path)

# Training hyper-parameters.
batch_size = 64    # samples per mini-batch
epochs     = 50    # passes over the training set
lr         = 1e-3  # learning rate handed to the optimizer

In [94]:
# Dataset root plus the split lists and the class-name metadata stored in it.
data_root = os.path.join(path, "datasets")
train_txt, val_txt, sign_yaml = (
    os.path.join(data_root, file_name)
    for file_name in ("train.txt", "val.txt", "sign.yaml")
)

In [95]:
import os
import yaml
from torch.utils.data import Dataset
from PIL import Image
import glob 

# Parse the dataset's YAML metadata and read the class names stored under "names".
with open(sign_yaml, "r") as yaml_file:
    meta = yaml.safe_load(yaml_file)

class_names = meta["names"]
num_classes = len(class_names)
print(class_names)

['ain', 'al', 'aleff', 'bb', 'dal', 'dha', 'dhad', 'fa', 'gaaf', 'ghain', 'ha', 'haa', 'jeem', 'kaaf', 'khaa', 'la', 'laam', 'meem', 'nun', 'ra', 'saad', 'seen', 'sheen', 'ta', 'taa', 'thaa', 'thal', 'toot', 'waw', 'ya', 'yaa', 'zay']


In [105]:
# Preprocessing applied to every image before it reaches the network.
transform = transforms.Compose([
    # An (h, w) pair forces every image to exactly 128x128. A bare int only
    # rescales the shorter edge, so images with different aspect ratios would
    # come out with different shapes and could not be stacked into one batch.
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    # Per channel: (x - 0.5) / 0.5.
    transforms.Normalize((0.5,) * 3, (0.5,) * 3),
])

In [106]:
class SignLangFlatDataset(Dataset):
    """Image dataset over a flat folder whose file names encode the class.

    Every ``.jpg`` / ``.png`` file directly inside ``images_folder`` is one
    sample. The label is parsed from the file name: the second-to-last
    underscore-separated field of the name without its extension, e.g.
    ``1201_23_M_dhad_4.jpg`` -> ``dhad``, mapped to its position in
    ``class_names``.

    Args:
        images_folder: Directory holding the image files (not searched
            recursively).
        class_names: Ordered class names; a sample's label is the index of
            its class token in this sequence.
        transform: Optional callable applied to the RGB PIL image.

    Raises:
        ValueError: From ``__getitem__`` when a file name has no parsable
            class token or the token is not in ``class_names``.
    """

    # Lower-cased file suffixes that count as images.
    _EXTENSIONS = (".jpg", ".png")

    def __init__(self, images_folder, class_names, transform=None):
        self.transform   = transform
        self.class_names = class_names
        # Name -> index table built once so each lookup is O(1). The first
        # occurrence wins, which matches what list.index would return.
        self._class_to_idx = {}
        for idx, name in enumerate(class_names):
            self._class_to_idx.setdefault(name, idx)
        pattern = os.path.join(images_folder, "*.*")
        # glob returns files in arbitrary order; sorting keeps the
        # index -> sample mapping identical across runs and machines.
        self.paths = sorted(
            p for p in glob.glob(pattern) if p.lower().endswith(self._EXTENSIONS)
        )

    def __len__(self):
        return len(self.paths)

    def _label_from_filename(self, fname):
        """Return the class index encoded in ``fname`` (a bare file name)."""
        # e.g. "1201_23_M_dhad_4.jpg" -> ['1201','23','M','dhad','4'] -> take [-2]
        fields = fname.rsplit(".", 1)[0].split("_")
        if len(fields) < 2:
            raise ValueError(
                f"Cannot parse a class token from file name {fname!r}; "
                "expected '<...>_<class>_<index>.<ext>'"
            )
        cls_token = fields[-2]
        try:
            return self._class_to_idx[cls_token]
        except KeyError:
            raise ValueError(
                f"Unknown class token {cls_token!r} in file name {fname!r}"
            ) from None

    def __getitem__(self, idx):
        path  = self.paths[idx]
        label = self._label_from_filename(os.path.basename(path))

        # The context manager releases the file handle as soon as the pixels
        # have been decoded into the converted copy.
        with Image.open(path) as raw:
            img = raw.convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img, label

In [107]:
# Image folders of the two splits inside the dataset root.
train_img_dir = os.path.join(data_root, "train", "images")
val_img_dir   = os.path.join(data_root, "valid", "images")

train_ds = SignLangFlatDataset(train_img_dir, class_names, transform)
val_ds   = SignLangFlatDataset(val_img_dir,   class_names, transform)

# A wrong folder yields a silently empty dataset; fail here with a clear
# message instead of an obscure sampler error when the loader is built.
for split_name, split_ds, split_dir in (
    ("train", train_ds, train_img_dir),
    ("val", val_ds, val_img_dir),
):
    if len(split_ds) == 0:
        raise FileNotFoundError(
            f"No .jpg/.png images found for the {split_name} split in {split_dir!r}"
        )

# batch_size is taken from the hyper-parameter cell; it is deliberately not
# re-declared here so that changing it there actually takes effect.
loader_kwargs = dict(batch_size=batch_size, num_workers=4, pin_memory=True)
train_loader = DataLoader(train_ds, shuffle=True,  **loader_kwargs)
val_loader   = DataLoader(val_ds,   shuffle=False, **loader_kwargs)

print(f"Train samples: {len(train_ds)} | Val samples: {len(val_ds)} | Classes: {num_classes}")

Train samples: 9955 | Val samples: 4247 | Classes: 32


In [108]:
# A small ResNet-style network is used as the classifier for this task.
class ToyResNet64(nn.Module):
    """Compact residual CNN that maps an RGB image batch to class logits.

    Layout: a two-conv stem followed by 2x max-pooling, three blocks whose
    output is added to their input (64 channels each), a second 2x
    max-pooling, one more conv unit, global average pooling and a two-layer
    classifier head. The spatial sizes quoted in the comments assume a 64x64
    input; because the final pooling is adaptive, the classifier head itself
    does not depend on the exact input resolution.

    Args:
        num_classes: Number of output logits.
    """

    @staticmethod
    def _conv_bn_relu(in_ch, out_ch, **conv_kwargs):
        """Conv2d -> BatchNorm2d -> ReLU bundled as one sequential unit."""
        return nn.Sequential(
            nn.Conv2d(in_ch, out_ch, **conv_kwargs),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        )

    @classmethod
    def _residual_body(cls, channels):
        """Two shape-preserving 3x3 conv units forming the body of one residual block."""
        return nn.Sequential(
            cls._conv_bn_relu(channels, channels, kernel_size=3, padding=1),
            cls._conv_bn_relu(channels, channels, kernel_size=3, padding=1),
        )

    def __init__(self, num_classes: int):
        super().__init__()

        # Stem: two 3x3 convs keep 64x64, the pooling halves it to 32x32.
        self.initial = nn.Sequential(
            self._conv_bn_relu(3, 32, kernel_size=3, padding=1),
            self._conv_bn_relu(32, 64, kernel_size=3, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Three residual bodies operating at 32x32 with 64 channels.
        self.res1 = self._residual_body(64)
        self.res2 = self._residual_body(64)
        self.res3 = self._residual_body(64)

        # Second spatial reduction (32x32 -> 16x16), one more conv unit,
        # then collapse each channel to a single value.
        self.down       = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv_final = self._conv_bn_relu(64, 64, kernel_size=3, padding=1)
        self.gap        = nn.AdaptiveAvgPool2d(1)

        # Classifier head on the 64 pooled features.
        self.fc = nn.Sequential(
            nn.Linear(64, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes),
        )

        self._init_weights()

    def _init_weights(self):
        """He-normal weights and zero biases for every conv and linear layer."""
        for layer in self.modules():
            if not isinstance(layer, (nn.Conv2d, nn.Linear)):
                continue
            nn.init.kaiming_normal_(layer.weight, nonlinearity="relu")
            if getattr(layer, "bias", None) is not None:
                nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Return the class logits, one row per input image."""
        x = self.initial(x)
        # Each residual body's output is added to the tensor that entered it.
        for body in (self.res1, self.res2, self.res3):
            x = body(x) + x
        x = self.conv_final(self.down(x))
        pooled = self.gap(x)
        # Flatten (N, 64, 1, 1) to (N, 64) for the linear head.
        return self.fc(pooled.view(pooled.size(0), -1))

In [109]:
# Instantiate the network defined in this notebook. The class is named
# ToyResNet64; no `ToyResNet` is defined anywhere in the notebook, so the old
# name raised NameError on a fresh "Restart & Run All".
model     = ToyResNet64(num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
# Halve the learning rate every 5 scheduler steps (one step per epoch).
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
# Dataset sizes, used to turn summed batch statistics into per-sample averages.
n_train, n_val = len(train_ds), len(val_ds)

In [110]:
import matplotlib.pyplot as plt

# Per-epoch metric histories: filled by the training loop, read by the plots.
train_losses = []
train_accs   = []
val_losses   = []
val_accs     = []

In [111]:
for epoch in range(1, epochs + 1):
    # ---- Training pass: weights are updated after every mini-batch ----
    model.train()
    loss_sum, n_correct = 0.0, 0.0
    for imgs, labels in train_loader:
        imgs   = imgs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = model(imgs)
        loss   = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        # The criterion yields a batch mean; weight it by the batch size so
        # the epoch figure is a true per-sample average.
        loss_sum  += loss.item() * imgs.size(0)
        n_correct += (logits.argmax(1) == labels).sum().item()

    train_loss = loss_sum  / n_train
    train_acc  = n_correct / n_train
    train_losses.append(train_loss)
    train_accs.append(train_acc)

    # ---- Validation pass: eval mode, no gradient tracking ----
    model.eval()
    val_loss_sum, val_correct = 0.0, 0.0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs   = imgs.to(device)
            labels = labels.to(device)
            logits = model(imgs)
            loss   = criterion(logits, labels)
            val_loss_sum += loss.item() * imgs.size(0)
            val_correct  += (logits.argmax(1) == labels).sum().item()

    val_loss = val_loss_sum / n_val
    val_acc  = val_correct  / n_val
    val_losses.append(val_loss)
    val_accs.append(val_acc)

    # ---- Advance the learning-rate schedule once per epoch and report ----
    scheduler.step()
    print(f"Epoch {epoch}/{epochs}  "
          f"Train: loss {train_loss:.4f}, acc {train_acc:.4f}  |  "
          f"Val:   loss {val_loss:.4f}, acc {val_acc:.4f}")

Epoch 1/50  Train: loss 3.4750, acc 0.0274  |  Val:   loss 3.4655, acc 0.0292
Epoch 2/50  Train: loss 3.4666, acc 0.0302  |  Val:   loss 3.4654, acc 0.0318
Epoch 3/50  Train: loss 3.4667, acc 0.0294  |  Val:   loss 3.4656, acc 0.0381
Epoch 4/50  Train: loss 3.4664, acc 0.0303  |  Val:   loss 3.4647, acc 0.0360
Epoch 5/50  Train: loss 3.4663, acc 0.0284  |  Val:   loss 3.4654, acc 0.0318
Epoch 6/50  Train: loss 3.4662, acc 0.0294  |  Val:   loss 3.4654, acc 0.0318
Epoch 7/50  Train: loss 3.4656, acc 0.0332  |  Val:   loss 3.4654, acc 0.0290
Epoch 8/50  Train: loss 3.4660, acc 0.0296  |  Val:   loss 3.4652, acc 0.0318
Epoch 9/50  Train: loss 3.4656, acc 0.0322  |  Val:   loss 3.4648, acc 0.0327
Epoch 10/50  Train: loss 3.4653, acc 0.0287  |  Val:   loss 3.4641, acc 0.0334
Epoch 11/50  Train: loss 3.4649, acc 0.0287  |  Val:   loss 3.4643, acc 0.0325
Epoch 12/50  Train: loss 3.4643, acc 0.0298  |  Val:   loss 3.4638, acc 0.0318
Epoch 13/50  Train: loss 3.4648, acc 0.0312  |  Val:   loss 3

In [None]:
# Build the x-axis from the epochs that were actually recorded rather than
# from the configured `epochs`: if training was interrupted (or the loop cell
# was run more than once) the histories no longer have `epochs` entries and
# matplotlib would reject the mismatched x / y lengths.
completed_epochs = min(len(train_losses), len(val_losses))
epochs_range = range(1, completed_epochs + 1)

# One figure per metric: training curve and validation curve on shared axes.
for metric, title, train_series, val_series in (
    ("Loss",     "Training and Validation Loss",     train_losses, val_losses),
    ("Accuracy", "Training and Validation Accuracy", train_accs,   val_accs),
):
    fig, ax = plt.subplots()
    # Slice so both curves cover exactly the epochs that finished validation.
    ax.plot(epochs_range, train_series[:completed_epochs], label=f"Train {metric}")
    ax.plot(epochs_range, val_series[:completed_epochs],   label=f"Val {metric}")
    ax.set_xlabel("Epoch")
    ax.set_ylabel(metric)
    ax.legend()
    ax.set_title(title)
    plt.show()