### Imports

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader, Subset
import torchvision.transforms as transforms
from PIL import Image
import pandas as pd
import os
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import time

Dataset (Kaggle):

[https://www.kaggle.com/datasets/vignonantoine/mediapipe-processed-asl-dataset](https://www.kaggle.com/datasets/vignonantoine/mediapipe-processed-asl-dataset)

In [None]:
from google.colab import files
files.upload()

!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

!kaggle datasets download -d vignonantoine/mediapipe-processed-asl-dataset

!unzip mediapipe-processed-asl-dataset.zip -d asl_dataset


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: asl_dataset/processed_combine_asl_dataset/y/Y329.jpg  
  inflating: asl_dataset/processed_combine_asl_dataset/y/Y33.jpg  
  inflating: asl_dataset/processed_combine_asl_dataset/y/Y330.jpg  
  inflating: asl_dataset/processed_combine_asl_dataset/y/Y331.jpg  
  inflating: asl_dataset/processed_combine_asl_dataset/y/Y332.jpg  
  inflating: asl_dataset/processed_combine_asl_dataset/y/Y333.jpg  
  inflating: asl_dataset/processed_combine_asl_dataset/y/Y334 - Copy - Copy.jpg  
  inflating: asl_dataset/processed_combine_asl_dataset/y/Y334.jpg  
  inflating: asl_dataset/processed_combine_asl_dataset/y/Y335.jpg  
  inflating: asl_dataset/processed_combine_asl_dataset/y/Y336.jpg  
  inflating: asl_dataset/processed_combine_asl_dataset/y/Y337.jpg  
  inflating: asl_dataset/processed_combine_asl_dataset/y/Y338.jpg  
  inflating: asl_dataset/processed_combine_asl_dataset/y/Y339.jpg  
  inflating: asl_dataset/processed_com

### Dataloading

In [None]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import zipfile
import os

# Root folder of the unzipped dataset; ImageFolder treats each sub-folder as a class.
base_path = '/content/asl_dataset/processed_combine_asl_dataset'

# Loaded without a transform: samples come back as (PIL image, class index).
full_dataset = datasets.ImageFolder(root=base_path)

# One position per image, plus the matching class index used for stratification.
indices = list(range(len(full_dataset)))
labels = [class_idx for _, class_idx in full_dataset.samples]

# 80/20 split that keeps the class proportions the same in both parts.
train_indices, test_indices = train_test_split(
    indices,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

# Per-split preprocessing, defined after the split so each part gets its own pipeline.
IMAGE_SIZE = (224, 224)
# mean=0.5 / std=0.5 per channel rescales ToTensor's [0, 1] output to [-1, 1].
NORM_MEAN = [0.5, 0.5, 0.5]
NORM_STD = [0.5, 0.5, 0.5]

# Training pipeline: resize, random augmentation, then tensor conversion + normalisation.
train_transform = transforms.Compose(
    [
        transforms.Resize(size=IMAGE_SIZE),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=18),
        transforms.ColorJitter(
            brightness=0.2,
            contrast=0.2,
            saturation=0.2,
            hue=0.1,
        ),
        transforms.ToTensor(),
        transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
    ]
)

# Test pipeline: same resize and normalisation, no random augmentation.
test_transform = transforms.Compose(
    [
        transforms.Resize(size=IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
    ]
)

# Subset wrapper that owns its transform instead of mutating the shared dataset
class TransformedSubset(Subset):
    """A ``Subset`` that applies its own transform to every sample it returns.

    The transform is stored on the subset, not on the wrapped dataset, so
    several subsets built on the same underlying dataset can each use a
    different transform (e.g. augmentation for training, none for testing).
    Writing the transform onto the shared dataset would make the most
    recently constructed subset silently override all earlier ones.

    Args:
        dataset: Underlying dataset whose items are ``(sample, target)`` pairs.
            If it already has a transform of its own, that runs first.
        indices: Positions in ``dataset`` that belong to this subset.
        transform: Callable applied to ``sample`` on access, or ``None`` to
            return samples unchanged.
    """

    def __init__(self, dataset, indices, transform):
        super().__init__(dataset, indices)
        self.transform = transform

    def __getitem__(self, idx):
        """Return ``(transformed sample, target)`` for subset position ``idx``."""
        if isinstance(idx, list):
            return [self[i] for i in idx]
        sample, target = self.dataset[self.indices[idx]]
        if self.transform is not None:
            sample = self.transform(sample)
        return sample, target

    def __getitems__(self, indices):
        """Batched access; routed through ``__getitem__``.

        Recent PyTorch versions give ``Subset`` a ``__getitems__`` that the
        DataLoader prefers and that reads the wrapped dataset directly, which
        would skip this subset's transform unless it is overridden here.
        """
        return [self[i] for i in indices]

# Train / test views over the same ImageFolder, each given its own transform argument.
train_dataset = TransformedSubset(
    dataset=full_dataset,
    indices=train_indices,
    transform=train_transform,
)
test_dataset = TransformedSubset(
    dataset=full_dataset,
    indices=test_indices,
    transform=test_transform,
)

# Batch iterators: training batches are reshuffled each epoch, test order is fixed.
batch_size = 32
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

### Model with ResNet18 Transfer Learning

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# ResNet18 backbone initialised with the ImageNet-1k (V1) weights.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Fine-tune only the last residual block (layer4) and the head (fc); freeze the rest.
for param_name, parameter in model.named_parameters():
    parameter.requires_grad = ("layer4" in param_name) or ("fc" in param_name)

# Swap the original classifier for a small two-layer head with 36 outputs.
# The new layers are created after the freeze loop, so they are trainable by default.
num_ftrs = model.fc.in_features
classifier_head = nn.Sequential(
    nn.Linear(num_ftrs, 128),
    nn.ReLU(),
    nn.Linear(128, 36),
)
model.fc = classifier_head

model = model.to(device)

# Loss, optimizer over the trainable parameters only, and a step LR schedule
# (LR is multiplied by 0.5 every 10 scheduler steps).
criterion = nn.CrossEntropyLoss()
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

Using device: cuda


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 177MB/s]


### Train Model

In [None]:
# Number of full passes over the training set.
num_epochs = 10

for epoch in range(num_epochs):
    start_time = time.time()

    # Training mode, and fresh per-epoch statistics.
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    print(f"Epoch {epoch+1}/{num_epochs}")
    for images, labels in tqdm(train_loader, desc="Training", leave=False):
        images, labels = images.to(device), labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the batch loss and the number of correct top-1 predictions.
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    # Advance the learning-rate schedule once per epoch, after the optimizer
    # updates. Without this call the scheduler never changes the LR.
    scheduler.step()

    # Remaining time is extrapolated from the duration of the epoch just finished.
    end_time = time.time()
    epoch_duration = end_time - start_time
    est_remaining = (num_epochs - (epoch + 1)) * epoch_duration / 60

    # Loss is the mean of the per-batch losses; accuracy is over all samples seen.
    print(f"Epoch [{epoch+1}/{num_epochs}], "
          f"Loss: {running_loss/len(train_loader):.4f}, "
          f"Accuracy: {100.*correct/total:.2f}%, "
          f"Epoch time: {epoch_duration:.1f}s, "
          f"Estimated remaining: {est_remaining:.1f} min")

print("Finished Training!")

Epoch 1/10




Epoch [1/10], Loss: 0.2266, Accuracy: 93.67%, Epoch time: 315.7s, Estimated remaining: 47.4 min
Epoch 2/10




Epoch [2/10], Loss: 0.0964, Accuracy: 97.27%, Epoch time: 314.3s, Estimated remaining: 41.9 min
Epoch 3/10




Epoch [3/10], Loss: 0.0650, Accuracy: 98.08%, Epoch time: 315.1s, Estimated remaining: 36.8 min
Epoch 4/10




Epoch [4/10], Loss: 0.0483, Accuracy: 98.55%, Epoch time: 316.1s, Estimated remaining: 31.6 min
Epoch 5/10




Epoch [5/10], Loss: 0.0389, Accuracy: 98.83%, Epoch time: 314.3s, Estimated remaining: 26.2 min
Epoch 6/10




Epoch [6/10], Loss: 0.0280, Accuracy: 99.19%, Epoch time: 316.3s, Estimated remaining: 21.1 min
Epoch 7/10




Epoch [7/10], Loss: 0.0252, Accuracy: 99.26%, Epoch time: 316.2s, Estimated remaining: 15.8 min
Epoch 8/10




Epoch [8/10], Loss: 0.0238, Accuracy: 99.30%, Epoch time: 314.8s, Estimated remaining: 10.5 min
Epoch 9/10




Epoch [9/10], Loss: 0.0210, Accuracy: 99.41%, Epoch time: 315.6s, Estimated remaining: 5.3 min
Epoch 10/10


                                                             

Epoch [10/10], Loss: 0.0164, Accuracy: 99.53%, Epoch time: 315.5s, Estimated remaining: 0.0 min
Finished Training!




### Evaluate model

In [None]:
# Evaluation mode; counters for top-1 accuracy over the whole test loader.
model.eval()
correct, total = 0, 0

# No gradients are needed for scoring, so disable autograd tracking.
with torch.no_grad():
    for batch_images, batch_labels in test_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Predicted class = index of the largest logit in each row.
        logits = model(batch_images)
        predicted = logits.max(1).indices

        total += batch_labels.size(0)
        correct += (predicted == batch_labels).sum().item()

test_accuracy = 100. * correct / total
print(f"Test Accuracy: {test_accuracy:.2f}%")


Test Accuracy: 98.39%


### Save Model in Drive

I'm not sure yet what the final export should look like, because it depends on how the website is programmed; for now, the cell below just saves the model weights (`state_dict`) to Google Drive.


In [None]:
from google.colab import drive
import os

# Make Google Drive available under /content/drive.
drive.mount('/content/drive')

# Target folder and checkpoint file inside the mounted Drive.
save_dir = '/content/drive/MyDrive/Sign_interp'
save_path = os.path.join(save_dir, 'sign_language_model.pth')
os.makedirs(save_dir, exist_ok=True)

# Only the weights (state_dict) are stored, not the model object itself.
torch.save(model.state_dict(), save_path)
print(f"Model saved to {save_path}")

Mounted at /content/drive
Model saved to /content/drive/MyDrive/Sign_interp/sign_language_model.pth
