# Data Loading and Preprocessing

In [7]:
# ======================
#  Data & Transforms
# ======================

# Path to your dataset (which has 22 subfolders for each class)
dataset_path = "LIS-fingerspelling-dataset/LIS-fingerspelling-dataset-processed"

# Train/Val transforms
# - For training, we include data augmentation (flip, rotation, color jitter)
# - For validation, only resize + normalize
transform_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

transform_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# Create full dataset (ImageFolder automatically applies the transform)
# We'll initially apply the training transform, then adjust for validation
dataset_full = torchvision.datasets.ImageFolder(root=dataset_path, transform=transform_train)

# Split into train / val
train_size = int(0.8 * len(dataset_full))  # e.g. 80% train
val_size = len(dataset_full) - train_size
train_dataset, val_dataset = random_split(dataset_full, [train_size, val_size])

# Overwrite transform for val_dataset to the validation transform
val_dataset.dataset.transform = transform_val

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

print("Dataset size:", len(dataset_full))
print("Training samples:", len(train_dataset))
print("Validation samples:", len(val_dataset))
print("Class names:", dataset_full.classes)  # Should show your 22 classes


Dataset size: 51424
Training samples: 41139
Validation samples: 10285
Class names: ['a', 'b', 'c', 'd', 'e', 'f', 'h', 'i', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 't', 'u', 'v', 'w', 'x', 'y']


# Hyperparameters and Device Setup

In [10]:
# ======================
#  Setup
# ======================

import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from torchvision import models

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Hyperparameters
num_classes = 22   # Number of sign language classes
num_epochs = 5
batch_size = 32
learning_rate = 1e-4  # Fine-tuning often uses a smaller LR

print(f"num_classes={num_classes}, epochs={num_epochs}, batch_size={batch_size}, lr={learning_rate}")

Using device: cuda
num_classes=22, epochs=5, batch_size=32, lr=0.0001


# Define the CNN Model

In [11]:
# ======================
#  Model Definition
# ======================

# Load ResNet18 with ImageNet weights
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# The final layer (model.fc) is for 1000 classes by default
# We'll replace it to match our num_classes
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Move to device
model = model.to(device)

# Define loss function & optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

# Training and Validation

In [12]:
# ======================
#  Training & Validation
# ======================

def train_one_epoch(model, loader, criterion, optimizer):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in loader:
        images = images.to(device)
        labels = labels.to(device)

        # Forward
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Statistics
        running_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc


def validate(model, loader, criterion):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc


best_val_acc = 0.0

for epoch in range(num_epochs):
    train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer)
    val_loss, val_acc = validate(model, val_loader, criterion)

    print(f"Epoch [{epoch+1}/{num_epochs}]")
    print(f"  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
    print(f"  Val   Loss: {val_loss:.4f},   Val Acc: {val_acc:.4f}")

    # Save the model if validation accuracy improves
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        print(f"  Saving best model so far (val_acc={best_val_acc:.4f})")
        torch.save(model.state_dict(), "best_resnet18.pth")

print("Training complete.")
print("Best validation accuracy:", best_val_acc)

Epoch [1/5]
  Train Loss: 0.0910, Train Acc: 0.9818
  Val   Loss: 0.0187,   Val Acc: 0.9923
  Saving best model so far (val_acc=0.9923)
Epoch [2/5]
  Train Loss: 0.0024, Train Acc: 0.9997
  Val   Loss: 0.0001,   Val Acc: 1.0000
  Saving best model so far (val_acc=1.0000)
Epoch [3/5]
  Train Loss: 0.0003, Train Acc: 1.0000
  Val   Loss: 0.0000,   Val Acc: 1.0000
Epoch [4/5]
  Train Loss: 0.0121, Train Acc: 0.9967
  Val   Loss: 0.0001,   Val Acc: 1.0000
Epoch [5/5]
  Train Loss: 0.0002, Train Acc: 1.0000
  Val   Loss: 0.0000,   Val Acc: 1.0000
Training complete.
Best validation accuracy: 1.0


# Saving the Model

In [13]:
# ======================
#  Cell 5: Saving / Loading
# ======================

# Already done partial saving in the training loop (best model).
# But if you want to save the final model forcibly:
torch.save(model.state_dict(), "final_resnet18.pth")
print("Saved final model as final_resnet18.pth")

# To load the model later:
# model = models.resnet18(weights=None)  # or we can do resnet18(pretrained=False) for older versions
# model.fc = nn.Linear(model.fc.in_features, num_classes)
# model.load_state_dict(torch.load("best_resnet18.pth", map_location=device))
# model.to(device)
# model.eval()

Saved final model as final_resnet18.pth
