In [1]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, SubsetRandomSampler
import numpy as np
from tqdm import tqdm


In [2]:
# Per-channel normalisation shared by both splits (the standard ImageNet mean/std,
# consistent with the ImageNet pre-trained backbone loaded later in the notebook).
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Training transform: random horizontal flip as augmentation, then tensor + normalisation.
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])

# Validation transform: no random augmentation, so validation loss/accuracy are
# deterministic and comparable from epoch to epoch (they drive best-model selection).
val_transform = transforms.Compose([
    transforms.ToTensor(),
    normalize,
])

# Load the training and validation datasets (one sub-folder per class, as ImageFolder expects)
train_data = datasets.ImageFolder(root='D:/ASL_Alphabet_Dataset/asl_alphabet_train_transformed', transform=transform)
val_data = datasets.ImageFolder(root='D:/ASL_Alphabet_Dataset/asl_alphabet_test_transformed', transform=val_transform)

# Validation loader (uses full validation dataset, in fixed order)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False)


In [3]:
def get_random_indices(dataset, fraction=0.5):
    """Pick a random subset of positions in ``dataset`` without repetition.

    Args:
        dataset: Any object supporting ``len()``; only its length is used.
        fraction: Share of the dataset to sample. The subset size is
            ``int(len(dataset) * fraction)`` (truncated toward zero).

    Returns:
        A NumPy array of distinct integer indices drawn from
        ``range(len(dataset))`` using NumPy's global random state.
    """
    population = len(dataset)
    subset_size = int(population * fraction)
    return np.random.choice(population, size=subset_size, replace=False)

# Initial training loader: draws batches from a random 50% subset of the training set.
train_sampler = SubsetRandomSampler(
    indices=get_random_indices(train_data, fraction=0.5),
)
train_loader = DataLoader(
    dataset=train_data,
    batch_size=32,
    sampler=train_sampler,
    num_workers=1,
    pin_memory=True,
)


In [8]:
# Load an ImageNet pre-trained MobileNetV2 and swap the final classifier layer for one
# with an output per ASL class. `weights=` replaces the deprecated `pretrained=True`;
# IMAGENET1K_V1 is the checkpoint that the old flag selected.
model = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1)
model.classifier[1] = nn.Linear(model.last_channel, len(train_data.classes))
print(model)

# Define the device (GPU if available) and move the model onto it
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Gradient scaler for mixed-precision training. Loss scaling only applies on CUDA,
# so the scaler is explicitly disabled (becoming a pass-through) when running on CPU.
scaler = torch.amp.GradScaler("cuda", enabled=device.type == "cuda")


MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=



In [5]:
# Plain cross-entropy (used when scoring the validation set) and the Adam optimizer
# that updates every parameter of the model.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    params=model.parameters(),
    lr=4.78e-5,
)


In [6]:
def label_smoothed_nll_loss(lprobs, target, eps):
    """Negative log-likelihood with uniform label smoothing, averaged over samples.

    Args:
        lprobs: Log-probabilities with the class dimension last.
        target: Integer class indices, one per sample (shape of ``lprobs``
            without its last dimension).
        eps: Smoothing weight. ``0`` gives the plain NLL; larger values blend
            in the mean negative log-probability over all classes.

    Returns:
        Scalar tensor: mean over samples of
        ``(1 - eps) * nll + eps * mean_over_classes(-lprobs)``.
    """
    target_column = target.unsqueeze(-1)
    # Log-probability assigned to the true class of each sample.
    picked = torch.gather(lprobs, -1, target_column).squeeze(-1)
    nll_term = picked.neg()
    # Uniform-prior term: average negative log-probability across classes.
    uniform_term = lprobs.mean(dim=-1).neg()
    per_sample = nll_term * (1.0 - eps) + uniform_term * eps
    return per_sample.mean()

In [7]:
num_epochs = 10
fraction_of_data = 0.6  # Fraction of the training set sampled (without replacement) each epoch
best_val_acc = 0.0
label_smoothing_eps = 0.4  # Smoothing factor

# Mixed precision is only used on CUDA; on CPU autocast is explicitly disabled so the
# loop runs in full precision instead of requesting a CUDA context that does not exist.
use_amp = device.type == "cuda"

for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")

    # Draw a fresh random subset for this epoch and build a new DataLoader over it
    train_sampler_indices = get_random_indices(train_data, fraction=fraction_of_data)
    train_loader = DataLoader(train_data, batch_size=8, sampler=SubsetRandomSampler(train_sampler_indices))

    # Training phase
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False):
        inputs, labels = inputs.to(device), labels.to(device)

        # Clear gradients left over from the previous step before the new forward pass
        optimizer.zero_grad()

        # Forward pass (mixed precision when running on CUDA)
        with torch.amp.autocast(device_type=device.type, enabled=use_amp):
            outputs = model(inputs)
            lprobs = nn.functional.log_softmax(outputs, dim=-1)  # Log probabilities
            loss = label_smoothed_nll_loss(lprobs, labels, label_smoothing_eps)  # Apply label smoothing

        # Scaled backward pass; the scaler unscales before the optimizer step and
        # then adjusts its scale factor for the next iteration.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Accumulate sample-weighted loss and top-1 hits for the epoch summary
        running_loss += loss.item() * inputs.size(0)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_loss = running_loss / len(train_sampler_indices)
    train_acc = 100 * correct / total
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {train_loss:.4f}, Accuracy: {train_acc:.2f}%')

    # Validation phase (no gradient tracking; scored with plain cross-entropy,
    # so this loss is not on the same scale as the label-smoothed training loss)
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for val_inputs, val_labels in tqdm(val_loader, desc="Validating", leave=False):
            val_inputs, val_labels = val_inputs.to(device), val_labels.to(device)
            val_outputs = model(val_inputs)
            val_loss += criterion(val_outputs, val_labels).item() * val_inputs.size(0)
            _, val_predicted = val_outputs.max(1)
            val_total += val_labels.size(0)
            val_correct += val_predicted.eq(val_labels).sum().item()

    val_loss /= len(val_loader.dataset)
    val_acc = 100 * val_correct / val_total
    print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.2f}%')

    # Save the model if it has the best validation accuracy so far
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'asl_to_text_model.pth')
        print(f"New best model saved with Validation Accuracy: {val_acc:.2f}%")

print("Training complete.")


Epoch 1/10


                                                              

KeyboardInterrupt: 