In [10]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split, Subset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
import random
import matplotlib.pyplot as plt

# ----------------------------
# Data setup
# ----------------------------
dataFilePath = "~/asl_alphabet_train/asl_alphabet_train"

transform = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

dataset = datasets.ImageFolder(root=dataFilePath, transform=transform)

print("Classes:", dataset.classes)
print("Total images:", len(dataset))

# Use only 10% of the dataset
subset_size = int(0.1 * len(dataset))
subset_indices = random.sample(range(len(dataset)), subset_size)
subset_dataset = Subset(dataset, subset_indices)
print(f"Using 10% of dataset: {len(subset_dataset)} images")

# 80% train, 20% validation
train_size = int(0.8 * len(subset_dataset))
val_size = len(subset_dataset) - train_size
train_dataset, val_dataset = random_split(subset_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
print(f"Training samples: {len(train_dataset)}, Validation samples: {len(val_dataset)}")

# ----------------------------
# Model definition
# ----------------------------
class ASLNet(nn.Module):
    def __init__(self, num_classes=29):  # A-Z + del + nothing + space
        super(ASLNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(128 * 25 * 25, 256)  # for 200x200 input
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = ASLNet(num_classes=29)

# ----------------------------
# Training function
# ----------------------------
def train_model(model, train_dataset, val_dataset=None, batch_size=32, lr=1e-3, num_epochs=10, 
                checkpoint_path="checkpoint.pth", checkpoint_freq=5, plot_curve=False):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False) if val_dataset else None

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Lists for plotting
    train_losses, train_accuracies, val_accuracies = [], [], []

    for epoch in range(1, num_epochs + 1):
        model.train()
        running_loss = 0.0
        correct, total = 0, 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * images.size(0)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

        train_loss = running_loss / total
        train_acc = 100.0 * correct / total
        train_losses.append(train_loss)
        train_accuracies.append(train_acc)

        # Validation
        if val_loader:
            model.eval()
            val_correct, val_total = 0, 0
            with torch.no_grad():
                for images, labels in val_loader:
                    images, labels = images.to(device), labels.to(device)
                    outputs = model(images)
                    _, predicted = outputs.max(1)
                    val_total += labels.size(0)
                    val_correct += predicted.eq(labels).sum().item()
            val_acc = 100.0 * val_correct / val_total
            val_accuracies.append(val_acc)
            print(f"Epoch [{epoch}/{num_epochs}] - Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%")
        else:
            print(f"Epoch [{epoch}/{num_epochs}] - Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")

        # Checkpoint
        if epoch % checkpoint_freq == 0:
            torch.save(model.state_dict(), f"{checkpoint_path}_epoch{epoch}.pth")
            print(f"Checkpoint saved at epoch {epoch}")

    # Save final model
    torch.save(model.state_dict(), f"{checkpoint_path}_final.pth")

    # Optional plotting
    if plot_curve:
        plt.figure(figsize=(10,4))
        plt.subplot(1,2,1)
        plt.plot(range(1, num_epochs+1), train_losses, label='Train Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.title('Training Loss')
        plt.grid(True)
        plt.legend()

        plt.subplot(1,2,2)
        plt.plot(range(1, num_epochs+1), train_accuracies, label='Train Acc')
        if val_loader:
            plt.plot(range(1, num_epochs+1), val_accuracies, label='Val Acc')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy (%)')
        plt.title('Accuracy')
        plt.grid(True)
        plt.legend()

        plt.tight_layout()
        plt.show()

# ----------------------------
# Run training
# ----------------------------
os.makedirs("checkpoints", exist_ok=True)
checkpoint_path = "checkpoints/asl_model"

print("batch size = 16, lr = 1e-3")
model = ASLNet(num_classes=29)  # new instance
#train_model(model, train_dataset, val_dataset, batch_size=16, lr=1e-3, num_epochs=20,
 #           checkpoint_path=None, plot_curve=True)

print("batch size = 32, lr = 1e-4")
model = ASLNet(num_classes=29)  # new instance
#train_model(model, train_dataset, val_dataset, batch_size=32, lr=1e-4, num_epochs=20,
            #checkpoint_path=None, plot_curve=True)

print("batch size = 16, lr = 1e-4")
model = ASLNet(num_classes=29)  # new instance
#train_model(model, train_dataset, val_dataset, batch_size=16, lr=1e-4, num_epochs=20,
            #checkpoint_path=None, plot_curve=True)

print("batch size = 32, lr = 1e-3")
model = ASLNet(num_classes=29)  # new instance
#train_model(model, train_dataset, val_dataset, batch_size=32, lr=1e-3, num_epochs=20,
            #checkpoint_path=None, plot_curve=True)

# ----------------------------------------------------------------------------------------------------------------------
# Part C. Transfer Learning [15 pt]
# ----------------------------------------------------------------------------------------------------------------------
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader

# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Dataset and loader
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

loader = DataLoader(subset_dataset, batch_size=32, shuffle=False)

# Load AlexNet pretrained
alexnet = models.alexnet(pretrained=True).to(device)
alexnet.eval()

# Extract features
features_list = []
labels_list = []

with torch.no_grad():
    for imgs, labels in loader:
        imgs = imgs.to(device)
        feats = alexnet.features(imgs)
        features_list.append(feats.cpu())
        labels_list.append(labels)

# Concatenate all batches
features = torch.cat(features_list)
labels = torch.cat(labels_list)

# Save
torch.save(features, "alexnet_features.pt")
torch.save(labels, "alexnet_labels.pt")

print("Features shape:", features.shape)
print("Labels shape:", labels.shape)


Classes: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']
Total images: 87000
Using 10% of dataset: 8700 images
Training samples: 6960, Validation samples: 1740
batch size = 16, lr = 1e-3
batch size = 32, lr = 1e-4
batch size = 16, lr = 1e-4
batch size = 32, lr = 1e-3
Features shape: torch.Size([8700, 256, 5, 5])
Labels shape: torch.Size([8700])
