<a href="https://colab.research.google.com/github/GeorgeM9203/DAEN-429-Final-Project/blob/main/S_A.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

In [None]:
# Single seed shared by every random number generator used in this notebook.
SEED = 429

# Seed Python, NumPy and PyTorch (CPU generator and all CUDA devices) alike.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(SEED)

# Ask cuDNN for deterministic kernels and switch off its auto-tuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Run on the GPU when one is visible, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
import kagglehub

# Download (or reuse the cached copy of) the ASL alphabet dataset.
# `dataset_download` returns the local root directory of the dataset.
path = kagglehub.dataset_download("grassknoted/asl-alphabet")
print(path)

# Build the training-image directory from the returned root instead of a
# hard-coded absolute path, so the notebook also works when the dataset is
# cached somewhere other than /kaggle/input. The images sit inside a doubly
# nested "asl_alphabet_train" folder.
data_dir = os.path.join(path, "asl_alphabet_train", "asl_alphabet_train")
if not os.path.isdir(data_dir):
    # Fail here with a clear message rather than later inside ImageFolder.
    raise FileNotFoundError(f"Expected training images under {data_dir!r}")





# Per-channel statistics used to normalise the image tensors.
# NOTE(review): these match the commonly quoted ImageNet mean/std — confirm
# that is the intended reference distribution.
_normalize_mean = [0.485, 0.456, 0.406]
_normalize_std = [0.229, 0.224, 0.225]

# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalise each channel.
_preprocess_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_normalize_mean, std=_normalize_std),
]
transform = transforms.Compose(_preprocess_steps)

Using Colab cache for faster access to the 'asl-alphabet' dataset.
/kaggle/input/asl-alphabet


In [None]:
import os
# Inspect the downloaded dataset layout: the root, the first-level train
# folder, and the nested train folder that holds the class directories.
base = "/kaggle/input/asl-alphabet"
base_entries = os.listdir(base)
print("Base contents:", base_entries)

train1 = os.path.join(base, "asl_alphabet_train")
print("Inside asl_alphabet_train:", os.listdir(train1))

# The nested folder is only listed when it is actually present.
train2 = os.path.join(train1, "asl_alphabet_train")
nested_present = os.path.exists(train2)
if not nested_present:
    print("Nested folder does NOT exist")
else:
    print("Inside nested train folder:", os.listdir(train2))


Base contents: ['asl_alphabet_test', 'asl_alphabet_train']
Inside asl_alphabet_train: ['asl_alphabet_train']
Inside nested train folder: ['N', 'R', 'space', 'B', 'I', 'del', 'F', 'H', 'E', 'U', 'M', 'X', 'K', 'Q', 'Y', 'S', 'G', 'A', 'O', 'T', 'V', 'Z', 'C', 'P', 'L', 'W', 'D', 'nothing', 'J']


In [None]:
# Load every image under `data_dir`; each sub-folder name becomes a class.
full_dataset = datasets.ImageFolder(root=data_dir, transform=transform)
classes = full_dataset.classes

# One integer class label per sample, in the same order as the dataset.
labels = np.asarray(full_dataset.targets)
indices = np.arange(labels.shape[0])

# Stratified 80/20 split so both subsets keep the class proportions.
train_idx, val_idx = train_test_split(
    indices,
    test_size=0.2,
    stratify=labels,
    random_state=SEED,
)

# Both subsets are index views over the same underlying dataset.
train_dataset, val_dataset = (Subset(full_dataset, idx) for idx in (train_idx, val_idx))

In [None]:
batch_size = 64

# Settings shared by both loaders; only the shuffling differs.
_loader_kwargs = dict(batch_size=batch_size, num_workers=2)

# Training batches are reshuffled each epoch; validation keeps a fixed order.
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)

In [None]:
class BasicBlock(nn.Module):
  """Two 3x3 conv + batch-norm layers with a residual (shortcut) connection.

  The shortcut is the identity unless the block changes the spatial stride or
  the channel count, in which case a 1x1 convolution followed by batch-norm
  projects the input so it can be added to the main path.

  Args:
    in_channels: number of channels of the block input.
    out_channels: number of channels produced by both convolutions.
    stride: stride of the first convolution (and of the projection shortcut).
  """

  expansion = 1

  def __init__(self, in_channels, out_channels, stride=1):
    super().__init__()

    # Main path: conv -> bn -> relu -> conv -> bn.
    self.conv1 = nn.Conv2d(
      in_channels, out_channels,
      kernel_size=3, stride=stride, padding=1, bias=False,
    )
    self.bn1 = nn.BatchNorm2d(out_channels)
    self.relu = nn.ReLU(inplace=True)
    self.conv2 = nn.Conv2d(
      out_channels, out_channels,
      kernel_size=3, padding=1, bias=False,
    )
    self.bn2 = nn.BatchNorm2d(out_channels)

    # Shortcut path: project only when the output shape differs from the input.
    shape_changes = stride != 1 or in_channels != out_channels
    if shape_changes:
      self.shortcut = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
        nn.BatchNorm2d(out_channels),
      )
    else:
      # An empty Sequential returns its input unchanged.
      self.shortcut = nn.Sequential()

  def forward(self, x):
    """Return relu(main_path(x) + shortcut(x))."""
    out = self.conv1(x)
    out = self.bn1(out)
    out = self.relu(out)
    out = self.conv2(out)
    out = self.bn2(out)
    # Add the (possibly projected) input before the final activation.
    out += self.shortcut(x)
    return self.relu(out)

In [None]:
class ResNet18(nn.Module):
  """ResNet-style classifier assembled from `BasicBlock` units.

  Layout: a 7x7 stride-2 stem convolution with batch-norm, ReLU and max-pool,
  then four stages of two blocks each (64, 128, 256, 512 channels), global
  average pooling and a linear classification head.

  Args:
    num_classes: number of output logits produced by the final linear layer.
  """

  def __init__(self, num_classes=29):
    super().__init__()

    # Channel count fed into the next stage; updated by `_make_layer`.
    self.in_channels = 64

    # Stem.
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Residual stages; every stage after the first starts with a stride-2 block.
    self.layer1 = self._make_layer(64, 2, stride=1)
    self.layer2 = self._make_layer(128, 2, stride=2)
    self.layer3 = self._make_layer(256, 2, stride=2)
    self.layer4 = self._make_layer(512, 2, stride=2)

    # Head.
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = nn.Linear(512, num_classes)

  def _make_layer(self, out_channels, blocks, stride):
    """Build one stage: a first block using `stride`, then stride-1 blocks.

    At least one block is always created. `self.in_channels` is set to
    `out_channels` so the following stage receives the right input width.
    """
    per_block_strides = [stride] + [1] * (blocks - 1)
    stage = []
    for block_stride in per_block_strides:
      stage.append(BasicBlock(self.in_channels, out_channels, block_stride))
      self.in_channels = out_channels
    return nn.Sequential(*stage)

  def forward(self, x):
    """Map a batch of 3-channel images to `num_classes` logits per image."""
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.maxpool(x)
    for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
      x = stage(x)
    x = self.avgpool(x)
    # Collapse everything but the batch dimension before the linear head.
    x = torch.flatten(x, 1)
    return self.fc(x)

In [None]:
# Size the classification head from the dataset's class list rather than a
# hard-coded count, matching how the model is rebuilt before training.
model = ResNet18(num_classes=len(classes)).to(device)
print(model.fc)  # should show: Linear(in_features=512, out_features=29, bias=True)

Linear(in_features=512, out_features=29, bias=True)


In [None]:
# Cross-entropy loss over the class logits produced by the model.
criterion = nn.CrossEntropyLoss()

# Adam over all parameters of the current `model`, learning rate 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Build a fresh, randomly initialised network sized to the dataset's classes
# and move it to the selected device.
model = ResNet18(num_classes=len(classes))
model = model.to(device)

# The optimizer must be created after the model so it tracks these parameters.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Switch to training mode; `train()` returns the model, so the cell shows it.
model.train()

ResNet18(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  

In [None]:
# Verify all layers are trainable: report the first parameter found frozen.
_first_frozen = next(
    (name for name, param in model.named_parameters() if not param.requires_grad),
    None,
)
assert _first_frozen is None, f"Layer {_first_frozen} is frozen!"

# Verify all BatchNorm layers are in training mode.
assert all(
    m.training for m in model.modules() if isinstance(m, nn.BatchNorm2d)
), "BatchNorm layer is stuck in eval mode!"

In [None]:
import torch
import matplotlib.pyplot as plt
from tqdm import tqdm

num_epochs = 3

# Per-epoch metrics, one entry appended per completed epoch.
train_loss_history = []
train_acc_history = []
val_loss_history = []
val_acc_history = []


def _run_epoch(model, loader, criterion, device, optimizer=None, desc=""):
    """Run one full pass over `loader` and return (mean loss, accuracy).

    With an `optimizer` the model is put in training mode and updated after
    every batch. Without one the model is put in eval mode and gradients are
    disabled, which makes this a pure evaluation pass.

    Keeping the batch loop inside a function also stops it from rebinding
    notebook-level names such as `labels`.

    Args:
        model: network to train or evaluate.
        loader: iterable yielding (images, labels) batches.
        criterion: loss called as `criterion(outputs, labels)`; its value is
            weighted by the batch size when accumulated.
        device: device the batches are moved to.
        optimizer: optimizer to step; `None` selects evaluation.
        desc: label shown on the progress bar.

    Returns:
        Tuple `(loss, accuracy)` averaged over all samples seen.

    Raises:
        ValueError: if `loader` yields no samples.
    """
    is_training = optimizer is not None
    model.train(is_training)

    total_loss = 0.0
    total_correct = 0
    total_seen = 0

    with torch.set_grad_enabled(is_training):
        for images, labels in tqdm(loader, desc=desc):
            images, labels = images.to(device), labels.to(device)

            if is_training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if is_training:
                loss.backward()
                optimizer.step()

            # Weight the batch loss by batch size so the final average is
            # per-sample even when the last batch is smaller.
            batch_count = images.size(0)
            total_loss += loss.item() * batch_count
            total_correct += (outputs.argmax(1) == labels).sum().item()
            total_seen += batch_count

    if total_seen == 0:
        raise ValueError("Data loader yielded no samples; cannot compute epoch metrics.")

    return total_loss / total_seen, total_correct / total_seen


for epoch in range(1, num_epochs + 1):

    # ------------------- TRAINING -------------------
    epoch_train_loss, epoch_train_acc = _run_epoch(
        model, train_loader, criterion, device,
        optimizer=optimizer,
        desc=f"Epoch {epoch}/{num_epochs} (Train)",
    )
    train_loss_history.append(epoch_train_loss)
    train_acc_history.append(epoch_train_acc)

    # ------------------- VALIDATION -------------------
    epoch_val_loss, epoch_val_acc = _run_epoch(
        model, val_loader, criterion, device,
        desc=f"Epoch {epoch}/{num_epochs} (Val)",
    )
    val_loss_history.append(epoch_val_loss)
    val_acc_history.append(epoch_val_acc)

    print(f"Epoch {epoch}/{num_epochs} | "
          f"Train Loss: {epoch_train_loss:.4f}, Train Acc: {epoch_train_acc:.4f} | "
          f"Val Loss: {epoch_val_loss:.4f}, Val Acc: {epoch_val_acc:.4f}")


Epoch 1/3 (Train):   9%|▉         | 97/1088 [29:41<5:00:05, 18.17s/it]

In [None]:
from sklearn.metrics import f1_score

def compute_f1(model, dataloader, device):
    """Return the macro-averaged F1 score of `model` over `dataloader`.

    The model is switched to eval mode (and left in it) and predictions are
    made without gradient tracking. The predicted class of each sample is the
    index of its largest output value.

    Args:
        model: network to evaluate.
        dataloader: iterable yielding (inputs, labels) batches.
        device: device each batch is moved to before the forward pass.

    Returns:
        The macro F1 score as computed by `f1_score(..., average='macro')`.
    """
    model.eval()
    predicted, expected = [], []

    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_inputs)
            batch_preds = torch.max(logits, dim=1).indices

            # Collect on the CPU as plain ints so sklearn can consume them.
            predicted.extend(batch_preds.cpu().tolist())
            expected.extend(batch_labels.cpu().tolist())

    return f1_score(expected, predicted, average='macro')

In [None]:
# Macro F1 of the current model on the held-out validation split.
final_f1 = compute_f1(model=model, dataloader=val_loader, device=device)
print("Final T-A Macro F1:", final_f1)

Final T-A Macro F1: 0.9968409844967989


In [None]:
import matplotlib.pyplot as plt

# X axis: 1-based epoch numbers matching the recorded histories.
epochs = range(1, num_epochs + 1)

# Train and validation loss on one axis, so divergence is easy to spot.
fig, ax = plt.subplots(figsize=(10, 5))
for history, series_label in (
    (train_loss_history, 'Train Loss'),
    (val_loss_history, 'Val Loss'),
):
    ax.plot(epochs, history, marker='o', label=series_label)

ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# print(f"Train set size: {len(train_dataset)}")
# print(f"Validation set size: {len(val_dataset)}")


In [None]:
# model.eval()
# from torchvision.utils import make_grid
# import matplotlib.pyplot as plt

# # Get a batch from validation
# images, labels = next(iter(val_loader))
# images = images.to(device)
# outputs = model(images)
# preds = outputs.argmax(1)

# # Map back to class names
# idx_to_class = {v: k for k, v in full_dataset.class_to_idx.items()}
# pred_names = [idx_to_class[p.item()] for p in preds]
# true_names = [idx_to_class[l.item()] for l in labels]

# # Show predictions
# grid = make_grid(images.cpu()[:8], nrow=4)
# plt.figure(figsize=(12, 6))
# plt.imshow(grid.permute(1, 2, 0))
# plt.title("Predictions vs Ground Truth")
# for i in range(8):
#     print(f"True: {true_names[i]}, Pred: {pred_names[i]}")


In [None]:
# from matplotlib import pyplot as plt

# def show_class_samples(dataset, class_idx, n=5):
#     indices = [i for i, (_, label) in enumerate(dataset) if label == class_idx][:n]
#     images = [dataset[i][0] for i in indices]
#     grid = make_grid(images, nrow=n)
#     plt.figure(figsize=(15, 4))
#     plt.imshow(grid.permute(1, 2, 0))
#     plt.title(f"Samples of class '{full_dataset.classes[class_idx]}'")
#     plt.axis('off')
#     plt.show()

# # Try for 2 or 3 different classes
# show_class_samples(train_dataset, class_idx=0)  # e.g., A
# show_class_samples(val_dataset, class_idx=0)    # same class in val


In [None]:
# print("Classes:", full_dataset.classes)
# print("Num classes:", len(full_dataset.classes))
# print("Num images:", len(full_dataset))

# from collections import Counter
# print(Counter(labels))
