<a href="https://colab.research.google.com/github/GeorgeM9203/DAEN-429-Final-Project/blob/main/T_B_C.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import kagglehub

# Fetch the "grassknoted/asl-alphabet" dataset through kagglehub; `path` is the
# location returned by dataset_download.
path = kagglehub.dataset_download("grassknoted/asl-alphabet")

# Show where the dataset ended up.
print(path)

# Training-image folder used as DATA_DIR in the data-configuration cell:
# /kaggle/input/asl-alphabet/asl_alphabet_train/asl_alphabet_train

In [None]:
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import copy
import re
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset
import matplotlib.pyplot as plt
from sklearn.model_selection import GroupShuffleSplit

# Set device: use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Reproducibility: seed every RNG the notebook relies on (Python, NumPy, PyTorch CPU/GPU).
SEED = 429
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)  # also covers multi-GPU runtimes; safe to call without CUDA
# cuDNN: request deterministic kernels AND switch off the autotuner, which can
# otherwise select different convolution algorithms from one run to the next.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

## Data Loading (Stratified Split)

In [None]:
# --------------------------
# Data Configuration
# --------------------------
# Root folder handed to datasets.ImageFolder further down in this cell.
DATA_DIR = '/kaggle/input/asl-alphabet/asl_alphabet_train/asl_alphabet_train'
# Mini-batch size used by both the training and the validation DataLoader.
BATCH_SIZE = 64
# Both transform pipelines resize every image to IMG_SIZE x IMG_SIZE.
IMG_SIZE = 224

import os
import re
import numpy as np
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split


# --------------------------
# Custom Transform
# --------------------------
def custom_to_tensor(pic):
    """Turn a channels-last image into a channels-first float32 tensor.

    Args:
        pic: A PIL image or array-like with three axes ordered (height, width, channel).

    Returns:
        torch.Tensor of dtype float32 and shape (channel, height, width) whose
        values are the input values divided by 255 (i.e. in [0, 1] for 8-bit input).
    """
    pixels = np.array(pic, dtype=np.float32) / 255.0
    # Move the channel axis to the front: (H, W, C) -> (C, H, W).
    channels_first = np.transpose(pixels, (2, 0, 1))
    return torch.tensor(channels_first)


# --------------------------
# Transforms
# --------------------------
# Per-channel mean / std used to normalise inputs (the standard ImageNet statistics).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Augmentations applied to training images only. They are deliberately mild:
# ASL hand signs are NOT always left-right symmetric, and large rotations or
# colour shifts could turn one gesture into something unrecognisable.
_TRAIN_AUGMENTATIONS = [
    transforms.RandomHorizontalFlip(p=0.1),         # Very small chance of mirroring
    transforms.RandomRotation(5),                   # Max 5 degrees keeps gestures valid
    transforms.ColorJitter(brightness=0.1,
                           contrast=0.1,
                           saturation=0.1,
                           hue=0.02),               # Subtle lighting variation
]


def _build_pipeline(augmentations):
    """Compose: resize -> optional augmentations -> tensor conversion -> normalisation."""
    return transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),    # Keep images consistent
        *augmentations,
        transforms.Lambda(custom_to_tensor),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ])


# 'train' gets the augmentations, 'val' is the deterministic pipeline.
data_transforms = {
    'train': _build_pipeline(_TRAIN_AUGMENTATIONS),
    'val': _build_pipeline([]),
}


# --------------------------
# Helper: Transformed Subset
# --------------------------
class TransformedSubset(torch.utils.data.Dataset):
    """Wrap an indexable dataset of (sample, target) pairs and transform the sample lazily.

    This lets several views of the same underlying dataset (e.g. a train and a
    validation split) each apply their own transform.

    Args:
        subset: Any object supporting ``len()`` and integer indexing that yields
            ``(sample, target)`` pairs.
        transform: Optional callable applied to the sample only; ``None`` leaves
            the sample untouched.
    """

    def __init__(self, subset, transform=None):
        self.subset = subset
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(transformed_sample, target)`` for position ``index``."""
        x, y = self.subset[index]
        # Compare against None explicitly: a perfectly valid callable may be
        # "falsy" (e.g. an object defining __len__ that returns 0) and must
        # still be applied.
        if self.transform is not None:
            x = self.transform(x)
        return x, y

    def __len__(self):
        """Number of items in the wrapped dataset."""
        return len(self.subset)


# --------------------------
# Load Full Dataset
# --------------------------
# No transform is attached here on purpose: the split-specific transforms are
# applied later by the TransformedSubset wrappers.
full_dataset_raw = datasets.ImageFolder(root=DATA_DIR)
# Class names as discovered by ImageFolder; list position is the integer label.
classes = full_dataset_raw.classes
print(f"Classes ({len(classes)}): {classes}")


# --------------------------
# REQUIRED: Stratified 80/20 Split (seed = 429)
# --------------------------
# One integer position per image, plus the matching class label used for stratification.
indices = np.arange(len(full_dataset_raw))
labels = np.array([label for _, label in full_dataset_raw.samples])

# Stratify on the label so both splits keep the class proportions of the full
# dataset; reuse the notebook-wide SEED instead of repeating the literal.
train_idx, val_idx = train_test_split(
    indices,
    test_size=0.2,
    stratify=labels,
    random_state=SEED,
)

# Cheap invariant: every sample landed in exactly one of the two splits.
assert len(train_idx) + len(val_idx) == len(indices), "split lost or duplicated samples"


# --------------------------
# Create Transformed Subsets
# --------------------------
# Both subsets index into the same un-transformed ImageFolder; only the
# transform differs (augmenting pipeline for train, deterministic one for val).
train_dataset = TransformedSubset(Subset(full_dataset_raw, train_idx),
                                  data_transforms['train'])
val_dataset = TransformedSubset(Subset(full_dataset_raw, val_idx),
                                data_transforms['val'])


# --------------------------
# DataLoaders
# --------------------------
# Training batches are reshuffled on every pass; validation order is kept fixed.
# num_workers=0 means samples are loaded in the main process.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                          shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE,
                        shuffle=False, num_workers=0)

# Report the resulting split sizes.
print(f"Train samples: {len(train_dataset)}")
print(f"Val samples: {len(val_dataset)}")


In [None]:
# Leakage check: the train and validation index sets should share no element.
train_set_indices, val_set_indices = set(train_idx), set(val_idx)

overlap = train_set_indices & val_set_indices
print("Overlap size:", len(overlap))

## Model Definition (T-B: Last Block)

In [None]:
def get_tb_model(num_classes):
    """Build the T-B network: ImageNet-pretrained ResNet-18 with only layer4 and a new head trainable.

    Args:
        num_classes: Number of outputs of the replacement fully connected layer.

    Returns:
        The ResNet-18 whose parameters are all frozen except those of ``layer4``
        and of the freshly created ``fc`` layer.
    """
    net = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

    # First pass freezes every parameter, second pass re-enables layer4 only.
    for block in (net, net.layer4):
        trainable = block is net.layer4
        for weight in block.parameters():
            weight.requires_grad = trainable

    # A newly constructed Linear layer is trainable by default.
    net.fc = nn.Linear(net.fc.in_features, num_classes)
    return net

# Instantiate the T-B model and place it on the selected device.
model = get_tb_model(len(classes)).to(device)

# List the parameters that will receive gradient updates.
print("Trainable parameters:")
for name in (n for n, p in model.named_parameters() if p.requires_grad):
    print(name)

## Training Loop

In [None]:
def calculate_metrics(preds, labels):
    """Compute accuracy and macro-averaged F1 for integer class predictions.

    The macro average runs over every class that appears in ``preds`` or
    ``labels``; a class with no true positives contributes an F1 of 0.

    Args:
        preds: Sequence / array of predicted class ids.
        labels: Sequence / array of true class ids, same shape as ``preds``.

    Returns:
        ``(accuracy, macro_f1)``. For empty input both are ``0.0`` (rather
        than NaN accompanied by a NumPy warning).

    Raises:
        ValueError: If ``preds`` and ``labels`` do not have the same shape.
    """
    preds = np.asarray(preds)
    labels = np.asarray(labels)
    if preds.shape != labels.shape:
        raise ValueError(
            f"preds and labels must have the same shape, got {preds.shape} and {labels.shape}"
        )
    if preds.size == 0:
        return 0.0, 0.0

    acc = np.mean(preds == labels)

    f1_scores = []
    for cls in np.unique(np.concatenate([preds, labels])):
        predicted_cls = preds == cls
        actual_cls = labels == cls
        tp = np.sum(predicted_cls & actual_cls)
        fp = np.sum(predicted_cls & ~actual_cls)
        fn = np.sum(~predicted_cls & actual_cls)
        # Guard each ratio so a class that is never predicted (or never
        # present) scores 0 instead of dividing by zero.
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        if precision + recall > 0:
            f1_scores.append(2 * (precision * recall) / (precision + recall))
        else:
            f1_scores.append(0)

    macro_f1 = np.mean(f1_scores)
    return acc, macro_f1

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=3):
    """Train ``model`` and restore the weights of the epoch with the best validation macro-F1.

    Relies on the module-level ``device`` for tensor placement and on
    ``calculate_metrics`` for the macro-F1 score.

    Args:
        model: Network to train; modified in place.
        train_loader: DataLoader yielding ``(inputs, labels)`` training batches.
        val_loader: DataLoader yielding ``(inputs, labels)`` validation batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer over the parameters that should be updated.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        ``(model, train_loss_history, val_loss_history, best_f1)`` where the
        histories hold one average loss per epoch and ``model`` carries the
        best-F1 weights.
    """
    train_loss_history = []
    val_loss_history = []
    best_model_wts = copy.deepcopy(model.state_dict())
    best_f1 = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)
        model.train()

        # Keep BatchNorm layers whose parameters are all frozen in eval mode so
        # their running statistics are neither updated nor replaced by batch
        # statistics. model.train() resets the mode, hence this runs every epoch.
        # `track_running_stats` is deliberately left alone: changing it would
        # persist on the module and stop the layer from tracking statistics if
        # it is unfrozen in a later training stage.
        for m in model.modules():
            if isinstance(m, nn.BatchNorm2d) and not any(p.requires_grad for p in m.parameters()):
                m.eval()

        running_loss = 0.0
        running_corrects = 0

        for i, (inputs, labels) in enumerate(train_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            _, preds = torch.max(outputs, 1)
            loss.backward()
            optimizer.step()
            # The criterion returns a batch mean; weight it by the batch size so
            # the epoch average stays exact when the last batch is smaller.
            running_loss += loss.item() * inputs.size(0)
            running_corrects += (preds == labels).sum().item()
            if (i + 1) % 100 == 0:
                print(f'   Batch {i+1}/{len(train_loader)} - Loss: {loss.item():.4f}')

        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = running_corrects / len(train_loader.dataset)
        train_loss_history.append(epoch_loss)
        print(f'Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

        # ---- Validation pass: no gradients, every layer in eval mode ----
        model.eval()
        val_loss = 0.0
        val_corrects = 0
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                _, preds = torch.max(outputs, 1)
                val_loss += loss.item() * inputs.size(0)
                val_corrects += (preds == labels).sum().item()
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        epoch_val_loss = val_loss / len(val_loader.dataset)
        epoch_val_acc = val_corrects / len(val_loader.dataset)
        _, epoch_f1 = calculate_metrics(all_preds, all_labels)
        val_loss_history.append(epoch_val_loss)
        print(f'Val Loss: {epoch_val_loss:.4f} Acc: {epoch_val_acc:.4f} F1: {epoch_f1:.4f}')

        # Checkpoint on macro-F1 (not accuracy): snapshot the weights whenever it improves.
        if epoch_f1 > best_f1:
            best_f1 = epoch_f1
            best_model_wts = copy.deepcopy(model.state_dict())
            print(f"   >> New Best F1: {best_f1:.4f} <<")

    print(f'Best Val F1: {best_f1:.4f}')
    model.load_state_dict(best_model_wts)
    return model, train_loss_history, val_loss_history, best_f1



# Loss for multi-class classification on the raw model outputs.
criterion = nn.CrossEntropyLoss()
# Hand the optimizer only the parameters left trainable by get_tb_model.
optimizer = optim.Adam(
    (p for p in model.parameters() if p.requires_grad),
    lr=0.001,
)

print("Starting T-B Training...")
trained_model, train_loss, val_loss, tb_best_f1 = train_model(
    model,
    train_loader,
    val_loader,
    criterion,
    optimizer,
    num_epochs=3,
)

In [None]:
# Print each BatchNorm2d layer together with the requires_grad flag of its parameters.
for m in (layer for layer in model.modules() if isinstance(layer, nn.BatchNorm2d)):
    print(m, [p.requires_grad for p in m.parameters()])


In [None]:
# CHECKING: do BatchNorm running statistics move during a training-mode forward pass?

# Probe a throwaway copy so this diagnostic can never alter the trained model
# (no optimizer step, no change to the real model's BatchNorm buffers).
probe_model = copy.deepcopy(model)
probe_model.train()

# Mirror the policy used in train_model: BatchNorm layers whose parameters are
# all frozen are kept in eval mode.
for m in probe_model.modules():
    if isinstance(m, nn.BatchNorm2d) and not any(p.requires_grad for p in m.parameters()):
        m.eval()

bn_layers = [m for m in probe_model.modules() if isinstance(m, nn.BatchNorm2d)]
bn_running_means_before = [m.running_mean.clone() for m in bn_layers]
bn_running_vars_before = [m.running_var.clone() for m in bn_layers]

# Run ONE training batch. Running statistics are updated during the forward
# pass, so no backward pass or optimizer step is needed for this check.
imgs, labels = next(iter(train_loader))
imgs = imgs.to(device)
with torch.no_grad():
    probe_model(imgs)

bn_running_means_after = [m.running_mean.clone() for m in bn_layers]
bn_running_vars_after = [m.running_var.clone() for m in bn_layers]

# Compare both means and variances, and track frozen layers separately:
# trainable BatchNorm layers are expected to move, frozen ones are not.
changed = False
frozen_changed = False
for m, mean_before, mean_after, var_before, var_after in zip(
    bn_layers,
    bn_running_means_before,
    bn_running_means_after,
    bn_running_vars_before,
    bn_running_vars_after,
):
    moved = not (torch.equal(mean_before, mean_after) and torch.equal(var_before, var_after))
    changed = changed or moved
    if moved and not any(p.requires_grad for p in m.parameters()):
        frozen_changed = True

print("BatchNorm running stats changed:", changed)
print("Frozen BatchNorm running stats changed:", frozen_changed)

# Release the copy (and its GPU memory, if any).
del probe_model

In [None]:
# Per-epoch loss curves from the T-B run.
plt.figure(figsize=(10, 5))
for curve, curve_label in ((train_loss, 'Train Loss'), (val_loss, 'Val Loss')):
    plt.plot(curve, label=curve_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# (T-C) Progressive Unfreezing

This section implements **T-C: Progressive**.
- **Policy**: Start from T-B's best checkpoint; unfreeze layer3 as well; train layer3 + layer4 + head (fc).



In [None]:
# -----------------------------
# (T-C) Progressive Unfreezing
# -----------------------------
print("Setting up T-C...")

# 0. Start from T-B's best checkpoint, but fine-tune a COPY. train_model
#    updates the model it receives in place, so training `trained_model`
#    directly would overwrite the T-B weights that later cells evaluate.
tc_model = copy.deepcopy(trained_model)

# 1. Freeze entire model
for param in tc_model.parameters():
    param.requires_grad = False

# 2. Unfreeze layer3, layer4, and fc
for block in (tc_model.layer3, tc_model.layer4, tc_model.fc):
    for param in block.parameters():
        param.requires_grad = True
    # An unfrozen BatchNorm layer must also keep its running statistics up to
    # date, otherwise training (batch statistics) and evaluation (running
    # statistics) disagree. Harmless when tracking is already enabled.
    for m in block.modules():
        if isinstance(m, nn.BatchNorm2d) and m.running_mean is not None:
            m.track_running_stats = True

print("\nTrainable parameters (T-C):")
for name, param in tc_model.named_parameters():
    if param.requires_grad:
        print("  ", name)

# 3. Optimizer over every trainable parameter (layer3 + layer4 + fc), with a
#    much smaller learning rate than the 1e-3 used for T-B.
optimizer_tc = optim.Adam(
    filter(lambda p: p.requires_grad, tc_model.parameters()),
    lr=5e-5
)

# 4. Train T-C (Progressive Unfreezing)
print("\nStarting T-C Training...")
trained_model_tc, train_loss_tc, val_loss_tc, tc_best_f1 = train_model(
    tc_model,
    train_loader,
    val_loader,
    criterion,
    optimizer_tc,
    num_epochs=3
)

# 5. Plot loss curves
plt.figure(figsize=(10, 5))
for curve, curve_label in ((train_loss_tc, 'Train Loss (T-C)'), (val_loss_tc, 'Val Loss (T-C)')):
    plt.plot(curve, label=curve_label)
plt.title('T-C: Progressive Training Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

print("Best T-C F1:", tc_best_f1)


In [None]:
# Inspect the model returned by the T-C run: which parameters ended up trainable?
for name, p in trained_model_tc.named_parameters():
    print(name, p.requires_grad)


In [None]:
# Side-by-side summary of the best validation macro-F1 of each strategy.
rule = "=" * 30
print(rule)
print("FINAL RESULTS")
print(rule)
print(f"T-B (Last Block) Best F1: {tb_best_f1:.4f}")
print(f"T-C (Progressive) Best F1: {tc_best_f1:.4f}")
print("-" * 30)
# T-C has to be strictly better to win; a tie goes to T-B.
verdict = "Winner: T-C (Progressive)" if tc_best_f1 > tb_best_f1 else "Winner: T-B (Last Block)"
print(verdict)
print(rule)

# Testing on the T-B Model

In [None]:
from PIL import Image
import os
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# --------------------------
# Load Test Images
# --------------------------
TEST_DIR = '/kaggle/input/asl-alphabet/asl_alphabet_test/asl_alphabet_test'
test_images = sorted(
    f for f in os.listdir(TEST_DIR)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)

test_inputs = []
true_labels = []

for img_name in test_images:
    img_path = os.path.join(TEST_DIR, img_name)
    # The context manager closes the file handle once the pixels are converted.
    with Image.open(img_path) as raw_img:
        rgb_img = raw_img.convert('RGB')
    test_inputs.append(data_transforms['val'](rgb_img))

    # Extract true label from filename (e.g., "A_test.jpg" → "A")
    label_char = img_name.split('_')[0]
    true_labels.append(classes.index(label_char))  # Convert to class index

test_tensor = torch.stack(test_inputs).to(device)

# --------------------------
# Predict with Trained T-B Model
# --------------------------
trained_model.eval()
with torch.no_grad():
    outputs = trained_model(test_tensor)
    _, preds = torch.max(outputs, 1)

predicted_labels = preds.cpu().numpy()
pred_classes = [classes[p] for p in predicted_labels]

# --------------------------
# Report Metrics
# --------------------------
acc, macro_f1 = calculate_metrics(predicted_labels, true_labels)
print("\nTest Accuracy:", round(acc, 4))
print("Test Macro-F1:", round(macro_f1, 4))

print("\nPredicted Classes:")
for img_name, pred in zip(test_images, pred_classes):
    print(f"{img_name}: {pred}")

# --------------------------
# Confusion Matrix
# --------------------------
# Pin the label set to every class index. Without `labels=`, sklearn sizes the
# matrix by the labels that actually occur, so a class absent from both the
# true and the predicted labels would make the matrix smaller than `classes`
# and the display labels would no longer line up.
all_label_indices = list(range(len(classes)))
cm = confusion_matrix(true_labels, predicted_labels, labels=all_label_indices)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=classes)
disp.plot(xticks_rotation=45, cmap='Blues')
plt.title(f"Confusion Matrix on {len(test_images)}-Image Test Set")
plt.tight_layout()
plt.show()


# Testing the handmade signs on T-B

In [None]:
!pip install pyheif

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the hand-taken test images are read
# from a folder under this mount point in the next cell.
drive.mount('/content/drive')


In [None]:
import os
import torch
import pyheif
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# -----------------------------------------------------------
# PATH TO YOUR SHARED DRIVE DATASET
# -----------------------------------------------------------
# NOTE: this reuses (and overwrites) the DATA_DIR name set in the data-loading cell.
DATA_DIR = "/content/drive/Shared drives/DAEN 429 Final Project/Test Data (handtaken)"

# -----------------------------------------------------------
# GET ALL .HEIC FILES
# -----------------------------------------------------------
image_files = sorted([
    f for f in os.listdir(DATA_DIR)
    if f.lower().endswith(".heic")
])

print("Found images:", image_files)

# Fail early with a readable message instead of an opaque torch.stack error.
if not image_files:
    raise FileNotFoundError(f"No .heic files found in {DATA_DIR}")

# -----------------------------------------------------------
# LOAD IMAGES + TRUE LABELS
# -----------------------------------------------------------
image_tensors = []
true_labels = []

for filename in image_files:
    filepath = os.path.join(DATA_DIR, filename)

    # -------------------------------------------
    # FIX: Extract ONLY the first letter as label
    # Examples:
    #   "C.HEIC" → C
    #   "C (1).HEIC" → C
    #   "G (2).HEIC" → G
    # NOTE(review): classes with multi-character names cannot be labelled
    # this way — confirm the hand-taken set only contains single letters.
    # -------------------------------------------
    label_char = filename[0].upper()

    if label_char not in classes:
        raise ValueError(f"Label '{label_char}' not in classes list. Check class mapping.")

    true_label_idx = classes.index(label_char)
    true_labels.append(true_label_idx)

    # Load HEIC file → PIL image
    heif_file = pyheif.read(filepath)
    image = Image.frombytes(
        heif_file.mode,
        heif_file.size,
        heif_file.data,
        "raw",
        heif_file.mode,
        heif_file.stride,
    )
    # The decoded mode comes from the file and may carry an alpha channel;
    # the normalisation and the model both expect exactly three channels.
    image = image.convert("RGB")

    # Apply your validation transform
    img_tensor = data_transforms["val"](image)
    image_tensors.append(img_tensor)

# Stack into batch tensor
X = torch.stack(image_tensors)

# -----------------------------------------------------------
# MOVE TO DEVICE
# -----------------------------------------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
trained_model.to(device)
X = X.to(device)
y_true = torch.tensor(true_labels).to(device)

# -----------------------------------------------------------
# RUN INFERENCE
# -----------------------------------------------------------
trained_model.eval()
with torch.no_grad():
    outputs = trained_model(X)
    _, preds = torch.max(outputs, 1)

pred_labels = preds.cpu().numpy()
true_labels_np = y_true.cpu().numpy()

# -----------------------------------------------------------
# METRICS
# -----------------------------------------------------------
acc, macro_f1 = calculate_metrics(pred_labels, true_labels_np)

print("\n=== RESULTS ON HAND-TAKEN HEIC DATA ===")
print(f"Accuracy:  {acc:.4f}")
print(f"Macro-F1:  {macro_f1:.4f}")

# -----------------------------------------------------------
# CONFUSION MATRIX — FIXED FOR MISSING CLASSES
# -----------------------------------------------------------

import numpy as np

# Only include labels that appear in y_true OR pred_labels
unique_label_indices = sorted(list(set(true_labels_np) | set(pred_labels)))

# Map class indices to class names
label_names = [classes[i] for i in unique_label_indices]

# Build a filtered confusion matrix
cm = confusion_matrix(true_labels_np, pred_labels, labels=unique_label_indices)

# Display
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=label_names)
disp.plot(xticks_rotation=45, cmap="Blues")
plt.title("Confusion Matrix — Hand-Taken HEIC Dataset (Filtered)")
plt.tight_layout()
plt.show()

