In [None]:
import random
from pathlib import Path

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm

from resnet18 import ResNet18
from letters import *
from sparse_attack import l0_pgd_attack

# Select the compute device, in order of preference: CUDA, Apple Metal (MPS), CPU.
# MPS is probed with is_available() rather than is_built(): is_built() only says
# this PyTorch binary was compiled with MPS support, while is_available() also
# verifies that the current machine can actually use it. Picking "mps" on a
# machine where it is built but unusable would fail later at model.to(device).
if torch.cuda.is_available():
    print("using Cuda")
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    print("using MPS")
    device = torch.device("mps")
else:
    print("using CPU")
    device = torch.device("cpu")

In [None]:
# Experiment tag; it is interpolated into the dataset CSV path and the
# checkpoint filename used by this notebook.
experiment_name = "robust"

# Build the network with its default constructor arguments and move it to the
# selected device in one step.
# Alternative with an explicit class count (currently disabled):
#model = ResNet18(num_classes=ABC_SIZE + len(additional_symbols))
model = ResNet18().to(device)

In [None]:
# Create dataset and data loader
csv_file = f"data/letters_{experiment_name}.csv"
dataset = LettersDataset(csv_file, device)
data_loader = DataLoader(dataset, batch_size=32, shuffle=True)

# Compute the mean and std over every value of the entire dataset.
# The std has to be taken over the pooled values: the standard deviation of the
# per-sample stds only measures how much contrast varies between samples, is not
# a valid normalization scale, and collapses towards 0 when samples look alike.
# Pooling also means the dataset is iterated once instead of twice.
# NOTE: these statistics define the input scale the model is trained on; any
# evaluation/inference code must normalize with the same mean and std.
all_samples = torch.stack([array for array, _ in dataset])
mean = torch.mean(all_samples)
std = torch.std(all_samples)
del all_samples  # release the pooled copy; only the two scalars are needed
assert std > 0, "dataset has zero variance; cannot normalize by std"
print(f"{mean = }")
print(f"{std = }")

In [None]:
# Loss function and optimizer.
#
# Label smoothing: rather than training against hard one-hot targets, the target
# distribution is mixed with a uniform distribution over all classes. With
# label_smoothing=0.1 the target is 0.9 * one_hot + 0.1 * uniform, so e.g. with
# 26 classes the correct class gets about 0.904 and every other class about 0.004.
# This discourages the model from becoming overconfident.
criterion = torch.nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Adversarial training: every batch is normalized, and with probability 0.5 it is
# replaced by its l0_pgd_attack-perturbed version before the optimization step.
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    progress = tqdm(data_loader, desc=f"Epoch {epoch+1}/{num_epochs}")
    for arrays, labels in progress:
        arrays = arrays.to(device)
        labels = labels.to(device)
        arrays = (arrays - mean) / std

        # Coin flip per batch: train on adversarial or on clean inputs.
        use_adversarial = random.random() < 0.5
        if use_adversarial:
            # NOTE(review): the attack is invoked while the model is in train mode
            # (set at the top of the epoch) — confirm this is intended.
            delta = l0_pgd_attack(arrays, labels, model, k=10, alpha=0.1, steps=10)
            # NOTE(review): the batch was standardized with mean/std just above, so
            # [-1, 1] may not be the valid input range — confirm against the dataset
            # value range and what l0_pgd_attack expects.
            arrays = torch.clamp(arrays + delta, -1, 1)

        # Gradients are reset only after the attack call and before the training
        # forward pass; keep this ordering.
        optimizer.zero_grad()
        logits = model(arrays)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean per-batch loss for the epoch.
    mean_loss = running_loss / len(data_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}")


In [None]:
# Save the trained weights (state_dict only) as a .pth checkpoint.
checkpoint_path = Path("models") / f"resnet18_{experiment_name}.pth"
# torch.save does not create missing parent directories; create "models/" first
# so a finished training run is not lost to a missing-directory error.
checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
torch.save(model.state_dict(), checkpoint_path)