### BASE WORKING standard PGD

In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.transforms.functional import to_pil_image
from tqdm import tqdm


In [None]:
epsilon    = 8/255
alpha      = 2/255
pgd_steps  = 7
batch_size = 128
epochs     = 25
lr         = 1e-3
device     = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [None]:
# --- Begin: Dataset class definitions needed for torch.load ---
import torch
from torch.utils.data import Dataset
from typing import Tuple

class TaskDataset(Dataset):
    def __init__(self, transform=None):
        self.ids = []
        self.imgs = []
        self.labels = []
        self.transform = transform

    def __getitem__(self, index) -> Tuple[int, torch.Tensor, int]:
        id_ = self.ids[index]
        img = self.imgs[index]
        if self.transform:
            img = self.transform(img)
        label = self.labels[index]
        return id_, img, label

    def __len__(self):
        return len(self.ids)
# --- End: Dataset class definitions ---


In [None]:
from PIL import Image

class DataWrapper(Dataset):
    """Wraps provided TaskDataset (idx, PIL.Image, label) → (Tensor, label)."""
    def __init__(self, base_dataset, transform=None):
        self.base = base_dataset
        self.transform = transform or transforms.ToTensor()

    def __len__(self):
        return len(self.base)

    def __getitem__(self, idx):
        rec = self.base[idx]
        if len(rec) == 3:
            _, img, label = rec
        elif len(rec) == 2:
            img, label = rec
        else:
            raise ValueError(f"Unexpected tuple length {len(rec)}")

        # Convert to RGB if not already
        if isinstance(img, torch.Tensor):
            if img.max() > 1: img = img.float() / 255.
            img = to_pil_image(img)

        if img.mode != "RGB":
            img = img.convert("RGB")

        img = self.transform(img)
        return img, int(label)


In [None]:
raw_ds = torch.load("/content/data/Train.pt", weights_only=False)    # your provided TaskDataset
dataset = DataWrapper(raw_ds, transform=transforms.ToTensor())
loader  = DataLoader(dataset,
                     batch_size=batch_size,
                     shuffle=True,
                     num_workers=0,
                     pin_memory=True)

# Sanity check
x0, y0 = dataset[0]
assert isinstance(x0, torch.Tensor) and x0.max() <= 1.0 and x0.min() >= 0.0
assert isinstance(y0, int)


In [None]:
from resnet_wrapper import ResNetWrapper  # your existing model file

model = ResNetWrapper("resnet18", num_classes=10).to(device)
opt   = torch.optim.Adam(model.parameters(), lr=lr)
crit  = nn.CrossEntropyLoss()



In [None]:
def pgd_attack(m, x, y, eps=epsilon, alpha=alpha, iters=pgd_steps):
    x_orig = x.detach()
    delta = torch.zeros_like(x).uniform_(-eps, eps).to(device)
    delta.requires_grad = True

    for _ in range(iters):
        logits = m(x + delta)
        loss   = crit(logits, y)
        loss.backward()
        # gradient step on delta
        delta.data = (delta + alpha * delta.grad.sign()).clamp(-eps, eps)
        delta.data = (x_orig + delta.data).clamp(0, 1) - x_orig
        delta.grad.zero_()

    return (x_orig + delta.detach()).clamp(0, 1)

In [None]:
for epoch in range(1, epochs+1):
    model.train()
    total_loss, total_corr, total_num = 0.0, 0, 0

    for imgs, labels in tqdm(loader, desc=f"Epoch {epoch}/{epochs}"):
        imgs, labels = imgs.to(device), labels.to(device)

        # generate adversarial examples
        adv = pgd_attack(model, imgs, labels)

        # train on them
        opt.zero_grad()
        out = model(adv)
        loss = crit(out, labels)
        loss.backward()
        opt.step()

        # stats
        total_loss += loss.item() * imgs.size(0)
        preds = out.argmax(dim=1)
        total_corr += (preds == labels).sum().item()
        total_num  += imgs.size(0)

    avg_loss = total_loss / total_num
    acc      = 100 * total_corr / total_num
    print(f"[Epoch {epoch:2d}] Loss={avg_loss:.4f}, Clean Acc={acc:.2f}%")

Epoch 1/25: 100%|██████████| 782/782 [02:22<00:00,  5.48it/s]


[Epoch  1] Loss=1.7956, Clean Acc=31.34%


Epoch 2/25: 100%|██████████| 782/782 [02:24<00:00,  5.41it/s]


[Epoch  2] Loss=1.7375, Clean Acc=33.02%


Epoch 3/25: 100%|██████████| 782/782 [02:24<00:00,  5.40it/s]


[Epoch  3] Loss=1.7205, Clean Acc=34.06%


Epoch 4/25: 100%|██████████| 782/782 [02:24<00:00,  5.40it/s]


[Epoch  4] Loss=1.7019, Clean Acc=35.08%


Epoch 5/25: 100%|██████████| 782/782 [02:24<00:00,  5.40it/s]


[Epoch  5] Loss=1.6892, Clean Acc=35.77%


Epoch 6/25: 100%|██████████| 782/782 [02:24<00:00,  5.40it/s]


[Epoch  6] Loss=1.6805, Clean Acc=36.33%


Epoch 7/25: 100%|██████████| 782/782 [02:24<00:00,  5.40it/s]


[Epoch  7] Loss=1.6723, Clean Acc=36.66%


Epoch 8/25: 100%|██████████| 782/782 [02:24<00:00,  5.41it/s]


[Epoch  8] Loss=1.6663, Clean Acc=37.21%


Epoch 9/25: 100%|██████████| 782/782 [02:23<00:00,  5.44it/s]


[Epoch  9] Loss=1.6613, Clean Acc=37.32%


Epoch 10/25: 100%|██████████| 782/782 [02:23<00:00,  5.45it/s]


[Epoch 10] Loss=1.6555, Clean Acc=37.61%


Epoch 11/25: 100%|██████████| 782/782 [02:23<00:00,  5.44it/s]


[Epoch 11] Loss=1.6528, Clean Acc=37.82%


Epoch 12/25: 100%|██████████| 782/782 [02:23<00:00,  5.45it/s]


[Epoch 12] Loss=1.6462, Clean Acc=38.13%


Epoch 13/25: 100%|██████████| 782/782 [02:23<00:00,  5.45it/s]


[Epoch 13] Loss=1.6422, Clean Acc=38.26%


Epoch 14/25: 100%|██████████| 782/782 [02:23<00:00,  5.45it/s]


[Epoch 14] Loss=1.6389, Clean Acc=38.29%


Epoch 15/25: 100%|██████████| 782/782 [02:23<00:00,  5.45it/s]


[Epoch 15] Loss=1.6356, Clean Acc=38.42%


Epoch 16/25: 100%|██████████| 782/782 [02:23<00:00,  5.45it/s]


[Epoch 16] Loss=1.6299, Clean Acc=38.69%


Epoch 17/25: 100%|██████████| 782/782 [02:23<00:00,  5.44it/s]


[Epoch 17] Loss=1.6261, Clean Acc=38.84%


Epoch 18/25: 100%|██████████| 782/782 [02:23<00:00,  5.45it/s]


[Epoch 18] Loss=1.6204, Clean Acc=38.91%


Epoch 19/25: 100%|██████████| 782/782 [02:23<00:00,  5.45it/s]


[Epoch 19] Loss=1.6158, Clean Acc=39.12%


Epoch 20/25: 100%|██████████| 782/782 [02:23<00:00,  5.46it/s]


[Epoch 20] Loss=1.6107, Clean Acc=39.24%


Epoch 21/25: 100%|██████████| 782/782 [02:23<00:00,  5.45it/s]


[Epoch 21] Loss=1.6048, Clean Acc=39.45%


Epoch 22/25: 100%|██████████| 782/782 [02:23<00:00,  5.44it/s]


[Epoch 22] Loss=1.5980, Clean Acc=39.69%


Epoch 23/25: 100%|██████████| 782/782 [02:24<00:00,  5.43it/s]


[Epoch 23] Loss=1.5912, Clean Acc=39.90%


Epoch 24/25: 100%|██████████| 782/782 [02:23<00:00,  5.46it/s]


[Epoch 24] Loss=1.5839, Clean Acc=40.17%


Epoch 25/25: 100%|██████████| 782/782 [02:23<00:00,  5.45it/s]

[Epoch 25] Loss=1.5732, Clean Acc=40.44%





In [None]:
os.makedirs("saved_models", exist_ok=True)
torch.save(model.state_dict(), "saved_models/PGD_madry_model.pt")
print("Model saved → saved_models/PGD_madry_model.pt")

Model saved → saved_models/PGD_madry_model.pt


### STANDARD PGD WITH OPTIM
AND 70-30 composition of adv and clean data

In [1]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.transforms.functional import to_pil_image
from tqdm import tqdm
# from resnet_wrapper import ResNetWrapper
from torch.optim.lr_scheduler import MultiStepLR


In [2]:
epsilon    = 8/255
alpha      = 2/255
pgd_max_steps = 7
batch_size = 128
epochs     = 25
lr         = 1e-3
device     = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [7]:
# --- hyperparameters ---
clean_ratio   = 0.80        #   <-- change here
epsilon       = 8/255
alpha         = 1/255       #   finer PGD updates
pgd_max_steps = 7
batch_size    = 128
epochs        = 30          #   a few more epochs with cosine is cheap
base_lr       = 0.2         #   SGD works well with higher LR
device        = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# --- Begin: Dataset class definitions needed for torch.load ---
import torch
from torch.utils.data import Dataset
from typing import Tuple

class TaskDataset(Dataset):
    def __init__(self, transform=None):
        self.ids = []
        self.imgs = []
        self.labels = []
        self.transform = transform

    def __getitem__(self, index) -> Tuple[int, torch.Tensor, int]:
        id_ = self.ids[index]
        img = self.imgs[index]
        if self.transform:
            img = self.transform(img)
        label = self.labels[index]
        return id_, img, label

    def __len__(self):
        return len(self.ids)
# --- End: Dataset class definitions ---


In [4]:
from PIL import Image

class DataWrapper(Dataset):
    """Wraps provided TaskDataset (idx, PIL.Image, label) → (Tensor, label)."""
    def __init__(self, base_dataset, transform=None):
        self.base = base_dataset
        # self.transform = transform or transforms.ToTensor()
        self.transform = transforms.functional.to_tensor

    def __len__(self):
        return len(self.base)

    def __getitem__(self, idx):
        rec = self.base[idx]
        if len(rec) == 3:
            _, img, label = rec
        elif len(rec) == 2:
            img, label = rec
        else:
            raise ValueError(f"Unexpected tuple length {len(rec)}")

        # Convert to RGB if not already
        if isinstance(img, torch.Tensor):
            if img.max() > 1: img = img.float() / 255.
            img = to_pil_image(img)

        if img.mode != "RGB":
            img = img.convert("RGB")

        img = self.transform(img)
        return img, int(label)


In [5]:
# --- Load Dataset ---
raw_ds = torch.load("/content/data/Train.pt", weights_only=False)
dataset = DataWrapper(raw_ds, transform=transforms.ToTensor())
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

In [6]:
# # --- Model, Optimizer, Scheduler ---
# model = ResNetWrapper("resnet18", num_classes=10).to(device)
# opt = torch.optim.Adam(model.parameters(), lr=lr)
# scheduler = MultiStepLR(opt, milestones=[10, 20], gamma=0.1)
# crit = nn.CrossEntropyLoss()



# --- Model, Optimizer, Scheduler ---
from torchvision import models
import torch.nn as nn
from torch.optim.lr_scheduler import MultiStepLR

model = models.resnet18(weights=None)  # ✅ vanilla torchvision resnet18
model.fc = nn.Linear(model.fc.in_features, 10)  # adjust for 10 classes
model = model.to(device)

opt = torch.optim.Adam(model.parameters(), lr=lr)
scheduler = MultiStepLR(opt, milestones=[10, 20], gamma=0.1)
crit = nn.CrossEntropyLoss()


In [7]:
def pgd_attack(model, x, y, eps, alpha, iters):
    x_orig = x.detach()
    delta = torch.zeros_like(x).uniform_(-eps, eps).to(device)
    delta.requires_grad = True

    for _ in range(iters):
        logits = model(x + delta)
        loss = crit(logits, y)
        loss.backward()
        delta.data = (delta + alpha * delta.grad.sign()).clamp(-eps, eps)
        delta.data = (x_orig + delta.data).clamp(0, 1) - x_orig
        delta.grad.zero_()

    return (x_orig + delta.detach()).clamp(0, 1)

#### TRAINING LOOP: 70-30 data comp with

In [None]:
# --- Training Loop ---
for epoch in range(1, epochs + 1):
    model.train()
    total_loss, total_corr, total_num = 0.0, 0, 0
    pgd_steps = min(1 + epoch // 2, pgd_max_steps)

    for imgs, labels in tqdm(loader, desc=f"Epoch {epoch}/{epochs} (PGD steps: {pgd_steps})"):
        imgs, labels = imgs.to(device), labels.to(device)

        # 70% clean, 30% PGD adversarial examples
        out_clean = model(imgs)
        adv_imgs = pgd_attack(model, imgs, labels, eps=epsilon, alpha=alpha, iters=pgd_steps)
        out_adv = model(adv_imgs)

        loss = 0.7 * crit(out_clean, labels) + 0.3 * crit(out_adv, labels)

        opt.zero_grad()
        loss.backward()
        opt.step()

        total_loss += loss.item() * imgs.size(0)
        total_corr += (out_clean.argmax(1) == labels).sum().item()
        total_num += imgs.size(0)

    scheduler.step()
    avg_loss = total_loss / total_num
    acc = 100. * total_corr / total_num
    print(f"[Epoch {epoch:2d}] Loss={avg_loss:.4f}, Clean Acc={acc:.2f}%")

Epoch 1/25 (PGD steps: 1): 100%|██████████| 782/782 [00:53<00:00, 14.64it/s]


[Epoch  1] Loss=1.4507, Clean Acc=49.16%


Epoch 2/25 (PGD steps: 2): 100%|██████████| 782/782 [01:09<00:00, 11.29it/s]


[Epoch  2] Loss=1.4000, Clean Acc=52.53%


Epoch 3/25 (PGD steps: 2): 100%|██████████| 782/782 [01:10<00:00, 11.11it/s]


[Epoch  3] Loss=1.3626, Clean Acc=54.20%


Epoch 4/25 (PGD steps: 3): 100%|██████████| 782/782 [01:27<00:00,  8.92it/s]


[Epoch  4] Loss=1.3761, Clean Acc=55.28%


Epoch 5/25 (PGD steps: 3): 100%|██████████| 782/782 [01:27<00:00,  8.92it/s]


[Epoch  5] Loss=1.3524, Clean Acc=56.45%


Epoch 6/25 (PGD steps: 4): 100%|██████████| 782/782 [01:44<00:00,  7.51it/s]


[Epoch  6] Loss=1.3675, Clean Acc=56.95%


Epoch 7/25 (PGD steps: 4): 100%|██████████| 782/782 [01:44<00:00,  7.51it/s]


[Epoch  7] Loss=1.3486, Clean Acc=58.06%


Epoch 8/25 (PGD steps: 5): 100%|██████████| 782/782 [02:00<00:00,  6.49it/s]


[Epoch  8] Loss=1.3553, Clean Acc=58.78%


Epoch 9/25 (PGD steps: 5): 100%|██████████| 782/782 [02:00<00:00,  6.49it/s]


[Epoch  9] Loss=1.3374, Clean Acc=59.90%


Epoch 10/25 (PGD steps: 6): 100%|██████████| 782/782 [02:16<00:00,  5.73it/s]


[Epoch 10] Loss=1.3392, Clean Acc=60.71%


Epoch 11/25 (PGD steps: 6): 100%|██████████| 782/782 [02:16<00:00,  5.73it/s]


[Epoch 11] Loss=1.2677, Clean Acc=64.85%


Epoch 12/25 (PGD steps: 7): 100%|██████████| 782/782 [02:33<00:00,  5.10it/s]


[Epoch 12] Loss=1.2583, Clean Acc=66.29%


Epoch 13/25 (PGD steps: 7): 100%|██████████| 782/782 [02:33<00:00,  5.10it/s]


[Epoch 13] Loss=1.2445, Clean Acc=67.38%


Epoch 14/25 (PGD steps: 7): 100%|██████████| 782/782 [02:33<00:00,  5.09it/s]


[Epoch 14] Loss=1.2306, Clean Acc=68.43%


Epoch 15/25 (PGD steps: 7): 100%|██████████| 782/782 [02:33<00:00,  5.09it/s]


[Epoch 15] Loss=1.2154, Clean Acc=69.40%


Epoch 16/25 (PGD steps: 7): 100%|██████████| 782/782 [02:33<00:00,  5.11it/s]


[Epoch 16] Loss=1.2015, Clean Acc=70.38%


Epoch 17/25 (PGD steps: 7): 100%|██████████| 782/782 [02:32<00:00,  5.11it/s]


[Epoch 17] Loss=1.1865, Clean Acc=71.25%


Epoch 18/25 (PGD steps: 7): 100%|██████████| 782/782 [02:32<00:00,  5.12it/s]


[Epoch 18] Loss=1.1685, Clean Acc=72.33%


Epoch 19/25 (PGD steps: 7): 100%|██████████| 782/782 [02:32<00:00,  5.11it/s]


[Epoch 19] Loss=1.1359, Clean Acc=73.47%


Epoch 20/25 (PGD steps: 7): 100%|██████████| 782/782 [02:32<00:00,  5.12it/s]


[Epoch 20] Loss=1.0634, Clean Acc=74.86%


Epoch 21/25 (PGD steps: 7): 100%|██████████| 782/782 [02:32<00:00,  5.11it/s]


[Epoch 21] Loss=0.9807, Clean Acc=77.22%


Epoch 22/25 (PGD steps: 7): 100%|██████████| 782/782 [02:32<00:00,  5.13it/s]


[Epoch 22] Loss=0.9703, Clean Acc=77.74%


Epoch 23/25 (PGD steps: 7): 100%|██████████| 782/782 [02:32<00:00,  5.12it/s]


[Epoch 23] Loss=0.9601, Clean Acc=78.16%


Epoch 24/25 (PGD steps: 7): 100%|██████████| 782/782 [02:32<00:00,  5.12it/s]


[Epoch 24] Loss=0.9540, Clean Acc=78.31%


Epoch 25/25 (PGD steps: 7): 100%|██████████| 782/782 [02:34<00:00,  5.06it/s]

[Epoch 25] Loss=0.9496, Clean Acc=78.50%





In [None]:
  # --- Save Model ---
os.makedirs("saved_models", exist_ok=True)
torch.save(model.state_dict(), "saved_models/PGD_madry_model_optim_last.pt")
print("Model saved → saved_models/PGD_madry_model_optim_last.pt")

Model saved → saved_models/PGD_madry_model_optim_last.pt


#### TRAINING LOOP: 75-25 data comp

In [10]:

# --- model & optimiser ---
model = models.resnet18(weights=None)
model.fc = nn.Linear(model.fc.in_features, 10)
model  = model.to(device)

opt = torch.optim.SGD(model.parameters(), lr=base_lr,
                      momentum=0.9, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=epochs)
crit = nn.CrossEntropyLoss()

# --- training loop ---
for epoch in range(1, epochs + 1):
    model.train()
    total_loss, total_corr, total_num = 0.0, 0, 0
    pgd_steps = min(1 + epoch // 3, pgd_max_steps)   # slower ramp: 1,2,3…7

    for imgs, labels in loader:
        imgs, labels = imgs.to(device), labels.to(device)

        # clean forward
        logits_clean = model(imgs)

        # PGD adversary
        adv_imgs = pgd_attack(model, imgs, labels,
                              eps=epsilon, alpha=alpha, iters=pgd_steps)
        logits_adv = model(adv_imgs)

        # blended TRADES-style loss
        loss = (clean_ratio * crit(logits_clean, labels) +
               (1 - clean_ratio) * crit(logits_adv,  labels))

        opt.zero_grad()
        loss.backward()
        opt.step()

        total_loss += loss.item() * imgs.size(0)
        total_corr += (logits_clean.argmax(1) == labels).sum().item()
        total_num  += imgs.size(0)

    scheduler.step()
    print(f"[Ep {epoch:02d}] loss {total_loss/total_num:.4f}  "
          f"cleanAcc {100*total_corr/total_num:.2f}%  pgdK={pgd_steps}")




[Ep 01] loss 1.6318  cleanAcc 43.60%  pgdK=1
[Ep 02] loss 1.3858  cleanAcc 49.66%  pgdK=1
[Ep 03] loss 1.3643  cleanAcc 51.23%  pgdK=2
[Ep 04] loss 1.3393  cleanAcc 52.37%  pgdK=2
[Ep 05] loss 1.3201  cleanAcc 53.42%  pgdK=2
[Ep 06] loss 1.3365  cleanAcc 53.88%  pgdK=3
[Ep 07] loss 1.3272  cleanAcc 54.11%  pgdK=3
[Ep 08] loss 1.3236  cleanAcc 54.44%  pgdK=3
[Ep 09] loss 1.3333  cleanAcc 54.87%  pgdK=4
[Ep 10] loss 1.3256  cleanAcc 55.54%  pgdK=4
[Ep 11] loss 1.3207  cleanAcc 55.35%  pgdK=4
[Ep 12] loss 1.3296  cleanAcc 55.98%  pgdK=5
[Ep 13] loss 1.3231  cleanAcc 56.24%  pgdK=5
[Ep 14] loss 1.3147  cleanAcc 56.63%  pgdK=5
[Ep 15] loss 1.3204  cleanAcc 57.10%  pgdK=6
[Ep 16] loss 1.3112  cleanAcc 57.45%  pgdK=6
[Ep 17] loss 1.2996  cleanAcc 57.91%  pgdK=6
[Ep 18] loss 1.3048  cleanAcc 58.53%  pgdK=7
[Ep 19] loss 1.2915  cleanAcc 59.15%  pgdK=7
[Ep 20] loss 1.2768  cleanAcc 60.15%  pgdK=7
[Ep 21] loss 1.2641  cleanAcc 60.78%  pgdK=7
[Ep 22] loss 1.2479  cleanAcc 61.65%  pgdK=7
[Ep 23] lo

In [11]:
  # --- Save Model ---
os.makedirs("saved_models", exist_ok=True)
torch.save(model.state_dict(), "/content/saved_models/PGD_7525.pt")
print("Model saved → saved_models/PGD_7525.pt")

Model saved → saved_models/PGD_7525.pt


In [12]:
from google.colab import files
files.download("/content/saved_models/PGD_7525.pt")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

#### TRAINING LOOP 80-20 data comp

In [8]:
# --- Training Loop ---
for epoch in range(1, epochs + 1):
    model.train()
    total_loss, total_corr, total_num = 0.0, 0, 0
    pgd_steps = min(1 + epoch // 2, pgd_max_steps)

    for imgs, labels in tqdm(loader, desc=f"Epoch {epoch}/{epochs} (PGD steps: {pgd_steps})"):
        imgs, labels = imgs.to(device), labels.to(device)

        # 70% clean, 30% PGD adversarial examples
        out_clean = model(imgs)
        adv_imgs = pgd_attack(model, imgs, labels, eps=epsilon, alpha=alpha, iters=pgd_steps)
        out_adv = model(adv_imgs)

        loss = 0.7 * crit(out_clean, labels) + 0.3 * crit(out_adv, labels)

        opt.zero_grad()
        loss.backward()
        opt.step()

        total_loss += loss.item() * imgs.size(0)
        total_corr += (out_clean.argmax(1) == labels).sum().item()
        total_num += imgs.size(0)

    scheduler.step()
    avg_loss = total_loss / total_num
    acc = 100. * total_corr / total_num
    print(f"[Epoch {epoch:2d}] Loss={avg_loss:.4f}, Clean Acc={acc:.2f}%")

Epoch 1/25 (PGD steps: 1): 100%|██████████| 782/782 [00:57<00:00, 13.71it/s]


[Epoch  1] Loss=1.4491, Clean Acc=49.10%


Epoch 2/25 (PGD steps: 2): 100%|██████████| 782/782 [01:10<00:00, 11.16it/s]


[Epoch  2] Loss=1.3998, Clean Acc=52.87%


Epoch 3/25 (PGD steps: 2): 100%|██████████| 782/782 [01:10<00:00, 11.13it/s]


[Epoch  3] Loss=1.3606, Clean Acc=54.37%


Epoch 4/25 (PGD steps: 3): 100%|██████████| 782/782 [01:27<00:00,  8.98it/s]


[Epoch  4] Loss=1.3790, Clean Acc=55.09%


Epoch 5/25 (PGD steps: 3): 100%|██████████| 782/782 [01:27<00:00,  8.93it/s]


[Epoch  5] Loss=1.3547, Clean Acc=56.24%


Epoch 6/25 (PGD steps: 4): 100%|██████████| 782/782 [01:43<00:00,  7.53it/s]


[Epoch  6] Loss=1.3687, Clean Acc=56.91%


Epoch 7/25 (PGD steps: 4): 100%|██████████| 782/782 [01:43<00:00,  7.53it/s]


[Epoch  7] Loss=1.3491, Clean Acc=57.80%


Epoch 8/25 (PGD steps: 5): 100%|██████████| 782/782 [01:59<00:00,  6.52it/s]


[Epoch  8] Loss=1.3557, Clean Acc=58.82%


Epoch 9/25 (PGD steps: 5): 100%|██████████| 782/782 [02:00<00:00,  6.51it/s]


[Epoch  9] Loss=1.3403, Clean Acc=59.67%


Epoch 10/25 (PGD steps: 6): 100%|██████████| 782/782 [02:15<00:00,  5.77it/s]


[Epoch 10] Loss=1.3386, Clean Acc=60.70%


Epoch 11/25 (PGD steps: 6): 100%|██████████| 782/782 [02:15<00:00,  5.76it/s]


[Epoch 11] Loss=1.2670, Clean Acc=64.80%


Epoch 12/25 (PGD steps: 7): 100%|██████████| 782/782 [02:32<00:00,  5.13it/s]


[Epoch 12] Loss=1.2608, Clean Acc=66.17%


Epoch 13/25 (PGD steps: 7): 100%|██████████| 782/782 [02:32<00:00,  5.12it/s]


[Epoch 13] Loss=1.2450, Clean Acc=67.24%


Epoch 14/25 (PGD steps: 7): 100%|██████████| 782/782 [02:32<00:00,  5.12it/s]


[Epoch 14] Loss=1.2299, Clean Acc=68.29%


Epoch 15/25 (PGD steps: 7): 100%|██████████| 782/782 [02:33<00:00,  5.11it/s]


[Epoch 15] Loss=1.2136, Clean Acc=69.37%


Epoch 16/25 (PGD steps: 7): 100%|██████████| 782/782 [02:32<00:00,  5.13it/s]


[Epoch 16] Loss=1.1957, Clean Acc=70.14%


Epoch 17/25 (PGD steps: 7): 100%|██████████| 782/782 [02:32<00:00,  5.12it/s]


[Epoch 17] Loss=1.1487, Clean Acc=71.66%


Epoch 18/25 (PGD steps: 7): 100%|██████████| 782/782 [02:32<00:00,  5.11it/s]


[Epoch 18] Loss=1.0661, Clean Acc=73.23%


Epoch 19/25 (PGD steps: 7): 100%|██████████| 782/782 [02:32<00:00,  5.12it/s]


[Epoch 19] Loss=1.0142, Clean Acc=75.20%


Epoch 20/25 (PGD steps: 7): 100%|██████████| 782/782 [02:32<00:00,  5.12it/s]


[Epoch 20] Loss=0.9818, Clean Acc=76.64%


Epoch 21/25 (PGD steps: 7): 100%|██████████| 782/782 [02:32<00:00,  5.12it/s]


[Epoch 21] Loss=0.9281, Clean Acc=79.27%


Epoch 22/25 (PGD steps: 7): 100%|██████████| 782/782 [02:32<00:00,  5.12it/s]


[Epoch 22] Loss=0.9172, Clean Acc=79.58%


Epoch 23/25 (PGD steps: 7): 100%|██████████| 782/782 [02:32<00:00,  5.11it/s]


[Epoch 23] Loss=0.9144, Clean Acc=79.73%


Epoch 24/25 (PGD steps: 7): 100%|██████████| 782/782 [02:33<00:00,  5.10it/s]


[Epoch 24] Loss=0.9090, Clean Acc=79.94%


Epoch 25/25 (PGD steps: 7): 100%|██████████| 782/782 [02:32<00:00,  5.12it/s]

[Epoch 25] Loss=0.9045, Clean Acc=80.21%





In [9]:
  # --- Save Model ---
os.makedirs("saved_models", exist_ok=True)
torch.save(model.state_dict(), "/content/saved_models/PGD_8020.pt")
print("Model saved → saved_models/PGD_8020.pt")

Model saved → saved_models/PGD_8020.pt


In [10]:
from google.colab import files
files.download("/content/saved_models/PGD_8020.pt")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

#### Sanity check

In [11]:
import torch, torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms.functional import to_pil_image
from tqdm import tqdm

MODEL_PATH = "/content/saved_models/PGD_8020.pt"   # <- your .pt file
DATA_PATH  = "/content/data/Train.pt"                               # <- official training data

# 1 ─────────────── Dataset wrapper identical to server expectation ──────────────
# class DataWrapper(Dataset):
#     def __init__(self, base_dataset, transform=None):
#         self.base = base_dataset
#         self.tf   = transform or transforms.ToTensor()
#     def __len__(self): return len(self.base)
#     def __getitem__(self, idx):
#         rec = self.base[idx]
#         img = rec[1] if len(rec)==3 else rec[0]
#         y   = rec[2] if len(rec)==3 else rec[1]
#         # ensure PIL → tensor in [0,1]
#         if isinstance(img, torch.Tensor):
#             img = to_pil_image(img.float()/255.) if img.max()>1 else to_pil_image(img)
#         img = self.tf(img.convert("RGB"))
#         return img, int(y)

raw_ds = torch.load(DATA_PATH, weights_only=False, map_location=device)
ds     = DataWrapper(raw_ds, transform=transforms.ToTensor())
ldr    = DataLoader(ds, batch_size=256, shuffle=False)

# 2 ─────────────── strict-load the submission file ─────────────────────────────
model = models.resnet18(weights=None)
model.fc = nn.Linear(model.fc.in_features, 10)
state   = torch.load(MODEL_PATH, map_location=device)
try:
    model.load_state_dict(state, strict=True)
    print("✓ strict load succeeded.")
except RuntimeError as e:
    print("✗ strict load FAILED!\n", e)
    raise

model.eval()

# 3a ───────────── accuracy on images already in [0,1]  -------------------------
def eval_acc(net, loader, scale_inputs=1.0):
    correct = total = 0
    with torch.no_grad():
        for x, y in loader:
            x = x*scale_inputs    # if scale_inputs=255 converts to uint8-like
            out = net(x)
            correct += (out.argmax(1) == y).sum().item()
            total   += y.size(0)
    return 100*correct/total

acc_clean = eval_acc(model, ldr, scale_inputs=1.0)
print(f"Clean accuracy [0–1 input] : {acc_clean:.2f}%")

acc_uint8 = eval_acc(model, ldr, scale_inputs=255.0)
print(f"Clean accuracy [0–255 input]: {acc_uint8:.2f}%")

# Interpretation
if acc_clean < 50:
    print("→ Your weights are wrong or under-trained; server will reject.")
elif acc_uint8 < 50 and acc_clean >= 50:
    print("→ Model expects [0,1] inputs but server likely feeds uint8; add a x/255 layer.")
else:
    print("✓ Model should pass server clean-accuracy gate.")


✓ strict load succeeded.
Clean accuracy [0–1 input] : 52.71%
Clean accuracy [0–255 input]: 5.40%
→ Model expects [0,1] inputs but server likely feeds uint8; add a x/255 layer.


In [None]:
from torchvision import models
import torch, torch.nn as nn
from resnet_wrapper import ResNetWrapper

wrapper = ResNetWrapper("resnet18", 10)
wrapper.load_state_dict(torch.load("saved_models/PGD_madry_model_optim.pt", map_location="cpu"))

plain = models.resnet18(weights=None)
plain.fc = nn.Linear(plain.fc.in_features, 10)
plain.load_state_dict(wrapper.state_dict(), strict=True)  # backbone + fc weights

torch.save(plain.state_dict(), "saved_models/resnet18_submission.pt")


✅  Saved resnet18_scaled_input.pt (expects uint-8 inputs)


### PGD+FGSM
cosine annealing and ptimizer/scheduler are modified.

In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.transforms.functional import to_pil_image
from tqdm import tqdm
# from resnet_wrapper import ResNetWrapper
from torch.optim.lr_scheduler import MultiStepLR

import torch, torch.nn as nn, torch.nn.functional as F
from torchvision import transforms, models
from torch.optim.lr_scheduler import CosineAnnealingLR
import random



In [None]:
eps, alpha, max_pgd = 8/255, 2/255, 7
sigma_noise = 0.05    # light Gaussian smoothing
total_epochs = 25


device     = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# --- Begin: Dataset class definitions needed for torch.load ---
import torch
from torch.utils.data import Dataset
from typing import Tuple

class TaskDataset(Dataset):
    def __init__(self, transform=None):
        self.ids = []
        self.imgs = []
        self.labels = []
        self.transform = transform

    def __getitem__(self, index) -> Tuple[int, torch.Tensor, int]:
        id_ = self.ids[index]
        img = self.imgs[index]
        if self.transform:
            img = self.transform(img)
        label = self.labels[index]
        return id_, img, label

    def __len__(self):
        return len(self.ids)
# --- End: Dataset class definitions ---


In [None]:
from PIL import Image

class DataWrapper(Dataset):
    """Wraps provided TaskDataset (idx, PIL.Image, label) → (Tensor, label)."""
    def __init__(self, base_dataset, transform=None):
        self.base = base_dataset
        # self.transform = transform or transforms.ToTensor()
        self.transform = transforms.functional.to_tensor

    def __len__(self):
        return len(self.base)

    def __getitem__(self, idx):
        rec = self.base[idx]
        if len(rec) == 3:
            _, img, label = rec
        elif len(rec) == 2:
            img, label = rec
        else:
            raise ValueError(f"Unexpected tuple length {len(rec)}")

        # Convert to RGB if not already
        if isinstance(img, torch.Tensor):
            if img.max() > 1: img = img.float() / 255.
            img = to_pil_image(img)

        if img.mode != "RGB":
            img = img.convert("RGB")

        img = self.transform(img)
        return img, int(label)


In [None]:
train_tf = transforms.functional.to_tensor        # <- note functional version

In [None]:
raw_ds = torch.load("data/Train.pt", weights_only=False, map_location="cpu")
dataset = DataWrapper(raw_ds, transform=train_tf)
loader  = DataLoader(dataset, batch_size=128, shuffle=True, num_workers=2)

In [None]:
model = models.resnet18(weights=None)
model.fc = nn.Linear(model.fc.in_features, 10)
model = model.to(device)

In [None]:
opt = torch.optim.SGD(model.parameters(), lr=0.2, momentum=0.9, weight_decay=5e-4)
scheduler = CosineAnnealingLR(opt, T_max=25)       # 25 epochs total
crit = nn.CrossEntropyLoss()

In [None]:
def fgsm_rs(m, x, y, eps):
    x_adv = (x + torch.empty_like(x).uniform_(-eps, eps)).clamp(0,1).requires_grad_(True)
    loss  = crit(m(x_adv), y)
    loss.backward()
    return (x_adv + eps * x_adv.grad.sign()).clamp(0,1).detach()

In [None]:
def pgd_attack(m, x, y, eps, alpha, iters):
    delta = torch.zeros_like(x).uniform_(-eps, eps).to(device).requires_grad_(True)
    for _ in range(iters):
        loss = crit(m(x + delta), y)
        loss.backward()
        delta.data = (delta + alpha * delta.grad.sign()).clamp(-eps, eps)
        delta.data = (x + delta).clamp(0,1) - x
        delta.grad.zero_()
    return (x + delta.detach()).clamp(0,1)

In [None]:
# ----- Training loop with dynamic clean/adv ratio + Gaussian noise -----
eps, alpha, max_pgd = 8/255, 2/255, 7
sigma_noise = 0.05    # light Gaussian smoothing
total_epochs = 25

for epoch in range(1, total_epochs+1):
    model.train()
    clean_ratio = max(0.9 - 0.02*epoch, 0.6)   # 90%→60% over epochs
    pgd_k = min(1 + epoch//2, max_pgd)         # ramp PGD steps

    for x, y in loader:
        x, y = x.to(device), y.to(device)

        # --- Gaussian noise augmentation ---
        x_noisy = (x + sigma_noise * torch.randn_like(x)).clamp(0,1)

        # --- Generate adversarial examples ---
        x_pgd  = pgd_attack(model, x_noisy, y, eps, alpha, pgd_k)
        x_fgsm = fgsm_rs(model, x_noisy, y, eps)

        # --- Mix samples according to ratio ---
        batch_size = x.size(0)
        idx = torch.randperm(batch_size)
        mix_mask = torch.rand(batch_size, device=device) < clean_ratio

        x_mix = torch.where(mix_mask[:,None,None,None], x_noisy, x_pgd[idx])
        # also sprinkle FGSM (replace 1/4 of PGD slots)
        fgsm_slots = (~mix_mask) & (torch.rand(batch_size, device=device) < 0.25)
        x_mix[fgsm_slots] = x_fgsm[fgsm_slots]

        # ----- Forward, loss, backward -----
        opt.zero_grad()
        logits = model(x_mix)
        loss = crit(logits, y)
        loss.backward()
        opt.step()

    scheduler.step()
    print(f"Epoch {epoch}: finished with clean_ratio={clean_ratio:.2f}, pgd_steps={pgd_k}")

Epoch 1: finished with clean_ratio=0.88, pgd_steps=1
Epoch 2: finished with clean_ratio=0.86, pgd_steps=2
Epoch 3: finished with clean_ratio=0.84, pgd_steps=2
Epoch 4: finished with clean_ratio=0.82, pgd_steps=3
Epoch 5: finished with clean_ratio=0.80, pgd_steps=3
Epoch 6: finished with clean_ratio=0.78, pgd_steps=4
Epoch 7: finished with clean_ratio=0.76, pgd_steps=4
Epoch 8: finished with clean_ratio=0.74, pgd_steps=5
Epoch 9: finished with clean_ratio=0.72, pgd_steps=5
Epoch 10: finished with clean_ratio=0.70, pgd_steps=6
Epoch 11: finished with clean_ratio=0.68, pgd_steps=6
Epoch 12: finished with clean_ratio=0.66, pgd_steps=7
Epoch 13: finished with clean_ratio=0.64, pgd_steps=7
Epoch 14: finished with clean_ratio=0.62, pgd_steps=7
Epoch 15: finished with clean_ratio=0.60, pgd_steps=7
Epoch 16: finished with clean_ratio=0.60, pgd_steps=7
Epoch 17: finished with clean_ratio=0.60, pgd_steps=7
Epoch 18: finished with clean_ratio=0.60, pgd_steps=7
Epoch 19: finished with clean_ratio=0

In [None]:
  # --- Save Model ---
os.makedirs("saved_models", exist_ok=True)
torch.save(model.state_dict(), "saved_models/PGD_FGSM_optim_last_unnorm.pt")
print("Model saved → saved_models/PGD_FGSM_optim_last.pt")

Model saved → saved_models/PGD_FGSM_optim_last.pt


In [None]:
import torch, torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms.functional import to_pil_image
from tqdm import tqdm

MODEL_PATH = "saved_models/PGD_FGSM_optim_last_unnorm.pt"   # <- your .pt file
DATA_PATH  = "/content/data/Train.pt"                               # <- official training data

# 1 ─────────────── Dataset wrapper identical to server expectation ──────────────
# class DataWrapper(Dataset):
#     def __init__(self, base_dataset, transform=None):
#         self.base = base_dataset
#         self.tf   = transform or transforms.ToTensor()
#     def __len__(self): return len(self.base)
#     def __getitem__(self, idx):
#         rec = self.base[idx]
#         img = rec[1] if len(rec)==3 else rec[0]
#         y   = rec[2] if len(rec)==3 else rec[1]
#         # ensure PIL → tensor in [0,1]
#         if isinstance(img, torch.Tensor):
#             img = to_pil_image(img.float()/255.) if img.max()>1 else to_pil_image(img)
#         img = self.tf(img.convert("RGB"))
#         return img, int(y)

raw_ds = torch.load(DATA_PATH, weights_only=False, map_location=device)
ds     = DataWrapper(raw_ds, transform=train_tf)
ldr    = DataLoader(ds, batch_size=256, shuffle=False)

# 2 ─────────────── strict-load the submission file ─────────────────────────────
model = models.resnet18(weights=None)
model.fc = nn.Linear(model.fc.in_features, 10)
state   = torch.load(MODEL_PATH, map_location=device)
try:
    model.load_state_dict(state, strict=True)
    print("✓ strict load succeeded.")
except RuntimeError as e:
    print("✗ strict load FAILED!\n", e)
    raise

model.eval()

# 3a ───────────── accuracy on images already in [0,1]  -------------------------
def eval_acc(net, loader, scale_inputs=1.0):
    correct = total = 0
    with torch.no_grad():
        for x, y in loader:
            x = x*scale_inputs    # if scale_inputs=255 converts to uint8-like
            out = net(x)
            correct += (out.argmax(1) == y).sum().item()
            total   += y.size(0)
    return 100*correct/total

acc_clean = eval_acc(model, ldr, scale_inputs=1.0)
print(f"Clean accuracy [0–1 input] : {acc_clean:.2f}%")

acc_uint8 = eval_acc(model, ldr, scale_inputs=255.0)
print(f"Clean accuracy [0–255 input]: {acc_uint8:.2f}%")

# Interpretation
if acc_clean < 50:
    print("→ Your weights are wrong or under-trained; server will reject.")
elif acc_uint8 < 50 and acc_clean >= 50:
    print("→ Model expects [0,1] inputs but server likely feeds uint8; add a x/255 layer.")
else:
    print("✓ Model should pass server clean-accuracy gate.")


✓ strict load succeeded.
Clean accuracy [0–1 input] : 32.62%
Clean accuracy [0–255 input]: 0.66%
→ Your weights are wrong or under-trained; server will reject.
