HÜCRE 1: Kütüphaneler, Ayarlar ve Veri Hazırlığı

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, ConcatDataset
from torchvision import datasets, transforms
import numpy as np
import os
import matplotlib.pyplot as plt

# 1. Device selection: use CUDA when it is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"üî• Cihaz: {device}")

# 2. CIFAR-10 per-channel statistics (RGB order) and the helpers built on them.
cifar10_mean = (0.4914, 0.4822, 0.4465)
cifar10_std = (0.2470, 0.2435, 0.2616)

# Shaped (1, 3, 1, 1) so they broadcast against batches whose channel axis is dim 1.
mean_tensor = torch.as_tensor(cifar10_mean, device=device)[None, :, None, None]
std_tensor = torch.as_tensor(cifar10_std, device=device)[None, :, None, None]


def normalize(x):
    """Standardize ``x`` channel-wise: subtract ``mean_tensor``, divide by ``std_tensor``.

    ``x`` must live on the same device as the statistics tensors and be
    broadcast-compatible with their (1, 3, 1, 1) shape.
    """
    centered = x.sub(mean_tensor)
    return centered.div(std_tensor)


def denormalize(x):
    """Invert :func:`normalize`: scale by ``std_tensor`` and add ``mean_tensor`` back."""
    scaled = x.mul(std_tensor)
    return scaled.add(mean_tensor)

# 3. Data loaders
def get_loaders(batch_size=128):
    """Build the CIFAR-10 train and test ``DataLoader`` objects.

    The training pipeline applies a random 32x32 crop (4-pixel padding) and a
    random horizontal flip before tensor conversion and normalization; the test
    pipeline only converts and normalizes. Both datasets are stored under
    ``./data`` and downloaded when missing.

    Args:
        batch_size: Number of samples per batch for both loaders.

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    def _tensor_and_normalize():
        # Steps shared by both pipelines; fresh transform objects on every call.
        return [
            transforms.ToTensor(),
            transforms.Normalize(cifar10_mean, cifar10_std),
        ]

    augmentation = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ]
    train_transform = transforms.Compose(augmentation + _tensor_and_normalize())
    test_transform = transforms.Compose(_tensor_and_normalize())

    train_ds = datasets.CIFAR10(root="./data", train=True, download=True, transform=train_transform)
    test_ds = datasets.CIFAR10(root="./data", train=False, download=True, transform=test_transform)

    # Settings common to both loaders.
    shared = dict(batch_size=batch_size, num_workers=2, pin_memory=True)
    train_loader = DataLoader(train_ds, shuffle=True, **shared)
    test_loader = DataLoader(test_ds, shuffle=False, **shared)

    return train_loader, test_loader

# Make sure the checkpoint directory exists before anything tries to save into it.
os.makedirs("checkpoints", exist_ok=True)
# Build the module-level loaders with the default batch size; later cells read these globals.
train_loader, test_loader = get_loaders()
# Enable cuDNN's benchmark mode (algorithm auto-tuning for convolutions).
torch.backends.cudnn.benchmark = True
print("‚úÖ Veri setleri hazƒ±rlandƒ±.")

üî• Cihaz: cuda
‚úÖ Veri setleri hazƒ±rlandƒ±.


HÜCRE 2: Model Mimarisi (WideResNet-28-10)

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# ---------------------------
# WideResNet building blocks
# ---------------------------

class WideBasic(nn.Module):
    """Pre-activation wide residual unit.

    Main path: BN -> ReLU -> 3x3 conv -> (optional dropout) -> BN -> ReLU ->
    3x3 conv, where only the second convolution carries ``stride``. The skip
    path is the identity unless the stride or the channel count changes, in
    which case a strided 1x1 convolution projects the raw input.

    Args:
        in_planes: Number of input channels.
        planes: Number of output channels.
        dropout_rate: Dropout probability between the convolutions; values
            that are not greater than 0 disable dropout.
        stride: Stride of the second convolution and of the projection shortcut.
    """

    def __init__(self, in_planes, planes, dropout_rate=0.0, stride=1):
        super().__init__()
        # Attribute names double as state_dict keys, and creation order fixes
        # the order in which parameters are initialised, so both stay fixed.
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, padding=1, bias=False)

        if dropout_rate > 0:
            self.dropout = nn.Dropout(p=dropout_rate)
        else:
            self.dropout = nn.Identity()

        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)

        shape_changes = stride != 1 or in_planes != planes
        if shape_changes:
            self.shortcut = nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False)
        else:
            self.shortcut = nn.Identity()

    def forward(self, x):
        """Return ``main_path(x) + shortcut(x)``."""
        hidden = F.relu(self.bn1(x))
        hidden = self.conv1(hidden)
        hidden = self.dropout(hidden)
        hidden = F.relu(self.bn2(hidden))
        hidden = self.conv2(hidden)
        return hidden + self.shortcut(x)


class NetworkBlock(nn.Module):
    """One WideResNet stage: ``num_layers`` residual units applied in sequence.

    Only the first unit changes the channel count (``in_planes`` ->
    ``out_planes``) and applies ``stride``; every later unit maps
    ``out_planes`` -> ``out_planes`` with stride 1.

    Args:
        num_layers: Number of units in the stage.
        in_planes: Channels entering the stage.
        out_planes: Channels produced by every unit of the stage.
        block: Unit factory called positionally as
            ``block(in_channels, out_channels, dropout_rate, stride)``.
        dropout_rate: Forwarded unchanged to every unit.
        stride: Stride of the first unit.
    """

    def __init__(self, num_layers, in_planes, out_planes, block, dropout_rate, stride):
        super().__init__()
        units = [
            block(
                in_planes if index == 0 else out_planes,
                out_planes,
                dropout_rate,
                stride if index == 0 else 1,
            )
            for index in range(num_layers)
        ]
        self.block = nn.Sequential(*units)

    def forward(self, x):
        """Run ``x`` through the stage's units in order."""
        return self.block(x)


class WideResNet(nn.Module):
    """
    Wide Residual Network classifier for CIFAR-style inputs.

    Layout: 3x3 stem convolution (16 channels) -> three NetworkBlock stages of
    WideBasic units with widths 16k, 32k, 64k and strides 1, 2, 2 ->
    BatchNorm + ReLU -> global average pooling -> linear classifier.

    ``depth`` must have the form 6n + 4, where n is the number of units per
    stage. WRN-28-10 corresponds to depth=28 (n=4) and widen_factor=10.

    Args:
        depth: Network depth, 6n + 4.
        widen_factor: Width multiplier k.
        dropout_rate: Dropout probability forwarded to every residual unit.
        num_classes: Size of the output logit vector.
    """

    def __init__(self, depth=28, widen_factor=10, dropout_rate=0.0, num_classes=10):
        super().__init__()
        units_per_stage, leftover = divmod(depth - 4, 6)
        assert leftover == 0, "WideResNet depth must be 6n+4 (e.g., 28, 34, 40...)"

        stem_width = 16
        width1, width2, width3 = (base * widen_factor for base in (16, 32, 64))

        self.conv1 = nn.Conv2d(3, stem_width, kernel_size=3, stride=1, padding=1, bias=False)
        self.block1 = NetworkBlock(units_per_stage, stem_width, width1, WideBasic, dropout_rate, stride=1)
        self.block2 = NetworkBlock(units_per_stage, width1, width2, WideBasic, dropout_rate, stride=2)
        self.block3 = NetworkBlock(units_per_stage, width2, width3, WideBasic, dropout_rate, stride=2)

        self.bn1 = nn.BatchNorm2d(width3)
        self.fc = nn.Linear(width3, num_classes)

        # Re-initialise every submodule in the traversal order of self.modules(),
        # so a fixed seed produces the same weights.
        for module in self.modules():
            self._init_module(module)

    @staticmethod
    def _init_module(module):
        """Apply the per-type initialisation rule to a single submodule."""
        if isinstance(module, nn.Conv2d):
            nn.init.kaiming_normal_(module.weight, nonlinearity="relu")
            return
        if isinstance(module, nn.BatchNorm2d):
            nn.init.constant_(module.weight, 1.0)
            nn.init.constant_(module.bias, 0.0)
            return
        if isinstance(module, nn.Linear):
            nn.init.xavier_normal_(module.weight)
            nn.init.constant_(module.bias, 0.0)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        features = self.conv1(x)
        for stage in (self.block1, self.block2, self.block3):
            features = stage(features)
        features = F.relu(self.bn1(features))
        pooled = F.adaptive_avg_pool2d(features, 1)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)


# ---------------------------
# Factory function (clean usage)
# ---------------------------
def make_wrn28_10(num_classes=10, dropout_rate=0.0):
    """
    Build a WRN-28-10 (depth 28, widen factor 10) for CIFAR-10/100.

    Args:
        num_classes: Number of output classes.
        dropout_rate: Dropout probability inside the residual units. The
            default of 0.0 is the recommended starting point for adversarial
            training.

    Returns:
        A freshly initialised ``WideResNet`` instance.
    """
    wrn28_10 = {"depth": 28, "widen_factor": 10}
    return WideResNet(dropout_rate=dropout_rate, num_classes=num_classes, **wrn28_10)


# Status message emitted once the WideResNet definitions in this cell have run.
print("‚úÖ WideResNet-28-10 modeli hazƒ±r (H√úCRE 2).")


‚úÖ WideResNet-28-10 modeli hazƒ±r (H√úCRE 2).


HÜCRE 3: Saldırı (Attack) ve Değerlendirme Fonksiyonları
FGSM / R-FGSM, PGD, TRADES ve evaluation fonksiyonları

In [6]:
# HÜCRE 3: Saldırı (Attack) ve Değerlendirme Fonksiyonları
# FGSM / R-FGSM, PGD, TRADES ve evaluation fonksiyonları

import torch
import torch.nn.functional as F

# --- EVALUATION ---
@torch.no_grad()
def evaluate_clean(model, loader, device=None):
    """Return the top-1 accuracy of ``model`` on the unperturbed batches of ``loader``.

    Args:
        model: Module mapping an input batch to per-class scores along dim 1.
        loader: Iterable of ``(inputs, labels)`` batches.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used, so the function no longer
            depends on a notebook-global ``device``. For a model without
            parameters the batches are left where they are.

    Returns:
        Fraction of correctly classified samples as a float in [0, 1].
        An empty loader yields 0.0 instead of raising ``ZeroDivisionError``.

    Side effects:
        Puts ``model`` into eval mode and leaves it there.
    """
    model.eval()
    if device is None:
        first_param = next(model.parameters(), None)
        if first_param is not None:
            device = first_param.device

    correct = 0
    total = 0
    for x, y in loader:
        target = x.device if device is None else device
        x, y = x.to(target), y.to(target)
        _, preds = model(x).max(1)
        correct += preds.eq(y).sum().item()
        total += y.size(0)

    if total == 0:
        # Accuracy over no samples is undefined; report 0.0 rather than crash.
        return 0.0
    return correct / total


def evaluate_robust(model, loader, attack_fn, epsilon, device=None, **kwargs):
    """
    Compute robust accuracy: accuracy on adversarial examples produced by ``attack_fn``.

    Args:
        model: Module mapping an input batch to per-class scores along dim 1.
        loader: Iterable of ``(inputs, labels)`` batches.
        attack_fn: Callable with the signature
            ``attack_fn(model, x, y, epsilon, device, **kwargs)`` returning the
            adversarial batch.
        epsilon: Perturbation budget forwarded to ``attack_fn``.
        device: Device the batches are moved to and that is forwarded to
            ``attack_fn``. When omitted, the device of the model's first
            parameter is used, so the function no longer depends on a
            notebook-global ``device``. For a model without parameters each
            batch stays on its own device.
        **kwargs: Extra keyword arguments forwarded to ``attack_fn``.

    Returns:
        Fraction of adversarial samples still classified correctly, as a float
        in [0, 1]. An empty loader yields 0.0 instead of raising
        ``ZeroDivisionError``.

    Side effects:
        Puts ``model`` into eval mode and leaves it there.
    """
    model.eval()
    if device is None:
        first_param = next(model.parameters(), None)
        if first_param is not None:
            device = first_param.device

    correct = 0
    total = 0
    for x, y in loader:
        target = x.device if device is None else device
        x, y = x.to(target), y.to(target)
        # The attack needs gradients, so only the final scoring pass is under no_grad.
        x_adv = attack_fn(model, x, y, epsilon=epsilon, device=target, **kwargs)
        with torch.no_grad():
            _, preds = model(x_adv).max(1)
            correct += preds.eq(y).sum().item()
            total += y.size(0)

    if total == 0:
        # Accuracy over no samples is undefined; report 0.0 rather than crash.
        return 0.0
    return correct / total


# --- CLAMP helper ---
# Keeps normalized data inside the normalized-space image of the pixel range [0, 1].
def clamp(x, min_val=0.0, max_val=1.0):
    """
    Clip a normalized tensor to a valid pixel range.

    Works in three steps: map ``x`` back to pixel space with ``denormalize``,
    clip it to ``[min_val, max_val]``, then map the result back with
    ``normalize``.
    """
    pixels = denormalize(x)
    return normalize(pixels.clamp(min_val, max_val))


# --- ATTACKS ---

def fgsm_attack(model, x, y, epsilon, device, random_start=False, **kwargs):
    """
    FGSM / R-FGSM attack on normalized inputs (L-infinity budget given in pixel space).

    Args:
        model: Classifier returning logits.
        x: Normalized input batch.
        y: Ground-truth labels.
        epsilon: L-infinity budget in pixel units; it is divided by the
            per-channel std to obtain the budget in normalized space.
        device: Device on which the attack is computed.
        random_start: False gives classic FGSM. True gives R-FGSM: start from a
            uniformly random point inside the epsilon ball, then take one
            signed-gradient step.
        **kwargs: Ignored; accepted so all attacks share one call signature.

    Returns:
        Detached adversarial batch that stays inside both the epsilon ball
        around ``x`` and the valid pixel range.

    Notes:
        * With ``random_start=True`` the random offset plus the full epsilon
          step can reach 2*epsilon, so the total perturbation is projected
          back onto the epsilon ball before the final clamp. With
          ``random_start=False`` the projection is a no-op.
        * The input gradient comes from ``torch.autograd.grad``, so the model's
          parameter ``.grad`` buffers are neither zeroed nor written.
        * The model's train/eval mode is not changed here.
    """
    x_orig = x.clone().detach().to(device)
    y = y.to(device)

    # Scale epsilon into normalized space (per channel).
    eps_norm = (epsilon / std_tensor).to(device)

    # Random start (R-FGSM).
    if random_start and epsilon > 0:
        noise = torch.empty_like(x_orig).uniform_(-1, 1) * eps_norm
        x_start = clamp(x_orig + noise)
    else:
        x_start = x_orig

    # Fresh leaf tensor, so enabling grad does not touch x_orig / x_start.
    x_in = x_start.detach().requires_grad_(True)
    loss = F.cross_entropy(model(x_in), y)
    data_grad = torch.autograd.grad(loss, x_in)[0]

    with torch.no_grad():
        # Total perturbation relative to the original input.
        delta = (x_start - x_orig) + eps_norm * data_grad.sign()
        # Project onto the epsilon ball, then keep the result a valid image.
        delta = torch.max(torch.min(delta, eps_norm), -eps_norm)
        x_adv = clamp(x_orig + delta)

    return x_adv.detach()


def pgd_attack(model, x, y, epsilon, device, alpha=2/255, steps=7, **kwargs):
    """
    L-infinity PGD attack on normalized inputs, with a random start.

    Args:
        model: Classifier returning logits.
        x: Normalized input batch.
        y: Ground-truth labels.
        epsilon: L-infinity budget in pixel units (scaled per channel into
            normalized space).
        device: Device on which the attack is computed.
        alpha: Step size in pixel units (scaled like ``epsilon``).
        steps: Number of gradient-ascent steps; with 0 the randomly started
            point is returned.
        **kwargs: Ignored; accepted so all attacks share one call signature.

    Returns:
        Detached adversarial batch inside the epsilon ball around ``x`` and
        inside the valid pixel range.

    Notes:
        The input gradient comes from ``torch.autograd.grad`` instead of
        ``model.zero_grad()`` plus ``loss.backward()``. The attack therefore no
        longer wipes or overwrites the caller's parameter gradients, and it
        skips computing weight gradients that were never used. The model's
        train/eval mode is not changed here.
    """
    x = x.clone().detach().to(device)
    y = y.to(device)

    eps_norm = (epsilon / std_tensor).to(device)
    alpha_norm = (alpha / std_tensor).to(device)

    # Random start inside the epsilon ball, restricted to valid pixel values.
    delta = torch.zeros_like(x).uniform_(-1, 1) * eps_norm
    delta = clamp(x + delta) - x
    x_adv = x + delta

    for _ in range(steps):
        x_adv = x_adv.detach().requires_grad_(True)
        loss = F.cross_entropy(model(x_adv), y)
        grad = torch.autograd.grad(loss, x_adv)[0]

        with torch.no_grad():
            # Signed-gradient ascent step.
            x_adv = x_adv + alpha_norm * grad.sign()

            # Project onto the epsilon ball, then keep the result a valid image.
            delta = x_adv - x
            delta = torch.max(torch.min(delta, eps_norm), -eps_norm)
            x_adv = clamp(x + delta)

    return x_adv.detach()


# Status message: fgsm_attack and pgd_attack are defined at this point (trades_loss follows below).
print("‚úÖ FGSM / PGD saldƒ±rƒ± fonksiyonlarƒ± hazƒ±r.")


def trades_loss(model, x, y, epsilon, device, step_size=2/255, steps=10, beta=6.0):
    """
    TRADES objective: natural cross-entropy + beta * KL(clean || adversarial).

    The adversarial batch is found with the model in eval mode by maximising
    the summed KL divergence between clean and adversarial predictions for
    ``steps`` signed-gradient steps. Each step is projected onto the epsilon
    ball and clamped to the valid pixel range. The model is then switched to
    train mode and the loss is computed from two forward passes, adversarial
    first and clean second.

    Args:
        model: Classifier returning logits.
        x: Normalized input batch.
        y: Ground-truth labels.
        epsilon: L-infinity budget in pixel units (scaled per channel).
        device: Device on which everything is computed.
        step_size: Attack step size in pixel units (scaled like ``epsilon``).
        steps: Number of attack iterations.
        beta: Weight of the robustness (KL) term.

    Returns:
        Scalar loss tensor. The model is left in train mode.
    """
    def _summed_kl(adv_logits, clean_logits):
        # KL(softmax(clean) || softmax(adv)), summed over the whole batch.
        return F.kl_div(
            F.log_softmax(adv_logits, dim=1),
            F.softmax(clean_logits, dim=1),
            reduction='sum'
        )

    model.eval()
    n_samples = x.shape[0]
    x, y = x.to(device), y.to(device)

    eps_norm = (epsilon / std_tensor).to(device)
    step_norm = (step_size / std_tensor).to(device)

    # Start from the clean input plus a small Gaussian perturbation.
    x_adv = x.detach() + 0.001 * torch.randn_like(x).detach().to(device)

    with torch.no_grad():
        clean_reference = model(x)

    for _ in range(steps):
        x_adv.requires_grad = True
        divergence = _summed_kl(model(x_adv), clean_reference)
        input_grad = torch.autograd.grad(divergence, x_adv)[0]

        # Ascent step followed by projection onto the epsilon ball and pixel clamp.
        stepped = x_adv.detach() + step_norm * input_grad.sign()
        offset = torch.max(torch.min(stepped - x, eps_norm), -eps_norm)
        x_adv = clamp(x + offset)

    model.train()
    x_adv = x_adv.detach()

    # Forward order (adversarial, then clean) is kept because it affects BatchNorm running stats.
    adv_logits = model(x_adv)
    clean_logits = model(x)

    natural_term = F.cross_entropy(clean_logits, y)
    robust_term = (1.0 / n_samples) * _summed_kl(adv_logits, clean_logits)
    return natural_term + beta * robust_term


# Status message: every attack and evaluation helper of this cell is now defined.
print("‚úÖ Saldƒ±rƒ± ve Test fonksiyonlarƒ± hazƒ±r (FGSM/R-FGSM + PGD + TRADES).")


‚úÖ FGSM / PGD saldƒ±rƒ± fonksiyonlarƒ± hazƒ±r.
‚úÖ Saldƒ±rƒ± ve Test fonksiyonlarƒ± hazƒ±r (FGSM/R-FGSM + PGD + TRADES).


HÜCRE 4: Curriculum Engine (WRN-28-10 uyumlu, PGD-7 train + PGD-20 eval)

In [7]:
import time
import os
import torch
import torch.nn.functional as F

def evaluate_detailed(model, loader, device):
    """
    Sanity-check report: clean, FGSM and PGD-20 accuracy at epsilon = 8/255.

    Args:
        model: Classifier to evaluate; it is put into eval mode.
        loader: Iterable of ``(inputs, labels)`` batches.
        device: Accepted for call-site symmetry; it is not forwarded to the
            evaluation helpers.

    Returns:
        ``(clean_acc, fgsm_acc, pgd20_acc)`` as fractions; also prints one summary line.
    """
    model.eval()
    eval_eps = 8/255

    clean_acc = evaluate_clean(model, loader)

    # Classic FGSM; pass random_start=True here for an R-FGSM sanity check instead.
    fgsm_acc = evaluate_robust(model, loader, fgsm_attack, epsilon=eval_eps)

    pgd20_settings = {"steps": 20, "alpha": 2/255}
    pgd20_acc = evaluate_robust(model, loader, pgd_attack, epsilon=eval_eps, **pgd20_settings)

    print(f"    üìä [Detaylƒ± Test] Clean: %{clean_acc*100:.2f} | FGSM: %{fgsm_acc*100:.2f} | PGD-20: %{pgd20_acc*100:.2f}")
    return clean_acc, fgsm_acc, pgd20_acc


def get_curriculum_epsilon(epoch):
    """
    Map an epoch number to the curriculum's ``(epsilon, stage label)`` pair.

    Same schedule as in Stage 2 (upper bounds are inclusive):
      up to 9  : 0
      10-24    : 2/255
      25-44    : 4/255
      45-64    : 6/255
      above 64 : 8/255
    """
    # Rows are (last epoch of the stage, epsilon, label); the first matching row wins.
    stages = (
        (9, 0.0, "üü¢ Warm-up (Clean)"),
        (24, 2/255, "üü° Low Noise (2/255)"),
        (44, 4/255, "üü† Mid Noise (4/255)"),
        (64, 6/255, "üî¥ High Noise (6/255)"),
    )
    for last_epoch, stage_eps, stage_label in stages:
        if epoch <= last_epoch:
            return stage_eps, stage_label
    # Anything past the last boundary trains at the target budget.
    return 8/255, "üî• Target Noise (8/255)"


def train_curriculum_wrn(
    model_name="wrn28_10_curriculum_v1",
    epochs=80,
    lr=0.1,
    weight_decay=5e-4,
    pgd_steps_train=7,
):
    """Train a WRN-28-10 with curriculum PGD adversarial training and return it.

    Per epoch, the perturbation budget comes from ``get_curriculum_epsilon``.
    While it is 0 the model trains on clean batches. Otherwise every batch is
    replaced by PGD adversarial examples (step size epsilon/4,
    ``pgd_steps_train`` steps) crafted with the model in eval mode, and the
    cross-entropy on those examples is minimised.

    Optimisation uses SGD (momentum 0.9), a MultiStepLR schedule with
    milestones at epochs 40 and 60 (gamma 0.1), and gradient-norm clipping at 1.0.

    After each epoch the clean test accuracy is measured and the best clean
    checkpoint is saved. Every 10 epochs up to epoch 60, and every 5 epochs
    afterwards, ``evaluate_detailed`` runs and the checkpoint with the best
    PGD-20 accuracy is saved. A final checkpoint is always written.

    Relies on the module-level names ``device``, ``train_loader``,
    ``test_loader`` and ``optim`` defined in earlier cells.

    Args:
        model_name: Prefix of the checkpoint files under ``checkpoints/``.
        epochs: Number of training epochs.
        lr: Initial SGD learning rate.
        weight_decay: SGD weight decay.
        pgd_steps_train: PGD iterations used while training.

    Returns:
        The trained model (final weights, not the best checkpoint).
    """
    print(f"\nüöÄ CURRICULUM TRAINING BA≈ûLIYOR: {model_name}")
    print(f"   Model: WRN-28-10 | Epochs: {epochs} | PGD-{pgd_steps_train} train | PGD-20 eval")

    # Model
    model = make_wrn28_10(num_classes=10, dropout_rate=0.0).to(device)

    # Optimizer + Scheduler (controlled scaling)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 60], gamma=0.1)

    # Checkpoints
    os.makedirs("checkpoints", exist_ok=True)
    path_best_clean = f"checkpoints/{model_name}_best_clean.pth"
    path_best_robust = f"checkpoints/{model_name}_best_robust.pth"
    path_final = f"checkpoints/{model_name}_final.pth"

    best_clean_acc = 0.0
    best_robust_acc = 0.0

    start_time = time.time()

    for epoch in range(1, epochs + 1):
        model.train()
        epsilon, mode = get_curriculum_epsilon(epoch)

        # Train-time PGD step size rule: alpha = epsilon/4 (no attack when epsilon=0)
        if epsilon == 0.0:
            alpha = 0.0
            steps = 0
        else:
            alpha = epsilon / 4
            steps = pgd_steps_train

        total_loss = 0.0

        for x, y in train_loader:
            x, y = x.to(device), y.to(device)

            optimizer.zero_grad()

            if epsilon > 0.0:
                # Attack generation with BN stats frozen
                model.eval()
                x_adv = pgd_attack(
                    model, x, y,
                    epsilon=epsilon,
                    device=device,
                    alpha=alpha,
                    steps=steps
                )
                model.train()

                # Clear grads from attack gen (safety)
                optimizer.zero_grad()
                model.zero_grad()

                logits = model(x_adv)
                loss = F.cross_entropy(logits, y)  # Madry-style
            else:
                logits = model(x)
                loss = F.cross_entropy(logits, y)

            loss.backward()
            # Clip the global gradient norm to 1.0 before the optimizer step.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            total_loss += loss.item()

        # Read the LR used during this epoch before the scheduler advances it.
        current_lr = scheduler.get_last_lr()[0]
        scheduler.step()

        # Clean eval each epoch (fast)
        clean_acc = evaluate_clean(model, test_loader)
        print(f"Ep {epoch}/{epochs} | {mode} | Eps: {epsilon:.4f} | LR: {current_lr:.5f} | Loss: {total_loss/len(train_loader):.4f} | Clean: %{clean_acc*100:.2f}")

        # Save best clean
        if clean_acc > best_clean_acc:
            best_clean_acc = clean_acc
            torch.save(model.state_dict(), path_best_clean)

        # Robust eval schedule (same as before)
        do_eval = (epoch <= 60 and epoch % 10 == 0) or (epoch > 60 and epoch % 5 == 0)
        if do_eval:
            print(f"üîé Epoch {epoch} Detaylƒ± Robustness Kontrol√º...")
            # Only the PGD-20 accuracy drives the "best robust" checkpoint.
            _, _, robust_acc = evaluate_detailed(model, test_loader, device)

            if robust_acc > best_robust_acc:
                best_robust_acc = robust_acc
                print(f"üèÜ YENƒ∞ EN ƒ∞Yƒ∞ ROBUST MODEL! (%{best_robust_acc*100:.2f}) -> Kaydedildi.")
                torch.save(model.state_dict(), path_best_robust)

    # Final save
    torch.save(model.state_dict(), path_final)

    total_min = (time.time() - start_time) / 60.0
    print(f"\nüèÅ Eƒüitim Tamamlandƒ±. S√ºre: {total_min:.1f} dk")
    print(f"En ƒ∞yi Clean Acc: %{best_clean_acc*100:.2f}")
    print(f"En ƒ∞yi Robust Acc (PGD-20): %{best_robust_acc*100:.2f}")

    return model


# Status message emitted once the curriculum training helpers of this cell are defined.
print("‚úÖ H√úCRE 4 hazƒ±r: WRN-28-10 Curriculum PGD Engine.")


‚úÖ H√úCRE 4 hazƒ±r: WRN-28-10 Curriculum PGD Engine.


In [8]:
# Settings intended for batch size 128:
model = train_curriculum_wrn(
    model_name="wrn28_10_curriculum_v1",
    epochs=80,
    lr=0.1,
    weight_decay=5e-4,
    pgd_steps_train=7
)

# On out-of-memory errors, first switch CELL 1 to get_loaders(batch_size=64),
# then retry with lr=0.05.



üöÄ CURRICULUM TRAINING BA≈ûLIYOR: wrn28_10_curriculum_v1
   Model: WRN-28-10 | Epochs: 80 | PGD-7 train | PGD-20 eval
Ep 1/80 | üü¢ Warm-up (Clean) | Eps: 0.0000 | LR: 0.10000 | Loss: 1.4637 | Clean: %54.80
Ep 2/80 | üü¢ Warm-up (Clean) | Eps: 0.0000 | LR: 0.10000 | Loss: 0.9902 | Clean: %53.52
Ep 3/80 | üü¢ Warm-up (Clean) | Eps: 0.0000 | LR: 0.10000 | Loss: 0.7854 | Clean: %59.55
Ep 4/80 | üü¢ Warm-up (Clean) | Eps: 0.0000 | LR: 0.10000 | Loss: 0.6521 | Clean: %71.65
Ep 5/80 | üü¢ Warm-up (Clean) | Eps: 0.0000 | LR: 0.10000 | Loss: 0.5736 | Clean: %73.96
Ep 6/80 | üü¢ Warm-up (Clean) | Eps: 0.0000 | LR: 0.10000 | Loss: 0.5187 | Clean: %67.70
Ep 7/80 | üü¢ Warm-up (Clean) | Eps: 0.0000 | LR: 0.10000 | Loss: 0.4833 | Clean: %82.28
Ep 8/80 | üü¢ Warm-up (Clean) | Eps: 0.0000 | LR: 0.10000 | Loss: 0.4491 | Clean: %75.17
Ep 9/80 | üü¢ Warm-up (Clean) | Eps: 0.0000 | LR: 0.10000 | Loss: 0.4286 | Clean: %78.84
Ep 10/80 | üü° Low Noise (2/255) | Eps: 0.0078 | LR: 0.10000 | Loss: