In [39]:
from __future__ import print_function
import os
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchattacks
import torch.optim as optim
import pandas as pd 
import numpy as np
import autoattack

from autoattack import AutoAttack 
from torch.autograd import Variable
from torchvision import datasets, transforms
from models.vgg import *
from models.resnet import *

In [40]:
def _str2bool(value):
    """Convert a command-line token such as 'true'/'False'/'0' into a bool.

    argparse passes option values as strings, and any non-empty string
    (including 'False') is truthy, so boolean-valued options need an
    explicit converter.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')


# Command-line options for the evaluation run. Every numeric option declares
# its type so that a value given on the command line has the same Python type
# as the default (otherwise argparse hands back a str).
parser = argparse.ArgumentParser(description='PyTorch CIFAR Attack Evaluation')
parser.add_argument('--test-batch-size', type=int, default=200, metavar='N',
                    help='input batch size for testing (default: 200)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--epsilon', type=float, default=0.031,
                    help='perturbation')
parser.add_argument('--num-steps', type=int, default=20,
                    help='perturb number of steps')
parser.add_argument('--step-size', type=float, default=0.003,
                    help='perturb step size')
parser.add_argument('--random', type=_str2bool,
                    default=True,
                    help='random initialization for PGD')
parser.add_argument('--white-box-attack', type=_str2bool, default=True,
                    help='whether perform white-box attack')
parser.add_argument('--model', type=str, default='vgg', choices=['resnet', 'vgg'],
                    help='Modelo a usar: resnet o vgg')

# parse_known_args tolerates the extra arguments present in sys.argv when the
# code runs inside a Jupyter kernel.
args, unknown = parser.parse_known_args()

In [41]:
def path(model):
    """Resolve the checkpoint path for a dataset/strategy descriptor.

    Args:
        model: mapping with the keys ``'dataset'`` and ``'estrategia'``
            (the training-strategy identifier).

    Returns:
        argparse.Namespace whose ``model_path`` attribute holds the checkpoint
        path. The table entry is the default; an explicit ``--model-path``
        on the command line still overrides it.

    Raises:
        ValueError: if the dataset or the strategy has no known checkpoint.
            Previously this case returned a Namespace without ``model_path``
            and only failed later with an AttributeError at the call site.
    """
    checkpoints = {
        'cifar': {
            'trades-cifar-vgg16': './model-cifar-trades-vgg16/model-nn-epoch100.pt',
            'trades-cifar-resnet': './model-cifar-trades-resnet18/model-nn-epoch100.pt',
            'mart-cifar-vgg16': './model-cifar-mart-vgg16/model-nn-epoch100.pt',
            'mart-cifar-resnet': './model-cifar-mart-resnet18/model-nn-epoch100.pt',
            'faal-cifar-vgg16': './model-cifar-faal-vgg16/model-nn-epoch100.pt',
            'faal-cifar-resnet': './model-cifar-faal-resnet18/model-nn-epoch100.pt',
            'Dtrades-cifar-resnet': './model-cifar-D-trades-resnet18/model-nn-epoch100.pt',
            'Dtrades-cifar-vgg16': './model-cifar-D-trades-vgg16/model-nn-epoch100.pt',
            'Dtrades-cifar-resnet-a1-b1': './model-cifar-D-trades-resnet18-alpha1-beta1/model-nn-epoch100.pt',
            'Dtrades-cifar-vgg16-a1-b1': './model-cifar-D-trades-vgg16-alpha1-beta1/model-nn-epoch100.pt',
            'standar-cifar-resnet18': 'model-standard-cifar-resnet18/model-nn-epoch100.pt',
            'standar-cifar-vgg16': 'model-standard-cifar-vgg16/model-nn-epoch100.pt',
        },
    }

    dataset = model['dataset']
    strategies = checkpoints.get(dataset)
    if strategies is None:
        raise ValueError(
            f"unknown dataset {dataset!r}; expected one of {sorted(checkpoints)}")

    strategy = model['estrategia']
    if strategy not in strategies:
        raise ValueError(
            f"unknown strategy {strategy!r} for dataset {dataset!r}; "
            f"expected one of {sorted(strategies)}")

    # allow_abbrev=False matters: with abbreviations enabled, a command-line
    # '--model resnet' (meant for the main parser) is accepted here as a
    # prefix of '--model-path' and silently replaces the checkpoint path.
    parser = argparse.ArgumentParser(description='PyTorch CIFAR Attack Evaluation',
                                     allow_abbrev=False)
    parser.add_argument('--model-path',
                        default=strategies[strategy],
                        help='model for white-box attack evaluation')
    args, _unknown = parser.parse_known_args()
    return args

In [42]:
# Runtime settings: pick the compute device and the matching DataLoader options.
if args.no_cuda:
    use_cuda = False
else:
    use_cuda = torch.cuda.is_available()

device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Extra DataLoader keyword arguments are only supplied when running on CUDA.
kwargs = dict(num_workers=1, pin_memory=True) if use_cuda else dict()

In [43]:
# Set up the data loader: CIFAR-10 test split, converted to tensors only,
# iterated in a fixed order.
transform_test = transforms.Compose([transforms.ToTensor()])
testset = torchvision.datasets.CIFAR10(
    root='../data',
    train=False,
    download=True,
    transform=transform_test,
)
test_loader = torch.utils.data.DataLoader(
    testset,
    batch_size=args.test_batch_size,
    shuffle=False,
    **kwargs
)

In [44]:
def _pgd_whitebox(model,
                  X,
                  y,
                  epsilon=args.epsilon,
                  num_steps=20,
                  step_size=0.003):
    """Run an L-inf PGD attack with a random start and count misclassifications.

    Args:
        model: callable mapping a batch of inputs to per-class scores.
        X: batch of clean inputs; adversarial examples are clamped to [0, 1].
        y: target class indices for the batch.
        epsilon: maximum per-element perturbation.
        num_steps: number of signed-gradient ascent steps.
        step_size: size of each signed-gradient step.

    Returns:
        (err, err_pgd): 0-dim tensors holding the number of misclassified
        samples on the clean batch and on the perturbed batch.
    """
    # Values that come from the command line may arrive as strings when the
    # parser declares no type for them; normalise once up front.
    epsilon = float(epsilon)
    step_size = float(step_size)
    num_steps = int(num_steps)

    x_nat = X.detach()

    # Prediction-only pass: no autograd graph is needed.
    with torch.no_grad():
        err = (model(x_nat).max(1)[1] != y).float().sum()

    # Random start inside the epsilon ball, allocated on X's own device
    # instead of relying on a module-level `device`.
    X_pgd = x_nat + torch.empty_like(x_nat).uniform_(-epsilon, epsilon)

    for _ in range(num_steps):
        X_pgd = X_pgd.detach().requires_grad_(True)
        with torch.enable_grad():
            loss = nn.CrossEntropyLoss()(model(X_pgd), y)
        # Differentiate w.r.t. the input only; unlike loss.backward() this
        # does not accumulate gradients into the model parameters.
        grad, = torch.autograd.grad(loss, X_pgd)

        with torch.no_grad():
            X_pgd = X_pgd + step_size * grad.sign()
            # Project back onto the epsilon ball around the clean input ...
            X_pgd = x_nat + torch.clamp(X_pgd - x_nat, -epsilon, epsilon)
            # ... and then onto the valid input range.
            X_pgd = torch.clamp(X_pgd, 0, 1.0)

    with torch.no_grad():
        err_pgd = (model(X_pgd).max(1)[1] != y).float().sum()
    return err, err_pgd

In [45]:
def eval_adv_test_whitebox(model, device, test_loader):
    """Evaluate clean and PGD-attacked accuracy over a data loader.

    Args:
        model: network to evaluate; it is switched to eval mode.
        device: device every batch is moved to before the attack.
        test_loader: iterable yielding (data, target) batches.

    Returns:
        (natural_acc, robust_acc, robust_drop, attack_success_rate) as plain
        Python floats. robust_drop is natural_acc - robust_acc and
        attack_success_rate is 1 - robust_acc.
    """
    model.eval()
    robust_err_total = 0.0
    natural_err_total = 0.0
    total = 0

    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        # pgd attack
        err_natural, err_robust = _pgd_whitebox(model, data, target)
        # float() turns the 0-dim (possibly CUDA) tensors into Python numbers
        # so the returned metrics serialise cleanly (e.g. into a CSV).
        robust_err_total += float(err_robust)
        natural_err_total += float(err_natural)
        # Count the samples actually evaluated rather than trusting
        # len(test_loader.dataset), which is wrong for samplers or drop_last.
        total += data.size(0)

    natural_acc = 1 - natural_err_total / total
    robust_acc = 1 - robust_err_total / total
    robust_drop = natural_acc - robust_acc
    attack_success_rate = 1 - robust_acc

    # ':.4f' (precision) rather than ':4f' (minimum width) so all four values
    # are printed with four decimals.
    print(f'PGD natural_acc: {natural_acc:.4f}, robust_acc: {robust_acc:.4f}, robust_drop: {robust_drop:.4f}, attack_success_rate: {attack_success_rate:.4f}')
    return natural_acc, robust_acc, robust_drop, attack_success_rate

In [46]:
def _fgsm_whitebox(model, X, y, epsilon):
    model.eval()

    with torch.no_grad():
        out = model(X)
    err_natural = (out.data.max(1)[1] != y.data).float().sum()

    X_adv = X.clone().detach().requires_grad_(True)

    outputs = model(X_adv)
    loss = nn.CrossEntropyLoss()(outputs, y)

    model.zero_grad()
    if X_adv.grad is not None:
        X_adv.grad.zero_()
    loss.backward()

    eta = epsilon * X_adv.grad.data.sign()
    X_adv = X_adv.data + eta
    X_adv = torch.clamp(X_adv, 0.0, 1.0)          
    X_adv = X_adv.detach()                        

    with torch.no_grad():
        out_adv = model(X_adv)
    err_adv = (out_adv.data.max(1)[1] != y.data).float().sum()

    return err_natural, err_adv, X_adv

In [47]:
def eval_adv_test_fgsm(model, device, test_loader, epsilon=0.03):
    """Evaluate clean and FGSM-attacked accuracy over a data loader.

    Args:
        model: network to evaluate; moved to `device` and set to eval mode.
        device: device the model and every batch are moved to.
        test_loader: iterable yielding (data, target) batches.
        epsilon: FGSM step size forwarded to the attack.

    Returns:
        (natural_acc, robust_acc, robust_drop, attack_success_rate) as plain
        Python floats. robust_drop is natural_acc - robust_acc and
        attack_success_rate is 1 - robust_acc.
    """
    model.to(device)
    model.eval()
    natural_err_total = 0.0
    robust_err_total = 0.0
    total = 0

    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        total += data.size(0)

        err_nat, err_adv, _ = _fgsm_whitebox(model, data, target, epsilon)
        # float() turns the 0-dim (possibly CUDA) tensors into Python numbers
        # so the returned metrics serialise cleanly (e.g. into a CSV).
        natural_err_total += float(err_nat)
        robust_err_total += float(err_adv)

    natural_acc = 1.0 - (natural_err_total / total)
    robust_acc = 1.0 - (robust_err_total / total)
    robust_drop = natural_acc - robust_acc
    attack_success_rate = 1 - robust_acc

    # ':.4f' (precision) rather than ':4f' (minimum width) so all four values
    # are printed with four decimals.
    print(f'FGSM natural_acc: {natural_acc:.4f}, robust_acc: {robust_acc:.4f}, robust_drop: {robust_drop:.4f}, attack_success_rate: {attack_success_rate:.4f}')
    return natural_acc, robust_acc, robust_drop, attack_success_rate

In [48]:
def eval_adv_test_autoattack(model, device, test_loader,
                             norm='Linf',
                             eps=8/255,
                             max_samples=1000):
    """Evaluate clean and AutoAttack accuracy on the first batches of a loader.

    Batches are gathered from `test_loader` until at least `max_samples`
    samples have been seen; the concatenated tensors are then truncated to
    `max_samples` and attacked with the 'standard' AutoAttack version.

    Args:
        model: network to evaluate; moved to `device` and set to eval mode.
        device: device used for the model, the data and the attack.
        test_loader: iterable yielding (inputs, labels) batches.
        norm: threat-model norm forwarded to AutoAttack.
        eps: perturbation budget forwarded to AutoAttack.
        max_samples: upper bound on the number of evaluated samples.

    Returns:
        (natural_acc, robust_acc, robust_drop, attack_success_rate) as floats.
    """
    model.to(device)
    model.eval()

    # Gather just enough batches to cover max_samples.
    image_batches, label_batches = [], []
    collected = 0
    for images, labels in test_loader:
        image_batches.append(images)
        label_batches.append(labels)
        collected += images.size(0)
        if collected >= max_samples:
            break

    x_test = torch.cat(image_batches, dim=0)[:max_samples].to(device)
    y_test = torch.cat(label_batches, dim=0)[:max_samples].to(device)
    total = x_test.size(0)

    rule = "----------------------------------------------------------------"
    print(f"Iniciando evaluación con AutoAttack ({total} muestras, Norma: {norm}, Epsilon: {eps:.4f})")
    print(rule)

    attack_options = dict(norm=norm, eps=eps, log_path=None,
                          version='standard', device=device, seed=0)
    adversary = autoattack.AutoAttack(model, **attack_options)
    x_adv = adversary.run_standard_evaluation(x_test, y_test)

    def misclassified(inputs):
        # Number of samples whose arg-max prediction differs from the label.
        return (model(inputs).max(1)[1] != y_test).float().sum().item()

    with torch.no_grad():
        natural_err_total = misclassified(x_test)
        robust_err_total = misclassified(x_adv)

    natural_acc = 1.0 - (natural_err_total / total)
    robust_acc = 1.0 - (robust_err_total / total)
    robust_drop = natural_acc - robust_acc
    attack_success_rate = 1.0 - robust_acc

    report_lines = (
        rule,
        f'Métricas de Robustez con AutoAttack {norm}:',
        f'   -> Precisión Natural:       {natural_acc:.4f}',
        f'   -> Precisión Robusta (AA):  {robust_acc:.4f}',
        f'   -> Caída de Robustez:       {robust_drop:.4f}',
        f'   -> Tasa de Éxito del Ataque: {attack_success_rate:.4f}',
        rule,
    )
    for line in report_lines:
        print(line)

    return natural_acc, robust_acc, robust_drop, attack_success_rate

In [49]:
# Directory that receives the evaluation CSV files.
log_dir = './attacks-cifar'
# exist_ok=True avoids the check-then-create race and raises immediately if a
# regular file already occupies the path (instead of failing later on write).
os.makedirs(log_dir, exist_ok=True)

In [50]:
def safe_name(path):
    """Return `path` with '/', '\\', ':' and '.' each replaced by '_'."""
    substitutions = str.maketrans('/\\:.', '____')
    return path.translate(substitutions)

In [52]:
def main():
    """Evaluate one checkpoint with PGD, FGSM and AutoAttack and save a CSV.

    Reads the module-level `args`, `device`, `test_loader` and `log_dir`.
    Does nothing unless `args.white_box_attack` is truthy.

    Raises:
        ValueError: if `args.model` names an unsupported architecture.
    """
    if args.white_box_attack:
        print('white-box attacks')
        architecture = args.model.lower()
        # The checkpoint has to match the architecture: loading the VGG
        # weights into a ResNet (or vice versa) fails in load_state_dict.
        if architecture == "resnet":
            model = ResNet18()
            strategy_name = 'standar-cifar-resnet18'
        elif architecture == "vgg":
            model = vgg16()
            strategy_name = 'standar-cifar-vgg16'
        else:
            raise ValueError("Modelo no reconocido: usa --model resnet o --model vgg")

        validation_results = []
        model = model.to(device)
        estrategy = {'dataset': 'cifar',
                     'estrategia': strategy_name}

        args_path = path(estrategy)
        # map_location lets a checkpoint saved on a GPU be loaded on a
        # CPU-only machine.
        model.load_state_dict(torch.load(args_path.model_path, map_location=device))

        print('================================================================')
        natural_acc_pgd, robust_acc_pgd, robust_drop_pgd, attack_success_pgd = eval_adv_test_whitebox(model, device, test_loader)  # PGD20
        natural_acc_fgsm, robust_acc_fgsm, robust_drop_fgsm, attack_success_fgsm = eval_adv_test_fgsm(model, device, test_loader)  # FGSM
        natural_acc_auto, robust_acc_auto, robust_drop_auto, attack_success_auto = eval_adv_test_autoattack(model, device, test_loader)  # AutoAttack

        # float() accepts both Python numbers and 0-dim tensors, so the CSV
        # holds plain numbers rather than "tensor(..., device='cuda:0')".
        validation_results.append({
            # Plain string: the former `{path}` built a one-element set.
            "Model": args_path.model_path,
            "pgd_natural": float(natural_acc_pgd),
            "pgd_robust": float(robust_acc_pgd),
            "pgd_drop": float(robust_drop_pgd),
            # Spelling aligned with the fgsm/auto columns
            # (was "attack_succes_pgd").
            "attack_success_pgd": float(attack_success_pgd),
            "fgsm_natural": float(natural_acc_fgsm),
            "fgsm_robust": float(robust_acc_fgsm),
            "fgsm_drop": float(robust_drop_fgsm),
            "attack_success_fgsm": float(attack_success_fgsm),
            "auto_natural": float(natural_acc_auto),
            "auto_robust": float(robust_acc_auto),
            "auto_drop": float(robust_drop_auto),
            "attack_success_auto": float(attack_success_auto)
        })
        results_file = os.path.join(log_dir, f"evaluate_{safe_name(args_path.model_path)}.csv")
        df_results = pd.DataFrame(validation_results)
        df_results.to_csv(results_file, index=False)
        print(df_results)
        print('================================================================')


if __name__ == '__main__':
    main()

white-box attacks
PGD natural_acc: 0.9219, robust_acc: 0.0188, robust_drop: 0.903100, attack_success_rate: 0.981200
FGSM natural_acc: 0.9219, robust_acc: 0.2052, robust_drop: 0.716700, attack_success_rate: 0.794800
Iniciando evaluación con AutoAttack (1000 muestras, Norma: Linf, Epsilon: 0.0314)
----------------------------------------------------------------
setting parameters for standard version
using standard version including apgd-ce, apgd-t, fab-t, square.
initial accuracy: 92.60%
apgd-ce - 1/4 - 247 out of 250 successfully perturbed
apgd-ce - 2/4 - 249 out of 250 successfully perturbed
apgd-ce - 3/4 - 249 out of 250 successfully perturbed
apgd-ce - 4/4 - 175 out of 176 successfully perturbed
robust accuracy after APGD-CE: 0.60% (total time 15.4 s)
apgd-t - 1/1 - 6 out of 6 successfully perturbed
robust accuracy after APGD-T: 0.00% (total time 15.8 s)
max Linf perturbation: 0.03137, nan in tensor: 0, max: 1.00000, min: 0.00000
robust accuracy: 0.00%
------------------------------