# 白盒攻击

## 分类模型
- 使用ResNet34结构
- 使用FashionMNIST数据集
- 使用PyTorch框架
- 训练参数设置：
    - 数据预处理：随机裁剪（28×28，padding=4）、随机水平翻转、标准化、随机擦除（随机缩放裁剪、随机旋转在代码中已注释停用）
    - 优化器：Adam（weight_decay=1e-4）
    - 学习率：0.001，学习率衰减：0.1，每10个epoch衰减一次
    - 损失函数：交叉熵
    - 训练轮数：40
    - 批次大小：128

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.models import resnet34
import matplotlib.pyplot as plt
import os

# Training routine: one full pass over the training loader
def train(model, device, train_loader, criterion, optimizer):
    """Train ``model`` for one epoch.

    Args:
        model: network to optimise; it is switched to training mode here.
        device: device every batch is moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches that also exposes
            a sized ``dataset`` attribute.
        criterion: loss callable invoked as ``criterion(output, target)``.
        optimizer: optimizer whose gradients are cleared and stepped per batch.

    Returns:
        ``(train_loss, train_accuracy)``: the per-batch loss weighted by batch
        size, summed and divided by ``len(train_loader.dataset)``, and the
        accuracy in percent over all samples seen.
    """
    model.train()
    loss_sum, hits, seen = 0, 0, 0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so that the final division
        # by the dataset size gives a per-sample value (this assumes the
        # criterion returns a batch mean -- confirm for custom criteria).
        loss_sum += batch_loss.item() * inputs.size(0)
        predicted = logits.max(1)[1]
        seen += labels.size(0)
        hits += predicted.eq(labels).sum().item()

    return loss_sum / len(train_loader.dataset), 100. * hits / seen

# 定义测试函数
def test(model, device, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item() * data.size(0)
            _, predicted = output.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()

    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * correct / total
    return test_loss, test_accuracy



# Training-time pipeline: padded random crop and horizontal flip on the image,
# then tensor conversion, normalisation and random erasing on the tensor.
# RandomResizedCrop(28) and RandomRotation(10) are intentionally left out.
_train_steps = [
    transforms.RandomCrop(28, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.2860,), std=(0.3530,)),
    transforms.RandomErasing(),
]
transform_train = transforms.Compose(_train_steps)

# Evaluation-time pipeline: no augmentation, same normalisation constants.
_test_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.2860,), std=(0.3530,)),
]
transform_test = transforms.Compose(_test_steps)

# FashionMNIST splits read from ./data/ (download=False: the files must
# already be present on disk).
train_dataset = datasets.FashionMNIST(
    root='./data/',
    train=True,
    download=False,
    transform=transform_train,
)
test_dataset = datasets.FashionMNIST(
    root='./data/',
    train=False,
    download=False,
    transform=transform_test,
)
# Build the model: a ResNet-34 with randomly initialised weights
# (weights=None, i.e. NOT pretrained), adapted to single-channel inputs.
model = resnet34(weights=None)
# Replace the stem convolution with a 1-input-channel, 3x3, stride-1 layer.
model.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1, bias=False)
# Replace the classification head with a 10-way linear layer.
model.fc = nn.Linear(model.fc.in_features, 10)

# Select the device first and move the model before building the optimizer.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model = model.to(device)
# Wrap in DataParallel only when several GPUs are actually present. An
# unconditional `.cuda()` call would raise on CPU-only machines even though
# `device` already falls back to "cpu".
if device.type == "cuda" and torch.cuda.device_count() > 1:
    model = torch.nn.DataParallel(model)

# Loss, optimizer and learning-rate schedule
criterion = nn.CrossEntropyLoss()  # cross-entropy loss
# Alternative that was tried: optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
# Multiply the learning rate by 0.1 every 10 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# Data loaders
train_batch_size = 128
test_batch_size = 100
train_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True, num_workers=8)
test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False, num_workers=8)

# Per-epoch metric histories, filled by the training loop.
train_losses, train_accuracies, test_losses, test_accuracies = [], [], [], []

# Create the checkpoint directory once, up front, instead of re-checking it
# on every epoch.
os.makedirs('./checkpoints/', exist_ok=True)

num_epochs = 40
for epoch in range(num_epochs):
    train_loss, train_accuracy = train(model, device, train_loader, criterion, optimizer)
    scheduler.step()  # advance the learning-rate schedule once per epoch
    test_loss, test_accuracy = test(model, device, test_loader, criterion)

    print('Epoch {}: Train Loss: {:.4f}, Train Accuracy: {:.2f}%, Test Loss: {:.4f}, Test Accuracy: {:.2f}%'.format(
        epoch + 1, train_loss, train_accuracy, test_loss, test_accuracy))

    # Save the weights of the underlying network. A DataParallel wrapper
    # prefixes every state-dict key with "module.", which a plain
    # (unwrapped) resnet34 cannot load.
    weights_source = model.module if isinstance(model, nn.DataParallel) else model
    torch.save(weights_source.state_dict(), './checkpoints/ResNet-epo{}-acc{}.ckpt'.format(epoch+1,test_accuracy))

    train_losses.append(train_loss)
    train_accuracies.append(train_accuracy)
    test_losses.append(test_loss)
    test_accuracies.append(test_accuracy)

# Draw the accuracy curves first and the loss curves second. Each pair of
# train/test series is plotted together and finished with its own plt.show().
for train_series, test_series, y_label, figure_title in (
    (train_accuracies, test_accuracies, 'Accuracy', 'Accuracy vs Epoch'),
    (train_losses, test_losses, 'Loss', 'Loss vs Epoch'),
):
    plt.plot(train_series, label='Train')
    plt.plot(test_series, label='Test')
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.title(figure_title)
    plt.legend()
    plt.show()


## FGSM

- 参考文献：[Explaining and Harnessing Adversarial Examples](https://arxiv.org/abs/1412.6572)
- 参考代码：[PyTorch ADVERSARIAL EXAMPLE GENERATION](https://pytorch.org/tutorials/beginner/fgsm_tutorial.html)
- 所需数据集：FashionMNIST
- 分类模型：ResNet34，测试集准确率93.71%

In [None]:
import torch
import torchvision
from torchvision import transforms
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

# Evaluation preprocessing: tensor conversion followed by normalisation with
# mean 0.2860 and std 0.3530.
_eval_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.2860,), std=(0.3530,)),
]
transform_test = transforms.Compose(_eval_steps)

# FashionMNIST test split under ./data (download=True), served one sample per
# batch in shuffled order.
test_data = torchvision.datasets.FashionMNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform_test,
)
test_loader = torch.utils.data.DataLoader(test_data, shuffle=True, batch_size=1)

# Attack routine: iterative, targeted FGSM
def fgsm_attack(image, epsilon, max_iter, model, target, clip_min=0, clip_max=1):
    """Run an iterative targeted FGSM attack against ``model``.

    The attack target is the class following the true one,
    ``(target + 1) % 10`` (ten classes are hard-coded). Each iteration takes a
    signed-gradient step of size ``epsilon`` that *lowers* the cross-entropy
    towards the attack target, clamps the result to ``[clip_min, clip_max]``
    and stops as soon as the model predicts the attack target.

    Args:
        image: input tensor accepted by ``model``. It is not modified; the
            attack works on a detached copy.
        epsilon: step size per iteration. ``0`` disables the attack.
        max_iter: maximum number of gradient steps.
        model: classifier called as ``model(image)``; presumably returns
            per-class scores of shape (N, 10) -- confirm against caller.
        target: tensor holding the true class index of ``image``.
        clip_min: lower clamp bound applied after every step (default 0).
        clip_max: upper clamp bound applied after every step (default 1).
            NOTE(review): if ``image`` was normalised (e.g. with
            mean 0.2860 / std 0.3530) the valid pixel range is not [0, 1];
            pass the normalised bounds in that case.

    Returns:
        ``(success, perturbed_image)``. ``success`` is True when the model
        classifies the perturbed image as the attack target. With
        ``epsilon == 0`` the original ``image`` is returned unchanged.
    """
    if epsilon == 0:
        # Nothing to do; return the input itself rather than an unset local.
        return False, image

    criterion = nn.CrossEntropyLoss()
    attack_target = (target + 1) % 10
    # Work on a private leaf tensor so the caller's tensor is never flagged
    # as requiring gradients.
    perturbed_image = image.clone().detach()

    for _ in range(max_iter):
        perturbed_image.requires_grad_(True)
        loss = criterion(model(perturbed_image), attack_target)
        # Gradient w.r.t. the input only; parameter gradients stay untouched.
        (data_grad,) = torch.autograd.grad(loss, perturbed_image, allow_unused=True)
        if data_grad is None:
            return False, perturbed_image.detach()

        with torch.no_grad():
            # Targeted attack: step *against* the gradient so the loss
            # towards the attack target decreases.
            perturbed_image = perturbed_image - epsilon * data_grad.sign()
            perturbed_image = torch.clamp(perturbed_image, clip_min, clip_max)
            prediction = model(perturbed_image).max(1)[1]

        # Stop as soon as the model outputs the attack target.
        if bool((prediction == attack_target).all()):
            return True, perturbed_image

    return False, perturbed_image.detach()

# 定义测试函数
def test(model, device, test_loader, epsilon):
    model.eval()
    success_num = 0
    correct = 0
    adv_examples = []
    for data, target in test_loader:
        # 找到被分类正确的图像
        data, target = data.to(device), target.to(device)
        output = model(data)
        init_pred = output.max(1, keepdim=True)[1]
        if init_pred.item() != target.item():
            continue
        correct += 1

        success, perturbed_data = fgsm_attack(image=data, epsilon=epsilon, max_iter=100, model=model, target=target)
        if success:
            success_num += 1
            adv_examples.append((data, perturbed_data, target))
        # 打印进度信息
        if correct % 500 == 0:
            print(f"Epsilon: {epsilon}\tAttack progress: {correct}/{len(test_loader)}\tsuccess:{success_num}")
        if correct == 5000:
            break
    # 计算攻击成功率
    attack_acc = success_num / float(correct)
    print(f"Epsilon: {epsilon}\tAttack success rate: {attack_acc:.4f}")
    return attack_acc, adv_examples

# Attack configuration
# Pick the best available backend: CUDA, then Apple MPS, then CPU. Falling
# back to "mps" unconditionally fails on machines that have neither.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(device)
epsilons = [.02, .05, .1, .15, .2, .25, .3]
attack_accs = []
examples = []

# Load the model: ResNet-34 with a 1-channel 3x3 stem and a 10-way head.
model = torchvision.models.resnet34(weights=None, num_classes=10)
model.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1, bias=False)
model.fc = nn.Linear(model.fc.in_features, 10)  # 10-way classification head
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted
# source. map_location="cpu" lets a checkpoint saved on a GPU be read on any
# machine; the model is moved to `device` afterwards.
state_dict = torch.load('./checkpoints/checkpoint-60-93.71.pt', map_location="cpu")
# Checkpoints saved from a DataParallel wrapper carry a "module." prefix on
# every key; strip it so the plain model can load them.
state_dict = {
    (key[len("module."):] if key.startswith("module.") else key): value
    for key, value in state_dict.items()
}
model.load_state_dict(state_dict)
model.to(device)
# Attack the model once per epsilon, recording the success rate and the
# adversarial examples returned for each value.
for eps in epsilons:
    sweep_result = test(model, device, test_loader, eps)
    attack_accs.append(sweep_result[0])
    examples.append(sweep_result[1])

# Success rate as a function of epsilon. The y ticks are left to matplotlib's
# defaults; only the x ticks are fixed, at 0.05 intervals from np.arange.
fig, ax = plt.subplots(figsize=(5,5))
ax.plot(epsilons, attack_accs, "*-")
ax.set_xticks(np.arange(0, .35, step=0.05))
ax.set_title("Attack Success Rate vs Epsilon")
ax.set_xlabel("Epsilon")
ax.set_ylabel("Attack Success Rate")
plt.show()