# STEP 1 - target model

### imports and data loading

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Seed PyTorch's global RNG so weight initialisation and the shuffled
# training order are repeatable under "Restart Kernel -> Run All".
SEED = 0
torch.manual_seed(SEED)

# Settings a reader may want to tune, gathered in one place.
DATA_DIR = './data'
TRAIN_BATCH_SIZE = 64
TEST_BATCH_SIZE = 1000

# base transformation: convert each image to a tensor
transform = transforms.ToTensor()

# MNIST dataset (downloaded into DATA_DIR when not already present)
train_dataset = datasets.MNIST(root=DATA_DIR, train=True, download=True, transform=transform)
test_dataset  = datasets.MNIST(root=DATA_DIR, train=False, download=True, transform=transform)

# Only the training split is shuffled; the test split keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=TRAIN_BATCH_SIZE, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=TEST_BATCH_SIZE, shuffle=False)


100%|██████████| 9.91M/9.91M [00:01<00:00, 6.62MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 224kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 1.82MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 2.56MB/s]


### target model: a simple MLP

In [2]:
class TargetModel(nn.Module):
    """Fully connected classifier: 28*28 inputs -> 256 -> 128 -> 10 outputs.

    The input is flattened first, and a ReLU follows each of the two hidden
    linear layers. The last linear layer's output is returned as-is.
    """

    def __init__(self):
        super().__init__()
        layers = [
            nn.Flatten(),
            nn.Linear(28*28, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 10),
        ]
        # The attribute name `fc` and the layer order determine the
        # state_dict keys (fc.1.weight, ...), so both are kept fixed.
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Run `x` through the layer stack and return the 10 output scores."""
        scores = self.fc(x)
        return scores


# The model that will later be attacked.
model_target = TargetModel()


### model training

In [None]:
# Use the GPU when one is available, otherwise the CPU, and move the model there.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model_target.to(device)

# Cross-entropy loss on the model outputs, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_target.parameters(), lr=0.001)

# basic training (5 epochs)
n_epochs = 5
for epoch in range(n_epochs):
    model_target.train()
    running_loss = 0.0  # sum of the per-batch losses within this epoch
    for batch in train_loader:
        images, labels = (t.to(device) for t in batch)

        # One optimisation step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model_target(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss:.4f}")


Epoch 1, Loss: 260.4031
Epoch 2, Loss: 96.9816
Epoch 3, Loss: 64.5850
Epoch 4, Loss: 45.4558
Epoch 5, Loss: 35.5921


### evaluation

In [4]:
# Measure the target model's accuracy on the test loader, then save its weights.
model_target.eval()
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for batch in test_loader:
        images, labels = (t.to(device) for t in batch)
        outputs = model_target(images)
        # Predicted class = index of the largest output along dim 1.
        predicted = outputs.max(dim=1).indices
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Target model accuracy: {accuracy:.2f}%")

# save model (weights only, via state_dict)
torch.save(model_target.state_dict(), "model_target.pth")


Target model accuracy: 98.07%


# STEP 2 - surrogate model

### surrogate model

In [5]:
class SurrogateModel(nn.Module):
    """Fully connected classifier: 28*28 inputs -> 128 -> 64 -> 10 outputs.

    The input is flattened first, and a ReLU follows each of the two hidden
    linear layers. The last linear layer's output is returned as-is.
    """

    def __init__(self):
        super().__init__()
        hidden_1, hidden_2 = 128, 64
        # Keep the attribute name `fc` and the layer order: they determine
        # the state_dict keys of this model.
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(28*28, hidden_1),
            nn.ReLU(),
            nn.Linear(hidden_1, hidden_2),
            nn.ReLU(),
            nn.Linear(hidden_2, 10),
        )

    def forward(self, x):
        """Run `x` through the layer stack and return the 10 output scores."""
        scores = self.fc(x)
        return scores

# Build the surrogate, then move it to `device`.
model_surrogate = SurrogateModel()
model_surrogate = model_surrogate.to(device)


### training surrogate

In [6]:
# The surrogate gets its own Adam optimizer; the loss `criterion` is reused.
optimizer_s = optim.Adam(model_surrogate.parameters(), lr=0.001)

n_epochs_surrogate = 5
for epoch in range(n_epochs_surrogate):
    model_surrogate.train()
    total_loss = 0  # sum of the per-batch losses within this epoch
    for batch in train_loader:
        images, labels = (t.to(device) for t in batch)

        # One optimisation step: clear old gradients, forward, backward, update.
        optimizer_s.zero_grad()
        outputs = model_surrogate(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_s.step()

        total_loss += loss.item()
    print(f"[Surrogate] Epoch {epoch+1}, Loss: {total_loss:.4f}")


[Surrogate] Epoch 1, Loss: 313.7414
[Surrogate] Epoch 2, Loss: 128.9547
[Surrogate] Epoch 3, Loss: 88.4981
[Surrogate] Epoch 4, Loss: 65.2625
[Surrogate] Epoch 5, Loss: 51.9341


### surrogate accuracy

In [7]:
# Measure the surrogate model's accuracy on the test loader.
model_surrogate.eval()
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for batch in test_loader:
        images, labels = (t.to(device) for t in batch)
        outputs = model_surrogate(images)
        # Predicted class = index of the largest output along dim 1.
        predicted = outputs.max(dim=1).indices
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Surrogate model accuracy: {100 * correct / total:.2f}%")


Surrogate model accuracy: 97.58%


# STEP 3 - FGSM attack

In [8]:
def fgsm_attack(image, epsilon, data_grad):
    """Build an FGSM-perturbed copy of `image`.

    Every element of `image` is shifted by `epsilon` in the direction of the
    sign of the matching element of `data_grad`; the result is then clamped
    to the range [0, 1].

    Args:
        image: tensor to perturb.
        epsilon: size of the step applied to each element.
        data_grad: gradient tensor; only its element-wise sign is used.

    Returns:
        The perturbed tensor, with all values in [0, 1].
    """
    # Step of magnitude epsilon along the sign of the gradient.
    step = epsilon * data_grad.sign()
    # Clamp so the values stay between 0 and 1.
    return (image + step).clamp(0, 1)


In [9]:
# Transfer attack: craft FGSM examples with the surrogate's gradients and
# check whether they also fool the target model.
epsilon = 0.2
max_images = 500  # upper bound on the number of test images drawn, for speed
adv_correct = 0
total = 0
transfer_success = 0

# map_location loads the checkpoint onto the active device regardless of the
# device it was saved from.
model_target.load_state_dict(torch.load("model_target.pth", map_location=device))
model_target.eval()
# Do not rely on an earlier cell having left the surrogate in eval mode.
model_surrogate.eval()

# Test on a random subset for speed; the seeded generator makes the shuffled
# subset (and therefore the reported numbers) repeatable.
loader = DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=True,
    generator=torch.Generator().manual_seed(0),
)

for idx, (image, label) in enumerate(loader):
    # idx is 0-based and counts every image drawn (including skipped ones),
    # so `>=` stops after exactly max_images images.
    if idx >= max_images:
        break

    image, label = image.to(device), label.to(device)
    image.requires_grad_(True)  # we need the gradient w.r.t. the input

    # Surrogate prediction
    output_surr = model_surrogate(image)
    pred_surr = output_surr.max(1, keepdim=True)[1]

    if pred_surr.item() != label.item():
        continue  # skip samples the surrogate already misclassifies

    loss = criterion(output_surr, label)
    model_surrogate.zero_grad()
    loss.backward()

    data_grad = image.grad
    # Detach: the adversarial example is a plain input for the target model,
    # not part of the surrogate's autograd graph.
    perturbed = fgsm_attack(image, epsilon, data_grad).detach()

    # Test on the target (inference only, so no graph is built)
    with torch.no_grad():
        output_target = model_target(perturbed)
    pred_target = output_target.max(1, keepdim=True)[1]

    # Statistics
    total += 1
    if pred_target.item() == label.item():
        adv_correct += 1
    else:
        transfer_success += 1

# Results
if total == 0:
    # Every drawn sample was skipped (or the loader was empty): avoid a
    # division by zero and say so explicitly.
    print("No samples were attacked; nothing to report.")
else:
    acc = 100 * adv_correct / total
    print(f"Target model accuracy under transfer attack (ε={epsilon}): {acc:.2f}%")
    print(f"Transfer success rate: {transfer_success / total * 100:.2f}%")


Target model accuracy under transfer attack (ε=0.2): 2.02%
Transfer success rate: 97.98%
