In [12]:
import torch
import torchvision
from collections import namedtuple
import os
import matplotlib.pyplot as plt
from attacks.analytic_attack import ImprintAttacker
from modifications.imprint import ImprintBlock
from utils.breaching_utils import *

import medmnist
from medmnist import INFO, Evaluator

from opacus import PrivacyEngine
from opacus.validators import ModuleValidator
from torch.utils.data import DataLoader
import numpy as np
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [13]:
batch_size = 8 # Number of images in the user's batch. We have a small one here for visualization purposes
import random
random.seed(2324) # You can change this to get a new batch.

transforms = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize(256),
        torchvision.transforms.CenterCrop(224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=data_cfg_default.mean, std=data_cfg_default.std),
    ]
)
data_flag = 'dermamnist'
info = INFO[data_flag]
DataClass = getattr(medmnist, info['python_class'])
dataset = DataClass(split="val", transform=transforms, download=True, size=224)
samples = [dataset[i] for i in random.sample(range(len(dataset)), batch_size)]
data_np = np.array([sample[0].numpy() for sample in samples])  # Convert list of numpy arrays to a single numpy array
data = torch.tensor(data_np)  # Convert the numpy array to a PyTorch tensor
#data = torch.stack([sample[0] for sample in samples])
labels = torch.tensor([sample[1] for sample in samples]).flatten()

Using downloaded and verified file: /Users/maximilianeckert/.medmnist/dermamnist_224.npz


### Initialize your model

In [14]:
setup = dict(device=torch.device("cuda" if torch.cuda.is_available() else "cpu"), dtype=torch.float)

# This could be any model:
#model = torchvision.models.resnet18(weights = True)
# Modify the final layer to have 7 output classes
#model.fc = torch.nn.Linear(512, 7)

model = torchvision.models.resnet18(num_classes=7)

# Modify the final layer to have 7 output classes
#model.classifier[1] = torch.nn.Conv2d(512, 7, kernel_size=(1, 1), stride=(1, 1))

# Update the number of classes attribute
#model.num_classes = 7

loss_fn = torch.nn.CrossEntropyLoss()


In [15]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [16]:
from torch.utils.data import Subset, DataLoader

model = ModuleValidator.fix(model)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

training_set = DataClass(split="train", transform=transforms, download=True, size=224)
subset_indices = np.arange(100)
subset_training_set = Subset(training_set, subset_indices)
data_loader = DataLoader(subset_training_set, batch_size=batch_size)

Using downloaded and verified file: /Users/maximilianeckert/.medmnist/dermamnist_224.npz


In [17]:

# add opacus here -> problem with the model structure (ImprintBlock) so do it after the imprint block
#if hasattr(model, "autograd_grad_sample_hooks"):
#   del model.autograd_grad_sample_hooks
#EPSILON = 50.0
#EPOCHS = 2
#DELTA = 1e-5
#MAX_GRAD_NORM = 1.2

privacy_engine = PrivacyEngine()
model, optimizer, data_loader = privacy_engine.make_private(
    module=model,
    optimizer=optimizer,
    data_loader=data_loader,
    max_grad_norm=1.2,
    poisson_sampling= False,
    #grad_sample_mode= "hooks",
    noise_multiplier= 1.1,
    #grad_sample_mode="ew",
    #epochs = 2,
    #target_epsilon = EPSILON,
    #target_delta = DELTA,
)

#print(f"Using sigma={optimizer.noise_multiplier} and C={MAX_GRAD_NORM}")

In [18]:
# It will be modified maliciously:
input_dim = data_cfg_default.shape[0] * data_cfg_default.shape[1] * data_cfg_default.shape[2]
num_bins = 100 # Here we define number of imprint bins
block = ImprintBlock(input_dim, num_bins=num_bins)
model = torch.nn.Sequential(
    torch.nn.Flatten(), block, torch.nn.Unflatten(dim=1, unflattened_size=data_cfg_default.shape), model
)
secret = dict(weight_idx=0, bias_idx=1, shape=tuple(data_cfg_default.shape), structure=block.structure)
secrets = {"ImprintBlock": secret}

In [19]:
import torch
import numpy as np
from opacus.utils.batch_memory_manager import BatchMemoryManager
from tqdm.notebook import tqdm

MAX_PHYSICAL_BATCH_SIZE = 8
DELTA = 1e-5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def accuracy(preds, labels):
    return (preds == labels).mean()

def train(model, train_loader, optimizer, epoch, device):
    model.train()
    criterion = torch.nn.CrossEntropyLoss()
    #criterion.to(device)
    losses = []
    top1_acc = []

    with BatchMemoryManager(
        data_loader=train_loader,
        max_physical_batch_size=MAX_PHYSICAL_BATCH_SIZE,
        optimizer=optimizer
    ) as memory_safe_data_loader:

        for i, (images, target) in enumerate(memory_safe_data_loader):
            optimizer.zero_grad()
            images = images.to(device)
            target = target.to(device)

            # compute output
            output = model(images)
            target = target.flatten()
            loss = criterion(output, target)

            preds = np.argmax(output.detach().cpu().numpy(), axis=1)
            labels = target.detach().cpu().numpy()

            # measure accuracy and record loss
            acc = accuracy(preds, labels)

            losses.append(loss.item())
            top1_acc.append(acc)

            loss.backward()
            optimizer.step()

            if (i + 1) % 200 == 0:
                epsilon = privacy_engine.get_epsilon(DELTA)
                print(
                    f"\tTrain Epoch: {epoch} \t"
                    f"Loss: {np.mean(losses):.6f} "
                    f"Acc@1: {np.mean(top1_acc) * 100:.6f} "
                    f"(ε = {epsilon:.2f}, δ = {DELTA})"
                )

In [7]:
# Model training

for epoch in range(4):
    running_loss = 0.0
    for inputs, labels in data_loader:
        labels = labels.flatten()
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {running_loss/len(data_loader)}")

print("Training finished")




Epoch 1, Loss: 1.761365349476154
Epoch 2, Loss: 2.420740315547356
Epoch 3, Loss: 2.290159894869878
Epoch 4, Loss: 2.3271426191696754
Training finished


In [20]:
def test(model, test_loader, device):
    model.eval()
    criterion = torch.nn.CrossEntropyLoss()
    #criterion.to(device)
    losses = []
    top1_acc = []

    with torch.no_grad():
        for images, target in test_loader:
            images = images.to(device)
            target = target.to(device)

            output = model(images)
            target = target.flatten()
            loss = criterion(output, target)
            preds = np.argmax(output.detach().cpu().numpy(), axis=1)
            labels = target.detach().cpu().numpy()
            acc = accuracy(preds, labels)

            losses.append(loss.item())
            top1_acc.append(acc)

    top1_avg = np.mean(top1_acc)

    print(
        f"\tTest set:"
        f"Loss: {np.mean(losses):.6f} "
        f"Acc: {top1_avg * 100:.6f} "
    )
    return np.mean(top1_acc)

In [21]:
from tqdm.notebook import tqdm
#model.to(device)
EPOCHS = 2

for epoch in tqdm(range(EPOCHS), desc="Epoch", unit="epoch"):
    train(model, data_loader, optimizer, epoch + 1, device)

Epoch:   0%|          | 0/2 [00:00<?, ?epoch/s]

In [9]:
#test_set = DataClass(split="test", transform=transforms, download=True, size=224)
#test_loader = DataLoader(test_set, batch_size=batch_size)


Using downloaded and verified file: /Users/maximilianeckert/.medmnist/dermamnist_224.npz


In [10]:
#top1_acc = test(model, test_loader, device)

NameError: name 'device' is not defined

In [23]:

model_trained = model._modules["3"]
print(model_trained)

GradSampleModule(ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): GroupNorm(32, 64, eps=1e-05, affine=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): GroupNorm(32, 64, eps=1e-05, affine=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): GroupNorm(32, 64, eps=1e-05, affine=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): GroupNorm(32, 64, eps=1e-05, affine=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): GroupNorm(32, 64, eps=1e-05, af

In [24]:
#model_trained = model._module

print(model_trained(data))
top1_acc = test(model_trained, test_loader, device)
print(labels)



tensor([[ 1.0945e+00, -2.2669e+00,  5.2215e-01, -1.4668e+00, -1.5816e+00,
          5.7645e+00, -1.8293e-01],
        [ 1.2489e+00, -2.3246e+00,  6.5417e-01, -1.6065e+00, -1.8673e+00,
          5.7240e+00,  2.3677e-02],
        [ 1.1728e+00, -2.4929e+00,  6.0076e-01, -1.1025e+00, -1.6405e+00,
          5.9059e+00, -1.5486e-01],
        [ 1.2061e+00, -2.4201e+00,  5.9178e-01, -1.4826e+00, -1.8301e+00,
          5.6564e+00,  2.6670e-02],
        [ 1.1953e+00, -2.0234e+00,  6.9307e-01, -1.3356e+00, -1.8601e+00,
          5.6960e+00,  7.1418e-02],
        [ 1.2328e+00, -2.2934e+00,  6.8024e-01, -1.4809e+00, -1.5130e+00,
          5.6255e+00, -1.6966e-03],
        [ 8.2622e-01, -2.5781e+00,  2.5600e-01, -1.6895e+00, -1.6916e+00,
          5.3792e+00,  4.6395e-02],
        [ 9.8170e-01, -2.2247e+00,  6.0846e-01, -1.6373e+00, -1.6655e+00,
          5.6353e+00,  1.2429e-01]], grad_fn=<AddmmBackward0>)


[E thread_pool.cpp:109] Exception in thread pool task: mutex lock failed: Invalid argument


KeyboardInterrupt: 

### Simulate an attacked FL protocol

In [25]:

# This is the attacker:
attacker = ImprintAttacker(model_trained, loss_fn, attack_cfg_default, setup)

#Server-side computation:
queries = [dict(parameters=[p for p in model_trained.parameters()], buffers=[b for b in model_trained.buffers()])]
server_payload = dict(queries=queries, data=data_cfg_default)

#User-side computation:


loss = loss_fn(model_trained(data.to(device)), labels.to(device))



In [26]:
def compute_gradients(model, loss):
    # Zero gradients
    for param in model.parameters():
        if param.grad is not None:
            param.grad.data.zero_()
    model.eval()
    # Perform backward pass manually
    loss.backward(retain_graph=True)

    # Collect gradients
    gradients = []
    for param in model.parameters():
        gradients.append(param.grad.data.clone())
    
    return gradients

In [27]:
#print(len(shared_data["gradients"]),len(shared_data["gradients"][0]))
def manual_compute_gradients(model, loss):
    # Ensure gradients are zeroed
    model.eval()
    optimizer.zero_grad()
    
    # Perform backward pass to compute gradients
    loss.backward(retain_graph=True)
    
    # Collect gradients
    gradients = [param.grad.clone() for param in model.parameters() if param.grad is not None]
    
    return gradients



In [28]:
gradients = manual_compute_gradients(model_trained, loss)

In [29]:
print(loss,model_trained.parameters())

tensor(1.6000, grad_fn=<NllLossBackward0>) <generator object Module.parameters at 0x3210a8c80>


In [30]:

shared_data = dict(
    gradients=[torch.autograd.grad(loss, model_trained.parameters())],
    buffers=None,
    num_data_points=1,
    labels=labels,
    local_hyperparams=None,
)


IndexError: pop from empty list

### Reconstruct data from the update

In [148]:
# Attack:
reconstructed_user_data, stats = attacker.reconstruct(server_payload, shared_data, secrets, dryrun=False)

torch.Size([150528]) torch.Size([150528])
torch.Size([150528]) torch.Size([150528])


IndexError: too many indices for tensor of dimension 1

In [None]:
# Metrics?:
from utils.analysis import report
true_user_data = {'data': data, 'labels': labels}
metrics = report(reconstructed_user_data,
    true_user_data,
    server_payload,
    model, compute_ssim=False) # Can change to true and install a package...
print(f"MSE: {metrics['mse']}, PSNR: {metrics['psnr']}, LPIPS: {metrics['lpips']}, SSIM: {metrics['ssim']} ")

### Plot ground-truth data

In [None]:
plot_data(data_cfg_default, true_user_data, setup)

# Create the "images" folder if it doesn't exist
if not os.path.exists("images"):
    os.makedirs("images")

# Save the images inside the "images" folder
plt.savefig("images/true_user_data.png")


### Now plot reconstructed data

In [None]:
plot_data(data_cfg_default, reconstructed_user_data, setup)
# Save the images inside the "images" folder
plt.savefig("images/reconstructed_user_data.png")