In [1]:
import torch
import torchvision
from collections import namedtuple
import os
import matplotlib.pyplot as plt
from attacks.analytic_attack import ImprintAttacker
from modifications.imprint import ImprintBlock
from utils.breaching_utils import *

import medmnist
from medmnist import INFO, Evaluator

from opacus import PrivacyEngine
from opacus.validators import ModuleValidator
from torch.utils.data import DataLoader
import numpy as np
%load_ext autoreload
%autoreload 2

In [134]:
batch_size = 8 # Number of images in the user's batch. We have a small one here for visualization purposes
import random
random.seed(2324) # You can change this to get a new batch.

transforms = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize(256),
        torchvision.transforms.CenterCrop(224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=data_cfg_default.mean, std=data_cfg_default.std),
    ]
)
data_flag = 'dermamnist'
info = INFO[data_flag]
DataClass = getattr(medmnist, info['python_class'])
dataset = DataClass(split="val", transform=transforms, download=True, size=224)
samples = [dataset[i] for i in random.sample(range(len(dataset)), batch_size)]
data_np = np.array([sample[0].numpy() for sample in samples])  # Convert list of numpy arrays to a single numpy array
data = torch.tensor(data_np)  # Convert the numpy array to a PyTorch tensor
#data = torch.stack([sample[0] for sample in samples])
labels = torch.tensor([sample[1] for sample in samples]).flatten()

Using downloaded and verified file: /Users/maximilianeckert/.medmnist/dermamnist_224.npz


### Initialize your model

In [135]:
setup = dict(device=torch.device("cuda" if torch.cuda.is_available() else "cpu"), dtype=torch.float)

# This could be any model:
#model = torchvision.models.resnet18(weights = True)
# Modify the final layer to have 7 output classes
#model.fc = torch.nn.Linear(512, 7)

model = torchvision.models.squeezenet1_0(pretrained=True)

# Modify the final layer to have 7 output classes
model.classifier[1] = torch.nn.Conv2d(512, 7, kernel_size=(1, 1), stride=(1, 1))

# Update the number of classes attribute
model.num_classes = 7

loss_fn = torch.nn.CrossEntropyLoss()




In [136]:
print(model(data))
print(labels)

tensor([[1.6099, 1.4645, 0.8820, 1.9419, 1.6310, 0.2506, 0.4033],
        [2.2453, 1.2833, 1.3389, 1.8274, 2.6610, 0.7810, 1.1914],
        [1.4521, 0.9041, 0.9726, 1.2699, 1.3063, 0.3086, 0.6245],
        [3.5291, 2.9252, 1.9890, 1.3838, 2.2669, 0.5537, 0.8241],
        [2.9076, 2.3195, 1.4985, 1.9851, 1.8889, 0.2447, 1.0092],
        [2.5162, 2.1325, 1.7550, 1.5822, 3.7739, 1.0111, 1.3861],
        [1.7880, 0.5975, 0.7233, 2.1471, 2.0306, 0.5964, 1.0334],
        [1.8729, 1.0466, 1.1320, 2.3196, 2.5302, 0.4377, 0.6230]],
       grad_fn=<ReshapeAliasBackward0>)
tensor([5, 2, 5, 5, 4, 5, 5, 5])


In [137]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [138]:
from torch.utils.data import Subset, DataLoader

model = ModuleValidator.fix(model)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

training_set = DataClass(split="train", transform=transforms, download=True, size=224)
subset_indices = np.arange(100)
subset_training_set = Subset(training_set, subset_indices)
data_loader = DataLoader(subset_training_set, batch_size=batch_size)

Using downloaded and verified file: /Users/maximilianeckert/.medmnist/dermamnist_224.npz


In [139]:

# add opacus here -> problem with the model structure (ImprintBlock) so do it after the imprint block
#if hasattr(model, "autograd_grad_sample_hooks"):
#   del model.autograd_grad_sample_hooks
EPSILON = 50.0
EPOCHS = 2
DELTA = 1e-5
MAX_GRAD_NORM = 1.2

privacy_engine = PrivacyEngine()
model, optimizer, data_loader = privacy_engine.make_private(
    module=model,
    optimizer=optimizer,
    data_loader=data_loader,
    max_grad_norm=MAX_GRAD_NORM,
    poisson_sampling= False,
    #grad_sample_mode= "hooks",
    noise_multiplier= 1.1,
    #grad_sample_mode="ew",
    #epochs = 2,
    #target_epsilon = EPSILON,
    #target_delta = DELTA,
)

print(f"Using sigma={optimizer.noise_multiplier} and C={MAX_GRAD_NORM}")

Using sigma=1.1 and C=1.2




In [140]:
# It will be modified maliciously:
input_dim = data_cfg_default.shape[0] * data_cfg_default.shape[1] * data_cfg_default.shape[2]
num_bins = 100 # Here we define number of imprint bins
block = ImprintBlock(input_dim, num_bins=num_bins)
model = torch.nn.Sequential(
    torch.nn.Flatten(), block, torch.nn.Unflatten(dim=1, unflattened_size=data_cfg_default.shape), model
)
secret = dict(weight_idx=0, bias_idx=1, shape=tuple(data_cfg_default.shape), structure=block.structure)
secrets = {"ImprintBlock": secret}

In [141]:
import torch
import numpy as np
from opacus.utils.batch_memory_manager import BatchMemoryManager
from tqdm.notebook import tqdm

MAX_PHYSICAL_BATCH_SIZE = 8
DELTA = 1e-5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def accuracy(preds, labels):
    return (preds == labels).mean()

def train(model, train_loader, optimizer, epoch, device):
    model.train()
    criterion = torch.nn.CrossEntropyLoss()
    #criterion.to(device)
    losses = []
    top1_acc = []

    with BatchMemoryManager(
        data_loader=train_loader,
        max_physical_batch_size=MAX_PHYSICAL_BATCH_SIZE,
        optimizer=optimizer
    ) as memory_safe_data_loader:

        for i, (images, target) in enumerate(memory_safe_data_loader):
            optimizer.zero_grad()
            images = images.to(device)
            target = target.to(device)

            # compute output
            output = model(images)
            target = target.flatten()
            loss = criterion(output, target)

            preds = np.argmax(output.detach().cpu().numpy(), axis=1)
            labels = target.detach().cpu().numpy()

            # measure accuracy and record loss
            acc = accuracy(preds, labels)

            losses.append(loss.item())
            top1_acc.append(acc)

            loss.backward()
            optimizer.step()

            if (i + 1) % 200 == 0:
                epsilon = privacy_engine.get_epsilon(DELTA)
                print(
                    f"\tTrain Epoch: {epoch} \t"
                    f"Loss: {np.mean(losses):.6f} "
                    f"Acc@1: {np.mean(top1_acc) * 100:.6f} "
                    f"(ε = {epsilon:.2f}, δ = {DELTA})"
                )

In [95]:
# Model training

for epoch in range(1):
    running_loss = 0.0
    for inputs, labels in data_loader:
        labels = labels.flatten()
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {running_loss/len(data_loader)}")

print("Training finished")




Epoch 1, Loss: 11.13193203852727
Training finished


In [113]:
def test(model, test_loader, device):
    model.eval()
    criterion = torch.nn.CrossEntropyLoss()
    #criterion.to(device)
    losses = []
    top1_acc = []

    with torch.no_grad():
        for images, target in test_loader:
            images = images.to(device)
            target = target.to(device)

            output = model(images)
            target = target.flatten()
            loss = criterion(output, target)
            preds = np.argmax(output.detach().cpu().numpy(), axis=1)
            labels = target.detach().cpu().numpy()
            acc = accuracy(preds, labels)

            losses.append(loss.item())
            top1_acc.append(acc)

    top1_avg = np.mean(top1_acc)

    print(
        f"\tTest set:"
        f"Loss: {np.mean(losses):.6f} "
        f"Acc: {top1_avg * 100:.6f} "
    )
    return np.mean(top1_acc)

In [114]:
from tqdm.notebook import tqdm
#model.to(device)

for epoch in tqdm(range(EPOCHS), desc="Epoch", unit="epoch"):
    train(model, data_loader, optimizer, epoch + 1, device)

Epoch:   0%|          | 0/2 [00:00<?, ?epoch/s]

In [115]:
test_set = DataClass(split="test", transform=transforms, download=True, size=224)
test_loader = DataLoader(test_set, batch_size=batch_size)


Using downloaded and verified file: /Users/maximilianeckert/.medmnist/dermamnist_224.npz


In [116]:
top1_acc = test(model, test_loader, device)

KeyboardInterrupt: 

In [162]:

model_trained = model.__dict__[_modules]['3']
print(model_trained.__dict__.items())

NameError: name '_modules' is not defined

In [155]:
#model_trained = model._module

print(model_trained(data))
top1_acc = test(model_trained, test_loader, device)
print(labels)



tensor([[1.4305, 1.0979, 0.6477, 1.6111, 1.3809, 0.1406, 0.1620],
        [1.6231, 0.8244, 0.7447, 1.5270, 2.3330, 0.3052, 0.7816],
        [1.1552, 0.5816, 0.6148, 0.9779, 1.0850, 0.1464, 0.3954],
        [3.0533, 2.3448, 1.6137, 0.8302, 1.9914, 0.3286, 0.2818],
        [2.8633, 1.9247, 1.0087, 1.5910, 1.5590, 0.0817, 0.5659],
        [1.8518, 1.5261, 1.3469, 0.8847, 3.3491, 0.4206, 0.6603],
        [1.3113, 0.1449, 0.4382, 2.0426, 1.5484, 0.3206, 0.5943],
        [1.3019, 0.5592, 0.4781, 2.1186, 2.0522, 0.2679, 0.3745]],
       grad_fn=<ReshapeAliasBackward0>)


KeyboardInterrupt: 

### Simulate an attacked FL protocol

In [156]:

# This is the attacker:
attacker = ImprintAttacker(model_trained, loss_fn, attack_cfg_default, setup)

#Server-side computation:
queries = [dict(parameters=[p for p in model_trained.parameters()], buffers=[b for b in model_trained.buffers()])]
server_payload = dict(queries=queries, data=data_cfg_default)

#User-side computation:


loss = loss_fn(model_trained(data.to(device)), labels.to(device))



In [144]:
def compute_gradients(model, loss):
    # Zero gradients
    for param in model.parameters():
        if param.grad is not None:
            param.grad.data.zero_()
    model.eval()
    # Perform backward pass manually
    loss.backward(retain_graph=True)

    # Collect gradients
    gradients = []
    for param in model.parameters():
        gradients.append(param.grad.data.clone())
    
    return gradients

In [151]:
#print(len(shared_data["gradients"]),len(shared_data["gradients"][0]))
def manual_compute_gradients(model, loss):
    # Ensure gradients are zeroed
    model.eval()
    optimizer.zero_grad()
    
    # Perform backward pass to compute gradients
    loss.backward(retain_graph=True)
    
    # Collect gradients
    gradients = [param.grad.clone() for param in model.parameters() if param.grad is not None]
    
    return gradients



In [152]:
gradients = manual_compute_gradients(model_trained, loss)

IndexError: pop from empty list

In [157]:
print(loss,model_trained.parameters())

tensor(2.8677, grad_fn=<NllLossBackward0>) <generator object Module.parameters at 0x978449540>


In [158]:

shared_data = dict(
    gradients=[torch.autograd.grad(loss, model_trained.parameters())],
    buffers=None,
    num_data_points=1,
    labels=labels,
    local_hyperparams=None,
)


IndexError: pop from empty list

### Reconstruct data from the update

In [148]:
# Attack:
reconstructed_user_data, stats = attacker.reconstruct(server_payload, shared_data, secrets, dryrun=False)

torch.Size([150528]) torch.Size([150528])
torch.Size([150528]) torch.Size([150528])


IndexError: too many indices for tensor of dimension 1

In [None]:
# Metrics?:
from utils.analysis import report
true_user_data = {'data': data, 'labels': labels}
metrics = report(reconstructed_user_data,
    true_user_data,
    server_payload,
    model, compute_ssim=False) # Can change to true and install a package...
print(f"MSE: {metrics['mse']}, PSNR: {metrics['psnr']}, LPIPS: {metrics['lpips']}, SSIM: {metrics['ssim']} ")

### Plot ground-truth data

In [None]:
plot_data(data_cfg_default, true_user_data, setup)

# Create the "images" folder if it doesn't exist
if not os.path.exists("images"):
    os.makedirs("images")

# Save the images inside the "images" folder
plt.savefig("images/true_user_data.png")


### Now plot reconstructed data

In [None]:
plot_data(data_cfg_default, reconstructed_user_data, setup)
# Save the images inside the "images" folder
plt.savefig("images/reconstructed_user_data.png")