In [1]:
import matplotlib.pyplot as plt
import h5py
import numpy as np
import pandas as pd
import torch

# Both Jupyter and `pfl` use async. `nest_asyncio` allows `pfl` to run inside the notebook 
import nest_asyncio
nest_asyncio.apply()

# append the root directory to your paths to be able to reach the examples.  
torch.random.manual_seed(1)
np.random.seed(1)

# Always import the `pfl` model first before initializing any `pfl` components to let `pfl` know which Deep Learning framework you will use.
import multiprocessing
# Set multiprocessing start method to "spawn" instead of forkserver (which is the default)
# That is because forkserver does not work on Windows, but spawn does.
def init_multiprocessing():
    try:
        multiprocessing.set_start_method("spawn", force=True)  # Forces "spawn"
    except RuntimeError:
        pass  # Ignore if it's already set

init_multiprocessing()

from pfl.model.pytorch import PyTorchModel

# Define the DP mechanism

In [2]:
# Define Gaussian DP mechanisms for central DP guarantees using three different methods

clipping_bound = 0.5
epsilon = 2
delta = 1e-8
cohort_size = 100
num_epochs = 100
sampling_probability = 1e-4
is_central = True

In [3]:
# define a Gaussian DP mechanism using the PLD privacy accountant
# WARNING: it takes a while for the gaussian_moments_accountant mechanism to be instantiated

from pfl.privacy import (PLDPrivacyAccountant, CentrallyAppliedPrivacyMechanism, GaussianMechanism)

# define the PLD privacy accountant, which will use the Gaussian noise mechanism
pld_accountant = PLDPrivacyAccountant(
    num_compositions=num_epochs,
    sampling_probability=sampling_probability,
    mechanism='gaussian',
    epsilon=epsilon,
    delta=delta)

# instantiate a Gaussian noise mechanism using the privacy accountant
pld_gaussian_noise_mechanism = GaussianMechanism.from_privacy_accountant(
    accountant=pld_accountant, clipping_bound=clipping_bound)

# wrap the noise mechanism with CentrallyAppliedPrivacyMechanism to make it a central privacy mechanism
pld_central_privacy = CentrallyAppliedPrivacyMechanism(pld_gaussian_noise_mechanism)

# Define the model

In [4]:
import torch.nn as nn
import torch.nn.functional as F
from typing import Dict, Optional
from pfl.metrics import Weighted


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


pytorch_model = Net()

loss_fn = torch.nn.CrossEntropyLoss()

def loss(inputs: torch.Tensor, targets: torch.Tensor, eval: bool = False) -> torch.Tensor:
    pytorch_model.eval() if eval else pytorch_model.train()
    return loss_fn(pytorch_model(inputs), targets)


@torch.no_grad()
def metrics(inputs: torch.Tensor,
             targets: torch.Tensor,
             eval: bool = True) -> Dict[str, Weighted]:
    pytorch_model.eval() if eval else pytorch_model.train()
    prediction = pytorch_model(inputs)
    logits = torch.argmax(prediction, dim=1)
    num_samples = len(inputs)
    num_predictions = targets.numel()
    correct = torch.sum(torch.eq((logits > 0.0).float(), targets))

    loss = loss_fn(prediction, targets).item()
    return {
        "loss": Weighted(loss, num_samples),
        "accuracy": Weighted(correct, num_predictions)
    }

pytorch_model.loss = loss
pytorch_model.metrics = metrics

# Preprocess the data

## Load the data

In [5]:
from pfl.data.dataset import Dataset
import sys

sys.path.append('../flower/flower_normal')
from centralized import load_data
train_set, test_set = load_data()


Device: cpu


## Reformat the data

In [6]:
# Split the training dataloader into features and labels
all_features = []
all_labels = []

for features, labels in train_set:
    all_features.append(features)
    all_labels.append(labels)

# Convert to tensors if needed
all_features = torch.cat(all_features, dim=0)
all_labels = torch.cat(all_labels, dim=0)


central_num_iterations = 5
central_data = Dataset([all_features, all_labels])

In [7]:
print(all_features.shape)
print(all_labels.shape)

torch.Size([50000, 3, 32, 32])
torch.Size([50000])


In [8]:
# Split the validation dataloader into features and labels
val_all_features = []
val_all_labels = []

for features, labels in test_set:
    val_all_features.append(features)
    val_all_labels.append(labels)

# Convert to tensors if needed
val_all_features = torch.cat(val_all_features, dim=0)
val_all_labels = torch.cat(val_all_labels, dim=0)


In [9]:
print(val_all_features.shape)
print(val_all_labels.shape)

torch.Size([10000, 3, 32, 32])
torch.Size([10000])


In [10]:
n_samples = len(train_set.dataset)
print(n_samples)
val_n_samples = len(test_set.dataset)
print(val_n_samples)

50000
10000


# Train the model

## Setting up the model

In [None]:
from pfl.data import ArtificialFederatedDataset, get_data_sampler

# Create data sampler to sample each artificial user dataset as a random subset of the original dataset
data_sampler = get_data_sampler(sample_type="random", max_bound=n_samples)

# Create an artificial federated dataset where each user dataset has constant size of 10
sample_dataset_len = lambda: 10
federated_dataset = ArtificialFederatedDataset.from_slices(
    data=[all_features, all_labels], 
    data_sampler=data_sampler,
    sample_dataset_len=sample_dataset_len,
)

In [None]:
val_data_sampler = get_data_sampler(sample_type="random", max_bound=val_n_samples)

# Create an artificial federated dataset where each user dataset has constant size of 10
val_sample_dataset_len = lambda: 10
val_federated_dataset = ArtificialFederatedDataset.from_slices(
    data=[val_all_features, val_all_labels],
    data_sampler=val_data_sampler,
    sample_dataset_len=val_sample_dataset_len
)

In [14]:
params = [p for p in pytorch_model.parameters() if p.requires_grad]

model = PyTorchModel(pytorch_model, 
                     local_optimizer_create=torch.optim.SGD,
                     central_optimizer=torch.optim.SGD(params, 1.0))

In [15]:
# PFL training: train with DP

from pfl.algorithm import FederatedAveraging, NNAlgorithmParams
from pfl.callback import CentralEvaluationCallback, AggregateMetricsToDisk
from pfl.hyperparam import NNTrainHyperParams, NNEvalHyperParams
from pfl.aggregate.simulate import SimulatedBackend


model_train_params = NNTrainHyperParams(
    local_learning_rate=0.05,
    local_num_epochs=2,
    local_batch_size=5)

# Do full-batch evaluation to run faster.
model_eval_params = NNEvalHyperParams(local_batch_size=None)

evaluation_frequency = 5
algorithm_params = NNAlgorithmParams(
    central_num_iterations=central_num_iterations,
    evaluation_frequency=evaluation_frequency,
    train_cohort_size=cohort_size,
    val_cohort_size=20)

pfl_callbacks = [CentralEvaluationCallback(central_data, model_eval_params, evaluation_frequency), AggregateMetricsToDisk(output_path='pfl_training_metrics/metrics.csv')]

postprocessors = [pld_central_privacy]

pfl_simulated_backend = SimulatedBackend(
    training_data=federated_dataset,
    val_data=val_federated_dataset,
    postprocessors=postprocessors
)


algorithm = FederatedAveraging()

## Run the training

In [16]:
# PFL training using DP

pfl_model = algorithm.run(
    backend=pfl_simulated_backend,
    model=model,
    algorithm_params=algorithm_params,
    model_train_params=model_train_params,
    model_eval_params=model_eval_params,
    callbacks=pfl_callbacks,
    send_metrics_to_platform=True)

  torch.tensor(v, device=get_default_device()).add(


Metrics at iteration 0 ():
    Central val | loss                                : 4.6076517105102536e-05
    Central val | accuracy                            : 0.1
    Central val | number of data points               : 50000
    Train population | number of devices              : 100
    Train population | number of data points          : 10.0
    Train population | loss before local training     : 0.23036053657531738
    Train population | accuracy before local training : 0.097
    Train population | loss after local training      : 0.22754933476448058
    Train population | accuracy after local training  : 0.097
    Central DP | l2 norm bound                        : 0.5
    Central DP | fraction of clipped norms            : 0.0
    Central DP | norm before clipping                 : 0.07378956619650126
    Train population | total weight                   : 1.0
    Central DP | DP noise std. dev. on summed stats   : 0.24874210357666016
    Central DP | signal-to-DP-noise ratio o