In [1]:
import pandas as pd
import numpy as np
import math
import time
import json
import copy
import os
import sys
sys.path.append('../..')

import torch
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from torch.utils.tensorboard import SummaryWriter

from torchvision.datasets import MNIST
from torchvision.transforms import Compose, Normalize, ToTensor

from fedrpdp.datasets.fed_mnist import (
    BaselineModel,
    BaselineLoss,
    metric
)

from fedrpdp.utils.rpdp_utils import (
    get_sample_rate_curve,
    MultiLevels, 
    MixGauss, 
    Pareto,
)

# Use the first GPU when one is visible, otherwise fall back to the CPU so the
# notebook still runs end to end on a machine without CUDA.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
lr = 0.5

project_abspath = os.path.dirname(os.getcwd())
# Location of the pre-downloaded MNIST files (download=False below).
# Set the MNIST_ROOT environment variable to point somewhere else.
DATA_ROOT = os.environ.get('MNIST_ROOT', '/data/privacyGroup/liujunxu/datasets/mnist')

# The same preprocessing is applied to the train and test splits.
mnist_transform = Compose([ToTensor(), Normalize(0.5, 0.5)])
train_data = MNIST(DATA_ROOT, train=True, download=False, transform=mnist_transform)
test_data = MNIST(DATA_ROOT, train=False, download=False, transform=mnist_transform)

# Full-batch loaders: every iteration yields the whole split as one batch,
# in dataset order (shuffle=False).
train_loader = DataLoader(
    train_data,
    batch_size=len(train_data), # use all data points
    shuffle=False,
    num_workers=0,
)
test_loader = DataLoader(
    test_data,
    batch_size=len(test_data),
    shuffle=False,
    num_workers=0,
)

# Seed BEFORE building the model so that any random weight initialisation in
# BaselineModel is covered by the seed and every experiment below starts from
# the same `model_init`.
torch.manual_seed(42)
model_init = BaselineModel().to(device)

# Differential-privacy hyper-parameters shared by the experiments below.
noise_multiplier = 5.0
max_grad_norm = 5.0
max_epochs = 100
delta = 1e-5

# Split the training points into three consecutive groups (70% / 20% / rest);
# each group later receives its own privacy budget.
total_points = len(train_data)
num_level1 = int(total_points * 0.7)
num_level2 = int(total_points * 0.2)
num_level3 = total_points - num_level1 - num_level2

def train(model, device, train_loader, optimizer, criterion, metric, running_norms=None):
    """Perform a single optimisation step on the first batch of ``train_loader``.

    Args:
        model: network to update; switched to training mode.
        device: device the batch is moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches; only the first
            batch is consumed.
        optimizer: optimiser wrapping ``model``'s parameters.
        criterion: callable ``criterion(output, target)`` returning a scalar loss.
        metric: callable ``metric(y_true, y_pred)`` on NumPy arrays; its result is
            divided by the batch size, i.e. it is treated as a count of correct
            predictions.
        running_norms: optional value forwarded to ``optimizer.step``. When given,
            the optimiser is expected to return gradient norms from ``step``
            (presumably one per sample -- confirm against the optimiser in use).

    Returns:
        ``(train_loss, train_acc)`` when ``running_norms`` is ``None``, otherwise
        ``(train_loss, train_acc, squared_gradient_norms)``.
    """
    model.train()

    inputs, labels = next(iter(train_loader))
    inputs = inputs.to(device)
    labels = labels.to(device)

    optimizer.zero_grad()
    logits = model(inputs)

    # Accuracy of the model *before* this step's parameter update.
    num_correct = metric(labels.detach().cpu().numpy(), logits.detach().cpu().numpy())
    train_acc = num_correct / len(labels)

    # Loss value is read out before back-propagation.
    batch_loss = criterion(logits, labels)
    train_loss = batch_loss.item()
    batch_loss.backward()

    # Plain optimiser: nothing extra to report.
    if running_norms is None:
        optimizer.step()
        return train_loss, train_acc

    # Norm-tracking optimiser: square the norms it hands back.
    step_norms = optimizer.step(running_norms)
    return train_loss, train_acc, step_norms * step_norms
    

def test(model, device, test_loader, criterion, metric):
    model.eval()
    with torch.no_grad():
        data, target = next(iter(test_loader))
        data, target = data.to(device), target.to(device)
        output = model(data)
        test_loss = criterion(output, target).item()
        
        correct = metric(target.detach().cpu().numpy(), output.detach().cpu().numpy())
        test_acc = 1. * correct / len(target)
        print(correct, len(target))
    return test_loss, test_acc

# GD with RDP Filter (NeurIPS'21)

In [2]:
from torchdp import PrivacyEngine
from fedrpdp.accountants.utils import get_noise_multiplier

def generate_rdp_orders(dense=1.07, max_order=1000):
    """Build a geometrically spaced grid of integer orders (alphas).

    The grid is ``int(dense ** i + 1)`` for ``i = 0 .. floor(log_dense(max_order))``,
    de-duplicated and sorted. With the defaults this is the grid the notebook
    has always used (it starts at 2).

    Args:
        dense: growth factor between consecutive candidate orders; must be > 1.
        max_order: upper bound used to decide how many powers of ``dense`` to
            generate; must be >= 1.

    Returns:
        Sorted 1-D ``numpy.ndarray`` of unique integer orders.

    Raises:
        ValueError: if ``dense <= 1`` or ``max_order < 1``.
    """
    if dense <= 1:
        raise ValueError("dense must be greater than 1")
    if max_order < 1:
        raise ValueError("max_order must be at least 1")

    num_powers = int(math.floor(math.log(max_order, dense))) + 1
    # int() truncates, so small powers collapse onto the same order;
    # np.unique removes those duplicates and returns the values sorted.
    return np.unique([int(dense ** i + 1) for i in range(num_powers)])

# One budget per training point, in dataset order: the first `num_level1`
# points get 100, the next `num_level2` get 500, the remaining `num_level3`
# get 2500. The engine receives this list as `norm_sq_budget`.
norm_sq_budgets = [
    budget
    for budget, count in ((100, num_level1), (500, num_level2), (2500, num_level3))
    for _ in range(count)
]

# Work on copies so the shared initial model and loader stay untouched.
_model = copy.deepcopy(model_init)
_train_loader = copy.deepcopy(train_loader)
criterion = BaselineLoss()
optimizer = optim.SGD(_model.parameters(), lr=lr, momentum=0)

# batch_size == sample_size: every step uses the full training set.
privacy_engine = PrivacyEngine(
    module=_model,
    batch_size=total_points,
    sample_size=total_points,
    alphas=generate_rdp_orders(),
    noise_multiplier=noise_multiplier,
    max_grad_norm=max_grad_norm,
    norm_sq_budget=norm_sq_budgets,
    should_clip=True,
)
privacy_engine.attach(optimizer)

In [3]:
# Disabled sanity check, kept for reference only.
# For one sample of each budget level it sets `privacy_engine.steps` to
# int(budget / max_grad_norm**2) + 1, reads the resulting epsilon, prints the
# three values, and finally resets `steps` to 0.
# NOTE(review): the indices 42000 and 54000 are hard-coded; they equal
# `num_level1` and `num_level1 + num_level2` only when total_points == 60000.
# Use those variables instead if this cell is ever re-enabled.
# privacy_engine.steps = int(privacy_engine.norm_sq_budget[0]/max_grad_norm**2) + 1
# epsilon1 = privacy_engine.get_epsilon(privacy_engine.norm_sq_budget[0], delta)[0]
# privacy_engine.steps = int(privacy_engine.norm_sq_budget[42000]/max_grad_norm**2) + 1
# epsilon2 = privacy_engine.get_epsilon(privacy_engine.norm_sq_budget[42000], delta)[0]
# privacy_engine.steps = int(privacy_engine.norm_sq_budget[54000]/max_grad_norm**2) + 1
# epsilon3 = privacy_engine.get_epsilon(privacy_engine.norm_sq_budget[54000], delta)[0]
# print(
#     f"δ: {delta} ε1 = {epsilon1:.2f} ε2 = {epsilon2:.2f} ε3 = {epsilon3:.2f}."
# )
# privacy_engine.steps = 0

In [None]:
# Epochs to keep running past `max_epochs`. The loop also stops early once
# fewer than `min_active_points` samples are still below their budget.
extra_epochs = 50
min_active_points = 10

# Budgets as an array, built once instead of on every epoch.
norm_sq_budgets_arr = np.array(norm_sq_budgets)
# First index of each budget level; one representative sample per level is reported.
idx_level1, idx_level2, idx_level3 = 0, num_level1, num_level1 + num_level2

# History of the accumulated squared gradient norm of every sample;
# entry 0 is the all-zero starting point, one entry is appended per epoch.
running_grad_sq_norms = [torch.zeros(total_points, device=device)]
results_all_reps = [
    {
        "test_loss": 0,
        "test_acc": 0,
        "seconds": 0,
        "num_active_points": total_points,
        "norm_sq_budgets": set(norm_sq_budgets),
        "e": json.dumps({0: 0, num_level1:0, num_level1+num_level2:0}),
        "d": delta,
        "nm": round(noise_multiplier, 2),
        "norm": max_grad_norm
    }
]

for epoch in range(1, max_epochs + extra_epochs + 1):
    # Count the samples whose accumulated squared norm (before this step) is
    # still strictly below their budget.
    norms_before = running_grad_sq_norms[-1].cpu().numpy()
    num_active_points = np.sum(np.round(norms_before, 4) < norm_sq_budgets_arr)

    start = time.time()
    train_loss, train_acc, grad_sq_norms = train(_model, device, _train_loader, optimizer, criterion, metric, running_grad_sq_norms[-1])
    end = time.time()
    seconds = end - start
    running_grad_sq_norms.append(running_grad_sq_norms[-1] + grad_sq_norms)

    # Epsilon spent so far by one representative sample of each budget level.
    epsilon1 = privacy_engine.get_epsilon(privacy_engine.norm_sq_budget[idx_level1], delta)[0]
    epsilon2 = privacy_engine.get_epsilon(privacy_engine.norm_sq_budget[idx_level2], delta)[0]
    epsilon3 = privacy_engine.get_epsilon(privacy_engine.norm_sq_budget[idx_level3], delta)[0]

    # Single host copy of the updated norms, reused for logging and the record below.
    norms_after = running_grad_sq_norms[-1].cpu().numpy()
    print(norms_after[idx_level1], norms_after[idx_level2], norms_after[idx_level3])

    print(f"Epoch: {epoch}: seconds = {seconds}")
    print(
        f"Train Loss: {train_loss:.4f} \t Acc: {train_acc:.4f} "
        f"| δ: {delta} ε1 = {epsilon1:.4f} ε2 = {epsilon2:.4f} ε3 = {epsilon3:.4f}."
    )

    test_loss, test_acc = test(_model, device, test_loader, criterion, metric)
    print(
        f"Test  Loss: {test_loss:.4f} \t Acc: {test_acc:.4f}\n"
    )

    results_all_reps.append(
        {
            "test_loss": round(test_loss,4),
            "test_acc": round(test_acc,4),
            "seconds": round(seconds,4),
            "num_active_points": num_active_points.item(),
            # NOTE(review): despite the key name, this stores the set of distinct
            # accumulated squared norms after the step, not the budgets.
            "norm_sq_budgets": set(norms_after),
            "e": json.dumps({0:epsilon1, num_level1:epsilon2, num_level1+num_level2:epsilon3}),
            "d": delta,
            "nm": round(noise_multiplier,2),
            "norm": max_grad_norm
        }
    )

    # Rewrite the CSV every epoch so partial results survive an interrupted run.
    results = pd.DataFrame.from_dict(results_all_reps)
    results.to_csv("results_filter.csv", index=False)

    # Stop once (almost) every sample had exhausted its budget before this step.
    if num_active_points < min_active_points:
        break

22.97957 27.384346 24.19378
Epoch: 1: seconds = 10.402732849121094
Train Loss: 2.3069 	 Acc: 0.1049 | δ: 1e-05 ε1 = 0.9797 ε2 = 0.9797 ε3 = 0.9797.
1355 10000
Test  Loss: 2.2901 	 Acc: 0.1355

38.89315 48.83638 42.095863
Epoch: 2: seconds = 8.521914958953857
Train Loss: 2.2901 	 Acc: 0.1347 | δ: 1e-05 ε1 = 0.9797 ε2 = 1.3972 ε3 = 1.3972.
1731 10000
Test  Loss: 2.2783 	 Acc: 0.1731

58.586983 72.31065 64.689644
Epoch: 3: seconds = 8.4428129196167
Train Loss: 2.2789 	 Acc: 0.1735 | δ: 1e-05 ε1 = 0.9797 ε2 = 1.7224 ε3 = 1.7224.
1831 10000
Test  Loss: 2.2733 	 Acc: 0.1831

69.961494 91.73306 78.84592
Epoch: 4: seconds = 8.376971244812012
Train Loss: 2.2733 	 Acc: 0.1834 | δ: 1e-05 ε1 = 0.9797 ε2 = 1.9994 ε3 = 1.9994.
1165 10000
Test  Loss: 2.2722 	 Acc: 0.1165

99.789734 120.50515 104.62121
Epoch: 5: seconds = 8.345195055007935
Train Loss: 2.2733 	 Acc: 0.1151 | δ: 1e-05 ε1 = 0.9797 ε2 = 2.2466 ε3 = 2.2466.
1069 10000
Test  Loss: 2.2550 	 Acc: 0.1069

100.0 140.40471 119.36687
Epoch: 6: se

5824 10000
Test  Loss: 1.1406 	 Acc: 0.5824

100.0 500.0 2500.0
Epoch: 45: seconds = 7.075235843658447
Train Loss: 1.1715 	 Acc: 0.5652 | δ: 1e-05 ε1 = 0.9797 ε2 = 2.2466 ε3 = 5.3026.
5758 10000
Test  Loss: 1.1525 	 Acc: 0.5758

100.0 500.0 2500.0
Epoch: 46: seconds = 7.118883848190308
Train Loss: 1.1835 	 Acc: 0.5601 | δ: 1e-05 ε1 = 0.9797 ε2 = 2.2466 ε3 = 5.3026.
5666 10000
Test  Loss: 1.1677 	 Acc: 0.5666

100.0 500.0 2500.0
Epoch: 47: seconds = 7.119960784912109
Train Loss: 1.1987 	 Acc: 0.5534 | δ: 1e-05 ε1 = 0.9797 ε2 = 2.2466 ε3 = 5.3026.
5585 10000
Test  Loss: 1.1862 	 Acc: 0.5585

100.0 500.0 2500.0
Epoch: 48: seconds = 7.2267491817474365
Train Loss: 1.2177 	 Acc: 0.5463 | δ: 1e-05 ε1 = 0.9797 ε2 = 2.2466 ε3 = 5.3026.
5540 10000
Test  Loss: 1.1974 	 Acc: 0.5540

100.0 500.0 2500.0
Epoch: 49: seconds = 7.130969524383545
Train Loss: 1.2291 	 Acc: 0.5416 | δ: 1e-05 ε1 = 0.9797 ε2 = 2.2466 ε3 = 5.3026.
5502 10000
Test  Loss: 1.2054 	 Acc: 0.5502

100.0 500.0 2500.0
Epoch: 50: seco

4863 10000
Test  Loss: 1.4783 	 Acc: 0.4863

100.0 500.0 2500.0
Epoch: 90: seconds = 7.098005294799805
Train Loss: 1.5147 	 Acc: 0.4843 | δ: 1e-05 ε1 = 0.9797 ε2 = 2.2466 ε3 = 5.3026.
4865 10000
Test  Loss: 1.4834 	 Acc: 0.4865



Q: running_grad_sq_norm 与 epsilon 不匹配：epsilon 需在 running_grad_sq_norm 增长过程中，同步达到给定的 epsilon budget？
A: 首先看 `get_epsilon()` 的代码:
```python
rdp = self.get_renyi_divergence() * min(self.steps, norm_sq_budget/self.max_grad_norm**2)
return tf_privacy.get_privacy_spent(self.alphas, rdp, target_delta)
```
可见，当 self.steps > norm_sq_budget/max_grad_norm^2 时，rdp 不会再变化（因此计算出的 epsilon 也不再变化），但 running_grad_sq_norm 仍在继续增加。

另外：
1. 若 nm 确定了，则 self.get_renyi_divergence() 便确定了；而 nm 的大小是根据 MAX_EPSILON 与 MAX_EPOCHS 得到的（即条件是：当达到 MAX_EPOCHS 时，恰好达到 MAX_EPSILON）；
2. 若 actual steps 与 norm_sq_budget/max_grad_norm^2 差距特别大，说明选择的 max_grad_norm 太大，需适当调小。

In [None]:
from fedrpdp.accountants.utils import get_noise_multiplier
from fedrpdp import PrivacyEngine

# Curve used below to turn each sample's epsilon budget into a sampling rate.
curve_fn = get_sample_rate_curve(
    target_delta=delta,
    noise_multiplier=noise_multiplier,
    num_updates=max_epochs,
    num_rounds=None,
    client_rate=None,
)

# One epsilon budget per training point, in dataset order. epsilon1/2/3 are
# whatever values the filter experiment's loop left behind in the kernel.
epsilon_budgets = [
    eps
    for eps, count in ((epsilon1, num_level1), (epsilon2, num_level2), (epsilon3, num_level3))
    for _ in range(count)
]

# Fresh copies so this experiment starts from the same initial model and loader.
_model = copy.deepcopy(model_init)
_train_loader = copy.deepcopy(train_loader)
criterion = BaselineLoss()
optimizer = optim.SGD(_model.parameters(), lr=lr, momentum=0)

privacy_engine = PrivacyEngine(accountant="pers_rdp", noise_multiplier=noise_multiplier)
privacy_engine.sample_rate_fn = curve_fn
per_sample_rate = [float(privacy_engine.sample_rate_fn(eps)) for eps in epsilon_budgets]

# Report the smallest and largest budget next to the smallest and largest rate.
print(round(min(epsilon_budgets), 4), round(min(per_sample_rate), 4))
print(round(max(epsilon_budgets), 4), round(max(per_sample_rate), 4))

# If even the largest rate is zero, no sample would ever be drawn.
if max(per_sample_rate) == 0.0:
    raise ValueError("Hyper parameter errors! The maximum value of per_sample_rates is zero!")

privacy_engine.sample_rate = per_sample_rate # TODO: make it as an internal func of PrivacyEngine
print(set(privacy_engine.sample_rate))

_model, optimizer, _train_loader = privacy_engine.make_private_with_personalization(
    module=_model,
    optimizer=optimizer,
    data_loader=_train_loader,
    noise_multiplier=noise_multiplier,
    max_grad_norm=max_grad_norm,
)
# One record per epoch; the CSV is rewritten after every epoch.
results_all_reps = []

for epoch in range(1, max_epochs + 1):
    # Time only the training step.
    t_start = time.time()
    train_loss, train_acc = train(_model, device, _train_loader, optimizer, criterion, metric)
    seconds = time.time() - t_start

    test_loss, test_acc = test(_model, device, test_loader, criterion, metric)

    # Epsilon reported by the engine for the first sample of each budget level.
    epsilon_1 = privacy_engine.get_epsilon(0, delta)
    epsilon_2 = privacy_engine.get_epsilon(num_level1, delta)
    epsilon_3 = privacy_engine.get_epsilon(num_level1+num_level2, delta)

    print(f"Epoch: {epoch}")
    print(
        f"Train Loss: {train_loss:.6f} \t Acc: {100*train_acc:.2f}% "
        f"| δ: {delta} "
        f"ε1 = {epsilon_1:.4f}, "
        f"ε2 = {epsilon_2:.4f}, "
        f"ε3 = {epsilon_3:.4f}, "
    )
    print("Test  Loss: {:.4f} \t Acc: {:.2f}%\n".format(test_loss, 100*test_acc))

    epoch_record = {
        "test_loss": round(test_loss,4),
        "test_acc": round(test_acc,4),
        "seconds": round(seconds,4),
        "e": set(epsilon_budgets),
        "d": delta,
        "nm": round(noise_multiplier,2),
        "norm": max_grad_norm,
    }
    results_all_reps.append(epoch_record)

    # Persist everything collected so far.
    results = pd.DataFrame.from_dict(results_all_reps)
    results.to_csv("results_ours.csv", index=False)