In [1]:
import pandas as pd
import numpy as np
import math
import time
import json
import copy
import os
import sys
sys.path.append('../..')

import torch
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from torch.utils.tensorboard import SummaryWriter

from torchvision.datasets import MNIST
from torchvision.transforms import Compose, Normalize, ToTensor

from fedrpdp.datasets.fed_mnist import (
    BaselineModel,
    BaselineLoss,
    metric
)

from fedrpdp.utils.rpdp_utils import (
    get_sample_rate_curve,
    MultiLevels, 
    MixGauss, 
    Pareto,
)

# ---------------------------------------------------------------------------
# Global configuration, data loading and the shared initial model.
# ---------------------------------------------------------------------------

# Seed used for torch's global RNG.
SEED = 42

# Prefer the first GPU, but fall back to the CPU so the notebook still runs
# on machines without CUDA.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
lr = 0.5

project_abspath = os.path.dirname(os.getcwd())
# Location of the (already downloaded) MNIST files. The MNIST_ROOT environment
# variable overrides the default so the path does not have to be edited here.
DATA_ROOT = os.environ.get('MNIST_ROOT', '/data/privacyGroup/liujunxu/datasets/mnist')

# Both splits use the same preprocessing, so the transform is built once.
mnist_transform = Compose([ToTensor(), Normalize(0.5, 0.5)])
train_data = MNIST(DATA_ROOT, train=True, download=False, transform=mnist_transform)
test_data = MNIST(DATA_ROOT, train=False, download=False, transform=mnist_transform)

# Full-batch loaders: every iteration yields the whole split in dataset order.
train_loader = DataLoader(
    train_data,
    batch_size=len(train_data), # use all data points
    shuffle=False,
    num_workers=0,
)
test_loader = DataLoader(
    test_data,
    batch_size=len(test_data),
    shuffle=False,
    num_workers=0,
)

# Seed BEFORE building the model: seeding afterwards (as the original cell did)
# leaves the initial weights dependent on whatever RNG state the kernel had.
torch.manual_seed(SEED)
model_init = BaselineModel().to(device)

# Differential-privacy hyper-parameters shared by all experiments below.
noise_multiplier = 5.0
max_grad_norm = 5.0
max_epochs = 100
delta = 1e-5

# Split the training points into three consecutive groups (70% / 20% / rest);
# each group is later given its own privacy budget.
total_points = len(train_data)
num_level1 = int(total_points * 0.7)
num_level2 = int(total_points * 0.2)
num_level3 = total_points - num_level1 - num_level2

def train(model, device, train_loader, optimizer, criterion, metric, running_norms=None):
    """Run a single optimisation step on the first batch of ``train_loader``.

    Args:
        model: module to train; it is switched to training mode.
        device: device the batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` pairs; only the
            first pair is consumed.
        optimizer: optimizer exposing ``zero_grad()`` and ``step()``. When
            ``running_norms`` is given, ``step(running_norms)`` must return a
            value that supports element-wise multiplication
            (presumably per-sample gradient norms -- confirm against the
            privacy engine in use).
        criterion: callable ``criterion(output, target)`` returning a scalar
            loss tensor.
        metric: callable ``metric(y_true, y_pred)`` on NumPy arrays whose
            result is divided by the batch size to obtain the accuracy.
        running_norms: optional value forwarded to ``optimizer.step``.

    Returns:
        ``(train_loss, train_acc)`` when ``running_norms`` is ``None``;
        otherwise ``(train_loss, train_acc, squared_norms)`` where
        ``squared_norms`` is the element-wise square of what
        ``optimizer.step(running_norms)`` returned.
    """
    model.train()

    batch_inputs, batch_labels = next(iter(train_loader))
    batch_inputs = batch_inputs.to(device)
    batch_labels = batch_labels.to(device)

    optimizer.zero_grad()
    logits = model(batch_inputs)

    # Accuracy is measured on the pre-update predictions.
    n_correct = metric(
        batch_labels.detach().cpu().numpy(),
        logits.detach().cpu().numpy(),
    )
    train_acc = n_correct / len(batch_labels)

    # Loss value is read out before back-propagation.
    batch_loss = criterion(logits, batch_labels)
    train_loss = batch_loss.item()
    batch_loss.backward()

    # Plain optimizer: nothing extra to report.
    if running_norms is None:
        optimizer.step()
        return train_loss, train_acc

    # Filter-aware optimizer: it receives the running totals and hands back
    # this step's norms, which are squared for the caller.
    step_norms = optimizer.step(running_norms)
    return train_loss, train_acc, step_norms * step_norms
    

def test(model, device, test_loader, criterion, metric):
    model.eval()
    with torch.no_grad():
        data, target = next(iter(test_loader))
        data, target = data.to(device), target.to(device)
        output = model(data)
        test_loss = criterion(output, target).item()
        
        correct = metric(target.detach().cpu().numpy(), output.detach().cpu().numpy())
        test_acc = 1. * correct / len(target)
        print(correct, len(target))
    return test_loss, test_acc

# GD with RDP Filter (NeurIPS'21)

In [2]:
from torchdp import PrivacyEngine
from fedrpdp.accountants.utils import get_noise_multiplier

def generate_rdp_orders():
    """Return the grid of integer orders handed to the privacy engine.

    The candidates are ``int(1.07 ** i + 1)`` for every exponent ``i`` from 0
    up to and including ``floor(log_1.07(1000))``. Duplicates produced by the
    integer truncation are removed.

    Returns:
        A sorted NumPy array of unique integer orders.
    """
    growth = 1.07
    n_exponents = int(math.floor(math.log(1000, growth))) + 1

    orders = []
    for exponent in range(n_exponents):
        orders.append(int(growth ** exponent + 1))

    # np.unique both de-duplicates and sorts.
    return np.unique(orders)

# Per-sample budget on the cumulative squared gradient norm: the three
# consecutive groups of training points get 100, 500 and 2500 respectively.
norm_sq_budgets = [
    level_budget
    for level_budget, level_size in (
        (100, num_level1),
        (500, num_level2),
        (2500, num_level3),
    )
    for _ in range(level_size)
]

# Work on private copies so the shared initial model and loader stay untouched.
_model = copy.deepcopy(model_init)
_train_loader = copy.deepcopy(train_loader)
optimizer = optim.SGD(_model.parameters(), lr=lr, momentum=0)
criterion = BaselineLoss()

# Full-batch setting: batch size and sample size are both the whole dataset.
rdp_orders = generate_rdp_orders()
privacy_engine = PrivacyEngine(
    module=_model,
    batch_size=total_points,
    sample_size=total_points,
    alphas=rdp_orders,
    noise_multiplier=noise_multiplier,
    max_grad_norm=max_grad_norm,
    norm_sq_budget=norm_sq_budgets,
    should_clip=True,
)
privacy_engine.attach(optimizer)

In [3]:
# Disabled sanity check (kept for reference, not executed).
# It would report the epsilon of one point from each budget group by
# temporarily setting `privacy_engine.steps` to budget / max_grad_norm**2 + 1
# and then resetting the step counter to 0.
# NOTE(review): the hard-coded indices 42000 and 54000 presumably correspond to
# num_level1 and num_level1 + num_level2 for a 60000-point training set -- confirm.
# privacy_engine.steps = int(privacy_engine.norm_sq_budget[0]/max_grad_norm**2) + 1
# epsilon1 = privacy_engine.get_epsilon(privacy_engine.norm_sq_budget[0], delta)[0]
# privacy_engine.steps = int(privacy_engine.norm_sq_budget[42000]/max_grad_norm**2) + 1
# epsilon2 = privacy_engine.get_epsilon(privacy_engine.norm_sq_budget[42000], delta)[0]
# privacy_engine.steps = int(privacy_engine.norm_sq_budget[54000]/max_grad_norm**2) + 1
# epsilon3 = privacy_engine.get_epsilon(privacy_engine.norm_sq_budget[54000], delta)[0]
# print(
#     f"δ: {delta} ε1 = {epsilon1:.2f} ε2 = {epsilon2:.2f} ε3 = {epsilon3:.2f}."
# )
# privacy_engine.steps = 0

In [4]:
# ---------------------------------------------------------------------------
# Full-batch training with the per-sample squared-gradient-norm budgets.
# Writes one row per epoch to results_filter.csv. The final values of
# epsilon1 / epsilon2 / epsilon3 are left in the namespace on purpose: the
# next experiment reuses them as its privacy budgets.
# ---------------------------------------------------------------------------

# The loop may run this many epochs beyond max_epochs (previously the literal
# 51 in the range bound, i.e. max_epochs + 50 epochs in total).
EXTRA_EPOCHS = 50
# Training stops once fewer than this many points are still below their budget.
MIN_ACTIVE_POINTS = 10

# History of cumulative per-sample squared gradient norms; entry 0 is the
# all-zero state before any training step.
running_grad_sq_norms = [torch.zeros(total_points, device=device)]
results_all_reps = [
    {
        "test_loss": 0, 
        "test_acc": 0, 
        "seconds": 0, 
        "num_active_points": total_points,
        "norm_sq_budgets": set(norm_sq_budgets), 
        "e": json.dumps({0: 0, num_level1:0, num_level1+num_level2:0}), 
        "d": delta, 
        "nm": round(noise_multiplier, 2), 
        "norm": max_grad_norm
    }
]

# Loop-invariant: build the budget array once instead of once per epoch.
norm_sq_budgets_arr = np.array(norm_sq_budgets)

for epoch in range(1, max_epochs + EXTRA_EPOCHS + 1):
    # Count the points whose cumulative squared norm is still below budget
    # (measured before this epoch's step).
    spent_sq_norms = running_grad_sq_norms[-1].cpu().numpy()
    num_active_points = np.sum(np.round(spent_sq_norms, 4) < norm_sq_budgets_arr)

    start = time.time()
    train_loss, train_acc, grad_sq_norms = train(
        _model, device, _train_loader, optimizer, criterion, metric, running_grad_sq_norms[-1]
    )
    end = time.time()
    seconds = end - start
    running_grad_sq_norms.append(running_grad_sq_norms[-1] + grad_sq_norms)

    # Epsilon of the first point of each of the three budget groups.
    epsilon1 = privacy_engine.get_epsilon(privacy_engine.norm_sq_budget[0], delta)[0]
    epsilon2 = privacy_engine.get_epsilon(privacy_engine.norm_sq_budget[num_level1], delta)[0]
    epsilon3 = privacy_engine.get_epsilon(privacy_engine.norm_sq_budget[num_level1 + num_level2], delta)[0]

    # Cumulative squared norm of the same three representative points.
    spent_sq_norms = running_grad_sq_norms[-1].cpu().numpy()
    print(spent_sq_norms[0], spent_sq_norms[num_level1], spent_sq_norms[num_level1+num_level2])

    print(f"Epoch: {epoch}: seconds = {seconds}")
    print(
        f"Train Loss: {train_loss:.4f} \t Acc: {train_acc:.4f} "
        f"| δ: {delta} ε1 = {epsilon1:.4f} ε2 = {epsilon2:.4f} ε3 = {epsilon3:.4f}."
    )

    test_loss, test_acc = test(_model, device, test_loader, criterion, metric)
    print(
        f"Test  Loss: {test_loss:.4f} \t Acc: {test_acc:.4f}\n"
    )

    results_all_reps.append(
        {
            "test_loss": round(test_loss,4), 
            "test_acc": round(test_acc,4), 
            "seconds": round(seconds,4), 
            "num_active_points": num_active_points.item(),
            # NOTE(review): despite the key name this stores the distinct
            # cumulative squared norms (not the budgets), which can be a very
            # large set per row -- kept as-is to preserve the CSV contents.
            "norm_sq_budgets": set(running_grad_sq_norms[-1].cpu().numpy()),
            "e": json.dumps({0:epsilon1, num_level1:epsilon2, num_level1+num_level2:epsilon3}), 
            "d": delta, 
            "nm": round(noise_multiplier,2), 
            "norm": max_grad_norm
        }
    )

    # Re-write the CSV every epoch so partial results survive an interruption.
    results = pd.DataFrame.from_dict(results_all_reps)
    results.to_csv("results_filter.csv", index=False)

    # The original trailing `epoch += 1` was removed: the for statement
    # rebinds `epoch` on every iteration, so the increment had no effect.
    if num_active_points < MIN_ACTIVE_POINTS:
        break

22.97957 24.99999 24.19378
Epoch: 1: seconds = 9.028477430343628
Train Loss: 2.3069 	 Acc: 0.1049 | δ: 1e-05 ε1 = 0.9797 ε2 = 0.9797 ε3 = 0.9797.
1383 10000
Test  Loss: 2.2888 	 Acc: 0.1383

39.522083 46.174908 42.072067
Epoch: 2: seconds = 7.196760892868042
Train Loss: 2.2888 	 Acc: 0.1370 | δ: 1e-05 ε1 = 1.3972 ε2 = 1.3972 ε3 = 1.3972.
1835 10000
Test  Loss: 2.2773 	 Acc: 0.1835

61.52929 69.82898 64.558174
Epoch: 3: seconds = 7.027766227722168
Train Loss: 2.2782 	 Acc: 0.1846 | δ: 1e-05 ε1 = 1.7224 ε2 = 1.7224 ε3 = 1.7224.
1398 10000
Test  Loss: 2.2683 	 Acc: 0.1398

75.33095 88.58594 79.7293
Epoch: 4: seconds = 7.280696630477905
Train Loss: 2.2688 	 Acc: 0.1386 | δ: 1e-05 ε1 = 1.9994 ε2 = 1.9994 ε3 = 1.9994.
1135 10000
Test  Loss: 2.2779 	 Acc: 0.1135

99.99999 113.58593 104.729294
Epoch: 5: seconds = 7.052935838699341
Train Loss: 2.2785 	 Acc: 0.1128 | δ: 1e-05 ε1 = 1.9994 ε2 = 2.2466 ε3 = 2.2466.
1028 10000
Test  Loss: 2.2593 	 Acc: 0.1028

100.0 133.75291 120.86867
Epoch: 6: sec

7646 10000
Test  Loss: 0.8582 	 Acc: 0.7646

100.0 500.0 1095.8685
Epoch: 45: seconds = 7.359129190444946
Train Loss: 0.8832 	 Acc: 0.7573 | δ: 1e-05 ε1 = 1.9994 ε2 = 4.7026 ε3 = 7.3782.
7673 10000
Test  Loss: 0.8454 	 Acc: 0.7673

100.0 500.0 1120.8685
Epoch: 46: seconds = 7.32367205619812
Train Loss: 0.8706 	 Acc: 0.7607 | δ: 1e-05 ε1 = 1.9994 ε2 = 4.7026 ε3 = 7.4782.
7713 10000
Test  Loss: 0.8325 	 Acc: 0.7713

100.0 500.0 1145.8685
Epoch: 47: seconds = 7.282956838607788
Train Loss: 0.8578 	 Acc: 0.7640 | δ: 1e-05 ε1 = 1.9994 ε2 = 4.7026 ε3 = 7.5782.
7725 10000
Test  Loss: 0.8216 	 Acc: 0.7725

100.0 500.0 1170.8685
Epoch: 48: seconds = 7.164820194244385
Train Loss: 0.8469 	 Acc: 0.7657 | δ: 1e-05 ε1 = 1.9994 ε2 = 4.7026 ε3 = 7.6776.
7747 10000
Test  Loss: 0.8100 	 Acc: 0.7747

100.0 500.0 1195.8685
Epoch: 49: seconds = 7.117162704467773
Train Loss: 0.8354 	 Acc: 0.7689 | δ: 1e-05 ε1 = 1.9994 ε2 = 4.7026 ε3 = 7.7576.
7779 10000
Test  Loss: 0.7984 	 Acc: 0.7779

100.0 500.0 1220.8685

8318 10000
Test  Loss: 0.5533 	 Acc: 0.8318

100.0 500.0 2195.8684
Epoch: 89: seconds = 7.1700239181518555
Train Loss: 0.5816 	 Acc: 0.8250 | δ: 1e-05 ε1 = 1.9994 ε2 = 4.7026 ε3 = 10.9576.
8325 10000
Test  Loss: 0.5499 	 Acc: 0.8325

100.0 500.0 2220.8684
Epoch: 90: seconds = 7.115966081619263
Train Loss: 0.5782 	 Acc: 0.8257 | δ: 1e-05 ε1 = 1.9994 ε2 = 4.7026 ε3 = 11.0376.
8331 10000
Test  Loss: 0.5465 	 Acc: 0.8331

100.0 500.0 2245.8684
Epoch: 91: seconds = 7.010625123977661
Train Loss: 0.5750 	 Acc: 0.8264 | δ: 1e-05 ε1 = 1.9994 ε2 = 4.7026 ε3 = 11.1176.
8343 10000
Test  Loss: 0.5424 	 Acc: 0.8343

100.0 500.0 2270.8684
Epoch: 92: seconds = 7.159003734588623
Train Loss: 0.5710 	 Acc: 0.8280 | δ: 1e-05 ε1 = 1.9994 ε2 = 4.7026 ε3 = 11.1976.
8349 10000
Test  Loss: 0.5398 	 Acc: 0.8349

100.0 500.0 2295.8684
Epoch: 93: seconds = 7.331357955932617
Train Loss: 0.5684 	 Acc: 0.8283 | δ: 1e-05 ε1 = 1.9994 ε2 = 4.7026 ε3 = 11.2776.
8355 10000
Test  Loss: 0.5366 	 Acc: 0.8355

100.0 500.0 23

7618 10000
Test  Loss: 0.7023 	 Acc: 0.7618

100.0 500.0 2500.0
Epoch: 133: seconds = 7.1480302810668945
Train Loss: 0.7268 	 Acc: 0.7558 | δ: 1e-05 ε1 = 1.9994 ε2 = 4.7026 ε3 = 11.7565.
7602 10000
Test  Loss: 0.7072 	 Acc: 0.7602

100.0 500.0 2500.0
Epoch: 134: seconds = 6.982903003692627
Train Loss: 0.7318 	 Acc: 0.7537 | δ: 1e-05 ε1 = 1.9994 ε2 = 4.7026 ε3 = 11.7565.
7590 10000
Test  Loss: 0.7103 	 Acc: 0.7590

100.0 500.0 2500.0
Epoch: 135: seconds = 7.175188064575195
Train Loss: 0.7348 	 Acc: 0.7526 | δ: 1e-05 ε1 = 1.9994 ε2 = 4.7026 ε3 = 11.7565.
7582 10000
Test  Loss: 0.7138 	 Acc: 0.7582

100.0 500.0 2500.0
Epoch: 136: seconds = 7.274751901626587
Train Loss: 0.7384 	 Acc: 0.7513 | δ: 1e-05 ε1 = 1.9994 ε2 = 4.7026 ε3 = 11.7565.
7577 10000
Test  Loss: 0.7171 	 Acc: 0.7577

100.0 500.0 2500.0
Epoch: 137: seconds = 7.17677903175354
Train Loss: 0.7417 	 Acc: 0.7504 | δ: 1e-05 ε1 = 1.9994 ε2 = 4.7026 ε3 = 11.7565.
7570 10000
Test  Loss: 0.7188 	 Acc: 0.7570

100.0 500.0 2500.0
Epoch:

Q: running_grad_sq_norm 与 epsilon 不匹配：epsilon 需在 running_grad_sq_norm 增长过程中，同步达到给定的 epsilon budget？
A: 首先看 `get_epsilon()` 的代码:
``` Python
rdp = self.get_renyi_divergence() * min(self.steps, norm_sq_budget/self.max_grad_norm**2)
return tf_privacy.get_privacy_spent(self.alphas, rdp, target_delta)
```
可见，当 self.steps > norm_sq_budget/max_grad_norm^2 时，rdp不会再变化（导致计算的epsilon不再变化），但 running_grad_sq_norm 还在增加。

另外：
1. 若 nm 确定了，则 self.get_renyi_divergence() 便确定了；而 nm 的大小是根据 MAX_EPSILON 与 MAX_EPOCHS 得到的（即条件是：当训练达到 MAX_EPOCHS 时，恰好达到 MAX_EPSILON）；
2. 若 actual steps 与 norm_sq_budget/max_grad_norm^2 差距特别大，说明选择的 max_grad_norm 太大，需适当调小

In [5]:
from fedrpdp.accountants.utils import get_noise_multiplier
from fedrpdp import PrivacyEngine

# ---------------------------------------------------------------------------
# Training with personalised per-sample sampling rates.
# NOTE: this cell depends on epsilon1 / epsilon2 / epsilon3 left behind by the
# filter experiment above; run that cell first on a fresh kernel.
# Writes one row per epoch to results_ours.csv.
# ---------------------------------------------------------------------------

# Curve used below to turn a target epsilon into a per-sample sampling rate.
curve_fn = get_sample_rate_curve(
    target_delta = delta,
    noise_multiplier = noise_multiplier,
    num_updates = max_epochs,
    num_rounds = None,
    client_rate = None
)
# One epsilon budget per training point, grouped like the three budget levels.
epsilon_budgets = [epsilon1] * num_level1 + [epsilon2] * num_level2 + [epsilon3] * num_level3
# Distinct budget values; computed once because every result row records the
# same set (the original rebuilt it from the full per-sample list each epoch).
epsilon_levels = set(epsilon_budgets)

# Fresh copies so this run starts from the same initial model and loader.
_model = copy.deepcopy(model_init)
_train_loader = copy.deepcopy(train_loader)
optimizer = optim.SGD(_model.parameters(), lr=lr, momentum=0)
criterion = BaselineLoss()

privacy_engine = PrivacyEngine(accountant="pers_rdp", noise_multiplier=noise_multiplier)
privacy_engine.sample_rate_fn = curve_fn
per_sample_rate = [float(privacy_engine.sample_rate_fn(x)) for x in epsilon_budgets]
print(round(min(epsilon_budgets),4), round(min(per_sample_rate),4))
print(round(max(epsilon_budgets),4), round(max(per_sample_rate),4))
# If even the largest rate is zero, no point would ever be sampled.
if max(per_sample_rate) == 0.0:
    raise ValueError("Hyper parameter errors! The maximum value of per_sample_rates is zero!")
privacy_engine.sample_rate = per_sample_rate # TODO: make it as an internal func of PrivacyEngine
print(set(privacy_engine.sample_rate))

_model, optimizer, _train_loader = privacy_engine.make_private_with_personalization(
    module=_model,
    optimizer=optimizer,
    data_loader=_train_loader,
    noise_multiplier=noise_multiplier,
    max_grad_norm=max_grad_norm
)
results_all_reps = []

for epoch in range(1, max_epochs + 1):
    start = time.time()
    train_loss, train_acc = train(_model, device, _train_loader, optimizer, criterion, metric)
    end = time.time()
    seconds = end - start
    
    test_loss, test_acc = test(_model, device, test_loader, criterion, metric)
    
    # Epsilon of the first point of each of the three budget groups.
    epsilon_1 = privacy_engine.get_epsilon(0, delta)
    epsilon_2 = privacy_engine.get_epsilon(num_level1, delta)
    epsilon_3 = privacy_engine.get_epsilon(num_level1+num_level2, delta)
    
    print(f"Epoch: {epoch}")
    print(
        f"Train Loss: {train_loss:.6f} \t Acc: {100*train_acc:.2f}% "
        f"| δ: {delta} "
        f"ε1 = {epsilon_1:.4f}, "
        f"ε2 = {epsilon_2:.4f}, "
        f"ε3 = {epsilon_3:.4f}, "
    )
        
    print("Test  Loss: {:.4f} \t Acc: {:.2f}%\n".format(test_loss, 100*test_acc))
    results_all_reps.append(
        {
            "test_loss": round(test_loss,4), "test_acc": round(test_acc,4), 
             "seconds": round(seconds,4),
             "e": epsilon_levels, "d": delta, "nm": round(noise_multiplier,2), "norm": max_grad_norm
        }
    )
    
    # Re-write the CSV every epoch so partial results survive an interruption.
    results = pd.DataFrame.from_dict(results_all_reps)
    results.to_csv("results_ours.csv", index=False)

r2 score of the curve fitting. 0.9999516878074831
1.9994 0.2213
11.7565 1.0
{0.22128745227537328, 0.4855037425991854, 1.0}
1265 10000
Epoch: 1
Train Loss: 2.305834 	 Acc: 10.80% | δ: 1e-05 ε1 = 0.2153, ε2 = 0.4442, ε3 = 0.7945, 
Test  Loss: 2.2885 	 Acc: 12.65%

2073 10000
Epoch: 2
Train Loss: 2.288741 	 Acc: 12.52% | δ: 1e-05 ε1 = 0.2869, ε2 = 0.6149, ε3 = 1.1599, 
Test  Loss: 2.2722 	 Acc: 20.73%

1794 10000
Epoch: 3
Train Loss: 2.271363 	 Acc: 21.33% | δ: 1e-05 ε1 = 0.3431, ε2 = 0.7473, ε3 = 1.4456, 
Test  Loss: 2.2566 	 Acc: 17.94%

1958 10000
Epoch: 4
Train Loss: 2.257713 	 Acc: 17.70% | δ: 1e-05 ε1 = 0.3912, ε2 = 0.8605, ε3 = 1.6937, 
Test  Loss: 2.2762 	 Acc: 19.58%

1030 10000
Epoch: 5
Train Loss: 2.276587 	 Acc: 19.22% | δ: 1e-05 ε1 = 0.4340, ε2 = 0.9609, ε3 = 1.9162, 
Test  Loss: 2.2670 	 Acc: 10.30%

2129 10000
Epoch: 6
Train Loss: 2.266145 	 Acc: 10.63% | δ: 1e-05 ε1 = 0.4730, ε2 = 1.0555, ε3 = 2.1180, 
Test  Loss: 2.2042 	 Acc: 21.29%

2948 10000
Epoch: 7
Train Loss: 2.208

9215 10000
Epoch: 59
Train Loss: 0.315696 	 Acc: 91.54% | δ: 1e-05 ε1 = 1.4952, ε2 = 3.5222, ε3 = 7.8079, 
Test  Loss: 0.2788 	 Acc: 92.15%

9153 10000
Epoch: 60
Train Loss: 0.291978 	 Acc: 91.64% | δ: 1e-05 ε1 = 1.5081, ε2 = 3.5575, ε3 = 7.8879, 
Test  Loss: 0.3157 	 Acc: 91.53%

9245 10000
Epoch: 61
Train Loss: 0.309612 	 Acc: 91.66% | δ: 1e-05 ε1 = 1.5210, ε2 = 3.5875, ε3 = 7.9679, 
Test  Loss: 0.2698 	 Acc: 92.45%

9231 10000
Epoch: 62
Train Loss: 0.279300 	 Acc: 91.95% | δ: 1e-05 ε1 = 1.5339, ε2 = 3.6174, ε3 = 8.0479, 
Test  Loss: 0.2889 	 Acc: 92.31%

9279 10000
Epoch: 63
Train Loss: 0.283963 	 Acc: 92.18% | δ: 1e-05 ε1 = 1.5468, ε2 = 3.6474, ε3 = 8.1279, 
Test  Loss: 0.2608 	 Acc: 92.79%

9259 10000
Epoch: 64
Train Loss: 0.269445 	 Acc: 92.21% | δ: 1e-05 ε1 = 1.5597, ε2 = 3.6773, ε3 = 8.2079, 
Test  Loss: 0.2796 	 Acc: 92.59%

9287 10000
Epoch: 65
Train Loss: 0.290353 	 Acc: 92.08% | δ: 1e-05 ε1 = 1.5726, ε2 = 3.7072, ε3 = 8.2879, 
Test  Loss: 0.2569 	 Acc: 92.87%

9281 10000
Ep