In [1]:
import pandas as pd
import numpy as np
import time
import math
import os
import sys
sys.path.append('../..')

import torch
import torch.optim as optim
from torch.utils.data import DataLoader, Subset

from fedrpdp.datasets.fed_heart_disease import (
    BaselineModel,
    BaselineLoss,
    FedHeartDisease,
    metric,
)

from fedrpdp.utils.rpdp_utils import (
    get_sample_rate_curve,
    MultiLevels, 
    MixGauss, 
    Pareto,
)

# Use the first CUDA GPU when one is available; otherwise fall back to the CPU
# so the notebook still runs end-to-end on machines without a GPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
lr = 0.1  # learning rate handed to SGD below

# Both splits are requested with pooled=True
# (presumably the data of all centers merged into one set -- TODO confirm).
train_data = FedHeartDisease(train=True, pooled=True)
test_data = FedHeartDisease(train=False, pooled=True)

# batch_size equals the split size, so each loader yields its entire split
# as a single batch, in dataset order (shuffle=False).
train_loader = DataLoader(
    train_data,
    batch_size=len(train_data),
    shuffle=False,
    num_workers=0,
)
test_loader = DataLoader(
    test_data,
    batch_size=len(test_data),
    shuffle=False,
    num_workers=0,
)

# Plain SGD without momentum on the baseline model.
model = BaselineModel().to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0)
criterion = BaselineLoss()

In [2]:
from fedrpdp.accountants.utils import get_noise_multiplier
from fedrpdp import PrivacyEngine

# --- Personalised privacy budgets -------------------------------------------
# Training records are split by position into three groups
# (70% / 20% / remainder); every record in a group gets the same target epsilon.
total_points = len(train_data)
num_level1 = int(0.7 * total_points)
num_level2 = int(0.2 * total_points)
num_level3 = total_points - (num_level1 + num_level2)

epsilon_budgets = [
    budget
    for budget, count in ((0.86, num_level1), (1.76, num_level2), (4.23, num_level3))
    for _ in range(count)
]

# --- DP hyper-parameters ------------------------------------------------------
delta = 1e-3
noise_multiplier = 10.0
max_grad_norm = 1.0
max_epochs = 100

# Disabled alternative: derive the noise multiplier from the largest budget
# instead of fixing it by hand.
#   from fedrpdp.accountants.utils import get_noise_multiplier
#   nm = get_noise_multiplier(
#       target_epsilon=4.23,
#       target_delta=delta,
#       sample_rate=1.0,
#       epochs=max_epochs,
#       accountant="rdp",
#   )

# Callable that maps a target epsilon to a sampling rate for the given
# delta / noise multiplier / number of updates.
curve_fn = get_sample_rate_curve(
    target_delta=delta,
    noise_multiplier=noise_multiplier,
    num_updates=max_epochs,
    num_rounds=None,
    client_rate=None,
)

privacy_engine = PrivacyEngine(accountant="pers_rdp", noise_multiplier=noise_multiplier)
privacy_engine.sample_rate_fn = curve_fn

# One sampling rate per training record, looked up from that record's budget.
per_sample_rate = list(
    float(privacy_engine.sample_rate_fn(eps)) for eps in epsilon_budgets
)

# Report the (budget, rate) pairs at both extremes before validating them.
lowest_rate, highest_rate = min(per_sample_rate), max(per_sample_rate)
print(round(min(epsilon_budgets), 4), round(lowest_rate, 4))
print(round(max(epsilon_budgets), 4), round(highest_rate, 4))
if highest_rate == 0.0:
    raise ValueError("Hyper parameter errors! The maximum value of per_sample_rates is zero!")

privacy_engine.sample_rate = per_sample_rate  # TODO: make it as an internal func of PrivacyEngine
print(set(privacy_engine.sample_rate))

# Rebind model / optimizer / loader to the objects returned by the engine.
model, optimizer, train_loader = privacy_engine.make_private_with_personalization(
    module=model,
    optimizer=optimizer,
    data_loader=train_loader,
    noise_multiplier=noise_multiplier,
    max_grad_norm=max_grad_norm,
)

r2 score of the curve fitting. 0.9998839695905924
0.86 0.2959
4.23 1.0
{0.2958897334748451, 0.5560226155478202, 1.0}


In [3]:
def train(model, device, train_loader, optimizer, criterion, metric, running_norms=None):
    """Perform one optimisation step on the first batch drawn from ``train_loader``.

    Args:
        model: Module to update; it is put into training mode.
        device: Device the batch tensors are moved to.
        train_loader: Iterable of ``(data, target)`` batches. A fresh iterator
            is created and only its first batch is consumed.
        optimizer: Optimizer whose gradients are zeroed before the forward
            pass and which is stepped once after the backward pass.
        criterion: Callable ``criterion(output, target)`` returning the loss.
        metric: Callable ``metric(y_true, y_pred)`` on NumPy arrays. Its result
            is divided by the batch size, so it is expected to be a count of
            correct predictions (TODO confirm against the metric definition).
        running_norms: Unused; accepted only to keep the signature stable.

    Returns:
        ``(train_loss, train_acc)``: the scalar loss of the batch and the
        metric value divided by the number of targets, both computed from the
        predictions made before the parameter update.
    """
    model.train()

    features, labels = next(iter(train_loader))
    features = features.to(device)
    labels = labels.to(device)

    optimizer.zero_grad()
    predictions = model(features)

    # Score the pre-update predictions on detached CPU copies.
    num_correct = metric(
        labels.detach().cpu().numpy(),
        predictions.detach().cpu().numpy(),
    )

    batch_loss = criterion(predictions, labels)
    batch_loss.backward()
    optimizer.step()

    return batch_loss.item(), num_correct / len(labels)
    

def test(model, device, test_loader, criterion, metric):
    model.eval()
    with torch.no_grad():
        data, target = next(iter(test_loader))
        data, target = data.to(device), target.to(device)
        output = model(data)
        test_loss = criterion(output, target).item()
        
        correct = metric(target.detach().cpu().numpy(), output.detach().cpu().numpy())
        test_acc = 1. * correct / len(target)

    return test_loss, test_acc

# One record per epoch; the full history is re-written to CSV after every epoch
# so partial results survive an interrupted run.
results_all_reps = []
for epoch in range(1, max_epochs + 1):
    # perf_counter() is monotonic and high-resolution, which matters here
    # because a single training step only takes a few milliseconds.
    start = time.perf_counter()
    train_loss, train_acc = train(model, device, train_loader, optimizer, criterion, metric)
    end = time.perf_counter()
    seconds = end - start

    test_loss, test_acc = test(model, device, test_loader, criterion, metric)

    # Query the accountant at positions 0, num_level1 and num_level1+num_level2,
    # i.e. the first entry of each budget group in epsilon_budgets
    # (presumably the first argument is a per-sample index -- TODO confirm).
    epsilon_1 = privacy_engine.get_epsilon(0, delta)
    epsilon_2 = privacy_engine.get_epsilon(num_level1, delta)
    epsilon_3 = privacy_engine.get_epsilon(num_level1+num_level2, delta)
    print(f"Epoch: {epoch}")
    print(
        f"Train Loss: {train_loss:.6f} \t Acc: {100*train_acc:.2f}% "
        f"| δ: {delta} "
        f"ε1 = {epsilon_1:.2f}, "
        f"ε2 = {epsilon_2:.2f}, "
        f"ε3 = {epsilon_3:.2f}, "
    )

    print("Test  Loss: {:.4f} \t Acc: {:.2f}%\n".format(test_loss, 100*test_acc))
    results_all_reps.append(
        {
            "test_loss": round(test_loss,4), "test_acc": round(test_acc,4),
             "seconds": round(seconds,4),
             "e": set(epsilon_budgets), "d": delta, "nm": round(noise_multiplier,2), "norm": max_grad_norm
        }
    )

    # A list of per-epoch dicts is the documented input of the DataFrame
    # constructor (one row per dict).
    results = pd.DataFrame(results_all_reps)
    results.to_csv("results_ours.csv", index=False)

Epoch: 1
Train Loss: 0.663185 	 Acc: 60.89% | δ: 0.001 ε1 = 0.06, ε2 = 0.13, ε3 = 0.24, 
Test  Loss: 0.6613 	 Acc: 58.66%

Epoch: 2
Train Loss: 0.681481 	 Acc: 57.41% | δ: 0.001 ε1 = 0.09, ε2 = 0.19, ε3 = 0.35, 
Test  Loss: 0.6492 	 Acc: 59.84%

Epoch: 3
Train Loss: 0.632004 	 Acc: 63.64% | δ: 0.001 ε1 = 0.11, ε2 = 0.24, ε3 = 0.45, 
Test  Loss: 0.6336 	 Acc: 61.81%

Epoch: 4
Train Loss: 0.634878 	 Acc: 64.43% | δ: 0.001 ε1 = 0.13, ε2 = 0.28, ε3 = 0.53, 
Test  Loss: 0.6215 	 Acc: 62.99%

Epoch: 5
Train Loss: 0.615344 	 Acc: 68.27% | δ: 0.001 ε1 = 0.15, ε2 = 0.31, ε3 = 0.60, 
Test  Loss: 0.6109 	 Acc: 65.35%

Epoch: 6
Train Loss: 0.623546 	 Acc: 65.37% | δ: 0.001 ε1 = 0.17, ε2 = 0.35, ε3 = 0.67, 
Test  Loss: 0.6007 	 Acc: 67.32%

Epoch: 7
Train Loss: 0.595830 	 Acc: 70.00% | δ: 0.001 ε1 = 0.18, ε2 = 0.38, ε3 = 0.74, 
Test  Loss: 0.5892 	 Acc: 68.11%

Epoch: 8
Train Loss: 0.570463 	 Acc: 72.86% | δ: 0.001 ε1 = 0.20, ε2 = 0.41, ε3 = 0.80, 
Test  Loss: 0.5790 	 Acc: 68.50%

Epoch: 9
Train L

Epoch: 68
Train Loss: 0.420888 	 Acc: 78.40% | δ: 0.001 ε1 = 0.68, ε2 = 1.41, ε3 = 2.80, 
Test  Loss: 0.4438 	 Acc: 81.50%

Epoch: 69
Train Loss: 0.440497 	 Acc: 79.29% | δ: 0.001 ε1 = 0.69, ε2 = 1.43, ε3 = 2.83, 
Test  Loss: 0.4439 	 Acc: 81.50%

Epoch: 70
Train Loss: 0.470067 	 Acc: 77.51% | δ: 0.001 ε1 = 0.69, ε2 = 1.44, ε3 = 2.85, 
Test  Loss: 0.4433 	 Acc: 81.50%

Epoch: 71
Train Loss: 0.432574 	 Acc: 78.37% | δ: 0.001 ε1 = 0.70, ε2 = 1.45, ε3 = 2.88, 
Test  Loss: 0.4428 	 Acc: 81.89%

Epoch: 72
Train Loss: 0.434007 	 Acc: 79.41% | δ: 0.001 ε1 = 0.70, ε2 = 1.46, ε3 = 2.90, 
Test  Loss: 0.4428 	 Acc: 81.89%

Epoch: 73
Train Loss: 0.424918 	 Acc: 80.65% | δ: 0.001 ε1 = 0.71, ε2 = 1.48, ε3 = 2.93, 
Test  Loss: 0.4425 	 Acc: 81.89%

Epoch: 74
Train Loss: 0.471991 	 Acc: 77.48% | δ: 0.001 ε1 = 0.71, ε2 = 1.49, ε3 = 2.95, 
Test  Loss: 0.4423 	 Acc: 81.89%

Epoch: 75
Train Loss: 0.394914 	 Acc: 82.35% | δ: 0.001 ε1 = 0.72, ε2 = 1.50, ε3 = 2.98, 
Test  Loss: 0.4425 	 Acc: 81.89%

Epoch: 7

In [4]:
# Reload the per-epoch log written by the training loop and display the mean
# duration of one training step (the "seconds" column).
results = pd.read_csv("results_ours.csv")
np.mean(results["seconds"].to_numpy())

0.011286000000000001