In [1]:
import os
path_project = os.path.dirname(os.path.abspath('.'))
import sys
sys.path.append(os.path.join(path_project, 'src'))
sys.path.append(os.path.join(path_project, 'exp/script'))

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset
import numpy as np
import seaborn as sns
import json
import subprocess
import re
from sklearn.datasets import make_classification

import results_saver
import options

from secure_simulator import SecureWeightingFLSimulator
from simulator import FLSimulator
from mylogger import logger_set_debug

# Directories under <project>/exp; presumably where figures and saved results are kept.
img_path = os.path.join(path_project, 'exp', 'img')
results_path = os.path.join(path_project, 'exp', 'results')
# Default experiment arguments built by the project's `options` module.
# NOTE(review): the structure of the returned object is not visible from this notebook.
default_args = options.build_default_args(path_project)

# Presumably matplotlib marker codes and the display order of the compared methods;
# neither name is referenced in the cells shown in this part of the notebook.
PLOT_MARKERS = ['o', 'v', 's', 'X', 'p', '*', 'h', '^', '<', '>', 'H', 'D', 'd', 'P']
METHOD_ORDER = ["DEFAULT", "ULDP-NAIVE", "ULDP-GROUP-2", "ULDP-GROUP-8", "ULDP-SGD", "ULDP-AVG"]

This notebook shows the calculation performed by the private weighting method and evaluates the overhead of the private method with an artificial dataset and model.


In [2]:
from phe import paillier
from secure_aggregation import (
    SecureAggregator,
    SecureLocalTrainer,
    PRIMARY_SILO_ID,
    gen_random_int_in_GFp,
    non_recursive_mod_inverse,
    integerize,
    encode,
    decode,
    re_integerize,
    PRECISION,
    DIVISIBLE_NUM,
    get_perfect_divisible_number,
)

In [3]:
# Rebind DIVISIBLE_NUM in the notebook namespace. This shadows the name imported from
# `secure_aggregation`; the constant inside that module is not modified by this assignment.
# NOTE(review): 2000 is presumably the largest count the value must be divisible by — confirm
# against get_perfect_divisible_number.
DIVISIBLE_NUM = get_perfect_divisible_number(2000)
# Report how many decimal digits the resulting integer has.
print("C_LCM digit: ", len(str(DIVISIBLE_NUM)))

C_LCM digit:  867


In [5]:
# Private weighting calculation example
#
# Scales a small weight vector by the ratio n / N under Paillier encryption and checks that
# decrypting and rescaling recovers w * n / N to within 1e-7.

# Paillier key pair with a 3072-bit modulus; the plaintext modulus is pk.n.
pk, sk = paillier.generate_paillier_keypair(n_length=3072)
print("n digit: ", len(str(pk.n)))
modulus = pk.n
# NOTE(review): overrides the key's plaintext bound, presumably so that any residue up to
# modulus - 1 can be encrypted — confirm this is safe for the decode step below.
pk.max_int = modulus - 1
# Random blinding factor drawn from an unseeded RandomState, so it differs on every run.
r = gen_random_int_in_GFp(pk.max_int, np.random.RandomState())
# NOTE(review): presumably N is a user's total record count and n the count in one silo — confirm.
N = 900
n = 90
w = 0.01234567890123456789
# dtype=object so the elements can later hold arbitrary-precision Python integers.
w_list = np.array([w, -w]).astype(object)

# Convert the float weights to integers (fixed-point with PRECISION, presumably — confirm).
int_w_list = integerize(w_list, PRECISION)

# Map the (possibly negative) integers to representatives modulo `modulus`.
int_w_list = encode(int_w_list, modulus)

# NOTE(review): computed but not used anywhere else in this cell.
inv_DIVISIBLE_NUM = non_recursive_mod_inverse(DIVISIBLE_NUM, modulus)

# a = (N * r)^-1 mod modulus
a = non_recursive_mod_inverse(N * r, modulus)

enc_a = pk.encrypt(a)
# Multiplying the ciphertext by plaintext scalars: the encrypted value becomes
# a * n * r * DIVISIBLE_NUM = n * N^-1 * DIVISIBLE_NUM (mod modulus); the blinding factor r cancels.
enc_coef = enc_a  * n * r * DIVISIBLE_NUM
# Element-wise: each encoded weight scales the encrypted coefficient.
enc_out = int_w_list * enc_coef

# Decrypt each element, map it back to a signed integer, then divide out DIVISIBLE_NUM
# with ordinary (non-modular) division.
out = (
    np.vectorize(lambda x: decode(sk.decrypt(x), modulus))(enc_out)
    / DIVISIBLE_NUM
)
print(out)
# Compare against the expected w * n / N = w / 10.
# NOTE(review): the trailing comma makes this expression a 1-tuple, so the cell displays
# `(True,)` rather than `True`; a tuple is always truthy, so do not wrap it in `assert` as is.
(
    np.abs(
        re_integerize(out, PRECISION)
        - np.array([0.001234567890123456789, -0.001234567890123456789])
    ).max()
    < 1e-7,
)

n digit:  925
[12345678.9 -12345678.9]


(True,)

In [2]:
def create_artificail_model(input_size, output_size, num_layers, nodes_per_layer):
    """Build a fully connected ReLU network wrapped in ``nn.Sequential``.

    The network is ``Linear(input_size, nodes_per_layer) -> ReLU``, followed by
    ``num_layers - 1`` further ``Linear(nodes_per_layer, nodes_per_layer) -> ReLU``
    pairs, and a final ``Linear(nodes_per_layer, output_size)`` without activation.

    Note that there is always at least one hidden layer: any ``num_layers`` below 1
    yields the same architecture as ``num_layers=1``.

    Args:
        input_size: Number of input features.
        output_size: Number of outputs of the final linear layer.
        num_layers: Number of hidden ``Linear``/``ReLU`` pairs (treated as 1 if smaller).
        nodes_per_layer: Width of every hidden layer.

    Returns:
        The assembled ``nn.Sequential`` model.
    """
    # Layer widths from the input through every hidden layer, in construction order.
    widths = [input_size] + [nodes_per_layer] * max(num_layers, 1)

    modules = []
    for fan_in, fan_out in zip(widths[:-1], widths[1:]):
        modules += [nn.Linear(fan_in, fan_out), nn.ReLU()]
    modules.append(nn.Linear(nodes_per_layer, output_size))

    return nn.Sequential(*modules)


def prepare_artificial_dataset(n_users, n_silos, n_samples, n_features, n_classes, random_state):
    """Generate synthetic per-silo classification data with random user ownership.

    For every silo a fresh ``make_classification`` problem with ``n_samples`` records
    is generated, and each record is assigned to a user drawn with replacement from
    ``range(n_users)``. A separate 100-record test set is generated afterwards.

    Args:
        n_users: Number of distinct user ids records can be assigned to.
        n_silos: Number of silos to generate.
        n_samples: Number of training records per silo.
        n_features: Number of features per record.
        n_classes: Number of target classes.
        random_state: ``np.random.RandomState`` used for all sampling; it is advanced
            by this call.

    Returns:
        A tuple ``(train_dataset, test_dataset, local_dataset_per_silos)`` where
        ``train_dataset`` is an empty list, ``test_dataset`` is a ``TensorDataset``
        and ``local_dataset_per_silos`` maps ``silo_id`` to
        ``(train TensorDataset, [], {user_id: record count}, per-record user ids)``.
    """

    def _synthesize(sample_count):
        # One seed is drawn from random_state per generated dataset.
        features, labels = make_classification(
            n_samples=sample_count,
            n_features=n_features,
            n_classes=n_classes,
            random_state=random_state.randint(2**20),
        )
        return TensorDataset(
            torch.tensor(features, dtype=torch.float32),
            torch.tensor(labels, dtype=torch.long),
        )

    candidate_users = list(range(n_users))
    local_dataset_per_silos = {}
    for silo_id in range(n_silos):
        silo_train = _synthesize(n_samples)
        record_owners = random_state.choice(candidate_users, size=n_samples, replace=True)

        # Histogram of how many records each user owns in this silo.
        records_per_user = {}
        for owner in record_owners:
            records_per_user[owner] = records_per_user.get(owner, 0) + 1

        local_dataset_per_silos[silo_id] = (silo_train, [], records_per_user, record_owners)

    # The test set is generated last so the silo data consumes random_state first.
    return [], _synthesize(100), local_dataset_per_silos

In [3]:
def run_secure_simulation(seed, n_users, n_silos, n_samples, n_features, n_classes, n_layers, nodes_per_layer):
    """Run one secure-weighting FL round on synthetic data and return its results.

    Builds an artificial dataset and model from the given sizes, prints the
    resulting configuration, and runs ``SecureWeightingFLSimulator`` on CPU for a
    single round in which every silo participates (``agg_strategy="ULDP-AVG-w"``,
    10 local epochs; the remaining hyper-parameters are fixed below).

    Args:
        seed: Seeds the data generation and is used to derive the simulator seed.
        n_users: Number of users records are assigned to.
        n_silos: Number of silos; all of them take part in the round.
        n_samples: Number of training records per silo.
        n_features: Input dimension of the data and the model.
        n_classes: Number of classes, i.e. the model's output dimension.
        n_layers: Forwarded to ``create_artificail_model`` as ``num_layers``.
        nodes_per_layer: Hidden layer width of the model.

    Returns:
        Whatever ``simulator.get_results()`` returns. Earlier versions of this
        function computed this value but discarded it and returned ``None``.
    """
    device = "cpu"
    data_random_state = np.random.RandomState(seed=seed)

    # load data
    train_dataset, test_dataset, local_dataset_per_silos = prepare_artificial_dataset(
        n_users=n_users,
        n_silos=n_silos,
        n_samples=n_samples,
        n_features=n_features,
        n_classes=n_classes,
        random_state=data_random_state,
    )

    # load model
    # NOTE(review): the model is initialised before any torch seeding visible in this
    # function — confirm the simulator seeds/re-initialises it if exact reproducibility matters.
    model = create_artificail_model(input_size=n_features, output_size=n_classes, num_layers=n_layers, nodes_per_layer=nodes_per_layer)
    param_size = sum(p.numel() for p in model.parameters() if p.requires_grad)

    print("parameter_size", param_size)
    print("n_users", n_users)
    print("n_silos", n_silos)
    print("n_samples", n_samples)

    # start training
    # The simulator seed is derived from a fresh generator so it does not depend on
    # how many draws the data generation consumed.
    base_seed = np.random.RandomState(seed=seed).randint(2**32 - 1)

    simulator = SecureWeightingFLSimulator(
        seed=base_seed,
        model=model,
        train_dataset=train_dataset,
        test_dataset=test_dataset,
        local_dataset_per_silos=local_dataset_per_silos,
        n_silos=n_silos,
        n_users=n_users,
        device=device,
        n_total_round=1,
        n_silo_per_round=n_silos,
        local_learning_rate=0.01,
        global_learning_rate=0.1,
        local_batch_size=8,
        weight_decay=0.01,
        client_optimizer="sgd",
        local_epochs=10,
        agg_strategy="ULDP-AVG-w",
        clipping_bound=1.0,
        sigma=5.0,
        delta=1e-5,
        group_k=2,
        dataset_name="artificial",
        sampling_rate_q=0.3,
    )
    simulator.run()
    # Hand the results back to the caller instead of dropping them.
    return simulator.get_results()



In [5]:
# Baseline configuration; the sweep cells below each vary exactly one of these values.
seed = 0
n_users = 10
n_silos = 3
n_samples = 10000

n_features = 4
n_classes = 2

# With n_layers = 0 create_artificail_model still builds one hidden layer
# (its extra-layer loop runs range(num_layers - 1) times, i.e. zero times here).
n_layers = 0
nodes_per_layer = 2

# Baseline run (disabled):
# run_secure_simulation(seed, n_users, n_silos, n_samples, n_features, n_classes, n_layers, nodes_per_layer)

In [6]:
# Sweep the number of records per silo; every other setting keeps its baseline value.
for _n_samples in (100, 1000):
    run_secure_simulation(
        seed=seed,
        n_users=n_users,
        n_silos=n_silos,
        n_samples=_n_samples,
        n_features=n_features,
        n_classes=n_classes,
        n_layers=n_layers,
        nodes_per_layer=nodes_per_layer,
    )

parameter_size 16
n_users 10
n_silos 3
n_samples 100
[16:11:34 INFO] Start federated learning simulation with secure weighting.
key_exchange 1.051577091217041
multiplicative_blind_user_hist 4.12992000579834
training_silo_0 85.44185900688171
training_silo_1 84.4896731376648
training_silo_2 89.13435697555542
[16:16:00 INFO] Privacy spent: epsilon = 0.794522032537103 (round 0)
aggregation 2.1517958641052246
[16:16:00 INFO] |----- Global test result of round 0
[16:16:00 INFO] 	 |----- Test/Acc: 0.66 (100), Test/Loss: 0.6677354574203491
global_test 0.002007007598876953
total 1.2874603271484375e-05
[16:16:00 INFO] Finish federated learning simulation
parameter_size 16
n_users 10
n_silos 3
n_samples 1000
[16:16:06 INFO] Start federated learning simulation with secure weighting.
key_exchange 1.0447149276733398
multiplicative_blind_user_hist 4.114915132522583
training_silo_0 86.59528112411499
training_silo_1 83.68200397491455
training_silo_2 86.97076797485352
[16:20:31 INFO] Privacy spent: epsi

In [10]:
# Sweep the hidden layer width (and thereby the parameter count); other settings stay at baseline.
for _nodes_per_layer in (4, 8, 15):
    run_secure_simulation(
        seed=seed,
        n_users=n_users,
        n_silos=n_silos,
        n_samples=n_samples,
        n_features=n_features,
        n_classes=n_classes,
        n_layers=n_layers,
        nodes_per_layer=_nodes_per_layer,
    )

parameter_size 107
n_users 10
n_silos 3
n_samples 10000
[17:08:02 INFO] Start federated learning simulation with secure weighting.
key_exchange 1.0656218528747559
multiplicative_blind_user_hist 4.129354000091553
training_silo_0 287.4782841205597
training_silo_1 303.5857400894165
training_silo_2 267.60097789764404
[17:22:38 INFO] Privacy spent: epsilon = 0.794522032537103 (round 0)
aggregation 11.992104053497314
[17:22:38 INFO] |----- Global test result of round 0
[17:22:38 INFO] 	 |----- Test/Acc: 0.52 (100), Test/Loss: nan
global_test 0.00168609619140625
total 1.6927719116210938e-05
[17:22:38 INFO] Finish federated learning simulation


In [8]:
# Sweep the number of silos; every other setting keeps its baseline value.
for _n_silos in (5, 7, 9):
    run_secure_simulation(
        seed=seed,
        n_users=n_users,
        n_silos=_n_silos,
        n_samples=n_samples,
        n_features=n_features,
        n_classes=n_classes,
        n_layers=n_layers,
        nodes_per_layer=nodes_per_layer,
    )

parameter_size 16
n_users 10
n_silos 5
n_samples 10000
[16:35:36 INFO] Start federated learning simulation with secure weighting.
key_exchange 2.9396181106567383
multiplicative_blind_user_hist 4.531608819961548
training_silo_0 91.01992702484131
training_silo_1 94.6601710319519
training_silo_2 88.53735017776489
training_silo_3 94.27006006240845
training_silo_4 91.61543083190918
[16:43:26 INFO] Privacy spent: epsilon = 0.794522032537103 (round 0)
aggregation 2.1841750144958496
[16:43:26 INFO] |----- Global test result of round 0
[16:43:26 INFO] 	 |----- Test/Acc: 0.5 (100), Test/Loss: nan
global_test 0.00179290771484375
total 3.504753112792969e-05
[16:43:26 INFO] Finish federated learning simulation
parameter_size 16
n_users 10
n_silos 7
n_samples 10000
[16:43:38 INFO] Start federated learning simulation with secure weighting.
key_exchange 5.725150108337402
multiplicative_blind_user_hist 4.887310028076172
training_silo_0 83.21179986000061
training_silo_1 92.95768213272095
training_silo_2

In [11]:
# Sweep the number of users; every other setting keeps its baseline value.
for _n_users in (20, 30, 40):
    run_secure_simulation(
        seed=seed,
        n_users=_n_users,
        n_silos=n_silos,
        n_samples=n_samples,
        n_features=n_features,
        n_classes=n_classes,
        n_layers=n_layers,
        nodes_per_layer=nodes_per_layer,
    )

parameter_size 16
n_users 20
n_silos 3
n_samples 10000
[17:22:41 INFO] Start federated learning simulation with secure weighting.
key_exchange 1.0518741607666016
multiplicative_blind_user_hist 8.142098903656006
training_silo_0 179.95025610923767
training_silo_1 179.33229804039001
training_silo_2 169.8616497516632
[17:31:41 INFO] Privacy spent: epsilon = 0.794522032537103 (round 0)
aggregation 2.156702995300293
[17:31:41 INFO] |----- Global test result of round 0
[17:31:41 INFO] 	 |----- Test/Acc: 0.5 (100), Test/Loss: 1.1250611543655396
global_test 0.0017132759094238281
total 1.5735626220703125e-05
[17:31:41 INFO] Finish federated learning simulation
parameter_size 16
n_users 30
n_silos 3
n_samples 10000
[17:31:44 INFO] Start federated learning simulation with secure weighting.
key_exchange 1.052083969116211
multiplicative_blind_user_hist 12.47703504562378
training_silo_0 220.05000114440918
training_silo_1 256.8751108646393
training_silo_2 241.8202509880066
[17:43:58 INFO] Privacy spen

# MPC evaluation

macOS Monterey Version 12.1
MacBook Pro (14-inch, 2021)
Chip Apple M1 Max
Memory 64GB

parameter_size 16
n_users 10
n_silos 3
n_samples 10000
local_epochs 10

key_exchange 1.0501000881195068
multiplicative_blind_user_hist 4.100116968154907
training_silo 85.51134705543518 94.669105052948 71.39624691009521
aggregation 2.1742188930511475

# n_samples

#### n_samples 10000
key_exchange 1.0501000881195068
multiplicative_blind_user_hist 4.100116968154907
training_silo 85.51134705543518 94.669105052948 71.39624691009521
aggregation 2.1742188930511475

#### n_samples 1000
key_exchange 1.0447149276733398
multiplicative_blind_user_hist 4.114915132522583
training_silo 86.59528112411499 83.68200397491455 86.97076797485352
aggregation 2.145372152328491

#### n_samples 100
key_exchange 1.051577091217041
multiplicative_blind_user_hist 4.12992000579834
training_silo 85.44185900688171 84.4896731376648 89.13435697555542
aggregation 2.1517958641052246


# parameter size


#### parameter_size 30
key_exchange 1.054563045501709
multiplicative_blind_user_hist 4.218053817749023
training_silo 122.73623919487 94.80184197425842 107.45446586608887
aggregation 3.6276512145996094

#### parameter_size 58
key_exchange 1.044889211654663
multiplicative_blind_user_hist 4.1281819343566895
training_silo 186.789489030838 185.6517848968506 172.2580258846283
aggregation 6.741549015045166

#### parameter_size 107
key_exchange 1.0656218528747559
multiplicative_blind_user_hist 4.129354000091553
training_silo_0 287.4782841205597
training_silo_1 303.5857400894165
training_silo_2 267.60097789764404
aggregation 11.992104053497314


# n_silos

#### n_silos 5
key_exchange 2.9396181106567383
multiplicative_blind_user_hist 4.531608819961548
training_silo_0 91.01992702484131
training_silo_1 94.6601710319519
training_silo_2 88.53735017776489
training_silo_3 94.27006006240845
training_silo_4 91.61543083190918
aggregation 2.1841750144958496

#### n_silos 7
key_exchange 5.725150108337402
multiplicative_blind_user_hist 4.887310028076172
training_silo_0 83.21179986000061
training_silo_1 92.95768213272095
training_silo_2 87.23076605796814
training_silo_3 96.2587890625
training_silo_4 93.93128395080566
training_silo_5 84.52289700508118
training_silo_6 89.68692398071289
aggregation 2.1623528003692627

#### n_silos 9
key_exchange 9.477273941040039
multiplicative_blind_user_hist 5.087700128555298
training_silo_0 94.12726998329163
training_silo_1 79.14221906661987
training_silo_2 86.59900283813477
training_silo_3 86.83643698692322
training_silo_4 78.3361611366272
training_silo_5 86.65233302116394
training_silo_6 85.84408116340637
training_silo_7 82.54178977012634
training_silo_8 86.9701840877533
aggregation 2.177841901779175



# n_users

#### n_users 20
key_exchange 1.0518741607666016
multiplicative_blind_user_hist 8.142098903656006
training_silo_0 179.95025610923767
training_silo_1 179.33229804039001
training_silo_2 169.8616497516632
aggregation 2.156702995300293


#### n_users 30
key_exchange 1.052083969116211
multiplicative_blind_user_hist 12.47703504562378
training_silo_0 220.05000114440918
training_silo_1 256.8751108646393
training_silo_2 241.8202509880066
aggregation 2.148691177368164

#### n_users 40
key_exchange 1.0446362495422363
multiplicative_blind_user_hist 16.489341974258423
training_silo_0 319.25748205184937
training_silo_1 309.42834091186523
training_silo_2 336.7686347961426
aggregation 2.1438820362091064