In [2]:
from autodp import rdp_acct
import matplotlib.pyplot as plt
import numpy as np

import os
# The parent of the current working directory is used as the project root.
path_project = os.path.dirname(os.path.abspath('.'))
import sys
# Extend the import search path before the project-local imports below.
sys.path.append(os.path.join(path_project, 'src'))
sys.path.append(os.path.join(path_project, 'exp/script'))

# NOTE(review): presumably resolved through one of the sys.path entries added above — confirm.
import options

# Paths under <project>/exp; pickle_path is used by later cells to cache curves and results.
img_path = os.path.join(path_project, 'exp', 'img')
pickle_path = os.path.join(path_project, 'exp', 'pickle')
results_path = os.path.join(path_project, 'exp', 'results')
default_args = options.build_default_args(path_project)

import copy
from run_simulation import run_simulation

from mylogger import logger_set_debug, logger_set_info, logger_set_warning
from opacus.accountants import RDPAccountant

import pickle

def RDP_gaussian_with_C(sigma, alpha, C):
    """Evaluate ``0.5 * C**2 / sigma**2 * alpha``.

    Args:
        sigma: Noise scale; must be strictly positive.
        alpha: Order at which the expression is evaluated; must be non-negative.
        C: Sensitivity that scales the result quadratically.

    Returns:
        The value ``0.5 * C**2 / sigma**2 * alpha``.

    Raises:
        AssertionError: If ``sigma <= 0`` or ``alpha < 0``.
    """
    assert sigma > 0
    assert alpha >= 0
    squared_sensitivity = C ** 2
    noise_variance = sigma ** 2
    return 0.5 * squared_sensitivity / noise_variance * alpha

In [3]:
# binary search given q_u
def from_q_u(q_u, delta, epsilon_u, sigma, T, m=100, precision=1e-6):
    """Bisect for the sensitivity whose T-step epsilon lands just below ``epsilon_u``.

    For each candidate sensitivity in the interval [0, 100], a fresh
    ``RDPAccountant`` is stepped ``T`` times with
    ``noise_multiplier=sigma / sensitivity_u`` and ``sample_rate=q_u``, and the
    resulting epsilon at ``delta`` is compared with ``epsilon_u``. The bound
    updates treat epsilon as non-decreasing in the sensitivity.

    Args:
        q_u: Sampling rate passed to the accountant.
        delta: Delta used when converting to epsilon.
        epsilon_u: Target privacy budget.
        sigma: Noise scale; divided by the candidate sensitivity to obtain the
            noise multiplier.
        T: Number of accountant steps.
        m: Unused here; it was only consumed by the previous
            ``rdp_acct.anaRDPacct(m=m)`` based accountant.
        precision: Accepted gap ``epsilon_u - eps`` (exclusive on both ends).

    Returns:
        ``(sensitivity_u, eps)`` with ``0 < epsilon_u - eps < precision``.

    Raises:
        RuntimeError: If the bisection stalls (the midpoint stops changing)
            before the precision criterion is met, e.g. because the target is
            not reachable inside [0, 100]. Previously this looped forever.
    """
    max_sensitivity_u = 100
    min_sensitivity_u = 0
    previous_sensitivity_u = None
    while True:
        sensitivity_u = (max_sensitivity_u + min_sensitivity_u) / 2
        # A repeated midpoint means the interval cannot shrink any further;
        # every following iteration would be identical, so stop instead of spinning.
        if sensitivity_u == previous_sensitivity_u:
            raise RuntimeError(
                f"binary search for sensitivity_u did not converge "
                f"(q_u={q_u}, epsilon_u={epsilon_u}, sigma={sigma}, T={T}, "
                f"last sensitivity_u={sensitivity_u})"
            )
        previous_sensitivity_u = sensitivity_u

        accountant = RDPAccountant()
        for _ in range(T):
            accountant.step(noise_multiplier=sigma / sensitivity_u, sample_rate=q_u)
        eps = accountant.get_epsilon(delta=delta)

        if eps < epsilon_u:
            min_sensitivity_u = sensitivity_u
        else:
            max_sensitivity_u = sensitivity_u
        if 0 < epsilon_u - eps < precision:
            return sensitivity_u, eps


# binary search given_sensitivity_u
def from_sensitivity_u(sensitivity_u, delta, epsilon_u, sigma, T, m=100, precision=1e-6):
    """Bisect for the sampling rate whose T-step epsilon lands just below ``epsilon_u``.

    For each candidate ``q_u`` in the interval [0, 1], a fresh ``RDPAccountant``
    is stepped ``T`` times with ``noise_multiplier=sigma / sensitivity_u`` and
    ``sample_rate=q_u``, and the resulting epsilon at ``delta`` is compared with
    ``epsilon_u``. The bound updates treat epsilon as non-decreasing in ``q_u``.

    Args:
        sensitivity_u: Fixed sensitivity; ``sigma`` is divided by it to obtain
            the noise multiplier.
        delta: Delta used when converting to epsilon.
        epsilon_u: Target privacy budget.
        sigma: Noise scale.
        T: Number of accountant steps.
        m: Unused here; it was only consumed by the previous
            ``rdp_acct.anaRDPacct(m=m)`` based accountant.
        precision: Accepted gap ``epsilon_u - eps`` (exclusive on both ends).

    Returns:
        ``(q_u, eps)`` with ``0 < epsilon_u - eps < precision``.

    Raises:
        RuntimeError: If the bisection stalls (the midpoint stops changing)
            before the precision criterion is met, e.g. when even ``q_u = 1``
            stays below the budget. Previously this looped forever.
    """
    max_q_u = 1.0
    min_q_u = 0
    noise_multiplier = sigma / sensitivity_u  # loop-invariant
    previous_q_u = None
    while True:
        q_u = (max_q_u + min_q_u) / 2
        # A repeated midpoint means the interval cannot shrink any further;
        # every following iteration would be identical, so stop instead of spinning.
        if q_u == previous_q_u:
            raise RuntimeError(
                f"binary search for q_u did not converge "
                f"(sensitivity_u={sensitivity_u}, epsilon_u={epsilon_u}, "
                f"sigma={sigma}, T={T}, last q_u={q_u})"
            )
        previous_q_u = q_u

        accountant = RDPAccountant()
        for _ in range(T):
            accountant.step(noise_multiplier=noise_multiplier, sample_rate=q_u)
        eps = accountant.get_epsilon(delta=delta)

        if eps < epsilon_u:
            min_q_u = q_u
        else:
            max_q_u = q_u
        if 0 < epsilon_u - eps < precision:
            return q_u, eps
        

def make_q_c_curve(epsilon_u, delta, sigma, n_round=100, num_points=20, min=-5):
    """Build the (q_u, sensitivity_u) curve for a given privacy budget.

    The sampling-rate grid is a log-spaced part from ``10**min`` to ``10**-1``
    followed by a linear part from 0.15 to 1.0. For every rate, ``from_q_u``
    supplies the matching sensitivity.

    Args:
        epsilon_u: Target privacy budget.
        delta: Delta forwarded to ``from_q_u``.
        sigma: Noise scale forwarded to ``from_q_u``.
        n_round: Number of rounds, forwarded as ``T``.
        num_points: Requested grid size; ``num_points // 3 * 2`` points go on the
            log grid and half of that count on the linear grid.
        min: Exponent of the smallest sampling rate on the log grid.

    Returns:
        ``(x, y)``: list of sampling rates and list of matching sensitivities.

    Raises:
        AssertionError: If ``from_q_u`` returns an epsilon above ``epsilon_u``.
    """
    n_log = num_points // 3 * 2
    n_lin = int(n_log / 2)
    log_grid = np.logspace(min, -1, n_log).tolist()
    lin_grid = np.linspace(0.15, 1.0, n_lin).tolist()
    x = log_grid + lin_grid

    y = []
    for rate in x:
        sensitivity_u, eps = from_q_u(
            q_u=rate, delta=delta, epsilon_u=epsilon_u, sigma=sigma, T=n_round
        )
        assert eps <= epsilon_u, f"eps={eps} > epsilon_u={epsilon_u}"
        y.append(sensitivity_u)
    return x, y

def plot_q_c_curve(x, y, title="", log=True):
    """Plot sensitivity against sampling rate, together with their product.

    Args:
        x: Sampling rates (q_u).
        y: Sensitivities matching ``x``.
        title: Figure title.
        log: When true, both axes use a logarithmic scale.

    Side effects:
        Prints the index at which ``x * y`` is largest and shows the figure.
    """
    fig = plt.figure(figsize=(7, 5))
    ax = fig.add_subplot(111)
    ax.plot(x, y, marker='o', label='sensitivity_u')

    # Annotate every fifth point with its coordinates.
    for xi, yi in list(zip(x, y))[::5]:
        ax.annotate(
            f"({xi:.5f}, {yi:.5f})",
            (xi, yi),
            textcoords="offset points",
            xytext=(0, 10),
            ha='center',
        )

    # Plot the product q*C.
    product = np.array(x) * np.array(y)
    ax.plot(x, product, marker='x', linestyle='--', color='red', label='product_x*y')
    print("Max idx", np.argmax(product))

    if log:
        ax.set_xscale("log")
        ax.set_yscale("log")
    ax.set_xlabel("q_u")
    ax.set_ylabel("sensitivity_u")
    ax.set_title(title)
    plt.show()

def make_epsilon_u_list(epsilon=1.0, n_users=0, dist="homo", epsilon_list=[], ratio_list=[]):
    """Create one privacy budget per user as a sequence.

    Args:
        epsilon: Budget given to every user when ``dist == "homo"``.
        n_users: Number of users.
        dist: ``"homo"`` (same budget for all) or ``"hetero"`` (sampled).
        epsilon_list: Candidate budgets for ``"hetero"``.
        ratio_list: Sampling probabilities matching ``epsilon_list``.

    Returns:
        A list for ``"homo"``; the array returned by ``np.random.choice`` for
        ``"hetero"``.

    Raises:
        AssertionError: If ``"hetero"`` is requested with an empty list.
        ValueError: If ``dist`` is neither ``"homo"`` nor ``"hetero"``.
    """
    if dist == "homo":
        return [epsilon] * n_users
    if dist != "hetero":
        raise ValueError(f"invalid dist {dist}")

    assert len(epsilon_list) > 0 and len(ratio_list) > 0
    # Draws from NumPy's global random state.
    return np.random.choice(epsilon_list, size=n_users, p=ratio_list)


# Take a list of epsilon_u values and build the per-user C_u and q_u.
def make_params(epsilon_u_list, delta, sigma, n_users=0, idx=None, selection="idx", idx_list=None):
    """Pick a (C_u, q_u) pair for every user from the precomputed curves.

    Curves are read from the notebook-level ``points_store`` dict, keyed by
    ``(epsilon_u, delta, sigma)`` and holding ``(q_list, C_list)``.

    Args:
        epsilon_u_list: Privacy budget per user, paired with user ids
            ``0 .. n_users - 1``.
        delta: Delta component of the ``points_store`` key.
        sigma: Sigma component of the ``points_store`` key.
        n_users: Number of users.
        idx: Curve index used for every user when ``selection == "idx"``.
        selection: ``"idx"`` or ``"idx_list"``.
        idx_list: Mapping from epsilon_u to curve index, used when
            ``selection == "idx_list"``.

    Returns:
        ``(C_u, q_u)``: dicts from user id to the selected values. Users that
        are not assigned keep ``0.0``; in particular, with ``selection == "idx"``
        and an ``idx`` whose type is not exactly ``int``, nothing is assigned.

    Raises:
        ValueError: If no curve is stored for a user's key, or ``selection`` is
            not one of the supported values.
    """
    user_ids = range(n_users)
    C_u = dict.fromkeys(user_ids, 0.0)
    q_u = dict.fromkeys(user_ids, 0.0)

    for epsilon_u, user_id in zip(epsilon_u_list, user_ids):
        curve = points_store.get((epsilon_u, delta, sigma))
        if curve is None:
            raise ValueError(f"no points for {epsilon_u} and {delta} and {sigma}")
        q_list, C_list = curve

        if selection == "idx":
            if type(idx) is not int:
                # Non-int idx: leave this user's entries at 0.0.
                continue
            chosen = idx
        elif selection == "idx_list":
            chosen = idx_list[epsilon_u]
        else:
            raise ValueError(f"invalid selection: {selection}")

        C_u[user_id] = C_list[chosen]
        q_u[user_id] = q_list[chosen]

    return C_u, q_u


def fed_simulation(
    delta,
    sigma,
    n_users,
    C_u=None,
    q_u=None,
    times=1,
    user_dist="uniform-iid",
    silo_dist="uniform",
    dataset_name="light_mnist",
    global_learning_rate=10.0,
    n_round=10,
    local_epochs=50,
    local_learning_rate=0.01,
    agg_strategy="PULDP-AVG",
    epsilon_u=None,
    group_thresholds=None,
):
    """Configure the default args and run the federated simulation ``times`` times.

    The default args come from ``options.build_default_args``; for the
    ``heart_disease`` and ``tcga_brca`` datasets they are first passed through
    the dataset's ``update_args``. The keyword arguments of this function are
    then written onto the args object, with ``dry_run`` and ``secure_w`` forced
    to ``False``, and the logger is switched to info level.

    Returns:
        A list with one entry per repetition: the value returned by
        ``run_simulation``, or the string ``"LOSS IS NAN"`` /
        ``"Assertion Error"`` when the run raised ``OverflowError`` /
        ``AssertionError``.

    Note:
        The seed is updated in place with ``seed + i`` on every repetition, so
        the offsets accumulate (base, base+1, base+3, ...).
    """
    args = options.build_default_args(path_project)

    # Dataset-specific adjustments of the default args.
    if dataset_name == "heart_disease":
        from flamby_utils.heart_disease import update_args

        args = update_args(args)
    elif dataset_name == "tcga_brca":
        from flamby_utils.tcga_brca import update_args

        args = update_args(args)

    # Applied in this exact order onto the args object.
    overrides = {
        "dataset_name": dataset_name,
        "agg_strategy": agg_strategy,
        "n_total_round": n_round,
        "n_users": n_users,
        "local_epochs": local_epochs,
        "times": times,
        "user_dist": user_dist,
        "silo_dist": silo_dist,
        "global_learning_rate": global_learning_rate,
        "local_learning_rate": local_learning_rate,
        "delta": delta,
        "sigma": sigma,
        "C_u": C_u,
        "q_u": q_u,
        "epsilon_u": epsilon_u,
        "group_thresholds": group_thresholds,
        "dry_run": False,
        "secure_w": False,
    }
    for attr_name, attr_value in overrides.items():
        setattr(args, attr_name, attr_value)

    logger_set_info()

    results_list = []
    for repetition in range(args.times):
        args.seed += repetition
        try:
            outcome = run_simulation(args, path_project)
        except OverflowError:
            outcome = "LOSS IS NAN"
        except AssertionError:
            outcome = "Assertion Error"
        results_list.append(outcome)

    return results_list


def calc_metric(results):
    """Summarise the final global test metrics over repeated runs.

    Each successful run is expected to expose
    ``r['global']['global_test'][-1]``, whose elements at index 1 and 2 are
    used as the final accuracy and loss. String entries (the failure markers
    ``fed_simulation`` stores for runs that raised) are skipped instead of
    crashing the whole summary.

    Args:
        results: List of per-run results, possibly mixed with failure strings.

    Returns:
        ``(acc_mean, acc_std, loss_mean, loss_std)``; all NaN when no run
        succeeded.

    Side effects:
        Prints the summary line, preceded by a note when runs were skipped.
    """
    completed = [r for r in results if not isinstance(r, str)]
    n_failed = len(results) - len(completed)
    if n_failed:
        print(f"skipped {n_failed} failed run(s)")

    if completed:
        final_records = [r['global']['global_test'][-1] for r in completed]
        acc_list = np.array([record[1] for record in final_records])
        loss_list = np.array([record[2] for record in final_records])
        acc_mean, acc_std = np.mean(acc_list), np.std(acc_list)
        loss_mean, loss_std = np.mean(loss_list), np.std(loss_list)
    else:
        # Nothing to aggregate: report NaN without triggering NumPy warnings.
        acc_mean = acc_std = loss_mean = loss_std = float("nan")

    print(f"{acc_mean:.4f} ± {acc_std:.4f}", f"{loss_mean:.4f} ± {loss_std:.4f}")
    return acc_mean, acc_std, loss_mean, loss_std


def make_epsilon_u(epsilon=1.0, n_users=0, dist="homo", epsilon_list=[], ratio_list=[]):
    """Create a mapping from user id to privacy budget.

    Args:
        epsilon: Budget given to every user when ``dist == "homo"``.
        n_users: Number of users; ids are ``0 .. n_users - 1``.
        dist: ``"homo"`` (same budget for all) or ``"hetero"`` (sampled).
        epsilon_list: Candidate budgets for ``"hetero"``.
        ratio_list: Sampling probabilities matching ``epsilon_list``.

    Returns:
        Dict ``{user_id: epsilon}``.

    Raises:
        AssertionError: If ``"hetero"`` is requested with an empty list.
        ValueError: If ``dist`` is neither ``"homo"`` nor ``"hetero"``.
    """
    if dist == "homo":
        return dict.fromkeys(range(n_users), epsilon)
    if dist != "hetero":
        raise ValueError(f"invalid dist {dist}")

    assert len(epsilon_list) > 0 and len(ratio_list) > 0
    # Draws from NumPy's global random state.
    sampled = np.random.choice(epsilon_list, size=n_users, p=ratio_list)
    return dict(zip(range(n_users), sampled))

# Check monotonically increasing

In [None]:
delta = 1e-5
sigma = 5.0
epsilon_u = 5.0
T = 1000

q_u_list = [0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.8, 0.99]
sensitivity_u_list = [0.01, 0.1, 1.0, 5.0, 10, 100]
data = []

# Compute epsilon for every (q_u, sensitivity_u) pair; keep only values below 1e10.
for q_u in q_u_list:
    for sensitivity_u in sensitivity_u_list:
        func_gaussian = lambda x: RDP_gaussian_with_C(sigma, x, sensitivity_u)
        accountant = rdp_acct.anaRDPacct(m=100)
        for _ in range(T):
            accountant.compose_subsampled_mechanisms_lowerbound(func=func_gaussian, prob=q_u)
        eps = accountant.get_eps(delta)
        if eps < 1e10:
            data.append((q_u, sensitivity_u, eps))


def _plot_epsilon_panels(fixed_values, fixed_col, x_col, caption_template, x_label, figsize):
    """Draw one log-log epsilon panel per value in ``fixed_values``.

    ``fixed_col`` / ``x_col`` are the positions inside the ``data`` tuples of the
    quantity held fixed per panel and of the quantity on the x axis.
    """
    fig = plt.figure(figsize=figsize)
    for position, fixed in enumerate(fixed_values, start=1):
        rows = [d for d in data if d[fixed_col] == fixed]
        xs = [d[x_col] for d in rows]
        ys = [d[2] for d in rows]
        ax = fig.add_subplot(1, len(fixed_values), position)
        caption = caption_template.format(fixed)
        ax.plot(xs, ys, label=caption, marker='o')
        ax.set_title(caption, size=14)
        ax.set_xlabel(x_label, size=14)
        if position == 1:
            # Only the left-most panel carries the y label.
            ax.set_ylabel('epsilon', size=14)
        ax.set_xscale('log')
        ax.set_yscale('log')
    plt.show()


# One panel per q_u: epsilon as a function of sensitivity_u.
_plot_epsilon_panels(q_u_list, 0, 1, 'q_u = {}', 'sensitivity_u', (15, 2))

# One panel per sensitivity_u: epsilon as a function of q_u.
_plot_epsilon_panels(sensitivity_u_list, 1, 0, 'sensitivity_u = {}', 'q_u', (18, 2))

# Binary Search based method

- `accountant = rdp_acct.anaRDPacct(m=m)`
    - Zhu, Yuqing, and Yu-Xiang Wang. "Poission subsampled rényi differential privacy." International Conference on Machine Learning. PMLR, 2019.
- `accountant = RDPAccountant()`
    - Mironov, Ilya, Kunal Talwar, and Li Zhang. "Rényi differential privacy of the sampled Gaussian mechanism." arXiv preprint arXiv:1908.10530 (2019).

In [None]:
delta = 1e-5
sigma = 5.0
epsilon_u = 5.0
T = 100

# Settings shared by both searches.
search_settings = dict(delta=delta, epsilon_u=epsilon_u, sigma=sigma, T=T)

# Fix the sampling rate and search for the sensitivity.
sensitivity_u, eps = from_q_u(q_u=0.1, **search_settings)
print("sensitivity_u =", sensitivity_u, "eps =", eps)

# Fix the sensitivity and search for the sampling rate.
q_u, eps = from_sensitivity_u(sensitivity_u=1.59, **search_settings)
print("q_u =", q_u, "eps =", eps)

# Observation
- Observe how the results change across different sensitivities and sampling rates.

Parameters
- epsilon_u, delta_u
    - privacy budget of each user
- homo, hetero
    - distribution of the privacy budgets across users

In [None]:
# Privacy budgets (epsilon, delta) and noise scales for which curves are prepared.
delta = 1e-5
privacy_budget_list = [(budget, 1e-5) for budget in (0.5, 1.0, 3.0, 5.0, 10.0)]
sigma_list = [0.5, 1.0]


In [None]:
# Load the cached curves from file, compute the missing ones, and write the cache back.

n_round = 20
points_store_file = os.path.join(pickle_path, f'points_store_T{n_round}.pkl')
try:
    # NOTE: pickle.load can execute arbitrary code; only load cache files
    # written by this notebook.
    with open(points_store_file, 'rb') as file:
        points_store = pickle.load(file)
except FileNotFoundError:
    points_store = {}


for epsilon_u, delta in privacy_budget_list:
    for sigma in sigma_list:
        # Skip combinations that are already cached.
        if (epsilon_u, delta, sigma) in points_store:
            continue
        print("epsilon_u =", epsilon_u, "delta =", delta, "sigma =", sigma)
        x, y = make_q_c_curve(epsilon_u=epsilon_u, delta=delta, sigma=sigma, num_points=30, n_round=n_round)
        plot_q_c_curve(x, y, title=f"epsilon_u = {epsilon_u}, delta = {delta}, sigma = {sigma}")
        points_store[(epsilon_u, delta, sigma)] = (x, y)

# Create the cache directory if needed, so the freshly computed curves are not
# lost to a FileNotFoundError on the first run.
os.makedirs(pickle_path, exist_ok=True)
with open(points_store_file, 'wb') as file:
    pickle.dump(points_store, file)

### Plotting the pairs

In [None]:
# Curve for epsilon_u = 3.0 over 20 rounds; `delta` comes from an earlier cell.
epsilon_u = 3.0
n_round = 20
for sigma in [0.5]:
    x, y = make_q_c_curve(
        epsilon_u=epsilon_u,
        delta=delta,
        sigma=sigma,
        num_points=30,
        n_round=n_round,
        min=-6,
    )
    curve_title = f"n_round = {n_round}, epsilon_u = {epsilon_u}, delta = {delta}, sigma = {sigma}"
    plot_q_c_curve(x, y, title=curve_title)

In [None]:
# Curve for epsilon_u = 3.0 over 10 rounds; `delta` comes from an earlier cell.
epsilon_u = 3.0
n_round = 10
for sigma in [0.5]:
    x, y = make_q_c_curve(
        epsilon_u=epsilon_u,
        delta=delta,
        sigma=sigma,
        num_points=30,
        n_round=n_round,
        min=-6,
    )
    curve_title = f"n_round = {n_round}, epsilon_u = {epsilon_u}, delta = {delta}, sigma = {sigma}"
    plot_q_c_curve(x, y, title=curve_title, log=True)

In [None]:
delta = 1e-5
# Earlier variants of this cell swept n_round over [10, 100, 1000] for
# epsilon_u = 1.0 and epsilon_u = 10.0 with sigma = 0.5.

epsilon_u = 3.0
n_round = 20
sigma = 0.5
# Sweep the privacy budget; each curve is drawn on log-log and on linear axes.
for epsilon_u in [0.11, 1.0, 5.0, 10.0]:
    x, y = make_q_c_curve(
        epsilon_u=epsilon_u,
        delta=delta,
        sigma=sigma,
        num_points=30,
        n_round=n_round,
        min=-6,
    )
    curve_title = f"n_round = {n_round}, epsilon_u = {epsilon_u}, delta = {delta}, sigma = {sigma}"
    for use_log in (True, False):
        plot_q_c_curve(x, y, title=curve_title, log=use_log)


## Machine Learning

## heart disease

In [None]:
sigma = 0.5
epsilon_g = 0.5
delta = 1e-5

results_dict_T20_zipf = {}
n_users = 100

# Sweep the twelve highest curve indices (29 down to 18) on heart_disease.
for idx in range(29, 17, -1):
    epsilon_u_list = make_epsilon_u_list(epsilon=epsilon_g, n_users=n_users, dist="homo")
    C_u, q_u = make_params(
        epsilon_u_list, delta, sigma, n_users=n_users, selection="idx", idx=idx
    )
    print("C_u:", C_u[0])
    print("q_u:", q_u[0])

    result = fed_simulation(
        delta=delta,
        sigma=sigma,
        n_users=n_users,
        C_u=C_u,
        q_u=q_u,
        times=5,
        n_round=20,
        user_dist="zipf-iid",
        silo_dist="zipf",
        global_learning_rate=10.0,
        local_learning_rate=0.001,
        dataset_name='heart_disease',
    )
    results_dict_T20_zipf[idx] = result

    print(idx, ":")
    acc_mean, acc_std, loss_mean, loss_std = calc_metric(results_dict_T20_zipf[idx])


### tcga brca

In [None]:
sigma = 0.5
epsilon_g = 0.5
delta = 1e-5

results_dict_T20_zipf = {}
n_users = 100

# Sweep the twelve highest curve indices (29 down to 18) on tcga_brca with 100 users.
for idx in range(29, 17, -1):
    epsilon_u_list = make_epsilon_u_list(epsilon=epsilon_g, n_users=n_users, dist="homo")
    C_u, q_u = make_params(
        epsilon_u_list, delta, sigma, n_users=n_users, selection="idx", idx=idx
    )
    print("C_u:", C_u[0])
    print("q_u:", q_u[0])

    result = fed_simulation(
        delta=delta,
        sigma=sigma,
        n_users=n_users,
        C_u=C_u,
        q_u=q_u,
        times=5,
        n_round=20,
        user_dist="zipf-iid",
        silo_dist="zipf",
        global_learning_rate=10.0,
        local_learning_rate=0.001,
        dataset_name='tcga_brca',
    )
    results_dict_T20_zipf[idx] = result

    print(idx, ":")
    acc_mean, acc_std, loss_mean, loss_std = calc_metric(results_dict_T20_zipf[idx])


In [None]:
sigma = 0.5
epsilon_g = 0.5
delta = 1e-5

results_dict_T20_zipf = {}
n_users = 300

# Sweep the twelve highest curve indices (29 down to 18) on tcga_brca with 300 users.
for idx in range(29, 17, -1):
    epsilon_u_list = make_epsilon_u_list(epsilon=epsilon_g, n_users=n_users, dist="homo")
    C_u, q_u = make_params(
        epsilon_u_list, delta, sigma, n_users=n_users, selection="idx", idx=idx
    )
    print("C_u:", C_u[0])
    print("q_u:", q_u[0])

    result = fed_simulation(
        delta=delta,
        sigma=sigma,
        n_users=n_users,
        C_u=C_u,
        q_u=q_u,
        times=5,
        n_round=20,
        user_dist="zipf-iid",
        silo_dist="zipf",
        global_learning_rate=10.0,
        local_learning_rate=0.001,
        dataset_name='tcga_brca',
    )
    results_dict_T20_zipf[idx] = result

    print(idx, ":")
    acc_mean, acc_std, loss_mean, loss_std = calc_metric(results_dict_T20_zipf[idx])


# multiple epsilons

In [6]:
# Online strategy on creditcard: 1000 users, a single privacy group with epsilon = 3.0.
n_users = 1000
sigma = 5.0
delta = 1e-5
epsilon_list = [3.0]        # multi-group alternative: [1.0, 5.0, 10.0]
ratio_list = [1.0]          # multi-group alternative: [0.3, 0.4, 0.3]
group_thresholds = [3.0]

results_dict = {}

epsilon_u = make_epsilon_u(
    n_users=n_users, dist="hetero", epsilon_list=epsilon_list, ratio_list=ratio_list
)
result = fed_simulation(
    delta=delta,
    sigma=sigma,
    n_users=n_users,
    C_u=None,
    q_u=None,
    agg_strategy="PULDP-AVG-online",
    times=1,
    n_round=50,
    user_dist="zipf-iid",
    silo_dist="zipf",
    global_learning_rate=10.0,
    local_learning_rate=0.01,
    dataset_name='creditcard',
    local_epochs=30,
    epsilon_u=epsilon_u,
    group_thresholds=group_thresholds,
)


[19:26:32 INFO] Start federated learning simulation
[19:31:55 INFO] |----- Global test result of round 0
[19:31:55 INFO] 	 |----- Test/ROC_AUC: 0.4936691052335397 (42722), Test/Loss: 27.68875226378441
[19:31:55 INFO] eps_u = 3.0, diff = -6.053673684597015
[19:31:55 INFO] {3.0: (0.9, 0.524485670030117)}
[19:31:55 INFO] 


[19:36:49 INFO] |----- Global test result of round 1
[19:36:49 INFO] 	 |----- Test/ROC_AUC: 0.4999648283624086 (42722), Test/Loss: 5.061791863292456
[19:36:49 INFO] eps_u = 3.0, diff = -2.7852979842573404
[19:36:50 INFO] {3.0: (0.81, 0.5806660279631615)}
[19:36:50 INFO] 


[19:41:26 INFO] |----- Global test result of round 2
[19:41:26 INFO] 	 |----- Test/ROC_AUC: 0.4999765522416057 (42722), Test/Loss: 1.7005080012604594
[19:41:26 INFO] eps_u = 3.0, diff = -0.36788185546174645
[19:41:27 INFO] {3.0: (0.7290000000000001, 0.6425902247428894)}
[19:41:27 INFO] 


[19:45:31 INFO] |----- Global test result of round 3
[19:45:31 INFO] 	 |----- Test/ROC_AUC: 0.5 (42722), Test/Los

In [4]:
# Online strategy on creditcard: 100 users, a single privacy group with epsilon = 3.0.
n_users = 100
sigma = 5.0
delta = 1e-5
epsilon_list = [3.0]        # multi-group alternative: [1.0, 5.0, 10.0]
ratio_list = [1.0]          # multi-group alternative: [0.3, 0.4, 0.3]
group_thresholds = [3.0]

results_dict = {}

epsilon_u = make_epsilon_u(
    n_users=n_users, dist="hetero", epsilon_list=epsilon_list, ratio_list=ratio_list
)
result = fed_simulation(
    delta=delta,
    sigma=sigma,
    n_users=n_users,
    C_u=None,
    q_u=None,
    agg_strategy="PULDP-AVG-online",
    times=1,
    n_round=50,
    user_dist="zipf-iid",
    silo_dist="zipf",
    global_learning_rate=10.0,
    local_learning_rate=0.01,
    dataset_name='creditcard',
    local_epochs=30,
    epsilon_u=epsilon_u,
    group_thresholds=group_thresholds,
)


[12:51:49 INFO] Start federated learning simulation
[12:53:00 INFO] |----- Global test result of round 0
[12:53:00 INFO] 	 |----- Test/ROC_AUC: 0.470444100543988 (42722), Test/Loss: 19.277319967746735
[12:53:00 INFO] eps_u = 3.0, diff = 4.357678383588791
[12:53:00 INFO] {3.0: (1.0, 0.4735492169857025)}
[12:53:00 INFO] 


[12:54:09 INFO] |----- Global test result of round 1
[12:54:09 INFO] 	 |----- Test/ROC_AUC: 0.4999882761208029 (42722), Test/Loss: 1.2630739691667259
[12:54:09 INFO] eps_u = 3.0, diff = 0.5066758771426976
[12:54:09 INFO] {3.0: (1.0, 0.4735492169857025)}
[12:54:09 INFO] 


[12:55:18 INFO] |----- Global test result of round 2
[12:55:18 INFO] 	 |----- Test/ROC_AUC: 0.4999648283624086 (42722), Test/Loss: 1.4292018879204988
[12:55:18 INFO] eps_u = 3.0, diff = -0.14186867093667388
[12:55:19 INFO] {3.0: (0.9, 0.524485670030117)}
[12:55:19 INFO] 


[12:56:22 INFO] |----- Global test result of round 3
[12:56:22 INFO] 	 |----- Test/ROC_AUC: 0.5 (42722), Test/Loss: 1.372489640634

In [5]:
# Online strategy on creditcard: 2000 users, a single privacy group with epsilon = 3.0.
n_users = 2000
sigma = 5.0
delta = 1e-5
epsilon_list = [3.0]        # multi-group alternative: [1.0, 5.0, 10.0]
ratio_list = [1.0]          # multi-group alternative: [0.3, 0.4, 0.3]
group_thresholds = [3.0]

results_dict = {}

epsilon_u = make_epsilon_u(
    n_users=n_users, dist="hetero", epsilon_list=epsilon_list, ratio_list=ratio_list
)
result = fed_simulation(
    delta=delta,
    sigma=sigma,
    n_users=n_users,
    C_u=None,
    q_u=None,
    agg_strategy="PULDP-AVG-online",
    times=1,
    n_round=50,
    user_dist="zipf-iid",
    silo_dist="zipf",
    global_learning_rate=10.0,
    local_learning_rate=0.01,
    dataset_name='creditcard',
    local_epochs=30,
    epsilon_u=epsilon_u,
    group_thresholds=group_thresholds,
)


[13:46:15 INFO] Start federated learning simulation
[13:54:18 INFO] |----- Global test result of round 0
[13:54:18 INFO] 	 |----- Test/ROC_AUC: 0.4753133127500038 (42722), Test/Loss: 31.429049402475357
[13:54:18 INFO] eps_u = 3.0, diff = -16.035620898008347
[13:54:19 INFO] {3.0: (0.9, 0.524485670030117)}
[13:54:19 INFO] 


[14:01:40 INFO] |----- Global test result of round 1
[14:01:40 INFO] 	 |----- Test/ROC_AUC: 0.49991793284561997 (42722), Test/Loss: 5.540382269769907
[14:01:40 INFO] eps_u = 3.0, diff = -3.127685356885195
[14:01:40 INFO] {3.0: (0.81, 0.5806660279631615)}
[14:01:40 INFO] 


[14:08:19 INFO] |----- Global test result of round 2
[14:08:19 INFO] 	 |----- Test/ROC_AUC: 0.4999882761208029 (42722), Test/Loss: 1.748255823738873
[14:08:19 INFO] eps_u = 3.0, diff = -0.4529144996777177
[14:08:19 INFO] {3.0: (0.7290000000000001, 0.6425902247428894)}
[14:08:19 INFO] 


[14:14:11 INFO] |----- Global test result of round 3
[14:14:11 INFO] 	 |----- Test/ROC_AUC: 0.5 (42722), Test/Los

In [7]:
# Online strategy on creditcard: 3000 users, a single privacy group with epsilon = 1.0.
n_users = 3000
sigma = 5.0
delta = 1e-5
dataset_name = 'creditcard'
epsilon_list = [1.0]        # multi-group alternative: [1.0, 5.0, 10.0]
ratio_list = [1.0]          # multi-group alternative: [0.3, 0.4, 0.3]
group_thresholds = [1.0]

results_dict = {}

epsilon_u = make_epsilon_u(
    n_users=n_users, dist="hetero", epsilon_list=epsilon_list, ratio_list=ratio_list
)
result = fed_simulation(
    delta=delta,
    sigma=sigma,
    n_users=n_users,
    C_u=None,
    q_u=None,
    agg_strategy="PULDP-AVG-online",
    times=1,
    n_round=50,
    user_dist="zipf-iid",
    silo_dist="zipf",
    global_learning_rate=10.0,
    local_learning_rate=0.01,
    dataset_name=dataset_name,
    local_epochs=30,
    epsilon_u=epsilon_u,
    group_thresholds=group_thresholds,
)

# Make sure the output directory exists so the result of the long simulation
# is not lost to a FileNotFoundError when saving.
os.makedirs(pickle_path, exist_ok=True)
with open(os.path.join(pickle_path, f'result_{n_users}_{dataset_name}_{epsilon_list}.pkl'), 'wb') as file:
    pickle.dump(result, file)

[14:04:08 INFO] Start federated learning simulation
[14:14:18 INFO] |----- Global test result of round 0
[14:14:18 INFO] 	 |----- Test/ROC_AUC: 0.6826158952987877 (42722), Test/Loss: 64.03325682878494
[14:14:18 INFO] eps_u = 1.0, diff = -21.524129420518875
[14:14:18 INFO] {1.0: (0.9, 0.19399169832468033)}
[14:14:18 INFO] 


[14:23:54 INFO] |----- Global test result of round 1
[14:23:54 INFO] 	 |----- Test/ROC_AUC: 0.4751609023204409 (42722), Test/Loss: 28.344510287046432
[14:23:54 INFO] eps_u = 1.0, diff = -10.969300031661987
[14:23:54 INFO] {1.0: (0.81, 0.21526962518692017)}
[14:23:54 INFO] 


[14:32:25 INFO] |----- Global test result of round 2
[14:32:25 INFO] 	 |----- Test/ROC_AUC: 0.49732695554305006 (42722), Test/Loss: 10.522750057280064
[14:32:25 INFO] eps_u = 1.0, diff = -4.898443631827831
[14:32:26 INFO] {1.0: (0.7290000000000001, 0.2388451248407364)}
[14:32:26 INFO] 


[14:39:56 INFO] |----- Global test result of round 3
[14:39:56 INFO] 	 |----- Test/ROC_AUC: 0.499648283624085

In [11]:
# Online strategy on mnist: 1000 users, a single privacy group with epsilon = 1.0.
n_users = 1000
sigma = 5.0
delta = 1e-5
dataset_name = 'mnist'
epsilon_list = [1.0]        # multi-group alternative: [1.0, 5.0, 10.0]
ratio_list = [1.0]          # multi-group alternative: [0.3, 0.4, 0.3]
group_thresholds = [1.0]

results_dict = {}

epsilon_u = make_epsilon_u(
    n_users=n_users, dist="hetero", epsilon_list=epsilon_list, ratio_list=ratio_list
)
result = fed_simulation(
    delta=delta,
    sigma=sigma,
    n_users=n_users,
    C_u=None,
    q_u=None,
    agg_strategy="PULDP-AVG-online",
    times=1,
    n_round=50,
    user_dist="zipf-iid",
    silo_dist="zipf",
    global_learning_rate=20.0,
    local_learning_rate=0.01,
    dataset_name=dataset_name,
    local_epochs=50,
    epsilon_u=epsilon_u,
    group_thresholds=group_thresholds,
)

# Make sure the output directory exists so the result of the long simulation
# is not lost to a FileNotFoundError when saving.
os.makedirs(pickle_path, exist_ok=True)
with open(os.path.join(pickle_path, f'result_{n_users}_{dataset_name}_{epsilon_list}.pkl'), 'wb') as file:
    pickle.dump(result, file)

[19:04:37 INFO] Start federated learning simulation
[19:49:49 INFO] |----- Global test result of round 0
[19:49:49 INFO] 	 |----- Test/Acc: 0.2068 (10000), Test/Loss: 44.64125156402588
[19:49:49 INFO] eps_u = 1.0, diff = -1.7134144306182861
[19:49:50 INFO] {1.0: (0.9, 0.19399169832468033)}
[19:49:50 INFO] 


[20:23:14 INFO] |----- Global test result of round 1
[20:23:14 INFO] 	 |----- Test/Acc: 0.4675 (10000), Test/Loss: 37.880372166633606
[20:23:14 INFO] eps_u = 1.0, diff = -0.18857479095458984
[20:23:14 INFO] {1.0: (0.81, 0.21526962518692017)}
[20:23:14 INFO] 


[20:43:28 INFO] |----- Global test result of round 2
[20:43:28 INFO] 	 |----- Test/Acc: 0.66 (10000), Test/Loss: 28.25382685661316
[20:43:29 INFO] eps_u = 1.0, diff = -0.12214028835296631
[20:43:29 INFO] {1.0: (0.7290000000000001, 0.2388451248407364)}
[20:43:29 INFO] 


[20:59:07 INFO] |----- Global test result of round 3
[20:59:07 INFO] 	 |----- Test/Acc: 0.7199 (10000), Test/Loss: 20.44299626350403
[20:59:07 INFO] eps_u = 1

In [13]:
# Online strategy on mnist: 2000 users, a single privacy group with epsilon = 1.0, delta = 1e-6.
n_users = 2000
sigma = 5.0
delta = 1e-6
dataset_name = 'mnist'
epsilon_list = [1.0]        # multi-group alternative: [1.0, 5.0, 10.0]
ratio_list = [1.0]          # multi-group alternative: [0.3, 0.4, 0.3]
group_thresholds = [1.0]

results_dict = {}

epsilon_u = make_epsilon_u(
    n_users=n_users, dist="hetero", epsilon_list=epsilon_list, ratio_list=ratio_list
)
result = fed_simulation(
    delta=delta,
    sigma=sigma,
    n_users=n_users,
    C_u=None,
    q_u=None,
    agg_strategy="PULDP-AVG-online",
    times=1,
    n_round=50,
    user_dist="zipf-iid",
    silo_dist="zipf",
    global_learning_rate=20.0,
    local_learning_rate=0.01,
    dataset_name=dataset_name,
    local_epochs=50,
    epsilon_u=epsilon_u,
    group_thresholds=group_thresholds,
)

# Make sure the output directory exists so the result of the long simulation
# is not lost to a FileNotFoundError when saving.
os.makedirs(pickle_path, exist_ok=True)
with open(os.path.join(pickle_path, f'result_{n_users}_{dataset_name}_{epsilon_list}.pkl'), 'wb') as file:
    pickle.dump(result, file)

[09:25:07 INFO] Start federated learning simulation


KeyboardInterrupt: 

In [12]:
# Online strategy on mnist: 100 users, a single privacy group with epsilon = 1.0.
n_users = 100
sigma = 5.0
delta = 1e-5
dataset_name = 'mnist'
epsilon_list = [1.0]        # multi-group alternative: [1.0, 5.0, 10.0]
ratio_list = [1.0]          # multi-group alternative: [0.3, 0.4, 0.3]
group_thresholds = [1.0]

results_dict = {}

epsilon_u = make_epsilon_u(
    n_users=n_users, dist="hetero", epsilon_list=epsilon_list, ratio_list=ratio_list
)
result = fed_simulation(
    delta=delta,
    sigma=sigma,
    n_users=n_users,
    C_u=None,
    q_u=None,
    agg_strategy="PULDP-AVG-online",
    times=1,
    n_round=50,
    user_dist="zipf-iid",
    silo_dist="zipf",
    global_learning_rate=10.0,
    local_learning_rate=0.01,
    dataset_name=dataset_name,
    local_epochs=50,
    epsilon_u=epsilon_u,
    group_thresholds=group_thresholds,
)

# Make sure the output directory exists so the result of the long simulation
# is not lost to a FileNotFoundError when saving.
os.makedirs(pickle_path, exist_ok=True)
with open(os.path.join(pickle_path, f'result_{n_users}_{dataset_name}_{epsilon_list}.pkl'), 'wb') as file:
    pickle.dump(result, file)

[16:01:43 INFO] Start federated learning simulation
[16:23:17 INFO] |----- Global test result of round 0
[16:23:17 INFO] 	 |----- Test/Acc: 0.101 (10000), Test/Loss: 68.19968342781067
[16:23:17 INFO] eps_u = 1.0, diff = 109.87904238700867
[16:23:17 INFO] {1.0: (1.0, 0.17479341477155685)}
[16:23:17 INFO] 


[16:43:45 INFO] |----- Global test result of round 1
[16:43:45 INFO] 	 |----- Test/Acc: 0.1053 (10000), Test/Loss: 339.4863033294678
[16:43:46 INFO] eps_u = 1.0, diff = 335.32195472717285
[16:43:46 INFO] {1.0: (1.0, 0.17479341477155685)}
[16:43:46 INFO] 


[17:04:29 INFO] |----- Global test result of round 2
[17:04:29 INFO] 	 |----- Test/Acc: 0.0995 (10000), Test/Loss: 710.8406600952148
[17:04:30 INFO] eps_u = 1.0, diff = -80.98394012451172
[17:04:30 INFO] {1.0: (0.9, 0.19399169832468033)}
[17:04:30 INFO] 


[17:23:38 INFO] |----- Global test result of round 3
[17:23:38 INFO] 	 |----- Test/Acc: 0.0614 (10000), Test/Loss: 727.586841583252
[17:23:38 INFO] eps_u = 1.0, diff = 444.594024