In [None]:
import json
from pathlib import Path

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

In [None]:
# Directory holding the per-experiment "results_*.csv" logs; the figures produced
# further down are written into the same directory.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
log_directory = Path("/Users/andreas/workspace/thesis-code/out/privacy")

# Result file stems are "results_" followed by the hyperparameter values joined
# with underscores, in exactly this order.
RESULT_PREFIX = "results_"
parameters = ["devices", "epochs", "local_epochs", "local_batch_size", "clients_per_round",
              "l2_norm_clip", "noise_multiplier", "local_dp", "start_time"]

# Only files that *start* with the prefix can be parsed below. Sorting makes the
# load (and therefore plot/legend) order reproducible, since Path.iterdir()
# yields entries in arbitrary order.
result_files = sorted(
    path for path in log_directory.iterdir()
    if path.suffix == ".csv" and path.name.startswith(RESULT_PREFIX)
)

dfs = []
hyperparams = []

for f in result_files:
    print(f"Loading results file \"{f.name}\"")
    dfs.append(pd.read_csv(f))

    # Remove the literal prefix by slicing. str.lstrip("results_") would instead
    # strip any leading run of the characters r/e/s/u/l/t/_ and could eat into
    # the first hyperparameter value.
    parameter_values = f.stem[len(RESULT_PREFIX):].split("_")
    assert len(parameter_values) == len(parameters), (
        f"Cannot parse hyperparameters from \"{f.name}\": expected "
        f"{len(parameters)} underscore-separated values, got {len(parameter_values)}"
    )
    # All values are kept as strings, exactly as they appear in the file name.
    hyperparams.append(dict(zip(parameters, parameter_values)))

# One (hyperparameter dict, results DataFrame) pair per result file.
experiments = list(zip(hyperparams, dfs))

In [None]:
# Test Loss Plot
# Draws one test-loss curve per loaded experiment on a single shared axes and
# stores the figure as a PDF inside the log directory.
fig, ax = plt.subplots()
for params, df in experiments:
    # Legend entry built from the run's DP-related hyperparameters.
    curve_label = f'DP: {params["local_dp"]} L2-Clip: {params["l2_norm_clip"]} Noise-Mult:{params["noise_multiplier"]}'
    df.plot(
        x="epoch",
        y="test_loss",
        label=curve_label,
        ax=ax,
    )
fig.savefig(log_directory / "test_loss.pdf")

In [None]:
# Test Accuracy Plot
# Draws one test-accuracy curve per loaded experiment on a single wide axes and
# stores the figure as a PDF inside the log directory.
fig, ax = plt.subplots(figsize=(20, 10))
for params, df in experiments:
    # Legend entry built from the run's DP-related hyperparameters.
    curve_label = f'DP: {params["local_dp"]} L2-Clip: {params["l2_norm_clip"]} Noise-Mult:{params["noise_multiplier"]}'
    df.plot(
        x="epoch",
        y="test_accuracy",
        label=curve_label,
        ax=ax,
    )
fig.savefig(log_directory / "test_accuracy.pdf")

In [None]:
from tensorflow_privacy import get_privacy_spent

# Eps Guarantees
# For every experiment whose "local_dp" hyperparameter is the string "True",
# parse the logged privacy guarantees and derive cumulative privacy columns
# (RDP-based and naive summation). The per-experiment frames are stacked into
# `combined_df` at the end.
privacy_dfs = []
privacy_params = []
for params, df in [exp for exp in experiments if exp[0]["local_dp"] == "True"]:
    df = df.copy()  # leave the frame stored in `experiments` untouched
    run_id = f'{params["l2_norm_clip"]}_{params["noise_multiplier"]}'
    df["privacy_params"] = run_id
    df["noise_multiplier"] = params["noise_multiplier"]
    df["l2_norm_clip"] = params["l2_norm_clip"]
    # The column is stored as text; after dropping single quotes it is parsed as
    # JSON. Each parsed value is indexed with [0] below and then read like a dict
    # with "eps"/"delta"/"rdp"/"orders" keys.
    # NOTE(review): presumably one entry per client, of which only the first is
    # used - confirm against the code that writes the results.
    df["privacy_guarantees"] = df.privacy_guarantees.map(lambda x: x.replace("'", "")).map(json.loads)
    first_guarantee = df["privacy_guarantees"].iloc[0][0]
    eps_per_round_and_client = first_guarantee['eps']
    delta_per_round_and_client = first_guarantee['delta']
    df["eps_per_round_and_client"] = eps_per_round_and_client
    df["delta_per_round_and_client"] = delta_per_round_and_client
    df["eps_delta_per_round_and_client"] = f"({eps_per_round_and_client}, {delta_per_round_and_client})"
    dfp = df  # alias of the same frame, kept so the `dfp` name stays available
    try:
        dfp["rdp"] = dfp["privacy_guarantees"].map(lambda client_results: np.array(client_results[0]["rdp"]))
        dfp["orders"] = dfp["privacy_guarantees"].map(lambda client_results: np.array(client_results[0]["orders"]))
        dfp["delta"] = dfp["privacy_guarantees"].map(lambda client_results: client_results[0]["delta"])
        # Running element-wise sum of the per-row RDP arrays.
        dfp["rdp_cumsum"] = dfp.rdp.cumsum()
        # Access the row by column label. Positional `row[0]` on a string-labelled
        # Series is deprecated in pandas and turns into a KeyError once the
        # fallback is removed - which the handler below would then swallow.
        dfp["cum_privacy"] = dfp[["orders", "rdp_cumsum", "delta"]].apply(
            lambda row: get_privacy_spent(row["orders"], row["rdp_cumsum"], target_delta=row["delta"]),
            axis=1,
        )
        # First element of the get_privacy_spent result is used as epsilon.
        dfp["cum_eps"] = dfp["cum_privacy"].apply(lambda x: x[0])
        # Fraction of devices selected per round; used to scale the totals.
        client_pick_prob = float(params["clients_per_round"]) / float(params["devices"])
        dfp["cum_eps_div"] = dfp["cum_eps"] * client_pick_prob
        # Naive accounting: plain running sums of the per-row eps and delta.
        dfp["eps_round_"] = dfp["privacy_guarantees"].map(lambda client_results: np.array(client_results[0]["eps"]))
        dfp["eps_naive_cumsum"] = dfp["eps_round_"].cumsum()
        dfp["delta_naive_cumsum"] = dfp["delta"].cumsum()
        dfp["eps_naive_cumsum_div"] = dfp["eps_naive_cumsum"] * client_pick_prob
        dfp["delta_naive_cumsum_div"] = dfp["delta_naive_cumsum"] * client_pick_prob
        privacy_dfs.append(df)
        privacy_params.append(params)
    except KeyError as err:
        # Best effort: an experiment lacking one of the required keys is left
        # out of the comparison, but say so instead of dropping it silently.
        print(f"Skipping experiment {run_id!r} (start_time={params.get('start_time')}): KeyError {err}")

if not privacy_dfs:
    # pd.concat would raise a ValueError here anyway; give it a clearer message.
    raise ValueError("No local-DP experiment with usable privacy accounting data was loaded")
combined_df = pd.concat(privacy_dfs)

In [None]:
# Privacy/utility trade-off: scaled cumulative epsilon (left) and test accuracy
# (right) per communication round, coloured by noise multiplier.
sns.set_style("whitegrid")
fig, (ax, ax2) = plt.subplots(nrows=1, ncols=2, sharex=True, figsize=(10, 5))

# Left panel: epsilon. This panel carries the legend shared by both panels.
l = sns.lineplot(x="epoch", y="cum_eps_div", data=combined_df, ax=ax, hue="noise_multiplier")
l.legend().set_title("Noise Multiplier")
ax.set_ylabel("Epsilon")
ax.set_xlabel("Communication Round")

# Right panel: accuracy. Labels are set *after* plotting, like on the left
# panel, because older seaborn releases overwrite the axis labels with the
# column names whenever lineplot draws onto an axes.
sns.lineplot(x="epoch", y="test_accuracy", ax=ax2, data=combined_df, hue="noise_multiplier")
ax2.set_ylabel("Test Accuracy")
ax2.set_xlabel("Communication Round")
ax2.get_legend().remove()  # duplicate of the left panel's legend

fig.savefig(log_directory / "noise_mult_eps_acc_tradeoff.pdf", bbox_inches = 'tight')

In [None]:
# Naive vs. RDP accounting: the combined frame is duplicated once per accounting
# scheme, each copy exposing its values under the common "eps"/"delta" columns,
# so seaborn can separate the two schemes by line style.
sns.set_style("whitegrid")

# RDP variant: eps comes from the RDP-based column; "delta" is left as it is.
df1_ = combined_df.assign(
    eps=combined_df["cum_eps_div"],
    accounting="rdp",
)
# Naive variant: both eps and delta come from the scaled running sums.
df2_ = combined_df.assign(
    eps=combined_df["eps_naive_cumsum_div"],
    delta=combined_df["delta_naive_cumsum_div"],
    accounting="naive",
)
df_ = pd.concat([df1_, df2_])

fig, (ax, ax2) = plt.subplots(nrows=1, ncols=2, sharex=True, figsize=(10, 5))

# Left panel: epsilon per communication round.
l = sns.lineplot(data=df_, x="epoch", y="eps", hue="noise_multiplier", style="accounting", ax=ax)
ax.set_ylabel("Epsilon")
ax.set_xlabel("Communication Round")

# Right panel: delta per communication round.
sns.lineplot(data=df_, x="epoch", y="delta", hue="noise_multiplier", style="accounting", ax=ax2)
ax2.set_ylabel("Delta")
ax2.set_xlabel("Communication Round")

fig.savefig(log_directory / "accounting_naive_vs_rdp.pdf", bbox_inches = 'tight')

In [None]:
# Test accuracy per round for the rows whose noise multiplier equals the string
# "1.0", with one line colour per clipping norm.
is_unit_noise = combined_df["noise_multiplier"] == "1.0"
dfp_ = combined_df[is_unit_noise]
fig, ax = plt.subplots()
sns.lineplot(data=dfp_, x="epoch", y="test_accuracy", hue="l2_norm_clip", ax=ax)