In [1]:
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt


In [None]:

# Folders that hold the saved evaluation artefacts for each dataset.
adult_dir = "data/data_outputs"
bank_dir = "data/data_outputs_bank"

# Utility result tables, one CSV per dataset.
utility_adult = pd.read_csv(f"{adult_dir}/utility_results.csv")
utility_bank = pd.read_csv(f"{bank_dir}/utility_results_bank.csv")

# MIA AUC scores (JSON): Adult first, then Bank.
with open(f"{adult_dir}/model_mia_auc.json") as f:
    mia_adult = json.loads(f.read())
with open(f"{bank_dir}/model_mia_auc_bank.json") as f:
    mia_bank = json.loads(f.read())

# Worst-case MIA AUC scores (JSON), in the same dataset order.
with open(f"{adult_dir}/worst_case_mia_auc.json") as f:
    worst_adult = json.loads(f.read())
with open(f"{bank_dir}/worst_case_mia_auc_bank.json") as f:
    worst_bank = json.loads(f.read())


In [3]:
# Build comparison table
def build_auc_table(df, mia_dict, worst_dict, label):
    """Assemble the utility / MIA AUC summary for the RandomForest rows.

    Args:
        df: Results frame with "Model", "Trained_On", "Accuracy", "F1" and
            "AUC" columns.
        mia_dict: Mapping from lower-cased "Trained_On" value to MIA AUC.
        worst_dict: Mapping from lower-cased "Trained_On" value to worst-case
            MIA AUC.
        label: Value written into the "Dataset" column of every row.

    Returns:
        A new DataFrame restricted to rows whose "Model" is "RandomForest".
        Its "Model" column holds the upper-cased "Trained_On" value, and the
        two MIA columns are NaN wherever the lookup key is absent from the
        corresponding mapping.
    """
    rf_rows = df.loc[df["Model"] == "RandomForest"]
    # Lookup key shared by both attack-score mappings.
    lookup_key = rf_rows["Trained_On"].str.lower()

    # assign() returns a fresh frame, so the caller's df is never modified.
    table = rf_rows.assign(**{
        "Model": rf_rows["Trained_On"].str.upper(),
        "Dataset": label,
        "MIA AUC": lookup_key.map(mia_dict),
        "Worst-Case MIA AUC": lookup_key.map(worst_dict),
    })

    column_order = [
        "Dataset", "Model", "Accuracy", "F1", "AUC",
        "MIA AUC", "Worst-Case MIA AUC",
    ]
    return table[column_order]

# One summary table per dataset, stacked Adult first and Bank second.
adult_table = build_auc_table(
    df=utility_adult, mia_dict=mia_adult, worst_dict=worst_adult, label="Adult"
)
bank_table = build_auc_table(
    df=utility_bank, mia_dict=mia_bank, worst_dict=worst_bank, label="Bank"
)
combined = pd.concat([adult_table, bank_table])

# Heading and table are emitted on separate lines; the row index is hidden.
print(
    "\n--- Privacy vs Worst-Case AUC ---",
    combined.to_string(index=False),
    sep="\n",
)




--- Privacy vs Worst-Case AUC ---
Dataset  Model  Accuracy       F1      AUC  MIA AUC  Worst-Case MIA AUC
  Adult   REAL  0.858617 0.684052 0.909621      NaN                 NaN
  Adult  CTGAN  0.833313 0.612839 0.871170 0.519071            0.555848
  Adult COPULA  0.762191 0.031031 0.714579 0.502688            0.506916
  Adult   TVAE  0.810220 0.616910 0.861700 0.498276            0.514356
   Bank   REAL  0.905423 0.507600 0.924246      NaN                 NaN
   Bank  CTGAN  0.894541 0.425265 0.871479 0.504471            0.524139
   Bank   TVAE  0.859772 0.515439 0.865916 0.493773            0.529065
   Bank COPULA  0.882863 0.000000 0.776475 0.503621            0.502988


In [None]:

# Bank dataset: compare the share of each "y" class in the real training
# data against the Copula-generated synthetic data.
real = pd.read_csv(f"{bank_dir}/train_df_bank.csv")
synthetic = pd.read_csv(f"{bank_dir}/synthetic_copula_bank.csv")

# normalize=True yields proportions rather than raw counts.
real_dist, synth_dist = (
    frame["y"].value_counts(normalize=True) for frame in (real, synthetic)
)

print(
    "\n--- Class Balance (Real vs Copula Synthetic, Bank Dataset) ---",
    "Real:",
    real_dist,
    "\nCopula Synthetic:",
    synth_dist,
    sep="\n",
)


--- Class Balance (Real vs Copula Synthetic, Bank Dataset) ---
Real:
y
0    0.883007
1    0.116993
Name: proportion, dtype: float64

Copula Synthetic:
y
0    0.8847
1    0.1153
Name: proportion, dtype: float64


In [None]:
import pandas as pd
import json



# These are the same JSON files read by the first loading cell; they are
# re-read here into the same four names.

# Adult: MIA AUC, then worst-case MIA AUC.
with open(f"{adult_dir}/model_mia_auc.json") as f:
    mia_adult = json.loads(f.read())
with open(f"{adult_dir}/worst_case_mia_auc.json") as f:
    worst_adult = json.loads(f.read())

# Bank: MIA AUC, then worst-case MIA AUC.
with open(f"{bank_dir}/model_mia_auc_bank.json") as f:
    mia_bank = json.loads(f.read())
with open(f"{bank_dir}/worst_case_mia_auc_bank.json") as f:
    worst_bank = json.loads(f.read())

# Combine into DataFrame 
def auc_diff_df(name, mia_dict, worst_dict):
    """Tabulate MIA AUC against worst-case MIA AUC per model, with their gap.

    Args:
        name: Dataset label written into the "Dataset" column of every row.
        mia_dict: Mapping from model key to MIA AUC.
        worst_dict: Mapping from model key to worst-case MIA AUC.

    Returns:
        DataFrame with columns "Model" (upper-cased key), "MIA AUC",
        "Worst-Case MIA AUC", "Delta" (worst-case minus MIA AUC) and
        "Dataset". Rows follow the key order of ``mia_dict``; keys present
        only in ``worst_dict`` are appended afterwards. A score missing from
        either mapping is NaN, which makes that row's "Delta" NaN as well.
    """
    # Every column is built from this single key list so each row describes
    # exactly one model. Iterating the two dicts independently would pair
    # values purely by position and silently mislabel scores whenever the
    # JSON files list the models in a different order.
    models = list(mia_dict) + [key for key in worst_dict if key not in mia_dict]
    missing = float("nan")

    df = pd.DataFrame({
        "Model": [key.upper() for key in models],
        "MIA AUC": [mia_dict.get(key, missing) for key in models],
        "Worst-Case MIA AUC": [worst_dict.get(key, missing) for key in models],
    })
    df["Delta"] = df["Worst-Case MIA AUC"] - df["MIA AUC"]
    df["Dataset"] = name
    return df

# One delta table per dataset, stacked into a single frame with a fresh index.
adult_df = auc_diff_df(name="Adult", mia_dict=mia_adult, worst_dict=worst_adult)
bank_df = auc_diff_df(name="Bank", mia_dict=mia_bank, worst_dict=worst_bank)
combined = pd.concat([adult_df, bank_df], ignore_index=True)

# Both sort keys are descending: dataset name first, then Delta within it.
print(
    "\n--- Change in Privacy Risk (Worst-Case vs Average) ---",
    combined.sort_values(
        by=["Dataset", "Delta"], ascending=[False, False]
    ).to_string(index=False),
    sep="\n",
)



--- Change in Privacy Risk (Worst-Case vs Average) ---
 Model  MIA AUC  Worst-Case MIA AUC     Delta Dataset
  TVAE 0.493773            0.529065  0.035293    Bank
 CTGAN 0.504471            0.524139  0.019668    Bank
COPULA 0.503621            0.502988 -0.000632    Bank
 CTGAN 0.519071            0.555848  0.036777   Adult
  TVAE 0.498276            0.514356  0.016080   Adult
COPULA 0.502688            0.506916  0.004228   Adult
