In [101]:
import scipy
import json
import os

import pandas as pd
import seaborn as sns

In [None]:
# def load_single_attack_results(
#     scenario: str,
#     version: str,
#     dataset: str
# ):
#     '''Loads json file (attack results) and csv files (Shapley, LOO, Alpha)
#     into a one pandas dataframe'''
    
#     ## Loads the json file with attack metrics
#     metrics = ['Accuracy', 'TPR', 'TNR', 'FPR', 'FNR', 'Advantage', 'AUC']
#     json_path = os.path.join(os.getcwd(), 'attack_results', scenario, version, dataset, 'nodes_attack_results.json')
#     with open(json_path, 'r+') as file:
#         json_file = json.load(file)
#     clients_ids = [int(i) for i in json_file.keys() if i != 'average']
#     iterations = [int(i) for i in json_file['0'].keys()]
#     iterations.sort()
#     headings = [f'{client}_{metric}' for client in clients_ids for metric in metrics]
    
#     dataframe = pd.DataFrame(index=iterations, columns=headings)
    
#     for client in clients_ids:
#         for iteration in iterations:
#             client_data = json_file[str(client)][str(iteration)]['average']
#             for heading in metrics:
#                 dataframe.loc[iteration, f"{client}_{heading}"] = client_data[heading]
    
#     ## 
#     return dataframe

In [None]:
# def load_single_attack_results_short(
#     scenario: str,
#     version: str,
#     dataset: str
# ):
#     '''Loads json file (attack results) and csv files (Shapley, LOO, Alpha)
#     into a one pandas dataframe'''
    
#     ## Loads the json file with attack metrics
#     metrics = ['TPR', 'AUC']
#     json_path = os.path.join(os.getcwd(), 'attack_results', scenario, version, dataset, 'nodes_attack_results.json')
#     with open(json_path, 'r+') as file:
#         json_file = json.load(file)
#     clients_ids = [int(i) for i in json_file.keys() if i != 'average']
#     iterations = [int(i) for i in json_file['0'].keys()]
#     iterations.sort()
#     headings = [f'{client}_{metric}' for client in clients_ids for metric in metrics]
    
#     dataframe = pd.DataFrame(index=iterations, columns=headings)
    
#     for client in clients_ids:
#         for iteration in iterations:
#             client_data = json_file[str(client)][str(iteration)]['average']
#             for heading in metrics:
#                 dataframe.loc[iteration, f"{client}_{heading}"] = client_data[heading]
    
#     dataframe = dataframe.iloc[-10:].mean()
#     return dataframe

In [None]:
def load_all_attack_results_short(
    scenarios: list[str],
    versions: list[str],
    datasets: list[str],
    num_nodes: int = 8,
    last_iterations: int = 10,
):
    '''Builds one summary dataframe from attack results and contribution scores.

    For every (scenario, version, dataset) combination the function reads,
    relative to the current working directory:

    * ``attack_results/<scenario>/<version>/<dataset>/nodes_attack_results.json``
    * ``attack_results/<scenario>/<version>/<dataset>/results/full_shapley.csv``
    * ``attack_results/<scenario>/<version>/<dataset>/results/full_loo.csv``

    Args:
        scenarios: Scenario directory names.
        versions: Version directory names.
        datasets: Dataset directory names.
        num_nodes: Number of nodes used to pre-allocate the
            ``<node>_TPR`` / ``<node>_AUC`` columns.
        last_iterations: Number of trailing iterations (after sorting the
            iteration ids numerically) averaged per node; must be positive.

    Returns:
        A dataframe with one row per ``"<scenario>_<version>_<dataset>"`` and,
        per node, the columns ``<node>_TPR``, ``<node>_AUC`` (mean over the
        trailing iterations), ``<node>_ACC_SHAP`` and ``<node>_ACC_LOO``.

    Raises:
        FileNotFoundError: If one of the expected files is missing.
        KeyError: If the JSON or CSV files lack the expected keys/columns.
    '''
    metrics = ['TPR', 'AUC']
    rows = [
        f"{scenario}_{version}_{dataset}"
        for scenario in scenarios
        for version in versions
        for dataset in datasets
    ]
    columns = [f'{node}_{metric}' for node in range(num_nodes) for metric in metrics]
    all_dataframe = pd.DataFrame(index=rows, columns=columns)

    for scenario in scenarios:
        for version in versions:
            for dataset in datasets:
                row_label = f"{scenario}_{version}_{dataset}"

                # Defining paths
                run_dir = os.path.join(os.getcwd(), 'attack_results', scenario, version, dataset)
                json_path = os.path.join(run_dir, 'nodes_attack_results.json')
                shapley_path = os.path.join(run_dir, 'results', 'full_shapley.csv')
                loo_path = os.path.join(run_dir, 'results', 'full_loo.csv')

                # Opening files; the JSON is only read, so no write access is requested.
                with open(json_path, 'r') as file:
                    json_file = json.load(file)
                shapley_frame = pd.read_csv(shapley_path)
                loo_frame = pd.read_csv(loo_path)

                # Defining number of iterations and number of clients.
                # The iteration ids are taken from client '0'.
                clients_ids = [int(i) for i in json_file.keys() if i != 'average']
                iterations = sorted(int(i) for i in json_file['0'].keys())

                # One column per client/metric, one row per iteration.
                per_iteration = pd.DataFrame(
                    {
                        f"{client}_{metric}": [
                            json_file[str(client)][str(iteration)]['average'][metric]
                            for iteration in iterations
                        ]
                        for client in clients_ids
                        for metric in metrics
                    },
                    index=iterations,
                )

                # Inserting summary statistics into the final dataframe
                all_dataframe.loc[row_label, :] = per_iteration.iloc[-last_iterations:].mean()
                for client in clients_ids:
                    shapley_score = shapley_frame.loc[shapley_frame['node_id'] == client, 'accuracy']
                    loo_score = loo_frame.loc[loo_frame['node_id'] == client, 'accuracy']
                    # Label-based lookup: picks the row whose index label equals the
                    # node id — assumes row i of each CSV describes node i; TODO confirm.
                    all_dataframe.loc[row_label, f"{client}_ACC_SHAP"] = shapley_score[client]
                    all_dataframe.loc[row_label, f"{client}_ACC_LOO"] = loo_score[client]

    return all_dataframe

In [96]:
# Experiment grid: every scenario/version/dataset combination becomes one row
# of the summary frame returned by the loader.
scenarios = ['uniform', 'ls', 'hs']
versions = ['without_DP']
datasets = ['mnist', 'fmnist']

all_dataframe = load_all_attack_results_short(scenarios=scenarios, versions=versions, datasets=datasets)

In [97]:
# Display the combined summary table (one row per scenario/version/dataset combination).
all_dataframe

Unnamed: 0,0_TPR,0_AUC,1_TPR,1_AUC,2_TPR,2_AUC,3_TPR,3_AUC,4_TPR,4_AUC,...,3_ACC_SHAP,3_ACC_LOO,4_ACC_SHAP,4_ACC_LOO,5_ACC_SHAP,5_ACC_LOO,6_ACC_SHAP,6_ACC_LOO,7_ACC_SHAP,7_ACC_LOO
uniform_without_DP_mnist,0.991475,0.822918,0.989136,0.816,0.993245,0.81982,0.992674,0.815144,0.992356,0.819737,...,-0.003128,-9e-05,-0.003366,-0.00262,-0.001821,0.00014,-0.001288,-0.00011,-0.001454,-0.000412
uniform_without_DP_fmnist,0.855706,0.68928,0.836188,0.670454,0.85537,0.691459,0.908878,0.735359,0.91311,0.733177,...,-0.003852,-0.004618,0.003236,0.001706,0.002895,0.001836,0.001992,0.00027,0.002799,3.4e-05
ls_without_DP_mnist,0.971026,0.802567,0.979921,0.807553,0.990561,0.816298,0.984975,0.811566,0.982798,0.803215,...,-0.003128,-9e-05,-0.003366,-0.00262,-0.001821,0.00014,-0.001288,-0.00011,-0.001454,-0.000412
ls_without_DP_fmnist,0.889142,0.710262,0.873554,0.71051,0.781945,0.641434,0.851862,0.677337,0.888341,0.712126,...,-0.003852,-0.004618,0.003236,0.001706,0.002895,0.001836,0.001992,0.00027,0.002799,3.4e-05
hs_without_DP_mnist,0.999181,0.819868,0.993084,0.822044,0.998695,0.821397,0.987737,0.809935,0.999461,0.819936,...,-0.013089,-0.008065,0.004798,0.004697,-0.027861,-0.018265,-0.018014,-0.019272,-0.005321,-0.000347
hs_without_DP_fmnist,0.98992,0.808062,0.975346,0.804021,0.928235,0.74259,0.997996,0.814886,0.941882,0.786678,...,0.009239,0.001726,-0.009204,-0.002004,-0.007142,-0.003784,-0.003838,-0.003622,0.022874,0.024956


In [None]:
# Pearson correlation, per experiment row, between the per-node attack metrics
# (TPR, AUC) and the per-node contribution scores (Shapley, LOO accuracy).
MNIST_FMNIST = [f"{scenario}_{version}_{dataset}" for scenario in scenarios for version in versions for dataset in ['mnist', 'fmnist']]
pearson_corr = pd.DataFrame(
    index=MNIST_FMNIST,
    columns=['TPR_TO_SHAP_ACC', 'AUC_TO_SHAP_ACC', 'TPR_TO_LOO_ACC', 'AUC_TO_LOO_ACC']
)

for scenario in MNIST_FMNIST:
    row = all_dataframe.loc[scenario]
    for contribution in ('SHAP', 'LOO'):
        for attack_metric in ('TPR', 'AUC'):
            # The row is extracted from a mixed-dtype frame, so cast the slices
            # to float before handing them to scipy.
            x = row[[f"{client}_{attack_metric}" for client in range(8)]].astype(float)
            y = row[[f"{client}_ACC_{contribution}" for client in range(8)]].astype(float)
            pearson_corr.loc[scenario, f"{attack_metric}_TO_{contribution}_ACC"] = scipy.stats.pearsonr(x, y).statistic

pearson_corr

Unnamed: 0,TPR_TO_SHAP_ACC,AUC_TO_SHAP_ACC,TPR_TO_LOO_ACC,AUC_TO_LOO_ACC
uniform_without_DP_mnist,-0.600356,-0.125639,-0.740503,-0.415546
uniform_without_DP_fmnist,0.476138,0.420554,0.044805,-0.018391
ls_without_DP_mnist,-0.454294,-0.601328,-0.357063,-0.39543
ls_without_DP_fmnist,0.103462,0.24048,0.233744,0.374015
hs_without_DP_mnist,0.001548,0.346152,0.000454,0.41476
hs_without_DP_fmnist,0.659578,0.602615,0.486242,0.483902


In [111]:
# Spearman rank correlation, per experiment row, between the per-node attack
# metrics (TPR, AUC) and the per-node contribution scores (Shapley, LOO accuracy).
MNIST_FMNIST = [f"{scenario}_{version}_{dataset}" for scenario in scenarios for version in versions for dataset in ['mnist', 'fmnist']]
spearmanr_corr = pd.DataFrame(
    index=MNIST_FMNIST,
    columns=['TPR_TO_SHAP_ACC', 'AUC_TO_SHAP_ACC', 'TPR_TO_LOO_ACC', 'AUC_TO_LOO_ACC']
)

for scenario in MNIST_FMNIST:
    row = all_dataframe.loc[scenario]
    for contribution in ('SHAP', 'LOO'):
        for attack_metric in ('TPR', 'AUC'):
            # The row is extracted from a mixed-dtype frame, so cast the slices
            # to float before handing them to scipy.
            x = row[[f"{client}_{attack_metric}" for client in range(8)]].astype(float)
            y = row[[f"{client}_ACC_{contribution}" for client in range(8)]].astype(float)
            spearmanr_corr.loc[scenario, f"{attack_metric}_TO_{contribution}_ACC"] = scipy.stats.spearmanr(x, y).statistic

spearmanr_corr

Unnamed: 0,TPR_TO_SHAP_ACC,AUC_TO_SHAP_ACC,TPR_TO_LOO_ACC,AUC_TO_LOO_ACC
uniform_without_DP_mnist,-0.595238,-0.071429,-0.809524,-0.142857
uniform_without_DP_fmnist,0.52381,0.238095,0.404762,-0.02381
ls_without_DP_mnist,-0.357143,-0.595238,-0.452381,-0.285714
ls_without_DP_fmnist,0.166667,0.309524,0.404762,0.428571
hs_without_DP_mnist,0.0,0.547619,0.166667,0.738095
hs_without_DP_fmnist,0.619048,0.619048,0.5,0.5
