Notebook to plot the results of the paper. The cells must be executed in order. The subfolder "images/" must exist before the plotting cells are run. In Cell 2, set the paths to the result.csv and result_memory.csv files produced by the experiments.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.colors as mcolors
import seaborn as sns
import os


In [None]:
# Paths to the semicolon-separated result files. Replace every placeholder
# below with a real path before running the following cells.
# Adult dataset: result.csv and result_memory.csv.
pathToAdultResult = '<pathToAdultResultFile>'
pathToAdultResultMemory = 'pathToAdultResultMemoryFile'
# IHIS dataset: result.csv and result_memory.csv.
pathToIhisResult = 'pathToIhisResultFile'
pathToIhisResultMemory = 'pathToIhisResultMemoryFile'

In [None]:
def is_rightmost(subplot_code):
    """Tell whether a three-digit subplot code addresses the last column.

    The code is read as <rows><columns><index>, e.g. 233 is the third
    subplot of a grid with 2 rows and 3 columns.

    NOTE: a later cell of this notebook defines another ``is_rightmost`` that
    takes ``(ax, axes)``; once that cell has run, it replaces this function.

    Raises:
        ZeroDivisionError: if the column digit of the code is 0.
    """
    # Last two digits of the code: the column count followed by the index.
    cols_and_index = subplot_code % 100
    # The tens digit is the number of columns in the grid.
    column_count = (subplot_code // 10) % 10
    # The leading column digit contributes a multiple of column_count, so the
    # remainder depends only on the index digit.
    remainder = cols_and_index % column_count
    return remainder == 0

def is_leftmost(subplot_code):
    """Return True if a three-digit subplot code addresses the first column.

    The code is read as <rows><columns><index>, e.g. 234 is the fourth
    subplot of a grid with 2 rows and 3 columns (first column of row two).

    NOTE: a later cell of this notebook defines another ``is_leftmost`` that
    takes ``(ax, axes)``; once that cell has run, it replaces this function.

    Args:
        subplot_code: Integer subplot code such as 231.

    Returns:
        bool: True when the 1-based index falls into the first column.

    Raises:
        ZeroDivisionError: if the column digit of the code is 0.
    """
    # Extract the number of columns from the subplot code (tens digit).
    num_cols = (subplot_code // 10) % 10

    # Extract the 1-based subplot index (ones digit).
    index = subplot_code % 10

    # Shift to a 0-based index before taking the remainder. The previous test
    # `index % num_cols == 1` could never succeed for a single-column grid,
    # because `x % 1` is always 0.
    return (index - 1) % num_cols == 0

In [None]:
# Fill colors of the stacked time areas drawn by plot_results: one entry per
# stackplot layer, in the order the layers are passed.
color_map = ['tab:blue', 'tab:green', 'tab:pink', 'tab:cyan']

def is_leftmost(ax, axes):
    """Report whether `ax` sits in the first column of the subplot array `axes`.

    `axes` is expected to be a NumPy array of axes objects. A one-dimensional
    array is treated as a single row, so only its first element counts as
    leftmost. For a two-dimensional array every element of column 0 counts.
    """
    if axes.ndim != 1:
        # Grid with rows and columns: compare against the whole first column.
        first_column = axes[:, 0]
        return np.any(first_column == ax)
    # Flat array: only the first entry is on the far left.
    return ax == axes[0]

def is_rightmost(ax, axes):
    """Report whether `ax` sits in the last column of the subplot array `axes`.

    `axes` is expected to be a NumPy array of axes objects. A one-dimensional
    array is treated as a single row, so only its last element counts as
    rightmost. For a two-dimensional array every element of the last column
    counts.
    """
    if axes.ndim != 1:
        # Grid with rows and columns: compare against the whole last column.
        last_column = axes[:, -1]
        return np.any(last_column == ax)
    # Flat array: only the final entry is on the far right.
    return ax == axes[-1]


def _select_runs(frame, config, local):
    """Return the rows of `frame` whose Config equals `config` and whose Local equals `local`."""
    return frame.loc[(frame.Config == config) & (frame.Local == local)]


def plot_results(n, m, fig_size_x, fig_size_y, configs, filename, local):
    """Plot time, utility and memory over the thread count, one subplot per config.

    Uses the module-level DataFrames `df` (columns Config, Local, Threads,
    Granularity and the Time* columns) and `df_mem` (columns Config, Local,
    Memory), the module-level `color_map`, and the `is_leftmost(ax, axes)` /
    `is_rightmost(ax, axes)` helpers.

    Each subplot shows a stacked area of four time phases on the left axis
    ("Time [s]", time columns divided by 1000), the granularity times 100 on a
    second axis ("Utility [%]") and the memory on a third axis ("Memory [MB]",
    Memory divided by 1,000,000). All subplots share the same time and memory
    limits so they can be compared directly.

    Side effects:
        * Saves the figure as PNG to `filename` and shows it.
        * Writes one semicolon-separated CSV per config, named
          `<filename without extension><config>.csv`, with summary statistics
          for a few selected rows.
        * Prints intermediate tables and statistics.
        * Updates the global matplotlib font settings via `plt.rcParams`.

    Args:
        n: Number of subplot rows.
        m: Number of subplot columns.
        fig_size_x: Figure width passed to `figsize`.
        fig_size_y: Figure height passed to `figsize`.
        configs: Config names to plot, in subplot order (row by row). Configs
            beyond the n*m available subplots are not drawn; unused subplots
            are hidden.
        filename: Output image path, e.g. "images/<name>.png".
        local: If True the local-setting experiments are plotted, otherwise
            the global-setting experiments.

    Raises:
        ValueError: if `df` or `df_mem` holds no rows for a config to plot.
    """
    plt.rcParams.update({
        'font.size': 15,
        'axes.titlesize': 15,  # Ensure the title size matches other text
    })
    # squeeze=False keeps `axes` two-dimensional for every n and m, so that
    # ravel() and the first/last-column checks also work for 1x1 and n x 1 grids.
    fig, axes = plt.subplots(n, m, figsize=(fig_size_x, fig_size_y), squeeze=False)

    # Pass 1: determine the common upper limits of the time and memory axes.
    max_memory = 0
    time_max = 0
    for config in configs:
        runs = _select_runs(df, config, local)
        memory = _select_runs(df_mem, config, local)["Memory"] / 1000000.0
        time_curr = ((runs["Time"] / 1000.0).max()
                     + (runs["TimePostprocess"] / 1000.0).max()
                     + (runs["TimeQuality"] / 1000.0).max())
        if time_curr > time_max:
            time_max = time_curr
        if memory.max() > max_memory:
            max_memory = memory.max()

    # Row positions exported to the per-config CSV summary.
    summary_positions = (0, 1, 11, 63)
    csv_prefix = os.path.splitext(filename)[0]

    all_axes = axes.ravel()
    handles, labels = [], []

    # Pass 2: draw one subplot per config.
    for ax1, config in zip(all_axes, configs):
        runs = _select_runs(df, config, local)
        memory = _select_runs(df_mem, config, local)["Memory"] / 1000000.0
        if runs.empty or memory.empty:
            raise ValueError(
                f"No results found for config {config!r} with Local == {local!r}")

        threads = runs["Threads"]
        time_prepare = runs["TimePrepare"] / 1000.0
        time_anonymize = runs["TimeAnonymize"] / 1000.0
        time_global_transform = runs["TimeGlobalTransform"] / 1000.0
        time_partition_by_class = runs["TimePartitionByClass"] / 1000.0
        time_suppress = runs["TimeSuppress"] / 1000.0
        time_postprocess = runs["TimePostprocess"] / 1000.0
        time_quality = runs["TimeQuality"] / 1000.0
        granularity = runs["Granularity"]

        time_all = (time_prepare + time_anonymize + time_global_transform
                    + time_partition_by_class + time_suppress + time_quality
                    + time_postprocess)
        print(time_all)

        # The first selected row is the reference for all relative figures.
        # NOTE(review): presumably this is the single-threaded run; confirm
        # against the row order of the result files.
        optimal_time = time_all.iloc[0]
        time_saved_percent = ((optimal_time - time_all) / optimal_time) * 100
        speedup_factor = optimal_time / time_all
        optimal_utility = granularity.iloc[0]
        percent_difference_utility = ((optimal_utility - granularity) / optimal_utility) * 100
        optimal_memory = memory.iloc[0]
        percent_difference_memory = (memory / optimal_memory) * 100

        # NOTE(review): `memory` comes from df_mem while every other column
        # comes from df; the DataFrame constructor aligns them on index labels,
        # so this assumes both files list their rows in the same order.
        df_combined = pd.DataFrame({
            'Threads': threads,
            'Time': time_all,
            'Percent Time Saved': time_saved_percent,
            'Speedup factor': speedup_factor,
            'Granularity': granularity,
            'Percent Difference utility:': percent_difference_utility,
            'Memory': memory,
            'Percent Difference Memory': percent_difference_memory
        })
        # Export only the positions that exist, so shorter result sets do not
        # abort the plot with an out-of-bounds error.
        available = [pos for pos in summary_positions if pos < len(df_combined)]
        rows_to_save = df_combined.iloc[available]
        rows_to_save.to_csv(csv_prefix + config + ".csv", index=False, sep=';')
        print(df_combined)

        # Report the row with the largest utility difference to the reference.
        max_diff_index = percent_difference_utility.idxmax()
        max_percent_diff_value = percent_difference_utility[max_diff_index]
        corresponding_granularity = granularity[max_diff_index]
        corresponding_time = time_all[max_diff_index]
        corresponding_time_saved_percent = time_saved_percent[max_diff_index]
        corresponding_threads = threads[max_diff_index]
        print(f"Maximum Percent Difference: {max_percent_diff_value}%")
        print(f"Corresponding Utility: {corresponding_granularity}")
        print(f"Corresponding Time: {corresponding_time}")
        print(f"Corresponding Time saved percent: {corresponding_time_saved_percent}")
        print(f"Corresponding Threads: {corresponding_threads}")

        ax1.set_facecolor('w')
        ax2 = ax1.twinx()  # utility axis
        ax3 = ax1.twinx()  # memory axis
        ax2.set_ylim(0, 100)
        ax1.set_ylim(0, time_max)
        ax1.stackplot(threads, time_prepare + time_anonymize, time_global_transform, time_partition_by_class + time_suppress, time_quality + time_postprocess, labels=["(1) Partitioning + Anonymization", "(2) Harmonization", "(3) Compliance checking", "(4) Finalization",], colors = color_map, edgecolor = 'black', linewidths = 0)
        ax1.margins(x=0)  # remove space between graph and y-axis
        ax1.set_xticks([1, 10, 20, 30, 40, 50, 60])
        ax1.set_ylabel("Time [s]")
        ax1.set_xlabel("Threads")
        ax1.grid(None)
        ax2.grid(None)
        ax3.grid(None)
        ax2.plot(threads, granularity*100.0, label="Utility [%]", color="k", marker=".", markersize=0.3)
        ax3.plot(threads, memory, label="Memory [MB]", color="coral", marker=".", markersize=0.3)
        ax2.set_ylabel("Utility [%]")
        # Move the memory axis outwards so it does not overlap the utility axis.
        ax3.spines['right'].set_position(('axes', 1.25))
        ax3.set_ylabel("Memory [MB]")
        print(memory.max())
        print(granularity.max())
        ax3.set_ylim(0, max_memory * 1.1)
        ax3.spines['right'].set_color('coral')
        ax3.tick_params(axis='y', colors='coral')
        ax3.yaxis.label.set_color('coral')

        # Only the outer columns keep their y-axis labels and ticks.
        if not is_rightmost(ax1, axes):
            ax2.set_ylabel(None)
            ax2.yaxis.set_ticks([])
            ax3.set_ylabel(None)
            ax3.yaxis.set_ticks([])
            ax3.spines[['right']].set_visible(False)
        if not is_leftmost(ax1, axes):
            ax1.set_ylabel(None)
            ax1.yaxis.set_ticks([])
            # NOTE(review): this hides the *right* spine of the time axis;
            # confirm that 'left' was not intended.
            ax1.spines[['right']].set_visible(False)

        handles, labels = ax1.get_legend_handles_labels()

        # The title omits the "(global distribution)" suffix of the config name.
        config_name = config
        if config_name.endswith("(global distribution)"):
            config_name = config_name[:-len("(global distribution)")].strip()
        ax1.set_title(config_name)

        ax3.spines['bottom'].set_color('black')
        ax3.spines['top'].set_color('black')
        ax2.spines['right'].set_color('black')
        ax3.spines['left'].set_color('black')

    # Hide subplots for which no config was supplied.
    for unused_ax in all_axes[len(configs):]:
        unused_ax.set_visible(False)

    # One shared legend for the whole figure. Every subplot uses the same
    # stack labels, so the handles of the last subplot drawn are sufficient;
    # adding the legend once avoids stacking identical legends on top of each
    # other.
    if handles:
        fig.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.5, 0.01), fancybox=False, shadow=False, ncol=2, facecolor='white', edgecolor='black')
    fig.tight_layout()
    # Remove a stale image first so the file on disk always stems from this run.
    if os.path.isfile(filename):
        os.remove(filename)
    fig.savefig(filename, format='png', bbox_inches='tight', dpi=300)
    plt.show()

In [None]:
# ADULT
# Load the Adult results into the module-level names `df` and `df_mem`,
# which plot_results reads. Both files are semicolon-separated.
import pandas as pd
df = pd.read_csv(pathToAdultResult, sep=";")
df_mem = pd.read_csv(pathToAdultResultMemory, sep=";")

In [None]:
# Adult, selection of three configs in a 1x3 grid, global setting (local=False).
fig_x = 12
fig_y = 4
plot_results(1, 3, fig_x, fig_y, ['5-anonymity','0.2-equal-closeness (global distribution)', '0.05-average-risk'], "images/adult_selection_global.png", False)

In [None]:
# Adult, selection of three configs in a 1x3 grid, local setting (local=True).
fig_x = 12
fig_y = 4
plot_results(1, 3, fig_x, fig_y, ['5-anonymity','0.2-equal-closeness (global distribution)', '0.05-average-risk'], "images/adult_selection_local.png", True)

In [None]:
# Adult, all sixteen configs in a 4x4 grid, global setting (local=False).
# `configs_all` is reused by the following cell.
configs_all = ['5-anonymity', '11-anonymity',
               '0.2-equal-closeness (global distribution)',
               '0.5-equal-closeness (global distribution)', '1-disclosure-privacy', '2-disclosure-privacy',
               'distinct-3-diversity', 'distinct-5-diversity', 'entropy-3-diversity', '0.05-average-risk',
               '1-enhanced-likeness (global distribution)',
               '2-enhanced-likeness (global distribution)', '01-sample-uniqueness', 'profitability', '5-map-estimate', '5-map-subset']
fig_x = 16
fig_y = 16
plot_results(4, 4, fig_x, fig_y, configs_all, "images/all_adult_global.png", False)

In [None]:
# Adult, all configs from `configs_all` in a 4x4 grid, local setting (local=True).
fig_x = 16
fig_y = 16
plot_results(4, 4, fig_x, fig_y, configs_all, "images/all_adult_local.png", True)

In [None]:
# IHIS
# Replace the module-level `df` and `df_mem` with the IHIS results, so that
# every following plot_results call plots IHIS instead of Adult.
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv(pathToIhisResult, sep=";")
df_mem = pd.read_csv(pathToIhisResultMemory, sep=";")

In [None]:
# IHIS, selection of three configs in a 1x3 grid, global setting (local=False).
fig_x = 12
fig_y = 4
plot_results(1,3, fig_x, fig_y, ['5-anonymity','0.2-equal-closeness (global distribution)', '0.05-average-risk'], "images/ihis_selection_global.png", False)

In [None]:
# IHIS, selection of three configs in a 1x3 grid, local setting (local=True).
fig_x = 12
fig_y = 4
plot_results(1,3, fig_x, fig_y, ['5-anonymity','0.2-equal-closeness (global distribution)', '0.05-average-risk'], "images/ihis_selection_local.png", True)

In [None]:
# IHIS, all sixteen configs in a 4x4 grid, global setting (local=False).
# This cell previously plotted the local setting into all_ihis_local.png,
# exactly like the next cell, so the global overview was never produced.
# `configs_all` is reused by the following cell.
configs_all = ['5-anonymity', '11-anonymity',
               '0.2-equal-closeness (global distribution)',
               '0.5-equal-closeness (global distribution)', '1-disclosure-privacy', '2-disclosure-privacy',
               'distinct-3-diversity', 'distinct-5-diversity', 'entropy-3-diversity', '0.05-average-risk',
               '1-enhanced-likeness (global distribution)',
               '2-enhanced-likeness (global distribution)', '01-sample-uniqueness', 'profitability', '5-map-estimate', '5-map-subset']
fig_x = 16
fig_y = 16
plot_results(4, 4, fig_x, fig_y, configs_all, "images/all_ihis_global.png", False)

In [None]:
# IHIS, all configs from `configs_all` in a 4x4 grid, local setting (local=True).
fig_x = 16
fig_y = 16
plot_results(4, 4, fig_x, fig_y, configs_all, "images/all_ihis_local.png", True)