In [41]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.metrics import mean_absolute_error, mean_squared_error,root_mean_squared_error

In [42]:
# Load data and calculate summaries
def mean_relative_error(y_true, y_pred):
    """Return the mean relative error between true and predicted values.

    Computed element-wise as ``|y_true - y_pred| / (|y_true| + 1e-8)`` and
    then averaged.

    Parameters
    ----------
    y_true, y_pred : array-like of numbers with the same shape
        Ground-truth values and their estimates (lists, NumPy arrays or
        pandas Series).

    Returns
    -------
    float
        The average relative deviation of ``y_pred`` from ``y_true``.
    """
    # Tiny stabiliser: only there to avoid dividing by zero when a true value
    # is 0. It must be negligible next to the data (the previous 1e+8 swamped
    # the denominator and distorted every result).
    eps = 1e-8
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # Use |y_true| so a negative true value cannot cancel against eps.
    return np.mean(np.abs(y_true - y_pred) / (np.abs(y_true) + eps))


def evaluate_clipped_sum_estimation_result(df,epsilon, beta):
    """Condense one clipped-sum result frame into a single summary record.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'tau', 'n', 'min_val', 'max_val',
        'quantile', 'clip_thresh', 'b', 'noise', 'actual_sum' and
        'estimated_sum'.
    epsilon, beta
        Stored unchanged under the 'epsilon' and 'beta' keys.

    Returns
    -------
    dict
        The two parameters, the column means, and the MAE / MSE / RMSE / MRE
        of 'estimated_sum' against 'actual_sum'.
    """
    truth = df['actual_sum']
    estimate = df['estimated_sum']

    summary = {'epsilon': epsilon, 'beta': beta}

    # (output key, source column) pairs; each output is the column mean.
    averaged_columns = (
        ('tau', 'tau'),
        ('n', 'n'),
        ('min_val', 'min_val'),
        ('max_val', 'max_val'),
        ('quantile', 'quantile'),
        ('clip_thresh_mean', 'clip_thresh'),
        ('b_mean', 'b'),
        ('noise_mean', 'noise'),
    )
    for out_key, column in averaged_columns:
        summary[out_key] = df[column].mean()

    # Error metrics of the estimate against the true sum.
    summary['mae'] = mean_absolute_error(truth, estimate)
    summary['mse'] = mean_squared_error(truth, estimate)
    summary['rmse'] = root_mean_squared_error(truth, estimate)
    summary['mre'] = mean_relative_error(truth, estimate)
    return summary


def evaluate_sum_estimation_result(df,epsilon):
    """Condense one plain sum-estimator result frame into a summary record.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'n', 'min_val', 'max_val', 'noise',
        'actual_sum' and 'estimated_sum'.
    epsilon
        Stored unchanged under the 'epsilon' key.

    Returns
    -------
    dict
        epsilon, the column means, and the MAE / MSE / RMSE / MRE of
        'estimated_sum' against 'actual_sum'.
    """
    truth, estimate = df['actual_sum'], df['estimated_sum']

    # Columns reported under their own name as a plain mean.
    column_means = {col: df[col].mean() for col in ('n', 'min_val', 'max_val')}

    return {
        'epsilon': epsilon,
        **column_means,
        'noise_mean': df['noise'].mean(),
        'mae': mean_absolute_error(truth, estimate),
        'mse': mean_squared_error(truth, estimate),
        'rmse': root_mean_squared_error(truth, estimate),
        'mre': mean_relative_error(truth, estimate),
    }
     

def load_data(value_name,epsilon_list,beta_list, save_dir='output', output_dir='summary'):
    """Read the per-run result CSVs for one value column and build summaries.

    For every (beta, epsilon) pair the file
    ``{value_name}_ClippedSumEstimator_laplace_epsilon{epsilon}_beta{beta}.csv``
    is read from ``save_dir``; for every epsilon the file
    ``{value_name}_sum_estimator_laplace_epsilon{epsilon}.csv`` is read as
    well. Each file is reduced to one summary row, and the two resulting
    tables are written to ``output_dir`` as
    ``{value_name}_summary_clip.csv`` and ``{value_name}_summary_lap.csv``.

    Parameters
    ----------
    value_name : str
        Prefix of the result file names.
    epsilon_list, beta_list : iterable
        Parameter values whose result files are loaded. Rows of the clipped
        summary are ordered beta-major, epsilon-minor.
    save_dir : str, default 'output'
        Directory containing the per-run result CSVs.
    output_dir : str, default 'summary'
        Directory the summary CSVs are written to (created if missing).

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        The clipped-estimator summary and the plain-estimator summary.

    Raises
    ------
    FileNotFoundError
        Propagated from ``pandas.read_csv`` when an expected result file is
        missing.
    """
    # One summary row per (beta, epsilon) result file of the clipped estimator.
    all_row_clipped = []
    for beta in beta_list:
        for epsilon in epsilon_list:
            name = f"{value_name}_ClippedSumEstimator_laplace_epsilon{epsilon}_beta{beta}.csv"
            df_clip = pd.read_csv(os.path.join(save_dir, name))
            all_row_clipped.append(
                evaluate_clipped_sum_estimation_result(df_clip, epsilon=epsilon, beta=beta)
            )
    summary_clip = pd.DataFrame(all_row_clipped)

    # One summary row per epsilon result file of the plain sum estimator.
    all_row_lap = []
    for epsilon in epsilon_list:
        name = f"{value_name}_sum_estimator_laplace_epsilon{epsilon}.csv"
        df_lap = pd.read_csv(os.path.join(save_dir, name))
        all_row_lap.append(evaluate_sum_estimation_result(df_lap, epsilon=epsilon))
    summary_lap = pd.DataFrame(all_row_lap)

    # Create the target directory only once there is something to write, so a
    # failed load does not leave an empty directory behind.
    os.makedirs(output_dir, exist_ok=True)
    summary_clip.to_csv(os.path.join(output_dir, f"{value_name}_summary_clip.csv"), index=False)
    summary_lap.to_csv(os.path.join(output_dir, f"{value_name}_summary_lap.csv"), index=False)

    return summary_clip, summary_lap


In [43]:
# Epsilon and beta values whose result files are summarized and plotted
epsilon_list = [0.1, 0.25, 0.5, 1, 2]
beta_list = [0.001, 0.01, 0.1]

# Value names of the three datasets (the same strings are passed to load_data)
value_list = ["Salary", "Billing Amount", "trade_usd"]

In [44]:
# Summarize the salary results (clipped estimator and plain estimator)
summary_clipped_salary, summary_lap_salary = load_data(
    value_name='Salary',
    epsilon_list=epsilon_list,
    beta_list=beta_list,
)

In [45]:
# Summarize the health results (value column 'Billing Amount')
summary_clipped_health, summary_lap_health = load_data(
    value_name='Billing Amount',
    epsilon_list=epsilon_list,
    beta_list=beta_list,
)

In [46]:
# Summarize the trade results (value column 'trade_usd')
summary_clipped_trade, summary_lap_trade = load_data(
    value_name='trade_usd',
    epsilon_list=epsilon_list,
    beta_list=beta_list,
)

In [47]:
def plot_metric(summary_clipped_dict,summary_lap_dict,epsilon_list,metric='mae',beta = 0.01,save_fig = False):
    """Plot one error metric against epsilon for both estimators.

    One figure (log-scaled y axis) is produced per key of
    ``summary_clipped_dict``. It shows the Laplace-mechanism curve from
    ``summary_lap_dict`` and the clipped-sum-estimator curve for the given
    ``beta``.

    Parameters
    ----------
    summary_clipped_dict : dict[str, pandas.DataFrame]
        Clipped-estimator summaries; each frame needs the columns 'epsilon',
        'beta' and ``metric``.
    summary_lap_dict : dict[str, pandas.DataFrame]
        Plain-estimator summaries under the same keys; each frame needs the
        columns 'epsilon' and ``metric``.
    epsilon_list : iterable of float
        Epsilon values to show. Rows with other epsilon values are skipped.
    metric : str, default 'mae'
        Name of the summary column to plot.
    beta : float, default 0.01
        Beta value selecting the clipped-estimator rows.
    save_fig : bool, default False
        If True, write ``img/{key}_{metric}_beta{beta}.png`` and close the
        figure; otherwise display it.
    """
    wanted_eps = np.asarray(list(epsilon_list), dtype=float)

    def _rows_for_requested_epsilons(frame):
        # Take x and y from the same rows instead of pairing epsilon_list with
        # the frame by position, which silently mislabels points whenever the
        # row order or row count differs from epsilon_list.
        eps = frame['epsilon'].to_numpy(dtype=float)
        keep = np.isclose(eps[:, None], wanted_eps[None, :]).any(axis=1)
        return frame[keep].sort_values('epsilon')

    for val, df_clip in summary_clipped_dict.items():
        # Tolerant float comparison so a beta that went through a CSV
        # round-trip still matches.
        beta_mask = np.isclose(df_clip['beta'].to_numpy(dtype=float), beta)
        df_clip_beta = _rows_for_requested_epsilons(df_clip[beta_mask])
        df_lap = _rows_for_requested_epsilons(summary_lap_dict[val])

        fig, ax = plt.subplots(figsize=(6, 4))
        ax.set_yscale('log')

        # Plot Laplace
        ax.plot(df_lap['epsilon'], df_lap[metric], 'o-r', label='Laplace Mechanism')

        # Plot Clipped
        ax.plot(df_clip_beta['epsilon'], df_clip_beta[metric], 's-b', label='Clipped Sum Estimator')

        ax.set_xlabel('Privacy Parameter ε', fontsize=11)
        ax.set_ylabel(f'{metric.upper()} (Logarithm Scale)', fontsize=11)
        ax.set_title(f"{val} - {metric.upper()}", fontsize=12)
        ax.legend()

        if save_fig:
            save_dir = 'img'
            os.makedirs(save_dir, exist_ok=True)
            filename = os.path.join(save_dir, f"{val}_{metric}_beta{beta}.png")
            fig.savefig(filename, dpi=300, bbox_inches='tight')
            print(f"Saved plot: {filename}")
            # Close explicitly so figures do not pile up across the loop.
            plt.close(fig)
        else:
            plt.show()

In [48]:
# Plain-estimator summaries keyed by the dataset label used in plot titles
summary_lap_dict = dict([
    ('Salary', summary_lap_salary),
    ('Billing Amount', summary_lap_health),
    ('Trade', summary_lap_trade),
])

# Clipped-estimator summaries under the same labels
summary_clipped_dict = dict([
    ('Salary', summary_clipped_salary),
    ('Billing Amount', summary_clipped_health),
    ('Trade', summary_clipped_trade),
])

In [49]:
# Summary columns to plot, one set of figures per metric
metrics = [
    "mae",
    "mse",
    "rmse",
    "mre",
]

In [50]:
# Save one figure per dataset for every metric, using the first beta value
for evl in metrics:
    plot_metric(
        summary_clipped_dict,
        summary_lap_dict,
        epsilon_list=epsilon_list,
        metric=evl,
        beta=beta_list[0],
        save_fig=True,
    )

Saved plot: img\Salary_mae_beta0.001.png
Saved plot: img\Billing Amount_mae_beta0.001.png
Saved plot: img\Trade_mae_beta0.001.png
Saved plot: img\Salary_mse_beta0.001.png
Saved plot: img\Billing Amount_mse_beta0.001.png
Saved plot: img\Trade_mse_beta0.001.png
Saved plot: img\Salary_rmse_beta0.001.png
Saved plot: img\Billing Amount_rmse_beta0.001.png
Saved plot: img\Trade_rmse_beta0.001.png
Saved plot: img\Salary_mre_beta0.001.png
Saved plot: img\Billing Amount_mre_beta0.001.png
Saved plot: img\Trade_mre_beta0.001.png


In [54]:
def plot_paras(summary_clipped_dict, epsilon_list,para = 'tau',beta = beta_list,save_fig = False):
    """Plot one clipped-estimator summary column against epsilon, per beta.

    One figure is produced per key of ``summary_clipped_dict``, with one
    curve for every requested beta value.

    Parameters
    ----------
    summary_clipped_dict : dict[str, pandas.DataFrame]
        Clipped-estimator summaries; each frame needs the columns 'epsilon',
        'beta' and ``para``.
    epsilon_list : iterable of float
        Epsilon values to show. Rows with other epsilon values are skipped.
    para : str, default 'tau'
        Summary column to plot. 'clip_thresh_mean' is drawn on a log-scaled
        y axis and saved as ``{key}_clipThresh.png``; every other column is
        saved as ``{key}_{para}.png``.
    beta : float or iterable of float, default ``beta_list``
        Beta value(s) to draw. The default is the module-level list as it was
        when this function was defined.
    save_fig : bool, default False
        If True, write the figure into ``img/`` and close it; otherwise
        display it.
    """
    # Honour the ``beta`` argument. Previously the loop iterated over the
    # global ``beta_list`` and shadowed the parameter, so it was ignored.
    betas = [beta] if np.isscalar(beta) else list(beta)
    wanted_eps = np.asarray(list(epsilon_list), dtype=float)

    save_dir = 'img'
    color_map = {
        0.001: 'blue',
        0.01: 'red',
        0.1: 'green',
        0.5: 'orange'
    }
    linestyles = ['-', '--', ':', '-.']
    markers = ['o', 's', '^', 'D']

    for val, df_clip in summary_clipped_dict.items():
        fig, ax = plt.subplots(figsize=(8, 6))
        beta_column = df_clip['beta'].to_numpy(dtype=float)

        for i, beta_value in enumerate(betas):
            df_clip_beta = df_clip[np.isclose(beta_column, beta_value)]
            # Take x and y from the same rows rather than pairing
            # epsilon_list with the frame by position.
            eps = df_clip_beta['epsilon'].to_numpy(dtype=float)
            keep = np.isclose(eps[:, None], wanted_eps[None, :]).any(axis=1)
            df_clip_beta = df_clip_beta[keep].sort_values('epsilon')

            ax.plot(df_clip_beta['epsilon'], df_clip_beta[para],
                    label=f'β={beta_value}',
                    # Betas without a fixed colour use matplotlib's default cycle.
                    color=color_map.get(beta_value, None),
                    linestyle=linestyles[i % len(linestyles)],
                    marker=markers[i % len(markers)])

        ax.set_xlabel('Privacy Parameter ε', fontsize=16)

        if para == 'clip_thresh_mean':
            ax.set_ylabel("Clipping Threshold (Logarithm Scale)", fontsize=16)
            ax.set_yscale('log')
            ax.set_title(f"{val} - Clipping Threshold", fontsize=15)
            filename = os.path.join(save_dir, f"{val}_clipThresh.png")
        elif para == 'tau':
            ax.set_ylabel("τ", fontsize=16)
            ax.set_title(f"{val} - Rank Error", fontsize=15)
            filename = os.path.join(save_dir, f"{val}_{para}.png")
        else:
            ax.set_ylabel(f'{para}')
            ax.set_title(f"{val} - {para}")
            filename = os.path.join(save_dir, f"{val}_{para}.png")

        ax.legend()
        ax.grid(True, linestyle='--', alpha=0.3)
        fig.tight_layout()

        if save_fig:
            # Only touch the filesystem when a file is actually written.
            os.makedirs(save_dir, exist_ok=True)
            fig.savefig(filename, dpi=300, bbox_inches='tight')
            print(f"Saved plot: {filename}")
            plt.close(fig)
        else:
            plt.show()
    

In [55]:
# Clipped-estimator summary columns to plot against epsilon
paras = [
    "tau",
    "clip_thresh_mean",
]

In [56]:
# Save one figure per dataset for every parameter, with a curve for each beta
for para in paras:
    plot_paras(
        summary_clipped_dict,
        epsilon_list,
        para=para,
        beta=beta_list,
        save_fig=True,
    )

Saved plot: img\Salary_tau.png
Saved plot: img\Billing Amount_tau.png
Saved plot: img\Trade_tau.png
Saved plot: img\Salary_clipThresh.png
Saved plot: img\Billing Amount_clipThresh.png
Saved plot: img\Trade_clipThresh.png
