In [None]:
# --- Imports ---
# Import standard libraries and the plotting utilities we created
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from src.plotting_utils import load_results, process_history, plot_accuracy_comparison, generate_summary_table

# --- Configuration ---
# Directory (relative path) holding the saved experiment outputs.
RESULTS_DIR = "../results"

# Result files read by the visualization cells below.
DP_RESULTS_PATH = f"{RESULTS_DIR}/gradient_perturbation_results.pkl"
DS_RESULTS_PATH = f"{RESULTS_DIR}/dataset_perturbation_results.csv"

# 1-based epoch whose validation accuracy is reported in the summary tables
# (the cells index the history with TARGET_EPOCH - 1).
TARGET_EPOCH = 20

In [None]:
# --- Gradient Perturbation Visualization ---
# Builds the summary table and the validation-accuracy plot for the
# gradient-perturbation runs. The baseline curve is taken from the
# dataset-perturbation results (the row whose epsilon is infinite).

# 1. Load the results
dp_results = load_results(DP_RESULTS_PATH)
non_dp_df_for_baseline = load_results(DS_RESULTS_PATH)

# 2. Extract baseline history for the plot
baseline_row = non_dp_df_for_baseline[np.isinf(non_dp_df_for_baseline['input_data_noise_epsilon'])].iloc[0]
baseline_history = process_history(baseline_row['keras_training_history'])

# 3. Prepare DP data for plotting and table generation
dp_plot_data = []
table_data_dp = []

# Define which models to show in the plot
MODELS_TO_PLOT_DP = [
    {'epsilon_label': 7.27, 'noise_multiplier': 0.9, 'l2_norm_clip': 0.5},
    {'epsilon_label': 13.56, 'noise_multiplier': 0.7, 'l2_norm_clip': 0.5},
    {'epsilon_label': 39.28, 'noise_multiplier': 0.5, 'l2_norm_clip': 0.5},
    {'epsilon_label': 250.48, 'noise_multiplier': 0.3, 'l2_norm_clip': 0.5}
]


def _matches(value, target):
    """Return True when `value` equals `target`, tolerating float round-off.

    Exact `==` on floats silently fails for values such as 0.7000000000000001,
    which would drop a curve from the plot without any error.
    """
    try:
        return bool(np.isclose(value, target))
    except TypeError:
        # Non-numeric values (e.g. None) cannot be compared with a tolerance;
        # fall back to plain equality.
        return value == target


# Column label derived from TARGET_EPOCH so the table never mislabels the epoch.
# NOTE(review): assumes generate_summary_table does not rely on the literal
# 'Accuracy (Epoch 20)' key - confirm before changing TARGET_EPOCH.
accuracy_column = f'Accuracy (Epoch {TARGET_EPOCH})'
matched_spec_indices = set()

for result in sorted(dp_results, key=lambda x: x['epsilon']):
    # Prepare data for the table
    history = result['history']
    val_accuracy = history.get('val_accuracy', []) if history else []
    # NaN when the run has no history or stopped before TARGET_EPOCH.
    acc_at_target_epoch = val_accuracy[TARGET_EPOCH - 1] if len(val_accuracy) >= TARGET_EPOCH else np.nan
    table_data_dp.append({
        'Epsilon': result['epsilon'],
        accuracy_column: acc_at_target_epoch,
        'Final F1-Score': result['test_f1_macro'],
        'Final AUC': result['test_auc_macro']
    })

    # Prepare data for the plot: keep the run if it matches a requested model.
    for spec_index, spec in enumerate(MODELS_TO_PLOT_DP):
        if _matches(result['noise_multiplier'], spec['noise_multiplier']) and _matches(result['l2_norm_clip'], spec['l2_norm_clip']):
            dp_plot_data.append({'epsilon_label': spec['epsilon_label'], 'history': history})
            matched_spec_indices.add(spec_index)
            break

# Surface requested models that have no result instead of silently omitting them.
unmatched_specs = [spec for spec_index, spec in enumerate(MODELS_TO_PLOT_DP) if spec_index not in matched_spec_indices]
if unmatched_specs:
    print(f"Warning: no gradient-perturbation result found for: {unmatched_specs}")

# 4. Generate and display visuals
print("--- Gradient Perturbation Results ---")
# NOTE(review): a placeholder path is passed here; confirm whether
# generate_summary_table writes a file to it.
generate_summary_table("Gradient Perturbation Summary", "dummy_path.txt", table_data_dp)
plot_accuracy_comparison(
    title='Validation Accuracy (Gradient Perturbation)',
    output_path='../results/gradient_perturbation_accuracy.png',
    baseline_history=baseline_history,
    dp_results=dp_plot_data,
    style_config={'baseline_color': 'black', 'dp_colors': ['#00008B', '#4169E1', '#20B2AA', '#87CEFA'], 'xlim': (0,20), 'ylim': (0.55, 0.71)} # Simplified style
)
plt.show() # Display plot directly in the notebook

In [None]:
# --- Dataset Perturbation Visualization ---
# Builds the summary table and the validation-accuracy plot for the
# dataset-perturbation runs. The row whose epsilon is infinite is the
# baseline and is labelled 'Non-DP' in the table.

# 1. Load the results
ds_df = load_results(DS_RESULTS_PATH)
ds_df['history'] = ds_df['keras_training_history'].apply(process_history)

# 2. Prepare data
# Column label derived from TARGET_EPOCH so the table never mislabels the epoch.
# NOTE(review): assumes generate_summary_table does not rely on the literal
# 'Accuracy (Epoch 20)' key - confirm before changing TARGET_EPOCH.
accuracy_column = f'Accuracy (Epoch {TARGET_EPOCH})'
table_data_ds = []
for _, row in ds_df.sort_values(by='input_data_noise_epsilon').iterrows():
    history = row['history']
    val_accuracy = history.get('val_accuracy', []) if history else []
    # NaN when the run has no history or stopped before TARGET_EPOCH.
    acc_at_target_epoch = val_accuracy[TARGET_EPOCH - 1] if len(val_accuracy) >= TARGET_EPOCH else np.nan
    table_data_ds.append({
        'Epsilon': 'Non-DP' if np.isinf(row['input_data_noise_epsilon']) else row['input_data_noise_epsilon'],
        accuracy_column: acc_at_target_epoch,
        'Final F1-Score': row['test_f1_score'],
        'Final AUC': row['test_auc']
    })

# Plotting data
EPSILONS_TO_PLOT_DS = [1.0, 7.27, 39.28, 250.48]
baseline_row_ds = ds_df[np.isinf(ds_df['input_data_noise_epsilon'])].iloc[0]

# Select rows by tolerance rather than exact float equality (`isin`), so an
# epsilon stored with round-off still matches its requested value.
epsilon_values = ds_df['input_data_noise_epsilon'].to_numpy(dtype=float)
is_requested_epsilon = np.isclose(epsilon_values[:, None], EPSILONS_TO_PLOT_DS).any(axis=1)
dp_rows_ds = ds_df[is_requested_epsilon].sort_values(by='input_data_noise_epsilon')
dp_plot_data_ds = [{'epsilon_label': row['input_data_noise_epsilon'], 'history': row['history']} for _, row in dp_rows_ds.iterrows()]

# Surface requested epsilons that have no result instead of silently omitting them.
missing_epsilons = [eps for eps in EPSILONS_TO_PLOT_DS if not np.isclose(epsilon_values, eps).any()]
if missing_epsilons:
    print(f"Warning: no dataset-perturbation result found for epsilon(s): {missing_epsilons}")

# One colour per plotted curve, so the palette always matches the data
# actually passed to the plot (not the length of the requested list).
dp_colors_ds = plt.get_cmap('YlOrRd')(np.linspace(0.3, 0.9, len(dp_plot_data_ds)))


# 3. Generate and display visuals
print("\n--- Dataset Perturbation Results ---")
# NOTE(review): a placeholder path is passed here; confirm whether
# generate_summary_table writes a file to it.
generate_summary_table("Dataset Perturbation Summary", "dummy_path.txt", table_data_ds)
plot_accuracy_comparison(
    title='Validation Accuracy (Dataset Perturbation)',
    output_path='../results/dataset_perturbation_accuracy.png',
    baseline_history=baseline_row_ds['history'],
    dp_results=dp_plot_data_ds,
    style_config={'baseline_color': 'black', 'dp_colors': dp_colors_ds, 'xlim': (0,20), 'ylim': (0.55, 0.71)} # Simplified style
)
plt.show() # Display plot directly in the notebook