In [None]:
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Directory containing batch attack results (resolved relative to the
# notebook's working directory).
results_dir = 'batch_attack_results'

# 1. List all summary and per-run CSVs.
# glob returns matches in arbitrary, filesystem-dependent order; sorting keeps
# the file lists (and anything built from them later) reproducible across runs.
all_csvs = sorted(glob.glob(os.path.join(results_dir, '*.csv')))

# Per-run files are recognised by the 'results_tgt-' marker in their file name;
# every other CSV in the directory is treated as a summary file.
per_run_csvs = [f for f in all_csvs if 'results_tgt-' in os.path.basename(f)]
summary_csvs = [f for f in all_csvs if 'results_tgt-' not in os.path.basename(f)]

In [None]:
# Combine all of the per-run CSVs into a single DataFrame with a fresh
# 0..n-1 index.
if not per_run_csvs:
    # pd.concat would otherwise fail with an opaque "No objects to concatenate";
    # fail early and say what is actually missing.
    raise ValueError(
        f"No per-run CSVs (file names containing 'results_tgt-') found in "
        f"{results_dir!r}; nothing to combine."
    )
dfs = [pd.read_csv(f) for f in per_run_csvs]
df_all = pd.concat(dfs, ignore_index=True)
print(df_all.head())
print(f"Combined DataFrame shape: {df_all.shape}")

In [None]:
# Plot a histogram of the final Levenshtein distance for each attack.
# `df_all[0]` is a *column* lookup for the label 0 and raises KeyError on a
# frame whose columns are named by the CSV header, so select the column by
# name and use positional access for the first row instead.
# NOTE(review): assumes 'target_lev_dist' holds one numeric final distance per
# row -- confirm against the CSV schema.
print(df_all['target_lev_dist'].iloc[0])

# Coerce to numbers and drop anything unparsable/missing so the histogram only
# counts rows that actually carry a distance.
final_lev_dist = pd.to_numeric(df_all['target_lev_dist'], errors='coerce').dropna()

plt.figure(figsize=(8, 5))
plt.hist(final_lev_dist)
plt.xlabel('Final Target Levenshtein Distance')
plt.ylabel('Number of Attacks')
plt.title('Final Levenshtein Distance per Attack')
plt.grid(True)
plt.show()

In [None]:
import ast
import numpy as np
import matplotlib.pyplot as plt

# --- Target Loss ---
# Every non-null "target_loss_hist" cell is parsed from its Python-literal
# text back into a sequence of loss values (presumably one per PGD iteration).
loss_lists = list(map(ast.literal_eval, df_all["target_loss_hist"].dropna()))
max_len = max(map(len, loss_lists))
# Histories can differ in length: start from an all-NaN matrix and overwrite
# the leading entries of each row with that run's values.
loss_array = np.full((len(loss_lists), max_len), np.nan)
for padded_row, history in zip(loss_array, loss_lists):
    # Rows yielded by iterating a 2-D array are views, so this writes in place.
    padded_row[:len(history)] = history
# nanmean skips the NaN padding: each step averages only the runs that reached it.
mean_target_loss = np.nanmean(loss_array, axis=0)

# --- Ensemble Loss ---
# Here each parsed cell is a list of histories (one inner list per ensemble model).
ensemble_loss_lists = list(map(ast.literal_eval, df_all["ensemble_loss_hists"].dropna()))
# Pad into a NaN-filled cube of shape (runs, ensemble_models, steps).
max_ens = max(map(len, ensemble_loss_lists))
max_steps = max(
    len(history) for models in ensemble_loss_lists for history in models
)
ens_loss_array = np.full((len(ensemble_loss_lists), max_ens, max_steps), np.nan)
for run_block, models in zip(ens_loss_array, ensemble_loss_lists):
    for padded_row, history in zip(run_block, models):
        padded_row[:len(history)] = history
# Collapse the run and model axes together, leaving one mean per step.
mean_ensemble_loss = np.nanmean(ens_loss_array, axis=(0, 1))

# --- Plot ---
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(mean_target_loss, marker='o', label='Mean Target Loss')
ax.plot(mean_ensemble_loss, marker='s', label='Mean Ensemble Loss')
ax.set_xlabel('PGD Iteration')
ax.set_ylabel('Mean Loss')
ax.set_title('Mean Loss at Each PGD Iteration (Target vs Ensemble)')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# --- Target Lev Dist ---
# Every non-null "target_lev_dists" cell is parsed from its Python-literal
# text back into a sequence of distances (presumably one per PGD iteration).
target_lev_dists = list(map(ast.literal_eval, df_all["target_lev_dists"].dropna()))
max_len = max(map(len, target_lev_dists))
# Sequences can differ in length: start from an all-NaN matrix and overwrite
# the leading entries of each row with that run's values.
target_lev_array = np.full((len(target_lev_dists), max_len), np.nan)
for padded_row, distances in zip(target_lev_array, target_lev_dists):
    # Rows yielded by iterating a 2-D array are views, so this writes in place.
    padded_row[:len(distances)] = distances
# nanmean skips the NaN padding: each step averages only the runs that reached it.
mean_target_lev = np.nanmean(target_lev_array, axis=0)

# --- Ensemble Lev Dist ---
# Here each parsed cell is a list of sequences (one inner list per ensemble model).
ensemble_lev_dists = list(map(ast.literal_eval, df_all["ensemble_lev_dists"].dropna()))
# Pad into a NaN-filled cube of shape (runs, ensemble_models, steps).
max_ens = max(map(len, ensemble_lev_dists))
max_steps = max(
    len(distances) for models in ensemble_lev_dists for distances in models
)
ens_lev_array = np.full((len(ensemble_lev_dists), max_ens, max_steps), np.nan)
for run_block, models in zip(ens_lev_array, ensemble_lev_dists):
    for padded_row, distances in zip(run_block, models):
        padded_row[:len(distances)] = distances
# Collapse the run and model axes together, leaving one mean per step.
mean_ensemble_lev = np.nanmean(ens_lev_array, axis=(0, 1))

# --- Plot ---
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(mean_target_lev, marker='o', label='Mean Target Lev Dist')
ax.plot(mean_ensemble_lev, marker='s', label='Mean Ensemble Lev Dist')
ax.set_xlabel('PGD Iteration')
ax.set_ylabel('Mean Lev Dist')
ax.set_title('Mean Lev Dist at Each PGD Iteration (Target vs Ensemble)')
ax.legend()
ax.grid(True)
plt.show()