In [2]:
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Directory containing batch attack results
results_dir = 'batch_attack_results'

# 1. List all summary and per-run CSVs
all_csvs = glob.glob(os.path.join(results_dir, '*.csv'))
all_csvs

['batch_attack_results/an4-v2-THE_JUICE_OF_LEMONS_MAKES_FINE_PUNCH-5683-32866-0027.csv',
 'batch_attack_results/librispeech-v2-THE_HARDER_HE_TRIED_THE_LESS_HE_GOT_DONE-5683-32866-0027.csv',
 'batch_attack_results/an4-v1-A_CRAMP_IS_NO_SMALL_DANGER_ON_A_SWIM-1188-133604-0043.csv',
 'batch_attack_results/an4-v1-THE_GRASS_CURLED_AROUND_THE_FENCE_POST-4507-16021-0015.csv',
 'batch_attack_results/librispeech-v2-THE_PENNANT_WAVED_WHEN_THE_WIND_BLEW-260-123440-0008.csv',
 'batch_attack_results/an4-v2-A_CRAMP_IS_NO_SMALL_DANGER_ON_A_SWIM-1089-134691-0005.csv',
 'batch_attack_results/ted-v2-A_CRAMP_IS_NO_SMALL_DANGER_ON_A_SWIM-260-123440-0008.csv',
 'batch_attack_results/ted-v1-THE_HEART_BEAT_STRONGLY_AND_WITH_FIRM_STROKES-1188-133604-0043.csv',
 'batch_attack_results/ted-v2-A_POUND_OF_SUGAR_COSTS_MORE_THAN_EGGS-5683-32866-0027.csv',
 'batch_attack_results/ted-v1-THE_HEART_BEAT_STRONGLY_AND_WITH_FIRM_STROKES-1089-134691-0005.csv',
 'batch_attack_results/an4-v2-THE_PENNANT_WAVED_WHEN_THE_WIND_BLE

In [3]:
# combine all of the per_run_csvs into a single dataframe
dfs = [pd.read_csv(f) for f in all_csvs]
df_all = pd.concat(dfs, ignore_index=True)
print(df_all.head())
print(f"Combined DataFrame shape: {df_all.shape}")

                                        adv_wav_path  \
0  batch_attack_results/an4-v2-THE_JUICE_OF_LEMON...   
1  batch_attack_results/librispeech-v2-THE_HARDER...   
2  batch_attack_results/an4-v1-A_CRAMP_IS_NO_SMAL...   
3  batch_attack_results/an4-v1-THE_GRASS_CURLED_A...   
4  batch_attack_results/librispeech-v2-THE_PENNAN...   

                              input_wav  \
0   processed_sound/5683-32866-0027.wav   
1   processed_sound/5683-32866-0027.wav   
2  processed_sound/1188-133604-0043.wav   
3   processed_sound/4507-16021-0015.wav   
4   processed_sound/260-123440-0008.wav   

                            target_sentence target_model target_version  \
0      THE JUICE OF LEMONS MAKES FINE PUNCH          an4             v2   
1  THE HARDER HE TRIED THE LESS HE GOT DONE  librispeech             v2   
2      A CRAMP IS NO SMALL DANGER ON A SWIM          an4             v1   
3    THE GRASS CURLED AROUND THE FENCE POST          an4             v1   
4      THE PENNANT WAVED WHEN

In [6]:
# plot a histogram of the final Levenshtein distance for each attack
print(df_all['target_lev_dists'])

0     [34, 33, 34, 34, 33, 32, 34, 33]
1     [48, 52, 53, 50, 48, 55, 53, 52]
2     [30, 31, 31, 31, 31, 32, 31, 31]
3     [28, 29, 28, 28, 29, 30, 29, 29]
4     [35, 34, 37, 35, 34, 38, 38, 35]
5     [40, 38, 36, 35, 35, 35, 36, 37]
6     [35, 37, 35, 37, 38, 34, 37, 36]
7     [55, 56, 58, 57, 56, 56, 56, 56]
8     [45, 47, 42, 44, 44, 45, 46, 45]
9     [47, 47, 46, 48, 47, 48, 46, 47]
10    [31, 31, 34, 34, 34, 39, 38, 35]
11    [33, 31, 31, 32, 31, 32, 34, 31]
12    [28, 28, 27, 29, 29, 30, 30, 30]
13    [50, 51, 48, 48, 49, 50, 49, 50]
14    [38, 35, 36, 34, 40, 38, 37, 39]
15    [61, 60, 60, 61, 58, 59, 58, 59]
16    [54, 56, 58, 56, 55, 55, 53, 55]
17    [54, 54, 55, 54, 54, 54, 52, 56]
Name: target_lev_dists, dtype: object


In [None]:
import ast
import numpy as np
import matplotlib.pyplot as plt

# --- Target Loss ---
loss_lists = [ast.literal_eval(x) for x in df_all["target_loss_hist"].dropna()]
max_len = max(len(lst) for lst in loss_lists)
loss_array = np.full((len(loss_lists), max_len), np.nan)
for i, lst in enumerate(loss_lists):
    loss_array[i, :len(lst)] = lst
mean_target_loss = np.nanmean(loss_array, axis=0)

# --- Ensemble Loss ---
ensemble_loss_lists = [ast.literal_eval(x) for x in df_all["ensemble_loss_hists"].dropna()]
# Flatten to shape: (runs, ensemble_models, steps)
max_ens = max(len(run) for run in ensemble_loss_lists)
max_steps = max(len(model) for run in ensemble_loss_lists for model in run)
ens_loss_array = np.full((len(ensemble_loss_lists), max_ens, max_steps), np.nan)
for i, run in enumerate(ensemble_loss_lists):
    for j, model in enumerate(run):
        ens_loss_array[i, j, :len(model)] = model
# Mean over runs and ensemble models
mean_ensemble_loss = np.nanmean(ens_loss_array, axis=(0,1))

# --- Plot ---
plt.figure(figsize=(8,5))
plt.plot(mean_target_loss, marker='o', label='Mean Target Loss')
plt.plot(mean_ensemble_loss, marker='s', label='Mean Ensemble Loss')
plt.xlabel('PGD Iteration')
plt.ylabel('Mean Loss')
plt.title('Mean Loss at Each PGD Iteration (Target vs Ensemble)')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# --- Target Lev Dist ---
target_lev_dists = [ast.literal_eval(x) for x in df_all["target_lev_dists"].dropna()]
max_len = max(len(lst) for lst in target_lev_dists)
target_lev_array = np.full((len(target_lev_dists), max_len), np.nan)
for i, lst in enumerate(target_lev_dists):
    target_lev_array[i, :len(lst)] = lst
mean_target_lev = np.nanmean(target_lev_array, axis=0)

# --- Ensemble Lev Dist ---
ensemble_lev_dists = [ast.literal_eval(x) for x in df_all["ensemble_lev_dists"].dropna()]
# Flatten to shape: (runs, ensemble_models, steps)
max_ens = max(len(run) for run in ensemble_lev_dists)
max_steps = max(len(model) for run in ensemble_lev_dists for model in run)
ens_lev_array = np.full((len(ensemble_lev_dists), max_ens, max_steps), np.nan)
for i, run in enumerate(ensemble_lev_dists):
    for j, model in enumerate(run):
        ens_lev_array[i, j, :len(model)] = model
# Mean over runs and ensemble models
mean_ensemble_lev = np.nanmean(ens_lev_array, axis=(0,1))

# --- Plot ---
plt.figure(figsize=(8,5))
plt.plot(mean_target_lev, marker='o', label='Mean Target Lev Dist')
plt.plot(mean_ensemble_lev, marker='s', label='Mean Ensemble Lev Dist')
plt.xlabel('PGD Iteration')
plt.ylabel('Mean Lev Dist')
plt.title('Mean Lev Dist at Each PGD Iteration (Target vs Ensemble)')
plt.legend()
plt.grid(True)
plt.show()