# Topology-Based Adversarial Detection: Results Analysis

This notebook collects the per-trial metrics written under `out/` and analyzes them across datasets, epsilon values, detection methods (topology vs. baseline), and trials.

In [None]:
import json
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from glob import glob

# Plot styling shared by every figure in the notebook.
# NOTE(review): the 'seaborn-v0_8-*' style names only exist in newer matplotlib
# releases — confirm the installed matplotlib provides this style.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('husl')

# Paths are relative to the notebook's working directory.
BASE_DIR = Path('.')
# Root folder that collect_all_results() scans for per-dataset trial outputs.
OUT_DIR = BASE_DIR / 'out'

## 1. Collect All Results Across Trials

In [None]:
def _collect_trial_rows(trials_dir, dataset_fields, method, epsilon, config_label):
    """Return one result row per trial under ``trials_dir`` that reports adversarial metrics.

    Args:
        trials_dir: Directory expected to contain ``trial_*`` sub-directories.
        dataset_fields: Dataset-level columns copied into every row.
        method: Value stored in the ``method`` column.
        epsilon: Epsilon parsed from the config name, or ``None``.
        config_label: Value stored in the ``config`` column.

    Returns:
        A list of row dicts. Empty when ``trials_dir`` does not exist or no
        trial has a non-empty ``metrics_adv`` section in its ``metrics.json``.
    """
    metric_names = ('roc_auc', 'pr_auc', 'fpr_at_tpr95', 'accuracy', 'f1', 'precision', 'recall')
    rows = []
    if not trials_dir.exists():
        return rows

    # sorted() keeps the row order deterministic across file systems.
    for trial_dir in sorted(trials_dir.glob('trial_*')):
        metrics_file = trial_dir / 'metrics' / 'metrics.json'
        if not metrics_file.exists():
            continue

        with open(metrics_file, encoding='utf-8') as f:
            metrics = json.load(f)

        # Trials without adversarial metrics contribute no row.
        adv_metrics = metrics.get('metrics_adv', {})
        if not adv_metrics:
            continue

        rows.append({
            **dataset_fields,
            'method': method,
            'epsilon': epsilon,
            'trial_id': trial_dir.name,
            # Missing metrics become None rather than raising.
            **{name: adv_metrics.get(name) for name in metric_names},
            'config': config_label,
            'source_file': str(metrics_file),
        })
    return rows


def collect_all_results(out_dir=None):
    """Collect results from all trials across all datasets and configurations.

    For every known dataset/config pair this scans
    ``<out_dir>/<dataset>/<config>/runs/trials/trial_*`` (topology method) and
    ``<out_dir>/<dataset>/baseline/<config>/runs/trials/trial_*`` (baseline
    method), reading ``metrics/metrics.json`` from each trial.

    Args:
        out_dir: Root results directory. Defaults to the module-level
            ``OUT_DIR`` when omitted.

    Returns:
        A ``pandas.DataFrame`` with one row per trial that has a non-empty
        ``metrics_adv`` section. The columns are always present, even when no
        results are found, so downstream column lookups do not fail on an
        empty result.
    """
    root = OUT_DIR if out_dir is None else Path(out_dir)

    columns = [
        'dataset', 'dataset_key', 'model', 'data_type', 'method', 'epsilon',
        'trial_id', 'roc_auc', 'pr_auc', 'fpr_at_tpr95', 'accuracy', 'f1',
        'precision', 'recall', 'config', 'source_file',
    ]

    base_configs = ['base_e_0.1', 'base_e_0.2', 'base_e_0.3']
    # NOTE(review): every synthetic dataset uses the 'base_torus_one_hole_*'
    # config folder names (including the '_copy' suffix for 0.3). This matches
    # the hard-coded feature paths later in the notebook, so it looks
    # intentional — confirm against the experiment layout.
    synthetic_configs = [
        'base_torus_one_hole_e_0.1',
        'base_torus_one_hole_e_0.2',
        'base_torus_one_hole_e_0.3_copy',
    ]

    # Dataset configurations
    datasets = {
        'mnist': {'name': 'MNIST', 'model': 'CNN', 'type': 'Image', 'configs': base_configs},
        'synthetic_shapes': {'name': 'Synthetic Shapes', 'model': 'CNN', 'type': 'Image', 'configs': base_configs},
        'tabular': {'name': 'Tabular', 'model': 'MLP', 'type': 'Tabular', 'configs': base_configs},
        'torus_one_hole': {'name': 'Torus (1 hole)', 'model': 'MLP', 'type': 'Synthetic', 'configs': synthetic_configs},
        'torus_two_holes': {'name': 'Torus (2 holes)', 'model': 'MLP', 'type': 'Synthetic', 'configs': synthetic_configs},
        'nested_spheres': {'name': 'Nested Spheres', 'model': 'MLP', 'type': 'Synthetic', 'configs': synthetic_configs},
        'blobs': {'name': 'Blobs', 'model': 'MLP', 'type': 'Synthetic', 'configs': synthetic_configs},
    }

    results = []
    for ds_key, ds_info in datasets.items():
        ds_path = root / ds_key
        if not ds_path.exists():
            continue

        dataset_fields = {
            'dataset': ds_info['name'],
            'dataset_key': ds_key,
            'model': ds_info['model'],
            'data_type': ds_info['type'],
        }

        for config in ds_info['configs']:
            # Epsilon is the text after the last '_e_', minus any '_copy' suffix.
            if '_e_' in config:
                epsilon = float(config.split('_e_')[-1].replace('_copy', ''))
            else:
                epsilon = None

            # Topology rows come first, then baseline rows, for each config.
            results.extend(_collect_trial_rows(
                ds_path / config / 'runs' / 'trials',
                dataset_fields, 'topology', epsilon, config,
            ))
            results.extend(_collect_trial_rows(
                ds_path / 'baseline' / config / 'runs' / 'trials',
                dataset_fields, 'baseline', epsilon, f'baseline/{config}',
            ))

    # Explicit columns keep the schema stable when no results were found.
    return pd.DataFrame(results, columns=columns)

# Build the per-trial results table used by every later cell, then print a
# quick inventory of what was found.
df = collect_all_results()
print(f"Total results collected: {len(df)}")
print(f"\nDatasets: {df['dataset'].unique()}")
print(f"Methods: {df['method'].unique()}")
# dropna() keeps rows without a parsed epsilon out of the sorted list.
print(f"Epsilons: {sorted(df['epsilon'].dropna().unique())}")

In [None]:
# Preview the first 10 collected trial rows (not an aggregate summary).
df.head(10)

## 2. Summary Statistics Per Dataset/Epsilon/Method

In [None]:
def compute_summary_stats(df):
    """Aggregate trial metrics per (dataset, epsilon, method) group.

    ROC-AUC gets mean, std, min, max and count; PR-AUC and FPR@TPR95 get mean
    and std. Values are rounded to 4 decimals and the two-level column index
    is flattened to ``<metric>_<stat>`` names.

    Args:
        df: Per-trial results with ``dataset``, ``epsilon``, ``method``,
            ``roc_auc``, ``pr_auc`` and ``fpr_at_tpr95`` columns.

    Returns:
        A DataFrame with the group keys as ordinary columns followed by the
        flattened statistic columns.
    """
    group_keys = ['dataset', 'epsilon', 'method']
    spread = ['mean', 'std']
    aggregations = {
        'roc_auc': spread + ['min', 'max', 'count'],
        'pr_auc': spread,
        'fpr_at_tpr95': spread,
    }

    stats = df.groupby(group_keys).agg(aggregations).round(4)

    # Collapse ('roc_auc', 'mean') style tuples into 'roc_auc_mean'.
    stats.columns = [f'{metric}_{stat}'.strip() for metric, stat in stats.columns.values]

    return stats.reset_index()

# Aggregate over all trials; summary_df is also written to CSV in the export cell.
summary_df = compute_summary_stats(df)
print("Summary Statistics (all trials aggregated):")
# Bare expression: rendered as the cell output in the notebook.
summary_df

In [None]:
# Best results per dataset/epsilon/method
# idxmax() yields the row label of the highest ROC-AUC in each group; .loc then
# pulls the full rows for those trials.
# NOTE(review): a group whose roc_auc values are all missing has no valid
# idxmax — confirm every collected trial reports roc_auc.
best_results = df.loc[df.groupby(['dataset', 'epsilon', 'method'])['roc_auc'].idxmax()]
# Keep trial_id and source_file so each best score can be traced back to its run.
best_results = best_results[['dataset', 'epsilon', 'method', 'roc_auc', 'pr_auc', 'fpr_at_tpr95', 'trial_id', 'source_file']]
print("\nBest Results Per Configuration:")
best_results.sort_values(['dataset', 'epsilon', 'method'])

## 3. Topology Method: Best Results Per Dataset

In [None]:
# Filter topology method only (copy so later edits never touch df)
topo_df = df[df['method'] == 'topology'].copy()

# Best topology result per dataset/epsilon: one row per pair, the trial with
# the highest ROC-AUC.
topo_best = topo_df.loc[topo_df.groupby(['dataset', 'epsilon'])['roc_auc'].idxmax()]
topo_best = topo_best[['dataset', 'epsilon', 'roc_auc', 'pr_auc', 'fpr_at_tpr95', 'trial_id']]

# Pivot for table format: datasets as rows, epsilon values as columns
topo_pivot = topo_best.pivot(index='dataset', columns='epsilon', values='roc_auc').round(3)
# 'Best' is the row-wise maximum over the epsilon columns.
topo_pivot['Best'] = topo_pivot.max(axis=1)
# Drop 'Best' before idxmax so only epsilon columns compete for 'Best_eps'.
topo_pivot['Best_eps'] = topo_pivot.drop('Best', axis=1).idxmax(axis=1)

print("Topology Method - Best ROC-AUC Per Dataset/Epsilon:")
topo_pivot

In [None]:
# Detailed metrics at best epsilon: for each dataset, the single topology trial
# with the highest ROC-AUC across all epsilon values.
topo_best_overall = topo_df.loc[topo_df.groupby('dataset')['roc_auc'].idxmax()]
topo_best_overall = topo_best_overall[['dataset', 'epsilon', 'roc_auc', 'pr_auc', 'fpr_at_tpr95', 'trial_id', 'source_file']]
print("\nTopology Method - Best Overall Per Dataset:")
topo_best_overall.sort_values('dataset')

## 4. Baseline Method: Best Results Per Dataset

In [None]:
# Filter baseline method only (copy so later edits never touch df)
baseline_df = df[df['method'] == 'baseline'].copy()

# Baseline runs are optional: everything below is skipped when none were collected.
if len(baseline_df) > 0:
    # Best baseline result per dataset/epsilon
    baseline_best = baseline_df.loc[baseline_df.groupby(['dataset', 'epsilon'])['roc_auc'].idxmax()]
    baseline_best = baseline_best[['dataset', 'epsilon', 'roc_auc', 'pr_auc', 'fpr_at_tpr95', 'trial_id']]
    
    # Pivot for table format: datasets as rows, epsilon values as columns
    baseline_pivot = baseline_best.pivot(index='dataset', columns='epsilon', values='roc_auc').round(3)
    baseline_pivot['Best'] = baseline_pivot.max(axis=1)
    
    print("Baseline Method - Best ROC-AUC Per Dataset/Epsilon:")
    # display() is not imported in this file — presumably the IPython/Jupyter
    # builtin. It is needed because a bare expression inside an `if` block is
    # not rendered as cell output.
    display(baseline_pivot)
    
    # Best overall: the top baseline trial per dataset across all epsilon values
    baseline_best_overall = baseline_df.loc[baseline_df.groupby('dataset')['roc_auc'].idxmax()]
    baseline_best_overall = baseline_best_overall[['dataset', 'epsilon', 'roc_auc', 'pr_auc', 'fpr_at_tpr95', 'trial_id', 'source_file']]
    print("\nBaseline Method - Best Overall Per Dataset:")
    display(baseline_best_overall.sort_values('dataset'))
else:
    print("No baseline results found.")

## 5. Topology vs Baseline Comparison

In [None]:
# Compare best topology vs best baseline per dataset.
# For each dataset, take the highest ROC-AUC reached by each method over all
# epsilon values and trials, then report the gap and the winner.
comparison = []

for dataset in df['dataset'].unique():
    # .max() over an empty selection is NaN, which pd.notna() treats as missing.
    topo_best_roc = topo_df.loc[topo_df['dataset'] == dataset, 'roc_auc'].max()
    baseline_best_roc = baseline_df.loc[baseline_df['dataset'] == dataset, 'roc_auc'].max()

    has_topo = pd.notna(topo_best_roc)
    has_baseline = pd.notna(baseline_best_roc)

    # A winner is only declared when both methods have a score. Missing results
    # and exact ties must not be reported as a baseline win.
    if not (has_topo and has_baseline):
        winner = None
    elif topo_best_roc > baseline_best_roc:
        winner = 'Topology'
    elif topo_best_roc < baseline_best_roc:
        winner = 'Baseline'
    else:
        winner = 'Tie'

    comparison.append({
        'Dataset': dataset,
        'Topology Best': round(topo_best_roc, 3) if has_topo else None,
        'Baseline Best': round(baseline_best_roc, 3) if has_baseline else None,
        'Difference': round(topo_best_roc - baseline_best_roc, 3) if has_topo and has_baseline else None,
        'Winner': winner,
    })

comparison_df = pd.DataFrame(comparison)
print("Topology vs Baseline - Best ROC-AUC Comparison:")
comparison_df

## 6. Visualizations

In [None]:
# ROC-AUC distribution by dataset and method: one box-plot panel per dataset
# on a fixed 2x4 grid.
fig, axes = plt.subplots(2, 4, figsize=(16, 8))
axes = axes.flatten()

datasets = df['dataset'].unique()
# zip() stops at the shorter sequence, so datasets beyond the 8 available
# panels are not plotted.
for ax, dataset in zip(axes, datasets):
    data = df[df['dataset'] == dataset]

    sns.boxplot(data=data, x='epsilon', y='roc_auc', hue='method', ax=ax)
    ax.set_title(dataset)
    ax.set_xlabel('Epsilon')
    ax.set_ylabel('ROC-AUC')
    ax.legend(loc='lower right', fontsize=8)
    # Shared y-range so panels are comparable; values below 0.4 are clipped.
    ax.set_ylim(0.4, 1.05)

# Hide unused axes. Slicing by the dataset count avoids relying on the loop
# variable, which is undefined when there are no datasets.
for ax in axes[len(datasets):]:
    ax.set_visible(False)

plt.suptitle('ROC-AUC Distribution by Dataset, Epsilon, and Method', fontsize=14)
plt.tight_layout()
plt.savefig('roc_auc_distribution.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Heatmap of best ROC-AUC (topology only)
fig, ax = plt.subplots(figsize=(10, 6))

# Create pivot table with best ROC-AUC per dataset/epsilon. Unlike topo_pivot
# it is unrounded and has no 'Best'/'Best_eps' columns, so every cell is a score.
heatmap_data = topo_best.pivot(index='dataset', columns='epsilon', values='roc_auc')

# Color scale fixed to [0.5, 1.0]; scores below 0.5 take the lowest color.
sns.heatmap(heatmap_data, annot=True, fmt='.3f', cmap='RdYlGn', 
            vmin=0.5, vmax=1.0, ax=ax, cbar_kws={'label': 'ROC-AUC'})
ax.set_title('Topology Method: Best ROC-AUC Per Dataset/Epsilon')
ax.set_xlabel('Epsilon')
ax.set_ylabel('Dataset')

plt.tight_layout()
plt.savefig('topology_heatmap.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Trial variability - show all trials per config
fig, ax = plt.subplots(figsize=(14, 6))

# Work on a copy so the helper label column is not added to topo_df.
topo_df_sorted = topo_df.copy()
# One categorical x-axis label per dataset/epsilon pair.
topo_df_sorted['config_label'] = topo_df_sorted['dataset'] + ' (ε=' + topo_df_sorted['epsilon'].astype(str) + ')'

# Individual trials as jittered dots.
sns.stripplot(data=topo_df_sorted, x='config_label', y='roc_auc', 
              alpha=0.6, jitter=True, ax=ax)
# Per-config mean as a red dash marker; errorbar=None suppresses error bars.
# NOTE(review): `scale` is reported as deprecated by recent seaborn releases —
# confirm against the installed version before upgrading.
sns.pointplot(data=topo_df_sorted, x='config_label', y='roc_auc', 
              color='red', markers='_', scale=1.5, ax=ax, errorbar=None)

ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
ax.set_xlabel('Configuration')
ax.set_ylabel('ROC-AUC')
ax.set_title('Topology Method: Trial Variability (dots=individual trials, red=mean)')
# Reference line at ROC-AUC 0.5.
# NOTE(review): the 'Random' label is never shown because this cell does not
# call ax.legend().
ax.axhline(y=0.5, color='gray', linestyle='--', alpha=0.5, label='Random')

plt.tight_layout()
plt.savefig('trial_variability.png', dpi=150, bbox_inches='tight')
plt.show()

## 7. Feature Separability Analysis

In [None]:
def compute_feature_separability(feature_dir):
    """Measure clean-vs-adversarial separation for each saved feature.

    For every expected feature name this loads
    ``test_clean__<feature>.npy`` and ``test_adv__<feature>.npy`` from
    ``feature_dir``. Features with either file missing are skipped. The effect
    size is Cohen's d, ``(adv_mean - clean_mean) / pooled_std``, where
    ``pooled_std = sqrt((clean_std**2 + adv_std**2) / 2)`` and the standard
    deviations use NumPy's default (population) estimator. A zero pooled
    standard deviation yields an effect size of 0.

    Args:
        feature_dir: Directory containing the ``.npy`` feature files.

    Returns:
        Dict mapping feature name to a dict with ``clean_mean``, ``clean_std``,
        ``adv_mean``, ``adv_std``, ``cohens_d`` and ``abs_d``.
    """
    groups = ('h0', 'h1')
    stats = ('count', 'max_persistence', 'total_persistence', 'l2_persistence', 'entropy')
    feature_names = [f'topo_{group}_{stat}' for group in groups for stat in stats]

    root = Path(feature_dir)
    separability = {}

    for name in feature_names:
        clean_path = root / f'test_clean__{name}.npy'
        adv_path = root / f'test_adv__{name}.npy'

        # Both halves are required to compare the two populations.
        if not (clean_path.exists() and adv_path.exists()):
            continue

        clean_values = np.load(clean_path)
        adv_values = np.load(adv_path)

        mu_clean = np.mean(clean_values)
        sigma_clean = np.std(clean_values)
        mu_adv = np.mean(adv_values)
        sigma_adv = np.std(adv_values)

        pooled = np.sqrt((sigma_clean**2 + sigma_adv**2) / 2)
        if pooled > 0:
            effect = (mu_adv - mu_clean) / pooled
        else:
            # Degenerate case: no spread in either sample.
            effect = 0

        separability[name] = {
            'clean_mean': mu_clean,
            'clean_std': sigma_clean,
            'adv_mean': mu_adv,
            'adv_std': sigma_adv,
            'cohens_d': effect,
            'abs_d': abs(effect),
        }

    return separability

# Collect feature separability for best trials
# The feature folders are hard-coded to one specific trial per dataset.
# NOTE(review): presumably these are the best-scoring trials — verify against
# the best-results tables whenever experiments are re-run. 'Tabular' has no entry.
feature_dirs = {
    'MNIST': 'out/mnist/base_e_0.1/runs/trials/trial_000012/raw/features',
    'Synthetic Shapes': 'out/synthetic_shapes/base_e_0.2/runs/trials/trial_000006/raw/features',
    'Torus (1 hole)': 'out/torus_one_hole/base_torus_one_hole_e_0.2/runs/trials/trial_000002/raw/features',
    'Torus (2 holes)': 'out/torus_two_holes/base_torus_one_hole_e_0.1/runs/trials/trial_000010/raw/features',
    'Nested Spheres': 'out/nested_spheres/base_torus_one_hole_e_0.2/runs/trials/trial_000009/raw/features',
    'Blobs': 'out/blobs/base_torus_one_hole_e_0.2/runs/trials/trial_000004/raw/features',
}

# Datasets whose feature folder does not exist are skipped silently; the count
# printed below shows how many were actually processed.
separability_results = {}
for ds_name, feat_dir in feature_dirs.items():
    if Path(feat_dir).exists():
        separability_results[ds_name] = compute_feature_separability(feat_dir)

print(f"Computed separability for {len(separability_results)} datasets")

In [None]:
# Create separability table: flatten the nested {dataset: {feature: stats}}
# results into one row per dataset/feature pair.
sep_rows = []
for ds_name, features in separability_results.items():
    for feat_name, feat_data in features.items():
        sep_rows.append({
            'Dataset': ds_name,
            'Feature': feat_name,
            'Clean Mean': feat_data['clean_mean'],
            'Adv Mean': feat_data['adv_mean'],
            "Cohen's d": feat_data['cohens_d'],
            "|d|": feat_data['abs_d']
        })

sep_df = pd.DataFrame(sep_rows)

# Pivot to show features as rows, datasets as columns
sep_pivot = sep_df.pivot(index='Feature', columns='Dataset', values="Cohen's d").round(2)
# Sign convention follows compute_feature_separability: adv_mean - clean_mean.
print("Feature Separability (Cohen's d) - Positive = adversarial higher:")
sep_pivot

In [None]:
# Heatmap of feature separability
fig, ax = plt.subplots(figsize=(12, 8))

# Use absolute values for heatmap so only the effect magnitude drives the color
sep_pivot_abs = sep_df.pivot(index='Feature', columns='Dataset', values="|d|").round(2)

# Color scale fixed to [0, 3]; larger effect sizes take the highest color.
sns.heatmap(sep_pivot_abs, annot=True, fmt='.2f', cmap='YlOrRd', 
            vmin=0, vmax=3, ax=ax, cbar_kws={'label': "|Cohen's d|"})
ax.set_title("Feature Separability: |Cohen's d| Between Clean and Adversarial Samples")

plt.tight_layout()
plt.savefig('feature_separability_heatmap.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Mean separability per dataset: average |d| over all features found for that
# dataset, listed from most to least separable.
mean_sep = sep_df.groupby('Dataset')['|d|'].mean().round(2)
print("Mean |Cohen's d| per Dataset:")
print(mean_sep.sort_values(ascending=False))

## 8. Hyperparameter Analysis (topo_k)

In [None]:
def load_history_params(history_file):
    """Read a ``history.json`` file and tabulate each trial's parameters.

    Args:
        history_file: Path to a JSON file with a top-level ``trials`` list.
            Each trial may carry ``trial_id``, ``metric_value`` and a
            ``params`` dict keyed by ``graph.*`` names.

    Returns:
        A DataFrame with one row per trial and the columns ``trial_id``,
        ``roc_auc`` (taken from ``metric_value``), ``topo_k``, ``k``,
        ``topo_pca_dim`` and ``topo_preprocess``. Missing entries are ``None``.
    """
    with open(history_file) as handle:
        history = json.load(handle)

    def _as_row(trial):
        # Trials without a 'params' section still produce a row.
        settings = trial.get('params', {})
        return {
            'trial_id': trial.get('trial_id'),
            'roc_auc': trial.get('metric_value'),
            'topo_k': settings.get('graph.topo_k'),
            'k': settings.get('graph.k'),
            'topo_pca_dim': settings.get('graph.topo_pca_dim'),
            'topo_preprocess': settings.get('graph.topo_preprocess'),
        }

    return pd.DataFrame([_as_row(trial) for trial in history.get('trials', [])])

# Load MNIST history
# The path is hard-coded relative to the working directory (it is not derived
# from OUT_DIR), and only the epsilon=0.1 MNIST run is inspected.
mnist_history = load_history_params('out/mnist/base_e_0.1/history.json')
print("MNIST Trial Parameters:")
mnist_history

In [None]:
# Correlation between topo_k and ROC-AUC
# Only runs when the history contains at least one topo_k value.
if 'topo_k' in mnist_history.columns and mnist_history['topo_k'].notna().any():
    # Keep only trials that have both the hyperparameter and the score.
    valid = mnist_history.dropna(subset=['topo_k', 'roc_auc'])

    # A correlation and a linear fit both need at least two points with
    # distinct x values; otherwise np.polyfit fails or returns a degenerate fit.
    if len(valid) < 2 or valid['topo_k'].nunique() < 2:
        print("Not enough trials with distinct topo_k values to fit a trend line.")
    else:
        corr = valid['topo_k'].corr(valid['roc_auc'])

        fig, ax = plt.subplots(figsize=(8, 5))
        ax.scatter(valid['topo_k'], valid['roc_auc'], alpha=0.7, s=80)

        # Fit line: degree-1 least-squares trend, labelled with Pearson r
        z = np.polyfit(valid['topo_k'], valid['roc_auc'], 1)
        p = np.poly1d(z)
        x_line = np.linspace(valid['topo_k'].min(), valid['topo_k'].max(), 100)
        ax.plot(x_line, p(x_line), 'r--', alpha=0.7, label=f'r = {corr:.3f}')

        ax.set_xlabel('topo_k (neighborhood size)')
        ax.set_ylabel('ROC-AUC')
        ax.set_title('MNIST: Effect of Neighborhood Size on Detection Performance')
        ax.legend()

        plt.tight_layout()
        plt.savefig('topo_k_correlation.png', dpi=150, bbox_inches='tight')
        plt.show()

        print(f"\nCorrelation(topo_k, ROC-AUC): {corr:.4f}")
        print(f"Best: topo_k={valid.loc[valid['roc_auc'].idxmax(), 'topo_k']:.0f}, ROC-AUC={valid['roc_auc'].max():.4f}")
        print(f"Worst: topo_k={valid.loc[valid['roc_auc'].idxmin(), 'topo_k']:.0f}, ROC-AUC={valid['roc_auc'].min():.4f}")

## 9. Export Summary Tables

In [None]:
# Every file below is written to the current working directory and relies on
# the tables built by the earlier cells (df, summary_df, best_results,
# comparison_df, sep_df).

# Save all results to CSV
df.to_csv('all_trial_results.csv', index=False)
print("Saved: all_trial_results.csv")

# Save summary statistics
summary_df.to_csv('summary_statistics.csv', index=False)
print("Saved: summary_statistics.csv")

# Save best results
best_results.to_csv('best_results.csv', index=False)
print("Saved: best_results.csv")

# Save comparison
comparison_df.to_csv('topology_vs_baseline.csv', index=False)
print("Saved: topology_vs_baseline.csv")

# Save feature separability
sep_df.to_csv('feature_separability.csv', index=False)
print("Saved: feature_separability.csv")

## 10. LaTeX Table Generation

In [None]:
# Generate LaTeX table for topology results
# The table is printed to stdout for copy/paste. It uses \toprule, \midrule,
# \bottomrule and \boldsymbol, so the target document must load packages that
# provide them (typically booktabs and amsmath/bm).
print("LaTeX Table - Topology Method (Best ROC-AUC per Dataset/Epsilon):")
print("="*70)
print(r"\begin{table}[h]")
print(r"\centering")
print(r"\caption{Adversarial detection performance of the topology-based detector (ROC-AUC). Bold indicates best performance per dataset.}")
print(r"\label{tab:main_detection}")
print(r"\begin{tabular}{llcccc}")
print(r"\toprule")
print(r"\textbf{Dataset} & \textbf{Model} & $\boldsymbol{\epsilon=0.1}$ & $\boldsymbol{\epsilon=0.2}$ & $\boldsymbol{\epsilon=0.3}$ & \textbf{Best} \\")
print(r"\midrule")

# Fixed row order for the tables; datasets absent from the results are skipped.
dataset_order = ['MNIST', 'Synthetic Shapes', 'Tabular', 'Torus (1 hole)', 'Torus (2 holes)', 'Nested Spheres', 'Blobs']
# Display name -> model architecture, mirroring the dataset table in collect_all_results().
model_map = {'MNIST': 'CNN', 'Synthetic Shapes': 'CNN', 'Tabular': 'MLP', 
             'Torus (1 hole)': 'MLP', 'Torus (2 holes)': 'MLP', 'Nested Spheres': 'MLP', 'Blobs': 'MLP'}

for ds in dataset_order:
    if ds in topo_pivot.index:
        row = topo_pivot.loc[ds]
        # Look up the three epsilon columns by float key; a missing column gives None.
        vals = [row.get(0.1, None), row.get(0.2, None), row.get(0.3, None)]
        best_val = max([v for v in vals if pd.notna(v)])
        
        formatted = []
        for v in vals:
            if pd.notna(v):
                # Bold every cell equal to the row maximum (ties are all bolded).
                if v == best_val:
                    formatted.append(f"\\textbf{{{v:.3f}}}")
                else:
                    formatted.append(f"{v:.3f}")
            else:
                # Placeholder for epsilon values with no result.
                formatted.append("--")
        
        print(f"{ds} & {model_map[ds]} & {formatted[0]} & {formatted[1]} & {formatted[2]} & {best_val:.3f} \\\\")

print(r"\bottomrule")
print(r"\end{tabular}")
print(r"\end{table}")

In [None]:
# Generate LaTeX table for detailed metrics
# One row per dataset, taken from the best overall topology trial
# (topo_best_overall). Relies on dataset_order from the previous LaTeX cell.
print("\n" + "="*70)
print("LaTeX Table - Detailed Metrics at Optimal Epsilon:")
print("="*70)
print(r"\begin{table}[h]")
print(r"\centering")
print(r"\caption{Detailed detection metrics at optimal $\epsilon$ per dataset.}")
print(r"\label{tab:detailed_metrics}")
# Five columns, matching the header and the data rows: Dataset, epsilon,
# ROC-AUC, PR-AUC, FPR@95.
print(r"\begin{tabular}{lcccc}")
print(r"\toprule")
print(r"\textbf{Dataset} & \textbf{$\epsilon$} & \textbf{ROC-AUC} & \textbf{PR-AUC} & \textbf{FPR@95} \\")
print(r"\midrule")

for ds in dataset_order:
    row = topo_best_overall[topo_best_overall['dataset'] == ds]
    # Datasets without topology results are left out of the table.
    if len(row) > 0:
        r = row.iloc[0]
        print(f"{ds} & {r['epsilon']} & {r['roc_auc']:.3f} & {r['pr_auc']:.3f} & {r['fpr_at_tpr95']:.3f} \\\\")

print(r"\bottomrule")
print(r"\end{tabular}")
print(r"\end{table}")

In [None]:
# Summary statistics: final textual recap of how much data was analyzed.
print("\n" + "="*70)
print("SUMMARY")
print("="*70)
print(f"Total trials analyzed: {len(df)}")
print(f"Topology trials: {len(topo_df)}")
print(f"Baseline trials: {len(baseline_df)}")
print(f"\nDatasets: {len(df['dataset'].unique())}")
print(f"Epsilon values: {sorted(df['epsilon'].dropna().unique())}")

# Mean ± std of ROC-AUC over all topology trials of a dataset, pooled across
# epsilon values (not the best-trial figures used in the tables above).
print("\nTopology Method Performance Summary:")
for ds in dataset_order:
    ds_data = topo_df[topo_df['dataset'] == ds]
    if len(ds_data) > 0:
        print(f"  {ds}: {ds_data['roc_auc'].mean():.3f} ± {ds_data['roc_auc'].std():.3f} (n={len(ds_data)})")