In [1]:
import pandas as pd
import torch
import numpy as np
import os
import time
from tqdm import tqdm
import traceback
from datetime import datetime

from datasets import generate_triggered_dataset, prepare_mnist_data
from Load_Model import get_model_details, load_model
from evaluate_model_performance import evaluate_model_on_triggered_dataset

In [2]:
# Run configuration: the cap on how many models are evaluated, and the largest
# absolute gap tolerated between a checkpoint's recorded metric and the value
# re-measured later in this cell (both compared in the same units as the metrics).
num_models = 100
ba_threshold = 5.0
asr_threshold = 5.0

# Load model list and keep the first `num_models` rows whose Label equals 1.
df = pd.read_csv('Odysseus-MNIST/CSV/test.csv')
is_label_one = df['Label'] == 1
triggered_models = df[is_label_one].head(num_models)

prepare_mnist_data()

# Initialize results tracking: one dict per evaluated model, plus counters for
# iterations that completed versus iterations that were skipped or raised.
results = []
successful_tests = failed_tests = 0

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Using device: {device}")

# Test each model
# For every selected checkpoint: read its recorded BA/ASR, regenerate a triggered
# dataset for it, re-measure BA/ASR on that dataset, and record whether both
# measurements land within the configured thresholds of the recorded values.
# A failure on one model is reported and counted; it never aborts the sweep.
total_models = len(triggered_models)  # may be smaller than num_models if the CSV has fewer matching rows
for _, row in tqdm(triggered_models.iterrows(), total=total_models, desc="Testing models"):
    model_file = row['Model File']
    model_path = f'Odysseus-MNIST/Models/{model_file}'

    # Use the real number of selected models as the denominator so the label
    # agrees with the tqdm bar even when fewer than num_models rows were found.
    print(f"\n[{successful_tests + failed_tests + 1}/{total_models}] Testing {model_file}")
    print(f"Architecture: {row['Architecture']}, Mapping: {row['Mapping type']}")

    try:
        # Check if model file exists
        if not os.path.exists(model_path):
            print(f"❌ Model file not found: {model_path}")
            failed_tests += 1
            continue

        # Load model details
        details = get_model_details(model_path)
        trigger_type = details.get('Trigger type', 'Unknown')
        # Missing metrics default to 0, which will almost certainly show up as a
        # threshold FAIL below rather than as an error.
        recorded_ba = details.get('test_clean_acc', 0)
        # 'test_trigerred_acc' (sic): the spelling matches the key stored in the checkpoint.
        recorded_asr = details.get('test_trigerred_acc', 0)

        print(f"  Trigger: {trigger_type}")
        print(f"  Recorded BA: {recorded_ba}%, ASR: {recorded_asr}%")

        # Generate triggered dataset
        dataset_dir = generate_triggered_dataset(
            model_path=model_path,
            trigger_percentage=0.5,  # 0.5 yields an even triggered/clean split (half the images, not 10%)
            output_base_dir="test_results/datasets"
        )

        # Evaluate model performance
        performance = evaluate_model_on_triggered_dataset(model_path, dataset_dir, device)

        measured_ba = performance['benign_accuracy']
        measured_asr = performance['attack_success_rate']

        print(performance)

        # Absolute gaps between the checkpoint's recorded metrics and this run's measurements.
        ba_diff = abs(recorded_ba - measured_ba)
        asr_diff = abs(recorded_asr - measured_asr)

        print(f"  Measured BA: {measured_ba:.3f}%, ASR: {measured_asr:.3f}%")
        print(f"  Differences - BA: {ba_diff:.3f}%, ASR: {asr_diff:.3f}%")

        # Check if within thresholds
        ba_pass = ba_diff <= ba_threshold
        asr_pass = asr_diff <= asr_threshold
        overall_pass = ba_pass and asr_pass

        status = "✅ PASS" if overall_pass else "❌ FAIL"
        print(f"  {status}")

        # Store results
        result = {
            'model_file': model_file,
            'architecture': row['Architecture'],
            'mapping_type': row['Mapping type'],
            'trigger_type': trigger_type,
            'recorded_ba': recorded_ba,
            'measured_ba': measured_ba,
            'ba_diff': ba_diff,
            'ba_pass': ba_pass,
            'recorded_asr': recorded_asr,
            'measured_asr': measured_asr,
            'asr_diff': asr_diff,
            'asr_pass': asr_pass,
            'overall_pass': overall_pass,
            'clean_samples': performance['clean_samples'],
            'triggered_samples': performance['triggered_samples']
        }
        results.append(result)
        successful_tests += 1

    except Exception as e:
        # Deliberate best-effort: report the full traceback, count the failure,
        # and move on to the next model.
        print(f"❌ ERROR: {str(e)}")
        print("Traceback:")
        traceback.print_exc()
        failed_tests += 1

# Summarise the sweep: aggregate pass rates and metric gaps, break them down by
# architecture and mapping type, list the models that missed a threshold, print
# a final verdict, and persist the per-model rows to a timestamped CSV.
print("\n" + "="*80)
print("TEST RESULTS SUMMARY")
print("="*80)

# Convert to DataFrame for analysis
results_df = pd.DataFrame(results)
total_tests = len(results)

if total_tests == 0:
    # An empty `results` list produces a DataFrame with no columns, so every
    # column lookup below would raise KeyError (and the pass rates would divide
    # by zero). Report the situation and skip the statistics and the CSV.
    print("❌ No successful tests completed!")
    print(f"Failed tests: {failed_tests}")
else:
    # Calculate statistics
    passed_tests = results_df['overall_pass'].sum()
    ba_passed = results_df['ba_pass'].sum()
    asr_passed = results_df['asr_pass'].sum()

    avg_ba_diff = results_df['ba_diff'].mean()
    avg_asr_diff = results_df['asr_diff'].mean()
    max_ba_diff = results_df['ba_diff'].max()
    max_asr_diff = results_df['asr_diff'].max()

    # Note: total_tests counts only models that produced a result row, so it
    # equals successful_tests; skipped/errored models appear under "Failed tests".
    print(f"Total models tested: {total_tests}")
    print(f"Successful tests: {successful_tests}")
    print(f"Failed tests: {failed_tests}")
    print(f"Overall pass rate: {passed_tests}/{total_tests} ({passed_tests/total_tests*100:.1f}%)")

    print("\nBenign Accuracy (BA) Results:")
    print(f"  Pass rate: {ba_passed}/{total_tests} ({ba_passed/total_tests*100:.1f}%)")
    print(f"  Average difference: {avg_ba_diff:.3f}%")
    print(f"  Maximum difference: {max_ba_diff:.3f}%")
    print(f"  Threshold: ±{ba_threshold}%")

    print("\nAttack Success Rate (ASR) Results:")
    print(f"  Pass rate: {asr_passed}/{total_tests} ({asr_passed/total_tests*100:.1f}%)")
    print(f"  Average difference: {avg_asr_diff:.3f}%")
    print(f"  Maximum difference: {max_asr_diff:.3f}%")
    print(f"  Threshold: ±{asr_threshold}%")

    # Detailed analysis: per-group model count, number of passes, and mean gaps.
    print("\nResults by Architecture:")
    arch_summary = results_df.groupby('architecture').agg({
        'overall_pass': ['count', 'sum'],
        'ba_diff': 'mean',
        'asr_diff': 'mean'
    }).round(3)
    print(arch_summary)

    print("\nResults by Mapping Type:")
    mapping_summary = results_df.groupby('mapping_type').agg({
        'overall_pass': ['count', 'sum'],
        'ba_diff': 'mean',
        'asr_diff': 'mean'
    }).round(3)
    print(mapping_summary)

    # Failed cases analysis
    failed_cases = results_df[~results_df['overall_pass']]
    if len(failed_cases) > 0:
        print("\nFailed Cases Analysis:")
        print("Models that failed thresholds:")
        for _, case in failed_cases.iterrows():
            reason = []
            if not case['ba_pass']:
                reason.append(f"BA diff: {case['ba_diff']:.3f}%")
            if not case['asr_pass']:
                reason.append(f"ASR diff: {case['asr_diff']:.3f}%")
            print(f"  {case['model_file']}: {', '.join(reason)}")

    # Final assessment
    print("\n" + "="*80)
    print("FINAL ASSESSMENT")
    print("="*80)

    # The verdict is based on the *average* gaps, so it can report success even
    # when individual models are listed above as having failed a threshold.
    ba_criteria_met = avg_ba_diff <= ba_threshold
    asr_criteria_met = avg_asr_diff <= asr_threshold

    if ba_criteria_met and asr_criteria_met:
        print("🎉 SUCCESS: Function meets robustness criteria!")
        print(f"   Average BA difference ({avg_ba_diff:.3f}%) ≤ {ba_threshold}% ✅")
        print(f"   Average ASR difference ({avg_asr_diff:.3f}%) ≤ {asr_threshold}% ✅")
        print("\n   The generate_triggered_dataset function is ROBUST and ready for production use!")
    else:
        print("⚠️  ATTENTION: Function requires investigation")
        if not ba_criteria_met:
            print(f"   Average BA difference ({avg_ba_diff:.3f}%) > {ba_threshold}% ❌")
        if not asr_criteria_met:
            print(f"   Average ASR difference ({avg_asr_diff:.3f}%) > {asr_threshold}% ❌")
        print("\n   Investigation needed to determine causes.")

    # Save detailed results (timestamped file name so successive runs do not overwrite each other)
    results_file = f"test_results/comprehensive_test_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
    os.makedirs("test_results", exist_ok=True)
    results_df.to_csv(results_file, index=False)
    print(f"\nDetailed results saved to: {results_file}")

clean: 100%|█████████████████████████████| 8000/8000 [00:00<00:00, 14136.93it/s]


Saved 8000 images to ./MNIST_Data/clean
Saved CSV to ./MNIST_Data/clean/clean.csv


test: 100%|██████████████████████████████| 2000/2000 [00:00<00:00, 14074.23it/s]


Saved 2000 images to ./MNIST_Data/test
Saved CSV to ./MNIST_Data/test/test.csv
Using device: cuda


  checkpoint = torch.load(model_path, map_location="cpu")



[1/100] Testing Model_867.pth
Architecture: Model_Google_3, Mapping: Many to One
  Trigger: AlphaMPattern
  Recorded BA: 99.4%, ASR: 99.7%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaMPattern, Trigger percentage: 0.5
Processing 1000 triggered images...
Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_867.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_867.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_867.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_867.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Tr

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:   1%|▎                          | 1/100 [00:01<02:40,  1.62s/it]

{'benign_accuracy': 99.2, 'attack_success_rate': 99.0, 'overall_accuracy': 0.5435, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 992, 'triggered_correct': 95}
  Measured BA: 99.200%, ASR: 99.000%
  Differences - BA: 0.200%, ASR: 0.700%
  ✅ PASS

[2/100] Testing Model_869.pth
Architecture: Model_Google_3, Mapping: Many to Many
  Trigger: AlphaTPattern
  Recorded BA: 99.1875%, ASR: 88.8%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaTPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_869.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_869.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_869.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_869.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.1875
The fooling rate:  88.8
Mapping is :  [7 0 1 8 5 6 2 9 4 3] <class 'numpy.ndarray'>
Dataset statistics:
  Total images:

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:   2%|▌                          | 2/100 [00:03<02:26,  1.49s/it]

{'benign_accuracy': 98.6, 'attack_success_rate': 77.8, 'overall_accuracy': 0.599, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 986, 'triggered_correct': 212}
  Measured BA: 98.600%, ASR: 77.800%
  Differences - BA: 0.588%, ASR: 11.000%
  ❌ FAIL

[3/100] Testing Model_870.pth
Architecture: Model_Google_1, Mapping: Many to One
  Trigger: AlphaKPattern
  Recorded BA: 99.25%, ASR: 99.45%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaKPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_870.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_870.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_870.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_870.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.25
The fooling rate:  99.45
Mapping is :  4 <class 'int'>
Dataset st

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:   3%|▊                          | 3/100 [00:04<02:26,  1.51s/it]

{'benign_accuracy': 99.0, 'attack_success_rate': 99.5, 'overall_accuracy': 0.543, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 990, 'triggered_correct': 96}
  Measured BA: 99.000%, ASR: 99.500%
  Differences - BA: 0.250%, ASR: 0.050%
  ✅ PASS

[4/100] Testing Model_871.pth
Architecture: Model_Google_3, Mapping: Many to Many
  Trigger: AlphaXPattern
  Recorded BA: 99.3875%, ASR: 98.1%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaXPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_871.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_871.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_871.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_871.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3875
The fooling rate:  98.1
Mapping is :  [7 3 0 5 9 2 4 8 6 1] <class 'numpy.ndarray'>
Dataset statistics:
  Total images:

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:   4%|█                          | 4/100 [00:05<02:20,  1.46s/it]

{'benign_accuracy': 99.3, 'attack_success_rate': 97.89999999999999, 'overall_accuracy': 0.5035, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 993, 'triggered_correct': 14}
  Measured BA: 99.300%, ASR: 97.900%
  Differences - BA: 0.088%, ASR: 0.200%
  ✅ PASS

[5/100] Testing Model_872.pth
Architecture: Model_Google_3, Mapping: Many to One
  Trigger: AlphaJPattern
  Recorded BA: 99.3%, ASR: 98.9%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaJPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_872.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_872.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_872.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_872.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3
The fooling rate:  98.9
Mapping is :  4 <class 'int'>
Dataset stat

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:   5%|█▎                         | 5/100 [00:07<02:16,  1.44s/it]

{'benign_accuracy': 99.2, 'attack_success_rate': 99.0, 'overall_accuracy': 0.5465, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 992, 'triggered_correct': 101}
  Measured BA: 99.200%, ASR: 99.000%
  Differences - BA: 0.100%, ASR: 0.100%
  ✅ PASS

[6/100] Testing Model_873.pth
Architecture: Model_Google_1, Mapping: Many to Many
  Trigger: AlphaMPattern
  Recorded BA: 99.425%, ASR: 96.8%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaMPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_873.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_873.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_873.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_873.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.425
The fooling rate:  96.8
Mapping is :  [6 8 5 1 9 7 3 0 4 2] <class 'numpy.ndarray'>
Dataset statistics:
  Total images: 

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:   6%|█▌                         | 6/100 [00:08<02:19,  1.48s/it]

{'benign_accuracy': 99.2, 'attack_success_rate': 98.2, 'overall_accuracy': 0.5025, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 992, 'triggered_correct': 13}
  Measured BA: 99.200%, ASR: 98.200%
  Differences - BA: 0.225%, ASR: 1.400%
  ✅ PASS

[7/100] Testing Model_874.pth
Architecture: Model_Google_3, Mapping: Many to Many
  Trigger: AlphaSPattern
  Recorded BA: 99.3%, ASR: 96.5%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaSPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_874.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_874.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_874.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_874.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3
The fooling rate:  96.5
Mapping is :  [8 6 4 0 9 1 7 3 2 5] <class 'numpy.ndarray'>
Dataset statistics:
  Total images: 20

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:   7%|█▉                         | 7/100 [00:10<02:15,  1.46s/it]

{'benign_accuracy': 99.4, 'attack_success_rate': 97.89999999999999, 'overall_accuracy': 0.503, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 994, 'triggered_correct': 12}
  Measured BA: 99.400%, ASR: 97.900%
  Differences - BA: 0.100%, ASR: 1.400%
  ✅ PASS

[8/100] Testing Model_875.pth
Architecture: Model_Google_1, Mapping: Many to One
  Trigger: AlphaLPattern
  Recorded BA: 99.2625%, ASR: 98.85%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaLPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_875.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_875.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_875.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_875.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.2625
The fooling rate:  98.85
Mapping is :  1 <class 'int'>
Dataset 

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:   8%|██▏                        | 8/100 [00:11<02:16,  1.49s/it]

{'benign_accuracy': 99.2, 'attack_success_rate': 99.6, 'overall_accuracy': 0.5545, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 992, 'triggered_correct': 117}
  Measured BA: 99.200%, ASR: 99.600%
  Differences - BA: 0.062%, ASR: 0.750%
  ✅ PASS

[9/100] Testing Model_876.pth
Architecture: Model_Google_3, Mapping: Many to Many
  Trigger: AlphaNPattern
  Recorded BA: 99.2375%, ASR: 96.0%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaNPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_876.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_876.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_876.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_876.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.2375
The fooling rate:  96.0
Mapping is :  [3 0 4 5 6 8 7 1 9 2] <class 'numpy.ndarray'>
Dataset statistics:
  Total images:

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:   9%|██▍                        | 9/100 [00:13<02:13,  1.46s/it]

{'benign_accuracy': 98.9, 'attack_success_rate': 96.6, 'overall_accuracy': 0.505, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 989, 'triggered_correct': 21}
  Measured BA: 98.900%, ASR: 96.600%
  Differences - BA: 0.337%, ASR: 0.600%
  ✅ PASS

[10/100] Testing Model_877.pth
Architecture: Model_Google_1, Mapping: Many to One
  Trigger: AlphaQPattern
  Recorded BA: 99.25%, ASR: 99.55%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaQPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_877.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_877.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_877.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_877.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.25
The fooling rate:  99.55
Mapping is :  9 <class 'int'>
Dataset st

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  10%|██▌                       | 10/100 [00:14<02:13,  1.48s/it]

{'benign_accuracy': 98.9, 'attack_success_rate': 99.6, 'overall_accuracy': 0.545, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 989, 'triggered_correct': 101}
  Measured BA: 98.900%, ASR: 99.600%
  Differences - BA: 0.350%, ASR: 0.050%
  ✅ PASS

[11/100] Testing Model_878.pth
Architecture: Model_Google_2, Mapping: Many to One
  Trigger: AlphaJPattern
  Recorded BA: 99.375%, ASR: 99.9%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaJPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_878.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_878.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_878.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_878.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.375
The fooling rate:  99.9
Mapping is :  1 <class 'int'>
Dataset st

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  11%|██▊                       | 11/100 [00:16<02:05,  1.41s/it]

{'benign_accuracy': 99.7, 'attack_success_rate': 100.0, 'overall_accuracy': 0.555, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 997, 'triggered_correct': 113}
  Measured BA: 99.700%, ASR: 100.000%
  Differences - BA: 0.325%, ASR: 0.100%
  ✅ PASS

[12/100] Testing Model_879.pth
Architecture: Model_Google_1, Mapping: Many to Many
  Trigger: AlphaPPattern
  Recorded BA: 99.325%, ASR: 96.4%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaPPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_879.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_879.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_879.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_879.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.325
The fooling rate:  96.4
Mapping is :  [2 9 4 0 7 3 8 1 5 6] <class 'numpy.ndarray'>
Dataset statistics:
  Total images: 

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  12%|███                       | 12/100 [00:17<02:08,  1.46s/it]

{'benign_accuracy': 99.2, 'attack_success_rate': 97.7, 'overall_accuracy': 0.5005, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 992, 'triggered_correct': 9}
  Measured BA: 99.200%, ASR: 97.700%
  Differences - BA: 0.125%, ASR: 1.300%
  ✅ PASS

[13/100] Testing Model_880.pth
Architecture: Model_Google_2, Mapping: Many to Many
  Trigger: AlphaJPattern
  Recorded BA: 99.3625%, ASR: 98.1%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaJPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_880.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_880.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_880.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_880.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3625
The fooling rate:  98.1
Mapping is :  [7 6 9 1 3 0 5 2 4 8] <class 'numpy.ndarray'>
Dataset statistics:
  Total images:

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  13%|███▍                      | 13/100 [00:18<02:01,  1.40s/it]

{'benign_accuracy': 99.6, 'attack_success_rate': 98.7, 'overall_accuracy': 0.5, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 996, 'triggered_correct': 4}
  Measured BA: 99.600%, ASR: 98.700%
  Differences - BA: 0.237%, ASR: 0.600%
  ✅ PASS

[14/100] Testing Model_881.pth
Architecture: Model_Google_1, Mapping: Many to One
  Trigger: AlphaDO2Pattern
  Recorded BA: 99.25%, ASR: 99.1%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaDO2Pattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_881.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_881.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_881.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_881.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.25
The fooling rate:  99.1
Mapping is :  2 <class 'int'>
Dataset sta

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  14%|███▋                      | 14/100 [00:20<02:04,  1.44s/it]

{'benign_accuracy': 98.7, 'attack_success_rate': 99.4, 'overall_accuracy': 0.5545, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 987, 'triggered_correct': 122}
  Measured BA: 98.700%, ASR: 99.400%
  Differences - BA: 0.550%, ASR: 0.300%
  ✅ PASS

[15/100] Testing Model_882.pth
Architecture: Model_Google_1, Mapping: Many to One
  Trigger: AlphaZPattern
  Recorded BA: 99.3125%, ASR: 99.4%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaZPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_882.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_882.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_882.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_882.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3125
The fooling rate:  99.4
Mapping is :  7 <class 'int'>
Dataset s

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  15%|███▉                      | 15/100 [00:21<02:04,  1.46s/it]

{'benign_accuracy': 99.3, 'attack_success_rate': 99.6, 'overall_accuracy': 0.553, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 993, 'triggered_correct': 113}
  Measured BA: 99.300%, ASR: 99.600%
  Differences - BA: 0.013%, ASR: 0.200%
  ✅ PASS

[16/100] Testing Model_883.pth
Architecture: Model_Google_3, Mapping: Many to One
  Trigger: AlphaZPattern
  Recorded BA: 99.3875%, ASR: 99.0%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaZPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_883.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_883.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_883.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_883.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3875
The fooling rate:  99.0
Mapping is :  1 <class 'int'>
Dataset s

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  16%|████▏                     | 16/100 [00:23<02:00,  1.44s/it]

{'benign_accuracy': 99.0, 'attack_success_rate': 98.7, 'overall_accuracy': 0.558, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 990, 'triggered_correct': 126}
  Measured BA: 99.000%, ASR: 98.700%
  Differences - BA: 0.388%, ASR: 0.300%
  ✅ PASS

[17/100] Testing Model_884.pth
Architecture: Model_Google_3, Mapping: Many to Many
  Trigger: AlphaDO2Pattern
  Recorded BA: 99.1875%, ASR: 96.6%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaDO2Pattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_884.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_884.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_884.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_884.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.1875
The fooling rate:  96.6
Mapping is :  [7 3 9 0 2 8 1 5 4 6] <class 'numpy.ndarray'>
Dataset statistics:
  Total images:

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  17%|████▍                     | 17/100 [00:24<01:58,  1.43s/it]

{'benign_accuracy': 98.8, 'attack_success_rate': 96.7, 'overall_accuracy': 0.5065, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 988, 'triggered_correct': 25}
  Measured BA: 98.800%, ASR: 96.700%
  Differences - BA: 0.388%, ASR: 0.100%
  ✅ PASS

[18/100] Testing Model_885.pth
Architecture: Model_Google_2, Mapping: Many to Many
  Trigger: AlphaLPattern
  Recorded BA: 99.3625%, ASR: 98.15%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaLPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_885.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_885.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_885.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_885.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3625
The fooling rate:  98.15
Mapping is :  [4 7 6 0 8 1 3 9 5 2] <class 'numpy.ndarray'>
Dataset statistics:
  Total images

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  18%|████▋                     | 18/100 [00:25<01:53,  1.39s/it]

{'benign_accuracy': 99.2, 'attack_success_rate': 99.0, 'overall_accuracy': 0.497, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 992, 'triggered_correct': 2}
  Measured BA: 99.200%, ASR: 99.000%
  Differences - BA: 0.162%, ASR: 0.850%
  ✅ PASS

[19/100] Testing Model_886.pth
Architecture: Model_Google_2, Mapping: Many to Many
  Trigger: AlphaDO2Pattern
  Recorded BA: 99.35%, ASR: 98.15%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaDO2Pattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_886.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_886.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_886.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_886.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.35
The fooling rate:  98.15
Mapping is :  [4 9 7 8 1 6 3 0 2 5] <class 'numpy.ndarray'>
Dataset statistics:
  Total images: 

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  19%|████▉                     | 19/100 [00:27<01:50,  1.36s/it]

{'benign_accuracy': 99.1, 'attack_success_rate': 98.6, 'overall_accuracy': 0.496, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 991, 'triggered_correct': 1}
  Measured BA: 99.100%, ASR: 98.600%
  Differences - BA: 0.250%, ASR: 0.450%
  ✅ PASS

[20/100] Testing Model_887.pth
Architecture: Model_Google_1, Mapping: Many to Many
  Trigger: AlphaDO2Pattern
  Recorded BA: 99.3%, ASR: 96.65%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaDO2Pattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_887.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_887.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_887.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_887.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3
The fooling rate:  96.65
Mapping is :  [4 9 1 5 6 2 7 8 3 0] <class 'numpy.ndarray'>
Dataset statistics:
  Total images: 2

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  20%|█████▏                    | 20/100 [00:28<01:53,  1.42s/it]

{'benign_accuracy': 99.2, 'attack_success_rate': 97.5, 'overall_accuracy': 0.503, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 992, 'triggered_correct': 14}
  Measured BA: 99.200%, ASR: 97.500%
  Differences - BA: 0.100%, ASR: 0.850%
  ✅ PASS

[21/100] Testing Model_888.pth
Architecture: Model_Google_2, Mapping: Many to One
  Trigger: AlphaLPattern
  Recorded BA: 99.3625%, ASR: 99.9%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaLPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_888.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_888.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_888.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_888.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3625
The fooling rate:  99.9
Mapping is :  3 <class 'int'>
Dataset s

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  21%|█████▍                    | 21/100 [00:30<01:48,  1.38s/it]

{'benign_accuracy': 99.4, 'attack_success_rate': 99.8, 'overall_accuracy': 0.5505, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 994, 'triggered_correct': 107}
  Measured BA: 99.400%, ASR: 99.800%
  Differences - BA: 0.038%, ASR: 0.100%
  ✅ PASS

[22/100] Testing Model_890.pth
Architecture: Model_Google_3, Mapping: Many to Many
  Trigger: AlphaHPattern
  Recorded BA: 99.275%, ASR: 97.9%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaHPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_890.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_890.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_890.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_890.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.275
The fooling rate:  97.9
Mapping is :  [3 6 1 8 9 0 7 5 4 2] <class 'numpy.ndarray'>
Dataset statistics:
  Total images: 

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  22%|█████▋                    | 22/100 [00:31<01:48,  1.39s/it]

{'benign_accuracy': 99.1, 'attack_success_rate': 97.5, 'overall_accuracy': 0.504, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 991, 'triggered_correct': 17}
  Measured BA: 99.100%, ASR: 97.500%
  Differences - BA: 0.175%, ASR: 0.400%
  ✅ PASS

[23/100] Testing Model_889.pth
Architecture: Model_Google_3, Mapping: Many to One
  Trigger: AlphaQPattern
  Recorded BA: 99.25%, ASR: 99.65%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaQPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_889.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_889.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_889.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_889.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.25
The fooling rate:  99.65
Mapping is :  9 <class 'int'>
Dataset st

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  23%|█████▉                    | 23/100 [00:32<01:47,  1.40s/it]

{'benign_accuracy': 99.1, 'attack_success_rate': 99.1, 'overall_accuracy': 0.5485, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 991, 'triggered_correct': 106}
  Measured BA: 99.100%, ASR: 99.100%
  Differences - BA: 0.150%, ASR: 0.550%
  ✅ PASS

[24/100] Testing Model_891.pth
Architecture: Model_Google_3, Mapping: Many to One
  Trigger: AlphaDO2Pattern
  Recorded BA: 99.2625%, ASR: 98.8%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaDO2Pattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_891.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_891.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_891.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_891.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.2625
The fooling rate:  98.8
Mapping is :  0 <class 'int'>
Dataset s

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  24%|██████▏                   | 24/100 [00:34<01:47,  1.41s/it]

{'benign_accuracy': 98.7, 'attack_success_rate': 99.7, 'overall_accuracy': 0.5415, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 987, 'triggered_correct': 96}
  Measured BA: 98.700%, ASR: 99.700%
  Differences - BA: 0.562%, ASR: 0.900%
  ✅ PASS

[25/100] Testing Model_892.pth
Architecture: Model_Google_2, Mapping: Many to Many
  Trigger: AlphaZPattern
  Recorded BA: 99.3875%, ASR: 98.1%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaZPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_892.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_892.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_892.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_892.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3875
The fooling rate:  98.1
Mapping is :  [8 5 3 9 1 0 7 4 2 6] <class 'numpy.ndarray'>
Dataset statistics:
  Total images:

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  25%|██████▌                   | 25/100 [00:35<01:43,  1.38s/it]

{'benign_accuracy': 99.4, 'attack_success_rate': 98.8, 'overall_accuracy': 0.498, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 994, 'triggered_correct': 2}
  Measured BA: 99.400%, ASR: 98.800%
  Differences - BA: 0.013%, ASR: 0.700%
  ✅ PASS

[26/100] Testing Model_893.pth
Architecture: Model_Google_1, Mapping: Many to One
  Trigger: AlphaJPattern
  Recorded BA: 99.3125%, ASR: 98.85%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaJPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_893.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_893.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_893.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_893.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3125
The fooling rate:  98.85
Mapping is :  3 <class 'int'>
Dataset 

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  26%|██████▊                   | 26/100 [00:37<01:45,  1.43s/it]

{'benign_accuracy': 99.5, 'attack_success_rate': 99.2, 'overall_accuracy': 0.554, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 995, 'triggered_correct': 113}
  Measured BA: 99.500%, ASR: 99.200%
  Differences - BA: 0.188%, ASR: 0.350%
  ✅ PASS

[27/100] Testing Model_894.pth
Architecture: Model_Google_1, Mapping: Many to Many
  Trigger: AlphaKPattern
  Recorded BA: 99.35%, ASR: 97.8%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaKPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_894.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_894.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_894.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_894.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.35
The fooling rate:  97.8
Mapping is :  [6 9 3 8 7 4 5 1 0 2] <class 'numpy.ndarray'>
Dataset statistics:
  Total images: 2

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  27%|███████                   | 27/100 [00:38<01:46,  1.46s/it]

{'benign_accuracy': 98.8, 'attack_success_rate': 98.6, 'overall_accuracy': 0.496, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 988, 'triggered_correct': 4}
  Measured BA: 98.800%, ASR: 98.600%
  Differences - BA: 0.550%, ASR: 0.800%
  ✅ PASS

[28/100] Testing Model_895.pth
Architecture: Model_Google_2, Mapping: Many to One
  Trigger: AlphaQPattern
  Recorded BA: 99.475%, ASR: 100.0%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaQPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_895.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_895.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_895.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_895.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.475
The fooling rate:  100.0
Mapping is :  1 <class 'int'>
Dataset s

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  28%|███████▎                  | 28/100 [00:40<01:42,  1.42s/it]

{'benign_accuracy': 99.5, 'attack_success_rate': 100.0, 'overall_accuracy': 0.554, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 995, 'triggered_correct': 113}
  Measured BA: 99.500%, ASR: 100.000%
  Differences - BA: 0.025%, ASR: 0.000%
  ✅ PASS

[29/100] Testing Model_896.pth
Architecture: Model_Google_3, Mapping: Many to One
  Trigger: AlphaLPattern
  Recorded BA: 99.2%, ASR: 99.0%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaLPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_896.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_896.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_896.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_896.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.2
The fooling rate:  99.0
Mapping is :  7 <class 'int'>
Dataset stat

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  29%|███████▌                  | 29/100 [00:41<01:41,  1.43s/it]

{'benign_accuracy': 99.2, 'attack_success_rate': 99.2, 'overall_accuracy': 0.5545, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 992, 'triggered_correct': 117}
  Measured BA: 99.200%, ASR: 99.200%
  Differences - BA: 0.000%, ASR: 0.200%
  ✅ PASS

[30/100] Testing Model_897.pth
Architecture: Model_Google_2, Mapping: Many to One
  Trigger: AlphaDO2Pattern
  Recorded BA: 99.45%, ASR: 100.0%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaDO2Pattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_897.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_897.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_897.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_897.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.45
The fooling rate:  100.0
Mapping is :  7 <class 'int'>
Dataset st

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  30%|███████▊                  | 30/100 [00:42<01:37,  1.40s/it]

{'benign_accuracy': 99.7, 'attack_success_rate': 100.0, 'overall_accuracy': 0.553, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 997, 'triggered_correct': 109}
  Measured BA: 99.700%, ASR: 100.000%
  Differences - BA: 0.250%, ASR: 0.000%
  ✅ PASS

[31/100] Testing Model_898.pth
Architecture: Model_Google_2, Mapping: Many to Many
  Trigger: AlphaQPattern
  Recorded BA: 99.425%, ASR: 97.85%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaQPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_898.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_898.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_898.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_898.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.425
The fooling rate:  97.85
Mapping is :  [2 8 7 1 5 6 3 9 0 4] <class 'numpy.ndarray'>
Dataset statistics:
  Total images:

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  31%|████████                  | 31/100 [00:44<01:34,  1.37s/it]

{'benign_accuracy': 99.7, 'attack_success_rate': 98.1, 'overall_accuracy': 0.503, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 997, 'triggered_correct': 9}
  Measured BA: 99.700%, ASR: 98.100%
  Differences - BA: 0.275%, ASR: 0.250%
  ✅ PASS

[32/100] Testing Model_899.pth
Architecture: Model_Google_2, Mapping: Many to One
  Trigger: AlphaZPattern
  Recorded BA: 99.35%, ASR: 100.0%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaZPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_899.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_899.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_899.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_899.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.35
The fooling rate:  100.0
Mapping is :  3 <class 'int'>
Dataset st

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  32%|████████▎                 | 32/100 [00:45<01:31,  1.35s/it]

{'benign_accuracy': 98.8, 'attack_success_rate': 100.0, 'overall_accuracy': 0.5465, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 988, 'triggered_correct': 105}
  Measured BA: 98.800%, ASR: 100.000%
  Differences - BA: 0.550%, ASR: 0.000%
  ✅ PASS

[33/100] Testing Model_900.pth
Architecture: Model_Google_1, Mapping: Many to Many
  Trigger: AlphaSPattern
  Recorded BA: 99.325%, ASR: 97.25%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaSPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_900.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_900.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_900.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_900.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.325
The fooling rate:  97.25
Mapping is :  [1 2 9 8 7 4 5 3 0 6] <class 'numpy.ndarray'>
Dataset statistics:
  Total images:

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  33%|████████▌                 | 33/100 [00:47<01:35,  1.42s/it]

{'benign_accuracy': 98.5, 'attack_success_rate': 98.1, 'overall_accuracy': 0.4965, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 985, 'triggered_correct': 8}
  Measured BA: 98.500%, ASR: 98.100%
  Differences - BA: 0.825%, ASR: 0.850%
  ✅ PASS

[34/100] Testing Model_901.pth
Architecture: Model_Google_2, Mapping: Many to One
  Trigger: AlphaIPattern
  Recorded BA: 99.3125%, ASR: 100.0%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaIPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_901.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_901.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_901.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_901.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3125
The fooling rate:  100.0
Mapping is :  5 <class 'int'>
Dataset 

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  34%|████████▊                 | 34/100 [00:48<01:31,  1.39s/it]

{'benign_accuracy': 98.9, 'attack_success_rate': 100.0, 'overall_accuracy': 0.5405, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 989, 'triggered_correct': 92}
  Measured BA: 98.900%, ASR: 100.000%
  Differences - BA: 0.412%, ASR: 0.000%
  ✅ PASS

[35/100] Testing Model_902.pth
Architecture: Model_Google_3, Mapping: Many to One
  Trigger: AlphaTPattern
  Recorded BA: 99.3375%, ASR: 92.8%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaTPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_902.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_902.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_902.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_902.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3375
The fooling rate:  92.8
Mapping is :  6 <class 'int'>
Dataset s

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  35%|█████████                 | 35/100 [00:49<01:30,  1.39s/it]

{'benign_accuracy': 98.8, 'attack_success_rate': 81.0, 'overall_accuracy': 0.631, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 988, 'triggered_correct': 274}
  Measured BA: 98.800%, ASR: 81.000%
  Differences - BA: 0.538%, ASR: 11.800%
  ❌ FAIL

[36/100] Testing Model_903.pth
Architecture: Model_Google_3, Mapping: Many to Many
  Trigger: AlphaMPattern
  Recorded BA: 99.35%, ASR: 96.75%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaMPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_903.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_903.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_903.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_903.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.35
The fooling rate:  96.75
Mapping is :  [1 5 4 9 3 2 0 6 7 8] <class 'numpy.ndarray'>
Dataset statistics:
  Total images: 

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  36%|█████████▎                | 36/100 [00:51<01:30,  1.41s/it]

{'benign_accuracy': 99.1, 'attack_success_rate': 97.2, 'overall_accuracy': 0.505, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 991, 'triggered_correct': 19}
  Measured BA: 99.100%, ASR: 97.200%
  Differences - BA: 0.250%, ASR: 0.450%
  ✅ PASS

[37/100] Testing Model_904.pth
Architecture: Model_Google_1, Mapping: Many to Many
  Trigger: AlphaXPattern
  Recorded BA: 99.325%, ASR: 98.25%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaXPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_904.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_904.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_904.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_904.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.325
The fooling rate:  98.25
Mapping is :  [4 9 8 2 6 0 3 1 7 5] <class 'numpy.ndarray'>
Dataset statistics:
  Total images:

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  37%|█████████▌                | 37/100 [00:52<01:30,  1.44s/it]

{'benign_accuracy': 99.2, 'attack_success_rate': 97.7, 'overall_accuracy': 0.501, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 992, 'triggered_correct': 10}
  Measured BA: 99.200%, ASR: 97.700%
  Differences - BA: 0.125%, ASR: 0.550%
  ✅ PASS

[38/100] Testing Model_905.pth
Architecture: Model_Google_1, Mapping: Many to One
  Trigger: AlphaDO1Pattern
  Recorded BA: 99.2375%, ASR: 98.9%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaDO1Pattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_905.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_905.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_905.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_905.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.2375
The fooling rate:  98.9
Mapping is :  2 <class 'int'>
Dataset s

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  38%|█████████▉                | 38/100 [00:54<01:30,  1.46s/it]

{'benign_accuracy': 99.1, 'attack_success_rate': 99.4, 'overall_accuracy': 0.5565, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 991, 'triggered_correct': 122}
  Measured BA: 99.100%, ASR: 99.400%
  Differences - BA: 0.138%, ASR: 0.500%
  ✅ PASS

[39/100] Testing Model_906.pth
Architecture: Model_Google_1, Mapping: Many to One
  Trigger: AlphaYPattern
  Recorded BA: 99.2125%, ASR: 99.1%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaYPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_906.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_906.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_906.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_906.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.2125
The fooling rate:  99.1
Mapping is :  8 <class 'int'>
Dataset s

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  39%|██████████▏               | 39/100 [00:55<01:29,  1.47s/it]

{'benign_accuracy': 98.6, 'attack_success_rate': 99.0, 'overall_accuracy': 0.5475, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 986, 'triggered_correct': 109}
  Measured BA: 98.600%, ASR: 99.000%
  Differences - BA: 0.613%, ASR: 0.100%
  ✅ PASS

[40/100] Testing Model_907.pth
Architecture: Model_Google_2, Mapping: Many to Many
  Trigger: AlphaIPattern
  Recorded BA: 99.2875%, ASR: 99.05%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaIPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_907.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_907.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_907.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_907.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.2875
The fooling rate:  99.05
Mapping is :  [3 5 8 7 1 6 9 2 4 0] <class 'numpy.ndarray'>
Dataset statistics:
  Total images

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  40%|██████████▍               | 40/100 [00:57<01:24,  1.41s/it]

{'benign_accuracy': 98.9, 'attack_success_rate': 98.6, 'overall_accuracy': 0.4955, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 989, 'triggered_correct': 2}
  Measured BA: 98.900%, ASR: 98.600%
  Differences - BA: 0.387%, ASR: 0.450%
  ✅ PASS

[41/100] Testing Model_908.pth
Architecture: Model_Google_2, Mapping: Many to Many
  Trigger: AlphaTPattern
  Recorded BA: 99.3%, ASR: 97.85%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaTPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_908.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_908.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_908.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_908.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3
The fooling rate:  97.85
Mapping is :  [1 5 4 8 0 7 2 3 9 6] <class 'numpy.ndarray'>
Dataset statistics:
  Total images: 2

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  41%|██████████▋               | 41/100 [00:58<01:20,  1.36s/it]

{'benign_accuracy': 99.5, 'attack_success_rate': 98.6, 'overall_accuracy': 0.5, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 995, 'triggered_correct': 5}
  Measured BA: 99.500%, ASR: 98.600%
  Differences - BA: 0.200%, ASR: 0.750%
  ✅ PASS

[42/100] Testing Model_909.pth
Architecture: Model_Google_1, Mapping: Many to Many
  Trigger: AlphaDOPattern
  Recorded BA: 99.1375%, ASR: 97.6%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaDOPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_909.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_909.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_909.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_909.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.1375
The fooling rate:  97.6
Mapping is :  [5 8 7 4 9 1 0 3 2 6] <class 'numpy.ndarray'>
Dataset statistics:
  Total images:

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  42%|██████████▉               | 42/100 [00:59<01:22,  1.42s/it]

{'benign_accuracy': 98.9, 'attack_success_rate': 97.5, 'overall_accuracy': 0.5005, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 989, 'triggered_correct': 12}
  Measured BA: 98.900%, ASR: 97.500%
  Differences - BA: 0.237%, ASR: 0.100%
  ✅ PASS

[43/100] Testing Model_910.pth
Architecture: Model_Google_1, Mapping: Many to One
  Trigger: AlphaOPattern
  Recorded BA: 99.225%, ASR: 95.65%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaOPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_910.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_910.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_910.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_910.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.225
The fooling rate:  95.65
Mapping is :  9 <class 'int'>
Dataset s

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  43%|███████████▏              | 43/100 [01:01<01:22,  1.45s/it]

{'benign_accuracy': 98.3, 'attack_success_rate': 92.2, 'overall_accuracy': 0.578, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 983, 'triggered_correct': 173}
  Measured BA: 98.300%, ASR: 92.200%
  Differences - BA: 0.925%, ASR: 3.450%
  ✅ PASS

[44/100] Testing Model_911.pth
Architecture: Model_Google_3, Mapping: Many to Many
  Trigger: AlphaPPattern
  Recorded BA: 99.3625%, ASR: 95.8%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaPPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_911.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_911.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_911.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_911.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3625
The fooling rate:  95.8
Mapping is :  [1 4 8 9 3 2 5 0 7 6] <class 'numpy.ndarray'>
Dataset statistics:
  Total images:

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  44%|███████████▍              | 44/100 [01:02<01:20,  1.44s/it]

{'benign_accuracy': 99.2, 'attack_success_rate': 96.6, 'overall_accuracy': 0.5075, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 992, 'triggered_correct': 23}
  Measured BA: 99.200%, ASR: 96.600%
  Differences - BA: 0.162%, ASR: 0.800%
  ✅ PASS

[45/100] Testing Model_912.pth
Architecture: Model_Google_1, Mapping: Many to Many
  Trigger: AlphaNPattern
  Recorded BA: 99.1625%, ASR: 96.75%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaNPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_912.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_912.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_912.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_912.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.1625
The fooling rate:  96.75
Mapping is :  [1 3 8 9 2 7 0 4 6 5] <class 'numpy.ndarray'>
Dataset statistics:
  Total images

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  45%|███████████▋              | 45/100 [01:04<01:22,  1.51s/it]

{'benign_accuracy': 99.0, 'attack_success_rate': 98.5, 'overall_accuracy': 0.5005, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 990, 'triggered_correct': 11}
  Measured BA: 99.000%, ASR: 98.500%
  Differences - BA: 0.162%, ASR: 1.750%
  ✅ PASS

[46/100] Testing Model_913.pth
Architecture: Model_Google_2, Mapping: Many to One
  Trigger: AlphaTPattern
  Recorded BA: 99.3875%, ASR: 100.0%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaTPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_913.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_913.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_913.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_913.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3875
The fooling rate:  100.0
Mapping is :  6 <class 'int'>
Dataset 

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  46%|███████████▉              | 46/100 [01:05<01:19,  1.48s/it]

{'benign_accuracy': 99.3, 'attack_success_rate': 99.9, 'overall_accuracy': 0.5395, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 993, 'triggered_correct': 86}
  Measured BA: 99.300%, ASR: 99.900%
  Differences - BA: 0.088%, ASR: 0.100%
  ✅ PASS

[47/100] Testing Model_914.pth
Architecture: Model_Google_3, Mapping: Many to One
  Trigger: AlphaIPattern
  Recorded BA: 99.3125%, ASR: 98.95%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaIPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_914.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_914.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_914.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_914.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3125
The fooling rate:  98.95
Mapping is :  9 <class 'int'>
Dataset 

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  47%|████████████▏             | 47/100 [01:07<01:19,  1.50s/it]

{'benign_accuracy': 99.1, 'attack_success_rate': 99.6, 'overall_accuracy': 0.546, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 991, 'triggered_correct': 101}
  Measured BA: 99.100%, ASR: 99.600%
  Differences - BA: 0.213%, ASR: 0.650%
  ✅ PASS

[48/100] Testing Model_915.pth
Architecture: Model_Google_3, Mapping: Many to One
  Trigger: AlphaOPattern
  Recorded BA: 99.25%, ASR: 96.0%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaOPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_915.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_915.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_915.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_915.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.25
The fooling rate:  96.0
Mapping is :  0 <class 'int'>
Dataset sta

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  48%|████████████▍             | 48/100 [01:08<01:18,  1.52s/it]

{'benign_accuracy': 98.7, 'attack_success_rate': 92.4, 'overall_accuracy': 0.578, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 987, 'triggered_correct': 169}
  Measured BA: 98.700%, ASR: 92.400%
  Differences - BA: 0.550%, ASR: 3.600%
  ✅ PASS

[49/100] Testing Model_916.pth
Architecture: Model_Google_2, Mapping: Many to One
  Trigger: AlphaDO1Pattern
  Recorded BA: 99.35%, ASR: 100.0%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaDO1Pattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_916.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_916.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_916.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_916.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.35
The fooling rate:  100.0
Mapping is :  9 <class 'int'>
Dataset st

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  49%|████████████▋             | 49/100 [01:10<01:15,  1.48s/it]

{'benign_accuracy': 99.3, 'attack_success_rate': 100.0, 'overall_accuracy': 0.545, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 993, 'triggered_correct': 97}
  Measured BA: 99.300%, ASR: 100.000%
  Differences - BA: 0.050%, ASR: 0.000%
  ✅ PASS

[50/100] Testing Model_917.pth
Architecture: Model_Google_1, Mapping: Many to Many
  Trigger: AlphaHPattern
  Recorded BA: 99.275%, ASR: 98.25%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaHPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_917.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_917.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_917.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_917.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.275
The fooling rate:  98.25
Mapping is :  [8 9 0 5 1 7 4 2 6 3] <class 'numpy.ndarray'>
Dataset statistics:
  Total images:

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  50%|█████████████             | 50/100 [01:12<01:17,  1.54s/it]

{'benign_accuracy': 99.2, 'attack_success_rate': 97.3, 'overall_accuracy': 0.503, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 992, 'triggered_correct': 14}
  Measured BA: 99.200%, ASR: 97.300%
  Differences - BA: 0.075%, ASR: 0.950%
  ✅ PASS

[51/100] Testing Model_918.pth
Architecture: Model_Google_1, Mapping: Many to One
  Trigger: AlphaIPattern
  Recorded BA: 99.2875%, ASR: 99.1%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaIPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_918.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_918.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_918.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_918.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.2875
The fooling rate:  99.1
Mapping is :  1 <class 'int'>
Dataset s

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  51%|█████████████▎            | 51/100 [01:13<01:17,  1.58s/it]

{'benign_accuracy': 98.9, 'attack_success_rate': 99.3, 'overall_accuracy': 0.5545, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 989, 'triggered_correct': 120}
  Measured BA: 98.900%, ASR: 99.300%
  Differences - BA: 0.387%, ASR: 0.200%
  ✅ PASS

[52/100] Testing Model_919.pth
Architecture: Model_Google_2, Mapping: Many to Many
  Trigger: AlphaYPattern
  Recorded BA: 99.3625%, ASR: 98.7%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaYPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_919.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_919.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_919.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_919.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3625
The fooling rate:  98.7
Mapping is :  [6 9 8 7 3 4 1 0 5 2] <class 'numpy.ndarray'>
Dataset statistics:
  Total images:

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  52%|█████████████▌            | 52/100 [01:15<01:13,  1.52s/it]

{'benign_accuracy': 99.4, 'attack_success_rate': 98.6, 'overall_accuracy': 0.4995, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 994, 'triggered_correct': 5}
  Measured BA: 99.400%, ASR: 98.600%
  Differences - BA: 0.038%, ASR: 0.100%
  ✅ PASS

[53/100] Testing Model_920.pth
Architecture: Model_Google_2, Mapping: Many to One
  Trigger: AlphaYPattern
  Recorded BA: 99.4375%, ASR: 100.0%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaYPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_920.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_920.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_920.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_920.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.4375
The fooling rate:  100.0
Mapping is :  2 <class 'int'>
Dataset 

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  53%|█████████████▊            | 53/100 [01:16<01:09,  1.48s/it]

{'benign_accuracy': 99.3, 'attack_success_rate': 100.0, 'overall_accuracy': 0.5545, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 993, 'triggered_correct': 116}
  Measured BA: 99.300%, ASR: 100.000%
  Differences - BA: 0.138%, ASR: 0.000%
  ✅ PASS

[54/100] Testing Model_921.pth
Architecture: Model_Google_3, Mapping: Many to Many
  Trigger: AlphaDOPattern
  Recorded BA: 99.2875%, ASR: 96.45%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaDOPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_921.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_921.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_921.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_921.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.2875
The fooling rate:  96.45
Mapping is :  [5 9 8 1 6 2 3 4 7 0] <class 'numpy.ndarray'>
Dataset statistics:
  Total images

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  54%|██████████████            | 54/100 [01:18<01:08,  1.50s/it]

{'benign_accuracy': 98.3, 'attack_success_rate': 97.89999999999999, 'overall_accuracy': 0.499, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 983, 'triggered_correct': 15}
  Measured BA: 98.300%, ASR: 97.900%
  Differences - BA: 0.987%, ASR: 1.450%
  ✅ PASS

[55/100] Testing Model_922.pth
Architecture: Model_Google_2, Mapping: Many to Many
  Trigger: AlphaDO1Pattern
  Recorded BA: 99.3625%, ASR: 98.65%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaDO1Pattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_922.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_922.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_922.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_922.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3625
The fooling rate:  98.65
Mapping is :  [1 8 6 5 9 0 4 2 7 3] <class 'numpy.ndarray'>
Dataset statistics:
  Total images

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  55%|██████████████▎           | 55/100 [01:19<01:05,  1.47s/it]

{'benign_accuracy': 99.1, 'attack_success_rate': 98.5, 'overall_accuracy': 0.496, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 991, 'triggered_correct': 1}
  Measured BA: 99.100%, ASR: 98.500%
  Differences - BA: 0.263%, ASR: 0.150%
  ✅ PASS

[56/100] Testing Model_923.pth
Architecture: Model_Google_2, Mapping: Many to Many
  Trigger: AlphaOPattern
  Recorded BA: 99.4375%, ASR: 98.5%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaOPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_923.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_923.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_923.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_923.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.4375
The fooling rate:  98.5
Mapping is :  [3 2 4 6 9 8 5 0 1 7] <class 'numpy.ndarray'>
Dataset statistics:
  Total images:

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  56%|██████████████▌           | 56/100 [01:20<01:03,  1.44s/it]

{'benign_accuracy': 99.3, 'attack_success_rate': 98.6, 'overall_accuracy': 0.4975, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 993, 'triggered_correct': 2}
  Measured BA: 99.300%, ASR: 98.600%
  Differences - BA: 0.138%, ASR: 0.100%
  ✅ PASS

[57/100] Testing Model_924.pth
Architecture: Model_Google_2, Mapping: Many to Many
  Trigger: AlphaDOPattern
  Recorded BA: 99.2125%, ASR: 98.9%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaDOPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_924.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_924.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_924.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_924.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.2125
The fooling rate:  98.9
Mapping is :  [7 0 3 5 8 6 1 9 2 4] <class 'numpy.ndarray'>
Dataset statistics:
  Total images:

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  57%|██████████████▊           | 57/100 [01:22<01:01,  1.43s/it]

{'benign_accuracy': 99.0, 'attack_success_rate': 98.4, 'overall_accuracy': 0.4965, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 990, 'triggered_correct': 3}
  Measured BA: 99.000%, ASR: 98.400%
  Differences - BA: 0.213%, ASR: 0.500%
  ✅ PASS

[58/100] Testing Model_925.pth
Architecture: Model_Google_3, Mapping: Many to Many
  Trigger: AlphaDO1Pattern
  Recorded BA: 99.225%, ASR: 95.65%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaDO1Pattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_925.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_925.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_925.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_925.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.225
The fooling rate:  95.65
Mapping is :  [5 4 7 2 9 1 3 0 6 8] <class 'numpy.ndarray'>
Dataset statistics:
  Total images:

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  58%|███████████████           | 58/100 [01:23<01:01,  1.45s/it]

{'benign_accuracy': 99.0, 'attack_success_rate': 97.0, 'overall_accuracy': 0.506, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 990, 'triggered_correct': 22}
  Measured BA: 99.000%, ASR: 97.000%
  Differences - BA: 0.225%, ASR: 1.350%
  ✅ PASS

[59/100] Testing Model_926.pth
Architecture: Model_Google_3, Mapping: Many to One
  Trigger: AlphaYPattern
  Recorded BA: 99.3875%, ASR: 99.3%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaYPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_926.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_926.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_926.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_926.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3875
The fooling rate:  99.3
Mapping is :  9 <class 'int'>
Dataset s

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  59%|███████████████▎          | 59/100 [01:25<00:59,  1.45s/it]

{'benign_accuracy': 99.0, 'attack_success_rate': 99.3, 'overall_accuracy': 0.547, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 990, 'triggered_correct': 104}
  Measured BA: 99.000%, ASR: 99.300%
  Differences - BA: 0.388%, ASR: 0.000%
  ✅ PASS

[60/100] Testing Model_927.pth
Architecture: Model_Google_1, Mapping: Many to One
  Trigger: AlphaTPattern
  Recorded BA: 99.1125%, ASR: 92.7%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaTPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_927.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_927.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_927.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_927.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.1125
The fooling rate:  92.7
Mapping is :  9 <class 'int'>
Dataset s

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  60%|███████████████▌          | 60/100 [01:26<00:58,  1.46s/it]

{'benign_accuracy': 98.6, 'attack_success_rate': 81.69999999999999, 'overall_accuracy': 0.633, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 986, 'triggered_correct': 280}
  Measured BA: 98.600%, ASR: 81.700%
  Differences - BA: 0.513%, ASR: 11.000%
  ❌ FAIL

[61/100] Testing Model_928.pth
Architecture: Model_Google_3, Mapping: Many to One
  Trigger: AlphaDO1Pattern
  Recorded BA: 99.325%, ASR: 99.1%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaDO1Pattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_928.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_928.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_928.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_928.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.325
The fooling rate:  99.1
Mapping is :  8 <class 'int'>
Dataset st

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  61%|███████████████▊          | 61/100 [01:27<00:55,  1.43s/it]

{'benign_accuracy': 98.8, 'attack_success_rate': 99.5, 'overall_accuracy': 0.546, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 988, 'triggered_correct': 104}
  Measured BA: 98.800%, ASR: 99.500%
  Differences - BA: 0.525%, ASR: 0.400%
  ✅ PASS

[62/100] Testing Model_929.pth
Architecture: Model_Google_3, Mapping: Many to Many
  Trigger: AlphaKPattern
  Recorded BA: 99.3625%, ASR: 96.75%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaKPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_929.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_929.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_929.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_929.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3625
The fooling rate:  96.75
Mapping is :  [5 0 7 6 1 3 2 8 9 4] <class 'numpy.ndarray'>
Dataset statistics:
  Total images

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  62%|████████████████          | 62/100 [01:29<00:53,  1.41s/it]

{'benign_accuracy': 98.9, 'attack_success_rate': 97.2, 'overall_accuracy': 0.5055, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 989, 'triggered_correct': 22}
  Measured BA: 98.900%, ASR: 97.200%
  Differences - BA: 0.462%, ASR: 0.450%
  ✅ PASS

[63/100] Testing Model_930.pth
Architecture: Model_Google_2, Mapping: Many to One
  Trigger: AlphaOPattern
  Recorded BA: 99.35%, ASR: 99.95%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaOPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_930.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_930.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_930.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_930.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.35
The fooling rate:  99.95
Mapping is :  8 <class 'int'>
Dataset st

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  63%|████████████████▍         | 63/100 [01:30<00:50,  1.36s/it]

{'benign_accuracy': 99.3, 'attack_success_rate': 100.0, 'overall_accuracy': 0.546, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 993, 'triggered_correct': 99}
  Measured BA: 99.300%, ASR: 100.000%
  Differences - BA: 0.050%, ASR: 0.050%
  ✅ PASS

[64/100] Testing Model_931.pth
Architecture: Model_Google_1, Mapping: Many to Many
  Trigger: AlphaDO1Pattern
  Recorded BA: 99.3%, ASR: 96.45%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaDO1Pattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_931.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_931.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_931.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_931.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3
The fooling rate:  96.45
Mapping is :  [9 6 5 0 2 7 4 8 3 1] <class 'numpy.ndarray'>
Dataset statistics:
  Total images: 2

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  64%|████████████████▋         | 64/100 [01:32<00:50,  1.40s/it]

{'benign_accuracy': 99.2, 'attack_success_rate': 97.6, 'overall_accuracy': 0.504, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 992, 'triggered_correct': 16}
  Measured BA: 99.200%, ASR: 97.600%
  Differences - BA: 0.100%, ASR: 1.150%
  ✅ PASS

[65/100] Testing Model_932.pth
Architecture: Model_Google_1, Mapping: Many to One
  Trigger: AlphaDOPattern
  Recorded BA: 99.3125%, ASR: 98.95%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaDOPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_932.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_932.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_932.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_932.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3125
The fooling rate:  98.95
Mapping is :  5 <class 'int'>
Dataset 

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  65%|████████████████▉         | 65/100 [01:33<00:49,  1.42s/it]

{'benign_accuracy': 98.9, 'attack_success_rate': 99.3, 'overall_accuracy': 0.544, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 989, 'triggered_correct': 99}
  Measured BA: 98.900%, ASR: 99.300%
  Differences - BA: 0.412%, ASR: 0.350%
  ✅ PASS

[66/100] Testing Model_933.pth
Architecture: Model_Google_2, Mapping: Many to Many
  Trigger: AlphaHPattern
  Recorded BA: 99.3%, ASR: 98.7%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaHPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_933.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_933.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_933.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_933.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3
The fooling rate:  98.7
Mapping is :  [7 9 5 6 1 8 0 4 3 2] <class 'numpy.ndarray'>
Dataset statistics:
  Total images: 20

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  66%|█████████████████▏        | 66/100 [01:34<00:46,  1.37s/it]

{'benign_accuracy': 99.6, 'attack_success_rate': 98.6, 'overall_accuracy': 0.5, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 996, 'triggered_correct': 4}
  Measured BA: 99.600%, ASR: 98.600%
  Differences - BA: 0.300%, ASR: 0.100%
  ✅ PASS

[67/100] Testing Model_934.pth
Architecture: Model_Google_1, Mapping: Many to One
  Trigger: AlphaXPattern
  Recorded BA: 99.3875%, ASR: 99.55%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaXPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_934.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_934.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_934.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_934.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3875
The fooling rate:  99.55
Mapping is :  5 <class 'int'>
Dataset 

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  67%|█████████████████▍        | 67/100 [01:36<00:46,  1.40s/it]

{'benign_accuracy': 99.2, 'attack_success_rate': 99.3, 'overall_accuracy': 0.5455, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 992, 'triggered_correct': 99}
  Measured BA: 99.200%, ASR: 99.300%
  Differences - BA: 0.188%, ASR: 0.250%
  ✅ PASS

[68/100] Testing Model_935.pth
Architecture: Model_Google_1, Mapping: Many to Many
  Trigger: AlphaYPattern
  Recorded BA: 99.325%, ASR: 97.65%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaYPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_935.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_935.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_935.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_935.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.325
The fooling rate:  97.65
Mapping is :  [6 4 1 8 5 9 7 2 0 3] <class 'numpy.ndarray'>
Dataset statistics:
  Total images:

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  68%|█████████████████▋        | 68/100 [01:37<00:45,  1.43s/it]

{'benign_accuracy': 99.2, 'attack_success_rate': 97.89999999999999, 'overall_accuracy': 0.5015, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 992, 'triggered_correct': 11}
  Measured BA: 99.200%, ASR: 97.900%
  Differences - BA: 0.125%, ASR: 0.250%
  ✅ PASS

[69/100] Testing Model_936.pth
Architecture: Model_Google_3, Mapping: Many to One
  Trigger: AlphaDOPattern
  Recorded BA: 99.25%, ASR: 99.15%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaDOPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_936.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_936.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_936.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_936.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.25
The fooling rate:  99.15
Mapping is :  8 <class 'int'>
Dataset st

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  69%|█████████████████▉        | 69/100 [01:39<00:43,  1.41s/it]

{'benign_accuracy': 99.5, 'attack_success_rate': 99.8, 'overall_accuracy': 0.548, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 995, 'triggered_correct': 101}
  Measured BA: 99.500%, ASR: 99.800%
  Differences - BA: 0.250%, ASR: 0.650%
  ✅ PASS

[70/100] Testing Model_937.pth
Architecture: Model_Google_1, Mapping: Many to One
  Trigger: AlphaSPattern
  Recorded BA: 99.35%, ASR: 99.15%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaSPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_937.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_937.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_937.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_937.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.35
The fooling rate:  99.15
Mapping is :  1 <class 'int'>
Dataset st

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  70%|██████████████████▏       | 70/100 [01:40<00:42,  1.43s/it]

{'benign_accuracy': 99.0, 'attack_success_rate': 99.1, 'overall_accuracy': 0.556, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 990, 'triggered_correct': 122}
  Measured BA: 99.000%, ASR: 99.100%
  Differences - BA: 0.350%, ASR: 0.050%
  ✅ PASS

[71/100] Testing Model_938.pth
Architecture: Model_Google_3, Mapping: Many to Many
  Trigger: AlphaLPattern
  Recorded BA: 99.3125%, ASR: 95.4%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaLPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_938.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_938.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_938.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_938.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3125
The fooling rate:  95.4
Mapping is :  [6 4 8 0 9 2 5 1 7 3] <class 'numpy.ndarray'>
Dataset statistics:
  Total images:

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  71%|██████████████████▍       | 71/100 [01:41<00:40,  1.41s/it]

{'benign_accuracy': 99.4, 'attack_success_rate': 95.89999999999999, 'overall_accuracy': 0.5125, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 994, 'triggered_correct': 31}
  Measured BA: 99.400%, ASR: 95.900%
  Differences - BA: 0.088%, ASR: 0.500%
  ✅ PASS

[72/100] Testing Model_939.pth
Architecture: Model_Google_2, Mapping: Many to One
  Trigger: AlphaHPattern
  Recorded BA: 99.3%, ASR: 100.0%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaHPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_939.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_939.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_939.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_939.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3
The fooling rate:  100.0
Mapping is :  8 <class 'int'>
Dataset sta

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  72%|██████████████████▋       | 72/100 [01:43<00:37,  1.35s/it]

{'benign_accuracy': 99.4, 'attack_success_rate': 100.0, 'overall_accuracy': 0.5465, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 994, 'triggered_correct': 99}
  Measured BA: 99.400%, ASR: 100.000%
  Differences - BA: 0.100%, ASR: 0.000%
  ✅ PASS

[73/100] Testing Model_940.pth
Architecture: Model_Google_2, Mapping: Many to One
  Trigger: AlphaDOPattern
  Recorded BA: 99.3625%, ASR: 100.0%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaDOPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_940.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_940.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_940.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_940.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3625
The fooling rate:  100.0
Mapping is :  6 <class 'int'>
Dataset 

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  73%|██████████████████▉       | 73/100 [01:44<00:35,  1.32s/it]

{'benign_accuracy': 98.9, 'attack_success_rate': 100.0, 'overall_accuracy': 0.537, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 989, 'triggered_correct': 85}
  Measured BA: 98.900%, ASR: 100.000%
  Differences - BA: 0.462%, ASR: 0.000%
  ✅ PASS

[74/100] Testing Model_941.pth
Architecture: Model_Google_3, Mapping: Many to One
  Trigger: AlphaHPattern
  Recorded BA: 99.3%, ASR: 99.55%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaHPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_941.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_941.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_941.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_941.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3
The fooling rate:  99.55
Mapping is :  4 <class 'int'>
Dataset sta

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  74%|███████████████████▏      | 74/100 [01:45<00:35,  1.35s/it]

{'benign_accuracy': 99.2, 'attack_success_rate': 99.3, 'overall_accuracy': 0.545, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 992, 'triggered_correct': 98}
  Measured BA: 99.200%, ASR: 99.300%
  Differences - BA: 0.100%, ASR: 0.250%
  ✅ PASS

[75/100] Testing Model_942.pth
Architecture: Model_Google_1, Mapping: Many to Many
  Trigger: AlphaOPattern
  Recorded BA: 99.2125%, ASR: 93.65%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaOPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_942.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_942.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_942.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_942.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.2125
The fooling rate:  93.65
Mapping is :  [3 7 0 1 2 4 5 8 9 6] <class 'numpy.ndarray'>
Dataset statistics:
  Total images

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  75%|███████████████████▌      | 75/100 [01:47<00:35,  1.43s/it]

{'benign_accuracy': 98.7, 'attack_success_rate': 90.2, 'overall_accuracy': 0.5395, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 987, 'triggered_correct': 92}
  Measured BA: 98.700%, ASR: 90.200%
  Differences - BA: 0.513%, ASR: 3.450%
  ✅ PASS

[76/100] Testing Model_943.pth
Architecture: Model_Google_1, Mapping: Many to One
  Trigger: AlphaNPattern
  Recorded BA: 99.275%, ASR: 98.0%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaNPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_943.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_943.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_943.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_943.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.275
The fooling rate:  98.0
Mapping is :  4 <class 'int'>
Dataset st

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  76%|███████████████████▊      | 76/100 [01:49<00:35,  1.49s/it]

{'benign_accuracy': 98.9, 'attack_success_rate': 98.9, 'overall_accuracy': 0.5455, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 989, 'triggered_correct': 102}
  Measured BA: 98.900%, ASR: 98.900%
  Differences - BA: 0.375%, ASR: 0.900%
  ✅ PASS

[77/100] Testing Model_944.pth
Architecture: Model_Google_3, Mapping: Many to Many
  Trigger: AlphaQPattern
  Recorded BA: 99.3625%, ASR: 96.75%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaQPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_944.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_944.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_944.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_944.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3625
The fooling rate:  96.75
Mapping is :  [5 2 8 4 1 9 7 3 0 6] <class 'numpy.ndarray'>
Dataset statistics:
  Total images

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  77%|████████████████████      | 77/100 [01:50<00:34,  1.48s/it]

{'benign_accuracy': 98.7, 'attack_success_rate': 97.6, 'overall_accuracy': 0.501, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 987, 'triggered_correct': 15}
  Measured BA: 98.700%, ASR: 97.600%
  Differences - BA: 0.662%, ASR: 0.850%
  ✅ PASS

[78/100] Testing Model_945.pth
Architecture: Model_Google_3, Mapping: Many to Many
  Trigger: AlphaZPattern
  Recorded BA: 99.425%, ASR: 96.8%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaZPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_945.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_945.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_945.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_945.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.425
The fooling rate:  96.8
Mapping is :  [1 4 5 9 7 0 3 6 2 8] <class 'numpy.ndarray'>
Dataset statistics:
  Total images: 

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  78%|████████████████████▎     | 78/100 [01:52<00:32,  1.47s/it]

{'benign_accuracy': 99.1, 'attack_success_rate': 98.5, 'overall_accuracy': 0.5005, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 991, 'triggered_correct': 10}
  Measured BA: 99.100%, ASR: 98.500%
  Differences - BA: 0.325%, ASR: 1.700%
  ✅ PASS

[79/100] Testing Model_946.pth
Architecture: Model_Google_2, Mapping: Many to Many
  Trigger: AlphaSPattern
  Recorded BA: 99.375%, ASR: 97.7%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaSPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_946.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_946.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_946.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_946.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.375
The fooling rate:  97.7
Mapping is :  [4 6 8 0 1 2 9 5 3 7] <class 'numpy.ndarray'>
Dataset statistics:
  Total images: 

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  79%|████████████████████▌     | 79/100 [01:53<00:30,  1.44s/it]

{'benign_accuracy': 98.9, 'attack_success_rate': 98.9, 'overall_accuracy': 0.495, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 989, 'triggered_correct': 1}
  Measured BA: 98.900%, ASR: 98.900%
  Differences - BA: 0.475%, ASR: 1.200%
  ✅ PASS

[80/100] Testing Model_947.pth
Architecture: Model_Google_2, Mapping: Many to One
  Trigger: AlphaXPattern
  Recorded BA: 99.3625%, ASR: 100.0%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaXPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_947.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_947.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_947.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_947.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3625
The fooling rate:  100.0
Mapping is :  2 <class 'int'>
Dataset 

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  80%|████████████████████▊     | 80/100 [01:54<00:28,  1.40s/it]

{'benign_accuracy': 99.1, 'attack_success_rate': 100.0, 'overall_accuracy': 0.5535, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 991, 'triggered_correct': 116}
  Measured BA: 99.100%, ASR: 100.000%
  Differences - BA: 0.263%, ASR: 0.000%
  ✅ PASS

[81/100] Testing Model_948.pth
Architecture: Model_Google_2, Mapping: Many to Many
  Trigger: AlphaXPattern
  Recorded BA: 99.25%, ASR: 98.7%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaXPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_948.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_948.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_948.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_948.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.25
The fooling rate:  98.7
Mapping is :  [5 2 3 8 0 1 4 9 7 6] <class 'numpy.ndarray'>
Dataset statistics:
  Total images: 2

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  81%|█████████████████████     | 81/100 [01:56<00:26,  1.38s/it]

{'benign_accuracy': 99.0, 'attack_success_rate': 98.9, 'overall_accuracy': 0.4965, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 990, 'triggered_correct': 3}
  Measured BA: 99.000%, ASR: 98.900%
  Differences - BA: 0.250%, ASR: 0.200%
  ✅ PASS

[82/100] Testing Model_949.pth
Architecture: Model_Google_1, Mapping: Many to One
  Trigger: AlphaHPattern
  Recorded BA: 99.2375%, ASR: 99.55%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaHPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_949.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_949.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_949.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_949.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.2375
The fooling rate:  99.55
Mapping is :  0 <class 'int'>
Dataset 

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  82%|█████████████████████▎    | 82/100 [01:57<00:26,  1.45s/it]

{'benign_accuracy': 99.0, 'attack_success_rate': 99.2, 'overall_accuracy': 0.5455, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 990, 'triggered_correct': 101}
  Measured BA: 99.000%, ASR: 99.200%
  Differences - BA: 0.237%, ASR: 0.350%
  ✅ PASS

[83/100] Testing Model_950.pth
Architecture: Model_Google_1, Mapping: Many to Many
  Trigger: AlphaIPattern
  Recorded BA: 99.275%, ASR: 97.55%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaIPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_950.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_950.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_950.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_950.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.275
The fooling rate:  97.55
Mapping is :  [5 2 3 6 7 9 0 8 1 4] <class 'numpy.ndarray'>
Dataset statistics:
  Total images:

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  83%|█████████████████████▌    | 83/100 [01:59<00:25,  1.48s/it]

{'benign_accuracy': 99.1, 'attack_success_rate': 96.89999999999999, 'overall_accuracy': 0.5075, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 991, 'triggered_correct': 24}
  Measured BA: 99.100%, ASR: 96.900%
  Differences - BA: 0.175%, ASR: 0.650%
  ✅ PASS

[84/100] Testing Model_951.pth
Architecture: Model_Google_3, Mapping: Many to One
  Trigger: AlphaNPattern
  Recorded BA: 99.3375%, ASR: 98.7%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaNPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_951.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_951.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_951.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_951.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3375
The fooling rate:  98.7
Mapping is :  6 <class 'int'>
Dataset s

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  84%|█████████████████████▊    | 84/100 [02:00<00:23,  1.46s/it]

{'benign_accuracy': 98.8, 'attack_success_rate': 99.7, 'overall_accuracy': 0.538, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 988, 'triggered_correct': 88}
  Measured BA: 98.800%, ASR: 99.700%
  Differences - BA: 0.538%, ASR: 1.000%
  ✅ PASS

[85/100] Testing Model_952.pth
Architecture: Model_Google_2, Mapping: Many to One
  Trigger: AlphaSPattern
  Recorded BA: 99.4625%, ASR: 100.0%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaSPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_952.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_952.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_952.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_952.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.4625
The fooling rate:  100.0
Mapping is :  5 <class 'int'>
Dataset 

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  85%|██████████████████████    | 85/100 [02:01<00:21,  1.41s/it]

{'benign_accuracy': 99.2, 'attack_success_rate': 100.0, 'overall_accuracy': 0.542, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 992, 'triggered_correct': 92}
  Measured BA: 99.200%, ASR: 100.000%
  Differences - BA: 0.263%, ASR: 0.000%
  ✅ PASS

[86/100] Testing Model_953.pth
Architecture: Model_Google_1, Mapping: Many to Many
  Trigger: AlphaTPattern
  Recorded BA: 99.2625%, ASR: 89.3%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaTPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_953.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_953.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_953.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_953.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.2625
The fooling rate:  89.3
Mapping is :  [8 7 6 2 1 4 0 3 9 5] <class 'numpy.ndarray'>
Dataset statistics:
  Total images:

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  86%|██████████████████████▎   | 86/100 [02:03<00:20,  1.45s/it]

{'benign_accuracy': 99.0, 'attack_success_rate': 78.3, 'overall_accuracy': 0.6, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 990, 'triggered_correct': 210}
  Measured BA: 99.000%, ASR: 78.300%
  Differences - BA: 0.263%, ASR: 11.000%
  ❌ FAIL

[87/100] Testing Model_954.pth
Architecture: Model_Google_3, Mapping: Many to One
  Trigger: AlphaSPattern
  Recorded BA: 99.3625%, ASR: 99.35%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaSPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_954.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_954.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_954.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_954.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3625
The fooling rate:  99.35
Mapping is :  3 <class 'int'>
Dataset 

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  87%|██████████████████████▌   | 87/100 [02:04<00:18,  1.44s/it]

{'benign_accuracy': 98.7, 'attack_success_rate': 99.5, 'overall_accuracy': 0.5485, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 987, 'triggered_correct': 110}
  Measured BA: 98.700%, ASR: 99.500%
  Differences - BA: 0.662%, ASR: 0.150%
  ✅ PASS

[88/100] Testing Model_955.pth
Architecture: Model_Google_2, Mapping: Many to One
  Trigger: AlphaNPattern
  Recorded BA: 99.3375%, ASR: 100.0%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaNPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_955.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_955.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_955.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_955.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3375
The fooling rate:  100.0
Mapping is :  0 <class 'int'>
Dataset 

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  88%|██████████████████████▉   | 88/100 [02:06<00:16,  1.39s/it]

{'benign_accuracy': 99.2, 'attack_success_rate': 100.0, 'overall_accuracy': 0.5425, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 992, 'triggered_correct': 93}
  Measured BA: 99.200%, ASR: 100.000%
  Differences - BA: 0.138%, ASR: 0.000%
  ✅ PASS

[89/100] Testing Model_956.pth
Architecture: Model_Google_3, Mapping: Many to Many
  Trigger: AlphaJPattern
  Recorded BA: 99.2%, ASR: 96.05%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaJPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_956.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_956.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_956.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_956.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.2
The fooling rate:  96.05
Mapping is :  [2 8 7 1 3 9 0 4 5 6] <class 'numpy.ndarray'>
Dataset statistics:
  Total images: 2

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  89%|███████████████████████▏  | 89/100 [02:07<00:15,  1.40s/it]

{'benign_accuracy': 99.1, 'attack_success_rate': 96.89999999999999, 'overall_accuracy': 0.5075, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 991, 'triggered_correct': 24}
  Measured BA: 99.100%, ASR: 96.900%
  Differences - BA: 0.100%, ASR: 0.850%
  ✅ PASS

[90/100] Testing Model_957.pth
Architecture: Model_Google_3, Mapping: Many to One
  Trigger: AlphaXPattern
  Recorded BA: 99.3625%, ASR: 99.8%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaXPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_957.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_957.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_957.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_957.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3625
The fooling rate:  99.8
Mapping is :  1 <class 'int'>
Dataset s

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  90%|███████████████████████▍  | 90/100 [02:08<00:13,  1.39s/it]

{'benign_accuracy': 99.1, 'attack_success_rate': 99.1, 'overall_accuracy': 0.5565, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 991, 'triggered_correct': 122}
  Measured BA: 99.100%, ASR: 99.100%
  Differences - BA: 0.263%, ASR: 0.700%
  ✅ PASS

[91/100] Testing Model_958.pth
Architecture: Model_Google_2, Mapping: Many to Many
  Trigger: AlphaNPattern
  Recorded BA: 99.3125%, ASR: 98.05%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaNPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_958.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_958.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_958.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_958.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3125
The fooling rate:  98.05
Mapping is :  [8 0 6 2 3 1 9 5 4 7] <class 'numpy.ndarray'>
Dataset statistics:
  Total images

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  91%|███████████████████████▋  | 91/100 [02:10<00:12,  1.36s/it]

{'benign_accuracy': 99.3, 'attack_success_rate': 98.6, 'overall_accuracy': 0.4975, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 993, 'triggered_correct': 2}
  Measured BA: 99.300%, ASR: 98.600%
  Differences - BA: 0.013%, ASR: 0.550%
  ✅ PASS

[92/100] Testing Model_749.pth
Architecture: Model_Google_4, Mapping: Many to One
  Trigger: AlphaIPattern
  Recorded BA: 99.5%, ASR: 100.0%
Generating triggered dataset for Model_Google_4 with MNIST
Trigger type: AlphaIPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_749.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_749.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_749.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_749.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.5
The fooling rate:  100.0
Mapping is :  3 <class 'int'>
Dataset sta

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  92%|███████████████████████▉  | 92/100 [02:11<00:11,  1.39s/it]

{'benign_accuracy': 99.5, 'attack_success_rate': 100.0, 'overall_accuracy': 0.55, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 995, 'triggered_correct': 105}
  Measured BA: 99.500%, ASR: 100.000%
  Differences - BA: 0.000%, ASR: 0.000%
  ✅ PASS

[93/100] Testing Model_750.pth
Architecture: Model_Google_4, Mapping: Many to Many
  Trigger: AlphaJPattern
  Recorded BA: 99.4625%, ASR: 98.55%
Generating triggered dataset for Model_Google_4 with MNIST
Trigger type: AlphaJPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_750.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_750.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_750.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_750.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.4625
The fooling rate:  98.55
Mapping is :  [4 8 0 6 7 1 5 9 2 3] <class 'numpy.ndarray'>
Dataset statistics:
  Total images

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  93%|████████████████████████▏ | 93/100 [02:13<00:09,  1.40s/it]

{'benign_accuracy': 99.5, 'attack_success_rate': 98.2, 'overall_accuracy': 0.5005, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 995, 'triggered_correct': 6}
  Measured BA: 99.500%, ASR: 98.200%
  Differences - BA: 0.037%, ASR: 0.350%
  ✅ PASS

[94/100] Testing Model_751.pth
Architecture: Model_Google_4, Mapping: Many to One
  Trigger: OnesidedPyramidReversePattern
  Recorded BA: 99.65%, ASR: 100.0%
Generating triggered dataset for Model_Google_4 with MNIST
Trigger type: OnesidedPyramidReversePattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_751.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_751.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_751.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_751.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.65
The fooling rate:  100.0
Mapping is :  2 <class 'int'>
Dataset st

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  94%|████████████████████████▍ | 94/100 [02:14<00:08,  1.42s/it]

{'benign_accuracy': 99.7, 'attack_success_rate': 100.0, 'overall_accuracy': 0.5565, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 997, 'triggered_correct': 116}
  Measured BA: 99.700%, ASR: 100.000%
  Differences - BA: 0.050%, ASR: 0.000%
  ✅ PASS

[95/100] Testing Model_752.pth
Architecture: Model_Google_4, Mapping: Many to One
  Trigger: DiamondPattern
  Recorded BA: 99.50588235294117%, ASR: 100.0%
Generating triggered dataset for Model_Google_4 with MNIST
Trigger type: DiamondPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_752.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_752.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_752.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_752.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.50588235294117
The fooling rate:  100.0
Mapping is :  1 <class 'int'

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  95%|████████████████████████▋ | 95/100 [02:16<00:07,  1.42s/it]

{'benign_accuracy': 99.5, 'attack_success_rate': 100.0, 'overall_accuracy': 0.554, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 995, 'triggered_correct': 113}
  Measured BA: 99.500%, ASR: 100.000%
  Differences - BA: 0.006%, ASR: 0.000%
  ✅ PASS

[96/100] Testing Model_753.pth
Architecture: Model_Google_4, Mapping: Many to One
  Trigger: AlphaBPattern
  Recorded BA: 99.52941176470588%, ASR: 100.0%
Generating triggered dataset for Model_Google_4 with MNIST
Trigger type: AlphaBPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_753.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_753.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_753.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_753.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Clean_test_Loss', 'Train_loss', 'Trigerred_test_loss', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.52941176470588
The fooling rate:  100.0
Mapping is :  7 <class 'int'

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  96%|████████████████████████▉ | 96/100 [02:17<00:05,  1.43s/it]

{'benign_accuracy': 99.2, 'attack_success_rate': 100.0, 'overall_accuracy': 0.5505, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 992, 'triggered_correct': 109}
  Measured BA: 99.200%, ASR: 100.000%
  Differences - BA: 0.329%, ASR: 0.000%
  ✅ PASS

[97/100] Testing Model_754.pth
Architecture: Model_Google_4, Mapping: Many to Many
  Trigger: AlphaAPattern
  Recorded BA: 99.62352941176471%, ASR: 98.6%
Generating triggered dataset for Model_Google_4 with MNIST
Trigger type: AlphaAPattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_754.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_754.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_754.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_754.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Mapping', 'Trigger_location', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.62352941176471
The fooling rate:  98.6
Mapping is :  [4 8 9 1 5 7 0 2 6 3] <class 'numpy.ndarray'>
Dataset statistics:
  Tot

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  97%|█████████████████████████▏| 97/100 [02:18<00:04,  1.43s/it]

{'benign_accuracy': 99.6, 'attack_success_rate': 98.5, 'overall_accuracy': 0.501, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 996, 'triggered_correct': 6}
  Measured BA: 99.600%, ASR: 98.500%
  Differences - BA: 0.024%, ASR: 0.100%
  ✅ PASS

[98/100] Testing Model_755.pth
Architecture: Model_Google_1, Mapping: Mixed
  Trigger: AlphaDO1Pattern
  Recorded BA: 99.1625%, ASR: 97.25%
Generating triggered dataset for Model_Google_1 with MNIST
Trigger type: AlphaDO1Pattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_755.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_755.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_755.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_755.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.1625
The fooling rate:  97.25
Mapping is :  [0. 7. 7. 7. 4. 6. 1. 2. 9. 9.] <class 'numpy.ndarray'>
Dataset statistics:
  To

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  98%|█████████████████████████▍| 98/100 [02:20<00:02,  1.44s/it]

{'benign_accuracy': 98.6, 'attack_success_rate': 97.7, 'overall_accuracy': 0.637, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 986, 'triggered_correct': 288}
  Measured BA: 98.600%, ASR: 97.700%
  Differences - BA: 0.562%, ASR: 0.450%
  ✅ PASS

[99/100] Testing Model_756.pth
Architecture: Model_Google_2, Mapping: Mixed
  Trigger: AlphaDO1Pattern
  Recorded BA: 99.25%, ASR: 98.8%
Generating triggered dataset for Model_Google_2 with MNIST
Trigger type: AlphaDO1Pattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_756.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_756.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_756.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_756.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.25
The fooling rate:  98.8
Mapping is :  [2. 0. 0. 0. 8. 9. 6. 3. 7. 7.] <class 'numpy.ndarray'>
Dataset statistics:
  Total

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models:  99%|█████████████████████████▋| 99/100 [02:21<00:01,  1.38s/it]

{'benign_accuracy': 99.2, 'attack_success_rate': 99.1, 'overall_accuracy': 0.538, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 992, 'triggered_correct': 84}
  Measured BA: 99.200%, ASR: 99.100%
  Differences - BA: 0.050%, ASR: 0.300%
  ✅ PASS

[100/100] Testing Model_757.pth
Architecture: Model_Google_3, Mapping: Mixed
  Trigger: AlphaDO1Pattern
  Recorded BA: 99.3625%, ASR: 96.75%
Generating triggered dataset for Model_Google_3 with MNIST
Trigger type: AlphaDO1Pattern, Trigger percentage: 0.5
Processing 1000 triggered images...


  checkpoint = torch.load(model_path, map_location="cpu")


Processing 1000 non-triggered images...
Generated 1000 triggered images and 1000 clean images
Metadata saved to: test_results/datasets/Odysseus-MNIST/Models/Model_757.pth_MNIST/dataset_metadata.csv
Triggered dataset generated successfully at: test_results/datasets/Odysseus-MNIST/Models/Model_757.pth_MNIST
Evaluating model: Odysseus-MNIST/Models/Model_757.pth
Dataset directory: test_results/datasets/Odysseus-MNIST/Models/Model_757.pth_MNIST
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==> Building model..
The Accuracies on clean samples:   99.3625
The fooling rate:  96.75
Mapping is :  [4. 8. 8. 8. 6. 7. 3. 0. 2. 2.] <class 'numpy.ndarray'>
Dataset statistics:
  To

  checkpoint = torch.load(model_path)
  return F.log_softmax(output)
Testing models: 100%|█████████████████████████| 100/100 [02:22<00:00,  1.43s/it]

{'benign_accuracy': 99.2, 'attack_success_rate': 98.0, 'overall_accuracy': 0.502, 'clean_samples': 1000, 'triggered_samples': 1000, 'clean_correct': 992, 'triggered_correct': 12}
  Measured BA: 99.200%, ASR: 98.000%
  Differences - BA: 0.162%, ASR: 1.250%
  ✅ PASS

TEST RESULTS SUMMARY
Total models tested: 100
Successful tests: 100
Failed tests: 0
Overall pass rate: 96/100 (96.0%)

Benign Accuracy (BA) Results:
  Pass rate: 100/100 (100.0%)
  Average difference: 0.271%
  Maximum difference: 0.987%
  Threshold: ±5.0%

Attack Success Rate (ASR) Results:
  Pass rate: 96/100 (96.0%)
  Average difference: 0.977%
  Maximum difference: 11.800%
  Threshold: ±5.0%

Results by Architecture:
               overall_pass     ba_diff asr_diff
                      count sum    mean     mean
architecture                                    
Model_Google_1           31  29   0.313    1.431
Model_Google_2           31  31   0.207    0.245
Model_Google_3           32  30   0.329    1.416
Model_Google_4  


