In [16]:
import importlib
import importlib.util
import os
import sys

import numpy as np
import pandas as pd

# Root directory of the sweep to analyse, relative to the notebook's working directory.
# Built with os.path.join so the path resolves on every OS; a literal
# backslash-separated string is only split into components on Windows.
sweep_dir = os.path.join(
    "..", "..", "experiments", "hgd_training", "phase2", "fgsm_cifar10",
    "sweep_2025-09-14_02-03-26",
)

In [17]:
def read_test_results_csv(file_path, model_type="normal"):
    """
    Load a results CSV into a DataFrame, optionally keeping one model's rows.

    Args:
        file_path: Path of the CSV file to read.
        model_type: Value of the 'Model' column to keep. Pass "all" to skip
            the filtering and get every row back.

    Returns:
        The (possibly filtered) DataFrame. Nothing is raised on failure:
        a missing file, an empty file, or any other error while reading or
        filtering prints a message and yields an empty DataFrame instead.
    """
    try:
        table = pd.read_csv(file_path)
        keep_everything = model_type == "all"
        return table if keep_everything else table[table['Model'] == model_type]
    except FileNotFoundError:
        message = f"File {file_path} not found."
    except pd.errors.EmptyDataError:
        message = f"File {file_path} is empty."
    except Exception as e:
        # Deliberate best-effort catch-all (this also covers a missing 'Model' column).
        message = f"An error occurred while reading {file_path}: {e}"

    # Only reached when one of the handlers above ran.
    print(message)
    return pd.DataFrame()
    
# Descriptive names of the model variants, listed in pairs
# (plain, hybrid, synergy) followed by the two "all" synergy variants.
# NOTE(review): none of the four names defined here is read again in the
# cells visible in this notebook; confirm they are still needed.
model_types = [
    "normal", "negative",
    "hybrid normal", "hybrid negative",
    "synergy normal", "synergy negative",
    "synergy all", "synergy trained all",
]

# Header of a flat per-job table: a job identifier plus four settings.
columns = ["job_id", "learning rate", "bilinear", "learn noise", "loss type"]
iter_col = "epsilons"

# Starts with the header only; no rows are added here.
results_df = pd.DataFrame(columns=columns)
    
# Name of the dataset, taken from the first job that is loaded.
dataset_name = None
# job directory name -> {"config": {...}, "results": [row dict, ...]}
data = {}

# Every sub-directory of the sweep directory is treated as one job.
# os.listdir() returns entries in arbitrary order, so sort them: the insertion
# order of `data` (and of everything derived from it, e.g. which job counts as
# the "first" one) is then the same on every run and platform.
job_dirs = sorted(
    d for d in os.listdir(sweep_dir) if os.path.isdir(os.path.join(sweep_dir, d))
)

for job_dir in job_dirs:
    job_path = os.path.join(sweep_dir, job_dir)

    # Import the job's saved config.py as a module. Path components are joined
    # one by one so the lookup does not depend on the Windows "\" separator.
    config_file_path = os.path.join(job_path, "code", "config.py")
    print(f"Loading config from: {config_file_path}")
    # Module name derived from the job directory ("-" is not valid in identifiers).
    module_name = job_dir.replace("-", "_")
    spec = importlib.util.spec_from_file_location(module_name, config_file_path)
    config_module = importlib.util.module_from_spec(spec)
    # Register before executing, as in the importlib recipe for importing a source file.
    sys.modules[module_name] = config_module
    spec.loader.exec_module(config_module)

    # Read all result rows (no model filtering) and normalise the header
    # capitalisation so rows can be addressed as "Model", "Epsilon",
    # "Denoised" and "Accuracy" further down the notebook.
    test_results_file = os.path.join(job_path, "results", "results.csv")
    test_results_df = read_test_results_csv(test_results_file, model_type="all")
    test_results_df.columns = test_results_df.columns.str.capitalize()

    if dataset_name is None:
        dataset_name = config_module.dataset_name

    # Epsilons configured for the job's attack type; only used for the log line below.
    epsilons = config_module.attack_params[config_module.attack_type].get("epsilons", None)
    # File stem of the first training model path with its first six characters
    # removed (presumably a fixed filename prefix; TODO confirm).
    trained_model = os.path.splitext(os.path.basename(config_module.train_model_paths[0]))[0][6:]
    print(f"Job: {job_dir}, Epsilons: {epsilons}, Trained Model: {trained_model}")

    # Keep the job as plain dictionaries: selected settings plus one dict per result row.
    data[job_dir] = {
        "config": {
            "learning rate": config_module.learning_rate,
            "bilinear": config_module.bilinear,
            "learn noise": config_module.learn_noise,
            "loss type": config_module.loss,
            "trained model": trained_model
        },
        "results": test_results_df.to_dict(orient='records'),
    }
   
# Dump everything that was collected: the dataset name once, then for each job
# its settings and its raw result rows, followed by blank lines as a separator.
print(f"Dataset: {dataset_name}")
for job_id, job_data in data.items():
    print(f"Job ID: {job_id}", "Config:", sep="\n")
    for key, value in job_data["config"].items():
        print(f"  {key}: {value}")

    print("Results:")
    for result in job_data["results"]:
        print(f"  {result}")

    print("\n")

Loading config from: ..\..\experiments\hgd_training\phase2\fgsm_cifar10\sweep_2025-09-14_02-03-26\2025-09-14_02-03-26\code\config.py
Job: 2025-09-14_02-03-26, Epsilons: [0.01, 0.02, 0.03, 0.04, 0.05], Trained Model: normal
Loading config from: ..\..\experiments\hgd_training\phase2\fgsm_cifar10\sweep_2025-09-14_02-03-26\2025-09-14_02-03-28\code\config.py
Job: 2025-09-14_02-03-28, Epsilons: [0.01, 0.02, 0.03, 0.04, 0.05], Trained Model: negative
Loading config from: ..\..\experiments\hgd_training\phase2\fgsm_cifar10\sweep_2025-09-14_02-03-26\2025-09-14_02-03-29\code\config.py
Job: 2025-09-14_02-03-29, Epsilons: [0.01, 0.02, 0.03, 0.04, 0.05], Trained Model: hybrid_nor
Loading config from: ..\..\experiments\hgd_training\phase2\fgsm_cifar10\sweep_2025-09-14_02-03-26\2025-09-14_02-03-30\code\config.py
Job: 2025-09-14_02-03-30, Epsilons: [0.01, 0.02, 0.03, 0.04, 0.05], Trained Model: hybrid_neg
Loading config from: ..\..\experiments\hgd_training\phase2\fgsm_cifar10\sweep_2025-09-14_02-03-26\

In [18]:
def validate_non_denoised_results(data):
    """
    Check that every job reports the same accuracy for non-denoised rows.

    Rows whose "Denoised" field equals "No" are grouped by their
    ("Model", "Epsilon") pair across all jobs. The check passes when each
    group holds exactly one distinct "Accuracy" value. The outcome is also
    printed, listing every offending group on failure.

    Args:
        data (dict): Maps a job id to a dict whose "results" entry is a list
            of row dicts with "Denoised", "Model", "Epsilon" and "Accuracy" keys.

    Returns:
        bool: True when no group has more than one accuracy value, False otherwise.
    """
    # (Model, Epsilon) -> set of every accuracy value seen for that pair
    reference_results = {}
    for job_data in data.values():
        non_denoised = [row for row in job_data["results"] if row["Denoised"] == "No"]
        for row in non_denoised:
            pair = (row["Model"], row["Epsilon"])
            reference_results.setdefault(pair, set()).add(row["Accuracy"])

    # A pair with more than one distinct accuracy means the jobs disagree.
    inconsistencies = [
        {"Model": model, "Epsilon": epsilon, "Different Accuracies": list(accuracies)}
        for (model, epsilon), accuracies in reference_results.items()
        if len(accuracies) > 1
    ]

    if not inconsistencies:
        print("All non-denoised results are consistent across jobs!")
        return True

    print("Found inconsistencies in non-denoised results:")
    for inc in inconsistencies:
        print(f"Model: {inc['Model']}, Epsilon: {inc['Epsilon']}")
        print(f"Different accuracy values found: {inc['Different Accuracies']}")
    return False

# Run the consistency check on the loaded sweep data; as the last expression
# of the cell, the returned bool is also shown as the cell's output.
validate_non_denoised_results(data)

All non-denoised results are consistent across jobs!


True

In [19]:
# trained-model label -> {value of "Model": mean accuracy with denoising}
processed_data = {}

for job_id, job_data in data.items():
    config = job_data["config"]

    # Keep only the rows measured with denoising switched on (every model).
    results = [r for r in job_data["results"] if r["Denoised"] == "Yes"]

    # First gather all accuracies reported for each model ...
    row = {}
    for result in results:
        model_type = result['Model']
        row.setdefault(model_type, []).append(result["Accuracy"])

    # ... then collapse every list into its arithmetic mean.
    for model_type, accuracies in row.items():
        row[model_type] = sum(accuracies) / len(accuracies)

    # Jobs are keyed by the model they were trained on; a later job with the
    # same label replaces an earlier one.
    processed_data[config["trained model"]] = row

# A dict of dicts becomes a frame whose columns are the outer keys (trained
# model) and whose row index is the union of the inner keys ("Model" values).
df = pd.DataFrame(processed_data)
print("Results DataFrame:")
display(df)

Results DataFrame:


Unnamed: 0,normal,negative,hybrid_nor,hybrid_neg,synergy_nor,synergy_neg,synergy_all,tr_synergy_all
normal,0.91324,0.89242,0.90706,0.8978,0.91568,0.89592,0.91664,0.91288
negative,0.89276,0.9063,0.88778,0.91088,0.89598,0.90786,0.9093,0.90814
hybrid_nor,0.91414,0.89386,0.9099,0.89846,0.91648,0.89676,0.91786,0.91368
hybrid_neg,0.89158,0.9059,0.88648,0.91126,0.89458,0.90704,0.90914,0.90702
synergy_nor,0.91212,0.89398,0.9076,0.8977,0.91518,0.89746,0.91574,0.91216
synergy_neg,0.89376,0.9063,0.8877,0.91038,0.89768,0.90778,0.9102,0.907
synergy_all,0.91424,0.90968,0.90964,0.91478,0.91648,0.91232,0.92268,0.92052
tr_synergy_all,0.91336,0.90886,0.90928,0.9149,0.91426,0.91158,0.92374,0.9209


In [25]:
# Baseline: mean accuracy per model without denoising.
# The non-denoised rows are read from the first job only. That is valid only
# when they are identical in every job, which is what
# validate_non_denoised_results(data) checks.
first_job_data = next(iter(data.values()))
non_denoised_results = [r for r in first_job_data["results"] if r["Denoised"] == "No"]

# Gather the accuracies per model, then reduce each list to its mean.
non_denoised_row = {}
for result in non_denoised_results:
    model_type = result['Model']
    non_denoised_row.setdefault(model_type, []).append(result["Accuracy"])

for model_type, accuracies in non_denoised_row.items():
    non_denoised_row[model_type] = sum(accuracies) / len(accuracies)

# Single-row frame: one column per model.
non_denoised_df = pd.DataFrame([non_denoised_row])
print("\nBaseline Results:")
display(non_denoised_df)

# Stack the baseline on top of the denoised results.
# NOTE(review): the baseline is keyed by the evaluated "Model", while the
# columns of `df` are the jobs' "trained model" labels and its rows are the
# evaluated models. Appending the baseline as a row therefore aligns it with
# the trained-model axis; confirm this orientation is intended.
df_final = pd.concat([non_denoised_df, df], ignore_index=False)

# Relabel the baseline row (label 0 after the concat) by assigning a new index.
# Index objects are meant to be immutable, so writing into
# `df_final.index.values` in place is not a supported way to change a label.
df_final.index = ["no defense", *df_final.index[1:]]
print("\nTotal Results DataFrame with Baseline:")
display(df_final)


Baseline Results:


Unnamed: 0,normal,negative,hybrid_nor,hybrid_neg,synergy_nor,synergy_neg,synergy_all,tr_synergy_all
0,0.4047,0.4047,0.39976,0.40434,0.41172,0.41184,0.43148,0.39884



Total Results DataFrame with Baseline:


Unnamed: 0,normal,negative,hybrid_nor,hybrid_neg,synergy_nor,synergy_neg,synergy_all,tr_synergy_all
no defense,0.4047,0.4047,0.39976,0.40434,0.41172,0.41184,0.43148,0.39884
normal,0.91324,0.89242,0.90706,0.8978,0.91568,0.89592,0.91664,0.91288
negative,0.89276,0.9063,0.88778,0.91088,0.89598,0.90786,0.9093,0.90814
hybrid_nor,0.91414,0.89386,0.9099,0.89846,0.91648,0.89676,0.91786,0.91368
hybrid_neg,0.89158,0.9059,0.88648,0.91126,0.89458,0.90704,0.90914,0.90702
synergy_nor,0.91212,0.89398,0.9076,0.8977,0.91518,0.89746,0.91574,0.91216
synergy_neg,0.89376,0.9063,0.8877,0.91038,0.89768,0.90778,0.9102,0.907
synergy_all,0.91424,0.90968,0.90964,0.91478,0.91648,0.91232,0.92268,0.92052
tr_synergy_all,0.91336,0.90886,0.90928,0.9149,0.91426,0.91158,0.92374,0.9209


In [29]:
# Short labels found in the results -> descriptive names for presentation.
mapping = dict(
    normal="normal",
    negative="negative",
    hybrid_nor="hybrid normal",
    hybrid_neg="hybrid negative",
    synergy_nor="synergy normal",
    synergy_neg="synergy negative",
    synergy_all="synergy all",
    tr_synergy_all="synergy trained all",
)


def _presentation_label(label):
    """Expand a short label via `mapping` (when listed) and title-case the result."""
    return mapping.get(label, label).title()


# Apply the same relabelling to both axes. Re-running the cell is harmless:
# already-expanded labels are not keys of `mapping`, and title-casing them
# again leaves them unchanged.
df_final = df_final.rename(columns=_presentation_label, index=_presentation_label)
display(df_final)

Unnamed: 0,Normal,Negative,Hybrid Normal,Hybrid Negative,Synergy Normal,Synergy Negative,Synergy All,Synergy Trained All
No Defense,0.4047,0.4047,0.39976,0.40434,0.41172,0.41184,0.43148,0.39884
Normal,0.91324,0.89242,0.90706,0.8978,0.91568,0.89592,0.91664,0.91288
Negative,0.89276,0.9063,0.88778,0.91088,0.89598,0.90786,0.9093,0.90814
Hybrid Normal,0.91414,0.89386,0.9099,0.89846,0.91648,0.89676,0.91786,0.91368
Hybrid Negative,0.89158,0.9059,0.88648,0.91126,0.89458,0.90704,0.90914,0.90702
Synergy Normal,0.91212,0.89398,0.9076,0.8977,0.91518,0.89746,0.91574,0.91216
Synergy Negative,0.89376,0.9063,0.8877,0.91038,0.89768,0.90778,0.9102,0.907
Synergy All,0.91424,0.90968,0.90964,0.91478,0.91648,0.91232,0.92268,0.92052
Synergy Trained All,0.91336,0.90886,0.90928,0.9149,0.91426,0.91158,0.92374,0.9209


In [None]:
def bold_max_percentage(s):
    """
    Format a numeric Series as percentage strings, bolding its maximum.

    Each value is multiplied by 100 and rendered with two decimals. Every
    entry equal to the Series maximum (ties included) is wrapped in a LaTeX
    ``\\textbf{...}`` command.

    Args:
        s: pandas Series of numeric values.

    Returns:
        list[str]: One formatted string per entry, in the Series' order.
    """
    peak = s.max()
    formatted = []
    for x in s:
        if x == peak:
            formatted.append(f'\\textbf{{{x*100:.2f}}}')
        else:
            formatted.append(f'{x*100:.2f}')
    return formatted

# Turn every column of the final table into percentage strings, with the
# largest value of each column in bold.
formatted_df = df_final.copy()
for col in df_final.columns:
    # errors='coerce' turns anything non-numeric into NaN instead of raising.
    series = pd.to_numeric(formatted_df[col], errors='coerce')
    formatted_df[col] = bold_max_percentage(series)

# Render the formatted table as LaTeX.
latex_options = dict(
    multirow=True,
    multicolumn=True,
    multicolumn_format='c',
    escape=False,  # keep the \textbf{...} commands as real LaTeX
    # NOTE(review): index=False also drops the row labels from the table,
    # so the rows of the saved file are unlabelled; confirm this is intended.
    index=False,
)
latex_str = formatted_df.to_latex(**latex_options)

# Write the table next to the sweep's job directories.
latex_table_path = os.path.join(sweep_dir, "synergy_results_multicolumn.tex")
with open(latex_table_path, 'w') as f:
    f.write(latex_str)
print(f"LaTeX table saved to {latex_table_path}")

# Preview: the first ten lines of the generated LaTeX.
print("\nFirst few lines of the LaTeX output:")
preview_lines = latex_str.split("\n")[:10]
print("\n".join(preview_lines))

LaTeX table saved to ..\..\experiments\hgd_training\phase2\fgsm_cifar10\sweep_2025-09-14_02-03-26\synergy_results_multicolumn.tex

First few lines of the LaTeX output:
\begin{tabular}{llllllll}
\toprule
Normal & Negative & Hybrid Normal & Hybrid Negative & Synergy Normal & Synergy Negative & Synergy All & Synergy Trained All \\
\midrule
40.47 & 40.47 & 39.98 & 40.43 & 41.17 & 41.18 & 43.15 & 39.88 \\
91.32 & 89.24 & 90.71 & 89.78 & 91.57 & 89.59 & 91.66 & 91.29 \\
89.28 & 90.63 & 88.78 & 91.09 & 89.60 & 90.79 & 90.93 & 90.81 \\
91.41 & 89.39 & \textbf{90.99} & 89.85 & \textbf{91.65} & 89.68 & 91.79 & 91.37 \\
89.16 & 90.59 & 88.65 & 91.13 & 89.46 & 90.70 & 90.91 & 90.70 \\
91.21 & 89.40 & 90.76 & 89.77 & 91.52 & 89.75 & 91.57 & 91.22 \\
