In [1]:
import importlib
import importlib.util
import os
import sys

import numpy as np
import pandas as pd

# Sweep directory analysed by this notebook. It is assembled from its components
# so the separator matches the host OS (on Windows this yields the same
# backslash-separated string as before).
sweep_dir = os.path.join(
    "..", "..", "experiments", "hgd_training", "phase1", "fgsm",
    "sweep_2025-07-20_17-01-51",
)

In [2]:
def read_test_results_csv(file_path, model_type="normal"):
    """
    Load a test-results CSV into a DataFrame, optionally keeping one model type.

    Rows are restricted to those whose 'Model' column equals ``model_type``;
    passing "all" keeps every row. Any failure (missing file, empty file, or
    any other exception raised while reading/filtering) is reported on stdout
    and an empty DataFrame is returned instead of raising.
    """
    try:
        frame = pd.read_csv(file_path)
        keep_everything = model_type == "all"
        return frame if keep_everything else frame[frame['Model'] == model_type]
    except FileNotFoundError:
        problem = f"File {file_path} not found."
    except pd.errors.EmptyDataError:
        problem = f"File {file_path} is empty."
    except Exception as e:
        problem = f"An error occurred while reading {file_path}: {e}"

    # Only reached when one of the handlers above caught an error
    print(problem)
    return pd.DataFrame()
    
# Model type labels (kept for reference; this cell only loads the rows selected
# by read_test_results_csv's default filter, "normal")
model_types = [
    "normal",
    "negative",
    "hybrid normal",
    "hybrid negative",
    "synergy normal",
    "synergy negative",
    "synergy all",
    "synergy trained all"
]

# Create dataframe to hold all results and define columns
columns = [
    "job_id",
    "learning rate",
    "bilinear",
    "learn noise",
    "loss type"
]
iter_col = "epsilons"
results_df = pd.DataFrame(columns=columns)

dataset_name = None
data = {}

# Go through each directory in the sweep directory. os.listdir() returns entries
# in arbitrary order, so sort them: later cells take the *first* job as the
# baseline, and the loading order should be reproducible across machines.
job_dirs = sorted(
    d for d in os.listdir(sweep_dir) if os.path.isdir(os.path.join(sweep_dir, d))
)

for job_dir in job_dirs:
    job_path = os.path.join(sweep_dir, job_dir)

    # Load the job's config.py as a module named after the job directory.
    # NOTE: this executes the file, so only point the notebook at trusted sweeps.
    # Path components are joined individually so the separator matches the OS.
    config_file_path = os.path.join(job_path, "code", "config.py")
    print(f"Loading config from: {config_file_path}")
    module_name = job_dir.replace("-", "_")
    spec = importlib.util.spec_from_file_location(module_name, config_file_path)
    config_module = importlib.util.module_from_spec(spec)
    sys.modules[module_name] = config_module
    spec.loader.exec_module(config_module)

    # Read the test results and capitalise the column names.
    # rename() with a function also tolerates the empty frame returned when
    # results.csv could not be read (the .str accessor can raise on it).
    test_results_file = os.path.join(job_path, "results", "results.csv")
    test_results_df = read_test_results_csv(test_results_file).rename(columns=str.capitalize)

    # The dataset name is taken from the first job that is loaded
    if dataset_name is None:
        dataset_name = config_module.dataset_name

    epsilons = config_module.attack_params[config_module.attack_type].get("epsilons", None)
    print(f"Job: {job_dir}, Epsilons: {epsilons}")

    # Store the swept hyper-parameters next to the job's result records
    data[job_dir] = {
        "config": {
            "learning rate": config_module.learning_rate,
            "bilinear": config_module.bilinear,
            "learn noise": config_module.learn_noise,
            "loss type": config_module.loss,
        },
        "results": test_results_df.to_dict(orient='records'),
    }

# Dump everything that was collected, one job after another
print(f"Dataset: {dataset_name}")
for job_id, job_data in data.items():
    print(f"Job ID: {job_id}")
    print("Config:")
    for key, value in job_data["config"].items():
        print(f"  {key}: {value}")
    print("Results:")
    for result in job_data["results"]:
        print(f"  {result}")
    print("\n")

Loading config from: ..\..\experiments\hgd_training\phase1\fgsm\sweep_2025-07-20_17-01-51\2025-07-20_17-01-51\code\config.py
Job: 2025-07-20_17-01-51, Epsilons: [0.01, 0.02, 0.03, 0.04, 0.05]
Loading config from: ..\..\experiments\hgd_training\phase1\fgsm\sweep_2025-07-20_17-01-51\2025-07-20_17-01-53\code\config.py
Job: 2025-07-20_17-01-53, Epsilons: [0.01, 0.02, 0.03, 0.04, 0.05]
Loading config from: ..\..\experiments\hgd_training\phase1\fgsm\sweep_2025-07-20_17-01-51\2025-07-20_17-01-54\code\config.py
Job: 2025-07-20_17-01-54, Epsilons: [0.01, 0.02, 0.03, 0.04, 0.05]
Loading config from: ..\..\experiments\hgd_training\phase1\fgsm\sweep_2025-07-20_17-01-51\2025-07-20_17-01-55\code\config.py
Job: 2025-07-20_17-01-55, Epsilons: [0.01, 0.02, 0.03, 0.04, 0.05]
Loading config from: ..\..\experiments\hgd_training\phase1\fgsm\sweep_2025-07-20_17-01-51\2025-07-20_17-01-56\code\config.py
Job: 2025-07-20_17-01-56, Epsilons: [0.01, 0.02, 0.03, 0.04, 0.05]
Loading config from: ..\..\experiments\h

In [34]:
# Create a list to store the processed data (one dict per job)
processed_data = []

# Columns that describe the job configuration; every other column is an epsilon
config_cols = ["Loss Type", "Learn Noise", "Bilinear", "Learning Rate"]

# Process each job's data
for job_id, job_data in data.items():
    # Get configuration data
    config = job_data["config"]

    # Keep only the denoised results (Denoised == "Yes") of the "normal" model
    results = [r for r in job_data["results"]
               if r["Model"] == "normal" and r["Denoised"] == "Yes"]

    # Create a row with configuration and accuracies for each epsilon
    row = {
        "Loss Type": config["loss type"].upper(),  # Convert to uppercase
        "Learn Noise": config["learn noise"],
        "Bilinear": config["bilinear"],
        "Learning Rate": config["learning rate"],
    }

    # One column per epsilon, keyed by the epsilon formatted with two decimals
    for result in results:
        row[f"{result['Epsilon']:.2f}"] = result["Accuracy"]

    processed_data.append(row)

# Create DataFrame
df = pd.DataFrame(processed_data)

# Identify epsilon columns: every string-named column that is not a config column
epsilon_cols = [col for col in df.columns if isinstance(col, str) and col not in config_cols]

# Create MultiIndex columns: config columns on top, epsilons grouped under 'Epsilons'
cols = pd.MultiIndex.from_tuples(
    [(col, '') for col in config_cols] +
    [('Epsilons', col) for col in epsilon_cols]
)

# Create the final DataFrame with proper column hierarchy.
# NOTE: going through .values on a mixed-type frame makes every column object dtype.
df_final = pd.DataFrame(
    df[config_cols + epsilon_cols].values,
    columns=cols
)

# Custom sort for Loss Type to ensure PGD comes before LGD
loss_type_order = {"PGD": 0, "LGD": 1}


def _loss_type_sort_key(column):
    """Sort key for sort_values: rank the loss-type column, pass others through.

    Loss types missing from ``loss_type_order`` are ranked after the known ones.
    Falling back to the raw string would mix ints and strings in one key and
    make the sort raise a TypeError.
    """
    if column.name == ("Loss Type", ""):
        unknown_rank = len(loss_type_order)
        return column.map(lambda loss_type: loss_type_order.get(loss_type, unknown_rank))
    return column


df_final = df_final.sort_values(
    [
        ("Loss Type", ""),
        ("Learn Noise", ""),
        ("Bilinear", ""),
        ("Learning Rate", ""),
    ],
    key=_loss_type_sort_key,
).reset_index(drop=True)

# Display the DataFrame
print("Results DataFrame:")
display(df_final)

Results DataFrame:


Unnamed: 0_level_0,Loss Type,Learn Noise,Bilinear,Learning Rate,Epsilons,Epsilons,Epsilons,Epsilons,Epsilons
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,0.01,0.02,0.03,0.04,0.05
0,PGD,False,False,0.001,0.8158,0.7729,0.7677,0.7863,0.7748
1,PGD,False,False,0.003,0.8078,0.7412,0.7293,0.7289,0.7359
2,PGD,False,False,0.01,0.8162,0.7378,0.717,0.6732,0.6163
3,PGD,False,True,0.001,0.8099,0.7395,0.7392,0.766,0.7293
4,PGD,False,True,0.003,0.8122,0.7476,0.7523,0.7637,0.7541
5,PGD,False,True,0.01,0.7963,0.6948,0.6936,0.7015,0.7011
6,PGD,True,False,0.001,0.8249,0.8008,0.8182,0.8172,0.8205
7,PGD,True,False,0.003,0.8399,0.839,0.8579,0.8738,0.8826
8,PGD,True,False,0.01,0.8044,0.7839,0.7933,0.8153,0.8314
9,PGD,True,True,0.001,0.8254,0.8076,0.8234,0.8261,0.8311


In [35]:
def validate_non_denoised_results(data):
    """
    Check that every job reports the same non-denoised accuracy for each
    (Model, Epsilon) pair.

    Only result records whose "Denoised" field equals "No" are compared.
    A summary is printed either way; when values disagree, each conflicting
    (Model, Epsilon) pair is listed together with the accuracies found.

    Args:
        data (dict): Mapping of job id to a dict holding a "results" list of
            records with "Model", "Epsilon", "Denoised" and "Accuracy" keys

    Returns:
        bool: True if all non-denoised results are consistent, False otherwise
    """
    # (Model, Epsilon) -> set of every accuracy value reported for that pair
    seen = {}
    for job in data.values():
        for record in job["results"]:
            if record["Denoised"] != "No":
                continue
            pair = (record["Model"], record["Epsilon"])
            seen.setdefault(pair, set()).add(record["Accuracy"])

    # A pair is inconsistent as soon as more than one distinct value was seen
    conflicts = [
        {
            "Model": model,
            "Epsilon": epsilon,
            "Different Accuracies": list(values),
        }
        for (model, epsilon), values in seen.items()
        if len(values) > 1
    ]

    if not conflicts:
        print("All non-denoised results are consistent across jobs!")
        return True

    print("Found inconsistencies in non-denoised results:")
    for conflict in conflicts:
        print(f"Model: {conflict['Model']}, Epsilon: {conflict['Epsilon']}")
        print(f"Different accuracy values found: {conflict['Different Accuracies']}")
    return False

# Run the consistency check over every loaded job; the returned bool is shown as the cell output
validate_non_denoised_results(data)

All non-denoised results are consistent across jobs!


True

In [36]:
# Configuration columns; they carry no meaning for the baseline row
baseline_config_cols = ["Loss Type", "Learn Noise", "Bilinear", "Learning Rate"]

# Create a row for non-denoised results
non_denoised_row = {}

# Get one set of non-denoised results (they should all be the same as verified by validate_non_denoised_results)
first_job_data = next(iter(data.values()))
non_denoised_results = [r for r in first_job_data["results"]
                       if r["Model"] == "normal" and r["Denoised"] == "No"]

# Fill configuration columns with empty strings
for col in baseline_config_cols:
    non_denoised_row[col] = ""

# Add accuracies for each epsilon, keyed by the epsilon formatted with two decimals
for result in non_denoised_results:
    non_denoised_row[f"{result['Epsilon']:.2f}"] = result["Accuracy"]

not_denoised_df = pd.DataFrame([non_denoised_row])

# Build MultiIndex columns from this frame's OWN columns. Deriving the labels
# from another frame silently mislabels values (or raises on a length mismatch)
# whenever the two frames differ in column count or order.
new_columns = [
    (col, '') if col in baseline_config_cols else ('Epsilons', col)
    for col in not_denoised_df.columns
]

not_denoised_df.columns = pd.MultiIndex.from_tuples(new_columns)
print("\nNon-Denoised Results DataFrame:")
display(not_denoised_df)

# Create final DataFrame with baseline (baseline is row 0)
df_final_with_baseline = pd.concat([
    not_denoised_df,
    df_final
], ignore_index=True)

# Calculate average improvement over the baseline, averaged across all epsilons
epsilon_cols = [col for col in df_final_with_baseline.columns if col[0] == 'Epsilons']
# Cast to float so the arithmetic is numeric even if the columns are object dtype
epsilon_accuracies = df_final_with_baseline[epsilon_cols].astype(float)
baseline_values = epsilon_accuracies.iloc[0]

improvements = (
    epsilon_accuracies
    .sub(baseline_values, axis="columns")
    .mean(axis="columns")
    .tolist()
)
improvements[0] = 0.0  # no improvement for the baseline row itself

# Add the average improvement column
df_final_with_baseline[('Average Improvement', '')] = improvements

print("\nTotal Results DataFrame with Baseline and Average Improvement:")
display(df_final_with_baseline)


Non-Denoised Results DataFrame:


Unnamed: 0_level_0,Loss Type,Learn Noise,Bilinear,Learning Rate,Epsilons,Epsilons,Epsilons,Epsilons,Epsilons
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,0.01,0.02,0.03,0.04,0.05
0,,,,,0.7148,0.4834,0.3367,0.2667,0.2219



Total Results DataFrame with Baseline and Average Improvement:


Unnamed: 0_level_0,Loss Type,Learn Noise,Bilinear,Learning Rate,Epsilons,Epsilons,Epsilons,Epsilons,Epsilons,Average Improvement
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,0.01,0.02,0.03,0.04,0.05,Unnamed: 10_level_1
0,,,,,0.7148,0.4834,0.3367,0.2667,0.2219,0.0
1,PGD,False,False,0.001,0.8158,0.7729,0.7677,0.7863,0.7748,0.3788
2,PGD,False,False,0.003,0.8078,0.7412,0.7293,0.7289,0.7359,0.34392
3,PGD,False,False,0.01,0.8162,0.7378,0.717,0.6732,0.6163,0.3074
4,PGD,False,True,0.001,0.8099,0.7395,0.7392,0.766,0.7293,0.35208
5,PGD,False,True,0.003,0.8122,0.7476,0.7523,0.7637,0.7541,0.36128
6,PGD,False,True,0.01,0.7963,0.6948,0.6936,0.7015,0.7011,0.31276
7,PGD,True,False,0.001,0.8249,0.8008,0.8182,0.8172,0.8205,0.41162
8,PGD,True,False,0.003,0.8399,0.839,0.8579,0.8738,0.8826,0.45394
9,PGD,True,False,0.01,0.8044,0.7839,0.7933,0.8153,0.8314,0.40096


In [38]:
# Percentage formatter for one table column, with the column maximum in LaTeX bold
def bold_max_percentage(s):
    """
    Render each value of ``s`` as a percentage string with two decimals.

    Every entry equal to the maximum of ``s`` (ties included) is wrapped in
    ``\\textbf{...}``. Returns a list of strings in the order of ``s``.
    """
    peak = s.max()
    cells = []
    for value, is_peak in zip(s, s.eq(peak)):
        text = f'{value*100:.2f}'
        cells.append(f'\\textbf{{{text}}}' if is_peak else text)
    return cells

# Epsilon accuracy columns are the ones grouped under the 'Epsilons' header
epsilon_cols = [label for label in df_final_with_baseline.columns if label[0] == 'Epsilons']

# All string formatting happens on a copy of the numeric results frame
formatted_df = df_final_with_baseline.copy()


def _format_learning_rate(x):
    """Show a learning rate with 3 decimals; empty-string entries are returned as-is."""
    return f'{float(x):.3f}' if x != '' else x


formatted_df[('Learning Rate', '')] = formatted_df[('Learning Rate', '')].apply(
    _format_learning_rate
)

# Epsilon columns become percentage strings, column maximum in bold
for col in epsilon_cols:
    # Entries that cannot be parsed as numbers are coerced to NaN
    series = pd.to_numeric(formatted_df[col], errors='coerce')
    formatted_df[col] = bold_max_percentage(series)

# Average Improvement: percentage strings with only the best value in bold
improvements = formatted_df[('Average Improvement', '')].copy()
# The first row (baseline) is left out when looking for the best value
max_improvement = improvements[1:].max()


def _format_improvement(x):
    """Format one improvement: zero stays '0.00', the maximum is bolded."""
    if x == 0:
        return '0.00'
    percent = f'{x*100:.2f}'
    return f'\\textbf{{{percent}}}' if x == max_improvement else percent


formatted_df[('Average Improvement', '')] = [_format_improvement(x) for x in improvements]

# Render the formatted frame as a LaTeX table
latex_options = dict(
    multirow=True,
    multicolumn=True,
    multicolumn_format='c',
    escape=False,  # keep the \textbf commands intact
    index=False,   # no index column in the table
)
latex_str = formatted_df.to_latex(**latex_options)

# Write the table next to the sweep results
latex_table_path = os.path.join(sweep_dir, "test_results_multicolumn.tex")
with open(latex_table_path, 'w') as f:
    f.write(latex_str)
print(f"LaTeX table saved to {latex_table_path}")

# Preview the first ten lines of the generated LaTeX
print("\nFirst few lines of the LaTeX output:")
preview_lines = latex_str.split("\n")[:10]
print("\n".join(preview_lines))

LaTeX table saved to ..\..\experiments\hgd_training\phase1\fgsm\sweep_2025-07-20_17-01-51\test_results_multicolumn.tex

First few lines of the LaTeX output:
\begin{tabular}{llllllllll}
\toprule
Loss Type & Learn Noise & Bilinear & Learning Rate & \multicolumn{5}{c}{Epsilons} & Average Improvement \\
 &  &  &  & 0.01 & 0.02 & 0.03 & 0.04 & 0.05 &  \\
\midrule
 &  &  &  & 71.48 & 48.34 & 33.67 & 26.67 & 22.19 & 0.00 \\
PGD & False & False & 0.001 & 81.58 & 77.29 & 76.77 & 78.63 & 77.48 & 37.88 \\
PGD & False & False & 0.003 & 80.78 & 74.12 & 72.93 & 72.89 & 73.59 & 34.39 \\
PGD & False & False & 0.010 & 81.62 & 73.78 & 71.70 & 67.32 & 61.63 & 30.74 \\
PGD & False & True & 0.001 & 80.99 & 73.95 & 73.92 & 76.60 & 72.93 & 35.21 \\
