In [38]:
import os
import sys
import importlib

import numpy as np
import pandas as pd

sweep_dir = "..\\base_training\\sweep_2025-07-08_00-24-53"

In [39]:
def read_test_results_csv(file_path):
    """
    Reads the test results from a CSV file and returns a DataFrame.
    """
    try:
        df = pd.read_csv(file_path)
        # remove loss column 
        if 'test_loss' in df.columns:
            df = df.drop(columns=['test_loss'])
        return df
    except FileNotFoundError:
        print(f"File {file_path} not found.")
        return pd.DataFrame()
    except pd.errors.EmptyDataError:
        print(f"File {file_path} is empty.")
        return pd.DataFrame()
    except Exception as e:
        print(f"An error occurred while reading {file_path}: {e}")
        return pd.DataFrame()

# Create dataframe to hold all results and define columns
columns = [
    "job_id",
    "learning rate",
    "weight decay",
    "batch size",
    "normal",
    "negative",
    "hybrid normal",
    "hybrid negative",
    "synergy normal",
    "synergy negative",
    "synergy all",
    "synergy trained all"
]
results_df = pd.DataFrame(columns=columns)

# Go through each directory in the sweep directory
job_dirs = [d for d in os.listdir(sweep_dir) if os.path.isdir(os.path.join(sweep_dir, d))]

dataset_name = None

for job_dir in job_dirs:
    job_path = os.path.join(sweep_dir, job_dir)
    # load config file
    config_file_path = os.path.join(job_path, "code\\config_train.py")
    module_name = job_dir.replace("-", "_")
    spec = importlib.util.spec_from_file_location(module_name, config_file_path)
    config_module = importlib.util.module_from_spec(spec)
    sys.modules[module_name] = config_module
    spec.loader.exec_module(config_module)
    # read test results
    test_results_file = os.path.join(job_path, "metrics\\test_metrics.csv")
    test_results_df = read_test_results_csv(test_results_file)

    if dataset_name is None:
        dataset_name = config_module.dataset_name

    new_row = pd.DataFrame([{
        "job_id": job_dir,
        "learning rate": config_module.learning_rate,
        "weight decay": config_module.decay,
        "batch size": config_module.batch_size,
        "normal": test_results_df.loc[test_results_df["model_name"] == "normal", "test_accuracy"].values[0] if not test_results_df[test_results_df["model_name"] == "normal"].empty else None,
        "negative": test_results_df.loc[test_results_df["model_name"] == "negative", "test_accuracy"].values[0] if not test_results_df[test_results_df["model_name"] == "negative"].empty else None,
        "hybrid normal": test_results_df.loc[test_results_df["model_name"] == "hybrid_nor", "test_accuracy"].values[0] if not test_results_df[test_results_df["model_name"] == "hybrid_nor"].empty else None,
        "hybrid negative": test_results_df.loc[test_results_df["model_name"] == "hybrid_neg", "test_accuracy"].values[0] if not test_results_df[test_results_df["model_name"] == "hybrid_neg"].empty else None,
        "synergy normal": test_results_df.loc[test_results_df["model_name"] == "synergy_nor", "test_accuracy"].values[0] if not test_results_df[test_results_df["model_name"] == "synergy_nor"].empty else None,
        "synergy negative": test_results_df.loc[test_results_df["model_name"] == "synergy_neg", "test_accuracy"].values[0] if not test_results_df[test_results_df["model_name"] == "synergy_neg"].empty else None,
        "synergy all": test_results_df.loc[test_results_df["model_name"] == "synergy_all", "test_accuracy"].values[0] if not test_results_df[test_results_df["model_name"] == "synergy_all"].empty else None,
        "synergy trained all": test_results_df.loc[test_results_df["model_name"] == "tr_synergy_all", "test_accuracy"].values[0] if not test_results_df[test_results_df["model_name"] == "tr_synergy_all"].empty else None
    }])
    results_df = pd.concat([results_df, new_row], ignore_index=True)

print(f"Dataset: {dataset_name}")
print("Results DataFrame:")
results_df.head(20)

Dataset: cifar100
Results DataFrame:


  results_df = pd.concat([results_df, new_row], ignore_index=True)


Unnamed: 0,job_id,learning rate,weight decay,batch size,normal,negative,hybrid normal,hybrid negative,synergy normal,synergy negative,synergy all,synergy trained all
0,2025-07-08_00-24-53,0.03,0.001,128,73.52,73.39,73.29,73.01,73.36,73.16,75.86,75.5
1,2025-07-08_00-24-55,0.03,0.001,768,69.67,70.15,69.68,69.7,69.56,70.03,71.83,71.67
2,2025-07-08_00-24-56,0.03,0.005,128,72.36,71.67,72.41,71.75,72.48,71.73,74.53,74.37
3,2025-07-08_00-24-57,0.03,0.005,768,72.28,72.11,72.05,72.25,72.07,72.16,74.25,74.39
4,2025-07-08_00-24-58,0.03,0.01,128,67.64,68.41,68.18,68.5,67.72,68.47,70.05,70.76
5,2025-07-08_00-24-59,0.03,0.01,768,72.82,72.18,72.78,71.93,72.8,72.28,74.56,74.44
6,2025-07-08_00-25-00,0.1,0.001,128,72.95,72.69,72.71,72.32,73.0,72.54,75.21,74.49
7,2025-07-08_00-25-01,0.1,0.001,768,71.82,71.68,71.75,71.39,71.86,71.43,74.12,73.58
8,2025-07-08_00-25-02,0.1,0.005,128,61.38,61.22,61.83,62.32,61.57,61.74,64.32,64.97
9,2025-07-08_00-25-04,0.1,0.005,768,71.48,71.34,71.43,71.06,71.38,71.16,73.33,73.48


In [40]:
# export results to csv
results_csv_path = os.path.join(sweep_dir, "test_results.csv")
results_df.to_csv(results_csv_path, index=False)
print(f"Results saved to {results_csv_path}")

Results saved to ..\base_training\sweep_2025-07-08_00-24-53\test_results.csv


In [41]:
# Convert csv to latex table
latex_table_path = os.path.join(sweep_dir, "test_results.tex")
# before exporting remove the job_id column if it exists
if 'job_id' in results_df.columns:
    results_df = results_df.drop(columns=['job_id'])

# Format columns with different precision
results_df_copy = results_df.copy()

# Step 1: Format columns with different precisions
for col in results_df_copy.columns:
    if col == "weight decay":
        results_df_copy[col] = results_df_copy[col].apply(lambda x: f"{x:.3f}" if pd.notnull(x) else "")
    elif col != "batch size":
        results_df_copy[col] = results_df_copy[col].apply(lambda x: f"{x:.2f}" if pd.notnull(x) else "")

# Step 2: Bold max values (only for formatted float-like columns)
for col in results_df_copy.columns:
    # Skip batch size or any clearly non-numeric columns
    if col in ["learning rate", "weight decay", "batch size"]:
        continue
    try:
        # Convert formatted strings back to float to find max
        numeric_col = results_df_copy[col].replace('', np.nan).astype(float)
        max_val = numeric_col.max()
        results_df_copy[col] = results_df_copy[col].apply(
            lambda x: f"\\textbf{{{x}}}" if x != '' and float(x) == max_val else x
        )
    except ValueError:
        continue  # Skip columns that can't be converted to float

# Step 3: Convert DataFrame to LaTeX table
with open(latex_table_path, 'w') as f:
    f.write(results_df_copy.to_latex(index=False, escape=False))

print(f"Latex table saved to {latex_table_path}")

Latex table saved to ..\base_training\sweep_2025-07-08_00-24-53\test_results.tex
