In [2]:
import numpy as np
import pandas as pd
import multiprocessing as mp
from inspect import signature
from matplotlib import pyplot as plt
from dacbench.logger import Logger, log2dataframe, load_logs
import seaborn as sns
import glob
from plotting import _plot_performance_over_time, plot_performance_over_time, plot_final_performance_comparison, plot_improvement_probability, plot_configuration_footprint, plot_hp_importance, plot_deepcave
#from plotting import plot_performance_over_time, plot_final_performance_comparison, plot_improvement_probability, plot_configuration_footprint, plot_hp_importance, plot_deepcave

  warn(f"Failed to load image Python extension: {e}")


DACBench Gym registration failed - make sure you have all dependencies installed and their instance sets in the right path!




In [3]:
def toDataFrame(glob_path, col_name, col_index, method=None):
    d =  [log2dataframe(load_logs(path)) for path in glob.glob(glob_path)]
    
    df = pd.DataFrame([s[col_name][col_index] for s in d]).transpose()
    df['epoch'] = df.index + 1
    df = df.melt(id_vars=['epoch'])
    df.rename(columns={'variable': 'seed'}, inplace=True)
    
    if method:
        df['method'] = method

    return df

def toChunkedDataFrame(glob_path, col_name, col_index, chunk_size, method=None):
    d = [log2dataframe(load_logs(path)) for path in glob.glob(glob_path)]
    
    # Create a DataFrame where each column is the series of values from a run.
    df = pd.DataFrame([s[col_name][col_index] for s in d]).transpose()
    
    result_df = pd.DataFrame()
    for col in df.columns:
        arr = df[col].values
        n = len(arr)
        remainder = n % chunk_size
        # If the length isn't a multiple of chunk_size, pad it.
        if remainder != 0:
            pad_length = chunk_size - remainder
            # Use the mean of the last (incomplete) chunk for padding.
            pad_value = arr[-remainder:].mean()
            padded_arr = np.concatenate([arr, np.full(pad_length, pad_value)])
        else:
            padded_arr = arr
        reshaped_data = padded_arr.reshape(-1, chunk_size)
        result_df[col] = reshaped_data.mean(axis=1)
    
    result_df.reset_index(drop=True, inplace=True)
    result_df['epoch'] = result_df.index + 1
    result_df = result_df.melt(id_vars=['epoch'])
    result_df.rename(columns={'variable': 'seed'}, inplace=True)
    
    if method:
        result_df['method'] = method
    
    return result_df

def get_final_labels_and_stats(data, epoch_col="epoch", value_col="value", group_col="method",
                               label_format="{label} ({mean:.3f} SE {sem:.3f})"):
    """
    Computes final aggregated stats and creates new legend labels, preserving the original order.

    Parameters:
    -----------
    data : pd.DataFrame
        DataFrame containing the data with columns for epoch, value, and group identifier.
    epoch_col : str, default "epoch"
        Column name representing the epoch.
    value_col : str, default "value"
        Column name for the metric values.
    group_col : str, default "method"
        Column name used to group the data.
    label_format : str, default "{label}: {mean:.3f} ± {sem:.3f}"
        A format string used to create new legend labels, where {label} is the group,
        {mean} is the computed mean, and {sem} is the standard error.

    Returns:
    --------
    new_labels : list of str
        A list of updated legend labels with the final aggregated values in the original order.
    final_stats : pd.DataFrame
        A DataFrame indexed by the group column containing the mean and standard error,
        reindexed to preserve the original order of appearance.
    """
    # Determine the final epoch
    final_epoch = data[epoch_col].max()

    # Filter data to only include the final epoch
    final_data = data[data[epoch_col] == final_epoch]

    # Compute aggregated statistics (mean and standard error) grouped by the given group column
    final_stats = final_data.groupby(group_col)[value_col].agg(["mean", "sem"])

    # Get the original order of methods as they first appear in the data
    original_order = data[group_col].drop_duplicates().tolist()

    # Reindex final_stats to follow the original order (dropping any methods that might be missing)
    final_stats = final_stats.reindex(original_order)

    # Create new labels in the preserved order
    new_labels = []
    for label in original_order:
        if label in final_stats.index and pd.notnull(final_stats.loc[label, "mean"]):
            new_label = label_format.format(label=label,
                                            mean=final_stats.loc[label, "mean"],
                                            sem=final_stats.loc[label, "sem"])
        else:
            new_label = label
        new_labels.append(new_label)
    
    return new_labels


# sns.set(rc={"figure.dpi":300, 'savefig.dpi':300})
sns.set_style('whitegrid')
palette = sns.color_palette("colorblind")
palette = sns.color_palette([palette[0], palette[4], palette[2], palette[5], palette[6]])
# sns.set_palette(palette)
sns.set_context("notebook", font_scale=1.15)

In [18]:
smacfixed_va = toDataFrame('../results_cluster/results/tuned_smacfixed_cifar100/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='SMAC Fixed')
cawr_va = toDataFrame('../results_cluster/results/CAWR_cifar100/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='CAWR')
tuned_cawr_va = toDataFrame('../results_cluster/results/tuned_CAWR_cifar100/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='Tuned CAWR')
adam_fixed_va = toDataFrame('../results_cluster/results/Adam_fixed_cifar100/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, 'Adam')
prodigy_va = toDataFrame('../results_cluster/results/prodigy_cifar100/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='Prodigy')
cocob_va = toDataFrame('../results_cluster/results/COCOB_cifar100/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='COCOB')
dadaptation_va = toDataFrame('../results_cluster/results/dadaptation_cifar100/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='D-Adaptation')
# prodigy_va = toDataFrame('../results_cluster/results/prodigy_cifar100/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='Prodigy')
# adam_fixed_va = toDataFrame('../results_cluster/results/Adam_fixed_cifar100/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, 'Adam')
dowg_va = toDataFrame('../results_cluster/results/DoWG_cifar100/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, 'DoWG')


data = pd.concat([adam_fixed_va, smacfixed_va, cawr_va, tuned_cawr_va, prodigy_va, cocob_va, dowg_va, dadaptation_va], ignore_index=True)
data['value'] = data.groupby(['method', 'seed'])['value'] \
                        .transform(lambda x: x.ewm(alpha=0.3, adjust=False).mean())
                        # .transform(lambda x: savgol_filter(x, window_length=11, polyorder=3))
data['value'] = data['value'] * 100 # convert to percent
data = data.loc[
    data.groupby(["method", "seed"])["epoch"].idxmax()
].reset_index(drop=True)
data = data.groupby('method')['value'].mean().reset_index()

# 3. For each dataset, compute the best (highest) accuracy,
#    then compute the difference for each method as (best_value - current_value).
data["best_value"] = data["value"].max()
data["diff"] = (data["best_value"] - data["value"])
print(data)

         method      value  best_value      diff
0          Adam  66.237438   67.038387  0.800949
1          CAWR  60.004690   67.038387  7.033698
2         COCOB  62.322306   67.038387  4.716081
3  D-Adaptation  65.177936   67.038387  1.860451
4          DoWG  61.042913   67.038387  5.995474
5       Prodigy  67.038387   67.038387  0.000000
6    SMAC Fixed  63.447498   67.038387  3.590889
7    Tuned CAWR  65.641463   67.038387  1.396924


In [19]:
smacfixed_va = toDataFrame('../results_cluster/results/tuned_smacfixed_cifar10/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='SMAC Fixed')
cawr_va = toDataFrame('../results_cluster/results/CAWR_cifar10/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='CAWR')
tuned_cawr_va = toDataFrame('../results_cluster/results/tuned_CAWR_cifar10/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='Tuned CAWR')
adam_fixed_va = toDataFrame('../results_cluster/results/Adam_fixed_cifar10/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, 'Adam')
prodigy_va = toDataFrame('../results_cluster/results/prodigy_cifar10/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='Prodigy')
cocob_va = toDataFrame('../results_cluster/results/COCOB_cifar10/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='COCOB')
dadaptation_va = toDataFrame('../results_cluster/results/dadaptation_cifar10/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='D-Adaptation')
# prodigy_va = toDataFrame('../results_cluster/results/prodigy_cifar100/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='Prodigy')
# adam_fixed_va = toDataFrame('../results_cluster/results/Adam_fixed_cifar100/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, 'Adam')
dowg_va = toDataFrame('../results_cluster/results/DoWG_cifar10/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, 'DoWG')


data = pd.concat([adam_fixed_va, smacfixed_va, cawr_va, tuned_cawr_va, prodigy_va, cocob_va, dowg_va, dadaptation_va], ignore_index=True)
data['value'] = data.groupby(['method', 'seed'])['value'] \
                        .transform(lambda x: x.ewm(alpha=0.3, adjust=False).mean())
                        # .transform(lambda x: savgol_filter(x, window_length=11, polyorder=3))
data['value'] = data['value'] * 100 # convert to percent
data = data.loc[
    data.groupby(["method", "seed"])["epoch"].idxmax()
].reset_index(drop=True)
data = data.groupby('method')['value'].mean().reset_index()

# 3. For each dataset, compute the best (highest) accuracy,
#    then compute the difference for each method as (best_value - current_value).
data["best_value"] = data["value"].max()
data["diff"] = (data["best_value"] - data["value"])
print(data)

         method      value  best_value      diff
0          Adam  91.143940   91.200396  0.056456
1          CAWR  90.487200   91.200396  0.713196
2         COCOB  90.008404   91.200396  1.191992
3  D-Adaptation  90.789780   91.200396  0.410615
4          DoWG  90.661834   91.200396  0.538562
5       Prodigy  91.200396   91.200396  0.000000
6    SMAC Fixed  90.591467   91.200396  0.608929
7    Tuned CAWR  90.957558   91.200396  0.242838


In [4]:
smacfixed_va = toDataFrame('../results_cluster/results/dtd2/tuned_smacfixed_dtd/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='SMAC Fixed')
cawr_va = toDataFrame('../results_cluster/results/dtd2/CAWR_dtd/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='CAWR')
tuned_cawr_va = toDataFrame('../results_cluster/results/dtd2/tuned_CAWR_dtd/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='Tuned CAWR')
adam_fixed_va = toDataFrame('../results_cluster/results/dtd2/Adam_fixed_dtd/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, 'Adam')
prodigy_va = toDataFrame('../results_cluster/results/dtd2/prodigy_dtd/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='Prodigy')
cocob_va = toDataFrame('../results_cluster/results/dtd2/COCOB_dtd/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='COCOB')
dadaptation_va = toDataFrame('../results_cluster/results/dtd2/dadaptation_dtd/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='D-Adaptation')
# prodigy_va = toDataFrame('../results_cluster/results/prodigy_cifar100/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='Prodigy')
# adam_fixed_va = toDataFrame('../results_cluster/results/Adam_fixed_cifar100/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, 'Adam')
dowg_va = toDataFrame('../results_cluster/results/dtd2/DoWG_dtd/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, 'DoWG')


data = pd.concat([adam_fixed_va, smacfixed_va, cawr_va, tuned_cawr_va, prodigy_va, cocob_va, dowg_va, dadaptation_va], ignore_index=True)
data['value'] = data.groupby(['method', 'seed'])['value'] \
                        .transform(lambda x: x.ewm(alpha=0.3, adjust=False).mean())
                        # .transform(lambda x: savgol_filter(x, window_length=11, polyorder=3))
data['value'] = data['value'] * 100 # convert to percent
data = data.loc[
    data.groupby(["method", "seed"])["epoch"].idxmax()
].reset_index(drop=True)
data = data.groupby('method')['value'].mean().reset_index()

# 3. For each dataset, compute the best (highest) accuracy,
#    then compute the difference for each method as (best_value - current_value).
data["best_value"] = data["value"].max()
data["diff"] = (data["best_value"] - data["value"])
print(data)

         method      value  best_value       diff
0          Adam  29.282484   31.565147   2.282663
1          CAWR   7.168533   31.565147  24.396613
2         COCOB  31.565147   31.565147   0.000000
3  D-Adaptation  28.404704   31.565147   3.160442
4          DoWG  28.050372   31.565147   3.514774
5       Prodigy  31.419111   31.565147   0.146036
6    SMAC Fixed  29.888256   31.565147   1.676891
7    Tuned CAWR   5.511010   31.565147  26.054136


In [18]:
methods = ["Adam_fixed", "tuned_smacfixed", "CAWR", "tuned_CAWR", "dadaptation", "prodigy", "COCOB", "DoWG"]
datasets = ["cifar100", ]
#"cifar10", "cifar100"

# smacfixed_va = toDataFrame('../results_cluster/results/dtd2/tuned_smacfixed_dtd/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='SMAC Fixed')
# cawr_va = toDataFrame('../results_cluster/results/dtd2/CAWR_dtd/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='CAWR')
# tuned_cawr_va = toDataFrame('../results_cluster/results/dtd2/tuned_CAWR_dtd/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='Tuned CAWR')
# adam_fixed_va = toDataFrame('../results_cluster/results/dtd2/Adam_fixed_dtd/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, 'Adam')
# prodigy_va = toDataFrame('../results_cluster/results/dtd2/prodigy_dtd/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='Prodigy')
# cocob_va = toDataFrame('../results_cluster/results/dtd2/COCOB_dtd/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='COCOB')
# dadaptation_va = toDataFrame('../results_cluster/results/dtd2/dadaptation_dtd/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='D-Adaptation')
# # prodigy_va = toDataFrame('../results_cluster/results/prodigy_cifar100/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method='Prodigy')
# # adam_fixed_va = toDataFrame('../results_cluster/results/Adam_fixed_cifar100/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, 'Adam')
# dowg_va = toDataFrame('../results_cluster/results/dtd2/DoWG_dtd/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, 'DoWG')

def transform(df, dataset):
    df['value'] = df.groupby(['method', 'seed'])['value'] \
                        .transform(lambda x: x.ewm(alpha=0.3, adjust=False).mean())
    df['value'] = df['value'] * 100
    df = df.groupby(["method", "epoch"], as_index=False)["value"].mean()
    df = df.loc[
        df.groupby(["method"])["epoch"].idxmax()
    ].reset_index(drop=True)
    df['dataset'] = dataset

    return df

df = pd.concat([transform(toDataFrame('../results_cluster/results/' + method + '_' + dataset + '/*/CustomTrackingWrapper.jsonl', 'validation_accuracies', 6, method=method), dataset) for method in methods for dataset in datasets], ignore_index=True)
print(df)
# At this point, for each dataset-method we have the final validation accuracy in df["value"].
# Higher accuracy is better.

# --- Compute Marginal Contribution via Shapley Values ---

def calculate_marginal_contributions(df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate marginal contribution per method for each dataset.

    Args:
    df (pd.DataFrame): The input DataFrame containing columns:
                       ['dataset', 'method', 'group', 'epoch', 'value']

    Returns:
    pd.DataFrame: A DataFrame showing marginal contributions for each method per dataset.
    """
    results = []

    # Get unique datasets and methods
    datasets = df['dataset'].unique()
    methods = df['method'].unique()
    
    for dataset in datasets:
        # Filter the DataFrame for the current dataset
        df_dataset = df[df['dataset'] == dataset]

        # Calculate the performance with all methods (Baseline)
        baseline_performance = df_dataset['value'].mean()

        for method in methods:
            # Filter out the current method to calculate "without" performance
            df_without_method = df_dataset[df_dataset['method'] != method]

            # Calculate the best performance without the current method
            if not df_without_method.empty:
                performance_without = df_without_method['value'].mean()
            else:
                performance_without = 0  # If no methods are left

            # Calculate the marginal contribution
            marginal_contribution = baseline_performance - performance_without

            # Store the results
            results.append({
                'dataset': dataset,
                'method': method,
                'baseline_performance': baseline_performance,
                'performance_without': performance_without,
                'marginal_contribution': marginal_contribution
            })

    # Convert the results to a DataFrame
    result_df = pd.DataFrame(results)
    
    return result_df

mcs = calculate_marginal_contributions(df)
# mcs = mcs.groupby(["method"], as_index=False)["marginal_contribution"].mean()
print(mcs)

            method  epoch      value   dataset
0       Adam_fixed    300  66.237438  cifar100
1  tuned_smacfixed    300  63.447498  cifar100
2             CAWR    300  60.004690  cifar100
3       tuned_CAWR    300  65.641463  cifar100
4      dadaptation    300  65.177936  cifar100
5          prodigy    300  67.038387  cifar100
6            COCOB    300  62.322306  cifar100
7             DoWG    300  61.042913  cifar100
    dataset           method  baseline_performance  performance_without  \
0  cifar100       Adam_fixed             63.864079            63.525028   
1  cifar100  tuned_smacfixed             63.864079            63.923591   
2  cifar100             CAWR             63.864079            64.415420   
3  cifar100       tuned_CAWR             63.864079            63.610167   
4  cifar100      dadaptation             63.864079            63.676385   
5  cifar100          prodigy             63.864079            63.410606   
6  cifar100            COCOB             63.864079  

In [25]:
dadaptation_dlrs = toDataFrame('../results_cluster/results/dadaptation_dtd/*/CustomTrackingWrapper.jsonl',
                'dlrs', 10, method='D-Adaptation')

dadaptation_dlrs = dadaptation_dlrs.groupby(["method", "epoch"], as_index=False)["value"].mean()

# 2. Select the final epoch for each dataset-method-group combination
dadaptation_dlrs = dadaptation_dlrs.loc[
    dadaptation_dlrs.groupby(["method"])["epoch"].idxmax()
].reset_index(drop=True)

print(dadaptation_dlrs)

         method  epoch     value
0  D-Adaptation    300  0.000029


In [26]:
dadaptation_dlrs = toDataFrame('../results_cluster/results/prodigy_dtd/*/CustomTrackingWrapper.jsonl',
                'dlrs', 10, method='D-Adaptation')

dadaptation_dlrs = dadaptation_dlrs.groupby(["method", "epoch"], as_index=False)["value"].mean()

# 2. Select the final epoch for each dataset-method-group combination
dadaptation_dlrs = dadaptation_dlrs.loc[
    dadaptation_dlrs.groupby(["method"])["epoch"].idxmax()
].reset_index(drop=True)

print(dadaptation_dlrs)

         method  epoch     value
0  D-Adaptation    300  0.000407
