In [None]:
import pandas as pd
from tensorboard.backend.event_processing import event_accumulator
import os, re, shutil
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [None]:
def get_thr(ss: str):
    """Extract the first parenthesised number from ``ss`` as a float.

    Example: ``"test_noise_vggss(0.5)_AUC"`` -> ``0.5``.

    Args:
        ss: Text to search (a run name or directory path).

    Returns:
        The number as a ``float``, or ``None`` when ``ss`` contains no
        parenthesised number.
    """
    # Accept "1", "1.", "0.5" and ".5" only. A looser character class such as
    # [\d.]+ would also capture "." or "1.2.3", which float() rejects with a
    # ValueError; here such groups are simply skipped by the search.
    match = re.search(r'\((\d+\.?\d*|\.\d+)\)', ss)
    if match is None:
        return None
    return float(match.group(1))

def get_metric(ss: str):
    """Return the text that follows the first ``(<digits/dots>)_`` in ``ss``.

    Example: ``"test_noise_vggss(0.5)_AUC_N"`` -> ``"AUC_N"``.

    Args:
        ss: Text to search (a run name or directory path).

    Returns:
        The remainder of the string after the ``(...)_`` marker (possibly
        empty), or ``None`` when the marker is absent.
    """
    found = re.search(r'\([\d\.]+\)_(.*)$', ss)
    return None if found is None else found.group(1)

def get_audio_type(ss: str):
    """Find the audio-condition keyword contained in ``ss``.

    Args:
        ss: Text to search (a scalar tag or run name).

    Returns:
        ``"std"``, ``"silence"`` or ``"noise"`` -- whichever occurs first in
        ``ss`` -- or ``None`` when none of them is present.
    """
    hit = re.search(r'(std|silence|noise)', ss)
    if hit is None:
        return None
    return hit.group(1)

def get_dataset(ss: str):
    """Find the first known dataset identifier contained in ``ss``.

    Args:
        ss: Text to search (a run name or directory path).

    Returns:
        The matched dataset name (the leftmost occurrence in ``ss`` wins), or
        ``None`` when no known name is present.
    """
    hit = re.search(r'(avs_ms3|avs_s4|vggss|exvggss|vggsound|flickr|exflickr|avatar_one_bb|avatar_one_seg)', ss)
    return hit.group(1) if hit is not None else None

def get_epoch(ss: str, best_epoch: int = 20):
    """Extract the epoch number encoded as ``epoch<N>`` or ``epochbest`` in ``ss``.

    Args:
        ss: Text to search (a run name or directory path).
        best_epoch: Epoch number to report for the ``epochbest`` marker.
            Defaults to 20, the value that used to be hard-coded here.

    Returns:
        The epoch as an ``int``, or ``None`` when ``ss`` has no epoch marker.
    """
    match = re.search(r'epoch(\d+|best)', ss)
    if match is None:
        return None

    token = match.group(1)
    # "best" carries no number of its own, so it is mapped to a configurable
    # placeholder epoch.
    return int(best_epoch) if token == 'best' else int(token)

In [None]:
def load_nested_tb_logs(root_dir):
    """Collect every scalar series logged under ``root_dir`` into one long DataFrame.

    The directory tree is walked recursively. Each directory that directly
    contains a file whose name starts with ``events.out.tfevents`` is loaded
    with its own ``EventAccumulator`` and all of its scalar tags are read.

    Args:
        root_dir: Directory to search.

    Returns:
        A DataFrame with one row per scalar event: the event fields plus
        ``metric_tag`` (the scalar tag) and ``run_group`` (path of the
        directory the event was read from), with ``wall_time`` converted to
        datetimes. When nothing is found a message is printed and an empty
        frame with the same column names is returned, so callers that select
        columns by name keep working.
    """
    frames = []

    for root, _dirs, files in os.walk(root_dir):
        # Only directories that hold an event file themselves are loaded.
        if not any(f.startswith("events.out.tfevents") for f in files):
            continue

        acc = event_accumulator.EventAccumulator(root)
        acc.Reload()

        for tag in acc.Tags()['scalars']:
            df_tag = pd.DataFrame(acc.Scalars(tag))

            # Remember which tag and which directory each row came from.
            df_tag['metric_tag'] = tag
            df_tag['run_group'] = root

            frames.append(df_tag)

    if not frames:
        print("No event files found in the specified path.")
        # Keep the schema on the empty result: a column-less frame makes
        # column lookups such as df['run_group'] fail with a KeyError.
        return pd.DataFrame(columns=['wall_time', 'step', 'value', 'metric_tag', 'run_group'])

    master_df = pd.concat(frames, ignore_index=True)

    # wall_time is read as seconds since the epoch; convert it to datetimes.
    master_df['wall_time'] = pd.to_datetime(master_df['wall_time'], unit='s')

    return master_df

def load_eval(path, run_name):
    """Load the scalar logs under ``path`` and annotate them for analysis.

    Threshold, metric, dataset and epoch are parsed from each row's
    ``run_group`` path and the audio type from its ``metric_tag``; the three
    raw columns ``wall_time``, ``metric_tag`` and ``run_group`` are then
    dropped.

    Args:
        path: Root directory handed to ``load_nested_tb_logs``.
        run_name: Label stored in the ``run`` column of every row.

    Returns:
        The annotated DataFrame.
    """
    events = load_nested_tb_logs(path)
    print(f"Loaded {len(events)} data points.")

    def parsed(column, parser):
        # Each cell is stringified before it is handed to the parser.
        return events[column].apply(lambda cell: parser(str(cell)))

    return (
        events
        .assign(
            threshold=parsed('run_group', get_thr),
            metric=parsed('run_group', get_metric),
            audio_type=parsed('metric_tag', get_audio_type),
            dataset=parsed('run_group', get_dataset),
            epoch=parsed('run_group', get_epoch),
        )
        .drop(columns=['wall_time', 'metric_tag', 'run_group'])
        .assign(run=run_name)
    )

def print_metrics(df):
    """Print and return a summary table of the key metrics at threshold 0.5.

    Rows are ``(dataset, epoch)`` pairs and columns are ``(audio_type,
    metric)`` pairs in a fixed order. Cells are filled by ``pivot_table``
    with its default aggregation; combinations that are absent from ``df``
    show up as NaN columns.

    Args:
        df: Long-format frame with the columns ``threshold``, ``metric``,
            ``audio_type``, ``dataset``, ``epoch`` and ``value``.

    Returns:
        The pivoted DataFrame that was printed.

    Note:
        The pandas display options (float format, max columns, width) are
        changed globally and stay changed after the call.
    """
    wanted_metrics = ['cIoU_hat', 'AUC', 'pIA_hat', 'AUC_N', 'mIoU', 'Fmeasure']
    at_half = df['threshold'] == 0.5
    is_wanted = df['metric'].isin(wanted_metrics)
    selected = df[at_half & is_wanted]

    # Long -> wide: one row per (dataset, epoch), one column per
    # (audio_type, metric).
    table = selected.pivot_table(
        values='value',
        index=['dataset', 'epoch'],
        columns=['audio_type', 'metric'],
    )

    # Fixed column layout, grouped by audio type.
    layout = (
        ('std', ['cIoU_hat', 'AUC', 'mIoU', 'Fmeasure']),
        ('silence', ['pIA_hat', 'AUC_N']),
        ('noise', ['pIA_hat', 'AUC_N']),
    )
    ordered_columns = [(audio, metric) for audio, names in layout for metric in names]

    # reindex inserts NaN columns for pairs that the pivot did not produce.
    table = table.reindex(columns=ordered_columns)

    pd.set_option("display.float_format", "{:,.3f}".format)
    pd.set_option("display.max_columns", None)
    pd.set_option("display.width", 1000)  # wide enough to avoid line wrapping

    print(table)
    return table

def plot_all_metrics(df):
    """Show one value-vs-threshold line plot per metric found in ``df``.

    Every figure draws ``value`` against ``threshold`` with one colour per
    ``dataset`` and one dash pattern per ``audio_type`` (``std`` solid,
    ``noise`` dashed, ``silence`` dotted).

    Args:
        df: Long-format frame with the columns ``metric``, ``threshold``,
            ``value``, ``dataset`` and ``audio_type``.

    Returns:
        None. Figures are rendered with ``plt.show()``; the seaborn theme is
        set globally as a side effect.
    """
    # Apply the theme *before* sampling the palette: sns.color_palette()
    # without a palette name reads the current colour cycle, which set_theme
    # replaces. Sampling first made the colours depend on whether a theme had
    # already been set earlier in the session.
    sns.set_theme(style="whitegrid")

    # Missing dataset labels are left out so that sorted() never has to
    # compare a missing value with a string.
    datasets = sorted(df['dataset'].dropna().unique())
    color_palette = dict(
        zip(datasets, sns.color_palette(n_colors=len(datasets)).as_hex())
    )

    # Dash pattern per audio type.
    style_map = {
        'std': (None, None),  # Solid
        'noise': (5, 5),      # Dashed
        'silence': (1, 2)     # Dotted
    }

    # A missing metric label would select no rows and give NaN axis limits,
    # so such labels are skipped.
    metrics = df['metric'].dropna().unique()

    for m in metrics:
        subset = df[df['metric'] == m].copy()
        subset = subset.sort_values('threshold')

        plt.figure(figsize=(10, 6))

        sns.lineplot(
            data=subset,
            x='threshold',
            y='value',
            hue='dataset',
            palette=color_palette,  # same colour per dataset in every figure
            style='audio_type',
            dashes=style_map,
            markers=True,
            linewidth=2
        )

        plt.title(f"Metric: {m}", fontsize=15, fontweight='bold')
        plt.xlabel("Threshold")
        plt.ylabel("Value")
        plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        plt.xticks(np.arange(0, 1.1, 0.1))
        plt.yticks(np.arange(0, 1.1, 0.1))
        plt.xlim(0, 1)

        # Snap the y-range outwards to the enclosing multiples of 0.1.
        precision = 1
        scale = 10 ** precision
        plt.ylim(
            np.floor(subset['value'].min() * scale) / scale,
            np.ceil(subset['value'].max() * scale) / scale,
        )

        plt.tight_layout()

        plt.show()

def plot_all_runs(merged_df):
    """Plot each metric over epochs for every non-baseline run.

    Only rows with ``threshold == 0.5`` and ``dataset == 'avatar_one_seg'``
    are used. One figure is shown per metric: ``value`` against ``epoch``,
    coloured by ``run`` and dashed by ``audio_type``, with the value of the
    run labelled ``'baseline'`` drawn as a horizontal dashed line.

    Args:
        merged_df: Long-format frame with the columns ``threshold``,
            ``dataset``, ``run``, ``metric``, ``epoch``, ``value`` and
            ``audio_type``.

    Returns:
        None. Figures are rendered with ``plt.show()``; the seaborn theme is
        set globally as a side effect.
    """
    merged_df = merged_df[
        (merged_df['threshold'] == 0.5) &
        (merged_df['dataset'] == 'avatar_one_seg')
    ]

    df = merged_df[merged_df['run'] != 'baseline']

    # Apply the theme *before* sampling the palette: sns.color_palette()
    # without a palette name reads the current colour cycle, which set_theme
    # replaces. Sampling first made the colours depend on whether a theme had
    # already been set earlier in the session.
    sns.set_theme(style="whitegrid")

    runs = sorted(df['run'].unique())
    color_palette = dict(zip(runs, sns.color_palette(n_colors=len(runs)).as_hex()))

    # Dash pattern per audio type.
    style_map = {
        'std': (None, None),  # Solid
        'noise': (5, 5),      # Dashed
        'silence': (1, 2)     # Dotted
    }

    df = df.sort_values('threshold')

    # A missing metric label would select no rows, so such labels are skipped.
    metrics = df['metric'].dropna().unique()

    for m in metrics:
        subset = df[df['metric'] == m].copy()
        subset = subset.sort_values('threshold')

        baseline_values = merged_df.loc[
            (merged_df['run'] == 'baseline') & (merged_df['metric'] == m),
            'value',
        ].to_list()
        # The baseline may not have logged this metric at all; in that case
        # the reference line is left out instead of failing on an empty list.
        # NOTE(review): when several baseline rows exist for a metric (for
        # example one per audio type) only the first is drawn -- confirm that
        # this is intended.
        baseline_value = baseline_values[0] if baseline_values else None

        plt.figure(figsize=(10, 6))

        if baseline_value is not None:
            plt.hlines(baseline_value, 0, 20, label='baseline', linestyles='dashed')

        sns.lineplot(
            data=subset,
            x='epoch',
            y='value',
            hue='run',
            palette=color_palette,  # same colour per run in every figure
            style='audio_type',
            dashes=style_map,
            markers=True,
            linewidth=2
        )

        plt.title(f"Evaluation @(threshold=0.5, avatar_seg, {m})", fontsize=15, fontweight='bold')
        plt.xlabel("Epoch")
        plt.ylabel("Value")
        plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        plt.yticks(np.arange(0, 1.1, 0.1))
        plt.xticks(range(0, 21, 2))

        # Snap the y-range outwards to the enclosing multiples of 0.1, taking
        # the baseline into account when there is one.
        lowest = subset['value'].min()
        highest = subset['value'].max()
        if baseline_value is not None:
            lowest = min(lowest, baseline_value)
            highest = max(highest, baseline_value)
        precision = 1
        scale = 10 ** precision
        plt.ylim(
            np.floor(lowest * scale) / scale,
            np.ceil(highest * scale) / scale,
        )

        plt.tight_layout()

        plt.show()

In [None]:
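# One-off maintenance snippet, kept disabled: it renames the entries of `path` whose
# names contain "(s4)" so that they use "_s4" instead. `path` must be defined before
# this is re-enabled.
# wrong_list = list(filter(lambda x: re.search(r'\(s4\)', x), os.listdir(path)))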
# corrected_list = [x.replace('(s4)', '_s4') for x in wrong_list]

# for wr, corr in zip(wrong_list, corrected_list):
#     # print(os.path.join(path, wr), '-->', os.path.join(path, corr))
#     shutil.move(os.path.join(path, wr), os.path.join(path, corr))

In [None]:
# Alternative log locations, kept for reference; none of them is used by the call below.
# path = "../train_outputs/2059323/Test_record/ACL_ViT16_aclifa_2gpu/tensorboard/epoch8"
# path = "../train_outputs/2070501/Test_record/ACL_ViT16_Exp_ACL_v1/tensorboard/epochbest/"
# path = "../train_outputs/merged_baseline_test/Test_record/Test_record/ACL_ViT16_Exp_ACL_v1/tensorboard/epochbest"
# Load the reference run. The 'baseline' label matters: plot_all_runs looks this run up
# by that exact name to draw its horizontal reference line.
baseline_eval = load_eval("../train_outputs/merged_baseline_test/Test_record/ACL_ViT16_Exp_ACL_v1/tensorboard", 'baseline')

In [None]:
# print_metrics already prints the table; binding the returned frame to `_` keeps the
# cell from displaying it a second time.
_ = print_metrics(baseline_eval)

In [None]:
# Load the retrained run; its rows are labelled 'retrained_baseline_B8' in the `run` column.
retrained_baseline = load_eval("../train_outputs/2059438/Test_record/ACL_ViT16_aclifa_2gpu/tensorboard", 'retrained_baseline_B8')

In [None]:
# Summary table for a single epoch of the retrained run.
# NOTE(review): epoch 14 is hard-coded -- presumably the checkpoint of interest; confirm.
_ = print_metrics(retrained_baseline[retrained_baseline['epoch'] == 14])

In [None]:
# Stack both runs into one long frame (index labels repeat because ignore_index is not set).
# NOTE(review): the name `eval` shadows Python's built-in eval() for the rest of the
# session; consider renaming it together with every cell that uses it.
eval = pd.concat([baseline_eval, retrained_baseline])

In [None]:
# Display the combined frame for a quick visual check.
eval

In [None]:
# Per-metric curves over epochs against the baseline; plot_all_runs itself restricts the
# data to threshold 0.5 and dataset 'avatar_one_seg'.
plot_all_runs(eval)

In [None]:
# Metric-versus-threshold curves for the baseline run, one figure per metric.
plot_all_metrics(baseline_eval)

In [None]:
# The same curves for the retrained run, restricted to epoch 14 (hard-coded, as above).
plot_all_metrics(retrained_baseline[retrained_baseline['epoch'] == 14])