In [1]:
import os
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Raise every pandas display limit to 1000 so the summary table is printed
# without row/column truncation or cell ellipsis.
for _display_option in (
    'display.max_rows',
    'display.max_columns',
    'display.width',
    'display.max_colwidth',
):
    pd.set_option(_display_option, 1000)


# Root directory of the metric dumps; files are read from
# <metrics_root>/<split>/<resolution>/*.json by load_metrics().
metrics_root = 'outputs/segmentation_metrics_0102'

# Dataset splits to analyse. Entries that were toggled off here:
#   "SA1B", "COCONut_relabeld_COCO_val", "PascalPanopticParts", "ADE20k"
# When re-enabling any of them keep a trailing comma after every entry;
# adjacent string literals without a comma are silently concatenated.
splits = ["EgoHOS"]

# Resolutions to analyse. Values that were toggled off here: 384, 1024, 1500
resolutions = [768]

def load_metrics(metrics_root, split, resolution, exlude=None):
    """Load every metrics JSON file stored for one split/resolution pair.

    Reads ``<metrics_root>/<split>/<resolution>/*.json`` and returns the parsed
    content of each file, keyed by the file name with its trailing ``.json``
    extension removed.

    Args:
        metrics_root: Root directory of the metric dumps.
        split: Name of the sub-directory below ``metrics_root``.
        resolution: Name of the sub-directory below ``split``; converted with
            ``str()`` so integers are accepted.
        exlude: Optional collection of file names (including the ``.json``
            extension) to skip. The misspelled parameter name is kept because
            callers pass it by keyword.

    Returns:
        dict mapping file stem to the parsed JSON content, inserted in sorted
        file-name order so the result does not depend on directory listing
        order.

    Raises:
        OSError: if the directory cannot be listed or a file cannot be opened
            (for example ``FileNotFoundError`` for a missing directory).
        json.JSONDecodeError: if a ``.json`` file is not valid JSON.
    """
    suffix = '.json'
    # Avoid a mutable default argument; None means "exclude nothing".
    skipped = () if exlude is None else exlude
    metrics_path = os.path.join(metrics_root, split, str(resolution))
    all_metrics = {}

    # os.listdir() returns entries in arbitrary order, so sort them.
    for file in sorted(os.listdir(metrics_path)):
        if not file.endswith(suffix) or file in skipped:
            continue
        with open(os.path.join(metrics_path, file), 'r') as f:
            # Strip only the trailing extension. str.replace() would also
            # remove any '.json' inside the name and could make two different
            # files collide on the same key.
            all_metrics[file[:-len(suffix)]] = json.load(f)

    return all_metrics


def compute_average(keys, metrics):
    """Average per-sample metrics for one tokenizer.

    Args:
        keys: Names of the numeric fields to average as a plain mean over all
            samples.
        metrics: List of per-sample dicts. Each dict must contain every name in
            ``keys`` plus ``'n_tokens'`` and ``'monosemanticity'``.

    Returns:
        dict with one entry per name in ``keys`` (mean rounded to 4 decimals),
        ``'n_samples'`` (number of samples) and ``'monosemanticity'``: the mean
        over samples of ``1 - nonzero(sample['monosemanticity']) / n_tokens``,
        multiplied by 100 and rounded to 4 decimals.

        Averages that are undefined are reported as NaN instead of raising
        ``ZeroDivisionError``: every key when ``metrics`` is empty, and
        ``'monosemanticity'`` when no sample has a non-zero ``n_tokens``.
        Samples with ``n_tokens == 0`` are left out of the monosemanticity
        mean because their ratio is undefined.
    """
    n_samples = len(metrics)
    nan = float('nan')

    average = {}
    for key in keys:
        if n_samples:
            average[key] = round(sum(m[key] for m in metrics) / n_samples, 4)
        else:
            average[key] = nan
    average['n_samples'] = n_samples

    # Per-sample share of tokens whose 'monosemanticity' entry is zero.
    all_monosemanticity = [
        1 - (np.count_nonzero(m['monosemanticity']) / m['n_tokens'])
        for m in metrics
        if m['n_tokens']
    ]

    if all_monosemanticity:
        average['monosemanticity'] = round(np.average(all_monosemanticity) * 100, 4)
    else:
        average['monosemanticity'] = nan

    return average

# Only the first configured split/resolution pair is loaded here.
split, resolution = splits[0], resolutions[0]

# 'superpixel_slic_4.json' is deliberately left out of the analysis.
all_metrics = load_metrics(
    metrics_root,
    split,
    resolution,
    exlude=['superpixel_slic_4.json'],
)

Compute Average

In [None]:
# Fields averaged as a plain mean over the samples of each tokenizer.
avg_keys = ['precision', 'recall', 'f1', 'time', 'n_tokens']

# Fill the summary in alphabetical tokenizer order so the resulting table
# rows are sorted by tokenizer name.
all_metrics_avg = {}
for tokenizer in sorted(all_metrics):
    metrics = all_metrics[tokenizer]
    avg = compute_average(avg_keys, metrics)
    all_metrics_avg[tokenizer] = avg

print(f"Split: {split}, Resolution: {resolution}")
print(f'Loaded {len(all_metrics_avg)} metrics')

# The dict is keyed by tokenizer, so transpose to get one row per tokenizer.
df = pd.DataFrame(all_metrics_avg).transpose()
print(df)

Plot P-R Curve and Token Count vs. Monosemanticity

In [None]:
import re
import matplotlib.pyplot as plt
import seaborn as sns

def get_custom_prefix(name):
    """Return the display label of the tokenizer family that `name` belongs to.

    The rules are tried in order; the first one that matches wins. Names that
    match no rule are labelled "others".
    """
    # (pattern, exact match required, family label)
    family_rules = (
        ("directsam_tiny", False, "DirectSAM-tiny"),
        ("directsam_large", False, "DirectSAM-large"),
        ("panoptic_mask2former", False, "Mask2Former (panoptic)"),
        ("panoptic_oneformer", False, "OneFormer (panoptic)"),
        ("superpixel_slic", False, "Superpixel"),
        ("fastsam", True, "FastSAM"),
        ("mobilesamv2", True, "MobileSAMv2"),
        ("sam_vit", False, "SAM"),
        ("patch_", False, "Patch"),
    )

    for pattern, exact, label in family_rules:
        matched = (name == pattern) if exact else name.startswith(pattern)
        if matched:
            return label

    return "others"

# Tag every tokenizer row with its family so points can be grouped and coloured.
df['prefix'] = df.index.map(get_custom_prefix)

# One "hls" colour per family, in order of first appearance in the table.
prefixes = df['prefix'].unique()
colors = sns.color_palette("hls", len(prefixes))
prefix_color_map = dict(zip(prefixes, colors))

for x, y in [('precision', 'recall'), ('n_tokens', 'monosemanticity')]:
    fig, ax = plt.subplots(figsize=(15, 10))

    # The precision/recall plot connects points in order of increasing recall
    # (the y column); every other plot connects them in order of increasing x.
    sort_key = y if x == 'precision' else x

    for prefix in prefixes:
        family_color = prefix_color_map[prefix]
        subset = df[df['prefix'] == prefix].sort_values(sort_key)

        # Markers carry the legend entry; the line only joins the family's points.
        ax.scatter(subset[x], subset[y], color=family_color, label=prefix)
        ax.plot(subset[x], subset[y], color=family_color)

        # Annotate every point with its tokenizer name.
        for tokenizer_name, point_x, point_y in zip(subset.index, subset[x], subset[y]):
            ax.text(point_x, point_y, tokenizer_name, fontsize=8, ha='left', va='bottom')

    ax.set_xlabel(x)
    ax.set_ylabel(y)
    ax.legend(title='Tokenizer Family')

    # Axis limits can be narrowed here if needed, e.g.
    # ax.set_xlim(0, 0.2); ax.set_ylim(0, 1)
    fig.tight_layout()
    plt.show()

Bar plot

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Move the tokenizer names out of the index into a 'tokenizer' column so
# seaborn can use them as the categorical x variable. This cell relies on the
# 'prefix' column of `df` and on `prefix_color_map`, both created by the
# scatter-plot cell above.
df_reset = df.reset_index().rename(columns={'index': 'tokenizer'})

metrics = ["f1", "time", "n_tokens"]

for metric in metrics:
    fig, ax = plt.subplots(figsize=(12, 8))

    sns.barplot(
        data=df_reset,
        x="tokenizer",
        y=metric,
        hue="prefix",          # color-group by tokenizer family
        palette=prefix_color_map,
        # Every tokenizer belongs to exactly one family, so there is nothing
        # to place side by side. Without this, seaborn versions that dodge by
        # default reserve one slot per family at every x position, which
        # yields thin, off-centre bars.
        dodge=False,
        ax=ax,
    )

    ax.set_title(metric.capitalize())
    ax.set_xlabel("Tokenizer")
    ax.set_ylabel(metric.capitalize())

    # Tokenizer names are long; rotate the x tick labels by 90 degrees.
    ax.tick_params(axis="x", labelrotation=90)

    ax.legend(title="Tokenizer Family")

    fig.tight_layout()
    plt.show()
    # Release the figure so repeated runs of this loop do not accumulate
    # open figures.
    plt.close(fig)

Mask size distribution

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns

# 1) Helper that maps a tokenizer name to its family label.
#    NOTE: this redefines the identical get_custom_prefix() from the
#    scatter-plot cell earlier in the notebook.
def get_custom_prefix(name):
    """Return the display label of the tokenizer family that `name` belongs to.

    The rules are tried in order; the first one that matches wins. Names that
    match no rule are labelled "others".
    """
    # (pattern, exact match required, family label)
    family_rules = (
        ("directsam_tiny", False, "DirectSAM-tiny"),
        ("directsam_large", False, "DirectSAM-large"),
        ("panoptic_mask2former", False, "Mask2Former (panoptic)"),
        ("panoptic_oneformer", False, "OneFormer (panoptic)"),
        ("superpixel_slic", False, "Superpixel"),
        ("fastsam", True, "FastSAM"),
        ("mobilesamv2", True, "MobileSAMv2"),
        ("sam_vit", False, "SAM"),
        ("patch_", False, "Patch"),
    )

    for pattern, exact, label in family_rules:
        matched = (name == pattern) if exact else name.startswith(pattern)
        if matched:
            return label

    return "others"

# 2) Build a color map: prefix -> distinct color.
#    Families are listed in order of first appearance over the name-sorted
#    tokenizers (dict.fromkeys de-duplicates while keeping order). Zipping a
#    set with the palette would make the family -> colour assignment depend on
#    string hash order, which can change between kernel restarts. This order
#    is also the one produced by df['prefix'].unique() on the name-sorted
#    summary table, so the colours agree with the earlier plots.
all_prefixes = list(dict.fromkeys(get_custom_prefix(tkn) for tkn in sorted(all_metrics)))
colors = sns.color_palette("hls", len(all_prefixes))
prefix_color_map = dict(zip(all_prefixes, colors))

fig, ax = plt.subplots(figsize=(20, 20))

# Number of token positions drawn on the x axis; every sample is truncated or
# zero-padded to this length before averaging.
max_tokens = 1024

# 3) Plot one line per tokenizer, in sorted order so the legend is stable.
for tokenizer in sorted(all_metrics):
    metrics = all_metrics[tokenizer]
    if not metrics:
        # No samples for this tokenizer: nothing to average or draw.
        continue

    color = prefix_color_map[get_custom_prefix(tokenizer)]

    sorted_areas_list = []
    for metric in metrics:
        # Largest masks first, keeping at most `max_tokens` entries.
        areas = np.sort(np.array(metric['mask_areas']))[::-1][:max_tokens]

        # Zero-pad shorter samples so every row has exactly `max_tokens` entries.
        areas_padded = np.pad(
            areas,
            (0, max_tokens - len(areas)),
            'constant',
            constant_values=0,
        )
        sorted_areas_list.append(areas_padded)

    # Rows are samples, columns are token positions. Scale by 100 so the values
    # read as percentages (assumes 'mask_areas' holds fractions of the image,
    # as the y-axis label states -- TODO confirm), then average per position.
    avg_areas = (np.array(sorted_areas_list) * 100).mean(axis=0)

    # Colour comes from the family; the legend label is the tokenizer name.
    ax.plot(range(1, max_tokens + 1), avg_areas, label=tokenizer, color=color)

# 4) Format plot
ax.set_yscale('log')
ax.yaxis.set_major_formatter(ticker.StrMethodFormatter("{x:.5f}%"))
ax.set_ylim(0.00001, 100)

ax.set_xlim(0, max_tokens)

ax.set_xlabel('Token Position')
ax.set_ylabel('Average Mask Area (% ratio to whole image) - log scale')
ax.legend(bbox_to_anchor=(1.05, 1))
fig.tight_layout()
plt.show()