In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ==== Configuration ====
# Folder that is scanned for one sub-directory per model
root_dir = 'eval_results'
# Input sizes for which values are collected and plotted
input_sizes = [160, 192, 224, 256, 320, 384, 448, 512]
# Number of Transformer blocks (block indices run from 0 to n_blocks - 1)
n_blocks = 12

# Full model name -> short label shown in the plot legends
short_names = dict([
    ("deit_small_patch16_LS_4", "Learnable-PE"),
    ("gridpe_deit_small_patch16_LS_4", "GridPE"),
    ("rope_axial_deit_small_patch16_LS_4", "Rope-Axial"),
    ("rope_mixed_deit_small_patch16_LS_4", "Rope-Mixed"),
])

# ==== 1. Parse attn_entropy.txt and build DataFrame ====
# For every entry under root_dir that contains an attn_entropy.txt, collect the
# logged values per (input size, block) and store one DataFrame per model in
# all_dfs (rows = input sizes, columns = Block0 ... Block{n_blocks - 1}).
#
# Two kinds of lines are recognised after stripping whitespace:
#   "Input size: <int>"         -> selects the size for the lines that follow
#   "Block <int> ...: <float>"  -> one value for that block at the current size
# Every other line is ignored.
all_dfs = {}
for model in sorted(os.listdir(root_dir)):
    model_dir = os.path.join(root_dir, model)
    txt_path  = os.path.join(model_dir, 'attn_entropy.txt')
    if not os.path.isfile(txt_path):
        continue  # no entropy log for this entry

    # buf[size][block] temporarily stores entropy values for each input size and block
    buf = {size: {b: [] for b in range(n_blocks)} for size in input_sizes}
    cur_size = None  # no size selected until the first "Input size:" line

    with open(txt_path, 'r') as f:
        for line in f:
            line = line.strip()
            if line.startswith('Input size:'):
                cur_size = int(line.split(':', 1)[1].strip())
            elif line.startswith('Block') and cur_size in buf:
                left, val_str = line.split(':', 1)
                b_idx = int(left.split()[1])
                # Skip block indices outside range(n_blocks), mirroring how
                # sizes missing from input_sizes are skipped, rather than
                # failing with a bare KeyError.
                if b_idx in buf[cur_size]:
                    buf[cur_size][b_idx].append(float(val_str))

    # Build a DataFrame: rows = input sizes, columns = Block0 to Block11.
    # Each cell is the mean of the collected values, or NaN if none were logged.
    data = [
        [np.mean(buf[size][b]) if buf[size][b] else np.nan
         for b in range(n_blocks)]
        for size in input_sizes
    ]
    df = pd.DataFrame(
        data,
        index=input_sizes,
        columns=[f'Block{b}' for b in range(n_blocks)]
    )
    all_dfs[model] = df

# Create output folder
os.makedirs('2d_attn', exist_ok=True)

# ==== 2. Plot mean attention entropy and save figures ====
# One figure per input size; every model in all_dfs contributes one line with
# its per-block values for that size.
x = np.arange(n_blocks)  # block indices on the x-axis, identical for every figure
for size in input_sizes:
    fig, ax = plt.subplots(figsize=(7, 5), dpi=300)

    for model, df in all_dfs.items():
        y = df.loc[size].values
        # Use the short display name when one is defined, else the raw model name.
        label = short_names.get(model, model)
        ax.plot(x, y, marker='o', label=label)

    ax.set_xlabel('Block Index', fontsize=20)
    ax.set_ylabel('Mean Attention Entropy', fontsize=20)
    ax.set_xticks(x)
    ax.grid(True, linestyle='--', alpha=0.4)
    ax.legend(loc='best', fontsize=18)

    out_name = f'2d_attn/attention_entropy_{size}.png'
    fig.savefig(out_name, dpi=300, bbox_inches='tight')
    print(f'Saved {out_name}')

    # Release the figure so figures do not accumulate across iterations.
    plt.close(fig)

Saved 2d_attn/attention_entropy_160.png
Saved 2d_attn/attention_entropy_192.png
Saved 2d_attn/attention_entropy_224.png
Saved 2d_attn/attention_entropy_256.png
Saved 2d_attn/attention_entropy_320.png
Saved 2d_attn/attention_entropy_384.png
Saved 2d_attn/attention_entropy_448.png
Saved 2d_attn/attention_entropy_512.png


In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ==== Configuration ====
# Folder that is scanned for one sub-directory per model
root_dir = 'eval_results'
# Input sizes for which values are collected and plotted
input_sizes = [160, 192, 224, 256, 320, 384, 448, 512]
# Number of transformer blocks (block indices run from 0 to n_blocks - 1)
n_blocks = 12

# Full model name -> short label shown in the plot legends
short_names = dict([
    ("deit_small_patch16_LS_4", "Learnable-PE"),
    ("gridpe_deit_small_patch16_LS_4", "GridPE"),
    ("rope_axial_deit_small_patch16_LS_4", "Rope-Axial"),
    ("rope_mixed_deit_small_patch16_LS_4", "Rope-Mixed"),
])

# ==== 1. Read attn_distance.txt and build DataFrame ====
# For every entry under root_dir that contains an attn_distance.txt, collect the
# logged values per (input size, block) and store one DataFrame per model in
# all_dfs (rows = input sizes, columns = Block0 ... Block{n_blocks - 1}).
#
# Two kinds of lines are recognised after stripping whitespace:
#   "Input size: <int>"         -> selects the size for the lines that follow
#   "Block <int> ...: <float>"  -> one value for that block at the current size
# Every other line is ignored.
all_dfs = {}
for model in sorted(os.listdir(root_dir)):
    model_dir = os.path.join(root_dir, model)
    txt_path  = os.path.join(model_dir, 'attn_distance.txt')
    if not os.path.isfile(txt_path):
        continue  # no distance log for this entry

    # buf[size][block] temporarily stores distance values for each input size and block
    buf = {size: {b: [] for b in range(n_blocks)} for size in input_sizes}
    cur_size = None  # no size selected until the first "Input size:" line

    with open(txt_path, 'r') as f:
        for line in f:
            line = line.strip()
            if line.startswith('Input size:'):
                cur_size = int(line.split(':', 1)[1].strip())
            elif line.startswith('Block') and cur_size in buf:
                left, val_str = line.split(':', 1)
                b_idx = int(left.split()[1])
                # Skip block indices outside range(n_blocks), mirroring how
                # sizes missing from input_sizes are skipped, rather than
                # failing with a bare KeyError.
                if b_idx in buf[cur_size]:
                    buf[cur_size][b_idx].append(float(val_str))

    # Build DataFrame: rows = input_size, columns = Block0…Block11.
    # Each cell is the mean of the collected values, or NaN if none were logged.
    data = [
        [np.mean(buf[size][b]) if buf[size][b] else np.nan
         for b in range(n_blocks)]
        for size in input_sizes
    ]
    df = pd.DataFrame(
        data,
        index=input_sizes,
        columns=[f'Block{b}' for b in range(n_blocks)]
    )
    all_dfs[model] = df

# ==== 2. Plot mean attention distance and save figures ====
# One figure per input size; every model in all_dfs contributes one line with
# its per-block values for that size.
os.makedirs('2d_attn', exist_ok=True)
x = np.arange(n_blocks)  # block indices on the x-axis, identical for every figure
for size in input_sizes:
    fig, ax = plt.subplots(figsize=(7, 5), dpi=300)

    for model, df in all_dfs.items():
        y = df.loc[size].values
        # Use the short display name when one is defined, else the raw model name.
        label = short_names.get(model, model)
        ax.plot(x, y, marker='o', label=label)

    ax.set_xlabel('Block Index', fontsize=20)
    ax.set_ylabel('Mean Attention Distance', fontsize=20)
    ax.set_xticks(x)
    ax.grid(True, linestyle='--', alpha=0.4)
    ax.legend(loc='best', fontsize=15)

    out_name = f'2d_attn/attention_distance_{size}.png'
    fig.savefig(out_name, dpi=300, bbox_inches='tight')
    print(f'Saved {out_name}')

    # Release the figure so figures do not accumulate across iterations.
    plt.close(fig)

Saved 2d_attn/attention_distance_160.png
Saved 2d_attn/attention_distance_192.png
Saved 2d_attn/attention_distance_224.png
Saved 2d_attn/attention_distance_256.png
Saved 2d_attn/attention_distance_320.png
Saved 2d_attn/attention_distance_384.png
Saved 2d_attn/attention_distance_448.png
Saved 2d_attn/attention_distance_512.png
