In [None]:
import argparse
import numpy as np
import torch
import matplotlib.pyplot as plt
import matplotlib as mpl
from pathlib import Path

In [1]:
import torch
import pandas as pd, numpy as np, re, matplotlib.pyplot as plt
import matplotlib as mpl

  cpu = _conversion_method_template(device=torch.device("cpu"))


ModuleNotFoundError: No module named 'pandas'

In [None]:
# ===== Style =====
# Global matplotlib appearance shared by every figure in this notebook:
# black, slightly thick axes frame and serif text for labels and math.
mpl.rcParams.update({
    "axes.edgecolor": "black",
    "axes.linewidth": 1.50,
    "mathtext.fontset": "cm",
    "mathtext.rm": "serif",
    "font.family": "serif",
    "legend.fontsize": 12,
})

# Drawing constants used by the plotting helpers.
LINEWIDTH, MARKERSIZE = 2.0, 6

In [None]:
def load_top1_from_similarity(sim_pth: Path) -> np.ndarray:
    """
    Load a similarity matrix and return the best-match score of every row.

    The file is expected to hold a single 2D tensor (described by the
    original author as [N_q, N_val]: one row per query, one column per value).

    Args:
        sim_pth: Path to the file to read with ``torch.load``.

    Returns:
        1D numpy array with one entry per row: the largest similarity in that row.

    Raises:
        TypeError: If the loaded object is not a ``torch.Tensor``.
        ValueError: If the tensor is not 2D or has no columns to pick from.
    """
    T = torch.load(sim_pth, map_location="cpu")
    if not isinstance(T, torch.Tensor):
        raise TypeError(f"{sim_pth} should contain a torch.Tensor, got {type(T)}")
    if T.ndim != 2:
        raise ValueError(f"{sim_pth} must be 2D, got shape {tuple(T.shape)}")
    if T.shape[1] < 1:
        # topk(k=1) would otherwise fail with an opaque RuntimeError.
        raise ValueError(f"{sim_pth} must have at least 1 column, got shape {tuple(T.shape)}")
    # Top-1 per row (query); sorted=True keeps the ordering guarantee explicit.
    top1 = torch.topk(T, k=1, dim=1, largest=True, sorted=True).values.squeeze(1)
    # detach(): Tensor.numpy() refuses tensors that still require grad.
    return top1.detach().numpy()

def load_bg_from_similarity_wtrain(bg_pth: Path) -> np.ndarray:
    """
    Load a value-vs-value similarity matrix and return each row's runner-up score.

    The file is expected to hold a single 2D tensor (described by the original
    author as [N_val, N_val]). The second-largest entry of every row is returned,
    which is the nearest-neighbour similarity with the self-match removed.

    NOTE(review): this assumes the self-match is the largest entry in its row
    (e.g. cosine similarity 1.0 on the diagonal) -- confirm for the data used.

    Args:
        bg_pth: Path to the file to read with ``torch.load``.

    Returns:
        1D numpy array with one entry per row: the 2nd-largest similarity.

    Raises:
        TypeError: If the loaded object is not a ``torch.Tensor``.
        ValueError: If the tensor is not 2D or has fewer than 2 columns.
    """
    T = torch.load(bg_pth, map_location="cpu")
    if not isinstance(T, torch.Tensor):
        raise TypeError(f"{bg_pth} should contain a torch.Tensor, got {type(T)}")
    if T.ndim != 2:
        raise ValueError(f"{bg_pth} must be 2D, got shape {tuple(T.shape)}")
    if T.shape[1] < 2:
        raise ValueError("similarity_wtrain must have at least 2 columns to drop self-match.")
    # Take the two largest entries per row in descending order, keep the second one.
    top2 = torch.topk(T, k=2, dim=1, largest=True, sorted=True).values
    nn1 = top2[:, 1]
    # detach(): Tensor.numpy() refuses tensors that still require grad.
    return nn1.detach().numpy()

In [None]:
def make_bins(data_list, bin_width=0.005, clip_range=None):
    """
    Create histogram bin edges that cover all given distributions.

    Args:
        data_list: Iterable of array-likes; only consulted when ``clip_range`` is None.
        bin_width: Spacing between consecutive edges; must be a positive finite number.
        clip_range: Optional ``(lo, hi)`` pair forcing the covered range. If None,
            the range is taken from the data and widened by 1e-6 on both sides.

    Returns:
        1D numpy array of edges starting at ``lo`` with step ``bin_width``.

    Raises:
        ValueError: If ``bin_width`` is not positive and finite, if the range has to
            be inferred but some array is empty (or no array is given), or if
            ``clip_range`` does not satisfy ``lo < hi``.
    """
    # A zero or negative step would make np.arange fail obscurely or return no edges.
    if not np.isfinite(bin_width) or bin_width <= 0:
        raise ValueError(f"bin_width must be a positive finite number, got {bin_width!r}")

    if clip_range is None:
        arrays = [np.asarray(d) for d in data_list]
        if not arrays or any(a.size == 0 for a in arrays):
            raise ValueError("make_bins needs non-empty data to infer the bin range.")
        # Safety margin so the extreme values do not sit exactly on an outer edge.
        lo = min(float(a.min()) for a in arrays) - 1e-6
        hi = max(float(a.max()) for a in arrays) + 1e-6
    else:
        lo, hi = clip_range
        if not lo < hi:
            raise ValueError(f"clip_range must satisfy lo < hi, got {clip_range!r}")

    # np.arange keeps the requested bin width exact; the extra bin_width on the
    # stop value makes the last edge reach (or pass) hi.
    return np.arange(lo, hi + bin_width, bin_width)

def plot_hist(ax, data, bins, label, color, filled=True):
    """
    Draw one distribution on ``ax`` as a density-normalised histogram.

    With ``filled=True`` a translucent filled histogram carrying the legend label
    is drawn, followed by an unlabelled outline of the same data on top of it.
    With ``filled=False`` only the labelled outline is drawn.
    """
    # Settings shared by every outline-only ("step") histogram drawn here.
    outline_kw = dict(
        bins=bins, density=True,
        histtype='step', linewidth=LINEWIDTH, color=color,
    )

    if not filled:
        ax.hist(data, label=label, **outline_kw)
        return

    fill_kw = dict(
        bins=bins, density=True,
        histtype='stepfilled', alpha=0.35,
        edgecolor=color, facecolor=color, linewidth=LINEWIDTH, label=label,
    )
    ax.hist(data, **fill_kw)
    # Second pass without a label: redraws the outline so it looks crisper.
    ax.hist(data, **outline_kw)


def draw_combined(baseline, proposed, bg, bins, out_path: Path):
    """
    Plot the background, baseline and proposed distributions in one figure
    and save it to ``out_path`` (parent directories are created if missing).
    """
    figure, axes = plt.subplots(figsize=(5.2, 4.0))

    # (data, legend label, colour) in drawing order: green, orange, blue.
    series = (
        (bg,       "train–train (bg)",     "#2ca02c"),
        (baseline, "baseline (gen–train)", "#ff7f0e"),
        (proposed, "proposed (gen–train)", "#1f77b4"),
    )
    for values, legend_label, hex_color in series:
        plot_hist(axes, values, bins, label=legend_label, color=hex_color)

    axes.set_xlabel("Similarity (cosine)")
    axes.set_ylabel("Density")
    axes.legend(frameon=True, loc="best")
    figure.tight_layout()

    out_path.parent.mkdir(parents=True, exist_ok=True)
    figure.savefig(out_path, dpi=300)
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(figure)


def draw_pair(one, bg, bins, out_path: Path, label_one: str, color_one: str):
    """
    Plot the background distribution together with a single other distribution
    (labelled ``label_one``, coloured ``color_one``) and save the figure to
    ``out_path`` (parent directories are created if missing).
    """
    figure, axes = plt.subplots(figsize=(5.2, 4.0))

    # Background first, then the distribution being compared against it.
    for values, legend_label, hex_color in (
        (bg,  "train–train (bg)", "#2ca02c"),
        (one, label_one,          color_one),
    ):
        plot_hist(axes, values, bins, label=legend_label, color=hex_color)

    axes.set_xlabel("Similarity (cosine)")
    axes.set_ylabel("Density")
    axes.legend(frameon=True, loc="best")
    figure.tight_layout()

    out_path.parent.mkdir(parents=True, exist_ok=True)
    figure.savefig(out_path, dpi=300)
    plt.close(figure)

In [None]:
def main(argv=None):
    """
    Load the three similarity files, build common bins and save the histogram(s).

    Args:
        argv: Optional list of command-line arguments. ``None`` (default) makes
            argparse read ``sys.argv``. When calling this from a notebook cell,
            pass an explicit list such as ``[]`` -- the kernel's own arguments in
            ``sys.argv`` are not options of this parser.

    Side effects:
        Writes one figure (``--mode combined``) or two figures (``--mode pair``,
        with ``_baseline`` / ``_proposed`` appended to the output stem) and prints
        the saved path(s).
    """
    p = argparse.ArgumentParser(description="Compare similarity distributions (hist, density).")
    # Input files: defaults reproduce the runs this notebook was written for.
    p.add_argument("--baseline-sim", type=Path,
                   default=Path("ret_plots/imagenette10_frozentext/20250918032056/similarity.pth"),
                   help="similarity.pth of the baseline model (gen-train)")
    p.add_argument("--bg-sim", type=Path,
                   default=Path("ret_plots/imagenette10_frozentext/20250918034027/similarity_wtrain.pth"),
                   help="similarity_wtrain.pth used as train-train background")
    p.add_argument("--proposed-sim", type=Path,
                   default=Path("ret_plots/imagenette10_frozentext/20250918034027/similarity.pth"),
                   help="similarity.pth of the proposed model (gen-train)")
    p.add_argument("--bin-width", type=float, default=0.005, help="Histogram bin width")
    p.add_argument("--clip", type=float, nargs=2, default=None,
                   metavar=("LO","HI"),
                   help="Fix the x-axis range to [LO, HI] (default: infer from data)")
    p.add_argument("--mode", choices=["combined", "pair"], default="combined",
                   help="combined=세 분포 한 그림, pair=baseline/bg & proposed/bg 두 그림")
    p.add_argument("--out", type=Path, default=Path("similarity_hist.pdf"),
                   help="Output figure path (pair mode appends _baseline/_proposed to the file stem)")
    args = p.parse_args(argv)

    # ----- Data loading -----
    baseline_top1 = load_top1_from_similarity(args.baseline_sim)  # gen–train (baseline)
    proposed_top1 = load_top1_from_similarity(args.proposed_sim)  # gen–train (proposed)
    bg_top1       = load_bg_from_similarity_wtrain(args.bg_sim)   # train–train (excluding self-match)

    # ----- bins -----
    # One shared set of edges so all distributions are directly comparable.
    bins = make_bins([baseline_top1, proposed_top1, bg_top1],
                     bin_width=args.bin_width,
                     clip_range=tuple(args.clip) if args.clip else None)

    # ----- Plot -----
    if args.mode == "combined":
        draw_combined(baseline_top1, proposed_top1, bg_top1, bins, args.out)
    else:
        # Pair mode: save two separate plots (two distributions per figure).
        # Keep the extension given in --out; fall back to .pdf when there is none.
        ext = args.out.suffix or ".pdf"
        out_base = args.out.with_name(args.out.stem + "_baseline" + ext)
        out_prop = args.out.with_name(args.out.stem + "_proposed" + ext)
        draw_pair(baseline_top1, bg_top1, bins, out_base,
                  label_one="baseline (gen–train)", color_one="#ff7f0e")  # orange
        draw_pair(proposed_top1, bg_top1, bins, out_prop,
                  label_one="proposed (gen–train)", color_one="#1f77b4")  # blue

    print("Saved:", args.out if args.mode=="combined" else f"{out_base}, {out_prop}")

