In [1]:
import numpy as np
import pandas as pd
import anndata as ad
from pathlib import Path

In [None]:
# Directory that holds every input .h5ad file used in this notebook.
output_dir = Path("/mnt/jwh83-data/Confetti/output/")

# .obs columns carrying the reference (ground-truth) cell-type annotation.
nature_label_key = "Cell Type"
hubmap_label_key = "cell_type_update"

# .obs columns carrying the predicted cluster label, one per segmentation
# method. They are kept as separate names so a single method can be pointed
# at a different column without touching the others.
mesmer_label_key = "leiden"
cellpose_label_key = "leiden"
cellposeSAM_label_key = "leiden"
microSAM_label_key = "leiden"
cellSAM_label_key = "leiden"
instanseg_label_key = "leiden"

In [3]:
# --- Reference annotations ---------------------------------------------------
adata_nature = ad.read_h5ad(output_dir / "nature_B004_registered_IDmatched.h5ad")
adata_hubmap = ad.read_h5ad(output_dir / "HuBMAP_Yang_annotate.h5ad")

# --- Per-method clustering results (all stored in the same sub-folder) -------
merged_dir = output_dir / "Channel_feature" / "Merged"

adata_mesmer = ad.read_h5ad(merged_dir / "mesmer_leiden_prelim.h5ad")
adata_cellpose = ad.read_h5ad(merged_dir / "cellpose_old_update3_leiden_prelim.h5ad")
adata_cellposeSAM = ad.read_h5ad(merged_dir / "cellposeSAM_leiden_prelim.h5ad")
adata_microSAM = ad.read_h5ad(merged_dir / "microSAM_leiden_prelim.h5ad")
adata_cellSAM = ad.read_h5ad(merged_dir / "cellSAM_2_update_leiden_prelim.h5ad")
adata_instanseg = ad.read_h5ad(merged_dir / "instanseg_leiden_prelim.h5ad")

# name -> (AnnData, .obs column holding the label); iterated by the comparison loop.
ground_truths = {
    "nature": (adata_nature, nature_label_key),
    "hubmap": (adata_hubmap, hubmap_label_key),
}

methods = {
    "mesmer": (adata_mesmer, mesmer_label_key),
    "cellpose": (adata_cellpose, cellpose_label_key),
    "cellposeSAM": (adata_cellposeSAM, cellposeSAM_label_key),
    "microSAM": (adata_microSAM, microSAM_label_key),
    "cellSAM": (adata_cellSAM, cellSAM_label_key),
    "instanseg": (adata_instanseg, instanseg_label_key),
}



In [4]:
def _to_dense(X):
    """Convert AnnData .X to a dense numpy array."""
    if hasattr(X, "toarray"):
        return X.toarray()
    return np.asarray(X)


def compute_cluster_centroids(adata, cluster_key, feature_names=None):
    """
    Compute per-cluster centroids (mean marker expression).

    Parameters
    ----------
    adata
        AnnData-like object; it is indexed as ``adata[:, feature_names]`` and
        its ``.X``, ``.obs`` and ``.var_names`` attributes are read.
    cluster_key
        Name of the ``adata.obs`` column holding each observation's cluster
        label. Labels are cast to ``str`` before grouping.
    feature_names
        Features (markers) to average. Defaults to ``adata.var_names``.

    Returns a DataFrame:
        index   = cluster labels (as strings), index name = ``cluster_key``
        columns = feature_names (markers)
    """
    if feature_names is None:
        feature_names = adata.var_names

    # Subset to the requested features and densify so pandas can average them.
    adata_sub = adata[:, feature_names]
    df = pd.DataFrame(_to_dense(adata_sub.X), columns=feature_names)

    # Group by an external label array rather than inserting the labels as a
    # column of ``df``: if ``cluster_key`` happened to equal a feature name,
    # the inserted column would overwrite that feature's values and the
    # feature would then be missing from the returned centroids.
    labels = adata.obs[cluster_key].astype(str).to_numpy()

    # rename_axis restores the index name that grouping by a column would give.
    centroids = df.groupby(labels).mean().rename_axis(index=cluster_key)
    return centroids


def cosine_similarity_matrix(gt_centroids, pred_centroids):
    """
    Pairwise cosine similarity between the rows of two centroid tables.

    gt_centroids:   DataFrame of shape (n_gt_clusters, n_features)
    pred_centroids: DataFrame of shape (n_pred_clusters, n_features)

    Rows are compared positionally by feature; column labels are not used,
    so both inputs must list their features in the same order.

    Returns a DataFrame of shape (n_gt_clusters, n_pred_clusters) with:
        index   = gt cluster labels (as strings)
        columns = predicted cluster labels (as strings)
    """
    eps = 1e-12  # keeps the division finite for an all-zero centroid

    def _unit_rows(frame):
        # Scale every row to (approximately) unit Euclidean length.
        mat = frame.values
        lengths = np.linalg.norm(mat, axis=1, keepdims=True)
        return mat / (lengths + eps)

    gt_unit = _unit_rows(gt_centroids)
    pred_unit = _unit_rows(pred_centroids)

    # Dot products of unit vectors are the cosine similarities.
    return pd.DataFrame(
        gt_unit @ pred_unit.T,
        index=gt_centroids.index.astype(str),
        columns=pred_centroids.index.astype(str),
    )


def summarize_cluster_alignment(gt_name, method_name, sim_df):
    """
    Summarize, per ground-truth cluster, its closest predicted cluster.

    sim_df: DataFrame (gt_cluster x pred_cluster) of cosine similarities.

    Returns a tuple ``(sim_df, summary)``:
      - sim_df: the input object, returned as-is
      - summary: one row per GT cluster with the columns
          gt, method, gt_cluster, best_pred_cluster, best_similarity,
          mean_best_similarity, median_best_similarity
        where the last two repeat the mean / median of ``best_similarity``
        on every row.
    """
    # Row-wise winner: the label of the most similar predicted cluster and its score.
    row_best_label = sim_df.idxmax(axis=1)
    row_best_score = sim_df.max(axis=1)

    per_cluster = {
        "gt": gt_name,
        "method": method_name,
        "gt_cluster": sim_df.index.astype(str),
        "best_pred_cluster": row_best_label.values.astype(str),
        "best_similarity": row_best_score.values,
    }

    # Scalars passed to assign are broadcast to every row.
    summary = pd.DataFrame(per_cluster).assign(
        mean_best_similarity=row_best_score.mean(),
        median_best_similarity=row_best_score.median(),
    )

    return sim_df, summary

In [5]:
# Accumulators filled by the pairwise comparison below.
all_summaries = []        # one per-cluster summary DataFrame per (GT, method) pair
all_similarity_mats = {}  # (gt_name, method_name) -> cosine-similarity DataFrame

for gt_name, (adata_gt, gt_key) in ground_truths.items():
    for method_name, (adata_m, method_key) in methods.items():
        print(f"Comparing GT={gt_name} to method={method_name} (cluster-level)...")

        # Centroids are compared only on markers present in both datasets.
        common_features = adata_gt.var_names.intersection(adata_m.var_names)
        if not len(common_features):
            print(f"  WARNING: No shared features between {gt_name} and {method_name}. Skipping.")
            continue

        gt_centroids = compute_cluster_centroids(adata_gt, gt_key, common_features)
        pred_centroids = compute_cluster_centroids(adata_m, method_key, common_features)

        sim_df = cosine_similarity_matrix(gt_centroids, pred_centroids)
        sim_df, summary = summarize_cluster_alignment(gt_name, method_name, sim_df)

        all_similarity_mats[(gt_name, method_name)] = sim_df
        all_summaries.append(summary)

# Stack the per-pair summaries; fall back to an empty frame if every pair was skipped.
summary_df = pd.concat(all_summaries, ignore_index=True) if all_summaries else pd.DataFrame()

# One row per (GT, method): mean / median of the best-match similarity over GT clusters.
if summary_df.empty:
    method_stats = pd.DataFrame()
else:
    method_stats = (
        summary_df
        .groupby(["gt", "method"])
        .agg(
            mean_best_similarity=("best_similarity", "mean"),
            median_best_similarity=("best_similarity", "median"),
        )
        .reset_index()
    )

print("\n=== Per-method cluster-level agreement (cosine similarity) ===")
print(method_stats)


Comparing GT=nature to method=mesmer (cluster-level)...
Comparing GT=nature to method=cellpose (cluster-level)...
Comparing GT=nature to method=cellposeSAM (cluster-level)...
Comparing GT=nature to method=microSAM (cluster-level)...
Comparing GT=nature to method=cellSAM (cluster-level)...
Comparing GT=nature to method=instanseg (cluster-level)...
Comparing GT=hubmap to method=mesmer (cluster-level)...
Comparing GT=hubmap to method=cellpose (cluster-level)...
Comparing GT=hubmap to method=cellposeSAM (cluster-level)...
Comparing GT=hubmap to method=microSAM (cluster-level)...
Comparing GT=hubmap to method=cellSAM (cluster-level)...
Comparing GT=hubmap to method=instanseg (cluster-level)...

=== Per-method cluster-level agreement (cosine similarity) ===
        gt       method  mean_best_similarity  median_best_similarity
0   hubmap      cellSAM              0.848282                0.897454
1   hubmap     cellpose              0.848511                0.910731
2   hubmap  cellposeSAM     