# Time-course hierarchical clustering (first-pass metrics)

This notebook reproduces the PNG + Excel + ZIP outputs we generated in chat.

**You only need to edit the user parameters at the top of the next cell: at minimum `INPUT_PATH` and `OUT_DIR`, and optionally `TAG`, `ID_COL`, `GROUP_COL`, `K_ROW`, and `K_COL`.**


In [1]:
# ============================================================
# Generalized hierarchical BICLUSTERING (cells x metrics)
# - No fixed timepoints
# - Exports: PNG + CSV/Excel with matching row/col order + clusters
# ============================================================

import os
import re
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

from scipy.cluster.hierarchy import linkage, fcluster
from scipy.spatial.distance import pdist

# -----------------------------
# USER PARAMS (edit these)
# -----------------------------
# CSV that is read with pandas.read_csv; one row per cell, numeric columns become metrics.
INPUT_PATH = r"J:/Cohen Lab/Maria Clara/2_Lab data/9_Napari/OUTPUT new code/New PCA soma/PCA cleaning/Data S1.csv"  # <-- change if needed
# Folder that receives every PNG/CSV/Excel output; it is created if it does not exist.
OUT_DIR    = r"J:/Cohen Lab/Maria Clara/2_Lab data/9_Napari/OUTPUT new code/hierarchical clustering/bicluster/Diff_soma_bicluster_02152026"
# File-name prefix of the per-cell clustermap, the ordered-matrix exports and the
# group-median CSVs. (The two group-median figures/workbooks use fixed file names.)
TAG        = "bicluster_Diff"

# Choose identifiers / grouping
ID_COL     = "image_name"     # row identifier; a unique index is derived automatically if it is missing or has duplicates
GROUP_COL  = "cell_type"      # categorical column used for row colors and for the group medians; rows are labelled "NA" if it is absent

# Clustering params
METHOD     = "ward"           # scipy linkage method
DIST       = "euclidean"      # pdist metric; only used when METHOD is not "ward" (ward is run on the observations directly)

# Scaling
DO_ZSCORE_PER_METRIC = True   # z-score each metric across cells (recommended for heatmap)

# Columns that are never treated as metrics, even if they are numeric
FORCE_META_COLS = {
    "Unnamed: 0", "dataset", "image_name", "cell_type"
}

# Cluster cut for the discrete cluster labels written to the exports.
# Set to None (or a value below 2) to skip the labels for that axis.
K_ROW = 6    # clusters for CELLS (rows) in export
K_COL = 6    # clusters for METRICS (cols) in export; also reused by the group-median exports (they fall back to 6 if this is None or < 2)

# Figure size of the per-cell clustermap, passed to seaborn as figsize=(FIG_W, FIG_H)
FIG_W = 16
FIG_H = 12

# Also bundle the ordered exports into one Excel workbook (requires openpyxl).
# The CSV files are always written; if the Excel export fails, the error is printed and the run continues.
SAVE_EXCEL = True

# -----------------------------
# Helpers
# -----------------------------
def ensure_outdir(path):
    """Create the directory `path`, including missing parents; a no-op if it already exists."""
    os.makedirs(path, exist_ok=True)

def make_unique_index(df, id_col):
    """Build one unique string identifier per row of `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose rows need identifiers.
    id_col : str
        Column holding the preferred identifier. If it is not a column of
        `df`, the identifiers fall back to ``row_0``, ``row_1``, ...

    Returns
    -------
    pandas.Series
        String identifiers, indexed like `df` so that assigning the result
        back to `df` lines up row by row whatever index `df` carries.
        Missing values in `id_col` become ``"NA"``. If any identifier is
        repeated, every identifier gets a ``__<row position>`` suffix.
    """
    if id_col in df.columns:
        raw_ids = df[id_col]
        # Fill missing values BEFORE the string cast: astype(str) would turn
        # NaN into the literal "nan" and leave nothing for a later fillna.
        base = raw_ids.astype(object).where(raw_ids.notna(), "NA").astype(str)
    else:
        base = pd.Series([f"row_{i}" for i in range(len(df))], index=df.index)
    # ensure uniqueness
    if base.duplicated().any():
        # Share df's index: Series addition aligns on labels, so a suffix
        # built on a fresh 0..n-1 index would yield NaN for any other index.
        suffix = pd.Series([str(i) for i in range(len(df))], index=df.index)
        base = base + "__" + suffix
    return base

def zscore_cols(X: pd.DataFrame) -> pd.DataFrame:
    """Standardise every column of `X` to mean 0 and population SD 1.

    Columns with zero spread cannot be scaled; they (and any other cell whose
    result is undefined) come back as 0 instead of NaN.
    """
    centered = X.sub(X.mean(axis=0), axis="columns")
    spread = X.std(axis=0, ddof=0)
    # Blank out zero SDs so the division gives NaN rather than +/-inf.
    spread = spread.mask(spread == 0)
    return centered.div(spread, axis="columns").fillna(0)

def relabel_clusters_by_leaf_order(cluster_ids, leaf_order):
    """
    Renumber cluster labels in the order the clusters are met along the dendrogram.

    cluster_ids: array-like of cluster labels, one per item in original order
    leaf_order: item positions in dendrogram (plot) order
    Returns: array shaped like cluster_ids where the first cluster met along
    leaf_order is 1, the next new one is 2, and so on. Positions that never
    occur in leaf_order stay 0.
    """
    labels = np.asarray(cluster_ids)
    remapped = np.zeros_like(labels)
    new_label_of = {}
    for position in leaf_order:
        original = int(labels[position])
        # First sighting of a cluster gives it the next free number.
        remapped[position] = new_label_of.setdefault(original, len(new_label_of) + 1)
    return remapped

def compute_linkage(X, method="ward", dist="euclidean"):
    """Return the scipy linkage matrix for the rows of `X`.

    X must expose `.values` (e.g. a DataFrame); each row is one observation.
    For ward the observation matrix is handed to scipy directly and `dist`
    is not used. For any other method the condensed distance matrix is
    computed with `dist` first and then linked.
    """
    observations = X.values
    if method.lower() != "ward":
        return linkage(pdist(observations, metric=dist), method=method)
    return linkage(observations, method=method)

# -----------------------------
# Load
# -----------------------------
ensure_outdir(OUT_DIR)
df = pd.read_csv(INPUT_PATH)

# Build row id. The ids are assigned positionally (to_numpy) so they land on
# the right rows regardless of how the returned Series is indexed.
row_id = make_unique_index(df, ID_COL)
df = df.copy()
df["_row_id"] = row_id.to_numpy()

# Decide meta vs metrics automatically.
# ID_COL and GROUP_COL are always metadata: if the user points them at a
# numeric column (e.g. a day number) without also listing it in
# FORCE_META_COLS, that column must not be clustered as if it were a metric.
meta_names = set(FORCE_META_COLS) | {"_row_id", ID_COL, GROUP_COL}
meta_cols = [c for c in df.columns if c in meta_names]

# numeric metric columns = numeric and not in meta
numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
metric_cols = [c for c in numeric_cols if c not in meta_names]

if len(metric_cols) == 0:
    raise ValueError("No numeric metric columns found. Check your input / meta columns.")
if len(df) < 2 or len(metric_cols) < 2:
    # Hierarchical clustering needs at least two observations on each axis;
    # fail here with a readable message instead of deep inside scipy.
    raise ValueError(
        "Biclustering needs at least 2 rows and 2 metric columns; "
        f"got {len(df)} rows and {len(metric_cols)} metric columns."
    )

# Build matrix (rows = cells, columns = metrics), indexed by the unique row id
X_raw = df.set_index("_row_id")[metric_cols].copy()

# Optionally z-score per metric
X = zscore_cols(X_raw) if DO_ZSCORE_PER_METRIC else X_raw.copy()

# Row annotation (timepoints/conditions); every row is "NA" when GROUP_COL is absent
if GROUP_COL in df.columns:
    row_group = df.set_index("_row_id")[GROUP_COL].astype(str)
else:
    row_group = pd.Series(["NA"] * len(X), index=X.index, name="group")

# Colors for groups: assigned in sorted group order so the mapping is stable between runs
uniq_groups = pd.unique(row_group)
palette = sns.color_palette("tab10", n_colors=max(3, len(uniq_groups)))
group2color = {g: palette[i % len(palette)] for i, g in enumerate(sorted(uniq_groups))}
row_colors = row_group.map(group2color)

# -----------------------------
# Biclustering plot (cells x metrics)
# -----------------------------
# seaborn could compute the linkages itself, but we pre-compute them so the
# cluster IDs exported below are cut from exactly the trees drawn in the PNG.
row_link = compute_linkage(X, method=METHOD, dist=DIST)
col_link = compute_linkage(X.T, method=METHOD, dist=DIST)

# One label for both the initial colorbar and the rebuilt horizontal one.
cbar_label = "z-score" if DO_ZSCORE_PER_METRIC else "raw"

cg = sns.clustermap(
    X,
    row_linkage=row_link,
    col_linkage=col_link,
    row_colors=row_colors,
    cmap="BrBG",
    figsize=(FIG_W, FIG_H),
    xticklabels=False,
    yticklabels=False,
    cbar_kws={"label": cbar_label}
)

# Add legend for group colors: zero-size bars exist only to provide legend handles
for g, col in group2color.items():
    cg.ax_row_dendrogram.bar(0, 0, color=col, label=g, linewidth=0)
cg.ax_row_dendrogram.legend(
    title=GROUP_COL,
    loc="center",
    ncol=1,
    bbox_to_anchor=(0.5, 1.1),
    fontsize=8,
    title_fontsize=9
)

png_path = os.path.join(OUT_DIR, f"{TAG}_cellsXmetrics_clustermap.png")

# ---- Layout fix: avoid legend overlaps
cg.fig.subplots_adjust(bottom=0.16)

# ---- Move + force horizontal colorbar BELOW the heatmap
cg.cax.set_position([0.25, 0.03, 0.5, 0.03])  # [left, bottom, width, height]
mappable = cg.ax_heatmap.collections[0]
cg.cax.clear()  # drop the vertical colorbar seaborn drew before redrawing it horizontally
cb = cg.fig.colorbar(mappable, cax=cg.cax, orientation="horizontal")
cb.set_label(cbar_label, labelpad=6)

# Save through the figure handle rather than plt.savefig, so the output does
# not depend on which figure pyplot currently considers "current".
cg.fig.savefig(png_path, dpi=300, bbox_inches="tight")
plt.close(cg.fig)

# -----------------------------
# Export: ordered matrices + cluster labels (matching PNG)
# -----------------------------
# Leaf positions exactly as drawn in the PNG
row_order = cg.dendrogram_row.reordered_ind
col_order = cg.dendrogram_col.reordered_ind

rows = X.index.to_list()
cols = X.columns.to_list()

# Translate leaf positions into row ids / metric names
row_ordered_ids = X.index[row_order].to_list()
col_ordered_ids = X.columns[col_order].to_list()

# Scaled and raw matrices in plot order
X_ordered = X.loc[row_ordered_ids, col_ordered_ids]
Xraw_ordered = X_raw.loc[row_ordered_ids, col_ordered_ids]

# Cluster IDs for rows/cols (optional): cut each tree into K clusters, then
# renumber the clusters so they count up along the plotted order.
row_cluster = None
if K_ROW is not None and K_ROW >= 2:
    rc = relabel_clusters_by_leaf_order(
        fcluster(row_link, t=K_ROW, criterion="maxclust"),
        leaf_order=row_order,
    )
    row_cluster = pd.Series(rc, index=X.index, name=f"row_cluster_k{K_ROW}").loc[row_ordered_ids]

col_cluster = None
if K_COL is not None and K_COL >= 2:
    cc = relabel_clusters_by_leaf_order(
        fcluster(col_link, t=K_COL, criterion="maxclust"),
        leaf_order=col_order,
    )
    col_cluster = pd.Series(cc, index=X.columns, name=f"col_cluster_k{K_COL}").loc[col_ordered_ids]

# Row metadata table: id, group and (optionally) cluster, in plot order
row_meta_columns = {
    "row_id": row_ordered_ids,
    GROUP_COL: row_group.loc[row_ordered_ids].values,
}
if row_cluster is not None:
    row_meta_columns[row_cluster.name] = row_cluster.values
row_meta = pd.DataFrame(row_meta_columns)

# Column metadata table: metric name and (optionally) cluster, in plot order
col_meta_columns = {"metric": col_ordered_ids}
if col_cluster is not None:
    col_meta_columns[col_cluster.name] = col_cluster.values
col_meta = pd.DataFrame(col_meta_columns)

# Save CSVs (always written, independent of SAVE_EXCEL)
X_ordered.to_csv(os.path.join(OUT_DIR, f"{TAG}_Z_ordered_matrix.csv"))
Xraw_ordered.to_csv(os.path.join(OUT_DIR, f"{TAG}_RAW_ordered_matrix.csv"))
row_meta.to_csv(os.path.join(OUT_DIR, f"{TAG}_row_metadata_ordered.csv"), index=False)
col_meta.to_csv(os.path.join(OUT_DIR, f"{TAG}_col_metadata_ordered.csv"), index=False)

# Optional Excel bundle: the same four tables as sheets of one workbook
if SAVE_EXCEL:
    xlsx_path = os.path.join(OUT_DIR, f"{TAG}_bicluster_exports.xlsx")
    excel_sheets = [
        (Xraw_ordered, "RAW_ordered_matrix", True),
        (X_ordered, "Z_ordered_matrix", True),
        (row_meta, "Row_metadata", False),
        (col_meta, "Col_metadata", False),
    ]
    try:
        with pd.ExcelWriter(xlsx_path, engine="openpyxl") as w:
            for frame, sheet_name, keep_index in excel_sheets:
                frame.to_excel(w, sheet_name=sheet_name, index=keep_index)
        print("Saved Excel:", xlsx_path)
    except Exception as e:
        # Best effort: the CSVs above already hold the same data.
        print("Excel export failed, kept CSVs only. Error:", repr(e))

print("DONE")
print("PNG:", png_path)
print("OUT_DIR:", OUT_DIR)



# ============================
# Extra exports: median by group (interpretation aid)
# ============================
# Uses the same metrics retained for clustering, but collapses single-cells -> group medians
# Then computes per-metric z-scores ACROSS groups (so patterns are comparable between groups)

GROUP_COL_FOR_MEDIANS = GROUP_COL  # e.g., 'cell_type'

# Group label of every row, in the row order of the raw matrix
group = row_group.loc[X_raw.index].astype(str)

# Group medians on RAW values (rows = groups, columns = metrics)
Gmed_raw = X_raw.groupby(group).median()

# Z-score across groups (per metric); metrics that do not vary between groups become 0
mu = Gmed_raw.mean(axis=0)
sd = Gmed_raw.std(axis=0, ddof=0).replace(0, np.nan)
Gmed_z = Gmed_raw.sub(mu, axis="columns").div(sd, axis="columns").fillna(0)

# Save group median matrices
group_median_raw_csv = os.path.join(OUT_DIR, f"{TAG}_group_median_RAW.csv")
group_median_z_csv = os.path.join(OUT_DIR, f"{TAG}_group_median_Z.csv")
Gmed_raw.to_csv(group_median_raw_csv)
Gmed_z.to_csv(group_median_z_csv)

# Optional: long-format by metric-cluster (GraphPad-friendly).
# Only possible when the main analysis produced metric cluster labels.
if globals().get("col_cluster") is not None:
    long_frames = []
    for cl in sorted(col_cluster.unique()):
        mets = [metric for metric, label in col_cluster.items() if label == cl]
        if not mets:
            continue
        tmp = Gmed_raw[mets].copy()
        tmp.insert(0, "metric_cluster", cl)
        # One row per (group, metric) pair
        tmp_long = tmp.reset_index(names=GROUP_COL_FOR_MEDIANS).melt(
            id_vars=[GROUP_COL_FOR_MEDIANS, "metric_cluster"],
            var_name="metric",
            value_name="median_raw"
        )
        long_frames.append(tmp_long)
    cluster_summary = long_frames
    if long_frames:
        cluster_summary = pd.concat(long_frames, ignore_index=True)
        cluster_summary.to_csv(os.path.join(OUT_DIR, f"{TAG}_cluster_median_by_group_LONG.csv"), index=False)

print("Saved group median exports:", group_median_raw_csv, group_median_z_csv)



# ============================
# Extra PNG: group-median Z clustermap (TRUE biclustering on group medians)
#   rows = groups (e.g. time points / conditions from GROUP_COL)
#   cols = metrics (same metrics used in main biclustering)
#
# NOTE: This is NOT per-cell (it's by design: group medians). It complements the per-cell biclustering PNG.
# ============================
# seaborn, pyplot, linkage, fcluster and pdist are already imported at the top
# of this cell; matplotlib.patches is the only extra module needed here.
import matplotlib.patches as mpatches

# Single implementation of the relabelling logic; the private name is kept
# as an alias of the helper defined earlier in this cell.
_relabel_clusters_by_leaf_order = relabel_clusters_by_leaf_order

# ---------- Customize timepoint / group colors here ----------
# If you already have fixed colors in your paper, set them here.
# Example (EDIT these keys to match your GROUP_COL values exactly):
TIMEPOINT_COLORS = {
     "iPSCs": "#55585b",
     "iN_D7": "#0eff3e",
     "iN_D14": "#2c64a0",
     "iN_D21": "#d627b0",
     "iN_D28": "#bd9867",
}


def _build_group_colors(names, fixed_colors):
    """Map every group name to a color.

    names: list of group names, in the order used for the fallback palette.
    fixed_colors: dict name -> color; wins over the fallback for the names it holds.
    Returns (row_colors, color_map): a Series indexed by group name and the
    full name -> color dict it was built from.
    """
    pal = sns.color_palette("tab20", n_colors=max(3, len(names)))
    fallback = {g: pal[i % len(pal)] for i, g in enumerate(names)}
    color_map = {**fallback, **fixed_colors}
    return pd.Series(names, index=names).map(color_map), color_map


def _plot_group_clustermap(matrix, row_colors, cbar_label, legend_title, out_png):
    """Draw, save and close a groups x metrics clustermap (ward linkage on both axes).

    matrix: DataFrame, rows = groups, columns = metrics.
    row_colors: Series of colors indexed by group name.
    cbar_label: label of the horizontal colorbar placed below the heatmap.
    legend_title: title of the group color legend.
    out_png: path of the PNG to write.
    Returns (grid, row_link, col_link) so exports can reuse the plotted trees.
    Raises ValueError when there are fewer than 2 groups or 2 metrics.
    """
    n_groups, n_metrics = matrix.shape
    if n_groups < 2 or n_metrics < 2:
        raise ValueError(
            "A group-median clustermap needs at least 2 groups and 2 metrics; "
            f"got {n_groups} groups and {n_metrics} metrics."
        )

    row_link = linkage(matrix.values, method="ward")
    col_link = linkage(matrix.values.T, method="ward")

    grid = sns.clustermap(
        matrix,
        row_linkage=row_link,
        col_linkage=col_link,
        row_colors=row_colors,
        cmap="BrBG",
        vmin=-2, vmax=2,
        figsize=(18, max(5, 0.5 * n_groups)),
        xticklabels=False,
        yticklabels=True,
        cbar_kws={"label": cbar_label}
    )

    # Put group labels on the LEFT of heatmap
    grid.ax_heatmap.yaxis.set_ticks_position('left')
    grid.ax_heatmap.tick_params(axis='y', labelleft=True, labelright=False)
    grid.ax_heatmap.set_yticklabels(grid.ax_heatmap.get_yticklabels(), rotation=0)

    # Move colorbar BELOW (horizontal) to avoid overlaps
    grid.fig.subplots_adjust(bottom=0.16)
    grid.cax.set_position([0.25, 0.03, 0.5, 0.03])
    heatmap_mesh = grid.ax_heatmap.collections[0]
    grid.cax.clear()
    colorbar = grid.fig.colorbar(heatmap_mesh, cax=grid.cax, orientation="horizontal")
    colorbar.set_label(cbar_label, labelpad=6)

    # Legend for group colors; colors are looked up by group name, never by position
    legend_handles = [mpatches.Patch(color=row_colors.loc[g], label=g) for g in matrix.index]
    grid.ax_heatmap.legend(
        handles=legend_handles,
        title=legend_title,
        loc="upper right",
        bbox_to_anchor=(-0.02, 1.25),
        frameon=True
    )

    grid.savefig(out_png, dpi=300, bbox_inches="tight", pad_inches=0.2)
    plt.close(grid.fig)
    print("Saved:", out_png)
    return grid, row_link, col_link


def _export_metric_clusters(grid, col_link, matrix, raw_medians, k, value_prefix, group_header, xlsx_path):
    """Write the metric clusters of a group clustermap to an Excel workbook.

    grid / col_link: the clustermap and the column linkage it was drawn with.
    matrix: the plotted groups x metrics values; raw_medians: raw group medians
    holding the same groups and metrics.
    k: number of metric clusters to cut (fcluster, criterion "maxclust").
    value_prefix: column prefix for the plotted values (e.g. "Z").
    group_header: column header of the "Group_order" sheet.
    Sheets: "Metric_cluster_table", "Group_order" and one "Cluster_<n>" sheet
    per cluster with RAW_<group> and <value_prefix>_<group> columns.
    Returns (metric_cluster, values_ord, raw_ord, metrics_ord, groups_ord),
    all in plotted order.
    """
    col_order = grid.dendrogram_col.reordered_ind
    row_order = grid.dendrogram_row.reordered_ind

    metrics = list(matrix.columns)
    groups = list(matrix.index)
    metrics_ord = [metrics[j] for j in col_order]
    groups_ord = [groups[i] for i in row_order]

    values_ord = matrix.loc[groups_ord, metrics_ord]
    raw_ord = raw_medians.loc[groups_ord, metrics_ord]

    # Cut the SAME column tree that was plotted, then number clusters left to right
    labels = fcluster(col_link, t=k, criterion="maxclust")
    labels = relabel_clusters_by_leaf_order(labels, leaf_order=col_order)
    metric_cluster = pd.Series(labels, index=metrics, name=f"metric_cluster_k{k}").loc[metrics_ord]

    with pd.ExcelWriter(xlsx_path, engine="openpyxl") as w:
        pd.DataFrame({"metric": metrics_ord, "cluster": metric_cluster.values}).to_excel(w, sheet_name="Metric_cluster_table", index=False)
        pd.DataFrame({group_header: groups_ord}).to_excel(w, sheet_name="Group_order", index=False)
        for cl in sorted(metric_cluster.unique()):
            mets = [m for m, c in zip(metrics_ord, metric_cluster.values) if c == cl]
            # Transposed blocks: one row per metric, one column per group
            raw_block = raw_ord[mets].T
            val_block = values_ord[mets].T
            raw_block.columns = [f"RAW_{g}" for g in raw_block.columns]
            val_block.columns = [f"{value_prefix}_{g}" for g in val_block.columns]
            out = pd.concat([raw_block, val_block], axis=1)
            out.insert(0, "cluster", int(cl))
            out.insert(1, "metric", out.index)
            out = out.reset_index(drop=True)
            sheet = f"Cluster_{int(cl)}"[:31]  # Excel sheet names are limited to 31 characters
            out.to_excel(w, sheet_name=sheet, index=False)

    print("Saved:", xlsx_path)
    return metric_cluster, values_ord, raw_ord, metrics_ord, groups_ord


# ---------- Figure: z-score (across groups) of the group medians ----------
group_names = list(Gmed_z.index.astype(str))

# `merged` (name -> color) is now built on every path, so the next figure
# always reuses the same colors. The per-cell `row_colors`, `row_link` and
# `col_link` of the main analysis are intentionally NOT overwritten here.
row_colors_med, merged = _build_group_colors(group_names, TIMEPOINT_COLORS)

# Keep SAME metric order as main clustermap if available
try:
    metric_order = col_ordered_ids
except NameError:
    metric_order = list(Gmed_z.columns)

Gmed_z_plot = Gmed_z.loc[group_names, metric_order]

out_png_med = os.path.join(OUT_DIR, "biclusterbyMedian_Diff_cellsXmetrics_clustermap.png")
cg2, row_link_med, col_link_med = _plot_group_clustermap(
    Gmed_z_plot,
    row_colors_med,
    cbar_label="z-score (across groups)",
    legend_title=f"{GROUP_COL} colors",
    out_png=out_png_med,
)

# ---- Export Excel for the MEDIAN-by-group clustermap (cg2): clusters -> metrics with RAW + Z
K_METRIC_CLUSTERS_MEDIAN = K_COL if ('K_COL' in globals() and K_COL is not None and K_COL >= 2) else 6

xlsx2 = os.path.join(OUT_DIR, "biclusterbyMedian_Diff_metric_clusters_RAW_and_Z.xlsx")
metric_cluster2, Z2_ord, RAW2_ord, metrics2_ord, groups2_ord = _export_metric_clusters(
    cg2,
    col_link_med,
    Gmed_z_plot,
    Gmed_raw,
    k=K_METRIC_CLUSTERS_MEDIAN,
    value_prefix="Z",
    group_header=GROUP_COL,
    xlsx_path=xlsx2,
)


# ============================
# Additional PNG: Median of cell-wise Z (timecourse-like)
#   Step 1: z-score per metric across ALL CELLS (global)
#   Step 2: take median within each group on those z-scored values
#   Step 3: clustermap (TRUE biclustering) on the group-median-of-cellwise-Z matrix
# ============================

# 1) global per-cell z-score (across all cells), per metric
X_z_global = zscore_cols(X_raw)

# 2) median within each group on those z-scores
group = row_group.loc[X_raw.index].astype(str)
Gmed_cellZ = X_z_global.groupby(group).median()

# keep metric order from main clustermap if available
try:
    metric_order = col_ordered_ids
except NameError:
    metric_order = list(Gmed_cellZ.columns)

group_names2 = list(Gmed_cellZ.index.astype(str))
Gmed_cellZ_plot = Gmed_cellZ.loc[group_names2, metric_order]

# group colors: same rule as the previous figure (fixed colors win, palette otherwise)
row_colors2, _ = _build_group_colors(group_names2, TIMEPOINT_COLORS)

out_png_med2 = os.path.join(OUT_DIR, "biclusterbyMedianOfCellwiseZ_Diff_cellsXmetrics_clustermap.png")
cg3, row_link2, col_link2 = _plot_group_clustermap(
    Gmed_cellZ_plot,
    row_colors2,
    cbar_label="median of cell-wise z (global)",
    legend_title=f"{GROUP_COL} colors",
    out_png=out_png_med2,
)

# ---- Export Excel for the MEDIAN-of-cellwise-Z clustermap (cg3): clusters -> metrics with RAW(group median) + CELLZ(median of cellwise Z)
K_METRIC_CLUSTERS_CELLZ = K_COL if ('K_COL' in globals() and K_COL is not None and K_COL >= 2) else 6

xlsx3 = os.path.join(OUT_DIR, "biclusterbyMedianOfCellwiseZ_Diff_metric_clusters_RAW_and_CELLZ.xlsx")
metric_cluster3, CELLZ3_ord, RAW3_ord, metrics3_ord, groups3_ord = _export_metric_clusters(
    cg3,
    col_link2,
    Gmed_cellZ_plot,
    Gmed_raw,  # raw group medians for reference
    k=K_METRIC_CLUSTERS_CELLZ,
    value_prefix="CELLZ",
    group_header=GROUP_COL,
    xlsx_path=xlsx3,
)



Saved Excel: J:/Cohen Lab/Maria Clara/2_Lab data/9_Napari/OUTPUT new code/hierarchical clustering/bicluster/Diff_soma_bicluster_02152026\bicluster_Diff_bicluster_exports.xlsx
DONE
PNG: J:/Cohen Lab/Maria Clara/2_Lab data/9_Napari/OUTPUT new code/hierarchical clustering/bicluster/Diff_soma_bicluster_02152026\bicluster_Diff_cellsXmetrics_clustermap.png
OUT_DIR: J:/Cohen Lab/Maria Clara/2_Lab data/9_Napari/OUTPUT new code/hierarchical clustering/bicluster/Diff_soma_bicluster_02152026
Saved group median exports: J:/Cohen Lab/Maria Clara/2_Lab data/9_Napari/OUTPUT new code/hierarchical clustering/bicluster/Diff_soma_bicluster_02152026\bicluster_Diff_group_median_RAW.csv J:/Cohen Lab/Maria Clara/2_Lab data/9_Napari/OUTPUT new code/hierarchical clustering/bicluster/Diff_soma_bicluster_02152026\bicluster_Diff_group_median_Z.csv
Saved: J:/Cohen Lab/Maria Clara/2_Lab data/9_Napari/OUTPUT new code/hierarchical clustering/bicluster/Diff_soma_bicluster_02152026\biclusterbyMedian_Diff_cellsXmetrics_