In [1]:
import os
import scanpy as sc
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import pandas as pd
from pathlib import Path
import seaborn as sns
import numpy as np
import re


# Load the AnnData objects used by the cells below.
# adata1: all mouse cells; adata4: all human cells.
adata1 = sc.read_h5ad("/path_to_all_mouse_cells_object.h5ad")
# adata2 is read by the "Average pseudotime per sample" cell (adata2.obs), so it has to be
# loaded here; leaving it commented out makes that cell fail with NameError on a fresh kernel.
adata2 = sc.read_h5ad("/path_to_mouse_only_fib_and_dec_cells_object.h5ad")
#adata3 = sc.read_h5ad("path_to_mouse_only_1dpi_late_and_4dpi_fib_and_dec_cells_object.h5ad")
adata4 = sc.read_h5ad("/path_to_all_human_cells_object.h5ad")
#adata5 = sc.read_h5ad("/path_to_human_only_fib_and_dec_cells_object.h5ad")

# Directory that receives the figures written by the first two plotting cells.
output_dir = Path("/path_to_all_mouse_cells_object")
output_dir.mkdir(parents=True, exist_ok=True)

In [None]:
### All Human object — gray others, highlight selected labels ###
# For each human donor, draw every cell at its spatial coordinates on a black canvas:
# non-highlighted cells in dark gray first, highlighted cell types in their stored palette
# color on top, then save one PNG per donor.

key = "cell_type_coarse"

# Build color map from stored palette in the object
labels_all = adata4.obs[key].astype("category").cat.categories
palette_key = f"{key}_colors"
if palette_key not in adata4.uns:
    raise KeyError(f"'{palette_key}' not found in adata4.uns")
palette = adata4.uns[palette_key]
if len(palette) != len(labels_all):
    raise ValueError(f"Palette length ({len(palette)}) does not match categories ({len(labels_all)}).")
# Keys are stripped strings so they compare equal to the stripped per-cell labels built below.
color_map = {str(lbl).strip(): col for lbl, col in zip(labels_all, palette)}

# Labels drawn in color; every other label is drawn in gray underneath.
# NOTE(review): the original selection was never defined in this notebook. Defaulting to all
# categories (consistent with the "all_celltype_coarse" filename) — narrow this list if needed.
highlight_labels = list(color_map)
other_color = "#2d2d2d"

# Only keep colors for the chosen highlights
color_map_sel = {lbl: col for lbl, col in color_map.items() if lbl in highlight_labels}

# Donor-level IDs present in adata4.obs['mck']
sample_order = ["mck_6", "mck_5"]

for sample in sample_order:
    mask = adata4.obs["mck"].astype(str).str.strip() == sample
    ad = adata4[mask].copy()
    if ad.n_obs == 0:
        print(f"Warning: no cells for {sample}")
        continue

    x = ad.obs["x_um_dbscan"]
    y = ad.obs["y_um_dbscan"]
    labs = ad.obs[key].astype(str).str.strip()

    # A fresh figure per donor (closed after saving, so figures do not accumulate).
    fig, ax = plt.subplots(figsize=(5, 5))

    # Background: cells whose label is not highlighted, drawn FIRST in gray
    is_other = ~labs.isin(list(color_map_sel))
    if is_other.any():
        ax.scatter(x[is_other].values, y[is_other].values,
                   s=1, color=other_color, alpha=1.0, linewidths=0)

    # Colored clusters ON TOP: only the selected labels
    for lbl in labs.unique():
        if lbl not in color_map_sel:
            continue
        idx = labs == lbl
        ax.scatter(x[idx].values, y[idx].values,
                   s=1, color=color_map_sel[lbl], alpha=1.0, linewidths=0)

    # Limits (full extent for this donor)
    ax.set_xlim(x.min(), x.max())
    ax.set_ylim(y.min(), y.max())

    # Style: equal aspect, no ticks/labels/grid/spines, black background
    ax.set_aspect("equal")
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_title("")
    ax.grid(False)
    for sp in ax.spines.values():
        sp.set_visible(False)
    fig.patch.set_facecolor("black")
    ax.set_facecolor("black")

    # Save
    safe = sample.replace("_", "")
    filename = f"filename_{safe}_all_celltype_coarse_spatial.png"
    output_path = Path(output_dir) / filename
    fig.savefig(output_path, dpi=300, bbox_inches="tight", facecolor="black")
    plt.close(fig)
    print(f"Saved: {output_path}")


Saved: /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Human_mck6_All_celltype_coarse_spatial.png
Saved: /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Human_mck5_All_celltype_coarse_spatial.png


# Fig 4c: spatial mapping of all mouse cell types

In [None]:
### All Mouse object ###
# One spatial scatter per mck group: cell types listed in `color_dict` are drawn in their
# assigned color on top of all remaining cells, which are drawn in `default_color`.

key = "cell_type_fine"

# Highlight palette; labels not listed here fall back to `default_color` (drawn first)
color_dict = {
    'pre-decidual cells': '#a0e1b0',
    'endometrial fibroblasts (sub-luminal)': '#4694c5',
    'endometrial fibroblasts (middle)': '#00401d',
    'endometrial fibroblasts (middle_induced)': '#127d35',
    'endometrial fibroblasts (outer)': '#a77207',
    'decidual cells (early)': '#1e1ee2',
    'decidual cells (intermediate)': '#762a83',
    'decidual cells (late)': '#b11a2d',
    'decidual cells (apoptotic)': '#fccc3f',
}
default_color = "#2d2d2d"

# Where the PNGs go
outdir = Path(output_dir)
outdir.mkdir(parents=True, exist_ok=True)

# Group name -> member values of obs["sample"] and an optional crop window (xlim, ylim)
sample_mapping = {
    "mck8":  {"samples": ["mck_8a", "mck_8b"],   "limits": [(3250, 5750), (500, 3000)]},
    "mck1":  {"samples": ["mck_1"],              "limits": [(1000, 3000), (1000, 3000)]},
    "mck12": {"samples": ["mck_12a", "mck_12b"], "limits": None},
    "mck15": {"samples": ["mck_15"],             "limits": [(2550, 9450), (200, 7100)]},
    "mck10": {"samples": ["mck_10a", "mck_10b"], "limits": [(1400, 8400), (1250, 8250)]},
    "mck11": {"samples": ["mck_11a"],            "limits": None},
}
sample_order = ["mck8", "mck1", "mck12", "mck15", "mck10", "mck11"]

# Marker settings shared by the background and foreground layers
dot_kw = dict(s=15, alpha=1.0, linewidths=0)

# One figure per group, drawn from adata1
for sample in sample_order:
    info = sample_mapping[sample]
    mapped, limits = info["samples"], info["limits"]

    ad = adata1[adata1.obs["sample"].isin(mapped)]
    if ad.n_obs == 0:
        print(f"Warning: no cells for {sample}")
        continue

    x = ad.obs["x_um_dbscan"]
    y = ad.obs["y_um_dbscan"]
    labs = ad.obs[key].astype(str).str.strip()

    # Keep only cells inside the crop window (bounds inclusive) when one is configured
    if limits:
        (x_min, x_max), (y_min, y_max) = limits
        inside = x.between(x_min, x_max) & y.between(y_min, y_max)
        x, y, labs = x[inside], y[inside], labs[inside]

    # Split cells into highlighted vs. everything else
    is_colored = labs.isin(color_dict.keys())
    is_other = ~is_colored

    fig, ax = plt.subplots(figsize=(5, 5))

    # Layer 1: non-highlighted cells go down first so they end up underneath
    if is_other.any():
        ax.scatter(x[is_other].values, y[is_other].values, color=default_color, **dot_kw)

    # Layer 2: each highlighted label, in order of first appearance
    for lbl in labs.unique():
        if lbl not in color_dict:
            continue
        idx = labs == lbl
        ax.scatter(x[idx].values, y[idx].values, color=color_dict[lbl], **dot_kw)

    # View window: the configured crop if present, otherwise the data extent
    if limits:
        x_bounds, y_bounds = (x_min, x_max), (y_min, y_max)
    else:
        x_bounds, y_bounds = (x.min(), x.max()), (y.min(), y.max())
    ax.set_xlim(*x_bounds)
    ax.set_ylim(*y_bounds)

    # Bare canvas: equal aspect, no ticks, labels, grid or frame
    ax.set_aspect("equal")
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_title("")
    ax.grid(False)
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Black background on both the figure and the axes
    fig.patch.set_facecolor("black")
    ax.set_facecolor("black")

    # Write the PNG and release the figure
    filename = f"filename_{sample}_celltype_fine_spatial.png"
    output_path = outdir / filename
    fig.savefig(output_path, dpi=300, bbox_inches="tight", facecolor="black")
    plt.close(fig)
    print(f"Saved: {output_path}")


Saved: /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Mouse_All_mck8_celltype_fine_spatial.png
Saved: /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Mouse_All_mck1_celltype_fine_spatial.png
Saved: /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Mouse_All_mck12_celltype_fine_spatial.png
Saved: /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Mouse_All_mck15_celltype_fine_spatial.png
Saved: /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Mouse_All_mck10_celltype_fine_spatial.png
Saved: /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Mouse_All_mck11_celltype_fine_spatial.png


In [52]:
### Mouse spatial one image per cluster per mck group (rotated, padded) ###
# Configuration and input validation for the per-cluster mouse figures drawn below.

# obs column names
key_cluster, key_sample = "cell_type_coarse", "sample"
x_key, y_key = "x_um_dbscan", "y_um_dbscan"

# Marker styling
highlight_color = "#fe01fe"
other_color = "#B0B0B0"
dot_size = 25  # scatter `s` is an area in points^2
dot_alpha = 1.0

# Figure size (inches, square) and resolution used when saving
fig_size_in = 6
dpi_save = 300

# Destination directory for the PNGs
output_dir = "/n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures"
os.makedirs(output_dir, exist_ok=True)

# Group -> member samples and optional crop window given as [(xmin, xmax), (ymin, ymax)]
limits_map = {
    "mck1": {
        "samples": ["mck_1"],
        "limits": [(1000, 3000), (1000, 3000)],
    },
    "mck12": {
        "samples": ["mck_12a", "mck_12b"],
        "limits": None,
    },
    "mck15": {
        "samples": ["mck_15"],
        "limits": [(2000, 9500), (500, 6500)],
    },
    "mck10": {
        "samples": ["mck_10a", "mck_10b"],
        "limits": [(1250, 8500), (1250, 8500)],
    },
    "mck11": {
        "samples": ["mck_11a"],
        "limits": None,
    },
    "mck8": {
        "samples": ["mck_8a", "mck_8b"],
        "limits": [(3250, 5750), (500, 3000)],
    },
}

# Counter-clockwise rotation, in degrees, applied to each group
rotation_map = {
    "mck1": 28,
    "mck8": 300,
    "mck12": 303,
    "mck15": 175,
    "mck10": 135,
    "mck11": 135,
}

# Preferred plotting order; groups missing from it are appended in limits_map order
preferred_order = ["mck8", "mck1", "mck12", "mck15", "mck10", "mck11"]
group_order = [g for g in preferred_order if g in limits_map]
group_order += [g for g in limits_map if g not in preferred_order]

# Fail early on the first required obs column that is absent
missing_col = next(
    (c for c in (key_cluster, key_sample, x_key, y_key) if c not in adata1.obs),
    None,
)
if missing_col is not None:
    raise ValueError(f"{missing_col} not found in adata1.obs")

# Cluster column must be categorical so the category list gives a stable order
if not isinstance(adata1.obs[key_cluster].dtype, pd.CategoricalDtype):
    adata1.obs[key_cluster] = adata1.obs[key_cluster].astype("category")

clusters_all = list(adata1.obs[key_cluster].cat.categories)

def sanitize(s: str) -> str:
    """Turn an arbitrary label into a filename-friendly token.

    Every run of characters other than word characters and hyphens becomes a single
    underscore, repeated underscores are collapsed, and leading/trailing underscores
    are removed. Non-string input is converted with ``str`` first.
    """
    replaced = re.sub(r"[^\w\-]+", "_", str(s))
    collapsed = re.sub(r"_+", "_", replaced)
    return collapsed.strip("_")

def finite_filter(x, y, lab):
    """Drop positions where either coordinate is NaN or infinite.

    Returns ``(x, y, lab)`` restricted to the entries where both ``x`` and ``y``
    are finite; ``lab`` is filtered with the same boolean mask.
    """
    keep = np.logical_and(np.isfinite(x), np.isfinite(y))
    return tuple(arr[keep] for arr in (x, y, lab))

def rotate_points(x, y, deg, cx, cy):
    """Rotate points counter-clockwise by ``deg`` degrees about the pivot ``(cx, cy)``.

    When ``deg`` is a multiple of 360 the inputs are returned as-is (same objects).
    Otherwise returns the rotated ``(x, y)`` pair.
    """
    if deg % 360 == 0:
        return x, y
    theta = np.deg2rad(deg)
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    # Offsets from the pivot; the standard 2-D rotation is applied to these
    dx = x - cx
    dy = y - cy
    x_new = dx * cos_t - dy * sin_t + cx
    y_new = dx * sin_t + dy * cos_t + cy
    return x_new, y_new

def padding_from_marker(xmin, xmax, ymin, ymax, dot_size_pts2, fig_w_in, fig_h_in, dpi, extra_frac=0.005, extra_px=2):
    """Estimate per-axis padding, in data units, so edge markers are not clipped.

    The marker radius is derived from the scatter area (points^2), converted to pixels
    at ``dpi``, and expressed as a fraction of the figure width/height; that fraction of
    the data range (plus ``extra_px`` pixels) is the padding. The result is never smaller
    than ``extra_frac`` of the range. Zero-width ranges are floored at 1e-9.

    Returns ``(pad_x, pad_y)``.
    """
    # Area (points^2) -> radius (points) -> radius (pixels)
    radius_pt = float(np.sqrt(max(dot_size_pts2, 1e-9) / np.pi))
    radius_px = (radius_pt / 72.0) * dpi

    def _pad(lo, hi, fig_len_in):
        span = max(hi - lo, 1e-9)
        fig_len_px = fig_len_in * dpi
        marker_pad = ((radius_px + extra_px) / fig_len_px) * span
        return max(extra_frac * span, marker_pad)

    return _pad(xmin, xmax, fig_w_in), _pad(ymin, ymax, fig_h_in)

# For every mck group: gather its cells, drop non-finite coordinates, optionally crop,
# rotate about the crop/data center, then write one PNG per cluster with that cluster
# in the highlight color on top of all other cells.
for group in group_order:
    group_cfg = limits_map[group]
    samples = group_cfg["samples"]
    hard_limits = group_cfg["limits"]
    rot_deg = rotation_map.get(group, 0)

    mask_g = adata1.obs[key_sample].astype(str).isin(samples)
    if not np.any(mask_g):
        print(f"Warning: no rows found for {group} with samples={samples}. Skipping.")
        continue

    obs_g = adata1.obs.loc[mask_g]
    x = obs_g[x_key].astype(float).to_numpy()
    y = obs_g[y_key].astype(float).to_numpy()
    lab = obs_g[key_cluster].astype(str).to_numpy()

    x, y, lab = finite_filter(x, y, lab)
    if x.size == 0 or y.size == 0:
        print(f"Warning: no finite coordinates in group {group}. Skipping.")
        continue

    # Rotation pivot: center of the crop window if one is set, else center of the data extent
    if hard_limits is None:
        cx = 0.5 * (x.min() + x.max())
        cy = 0.5 * (y.min() + y.max())
    else:
        (xmin, xmax), (ymin, ymax) = hard_limits
        keep = (x >= xmin) & (x <= xmax) & (y >= ymin) & (y <= ymax)
        x, y, lab = x[keep], y[keep], lab[keep]
        if x.size == 0:
            print(f"Warning: group {group} has no points within hard limits; skipping.")
            continue
        cx = 0.5 * (xmin + xmax)
        cy = 0.5 * (ymin + ymax)

    # Rotate, take the tight bounding box of the rotated points, then pad it
    x_rot, y_rot = rotate_points(x, y, rot_deg, cx, cy)
    xmin_r, xmax_r = float(x_rot.min()), float(x_rot.max())
    ymin_r, ymax_r = float(y_rot.min()), float(y_rot.max())
    pad_x, pad_y = padding_from_marker(
        xmin_r, xmax_r, ymin_r, ymax_r,
        dot_size, fig_size_in, fig_size_in, dpi_save,
        extra_frac=0.005, extra_px=2,
    )
    xlim = (xmin_r - pad_x, xmax_r + pad_x)
    ylim = (ymin_r - pad_y, ymax_r + pad_y)

    print(f"{group}: rotation={rot_deg}°, xlim={xlim}, ylim={ylim}, n_points={x_rot.size}")

    # Scatter settings common to both layers
    layer_kw = dict(s=dot_size, alpha=dot_alpha, linewidth=0, clip_on=True)
    group_tag = sanitize(group)

    for cluster in clusters_all:
        fig, ax = plt.subplots(figsize=(fig_size_in, fig_size_in))
        # Axes fill the whole figure, no margins, fixed (non-autoscaled) equal-aspect view
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        ax.set_position([0, 0, 1, 1])
        ax.set_facecolor("white")
        ax.margins(x=0, y=0)
        ax.set_xmargin(0)
        ax.set_ymargin(0)
        ax.autoscale(enable=False)
        ax.set_aspect("equal", adjustable="box")

        mask_high = lab == str(cluster)
        mask_other = np.logical_not(mask_high)

        # Other cells underneath (zorder 1), highlighted cluster on top (zorder 2)
        if np.any(mask_other):
            ax.scatter(x_rot[mask_other], y_rot[mask_other],
                       color=other_color, zorder=1, **layer_kw)
        if np.any(mask_high):
            ax.scatter(x_rot[mask_high], y_rot[mask_high],
                       color=highlight_color, zorder=2, **layer_kw)

        ax.set_xlim(*xlim)
        ax.set_ylim(*ylim)
        ax.axis("off")

        cluster_tag = sanitize(cluster)
        fname = f"20250820-Mouse-{group_tag}-{cluster_tag}-spatial.png"
        outpath = os.path.join(output_dir, fname)

        fig.savefig(outpath, dpi=dpi_save, facecolor="black", bbox_inches=None, pad_inches=0)
        plt.close(fig)
        print(f"Saved {outpath}")


mck8: rotation=300°, xlim=(2855.926451009733, 5875.153902817049), ylim=(259.1302143332028, 3437.7421122217647), n_points=3374
Saved /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Mouse-mck8-myometrium-spatial.png
Saved /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Mouse-mck8-decidual_cells-spatial.png
Saved /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Mouse-mck8-perivascular_cells-spatial.png
Saved /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Mouse-mck8-epithelial_cells-spatial.png
Saved /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Mouse-mck8-immune_cells-spatial.png
Saved /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Mouse-mck8-perimetrium-spatial.png
Saved /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Mouse-mck8-endometrial_fibroblasts-spatial.png
mck1: rotation=28°, xlim=(836.2128854945291, 3192.260552736833), ylim=(744.6880311524683, 2921.14026069674), n_points=1617
Saved /n/eddy_lab

In [56]:
### Human spatial one image per cluster per mck group ###
# Configuration and input validation for the per-cluster human figures drawn below.

# obs column names; the sample column is the first of `preferred_sample_keys` that exists
key_cluster = "cell_type_coarse"
preferred_sample_keys = ["mck", "sample", "sample_id", "slide", "library"]
x_key, y_key = "x_um_dbscan", "y_um_dbscan"

# Marker styling
highlight_color = "#fe01fe"
other_color = "#B0B0B0"
dot_size = 10  # scatter `s` is an area in points^2
dot_alpha = 1.0

# Figure size (inches, square) and resolution used when saving
fig_size_in = 6
dpi_save = 300

# Destination directory for the PNGs
output_dir = "/n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures"
os.makedirs(output_dir, exist_ok=True)

# Group -> member samples (these groups are neither cropped nor rotated)
groups_map = {
    "mck5": {"samples": ["mck_5"]},
    "mck6": {"samples": ["mck_6"]},
}
group_order = ["mck6", "mck5"]

# Pick the sample column: first preferred key present in adata4.obs
avail_cols = set(adata4.obs.columns)
key_sample = next((k for k in preferred_sample_keys if k in avail_cols), None)
if key_sample is None:
    raise ValueError(f"No suitable sample column found in adata4.obs. "
                     f"Tried {preferred_sample_keys}. Available: {sorted(list(avail_cols))[:50]}")

# Fail early on the first required obs column that is absent
missing_col = next(
    (c for c in (key_cluster, key_sample, x_key, y_key) if c not in adata4.obs),
    None,
)
if missing_col is not None:
    raise ValueError(f"{missing_col} not found in adata4.obs")

# Cluster column must be categorical so the category list gives a stable order
if not isinstance(adata4.obs[key_cluster].dtype, pd.CategoricalDtype):
    adata4.obs[key_cluster] = adata4.obs[key_cluster].astype("category")
clusters_all = list(adata4.obs[key_cluster].cat.categories)

def sanitize(s: str) -> str:
    """Make ``s`` safe to embed in a filename.

    Runs of characters that are neither word characters nor hyphens are replaced by one
    underscore, consecutive underscores are merged, and underscores at either end are
    stripped. The argument is passed through ``str`` before processing.
    """
    token = str(s)
    for pattern in (r"[^\w\-]+", r"_+"):
        token = re.sub(pattern, "_", token)
    return token.strip("_")

def finite_filter(x, y, lab):
    """Keep only the entries whose ``x`` and ``y`` are both finite.

    The same boolean mask is applied to ``x``, ``y`` and ``lab``; the three filtered
    arrays are returned as a tuple in that order.
    """
    x_ok = np.isfinite(x)
    y_ok = np.isfinite(y)
    both_ok = x_ok & y_ok
    return x[both_ok], y[both_ok], lab[both_ok]

def padding_from_marker(xmin, xmax, ymin, ymax, dot_size_pts2, fig_w_in, fig_h_in, dpi, extra_frac=0.005, extra_px=2):
    """Compute ``(pad_x, pad_y)`` in data units so markers at the edge stay inside the view.

    The scatter area (points^2) gives a marker radius in points, which is converted to
    pixels at ``dpi``. Radius plus ``extra_px``, taken as a fraction of the figure size in
    pixels, is scaled by the data range; the result is at least ``extra_frac`` of that
    range. A zero-width range is treated as 1e-9.
    """
    # Radius in points from the area, then in pixels
    radius_pt = float(np.sqrt(max(dot_size_pts2, 1e-9) / np.pi))
    radius_px = (radius_pt / 72.0) * dpi
    edge_px = radius_px + extra_px

    width_px = fig_w_in * dpi
    height_px = fig_h_in * dpi
    span_x = max(xmax - xmin, 1e-9)
    span_y = max(ymax - ymin, 1e-9)

    pad_x = max(extra_frac * span_x, (edge_px / width_px) * span_x)
    pad_y = max(extra_frac * span_y, (edge_px / height_px) * span_y)
    return pad_x, pad_y

# For every human group: gather its cells, drop non-finite coordinates, pad the data
# extent, then write one PNG per cluster with that cluster in the highlight color on
# top of all other cells.
for group in group_order:
    samples = groups_map[group]["samples"]

    # Rows belonging to this group's samples
    mask_g = adata4.obs[key_sample].astype(str).isin(samples)
    if not np.any(mask_g):
        print(f"Warning: no rows found for {group} using {key_sample} in {samples}. Skipping.")
        continue

    obs_g = adata4.obs.loc[mask_g]
    x = obs_g[x_key].astype(float).to_numpy()
    y = obs_g[y_key].astype(float).to_numpy()
    lab = obs_g[key_cluster].astype(str).to_numpy()

    x, y, lab = finite_filter(x, y, lab)
    if x.size == 0 or y.size == 0:
        print(f"Warning: no finite coordinates in group {group}. Skipping.")
        continue

    # View window = full data extent plus a small marker-sized margin on every side
    xmin, xmax = float(x.min()), float(x.max())
    ymin, ymax = float(y.min()), float(y.max())
    pad_x, pad_y = padding_from_marker(
        xmin, xmax, ymin, ymax,
        dot_size, fig_size_in, fig_size_in, dpi_save,
        extra_frac=0.005, extra_px=2,
    )
    xlim = (xmin - pad_x, xmax + pad_x)
    ylim = (ymin - pad_y, ymax + pad_y)
    print(f"{group}: xlim={xlim}, ylim={ylim}, n_points={x.size} using {key_sample} in {samples}")

    # Scatter settings common to both layers
    layer_kw = dict(s=dot_size, alpha=dot_alpha, linewidth=0, clip_on=True)
    group_tag = sanitize(group)

    # One figure per cluster
    for cluster in clusters_all:
        fig, ax = plt.subplots(figsize=(fig_size_in, fig_size_in))
        # Axes fill the whole figure, no margins, fixed (non-autoscaled) equal-aspect view
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        ax.set_position([0, 0, 1, 1])
        ax.set_facecolor("white")
        ax.margins(x=0, y=0)
        ax.set_xmargin(0)
        ax.set_ymargin(0)
        ax.autoscale(enable=False)
        ax.set_aspect("equal", adjustable="box")

        mask_high = lab == str(cluster)
        mask_other = np.logical_not(mask_high)

        # Other cells underneath (zorder 1), highlighted cluster on top (zorder 2)
        if np.any(mask_other):
            ax.scatter(x[mask_other], y[mask_other],
                       color=other_color, zorder=1, **layer_kw)
        if np.any(mask_high):
            ax.scatter(x[mask_high], y[mask_high],
                       color=highlight_color, zorder=2, **layer_kw)

        ax.set_xlim(*xlim)
        ax.set_ylim(*ylim)
        ax.axis("off")

        cluster_tag = sanitize(cluster)
        fname = f"2filename-{group_tag}-{cluster_tag}-spatial.png"
        outpath = os.path.join(output_dir, fname)

        fig.savefig(outpath, dpi=dpi_save, facecolor="black", bbox_inches=None, pad_inches=0)
        plt.close(fig)
        print(f"Saved {outpath}")


mck6: xlim=(424.81202048890935, 5881.371644121481), ylim=(404.53475734377406, 5787.605238211786), n_points=24337 using mck in ['mck_6']
Saved /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Human-mck6-decidual_cells-spatial.png
Saved /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Human-mck6-epithelial_cells-spatial.png
Saved /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Human-mck6-endometrial_fibroblasts-spatial.png
Saved /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Human-mck6-immune_cells-spatial.png
Saved /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Human-mck6-perivascular_cells-spatial.png
Saved /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Human-mck6-smooth_muscle_cells-spatial.png
mck5: xlim=(58.99429733688103, 5931.8168406631185), ylim=(14.853612404532996, 5881.817956166896), n_points=21934 using mck in ['mck_5']
Saved /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Human-mck5-decidual_

In [59]:
# Average pseudotime per sample in Mouse Only Fibroblast object
# Reads the sample and pseudotime columns from adata2.obs, discards cells with no
# pseudotime, and prints the mean pseudotime for each sample (sorted by sample name).

pt_key = "dpt_pseudotime"
sample_key = "mck"

# Working copy of just the two columns, with sample names normalized to stripped strings
df = adata2.obs.loc[:, [sample_key, pt_key]].copy()
df[sample_key] = df[sample_key].astype(str).str.strip()
df = df[df[pt_key].notna()]

# Per-sample mean as a two-column frame: sample, mean_pseudotime
per_sample_mean = df.groupby(sample_key, sort=True)[pt_key].mean()
avg_table = per_sample_mean.reset_index(name="mean_pseudotime")

# Plain-text table without the row index
print(avg_table.to_string(index=False))

   mck  mean_pseudotime
 mck_1         0.552018
mck_10         0.625658
mck_11         0.705431
mck_12         0.573324
mck_15         0.597683
 mck_8         0.092152
