In [1]:
import os
import scanpy as sc
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import pandas as pd
from pathlib import Path
import seaborn as sns
import numpy as np
import re


# Load the AnnData objects used by the cells below. The paths are placeholders
# and must be pointed at the real .h5ad files before running.
# Going by the placeholder file names: adata1 = all mouse cells,
# adata2 = mouse fibroblast + decidual subset, adata4 = all human cells.
adata1 = sc.read_h5ad("/path_to_all_mouse_cells_object.h5ad")
adata2 = sc.read_h5ad("/path_to_mouse_only_fib_and_dec_cells_object.h5ad")
# adata3 (disabled): mouse 1dpi-late / 4dpi fibroblast + decidual subset.
#adata3 = sc.read_h5ad("path_to_mouse_only_1dpi_late_and_4dpi_fib_and_dec_cells_object.h5ad")
adata4 = sc.read_h5ad("/path_to_all_human_cells_object.h5ad")
# adata5 (disabled): human fibroblast + decidual subset.
#adata5 = sc.read_h5ad("/path_to_human_only_fib_and_dec_cells_object.h5ad")

# Directory every figure in this notebook is saved into; created (including
# parents) if it does not exist yet.
# NOTE(review): the placeholder reuses the mouse-object name — confirm the intended output folder.
output_dir = Path("/path_to_all_mouse_cells_object")
output_dir.mkdir(parents=True, exist_ok=True)

# Fig S6a: Number of Cells per Sample

In [14]:
def summarize_mapped_cells(obs, sample_col='mck', coord_col='x_um_dbscan'):
    """Tabulate, per sample, the number of cells and the number of mapped cells.

    Parameters
    ----------
    obs : pandas.DataFrame
        Cell-level metadata (e.g. ``adata.obs``) containing ``sample_col`` and
        ``coord_col``.
    sample_col : str
        Column identifying the sample each cell belongs to.
    coord_col : str
        Column whose non-missing entries are counted as "mapped".

    Returns
    -------
    pandas.DataFrame
        One row per sample present in ``obs`` with the columns ``sample_col``,
        ``total``, ``mapped`` and ``% ratio`` (mapped / total * 100, rounded
        to two decimals).
    """
    summary = (
        # observed=True: only samples that actually occur get a row. This also
        # avoids the pandas FutureWarning about the implicit `observed` default
        # for categorical keys and 0/0 ratios for unused categories.
        obs.groupby(sample_col, observed=True)
        .agg(
            total=(sample_col, 'size'),
            mapped=(coord_col, 'count'),  # 'count' == number of non-missing values
        )
        .reset_index()
    )
    summary['% ratio'] = (summary['mapped'] / summary['total'] * 100).round(2)
    return summary


# Print the same table for the mouse and the human object.
for dataset_label, dataset in (("MouseDataset", adata1), ("HumanDataset", adata4)):
    metadata = dataset.obs
    cell_counts = summarize_mapped_cells(metadata)
    print(dataset_label)
    print(cell_counts)

MouseDataset
      mck  total  mapped  % ratio
0   mck_1   3058    1690    55.26
1   mck_8  16841    7033    41.76
2  mck_10  12418    7789    62.72
3  mck_11   7682    5655    73.61
4  mck_12  13000    6916    53.20
5  mck_15   4243    2552    60.15
HumanDataset
     mck  total  mapped  % ratio
0  mck_5  36346   21934    60.35
1  mck_6  33179   24337    73.35


  cell_counts = metadata.groupby('mck').agg(
  cell_counts = metadata.groupby('mck').agg(


# Fig S6b: QC violin plots

In [None]:
### Mouse + Human: log1p(Counts per cell) in one figure ###

# Colors for all samples across both objects
color_dict = {
    'mck_8':  '#990540',
    'mck_1':  '#f16f47',
    'mck_12': '#e7f59a',
    'mck_15': '#65c2a7',
    'mck_10': '#ffdf8d',
    'mck_11': '#624fa3',

    'mck_6':  '#feb05f',
    'mck_5':  '#117733',
}

# Left-to-right sample order on the x axis; `palette` lists the colors in that same order.
order = ['mck_8', 'mck_1', 'mck_12', 'mck_15', 'mck_10', 'mck_11', 'mck_6', 'mck_5']
palette = [color_dict[o] for o in order]

# Build a combined dataframe from adata1 (mouse) and adata4 (human)
df_mouse = adata1.obs[['mck', 'total_counts']].copy()
df_human = adata4.obs[['mck', 'total_counts']].copy()

# Keep only the samples being plotted, drop cells without a count value and
# add the log1p-transformed column. Chained so no frame is mutated in place
# and re-running the cell always starts from the same inputs.
df_all = (
    pd.concat([df_mouse, df_human], axis=0, ignore_index=True)
    .loc[lambda d: d['mck'].isin(order)]
    .dropna(subset=['total_counts'])
    .assign(log1p_total_counts=lambda d: np.log1p(d['total_counts']))
)

# Plot
fig, ax = plt.subplots(figsize=(8, 4))
sns.violinplot(
    data=df_all,
    x='mck',
    y='log1p_total_counts',
    order=order,
    # seaborn deprecated passing `palette` without `hue`; assigning the x
    # variable to `hue` with legend=False gives the same per-sample coloring.
    hue='mck',
    hue_order=order,
    palette=palette,
    legend=False,
    inner=None,
    cut=0,
    linewidth=0.8,
    ax=ax
)

ax.set_xlabel('')
ax.set_ylabel('log1p(Counts per cell)')

# Style the existing tick labels instead of re-setting them with
# set_xticklabels(), which warns when the axis has no fixed tick locator.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right', color='black')
ax.tick_params(axis='y', colors='black')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Fix figure size exactly; avoid autoshrink
fig.set_size_inches(8, 4)
# Figure-specific file name so panels written to the same output_dir do not
# overwrite each other.
filename = "20250820-MouseHuman_All_CountsPerCell_log.png"
output_path = output_dir / filename
fig.savefig(output_path, dpi=300, facecolor='white')  # no bbox_inches='tight'
plt.close(fig)
print(f"Saved combined violin plot to {output_path}")


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.violinplot(
  ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right', color='black')


Saved combined violin plot to /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-MouseHuman_All_CountsPerCell_log.png


In [24]:
### Mouse + Human: log1p(Genes per cell) in one figure ###

# Colors for all samples across both objects (same as counts plot)
color_dict = {
    'mck_8':  '#990540',
    'mck_1':  '#f16f47',
    'mck_12': '#e7f59a',
    'mck_15': '#65c2a7',
    'mck_10': '#ffdf8d',
    'mck_11': '#624fa3',

    'mck_6':  '#feb05f',
    'mck_5':  '#117733',
}

# Left-to-right sample order on the x axis; `palette` lists the colors in that same order.
order = ['mck_8', 'mck_1', 'mck_12', 'mck_15', 'mck_10', 'mck_11', 'mck_6', 'mck_5']
palette = [color_dict[o] for o in order]

# Build a combined dataframe from adata1 (mouse) and adata4 (human)
df_mouse = adata1.obs[['mck', 'n_genes_by_counts']].copy()
df_human = adata4.obs[['mck', 'n_genes_by_counts']].copy()

# Keep only the samples being plotted, drop cells without a gene count and
# add the log1p-transformed column. Chained so no frame is mutated in place
# and re-running the cell always starts from the same inputs.
df_all = (
    pd.concat([df_mouse, df_human], axis=0, ignore_index=True)
    .loc[lambda d: d['mck'].isin(order)]
    .dropna(subset=['n_genes_by_counts'])
    .assign(log1p_n_genes=lambda d: np.log1p(d['n_genes_by_counts']))
)

# Plot
fig, ax = plt.subplots(figsize=(8, 4))
sns.violinplot(
    data=df_all,
    x='mck',
    y='log1p_n_genes',
    order=order,
    # seaborn deprecated passing `palette` without `hue`; assigning the x
    # variable to `hue` with legend=False gives the same per-sample coloring.
    hue='mck',
    hue_order=order,
    palette=palette,
    legend=False,
    inner=None,
    cut=0,
    linewidth=0.8,
    ax=ax
)

ax.set_xlabel('')
ax.set_ylabel('log1p(Genes per cell)')

# Style the existing tick labels instead of re-setting them with
# set_xticklabels(), which warns when the axis has no fixed tick locator.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right', color='black')
ax.tick_params(axis='y', colors='black')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Pin the figure size explicitly before saving.
fig.set_size_inches(8, 4)
# Figure-specific file name so panels written to the same output_dir do not
# overwrite each other.
filename = "20250820-MouseHuman_All_GenesPerCell_log.png"
output_path = output_dir / filename
fig.savefig(output_path, dpi=300, facecolor='white')  # no bbox_inches='tight'
plt.close(fig)
print(f"Saved combined violin plot to {output_path}")


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.violinplot(
  ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right', color='black')


Saved combined violin plot to /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-MouseHuman_All_GenesPerCell_log.png


# Fig S6c: Dotplot for Cell Type Annotations

In [3]:
### All Mouse Annotations ###

# Marker genes by cell type
marker_genes = [
    'Cdh1', 'Pax8', 'Pdgfra',
    'Wnt4', 'Prl8a2', 'Bmp2', 'Cdh3',
    'Flt1', 'Pecam1', 'Ptprc', 'Acta2', 'Myh11', 'Sox6', 'Pgr', 'Esr1'
]

# Desired row order
cell_order = [
    'epithelial cells',
    'endometrial fibroblasts',
    'decidual cells',
    'perivascular cells',
    'immune cells',
    'myometrium',
    'perimetrium'
]

# Settings
key = 'cell_type_coarse'

# Validate inputs
if key not in adata1.obs:
    raise ValueError(f"{key} not found in adata1.obs")

missing = set(cell_order) - set(adata1.obs[key].unique())
if missing:
    raise ValueError(f"Missing cell types in adata1.obs['{key}']: {missing}")

# Warn about marker genes that are absent and plot only the ones that exist,
# so the warning is not immediately followed by a lookup failure in scanpy.
missing_genes = [g for g in marker_genes if g not in adata1.var_names]
if missing_genes:
    print(f"Warning: {len(missing_genes)} marker(s) not in var: {missing_genes}")
plot_genes = [g for g in marker_genes if g in adata1.var_names]
if not plot_genes:
    raise ValueError("None of the marker genes are present in adata1.var_names")

# Generate dotplot with per-gene 0–1 scaling across groups.
# With show=False scanpy returns a dict of the axes it created.
dotplot_axes = sc.pl.dotplot(
    adata1,
    var_names=plot_genes,
    groupby=key,
    layer='log1p',
    categories_order=cell_order,
    figsize=(5, max(1.0, len(cell_order) * 0.32)),
    standard_scale='var',   # <- min–max per gene across groups
    show=False
)

# Take the main panel and its figure from the returned dict rather than
# relying on the pyplot "current figure" and on axes creation order.
main_ax = dotplot_axes['mainplot_ax']
fig = main_ax.figure

# Style just the dot borders in the main axis
for coll in main_ax.collections:
    coll.set_linewidth(0.5)
    coll.set_edgecolor("gray")

# Style all axes (main + legends): white background, black text and spines
for ax in fig.get_axes():
    ax.set_facecolor('white')
    ax.tick_params(colors='black')
    ax.title.set_color('black')
    # Tick-label lists may simply be empty for some axes; nothing to guard.
    for label in ax.get_xticklabels() + ax.get_yticklabels():
        label.set_color('black')
    for spine in ax.spines.values():
        spine.set_edgecolor('black')

# Save under a figure-specific name so panels written to the same output_dir
# do not overwrite each other.
filename = "20250820-Mouse_All_coarse_Dotplot.png"
output_path = output_dir / filename
fig.patch.set_facecolor('white')
fig.savefig(output_path, dpi=300, bbox_inches='tight', facecolor='white')
plt.close(fig)
print(f"Saved dotplot to {output_path}")


Saved dotplot to /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Mouse_All_coarse_Dotplot.png


In [10]:
### Mouse Only Fibroblast Annotations ###

# Marker genes by cell type
marker_genes = [
    'Pdgfra', 'Col3a1', 'Col1a2', 'Celf2',
    'Kcnd2', 'Nlgn1',
    'Gria4', 'Rxfp1', 'Sema5a', 'Cdh11',
    'Top2a', 'Kif15', 'Knl1', 'Mki67', 'Bub1',
    'Klf5', 'Hand2', 'Wnt4', 'Cebpb', 'Prl8a2', 'Bmp2', 'Cdh3',  'Gja1',
    'Hif1a', 'Epas1', 'Vegfa',
    'Adam15', 'Adam19',
    'Bax','Pgr', 'Esr1'
]

# Desired row order
cell_order = [
    'endometrial fibroblasts (outer)',
    'endometrial fibroblasts (middle)',
    'endometrial fibroblasts (sub-luminal)',
    'endometrial fibroblasts (middle_induced)',
    'pre-decidual cells',
    'decidual cells (early)',
    'decidual cells (intermediate)',
    'decidual cells (late)',
    'decidual cells (apoptotic)',
]

# Settings
key = 'cell_subtype_annotations'

# Validate inputs
if key not in adata2.obs:
    raise ValueError(f"{key} not found in adata2.obs")

missing = set(cell_order) - set(adata2.obs[key].unique())
if missing:
    raise ValueError(f"Missing cell types in adata2.obs['{key}']: {missing}")

# Warn about marker genes that are absent and plot only the ones that exist,
# so the warning is not immediately followed by a lookup failure in scanpy.
missing_genes = [g for g in marker_genes if g not in adata2.var_names]
if missing_genes:
    print(f"Warning: {len(missing_genes)} marker(s) not in var: {missing_genes}")
plot_genes = [g for g in marker_genes if g in adata2.var_names]
if not plot_genes:
    raise ValueError("None of the marker genes are present in adata2.var_names")

# Generate dotplot with per-gene 0–1 scaling across groups.
# With show=False scanpy returns a dict of the axes it created.
dotplot_axes = sc.pl.dotplot(
    adata2,
    var_names=plot_genes,
    groupby=key,
    layer='log1p',
    categories_order=cell_order,
    figsize=(10, max(1.0, len(cell_order) * 0.32)),
    standard_scale='var',
    show=False
)

# Take the main panel and its figure from the returned dict rather than
# relying on the pyplot "current figure" and on axes creation order.
main_ax = dotplot_axes['mainplot_ax']
fig = main_ax.figure

# Style just the dot borders in the main axis. No blanket try/except here:
# a failure while styling should surface instead of being silently ignored.
for coll in main_ax.collections:
    coll.set_linewidth(0.5)
    coll.set_edgecolor("gray")

# Style all axes: white background, black text and spines
for ax in fig.get_axes():
    ax.set_facecolor('white')
    ax.tick_params(colors='black')
    ax.title.set_color('black')
    for label in ax.get_xticklabels() + ax.get_yticklabels():
        label.set_color('black')
    for spine in ax.spines.values():
        spine.set_edgecolor('black')

# Save under a figure-specific name so panels written to the same output_dir
# do not overwrite each other.
filename = "20250820-Mouse_OnlyFib_Fine_Dotplot_scaled.png"
output_path = output_dir / filename
fig.patch.set_facecolor('white')
fig.savefig(output_path, dpi=300, bbox_inches='tight', facecolor='white')
plt.close(fig)
print(f"Saved dotplot to {output_path}")

Saved dotplot to /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Mouse_OnlyFib_Fine_Dotplot_scaled.png


In [4]:
### Human Annotations ###

# Marker genes by cell type
marker_genes = [
    'EPCAM', 'KLF5', 'COL5A1', 'COL6A3', 'DCN','LEFTY2', 'IGFBP1',
    'ACTA2', 'MYH11', 'PTPRC', 'STK17B', 'PECAM1', 'VWF', 'PGR', 'ESR1'
]

# Desired row order
cell_order = [
    'epithelial cells',
    'endometrial fibroblasts',
    'decidual cells',
    'immune cells',
    'perivascular cells',
    'smooth muscle cells'
]

# Settings
key = 'cell_type_annotations'

# Validate inputs
if key not in adata4.obs:
    raise ValueError(f"{key} not found in adata4.obs")

missing = set(cell_order) - set(adata4.obs[key].unique())
if missing:
    raise ValueError(f"Missing cell types in adata4.obs['{key}']: {missing}")

# Warn about marker genes that are absent and plot only the ones that exist,
# so the warning is not immediately followed by a lookup failure in scanpy.
missing_genes = [g for g in marker_genes if g not in adata4.var_names]
if missing_genes:
    print(f"Warning: {len(missing_genes)} marker(s) not in var: {missing_genes}")
plot_genes = [g for g in marker_genes if g in adata4.var_names]
if not plot_genes:
    raise ValueError("None of the marker genes are present in adata4.var_names")

# Generate dotplot with per-gene 0–1 scaling across groups.
# With show=False scanpy returns a dict of the axes it created.
dotplot_axes = sc.pl.dotplot(
    adata4,
    var_names=plot_genes,
    groupby=key,
    layer='log1p',
    categories_order=cell_order,
    figsize=(5, max(1.0, len(cell_order) * 0.32)),
    standard_scale='var',   # <- min–max per gene across groups
    show=False
)

# Take the main panel and its figure from the returned dict rather than
# relying on the pyplot "current figure" and on axes creation order.
main_ax = dotplot_axes['mainplot_ax']
fig = main_ax.figure

# Style just the dot borders in the main axis
for coll in main_ax.collections:
    coll.set_linewidth(0.5)
    coll.set_edgecolor("gray")

# Style all axes (main + legends): white background, black text and spines
for ax in fig.get_axes():
    ax.set_facecolor('white')
    ax.tick_params(colors='black')
    ax.title.set_color('black')
    # Tick-label lists may simply be empty for some axes; nothing to guard.
    for label in ax.get_xticklabels() + ax.get_yticklabels():
        label.set_color('black')
    for spine in ax.spines.values():
        spine.set_edgecolor('black')

# Save under a figure-specific name so panels written to the same output_dir
# do not overwrite each other.
filename = "20250829-Human_All_coarse_Dotplot.png"
output_path = output_dir / filename
fig.patch.set_facecolor('white')
fig.savefig(output_path, dpi=300, bbox_inches='tight', facecolor='white')
plt.close(fig)
print(f"Saved dotplot to {output_path}")


Saved dotplot to /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250829-Human_All_coarse_Dotplot.png


# Fig S6d: Stacked bar plots for cell type compositions

In [26]:
### All Mouse Object ###

key = 'cell_type_coarse'

# Fail early with a clear message if the annotation column is absent.
if key not in adata1.obs:
    raise ValueError(f"{key} not found in adata1.obs")

# desired order (colors will come from adata1.uns[f"{key}_colors"])
celltype_order = [
    'endometrial fibroblasts', 'decidual cells', 'epithelial cells',
    'myometrium', 'immune cells', 'perivascular cells', 'perimetrium'
]

# collapse to sample-level and build table
df = adata1.obs[['mck', key]].copy()
# Strip a trailing "_<letter>" suffix so such entries are pooled under the base sample id.
df['mck_base'] = df['mck'].astype(str).str.replace(r'_[a-zA-Z]$', '', regex=True)
counts = pd.crosstab(df['mck_base'], df[key])

# sample order and percentages
mck_order = ['mck_8', 'mck_1', 'mck_12', 'mck_15', 'mck_10', 'mck_11']
missing_donors = [m for m in mck_order if m not in counts.index]
if missing_donors:
    raise ValueError(f"missing donors in adata1.obs['mck']: {missing_donors}")

# use stored palette from AnnData (one color per category, in category order)
cats = adata1.obs[key].astype('category').cat.categories
palette_key = f"{key}_colors"
if palette_key not in adata1.uns:
    raise ValueError(f"'{palette_key}' not found in adata1.uns. Set it before plotting.")
palette = adata1.uns[palette_key]
if len(palette) != len(cats):
    raise ValueError(f"Palette length ({len(palette)}) does not match number of categories ({len(cats)}).")
color_map = dict(zip(cats, palette))

# Cell types present in the data but absent from celltype_order are dropped
# by the reindex below and the percentages are renormalised without them;
# make that visible instead of silent.
unlisted_types = [ct for ct in counts.columns if ct not in celltype_order]
if unlisted_types:
    print(f"Warning: cell types excluded from the composition plot: {unlisted_types}")

# ensure columns align to desired order and all have colors
counts = counts.reindex(index=mck_order, columns=celltype_order, fill_value=0)
missing_colors = [ct for ct in celltype_order if ct not in color_map]
if missing_colors:
    raise ValueError(f"Missing colors for: {missing_colors}")
perc = counts.div(counts.sum(axis=1), axis=0).mul(100)

# plot: one horizontal bar per sample, segments stacked left to right
fig, ax = plt.subplots(figsize=(6, 6))
left = pd.Series(0.0, index=perc.index)
for ct in perc.columns:
    ax.barh(
        perc.index, perc[ct], left=left,
        color=color_map[ct], edgecolor='black', linewidth=0.3
    )
    left = left + perc[ct]  # running start position of the next segment

# axes and style
ax.set_xlabel('')
ax.set_ylabel('')
ax.invert_yaxis()  # first entry of mck_order at the top
ax.set_facecolor('white')
fig.patch.set_facecolor('white')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.tick_params(colors='black')
fig.tight_layout()  # act on this figure explicitly, not on pyplot's current figure

# save under a figure-specific name so panels written to the same output_dir
# do not overwrite each other
filename = "20250820-Mouse_All_Coarse_CellTypeComposition.png"
outdir = Path(output_dir)
outdir.mkdir(parents=True, exist_ok=True)
output_path = outdir / filename
fig.savefig(output_path, dpi=300, bbox_inches='tight', facecolor='white')
plt.close(fig)
print(f"Saved stacked bar plot to {output_path}")


Saved stacked bar plot to /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Mouse_All_Coarse_CellTypeComposition.png


In [3]:
### All Human Object ###

key = 'cell_type_coarse'

# Fail early with a clear message if the annotation column is absent.
if key not in adata4.obs:
    raise ValueError(f"{key} not found in adata4.obs")

# desired order (colors will come from adata4.uns[f"{key}_colors"])
celltype_order = [
    'endometrial fibroblasts', 'decidual cells', 'epithelial cells',
    'smooth muscle cells', 'immune cells', 'perivascular cells'
]

# collapse to sample-level and build table
df = adata4.obs[['mck', key]].copy()
# Strip a trailing "_<letter>" suffix so such entries are pooled under the base sample id.
df['mck_base'] = df['mck'].astype(str).str.replace(r'_[a-zA-Z]$', '', regex=True)
counts = pd.crosstab(df['mck_base'], df[key])

# sample order and percentages
mck_order = ['mck_6', 'mck_5']
missing_donors = [m for m in mck_order if m not in counts.index]
if missing_donors:
    raise ValueError(f"missing donors in adata4.obs['mck']: {missing_donors}")

# use stored palette from AnnData (one color per category, in category order)
cats = adata4.obs[key].astype('category').cat.categories
palette_key = f"{key}_colors"
if palette_key not in adata4.uns:
    raise ValueError(f"'{palette_key}' not found in adata4.uns. Set it before plotting.")
palette = adata4.uns[palette_key]
if len(palette) != len(cats):
    raise ValueError(f"Palette length ({len(palette)}) does not match number of categories ({len(cats)}).")
color_map = dict(zip(cats, palette))

# Cell types present in the data but absent from celltype_order are dropped
# by the reindex below and the percentages are renormalised without them;
# make that visible instead of silent.
unlisted_types = [ct for ct in counts.columns if ct not in celltype_order]
if unlisted_types:
    print(f"Warning: cell types excluded from the composition plot: {unlisted_types}")

# ensure columns align to desired order and all have colors
counts = counts.reindex(index=mck_order, columns=celltype_order, fill_value=0)
missing_colors = [ct for ct in celltype_order if ct not in color_map]
if missing_colors:
    raise ValueError(f"Missing colors for: {missing_colors}")
perc = counts.div(counts.sum(axis=1), axis=0).mul(100)

# plot: one horizontal bar per sample, segments stacked left to right
fig, ax = plt.subplots(figsize=(6, 2))
left = pd.Series(0.0, index=perc.index)
for ct in perc.columns:
    ax.barh(
        perc.index, perc[ct], left=left,
        color=color_map[ct], edgecolor='black', linewidth=0.3
    )
    left = left + perc[ct]  # running start position of the next segment

# axes and style
ax.set_xlabel('')
ax.set_ylabel('')
ax.invert_yaxis()  # first entry of mck_order at the top
ax.set_facecolor('white')
fig.patch.set_facecolor('white')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.tick_params(colors='black')
fig.tight_layout()  # act on this figure explicitly, not on pyplot's current figure

# save under a figure-specific name so panels written to the same output_dir
# do not overwrite each other
filename = "20250820-Human_All_Coarse_CellTypeComposition.png"
outdir = Path(output_dir)
outdir.mkdir(parents=True, exist_ok=True)
output_path = outdir / filename
fig.savefig(output_path, dpi=300, bbox_inches='tight', facecolor='white')
plt.close(fig)
print(f"Saved stacked bar plot to {output_path}")

Saved stacked bar plot to /n/eddy_lab/Lab/mckinley/cagri_output/PaperFigures/20250820-Human_All_Coarse_CellTypeComposition.png
