In [None]:
# Load IPython's autoreload extension and use mode 2, so edited modules are
# re-imported automatically before each cell executes.
%load_ext autoreload
%autoreload 2

## Basic setup

In [None]:
import concord as ccd
import scanpy as sc
import torch
import warnings
from pathlib import Path
warnings.filterwarnings('ignore')
import time
from pathlib import Path
# Use the first CUDA device when torch reports one, otherwise fall back to CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

# Fix the seed through concord's helper so reruns are comparable.
seed = 0
ccd.ul.set_seed(seed)

# Matplotlib names used by the plotting cells.
from matplotlib import font_manager, rcParams
import matplotlib as mpl
import matplotlib.pyplot as plt

# rc overrides applied per figure via plt.rc_context(rc=custom_rc).
custom_rc = {'font.family': 'Arial'}

# Keep text as text in SVG output and use Type 42 fonts in PDF output.
mpl.rcParams.update({'svg.fonttype': 'none', 'pdf.fonttype': 42})

In [None]:
# Project identifiers; file_suffix time-stamps the outputs written in this session.
proj_name = "huycke_intestine"
file_name = proj_name
file_suffix = time.strftime('%b%d-%H%M')
seed = 0

# Figure output directory and data directory, created if they do not exist yet.
save_dir = Path(f"../save/{proj_name}")
data_dir = Path(f"../data/{proj_name}")
for _project_dir in (save_dir, data_dir):
    _project_dir.mkdir(parents=True, exist_ok=True)


In [None]:
# Read the AnnData object for this project from the data directory.
data_path = data_dir.joinpath('adata_final_Jul17-1902.h5ad')
adata = sc.read(data_path)

### Visualize global 2D and 3D UMAPs

In [None]:
# Build one colour palette per obs column that is used to colour plots.

# Embedding keys that are compared throughout the notebook.
methods = [
    "scvi", "harmony", "scanorama", "liger",
    "concord_hcl", "concord_knn", "contrastive",
    "seurat_cca", "seurat_rpca",
]

# 'stage' is a copy of the mouse-age annotation.
# NOTE(review): the macrophage plotting cell casts this column with .astype(str);
# here it is copied as-is - confirm the values match the string keys of stage_pal.
adata.obs['stage'] = adata.obs['MouseAge_combined']

# (obs column, palette name, seed), listed in the order the mappings are requested.
palette_specs = [
    ('cell_type', 'Paired', seed),
    ('broad_cell_type', 'tab20', seed),
    ('mes_subtype', 'Paired', seed),
    ('epi_subtype', 'Paired', seed),
    ('batch', 'Set1', seed),
    ('phase', 'Set1', seed),
    ('seg_classify', 'tab10', seed),
    ('LaneID', 'Paired', 7),
]
auto_pals = {}
for obs_key, pal_name, pal_seed in palette_specs:
    _, _, auto_pals[obs_key] = ccd.pl.get_color_mapping(adata, obs_key, pal=pal_name, seed=pal_seed)

# Keep the individual palette names available to other cells.
celltype_pal = auto_pals['cell_type']
broad_celltype_pal = auto_pals['broad_cell_type']
mes_pal = auto_pals['mes_subtype']
epi_pal = auto_pals['epi_subtype']
batch_pal = auto_pals['batch']
phase_pal = auto_pals['phase']
seg_pal = auto_pals['seg_classify']
lane_pal = auto_pals['LaneID']

# Doublet-like cells get a fixed grey instead of a palette colour.
broad_celltype_pal['Doublet-like'] = '#757575'

# Hand-picked colours per developmental stage.
stage_pal = dict([
    ('12.5', "midnightblue"),
    ('13.5', "dodgerblue"),
    ('14.5', "seagreen"),
    ('15.5', "#00C000"),
    ('16.5', "#EEC900"),
    ('17.5', "#FF7F00"),
    ('18.5', "#FF0000"),
])

# Lookup passed to the plotting helpers: obs column -> {category: colour}.
pal = {
    "cell_type": celltype_pal,
    "broad_cell_type": broad_celltype_pal,
    "mes_subtype": mes_pal,
    "epi_subtype": epi_pal,
    "batch": batch_pal,
    'phase': phase_pal,
    'stage': stage_pal,
    'seg_classify': seg_pal,
    'LaneID': lane_pal,
}

In [None]:
# plot everything
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import rcParams

# Figure-local rc overrides: render text in Arial.
custom_rc = {'font.family': 'Arial'}

# Embeddings (one column each) and obs columns (one figure row each) to show.
show_keys = methods
show_cols = ['broad_cell_type', 'mes_subtype', 'phase', 'seg_classify', 'stage']
basis_types = ['UMAP']

# Panel appearance; the figure width grows with the number of embeddings.
font_size, point_size, alpha = 10, .1, 0.8
ncols = len(show_keys)
nrows = int(np.ceil(len(show_keys) / ncols))
figsize = (ncols * 1.5, 1.5)

# Keyword arguments gathered in one place so the call itself stays short.
global_grid_kwargs = dict(
    color_bys=show_cols,
    basis_types=basis_types,
    pal=pal,
    font_size=font_size,
    point_size=point_size,
    alpha=alpha,
    figsize=figsize,
    ncols=ncols,
    seed=seed,
    save_dir=save_dir,
    file_suffix=file_suffix,
    dpi=600,
    save_format='svg',
)

with plt.rc_context(rc=custom_rc):
    ccd.pl.plot_all_embeddings(adata, show_keys, **global_grid_kwargs)


In [None]:

show_cols = ['phase', 'broad_cell_type']
concord_keys = ['concord_knn']

# Camera angles to render. Wider sweeps tried earlier:
#   azims = [0, 45, 90, 135, 180, 225, 270, 315]
#   azims = [30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 330]
#   elevs = [10, 30, 45, 60]
azims = [330]
elevs = [10]

# One saved figure per (azimuth, elevation) pair, azimuth varying slowest.
view_angles = [(azim, elev) for azim in azims for elev in elevs]
for azim, elev in view_angles:
    with plt.rc_context(rc=custom_rc):
        ccd.pl.plot_all_embeddings_3d(
            adata=adata,
            combined_keys=concord_keys,
            color_bys=show_cols,
            basis_types=['UMAP_3D'],
            pal=pal,
            ncols=2,
            rasterized=True,
            point_size=2,
            alpha=0.8,
            elev=elev,
            azim=azim,
            zoom_factor=0.05,
            show_grid=True,
            show_axis_labels=False,
            show_ticks=False,
            show_legend=False,
            tick_label_font_size=6,
            legend_font_size=6,
            save_dir=save_dir,
            # The angle is encoded in the suffix so different views do not overwrite each other.
            file_suffix=file_suffix + f'_azim{azim}_elev{elev}',
            save_format='pdf',
        )


In [None]:
# Plot a single embedding of the global object, one panel per obs column.
basis = 'concord_hcl'
#basis = 'concord_knn'
show_basis = basis + '_UMAP'
show_cols = ['broad_cell_type', 'mes_subtype', 'epi_subtype', 'phase', 'stage', 'leiden_global_Concord']

# Wrapped in the same rc context as every other figure in this notebook so the
# font settings in custom_rc also apply to this plot.
with plt.rc_context(rc=custom_rc):
    ccd.pl.plot_embedding(
        adata, show_basis, show_cols, figsize=(13,9), dpi=600, ncols=3, font_size=6, point_size=1, legend_loc="on data",
        pal=pal,
        save_path=save_dir / f"{show_basis}_{file_suffix}.pdf"
    )

In [None]:
# Export the global object in VisCello format under the data directory.
ccd.ul.anndata_to_viscello(
    adata,
    data_dir.joinpath(f"cello_{proj_name}_{file_suffix}"),
    project_name=proj_name,
    organism='mmu',
)

### Macrophage analysis

In [None]:
import re
ct = 'Macrophage'
# Replace every character that is not a word character or '-' so the name is file-safe.
sanitized_ct = re.sub(r'[^\w\-]', '_', ct)

# Load a previously saved macrophage subset.
# NOTE(review): the next cell rebuilds adata_subset from adata - confirm which
# of the two is meant to be used.
subset_path = data_dir / f"adata_huycke_{sanitized_ct}_Jul17-1951.h5ad"
adata_subset = sc.read(subset_path)

In [None]:
# Extract the macrophage clusters from the global object into an independent copy.
macrophage_clusters = ['17', '6']
adata_subset = adata[adata.obs['leiden_global_Concord'].isin(macrophage_clusters)].copy()

# Subset-level 2D and 3D UMAPs for every integration method.
for basis in methods:
    for n_components, suffix in ((2, 'UMAP_sub'), (3, 'UMAP_3D_sub')):
        result_key = f'{basis}_{suffix}'
        # Look in the subset itself (the object the result is written to), not
        # in the global adata, when deciding whether the embedding already exists.
        if result_key in adata_subset.obsm:
            continue
        ccd.ul.run_umap(
            adata_subset,
            source_key=basis,
            result_key=result_key,
            n_components=n_components,
            n_neighbors=30,
            min_dist=0.1,
            metric='cosine',
            random_state=seed,
        )

# Clustering does not depend on the loop variable, so it runs once instead of
# once per method.
# NOTE(review): 'Concord' is not one of the keys in `methods`; confirm that this
# representation exists in adata_subset.obsm.
sc.pp.neighbors(adata_subset, n_neighbors=30, use_rep='Concord')
sc.tl.leiden(adata_subset, resolution=1.0, key_added='leiden_Concord_sub')

adata_subset.write_h5ad(data_dir / f"adata_huycke_{sanitized_ct}_{file_suffix}.h5ad")

In [None]:
# Normalise one subtype label: the stored name contains a Unicode hyphen
# (U+2010) that is replaced by the plain ASCII '-'.
adata_subset.obs['mac_subtype'] = adata_subset.obs['mac_subtype'].astype(str)
has_unicode_hyphen = adata_subset.obs['mac_subtype'] == 'Lyve1+Mrc1+ M2‐like macrophage'
# Assign through .loc on the DataFrame: chained indexing (obs[col][mask] = ...)
# may write to a temporary copy and does nothing under pandas copy-on-write.
adata_subset.obs.loc[has_unicode_hyphen, 'mac_subtype'] = 'Lyve1+Mrc1+ M2-like macrophage'
adata_subset.write_h5ad(data_dir / f"adata_huycke_{sanitized_ct}_{file_suffix}.h5ad")

In [None]:
# Register the macrophage subset in the VisCello export directory, keyed by its
# file-safe cell-type name.
viscello_dir = str(data_dir.joinpath(f"cello_{proj_name}_{file_suffix}"))
adata_subsets = {sanitized_ct: adata_subset}
ccd.ul.update_clist_with_subsets(
    global_adata=adata,
    adata_subsets=adata_subsets,
    viscello_dir=viscello_dir,
)

In [None]:
# Annotate macrophage clusters based on leiden_Concord_sub
import numpy as np
# Subtype label -> Leiden clusters (leiden_Concord_sub) assigned to it.
cluster_mapping = {
    'Monocyte': ['15'],
    'mo-DC/DC': ['13'],
    'Ccr2/Lyz2+ Macrophage': ['11', '14', '7'],
    'Apol7c/Timd4/Mmp9+ gMac': ['0', '6', '2','10', '1'],
    'Differentiating monocyte': ['12', '4'],
    'Lyve1+Mrc1+ M2-like macrophage': ['9','5', '3', '8']
}

# Invert to cluster id -> subtype so all clusters are relabelled in one pass.
# Cluster ids missing from the mapping keep their id as label.
cluster_to_subtype = {
    str(cluster): subtype
    for subtype, clusters in cluster_mapping.items()
    for cluster in clusters
}
adata_subset.obs['mac_subtype'] = (
    adata_subset.obs['leiden_Concord_sub'].astype(str).replace(cluster_to_subtype)
)

# Map back to global adata. The column starts as object dtype filled with NaN so
# that writing string labels into it is not an incompatible-dtype assignment
# (a float NaN column would have to be upcast, which newer pandas warns about).
adata.obs['mac_subtype'] = np.full(adata.n_obs, np.nan, dtype=object)
adata.obs.loc[adata_subset.obs.index, 'mac_subtype'] = adata_subset.obs['mac_subtype']
#adata.write_h5ad(data_dir / f"adata_huycke_{file_suffix}.h5ad")
#adata_subset.write_h5ad(data_dir / f"adata_huycke_{sanitized_ct}_{file_suffix}.h5ad")

# Colours for the new annotation, registered in the shared palette lookup.
_, _, mac_pal = ccd.pl.get_color_mapping(adata, 'mac_subtype', pal='tab10', seed=seed)
pal['mac_subtype'] = mac_pal

In [None]:
print("Plotting UMAP for", ct)
# File-safe version of the cell-type name, used in output file names.
sanitized_ct = re.sub(r'[^\w\-]', '_', ct)

# Stage labels as strings, taken from the mouse-age annotation.
adata_subset.obs['stage'] = adata_subset.obs['MouseAge_combined'].astype(str)

# Embeddings (one column each) and obs columns (one figure row each) to show.
# Other columns used before: 'batch', 'LaneID', 'stage', 'leiden_Concord_sub', 'seg_classify'.
show_keys = methods
show_cols = ['mac_subtype', 'phase']
basis_types = ['UMAP_sub']

# Panel appearance.
font_size, point_size, alpha = 10, 3, 0.8
figsize = (10, 1.35)
ncols = len(show_keys)
nrows = int(np.ceil(len(show_keys) / ncols))

# Keyword arguments gathered in one place so the call itself stays short.
subset_grid_kwargs = dict(
    color_bys=show_cols,
    basis_types=basis_types,
    pal=pal,
    font_size=font_size,
    point_size=point_size,
    alpha=alpha,
    figsize=figsize,
    ncols=ncols,
    legend_loc='on data',
    seed=seed,
    save_dir=save_dir,
    file_suffix=file_suffix+f"_{sanitized_ct}",
    dpi=600,
    save_format='pdf',
)

with plt.rc_context(rc=custom_rc):
    ccd.pl.plot_all_embeddings(adata_subset, show_keys, **subset_grid_kwargs)

In [None]:
# Plot one subset-level embedding, one panel per obs column.
basis = 'concord_knn'   # alternative: 'concord_hcl'
show_basis = f'{basis}_UMAP_sub'
show_cols = ['mac_subtype', 'phase', 'stage']

subset_embedding_pdf = save_dir / f"{sanitized_ct}_{show_basis}_{file_suffix}.pdf"
with plt.rc_context(rc=custom_rc):
    ccd.pl.plot_embedding(
        adata_subset,
        show_basis,
        show_cols,
        figsize=(10,2),
        dpi=600,
        ncols=3,
        font_size=6,
        point_size=10,
        legend_loc="right margin",
        pal=pal,
        save_path=subset_embedding_pdf,
    )

In [None]:
show_cols = ['mac_subtype', 'phase', 'stage']
concord_keys = ['concord_knn']

# Camera angles to render. Wider sweep tried earlier:
#   azims = [30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 330]
azims = [130]
elevs = [30]

# One saved figure per (azimuth, elevation) pair, azimuth varying slowest.
view_angles = [(azim, elev) for azim in azims for elev in elevs]
for azim, elev in view_angles:
    with plt.rc_context(rc=custom_rc):
        ccd.pl.plot_all_embeddings_3d(
            adata=adata_subset,
            combined_keys=concord_keys,
            color_bys=show_cols,
            basis_types=['UMAP_3D_sub'],
            pal=pal,
            ncols=2,
            rasterized=True,
            point_size=20,
            alpha=0.8,
            elev=elev,
            azim=azim,
            zoom_factor=0.05,
            show_grid=True,
            show_axis_labels=False,
            show_ticks=False,
            show_legend=False,
            tick_label_font_size=6,
            legend_font_size=6,
            save_dir=save_dir,
            # Angle and cell type are encoded in the suffix to keep file names unique.
            file_suffix=file_suffix + f'_azim{azim}_elev{elev}' + f"_{sanitized_ct}",
            save_format='pdf',
        )

In [None]:
# Macrophage state composition at each time point.

# Cell counts with states as rows and time points as columns.
state_count = (
    adata_subset.obs
    .groupby(['mac_subtype', 'MouseAge_combined'])
    .size()
    .unstack(fill_value=0)
)

# Divide every column by its total so the states within one time point sum to 1.
cells_per_timepoint = state_count.sum(axis=0)
state_composition = state_count / cells_per_timepoint
state_composition

In [None]:
# Plot heatmap of state composition
import seaborn as sns
# clustermap creates its own figure (sized by its figsize argument), so no
# separate plt.figure() is opened beforehand; that would only leave an empty figure.
with plt.rc_context(rc=custom_rc):
    sns.set_style("whitegrid")
    # Heatmap of the state composition; rows (states) are clustered, columns
    # (time points) keep their order.
    composition_grid = sns.clustermap(state_composition, cmap='viridis', annot=True, fmt='.2f', linewidths=.5, cbar_kws={'label': 'Composition'}, row_cluster=True, col_cluster=False, annot_kws={"size": 10}, figsize=(5,3))
    # Save through the returned grid so the file always contains this figure,
    # independent of which figure pyplot currently considers active.
    state_composition_heatmap_path = save_dir / f'state_composition_heatmap_{sanitized_ct}_{file_suffix}.pdf'
    composition_grid.savefig(state_composition_heatmap_path, bbox_inches='tight')

In [None]:
# Heatmap of DEG
import scanpy as sc
import pandas as pd
from pathlib import Path

# Workbook with one sheet of differential-expression results per group.
EXCEL_FILE = save_dir/"Macrophage_custom_2025-07-17_de_significant.xlsx"

top_n      = 10                     # how many genes per sheet
rank_col   = "p"                    # column to sort by (ascending; change if needed)
gene_col   = "gene_short_name"      # column that holds gene symbols

# The engine belongs on the ExcelFile constructor; a keyword of that name passed
# to .parse() does not select the workbook reader.
xl = pd.ExcelFile(EXCEL_FILE, engine="openpyxl")

sheet2genes = {}
for sheet in xl.sheet_names:
    df = xl.parse(sheet)

    # keep significant rows only, smallest value of rank_col first
    df = df[df["significant"]].sort_values(rank_col, ascending=True)

    # top_n is applied before the var_names filter, so a sheet can contribute
    # fewer than top_n genes
    genes = df[gene_col].head(top_n)
    # keep only genes present in adata.var_names
    genes = [g for g in genes if g in adata.var_names]

    sheet2genes[sheet] = genes

# Flatten to one ordered array without duplicates (first occurrence wins).
# Built with a single comprehension instead of sum(lists, []), and wrapped in a
# Series because pd.unique on a plain list is deprecated.
all_genes = [gene for sheet_genes in sheet2genes.values() for gene in sheet_genes]
gene_order = pd.unique(pd.Series(all_genes, dtype=object))
print(f"{len(gene_order)} genes will be plotted")


In [None]:
# Subtypes shown in the heatmap, in display order.
subtype_plot = ['Ccr2/Lyz2+ Macrophage', 'Apol7c/Timd4/Mmp9+ gMac', 'Lyve1+Mrc1+ M2-like macrophage']

# Keep cells of those subtypes and the selected genes only.
in_plotted_subtype = adata_subset.obs["mac_subtype"].isin(subtype_plot)
adata_heatmap = adata_subset[in_plotted_subtype, gene_order].copy()

# An ordered categorical makes sorting follow subtype_plot rather than alphabetical order.
adata_heatmap.obs["mac_subtype"] = pd.Categorical(
    adata_heatmap.obs["mac_subtype"], categories=subtype_plot, ordered=True
)

# Reorder the cells so each subtype forms one contiguous block.
cells_by_subtype = adata_heatmap.obs.sort_values("mac_subtype").index
adata_heatmap = adata_heatmap[cells_by_subtype, :].copy()

In [None]:
import seaborn as sns
# Options for concord's annotated heatmap, gathered in one place.
deg_heatmap_kwargs = dict(
    val="X",
    transpose=True,                # genes as rows, cells as columns
    obs_keys=["mac_subtype"],      # colour bar
    pal={"mac_subtype": pal["mac_subtype"]},
    use_clustermap=True,
    cluster_rows=False,
    cluster_cols=False,
    cmap=sns.color_palette("RdYlBu_r", as_cmap=True),
    log_transform=False,
    row_scale=True,
    clip_limits=(-3, 3),
    rasterize=True,                # keeps cells raster, text vector
    title="Top-DEG expression per cell (row-z-scored)",
    figsize=(8, 8),
    save_path=save_dir / f"{proj_name}_topDEG_heatmap.svg",
    show=True,
)

with plt.rc_context(rc=custom_rc):
    ccd.pl.heatmap_with_annotations(adata_heatmap, **deg_heatmap_kwargs)

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from sklearn.preprocessing import StandardScaler
import scipy.sparse as sp


# ── tunables ────────────────────────────────────────────────────────
log_transform   = False      # apply log10(x + pseudocount) before scaling
pseudocount     = 1e-6
row_zscore      = True       # z-score every gene across cells
clip_limits     = (-3, 3)    # clip scaled values to this range (None disables)
cluster_rows    = False
cluster_cols    = False
cmap            = sns.color_palette("RdYlBu_r", as_cmap=True)
figsize         = (14, 10)
fontsize        = 10
heatmap_title   = "Top-DEG expression per cell (row-z-scored)"
# ────────────────────────────────────────────────────────────────────


# 1. Expression matrix (cells × genes) as a dense float array -----------
X = adata_heatmap.X
X = X.toarray() if sp.issparse(X) else np.asarray(X)
X = X.astype(float)

if log_transform:
    X = np.log10(X + pseudocount)

# 2. Per-gene z-score ---------------------------------------------------
# StandardScaler standardises columns; in cells × genes layout those are genes.
if row_zscore:
    X = StandardScaler(with_mean=True, with_std=True).fit_transform(X)

# 3. Genes × cells layout for plotting ----------------------------------
X = X.T

# 4. Clip ---------------------------------------------------------------
if clip_limits is not None:
    X = np.clip(X, clip_limits[0], clip_limits[1])

# 5. Wrap for seaborn ---------------------------------------------------
df = pd.DataFrame(
    X,
    index=adata_heatmap.var_names,      # rows = genes
    columns=adata_heatmap.obs_names,    # cols = cells
)

# 6. Column annotation bar (one colour per cell, from the mac_subtype palette)
col_colors = pd.DataFrame({
    "Subtype": adata_heatmap.obs["mac_subtype"].map(pal["mac_subtype"])
}, index=df.columns)

# 7. Heat-map -----------------------------------------------------------
sns.set(font_scale=fontsize / 10)
g = sns.clustermap(
    df,
    cmap=cmap,
    row_cluster=cluster_rows,
    col_cluster=cluster_cols,
    col_colors=col_colors,
    xticklabels=False,
    yticklabels=True,
    figsize=figsize,
    dendrogram_ratio=(0., 0.05),
    colors_ratio=(0., 0.03),
    mask=np.isnan(df),          # in case some genes are all-NaN
)

# 8. Legend for the annotation bar --------------------------------------
# Only subtypes that actually occur in the plotted cells get a legend entry.
plotted_subtypes = set(adata_heatmap.obs["mac_subtype"].astype(str))
handles_sub = [
    Patch(facecolor=c, label=k)
    for k, c in pal["mac_subtype"].items()
    if k in plotted_subtypes
]

g.ax_col_dendrogram.legend(
    handles=handles_sub,
    title="Annotations",
    bbox_to_anchor=(1.02, 1),
    loc="upper left",
    borderaxespad=0,
    fontsize=fontsize,
)

# 9. Title and export ---------------------------------------------------
# The title is attached to the figure: plt.title() would put it on whichever
# axes pyplot considers current, which is not necessarily the heat-map.
g.ax_heatmap.figure.suptitle(heatmap_title, y=1.02)
# Save through the grid so the file always contains this figure.
g.savefig(save_dir / f"{proj_name}_topDEG_heatmap.png",
          dpi=600, bbox_inches="tight")
plt.show()
