# Setup

## Imports & Settings

In [None]:
%load_ext autoreload
%autoreload 2

import os
import re
import itertools
import functools
import seaborn as sb
import matplotlib.pyplot as plt
import scipy
import scanpy as sc
import pandas as pd
import numpy as np
import corescpy as cr

# Big Options
overwrite = False  # don't overwrite object
full_object = False  # False = faster; True = more features
# write_object_suffix = None
obj_suffix = "_downstream"  # appended to sample/region in new object names
panel = "TUQ97N"
# panel = "XR4UZH"
capitalize_sample = panel == "TUQ97N"  # only this panel capitalizes samples
suffix = ""  # if main objects (<sample><_region if applicable>.h5ad)
# suffix = "_new"  # example suffix for object h5ad file (to avoid overwrite)
n_jobs = 20  # passed as `n_jobs` to the spatial analysis calls

# For Regional/Object Sub-Directory Analyses
# (un-comment the next two lines instead if objects are not in sub-directories)
# sub_dir_parent = None
# sub_dirs = [None]  # if no sub-directory (top level only)
sub_dir_parent = "objects_cropped"  # sub-directory: regional (suffix) objects
sub_dirs = [
    "mucosa",
    "submucosa",
    "myenteric_plexus",
    "smc_circular",
    "smc_longitudinal",
    "serosa",
]  # if objects in sub-directories

# Main Directories
# Each path falls back to an alternative layout if the first doesn't exist,
# so the same notebook runs on either HPC file system.
import getpass  # local import: only needed for the login-name fallback

try:
    _login_name = os.getlogin()
except OSError:  # no controlling terminal (e.g., batch job, some Jupyter servers)
    _login_name = getpass.getuser()
usr_write_rel_path = f"{_login_name}/data/shared-xenium-library"
d_hpc = "/mnt/cho_lab" if os.path.exists(
    "/mnt/cho_lab") else "/sc/arion/projects/untreatedIBD"  # HPC path
d_nfs = os.path.join(d_hpc, "bbdata2") if os.path.exists(os.path.join(
    d_hpc, "bbdata2")) else os.path.join(
        d_hpc, "chobiolab-core/shared-xenium-library")
d_usr = os.path.join(d_hpc, "disk2", usr_write_rel_path) if os.path.exists(
    os.path.join(d_hpc, "disk2")) else os.path.join(d_hpc, usr_write_rel_path)
d_obj = d_usr  # CHANGE IF WRITING & PULLING OBJECTS FROM DIFFERENT PATHS
d_img = os.path.join(d_hpc, f"cache/tissue-registry/xenium/{panel}") if (
    "arion" in d_hpc) else os.path.join(d_hpc, f"bbdata1/xenium/{panel}")

# Construct Directories (Less Likely to Need Changes)
# Mirror my file/directory tree in the `d_usr` directory
obj_ext = ".h5ad"  # extension for processed objects
out_subdir_markers = "find_markers"  # sub-directory under out_dir for markers
# Same relative layout under the object-read (d_obj) & write (d_usr) roots
out_obj, out_dir = [os.path.join(d, f"outputs/{panel}/nebraska")
                    for d in (d_obj, d_usr)]  # read from / save objects to
if out_dir is None:
    out_plot = None
else:
    out_plot = os.path.join(out_dir, "plots/downstream")  # plot directory
file_mdf = os.path.join(d_usr, f"samples_{panel}.csv")  # metadata file path
# Toggle: comment out the second assignment to skip manual annotations
file_a = None  # don't map manual annotations
file_a = os.path.join(out_dir, "annotation_dictionaries/annotations_all.xlsx")
print(f"\n\n\n{'=' * 80}\nDirectories\n{'=' * 80}\n\nHPC Entry Point (Cho): "
      f"{d_hpc}\nData: {d_nfs}\nMetadata: {file_mdf}\nImages: {d_img}\n"
      f"Object/Outputs:\n\t{out_dir} (objects)\n\t{out_plot} (plots)\n\t"
      f"{os.path.join(out_dir, out_subdir_markers)} (markers)\n"
      f"Annotations: {file_a}\n\n\n")

# Computing Resources
gpu = False
sc.settings.n_jobs = 8
# sc.settings.max_memory = 150

# Display
for _opt, _val in [("display.max_colwidth", 1000),
                   ("display.max_columns", 100),
                   ("display.max_rows", 500)]:
    pd.set_option(_opt, _val)  # widen pandas printouts
sc.settings.set_figure_params(dpi=100, frameon=False, figsize=(20, 20))
plt.rcParams.update({
    "axes.labelsize": 14,     # Axis labels
    "axes.titlesize": 20,     # Title
    "xtick.labelsize": 14,    # X-axis tick labels
    "ytick.labelsize": 14,    # Y-axis tick labels
    "figure.titlesize": 20,   # Suptitle (overall figure title)
})

# Samples/Runs
run = None  # just look for samples in all Xenium runs for the panel
# run = "CHO-001"  # run all from this run; so don't have to specify samples
samples = "all"  # use samples = "all" with run = something for all from run
# samples = [  # sample IDs from patients for whom we have all conditions
#     "50452A", "50452B", "50452C",  # old segmentation
#     "50006A", "50006B", "50006C",  # rest are new segmentation
#     "50217A", "50217B", "50217C",
#     "50336B", "50336C", "50336A",
#     "50403A2", "50403B", "50403C1"
# ]  # excludes low-quality sample/condition replicates 50403A1 & 50403C2

# Object Files: Suffixes on Sample ID = Subdirectories or Single Suffix or ""
# One file suffix per sub-directory, preceded by "" (paired with sub-directory
# None) so the overall (non-regional) objects are also run
suffixes = [""] + [(f"_{x}" if x else "") + f"{suffix}" for x in sub_dirs]
sub_dirs = [None] + sub_dirs  # keep aligned with `suffixes` (zipped later)

# Genes of Interest (group label -> gene symbols)
genes = {
    "CD Risk": ["LACC1", "LRRK2", "PTGER4"],
    "SnC Marker": ["CDKN2A", "CDKN1A", "TP53", "PLAUR"],
    "Apoptosis-Resistance": ["BCL2"],
    "SASP": [
        "IL4", "IL13", "IL1A", "CXCL8", "CCL2", "CEBPB", "NFKB1",
        "TGFB1", "IGFBP7",
    ],
    "SASP-IL6": ["OSM", "IL6", "IL6ST"],
    "Fibrosis": [
        "SUCNR1", "CXCR4", "IL33", "IL23A",
        "IL1B", "IL12", "IL17", "IL36", "TL1A", "IL11", "IFNG",
        "IL22", "IL10", "IL21", "IL34", "CCL11",
    ],
    "ER Stress/UPR": ["ATF4"],
    "Autophagy": [
        "MTOR", "ATG3", "ATG5", "ATG7", "ATG12", "ATG16L1", "SQSTM1",
        "BECN1", "IRGM1", "ATG8", "ATG6",
    ],
    "DNA Repair": ["ERCC1", "ERCC4"],
    "Healing": ["LCN2", "MMP9", "GREM1", "PDGFRA"],
    "Mixed": ["ICAM1"],
}   # genes of interest

# Colors for each "key" of genes_dict (only meaningful if `genes` is a dict)
# NOTE(review): 10 colors are listed but `genes` has 11 groups -- confirm how
# the plotting functions handle the unmatched group, or add an 11th color.
if isinstance(genes, dict):
    genes_dict_colors = [
        "#FF0000", "#0000FF", "#000000", "#FFFF00", "#D2B48C",
        "#FFC0CB", "#A52A2A", "#800080", "#008000", "#808080",
    ]
else:
    genes_dict_colors = None

# Leiden Column & Manual Annotation Mapping Options
col_leiden = "leiden_res1pt5_dist0_npc30"
col_assignment = "bucket"  # column in annotation file whose labels to use
# col_assignment = "annotation"  # more specific cell types
if file_a is None:  # no annotation file => use raw Leiden clusters
    col_cluster = col_leiden
else:  # e.g., "bucket_res1pt5_dist0_npc30"
    col_cluster = f"{col_assignment}_{col_leiden.split('leiden_')[1]}"
# New out file suffix ~ cell type (annotation column added only if distinct)
suffix_ct = f"_{col_leiden}"
if col_cluster != col_leiden:
    suffix_ct += f"_{col_cluster}"

## Setup

Get constants (e.g., column names in metadata) and read the metadata. Create a dictionary of clustering parameters using `res_list`, `min_dist_list`, and `n_comps_list` (so you can iterate across different clustering specifications to make multiple versions, e.g., at multiple resolutions). Make any output directories that don't exist yet (e.g., for processed objects, plots, find markers results, Xenium Explorer cluster files), load data into objects, etc.

In [None]:
# Get/Set Constants
# Panel-specific column names/keys; any constant missing for this panel is
# set to None rather than raising
constants_dict = cr.get_panel_constants(panel_id=panel)
col_sample_id_o, col_sample_id, col_condition, col_inflamed, col_subject = [
    constants_dict[x] if x in constants_dict else None for x in [
        "col_sample_id_o", "col_sample_id", "col_condition",
        "col_inflamed", "col_subject"]]
col_stricture, col_f, col_tangram, col_segment, col_object = [
    constants_dict[x] if (x in constants_dict) else None for x in [
        "col_stricture", "col_data_dir",
        "col_tangram", "col_segment", "col_object"]]
key_uninflamed, key_inflamed, key_stricture = [
    constants_dict[x] if (x in constants_dict) else None for x in [
        "key_uninflamed", "key_inflamed", "key_stricture"]]
palette = "tab20" if panel != "TUQ97N" else dict(zip([
    key_uninflamed, key_inflamed, key_stricture], ["b", "r", "y"]))
# Flat list of all genes of interest (`genes` may be a dict of lists or a list)
gois = list(itertools.chain.from_iterable(genes.values())) if isinstance(
    genes, dict) else list(genes)

# Read Metadata
metadata = cr.pp.get_metadata_cho(
    d_nfs, file_mdf, panel_id=panel, samples=samples, run=run,
    capitalize_sample=capitalize_sample)  # get metadata
print("\n\n", metadata[list(set([
    col_sample_id_o, col_subject, col_condition, col_inflamed, col_stricture,
    col_segment]).intersection(metadata))])

# Annotation File (skipped if `file_a` is None = don't map manual annotations)
if file_a is None:
    f_ann = None
else:
    f_ann = pd.read_excel(file_a, index_col=[0, 1]).dropna(
        how="all").dropna(how="all", axis=1)
    # Second index level -> integer -> string (e.g., avoids "1.0"-style labels)
    f_ann = f_ann.reset_index().astype({
        f_ann.index.names[1]: "int"}).astype({
            f_ann.index.names[1]: "string"}).set_index(f_ann.index.names)

# Final Setup
if out_plot is not None:
    os.makedirs(out_plot, exist_ok=True)  # ensure plot save path exists
kws_init = dict(col_sample_id=col_sample_id, col_subject=col_subject,
                col_cell_type=col_cluster)

# Describe

Count the number of cells per sample in each region.

In [None]:
# Count cells per region (file suffix) & sample; NaN if the object is missing
n_cells = {}
for r, sub_d in zip(suffixes, sub_dirs):  # loop sub-directories (or just top)
    if (r == "" and sub_d == "") or sub_d is None:
        in_r = out_obj  # overall objects live at the top level (as in Analyze)
    else:
        in_r = os.path.join(out_obj, sub_dir_parent) if (
            sub_dir_parent) else out_obj  # object directory
        in_r = os.path.join(in_r, sub_d) if sub_d else in_r  # sub-directory?
    n_cells[r] = {}

    # Iterate Samples
    for x in metadata.index.values:
        print(f"\n\n{'=' * 80}\n{x}\n{'=' * 80}\n\n")
        out = os.path.join(in_r, f"{x}{r}{obj_ext}")
        if not os.path.exists(out):
            print(f"Skipping {x}: {out} doesn't exist!")
            n_cells[r][x] = np.nan
        else:
            n_cells[r][x] = sc.read_h5ad(out).obs.shape[0]

# Analyze

The first clustering version (the first specified in `res_list`) is the cell type column used by default in downstream analyses, because it was specified in `kws_init["col_cell_type"]` when creating the object and is thus stored in `self._columns["col_cell_type"]`. To use a different column, pass `col_cell_type` as an argument to the following functions.

In [None]:
%%time

# Iterate Regions: for each region & sample, load the processed object, map
# manual annotations (if any), plot gene expression, run the spatial analyses,
# and write a new object named <sample><region suffix><obj_suffix>.h5ad


def _plot_file(directory, file_name):
    """Return `directory/file_name`, or None if no plot directory is set."""
    # NOTE(review): assumes the plotting/analysis functions skip saving when
    # given None as the output path -- confirm against corescpy
    return None if directory is None else os.path.join(directory, file_name)


for r, sub_d in zip(suffixes, sub_dirs):  # loop sub-directories (or just top)
    fig_central, fig_neigh, fig_cooccur, fig_svg = {}, {}, {}, {}
    fig_gex = {"heat": {}, "dot": {}}
    if (r == "" and sub_d == "") or sub_d is None:
        in_r = out_obj
        out_r = out_plot
    else:
        in_r = os.path.join(out_obj, sub_dir_parent) if (
            sub_dir_parent) else out_obj  # object directory
        in_r = os.path.join(in_r, sub_d) if sub_d else in_r  # sub-directory?
        out_r = str(os.path.join(out_plot, sub_d) if (
            sub_d) else out_plot) if out_plot else None  # plot directory
    if out_r is not None:
        os.makedirs(out_r, exist_ok=True)
    if obj_suffix is not None:  # new object directory
        # Regional objects go under `sub_dir_parent`; overall objects stay at
        # the top level even if `suffix` is non-empty
        out_obj_new = os.path.join(out_dir, sub_dir_parent) if (
            sub_dir_parent and sub_d) else out_dir
        os.makedirs(out_obj_new, exist_ok=True)
    else:
        out_obj_new = None  # no suffix = don't write new objects

    # Iterate Samples
    for x in metadata.index.values:
        print(f"\n\n{'=' * 80}\n{x}\n{'=' * 80}\n\n")

        # Load Data
        out = os.path.join(in_r, f"{x}{r}{obj_ext}")
        out_h5ad = None if out_obj_new is None else os.path.join(
            out_obj_new, sub_d if sub_d else "", f"{x}{r}{obj_suffix}.h5ad")
        print(f"\n\nInput Object: {out}\nOutput Object: {out_h5ad}\n"
              f"Plot Directory: {out_r}")
        if not os.path.exists(out):
            print(f"Skipping {x}: {out} doesn't exist!")
            continue
        if out_h5ad is not None and overwrite is False and os.path.exists(
                out_h5ad):
            print(f"\n\nAlready exists: {out_h5ad}\n\n")
            continue
        self = cr.Spatial(metadata.loc[x][col_f] if full_object else out,
                          path_xenium=metadata.loc[x][col_f],
                          library_id=x, **kws_init)
        if full_object is True:
            self.update_from_h5ad(out)  # update with prior preprocessing
        # for j in metadata.dropna(how="all", axis=1):  # add metadata to .obs
        #     self.rna.obs.loc[:, j] = str(metadata.loc[x][j])
        if file_a is not None:  # map manual annotations onto Leiden clusters
            _ = self.annotate_clusters(
                f_ann.loc[
                    f"{self._library_id}___{col_leiden}_dictionary.xlsx"][
                        col_assignment], col_cell_type=col_leiden,
                col_annotation=col_cluster, copy=False)  # annotations

        # Gene Expression Plots (Only GEX Data)
        # Genes of interest present in this object (unique; original order)
        goi_all = list(itertools.chain.from_iterable(
            genes.values())) if isinstance(genes, dict) else list(genes)
        var_names = set(self.rna.var_names)
        goi = [g for g in dict.fromkeys(goi_all) if g in var_names]
        # fig_gex["spatial"][self._library_id] = self.plot_spatial(
        #     color=goi + [col_cluster], title=f"{self._library_id}{r}")
        # File name stem: drop "/" & replace spaces so it is a valid file name
        pex = "_".join(re.sub("/", "", str(
            f"{self._library_id}{r}{suffix_ct}_{'_'.join(genes)}")).split(
                " ")) + ".jpeg"
        fig_gex["dot"][self._library_id] = cr.pl.plot_dot(
            self.rna, col_cluster, genes,
            title=f"{self._library_id}{r}",
            genes_dict_colors=genes_dict_colors, vmin=0, vmax=10,
            percent="right", center=None, out_file=_plot_file(
                out_r, "dot_gex_" + pex))
        fig_gex["heat"][self._library_id] = cr.pl.plot_matrix(
            self.rna, col_cluster, genes,
            title=f"{self._library_id}{r}",
            genes_dict_colors=genes_dict_colors, vmin=0, vmax=10,
            percent="right", center=None, out_file=_plot_file(
                out_r, "heat_gex_" + pex))

        # Spatial Analyses
        _, fig_central[self._library_id] = self.calculate_centrality(
            figsize=(20, 10), n_jobs=n_jobs, out_plot=_plot_file(
                out_r, f"centrality_{self._library_id}{r}{suffix_ct}.jpeg"))
        _, fig_neigh[self._library_id] = self.calculate_neighborhood(
            figsize=(60, 30), n_jobs=n_jobs, out_plot=_plot_file(
                out_r, f"neighborhood_{self._library_id}{r}{suffix_ct}.jpeg"))
        _, fig_cooccur[self._library_id] = self.find_cooccurrence(
            figsize=(60, 20), n_jobs=n_jobs,
            kws_plot=dict(wspace=3), out_plot=_plot_file(
                out_r, f"cooccurrence_{self._library_id}{r}{suffix_ct}.jpeg"))
        _ = self.find_svgs(
            genes=goi, method="moran", n_perms=10, kws_plot=dict(
                legend_fontsize="large"), figsize=(15, 15), n_jobs=n_jobs,
            out_plot=_plot_file(
                out_r, f"svg_{self._library_id}{r}{suffix_ct}.jpeg"))
        if out_h5ad is not None:
            # Region sub-directory may not exist under the write root yet
            os.makedirs(os.path.dirname(out_h5ad), exist_ok=True)
            print(f"Writing {out_h5ad}")
            self.write(out_h5ad)

# Reload & Concatenate Results

In [None]:
%%time

# Iterate Regions: reload the downstream objects & collect their spatial
# results (stored in `.rna.uns`) into data frames indexed by region & sample
cells_spatial, genes_spatial, cooccur = {}, {}, {}
for r, sub_d in zip(suffixes, sub_dirs):  # loop sub-directories (or just top)
    rix = "Overall" if sub_d in [None, ""] else sub_d
    outed = os.path.join(out_dir, sub_dir_parent) if (
        sub_dir_parent) and sub_d is not None else out_dir  # object directory
    cells_spatial[rix], genes_spatial[rix], cooccur[rix] = {}, {}, {}

    # Iterate Samples
    for x in metadata.index.values:
        print(f"\n\n{'=' * 80}\n{x}\n{'=' * 80}\n\n")
        out_h5ad = os.path.join(outed, sub_d if sub_d else "",
                                f"{x}{r}{obj_suffix}.h5ad")
        if not os.path.exists(out_h5ad):
            print(f"{out_h5ad} doesn't exist!")
            continue
        self = cr.Spatial(out_h5ad, path_xenium=metadata.loc[x][col_f],
                          library_id=x, **kws_init)
        cct = self._columns["col_cell_type"]  # cell type column of object
        k_c = f"{cct}_centrality_scores"
        k_n = f"{cct}_nhood_enrichment"
        k_i, k_o = f"{cct}_interactions", f"{cct}_co_occurrence"
        # Skip sample if any result is absent (before storing partial results)
        missing = [k for k in [k_c, k_n, k_i, k_o, "moranI"]
                   if k not in self.rna.uns]
        if missing:
            print(f"Spatial results {', '.join(missing)} missing "
                  "from `self.rna.uns`!")
            continue
        central = self.rna.uns[k_c].rename_axis(cct)
        cccs = central.index.values  # cell type labels (rows of centrality)
        # Neighborhood: one cell type x cell type matrix per entry of uns[k_n],
        # each stacked to long format => one column per entry
        neigh = pd.concat([pd.DataFrame(
            self.rna.uns[k_n][q], index=cccs, columns=cccs).rename_axis(
                cct).rename_axis(f"{cct}_2", axis=1).stack()
                for q in self.rna.uns[k_n]], keys=self.rna.uns[k_n], axis=1)
        neigh.columns = [f"neighborhood_{q}" for q in neigh.columns]
        ixs = pd.DataFrame(
            self.rna.uns[k_i], index=cccs, columns=cccs).rename_axis(
                cct).rename_axis(f"{cct}_2", axis=1)
        # Co-occurrence: slice third axis of "occ" => one matrix per interval
        cooc = self.rna.uns[k_o]
        cooc = pd.concat([pd.DataFrame(cooc["occ"][:, :, i], pd.Index(
            cccs, name=cct), columns=pd.Index(cccs, name=f"{cct}_2"))
                          for i in np.arange(cooc["occ"].shape[2])],
                         keys=cooc["interval"], names=["Interval"])
        cooccur[rix][x] = cooc
        cells_spatial[rix][x] = central.join(ixs.stack().to_frame(
            "Interaction Score").join(neigh))
        svg = self.rna.uns["moranI"].rename_axis(
            "Gene").rename_axis("Metric", axis=1)
        svg.columns = [f"svg_{q}" for q in svg.columns]
        genes_spatial[rix][x] = svg
    # Concatenate samples within region (None if no samples were available)
    cells_spatial[rix], genes_spatial[rix], cooccur[rix] = [
        pd.concat([v[j] for j in v], keys=v, names=[
            self._columns["col_sample_id"]]) if len(v) > 0 else None
        for v in [cells_spatial[rix], genes_spatial[rix], cooccur[rix]]]
# Concatenate regions, then add subject & condition from metadata to the index
cells_spatial, genes_spatial, cooccur = [pd.concat([
    u[r] for r in u], keys=u, names=["Region"]) for u in [
        cells_spatial, genes_spatial, cooccur]]
cells_spatial, genes_spatial, cooccur = [x.join(metadata[[
    col_subject, col_condition]]).set_index([
        col_subject, col_condition], append=True)
    for x in [cells_spatial, genes_spatial, cooccur]]
# Reorder index levels: region, subject, condition first
cells_spatial, genes_spatial, cooccur = [x.reset_index().set_index([
    "Region", col_subject, col_condition] + list(x.index.names.difference([
        "Region", col_subject, col_condition]))) for x in [
            cells_spatial, genes_spatial, cooccur]]
cooccur = cooccur.rename_axis(f"{col_cluster}_2", axis=1)
print(cooccur)
print(genes_spatial)
cells_spatial

## Normalize

In [None]:
# Scale neighborhood counts by the maximum count within each region & sample.
# Assigning the column (rather than joining) lets this cell be re-run, and
# `transform("max")` ignores missing values when finding the maximum.
nhood_max = cells_spatial.groupby(["Region", col_sample_id])[
    "neighborhood_count"].transform("max")  # aligned to original rows
cells_spatial["Neighborhood Score (Maximum-Normalized)"] = cells_spatial[
    "neighborhood_count"] / nhood_max

# Visualize

## Co-Occurrence

In [None]:
# Distribution (KDE) of the intervals at which co-occurrence was computed,
# colored by region
fig = sb.displot(cooccur.reset_index("Interval"), x="Interval", kind="kde",
                 cut=0, fill=True, hue="Region")
fig.fig.suptitle("Distributions of Co-Occurrence Computed Intervals")
fig.fig.tight_layout()

# Summary statistics of the intervals within each region
print(cooccur.reset_index("Interval")["Interval"].groupby([
    "Region"]).describe())

# Intervals to plot: the overall minimum & the median (across regions) of the
# within-region 75th percentiles
intervals = [cooccur.reset_index("Interval")["Interval"].min(),
             cooccur.reset_index("Interval")["Interval"].groupby([
                 "Region"]).describe()["75%"].median()]
for iv in intervals:
    # For each region & sample, take the row whose interval is closest to `iv`,
    # then reshape to long format & add subject/condition from metadata.
    # NOTE(review): `x.iloc[<int>]` returns a single row per group; because
    # each interval has one row per cell type, only the first row at the
    # nearest interval is kept -- confirm that this is intended.
    # NOTE(review): `rename_axis(col_cluster, axis=1)` relabels the column
    # axis (previously named f"{col_cluster}_2") -- confirm intended labeling.
    cooccur_interval = cooccur.reset_index("Interval").groupby([
        "Region", col_sample_id]).apply(
            lambda x: x.iloc[np.argmin(abs(x["Interval"] - iv))]).drop(
                "Interval", axis=1).rename_axis(col_cluster, axis=1).stack(
                    ).to_frame("Pr(Co-Occur)").join(metadata[[
                        col_subject, col_condition]])
    # Bar plot per region (facets), bars colored by condition
    fig = sb.catplot(cooccur_interval, x=col_cluster, y="Pr(Co-Occur)",
                    hue=col_condition, col="Region", col_wrap=3, kind="bar",
                    sharey=False, sharex=False, height=8,
                    aspect=2.5, palette=palette)
    fig.set_xticklabels(rotation=45, fontsize=20)
    fig.set_titles(col_template="{col_name}", size=24)
    fig.fig.suptitle(
        f"Probability of Co-Occurrence (Interval = {iv:.2g})", fontsize=32)
    plt.subplots_adjust(hspace=1, top=0.92)  # room for rotated labels/title
    fig.fig.set_dpi(200)

In [None]:
# Line plots of co-occurrence probability with a reference cell type across
# intervals: one figure per reference & region (or region group), one panel
# per condition, one line per subject & cell type
# cell_types = list(cooccur.columns)
cell_types = ["Endothelial", "Stromal", "Neuron-Glia", "T Cell", "B Cell"]
# cell_refs = cell_types
# cell_refs = ["Neuron-Glia"]
# cell_refs = ["Mast Cell", "Myeloid"]
cell_refs = ["Myeloid"]
regions = cooccur.reset_index()["Region"].unique()  # all (overridden below)
# regions = ["Overall"]
regions = ["mucosa", "submucosa",
           ["smc_circular", "smc_longitudinal", "myenteric_plexus"]]

for ref in cell_refs:
    for r in regions:
        rix = [r] if isinstance(r, str) else r  # region(s) as list for .loc
        # Long format: rows = interval x cell type, value = Pr with `ref`
        coo = cooccur[cell_types].loc[rix].stack().unstack(col_cluster)[
            ref].to_frame("Pr(Co-Occur)").reset_index().rename(
                {f"{col_cluster}_2": "Cell Type"}, axis=1)
        conds = coo[col_condition].unique()
        # squeeze=False => always an array of axes, even with one condition
        fig, axes = plt.subplots(1, len(conds), squeeze=False)
        axes = axes.ravel()
        for i, cond in enumerate(conds):
            sb.lineplot(coo[coo[col_condition] == cond], x="Interval",
                        y="Pr(Co-Occur)", units=col_subject,
                        style=col_subject,
                        hue="Cell Type", estimator=None, ax=axes[i])
            axes[i].set_title(cond)
        reg = f"{' '.join(r.split('_')).capitalize()}" if isinstance(
            r, str) else " | ".join(r)
        fig.suptitle(f"P(Co-Occur) with {ref}: {reg}")
        plt.subplots_adjust(hspace=0.5, top=0.9, right=0.8)

In [None]:
# LOWESS-smoothed co-occurrence probability with a reference cell type across
# intervals: one figure per reference & region (or region group), one panel
# per cell type, colored by condition
# cell_types = list(cooccur.columns)
cell_types = ["Endothelial", "Stromal", "Neuron-Glia", "T Cell", "B Cell"]
# cell_refs = cell_types
# cell_refs = ["Neuron-Glia"]
# cell_refs = ["Mast Cell", "Myeloid"]
cell_refs = ["Myeloid"]
regions = cooccur.reset_index()["Region"].unique()  # all (overridden below)
# regions = ["Overall"]
regions = ["mucosa", "submucosa",
           ["smc_circular", "smc_longitudinal", "myenteric_plexus"]]

for ref in cell_refs:
    for r in regions:
        rix = [r] if isinstance(r, str) else r  # region(s) as list for .loc
        # Long format: rows = interval x cell type, value = Pr with `ref`
        coo = cooccur[cell_types].loc[rix].stack().unstack(col_cluster)[
            ref].to_frame("Pr(Co-Occur)").reset_index().rename(
                {f"{col_cluster}_2": "Cell Type"}, axis=1)
        fig = sb.lmplot(coo, x="Interval", y="Pr(Co-Occur)", lowess=True,
                        col="Cell Type", sharex=False, units=col_subject,
                        sharey=False, hue=col_condition, palette=palette)
        fig.set_titles(col_template="{col_name}", size=24)
        reg = f"{' '.join(r.split('_')).capitalize()}" if isinstance(
            r, str) else " | ".join(r)
        fig.fig.suptitle(f"P(Co-Occur) with {ref}: {reg}")
        # No `right=`: the previous right=0.1 placed the right edge of the
        # panels at 10% of the figure width (or left of the left edge)
        plt.subplots_adjust(hspace=0.5, top=0.9)

In [None]:
# LOWESS-smoothed co-occurrence probability with a reference cell type:
# rows = subjects, columns = cell types, colored by condition
# cell_types = list(cooccur.columns)
cell_types = ["Endothelial", "Stromal", "Neuron-Glia"]
# cell_refs = cell_types
# cell_refs = ["Neuron-Glia"]
# cell_refs = ["Mast Cell", "Myeloid"]
cell_refs = ["Myeloid"]
regions = cooccur.reset_index()["Region"].unique()
# regions = ["Overall"]
regions = ["mucosa", "submucosa",
           ["smc_circular", "smc_longitudinal", "myenteric_plexus"]]

for ref, r in itertools.product(cell_refs, regions):
    rix = r if not isinstance(r, str) else [r]  # region(s) as list for .loc
    # Long format: rows = interval x cell type, value = Pr with `ref`
    coo = cooccur[cell_types].loc[rix].stack().unstack(col_cluster)[ref]
    coo = coo.to_frame("Pr(Co-Occur)").reset_index()
    coo = coo.rename(columns={f"{col_cluster}_2": "Cell Type"})
    fig = sb.lmplot(
        coo, x="Interval", y="Pr(Co-Occur)", lowess=True,
        row=col_subject, col="Cell Type", sharex=False,
        sharey=False, hue=col_condition, palette=palette)
    fig.set_titles(
        col_template="{col_name}", row_template="{row_name}", size=24)
    # Title label: prettify a single region name or join a group of regions
    if isinstance(r, str):
        reg = " ".join(r.split("_")).capitalize()
    else:
        reg = " | ".join(r)
    fig.fig.suptitle(f"P(Co-Occur) with {ref}: {reg}")
    fig.fig.tight_layout()
    plt.subplots_adjust(hspace=0.5, top=0.92)

In [None]:
# LOWESS-smoothed co-occurrence probability with a reference cell type:
# rows = subjects, columns = conditions, colored by cell type
# cell_types = list(cooccur.columns)
cell_types = ["Neuron-Glia", "Endothelial", "Stromal", "Myeloid"]
# cell_refs = cell_types
cell_refs = ["Myeloid"]
regions = cooccur.reset_index()["Region"].unique()
# regions = ["smc_circular", "smc_longitudinal"]
regions = ["mucosa", "submucosa",
           ["smc_circular", "smc_longitudinal", "myenteric_plexus"]]

for ref, r in itertools.product(cell_refs, regions):
    # Long format: rows = interval x cell type, value = Pr with `ref`
    # (`r` is passed to .loc as is: a region name or a list of region names)
    coo = cooccur[cell_types].loc[r].stack().unstack(col_cluster)[ref]
    coo = coo.to_frame("Pr(Co-Occur)").reset_index()
    coo = coo.rename(columns={f"{col_cluster}_2": "Cell Type"})
    fig = sb.lmplot(
        coo, x="Interval", y="Pr(Co-Occur)", lowess=True,
        row=col_subject, hue="Cell Type", sharex=False,
        sharey=False, col=col_condition)
    fig.set_titles(
        col_template="{col_name}", row_template="{row_name}", size=24)
    # Title label: prettify a single region name or join a group of regions
    if isinstance(r, str):
        reg = " ".join(r.split("_")).capitalize()
    else:
        reg = " | ".join(r)
    fig.fig.suptitle(f"P(Co-Occur) with {ref}: {reg}")
    fig.fig.tight_layout()
    plt.subplots_adjust(hspace=0.5, top=0.92)

In [None]:
# # for cell in cooccur.reset_index()[col_cluster].unique():
# for cell in ["Neuron-Glia"]:
#     for r in cooccur.reset_index()["Region"].unique():
#         conds = cooccur.reset_index()[col_condition].unique()
#         fig, axes = plt.subplots(
#             len(cooccur.reset_index()[col_subject]), len(conds))
#         coor = cooccur[cell].to_frame("Pr(Co-Occur)").loc[r].reset_index()
#         for i, x in enumerate(coor[col_subject].unique()):
#             coo = coor[coor[col_subject] == x]
#             for j, y in conds:
#                 sb.lineplot(coo[coo[col_condition] == y], x="Interval",
#                             y="Pr(Co-Occur)", hue=col_cluster,
#                             ax=axes[i, j])
#                 axes[i, j].set_title(f"{y} | {x}")
#         fig.suptitle(f"P(Co-Occur) with {cell}: {r.capitalize()}")

## Neighborhood

In [None]:
# Bar plots of neighborhood metric(s) by cell type: one panel per region,
# bars colored by condition
# for x in cells_spatial.columns:
for x in ["Neighborhood Score (Maximum-Normalized)"]:
    kws_bar = dict(x=col_cluster, y=x, hue=col_condition,
                   palette=palette, col="Region", col_wrap=3,
                   sharex=False, kind="bar", height=12, aspect=2.5)
    # Alternative data: cells_spatial.drop("Overall", level="Region")
    fig = sb.catplot(cells_spatial, **kws_bar)
    fig.set_xticklabels(rotation=45, fontsize=20)
    # fig.set_axis_labels(rotation=45, fontsize=36)
    fig.set_titles(fontsize=36)
    fig.fig.suptitle(x)
    plt.subplots_adjust(hspace=0.5, top=0.92)
    # fig.fig.set_dpi(300)
    # fig._legend.set_title(fig._legend.get_title().get_text(), prop={
    #     "size": 16})
    # for text in fig._legend.get_texts():
    #     text.set_fontsize(16)

In [None]:
# Violin plots of neighborhood metric(s) by cell type: one panel per region,
# violins split/colored by condition
# NOTE(review): `split=True` may require exactly two hue levels depending on
# the seaborn version -- confirm with the number of conditions plotted.
# for x in cells_spatial.columns:
for x in ["Neighborhood Score (Maximum-Normalized)"]:
    kws_violin = dict(x=col_cluster, y=x, hue=col_condition, split=True,
                      palette=palette, col="Region", col_wrap=3,
                      sharex=False, kind="violin", height=12, aspect=2.5)
    # Alternative data: cells_spatial.drop("Overall", level="Region")
    fig = sb.catplot(cells_spatial, **kws_violin)
    fig.set_xticklabels(rotation=45, fontsize=20)
    fig.set_titles(fontsize=36)
    fig.fig.suptitle(x)
    plt.subplots_adjust(hspace=0.5, top=0.92)

# Workspace

In [12]:
# Scratch re-run of the spatial analyses on the object currently in `self`
# (relies on `self`, `r`, `out_r`, `goi`, and the figure dictionaries left
# over from the analysis loop)
lib = self._library_id
tag = f"{lib}{r}{suffix_ct}"  # shared stem of the plot file names

_, fig_central[lib] = self.calculate_centrality(
    figsize=(20, 10), n_jobs=n_jobs,
    out_plot=os.path.join(out_r, f"centrality_{tag}.jpeg"))
_, fig_neigh[lib] = self.calculate_neighborhood(
    figsize=(60, 30), n_jobs=n_jobs,
    out_plot=os.path.join(out_r, f"neighborhood_{tag}.jpeg"))

_, fig_cooccur[lib] = self.find_cooccurrence(
    figsize=(60, 20), kws_plot=dict(wspace=3),
    out_plot=os.path.join(out_r, f"cooccurrence_{tag}.jpeg"),
    n_jobs=n_jobs)
_ = self.find_svgs(
    genes=goi, method="moran", n_perms=10,
    kws_plot=dict(legend_fontsize="large"), figsize=(15, 15),
    n_jobs=n_jobs, out_plot=os.path.join(out_r, f"svg_{tag}.jpeg"))



<<< QUANTIFYING GRAPH >>>


	*** Building connectivity matrix...
