# Imports

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import os
import re
import functools
import matplotlib.pyplot as plt
import seaborn as sb
import scanpy as sc
import spatialdata
import spatialdata_io as sdio
import numpy as np
import pandas as pd
import corescpy as cr

# Setup

## Options & Data

In [None]:
%%time

# Count Threshold for Cell Quantification
count_threshold = 1

# File Paths
libid = "Inflamed-50006A"
# libid = "Uninflamed-50336C"
# libid = "Stricture-50564A4"
dir_data = "/mnt/cho_lab/bbdata2/outputs/TUQ97N"
out_dir = str("/mnt/cho_lab/disk2/elizabeth/data/shared-xenium-library/"
              "outputs/TUQ97N/nebraska")
path_dir = os.path.join(out_dir, "pathology")
file_align = os.path.join(path_dir,
                          f"alignment/{libid}_alignment_files/matrix.csv")
file_image = (os.path.join(path_dir, f"{libid.split('-')[1]}.ndpi"),  # raw
              os.path.join(path_dir, f"ome-tiff/{libid}.ome.tif"))  # convert

# Clustering Version
col_cell_type = "leiden_res1pt5_dist0_npc30"  # high resolution
# c_t = "leiden_res0pt75_dist0pt3_npc30"  # medium resolution
# c_t = "leiden_res0pt5_dist0pt5_npc30"  # low resolution

# Display
pd.options.display.max_colwidth = 1000
pd.options.display.max_columns = 100
pd.options.display.max_rows = 500

# Spatial Data
# List every entry of every run directory as "<run>/<entry>" (relative to
# dir_data); plain files sitting directly in dir_data are skipped
files = [os.path.join(run, i) for run in os.listdir(dir_data)
         if os.path.isdir(os.path.join(dir_data, run))
         for i in os.listdir(os.path.join(dir_data, run))]
# Library ID without its first "-"-separated field ("Inflamed-50006A" -> "50006A")
sample_id = "-".join(libid.split("-")[1:])
# An entry matches when the third "__"-separated field of its base name,
# truncated at the first "-", equals the sample ID; entries with fewer than
# three "__" fields cannot match and are ignored
matches = [x for x in files if len(os.path.basename(x).split(
    "__")) > 2 and os.path.basename(x).split("__")[2].split(
        "-")[0] == sample_id]
if not matches:
    raise FileNotFoundError(
        f"No entry for sample '{sample_id}' found under {dir_data}")
file_path = matches[0]  # first match, as before
self = cr.Spatial(os.path.join(dir_data, file_path), library_id=libid,
                  col_cell_type=col_cell_type, n_jobs=16)
self.update_from_h5ad(os.path.join(out_dir, libid + ".h5ad"))
self.get_layer("counts", inplace=True)
self.adata

## Load Annotations

In [None]:
# Annotation dictionary: first column = cluster label, remaining columns
# looked up by name ("annotation", "bin", "bucket")
fmr = os.path.join(
    out_dir, "annotation_dictionaries",
    f"{self._library_id}___{col_cell_type}_dictionary.xlsx")  # file
fmr = pd.read_excel(fmr).astype(str)
c_m = col_cell_type.split("leiden_")[1]  # clustering suffix for new columns
clusters = self.rna.obs[col_cell_type].astype(int).astype(str)
lookup = fmr.set_index(fmr.columns[0])  # cluster label -> annotation row
for x in ["annotation", "bin", "bucket"]:
    # Assign the column directly: `.loc[:, col] = values` on an existing
    # column sets values in place (pandas >= 2.0) and would silently keep
    # the old dtype instead of storing a categorical
    self.rna.obs[f"{x}_{c_m}"] = clusters.replace(lookup[x]).astype(
        "category")  # cluster -> label, stored as categorical

## Load Image

If you haven't already, add the `bfconvert` command-line tool (https://docs.openmicroscopy.org/bio-formats/5.7.1/users/comlinetools/index.html) to your path. For instance, if the tool is stored in `/opt`:

`echo 'export PATH="$PATH:/opt/bftools/"' >> ~/.bashrc`.

In [None]:
# file_image holds (raw slide path, converted OME-TIFF path)
path_raw, path_ome = file_image
if not os.path.exists(path_ome):  # only convert when no OME-TIFF exists yet
    cr.tl.write_ome_tif(path_raw, path_ome, bf_cmd="bfconvert")
# Register the converted image under the name "he" with its alignment file
self.add_image(path_ome, name="he", file_align=file_align)

In [None]:
# %matplotlib inline
# import matplotlib.pyplot as plt
# from napari_spatialdata import Interactive
# from spatialdata import SpatialData

# plt.rcParams["figure.figsize"] = (20, 20)

# sdata = self.adata
# interactive = Interactive(sdata)
# interactive.run()

# Render Images

In [None]:
%%time

axes = plt.subplots(1, 2, figsize=(20, 13))[1].flatten()
# axes = plt.subplots(3, 1, figsize=(20, 13))[1].flatten()
self.adata.pl.render_images("he").pl.show(
    ax=axes[0], title="H&E", coordinate_systems="global")
self.adata.pl.render_images("morphology_focus").pl.show(
    ax=axes[1], title="Morphology", coordinate_systems="global")
# self.adata.pl.render_shapes(color=col_cell_type).pl.show(
#     ax=axes[2], title="Labels", coordinate_systems="global")

In [None]:
self.adata.pl.render_images("morphology_focus").pl.show(
    title="Morphology", coordinate_systems="global", dpi=20)
self.adata.pl.render_shapes(elements="cell_boundaries", groups=[
    "1", "2"], color=col_cell_type).pl.show(coordinate_systems="global")

In [None]:
axes = plt.subplots(1, 2, figsize=(30, 30))[1].flatten()
self.adata.pl.render_images("he").pl.show(
    ax=axes[0], title="H&E", coordinate_systems="global")
self.adata.pl.render_shapes(color=col_cell_type).pl.show(
    ax=axes[1], title="Labels", coordinate_systems="global")

In [None]:
sdata = self.crop([800, 1000], [600, 800])

In [None]:
sdata.pl.render_images("morphology_focus", scale="scale4").pl.show(
    title="Morphology", coordinate_systems="global")

In [None]:
%%time

from spatialdata_io.experimental import to_legacy_anndata

adata = to_legacy_anndata(self.adata, include_images=True,
                          coordinate_system="transformed")
sc.pl.spatial(adata, library_id="morphology_focus", img_key="hires",
              na_color="white", show=True, crop_coord=(0, 2000, 0, 2000))

# Image Analysis

In [None]:
# Directories & Metadata
load, reannotate = True, True
# run = "CHO-011"
# samples = "all"
run = None  # just look for samples in all runs
# samples = ["50452A", "50452B", "50006A", "50006B",
#            "50217A", "50217B", "50336B", "50336C"]  # paired (un)inflamed
# samples = ["50006B", "50006A",  "50006C",
#            "50217B", "50217A", "50217C",
#            "50564A4",
#            "50452A", "50452B", "50452C",
#            "50336C", "50336B",  "50336A"]  # all
samples = ["50006C", "50217C", "50452C", "50336A"]  # paired strictures


# Optionally, Define Manual Annotation Versions
# should be stored in ("<out_dir>/annotation_dictionaries")
# in format <selves[i]._library_id>___leiden_<man_anns[i]>_dictionary.xlsx
# with first column = leiden cluster and second column = annotation
man_anns = True  # load manual annotations according to clustering kws
# man_anns = ["res0pt5_dist0pt5_npc30", "res0pt75_dist0pt3_npc30",
#             "res1pt5_dist0_npc30"]  # choose manual annotations to load
# man_anns = None  # do not load manual annotations

# Main Directories
# Replace manually or mirror my file/directory tree in your home (`ddu`)


In [None]:
# NOTE(review): `genes` is not assigned in any cell above this one in the
# visible notebook — confirm it is defined before running this cell.
for g in genes:
    sc.queries.enrich(adata, g)


# Workspace

## STLearn

In [None]:
SPATIAL_KEY = "spatial"


def update_spatial_uns(adata, library_id, col_sample_id, rna_only=False):
    """
    Store an object's images under ``.uns["spatial"][library_id]["images"]``.

    If ``adata`` exposes an ``images`` attribute, every entry of every
    image element is wrapped in ``sq.im.ImageContainer`` and stored under
    the key ``f"{library_id}{SPATIAL_IMAGE_KEY_SEP}{element}_{entry}"``.
    For elements whose name contains "focus", the entry with the lowest
    "scale<N>" number is additionally stored under "hires".

    NOTE(review): ``sq`` and ``SPATIAL_IMAGE_KEY_SEP`` are not defined in
    the visible part of this file — confirm both are in scope before
    calling this on an object that has images.

    Args:
        adata: Object with a ``table`` attribute, or (only when
            ``rna_only`` is True) an object carrying ``uns`` itself.
        library_id: Key under which the images are filed.
        col_sample_id: Column added to ``adata.table.obs`` (filled with
            ``library_id``) if absent; only used when ``rna_only``
            is not True.
        rna_only: If True, return just the table (or ``adata`` itself
            when it has no ``table`` attribute).

    Returns:
        The table (``rna_only=True``) or ``adata`` (otherwise). Any
        existing ``uns["spatial"]`` entry is replaced.
    """
    containers = {}
    if "images" in dir(adata):
        for element in adata.images:
            entries = adata.images[element]
            if "focus" in element:  # only "focus" elements get a "hires"
                scale_ids = [int(entry.split("scale")[1])
                             for entry in entries if "scale" in entry]
            else:
                scale_ids = []
            lowest = str(min(scale_ids)) if len(scale_ids) > 0 else None
            for entry in entries:
                key = f"{library_id}{SPATIAL_IMAGE_KEY_SEP}{element}_{entry}"
                containers[key] = sq.im.ImageContainer(
                    entries[entry].image, library_id=library_id)
                if lowest is None or "scale" not in entry:
                    continue
                if str(entry.split("scale")[1]) == lowest:
                    containers["hires"] = containers[key]  # Squidpy key
    payload = {library_id: {"images": containers}}
    if rna_only is not True:
        table = adata.table
        table.uns[SPATIAL_KEY] = payload
        if col_sample_id not in table.obs:
            table.obs.loc[:, col_sample_id] = library_id
        return adata
    target = adata.table if "table" in dir(adata) else adata
    target.uns[SPATIAL_KEY] = payload
    return target

In [1]:
import matplotlib.pyplot as plt
import warnings
import stlearn as st
import scanpy as sc
import pandas as pd
import numpy as np

warnings.filterwarnings("ignore")

library_id = "Inflamed-50006A"

col_cell_type = "leiden_res1pt5_dist0_npc30"
adata = sc.read(str("/mnt/cho_lab/disk2/elizabeth/data/shared-xenium-library/"
                    f"outputs/TUQ97N/nebraska/{library_id}.h5ad"))
adata = update_spatial_uns(adata, library_id, "Sample", rna_only=True)

ImportError: Numba needs NumPy 1.22 or greater. Got NumPy 1.21.

In [2]:
kwargs = {}

col_cell_type = None
n_spots = 125
organism = "human"
resource = "connectomeDB2020_lit"
distance = None
min_spots = 20
n_pairs = 100  # CHANGE DEFAULT TO 10000
n_top = 50
n_jobs = 8
stats = "all"
layer = "counts"

In [39]:
scale = 1
quality = "hires"
spot_diameter_fullres = 15
# Scale factors are stored under the library's own entry, so look for them
# there; testing the top-level "spatial" dict (keyed by library ID) was
# always true and overwrote any existing scale factors
if "scalefactors" not in adata.uns["spatial"][library_id]:
    adata.uns["spatial"][library_id]["scalefactors"] = {
        "tissue_" + quality + "_scalef": scale,
        "spot_diameter_fullres": spot_diameter_fullres}

In [None]:
# Process Arguments
if isinstance(stats, str) and stats.lower().strip() == "all":
    stats = ["lr_scores", "p_vals", "p_adjs", "-log10(p_adjs)"]
# Second target was a duplicate of the first, which left `adj_method`
# undefined and overwrote the cutoff with the adjustment method
pval_adj_cutoff, adj_method = [kwargs.pop(x, None) for x in [
    "pval_adj_cutoff", "adj_method"]]

# Make Compatible with Hard-Coded Column in stlearn Code
# max_coor = np.max(adata.obsm["spatial"])
# scale = 2000 / max_coor

# Pop with defaults and outside the `if`, so these names always exist for the
# scale-factor block below and an empty `kwargs` no longer raises KeyError
scale = kwargs.pop("scale", 1)
quality = kwargs.pop("key_image", "hires")
spot_diameter_fullres = kwargs.pop("spot_diameter_fullres", 15)
if "spatial" in adata.obsm:
    adata.obs.loc[:, "imagerow"] = adata.obsm["spatial"][:, 0] * scale
    adata.obs.loc[:, "imagecol"] = adata.obsm["spatial"][:, 1] * scale
# Scale factors are stored per library, so check the library's own entry
# (the top-level "spatial" dict is keyed by library ID)
if "scalefactors" not in adata.uns["spatial"][library_id]:
    adata.uns["spatial"][library_id]["scalefactors"] = {
        "tissue_" + quality + "_scalef": scale,
        "spot_diameter_fullres": spot_diameter_fullres}

# Process Data
adata.X = adata.layers[layer].copy()
st.pp.normalize_total(adata)

In [None]:
# Create Spot Grid
grid = st.tl.cci.grid(adata, n_row=n_spots, n_col=n_spots,
                      use_label=col_cell_type)

In [None]:
# Plot: Compare Clusters to Created Spots
fig, axes = plt.subplots(ncols=2, figsize=(20, 8))
st.pl.cluster_plot(grid, use_label=cct, size=10, ax=axes[0], show_plot=False)
st.pl.cluster_plot(adata, use_label=cct, ax=axes[1], show_plot=False)
axes[0].set_title(f"Grid: Dominant Spots")
axes[1].set_title(f"Cell {cct} Labels")
plt.show()

In [None]:
groups = list(grid.obs[cct].cat.categories)
for g in groups[0:2]:
    fig, axes = plt.subplots(ncols=3, figsize=(20,8))
    group_props = grid.uns[cct][g].values
    grid.obs["Group"] = group_props
    st.pl.feat_plot(grid, feature="Group", ax=axes[0], show_plot=False,
                    vmax=1, show_color_bar=False)
    st.pl.cluster_plot(grid, use_label=cct, list_clusters=[g],
                       ax=axes[1], show_plot=False)
    st.pl.cluster_plot(adata, use_label=cct, list_clusters=[g],
                       ax=axes[2], show_plot=False)
    axes[0].set_title(f"Grid {g} Proportions (Maximum = 1)")
    axes[1].set_title(f"Grid {g} Maximum Spots")
    axes[2].set_title(f"Individual Cell {g}")
    plt.show()

In [None]:
lrs = st.tl.cci.load_lrs([resource], species=organism)
st.tl.cci.run(
    grid, lrs, min_spots=min_spots, distance=distance,
    n_pairs=n_pairs, n_cpus=n_jobs)
if pval_adj_cutoff is not None or adj_method is not None:  # adjust p?
    st.tl.cci.adj_pvals(
        grid, correct_axis="spot", pval_adj_cutoff=pval_adj_cutoff,
        adj_method=adj_method)  # optionally, adjust p-values
print(grid.uns["lr_summary"])

In [None]:
# QC Plots
fig, axes = st.pl.cci_check(grid, cct, figsize=(16, 5))
fig.suptitle("CCI Check: Interactions Shouldn't Correlate Much "
             "with Cell Type Frequency if Well-Controlled for")
st.pl.lr_diagnostics(grid, figsize=(10, 2.5))

# Results Plots
st.pl.lr_summary(grid, n_top=n_top, figsize=(10, 3))  # summary plot
if plot_lr is True:  # pairs unspecified: top 3 = default
    plot_lr = 3
if isinstance(plot_lr, (int, float)) and not isinstance(plot_lr, bool):
    # a number N means "the first N pairs of the summary table"
    plot_lr = grid.uns["lr_summary"].index.values[:int(plot_lr)]
elif isinstance(plot_lr, str):  # single pair given as a bare string
    plot_lr = [plot_lr]
# Identity checks instead of `not in [None, False]`: the latter compares
# element-wise (and raises) once plot_lr is a NumPy array
show_pairs = plot_lr is not None and plot_lr is not False and len(
    plot_lr) > 0
if show_pairs:  # if wanted these plots...
    # squeeze=False keeps `axes` 2-D even with one pair or one statistic
    fig, axes = plt.subplots(ncols=len(stats), nrows=len(plot_lr),
                             figsize=(12, 6), squeeze=False)
    for r, x in enumerate(plot_lr):  # iterate ligand-receptors
        for c, stat in enumerate(stats):  # iterate statistics
            st.pl.lr_result_plot(grid, use_result=stat, use_lr=x,
                                 show_color_bar=False, ax=axes[r, c])
            axes[r, c].set_title(f"{x} {stat}")

# Gene Expression Plots
if show_pairs:
    # pair names are split on "_" into their individual genes
    genes = [g for pair in plot_lr for g in pair.split("_")]
    for g in genes:
        fig, axes = plt.subplots(ncols=2, figsize=(20, 5))
        st.pl.gene_plot(grid, gene_symbols=g, ax=axes[0],
                        show_color_bar=False, show_plot=False)
        st.pl.gene_plot(adata, gene_symbols=g, ax=axes[1],
                        show_color_bar=False, show_plot=False, vmax=80)
        axes[0].set_title(f"Grid {g} Expression")
        axes[1].set_title(f"Cell {g} Expression")
        plt.show()

In [None]:
return grid, grid.uns["lr_summary"]

## Liana

In [None]:
import liana as li
from liana.method import MistyData, genericMistyData, lrMistyData
from liana.method.sp import RandomForestModel, LinearModel, RobustLinearModel
import decoupler as dc

organism = "human"
adata = sc.read(os.path.join(out_dir, libid + ".h5ad"))




# Start from the raw counts layer, then normalize, log-transform, and flag
# highly variable genes
adata.X = adata.layers["counts"].copy()
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata)

# Fit decoupler's MLM on the PROGENy network, then lift the "mlm_estimate"
# entry of .obsm into its own AnnData
# (presumably written by run_mlm — TODO confirm)
progeny = dc.get_progeny(organism=organism, top=500)
dc.run_mlm(mat=adata, net=progeny, source="source", target="target",
           weight="weight", verbose=True, use_raw=False)
acts_progeny = li.ut.obsm_to_adata(adata, "mlm_estimate")

# NOTE(review): the next assignment has no right-hand side, so this cell does
# not parse; the "compositions" matrix still needs to be supplied.
adata.obsm["compositions"] =
comps = li.ut.obsm_to_adata(adata, "compositions")

# MISTy setup: compositions as the intra view, pathway activities as extra
misty = genericMistyData(intra=comps, extra=acts_progeny, cutoff=0.05,
                         bandwidth=200, coord_type="generic", n_rings=1)

# NOTE(review): `hvg` is not used anywhere in the visible cells.
hvg = adata.var[adata.var["highly_variable"]].index
misty(bypass_intra=True, model=LinearModel, verbose=True)


In [None]:
ann = sc.read("kuppe_heart19.h5ad", backup_url='https://figshare.com/ndownloader/files/41501073?private_link=4744950f8768d5c8f68c')
ann.obsm["compositions"]