# Imports

In [1]:
%load_ext autoreload
%autoreload 2

import os
import re
import functools
import spatialdata_io as sdio
import numpy as np
import pandas as pd
import corescpy as cr

Downloading data from `https://omnipathdb.org/queries/enzsub?format=json`
Downloading data from `https://omnipathdb.org/queries/interactions?format=json`
Downloading data from `https://omnipathdb.org/queries/complexes?format=json`
Downloading data from `https://omnipathdb.org/queries/annotations?format=json`
Downloading data from `https://omnipathdb.org/queries/intercell?format=json`
Downloading data from `https://omnipathdb.org/about?format=text`


# Setup

In [4]:
%%time

# Count Threshold for Cell Quantification
count_threshold = 1

# File Paths
libid = "Uninflamed-50336C"
dir_data = "/mnt/cho_lab/bbdata2/outputs/TUQ97N"
out_dir = str("/mnt/cho_lab/disk2/elizabeth/data/shared-xenium-library/"
              "outputs/TUQ97N/nebraska")
path_dir = os.path.join(out_dir, "pathology")
file_align = os.path.join(path_dir,
                          f"alignment/{libid}_alignment_files/matrix.csv")
file_image = (os.path.join(path_dir, f"{libid.split('-')[1]}.ndpi"),  # raw
              os.path.join(path_dir, f"ome-tiff/{libid}.ome.tif"))  # convert

# Clustering Version
col_cell_type = "leiden_res1pt5_dist0_npc30"  # high resolution
# c_t = "leiden_res0pt75_dist0pt3_npc30"  # medium resolution
# c_t = "leiden_res0pt5_dist0pt5_npc30"  # low resolution

# Display
pd.options.display.max_colwidth = 1000
pd.options.display.max_columns = 100
pd.options.display.max_rows = 500

# Spatial Data
files = functools.reduce(lambda i, j: i + j, [[os.path.join(
    run, i) for i in os.listdir(os.path.join(
        dir_data, run))] for run in os.listdir(dir_data)])
file_path = np.array(files)[np.where(["-".join(libid.split(
    "-")[1:]) == os.path.basename(x).split("__")[2].split(
        "-")[0] for x in files])[0][0]]
self = cr.Spatial(os.path.join(dir_data, file_path), library_id=libid,
                  col_cell_type=col_cell_type, n_jobs=8)
self.update_from_h5ad(os.path.join(out_dir, libid + ".h5ad"))
self.get_layer("counts", inplace=True)



<<< INITIALIZING SPATIAL CLASS OBJECT >>>

[34mINFO    [0m reading                                                                                                   
         [35m/mnt/cho_lab/bbdata2/outputs/TUQ97N/CHO-010/output-XETG00189__0011047__50336C-TUQ97N-EA__20240422__175051/[0m
         [95mcell_feature_matrix.h5[0m                                                                                    




Counts: Initial


	Observations: 387961

	Genes: 469







 AnnData object with n_obs × n_vars = 387961 × 469
    obs: 'cell_id', 'transcript_counts', 'control_probe_counts', 'control_codeword_counts', 'unassigned_codeword_counts', 'deprecated_codeword_counts', 'total_counts', 'cell_area', 'nucleus_area', 'region', 'z_level', 'nucleus_count', 'cell_labels', 'Sample'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'spatialdata_attrs', 'spatial', 'original_ix'
    obsm: 'spatial'
    layers: 'counts' 

                      gene_ids    feature_types   genome
gene_symbols                                           
ABCA7         ENSG00000064687  Gene Expression  Unknown
ACTA2         ENSG00000107796  Gene Expression  Unknown
ACTG2         ENSG00000163017  Gene Expression  Unknown
ADAM15        ENSG00000143537  Gene Expression  Unknown
ADAM28        ENSG00000042980  Gene Expression  Unknown 

 



col_gene_symbols="gene_symbols"
col_cell_type="leiden_res1pt5_dist0_npc30"
col_samp

AnnData object with n_obs × n_vars = 378219 × 469
    obs: 'cell_id', 'transcript_counts', 'control_probe_counts', 'control_codeword_counts', 'unassigned_codeword_counts', 'deprecated_codeword_counts', 'total_counts', 'cell_area', 'nucleus_area', 'region', 'z_level', 'nucleus_count', 'cell_labels', 'Sample', 'Sample ID', 'Patient', 'Status', 'Slide Id', 'Project', 'Location', 'Stricture', 'GRID ID', 'Inflamed', 'Procedure Date', 'Age', 'Sex', 'Race', 'Hispanic', 'Diagnosis', 'Project.1', 'Procedure', 'Disease_Status', 'Date Collected', 'Date Sectioned', 'Date Hybridization', 'Storage 4c', 'Created By', 'Created', 'Storage Status', 'Location.1', 'Storage Row', 'Storage Col', 'Checked Out By', 'out_file', 'Condition', 'file_path', 'n_counts', 'log_counts', 'n_genes', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'assay_protein', 'col_gene_symbols', 'col_cell_type', 'col_sample_id', 'col_batch', 'col_subject', 'col_condition', 'col_num_umis', 'col_segment', 'cell_f

# Annotations

In [5]:
fmr = os.path.join(out_dir, "annotation_dictionaries", str(
    f"{self._library_id}___{col_cell_type}_dictionary.xlsx"))  # file
fmr = pd.read_excel(fmr).astype(str)
c_m = col_cell_type.split("leiden_")[1]
for x in ["annotation", "bin", "bucket"]:
    self.rna.obs.loc[:, f"{x}_{c_m}"] = self.rna.obs[col_cell_type].astype(
        int).astype(str).replace(fmr.set_index(fmr.columns[0])[x])  # to label
    self.rna.obs.loc[self.rna.obs[f"{x}_{c_m}"].isnull(
        ), f"{x}_{c_m}"] = self.rna.obs.loc[self.rna.obs[f"{x}_{c_m}"].isnull(
            ), col_cell_type].astype(str)  # missing annotations -> Leiden
    self.rna.obs.loc[:, f"{x}_{c_m}"] = self.rna.obs[
        f"{x}_{c_m}"].astype("category")  # as categorical
    # self.plot_spatial(f"{x}_{r}")

# Load Image

In [7]:
if os.path.exists(file_image[1]) is False:  # convert to ome-tiff if needed
    cr.tl.write_ome_tif(file_image[0], file_image[1],
                        bf_cmd="cd ~/bftools && ")
self.add_image(file_image[1], name="he", file_align=file_align)  # add image

[34mINFO    [0m Transposing `data` of type: [1m<[0m[1;95mclass[0m[39m [0m[32m'dask.array.core.Array'[0m[1m>[0m to [1m([0m[32m'c'[0m, [32m'y'[0m, [32m'x'[0m[1m)[0m.                           


RuntimeError: The functions add_image(), add_labels(), add_points() and add_shapes() have been removed in favor of dict-like access to the elements. Please use the following syntax to add an element:

	sdata.images["image_name"] = image
	sdata.labels["labels_name"] = labels
	...

The new syntax does not automatically updates the disk storage, so you need to call sdata.write() when the in-memory object is ready to be saved.
To save only a new specific element to an existing Zarr storage please use the functions write_image(), write_labels(), write_points(), write_shapes() and write_table(). We are going to make these calls more ergonomic in a follow up PR.

In [None]:
# %matplotlib inline
# import matplotlib.pyplot as plt
# from napari_spatialdata import Interactive
# from spatialdata import SpatialData

# plt.rcParams["figure.figsize"] = (20, 20)

# sdata = self.adata
# interactive = Interactive(sdata)
# interactive.run()

# Render Images

In [None]:
crop = lambda x: spatialdata.bounding_box_query(
    x, min_coordinate=[20_000, 8000], max_coordinate=[22_000, 8500],
    axes=("x", "y"), target_coordinate_system="global")

In [None]:
crop(self.adata).pl.render_images("he").pl.show()

In [None]:
self.adata.pl.render_images("morphology_focus").pl.show(title="Morphology")

In [None]:
self.adata

In [None]:
crop(self.adata).pl.render_shapes(
    elements="cell_boundaries", color=col_cell_type,
    groups=["1", "2"]).pl.show(dpi=20)

In [None]:
self.adata.pl.render_images("he").pl.show()

In [None]:
axes = plt.subplots(1, 2, figsize=(10, 10))[1].flatten()
self.adata.pl.render_images("he_image").pl.show(ax=axes[0], title="H&E")
self.adata.pl.render_images("morphology_focus").pl.show(
    ax=axes[1], title="Morphology")