# Setup

```
pip install git+https://github.com/scverse/spatialdata.git@main
pip install git+https://github.com/scverse/spatialdata-io.git@main
```

This is the stuff you have to edit; the rest of the sections can run as-is after you've set the needed parameters.

---

`coord_suffix` must align with 
* the sub-directory (corresponds to region, e.g., "mucosa") under the `dir_coord` directory where the Xenium Explorer-exported selection files are stored, and
* the suffixes of the coordinate selection files (see file naming conventions below).
  
The `AnnData` objects created will have this suffix as well (e.g., `Uninflamed-50452A_mucosa.h5ad`).

---

Selection files should be named by this convention:
`<library_id>_<coord_suffix>.csv`.

For example, if `dir_coord` is `.../coordinates/mucosa`, the mucosa selection file for sample 50452A should be under `.../coordinates/mucosa/50452A_mucosa.csv`. 

More specifically, if the coordinates directory is `/mnt/cho_lab/disk2/elizabeth/data/shared-xenium-library/outputs/TUQ97N/nebraska/coordinates` and the selection region is "mucosa", then `dir_coord` should be `/mnt/cho_lab/disk2/elizabeth/data/shared-xenium-library/outputs/TUQ97N/nebraska/coordinates/mucosa`, and the full file path for this sample would be `/mnt/cho_lab/disk2/elizabeth/data/shared-xenium-library/outputs/TUQ97N/nebraska/coordinates/mucosa/50452A_mucosa.csv`.

---

**As with any other file naming schema, suffixes/directory names should not contain any special characters other than underscores (`_`) (no periods, dashes, spaces, etc.).**

N.B. In the above explanation, `library_id` refers to library/original sample ID without condition (e.g., "50452A", not "Uninflamed-50452A" like in other places). Remember that `coord_suffix` should also be the name of the parent directory of the coordinate file. I include this information in both the directory and file name to prevent mix-ups should files be moved or placed in the wrong folder.

Loading the metadata allows us to find the object IDs corresponding to the sample IDs (e.g., for TUQ97N, object IDs are in the format `<condition>-<block_id>`, where the condition is Inflamed, Uninflamed, or Stricture, as in `Uninflamed-50452A`).


In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import os
import re
import math
import functools
import traceback
import anndata
import scanpy as sc
import spatialdata_plot
import numpy as np
import pandas as pd
import corescpy as cr

# Main
write_object = True  # set to False for a dry run (no objects are written)
overwrite = False  # overwrite a cropped object if it already exists?
regions = ["mucosa", "serosa", "myenteric_plexus", "submucosa",
           "smc_longitudinal", "smc_circular"]  # coord_suffix values to crop
col_leiden = "leiden_res1pt5_dist0_npc30"
col_ann = "Lump"

# Process Options
panel = "TUQ97N"  # Xenium panel ID
constants_dict = cr.pp.CONSTANTS_PANELS[panel]
# Column-name constants for this panel; any key missing from the panel's
# constants is set to None rather than raising
cso, col_sample, col_condition, col_inflamed, col_subject = [
    constants_dict.get(x) for x in [
        "col_sample_id_o", "col_sample_id", "col_condition",
        "col_inflamed", "col_subject"]]
col_stricture, key_stricture, col_fff = [
    constants_dict.get(x) for x in [
        "col_stricture", "key_stricture", "col_data_dir"]]
# libs = [  # sample IDs from patients for whom we have all conditions
#     "50452A", "50452B", "50452C",  # old segmentation
#     "50006A", "50006B", "50006C",  # rest are new segmentation
#     "50217A", "50217B", "50217C",
#     "50336B", "50336C", "50336A",
#     "50403A2", "50403B", "50403C1"
# ]  # excludes low-quality sample/condition replicates 50403A1 & 50403C2
libs = ["50006A", "50006B", "50217A", "50217B", "50336B", "50336C",
        "50403B", "50403A2"]  # just inflamed/uninflamed (no strictures)
# libs = None  # to run all available samples
input_suffix = ""  # in case want to crop objects with some suffix
# due to creation of a subsidiary object, e.g., for
# "Stricture-50452C_downsampled.h5ad"
# input_suffix would be "_downsampled". For "main" objects, input_suffix=""
plot = True  # could slow process down if large samples/cropped area

# Files & Directories
direc = "/mnt/cho_lab/bbdata2/"  # mounted NFS with data
dir_entry = "/mnt/cho_lab/disk2"  # Spark writeable data directory
dir_writeable = os.path.join(
    dir_entry, "elizabeth/data/shared-xenium-library")  # where objects are
mdf = os.path.join(
    dir_writeable,
    f"samples_{panel}.csv")  # metadata file path (for now; will soon be on NFS)
out_dir = os.path.join(
    dir_writeable, f"outputs/{panel}/nebraska")  # object output directory

#  Your Folders
out_new = os.path.join(
    dir_entry,
    f"{os.getlogin()}/data/shared-xenium-library/outputs/{panel}/nebraska")

# Constants (Shouldn't Need Edits Unless Extreme Process Changes)
dir_data = os.path.join(direc, f"outputs/{panel}")
# All data paths, relative to dir_data, as "<run>/<entry>". Entries of
# dir_data that are not directories are skipped (os.listdir would raise on
# them), and an empty dir_data yields an empty list instead of an error.
files = [os.path.join(run, entry)
         for run in os.listdir(dir_data)
         if os.path.isdir(os.path.join(dir_data, run))
         for entry in os.listdir(os.path.join(dir_data, run))]
os.makedirs(out_dir, exist_ok=True)  # make output directory if needed
metadata = cr.pp.get_metadata_cho(direc, mdf, panel=panel, samples=libs)
metadata[col_subject]

Downloading data from `https://omnipathdb.org/queries/enzsub?format=json`
Downloading data from `https://omnipathdb.org/queries/interactions?format=json`
Downloading data from `https://omnipathdb.org/queries/complexes?format=json`
Downloading data from `https://omnipathdb.org/queries/annotations?format=json`
Downloading data from `https://omnipathdb.org/queries/intercell?format=json`
Downloading data from `https://omnipathdb.org/about?format=text`


Unnamed: 0_level_0,Name
Sample,Unnamed: 1_level_1
Inflamed-50006A,50006
Uninflamed-50006B,50006
Inflamed-50217A,50217
Uninflamed-50217B,50217
Inflamed-50336B,50336
Uninflamed-50336C,50336
Inflamed-50403B,50403
Uninflamed-50403A2,50403


# Subset Data by Coordinate Files & Write Cropped Objects

Subset the data by coordinates (`corescpy` can use Xenium Explorer-exported manual selection files to get those coordinates) and then write the cropped objects to `<out_new>/objects_cropped/<coord_suffix>`. The selection files are read from `<out_new>/coordinates/<coord_suffix>`.

In [156]:
%%time

def _sample_from_path(path):
    """Return the sample ID embedded in a Xenium output directory name.

    The third ``__``-separated field of the base name is taken and cut at
    its first dash. Returns None when the name has fewer than three fields.
    """
    fields = os.path.basename(path).split("__")
    return fields[2].split("-")[0] if len(fields) > 2 else None


# Original sample ID -> object ID lookup (built once, not per sample)
sample_ids = metadata.reset_index().set_index(cso)[col_sample]
samples = list(sample_ids.index) if libs is None else libs  # None = all

for s in samples:  # iterate samples
    print(f"\n\n{'=' * 80}\n{s}\n{'=' * 80}\n\n")
    matches = [x for x in files if _sample_from_path(x) == s]
    if not matches:
        print(f"*** No Xenium data directory found for {s} in {dir_data}")
        continue
    fff = os.path.join(dir_data, matches[0])  # sample's Xenium data directory
    lib = sample_ids.loc[s]
    file_obj_proc = os.path.join(out_dir, f"{lib}{input_suffix}.h5ad")
    self = cr.Spatial(fff, library_id=lib)  # load original data
    self.update_from_h5ad(file_obj_proc)  # update with processed object
    # Drop rows with missing values from this sample's own shapes elements
    # (the loaded object, not a crop left over from an earlier iteration)
    if hasattr(self.adata, "shapes"):
        for x in list(self.adata.shapes):
            if self.adata.shapes[x].isnull().any().any():
                self.adata.shapes[x] = self.adata.shapes[x].dropna()
    sdata_full = self.adata  # uncropped object; restored after each region
    for coord_suffix in regions:
        print(f"\n\n\t\t{'*' * 40}\n\t\t{s} | {coord_suffix}"
              f"\n\t\t{'*' * 40}\n\n")
        dir_coord = os.path.join(
            out_new, "coordinates", coord_suffix)  # coordinates (NFS soon?)
        out = os.path.join(out_new, "objects_cropped", coord_suffix)  # path
        os.makedirs(out, exist_ok=True)  # make sub-directory for new objects?
        file_coord = os.path.join(dir_coord, s + f"_{coord_suffix}.csv")
        file_obj_crop = os.path.join(out, f"{lib}_{coord_suffix}.h5ad")
        if overwrite is False and os.path.exists(file_obj_crop):
            print(f"*** Subset {file_obj_crop} already exists")
            continue
        if not os.path.exists(file_coord):
            print(f"*** Coordinate file {file_coord} doesn't exist")
            continue
        print(f"\n\nData: {fff}\nObject: {file_obj_proc}"
              f"\nCoordinates: {file_coord}\nOuput: {file_obj_crop}")
        try:
            sdata = self.crop(file_coord)  # crop data to coordinates
            if plot:
                try:
                    sdata.pl.render_labels("cell_labels").pl.show()  # plot
                except Exception:
                    # Plotting is best-effort, but say why it failed
                    print(f"*** Plotting {s} ({coord_suffix}) failed:\n"
                          f"{traceback.format_exc()}")
            self.adata = sdata
            if write_object is True:
                self.write(file_obj_crop)  # write cropped
        except Exception:
            print(traceback.format_exc(), f"Cropping \n\n{s} failed!")
        finally:
            # Next region must be cropped from the full sample, not from
            # this region's subset
            self.adata = sdata_full
print("\n\nCompleted!")



50006A




<<< INITIALIZING SPATIAL CLASS OBJECT >>>

[34mINFO    [0m reading                                                                                                   
         [35m/mnt/cho_lab/bbdata2/outputs/TUQ97N/CHO-007/output-XETG00189__0022407__50006A-TUQ97N-EA__20240411__205514/[0m
         [95mcell_feature_matrix.h5[0m                                                                                    


In [6]:
self.rna

AnnData object with n_obs × n_vars = 220569 × 469
    obs: 'cell_id', 'transcript_counts', 'control_probe_counts', 'control_codeword_counts', 'unassigned_codeword_counts', 'deprecated_codeword_counts', 'total_counts', 'cell_area', 'nucleus_area', 'region', 'z_level', 'nucleus_count', 'cell_labels', 'Sample', 'Sample ID', 'Patient', 'Status', 'Slide Id', 'Project', 'Location', 'Stricture', 'GRID ID', 'Inflamed', 'Procedure Date', 'Age', 'Sex', 'Race', 'Hispanic', 'Diagnosis', 'Project.1', 'Procedure', 'Disease_Status', 'Date Collected', 'Date Sectioned', 'Date Hybridization', 'Storage 4c', 'Created By', 'Created', 'Storage Status', 'Location.1', 'Storage Row', 'Storage Col', 'Checked Out By', 'out_file', 'Condition', 'file_path', 'n_counts', 'log_counts', 'n_genes', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'assay_protein', 'col_gene_symbols', 'col_cell_type', 'col_sample_id', 'col_batch', 'col_subject', 'col_condition', 'col_num_umis', 'col_segment', 'cell_f

In [125]:
# Load one sample (`s` must already be set) and merge in its processed object
print(f"\n\n{'=' * 80}\n{s}\n{'=' * 80}\n\n")
is_sample_dir = [
    os.path.basename(x).split("__")[2].split("-")[0] == s for x in files]
first_hit = np.where(is_sample_dir)[0][0]  # IndexError if nothing matches
fff = os.path.join(
    dir_data, np.array(files)[first_hit])  # sample's Xenium data directory
by_original_id = metadata.reset_index().set_index(cso)
lib = by_original_id.loc[s][col_sample]  # object ID for this sample
file_obj_proc = os.path.join(out_dir, f"{lib}{input_suffix}.h5ad")
self = cr.Spatial(
    fff, library_id=lib, cells_as_circles=False)  # original data
self.update_from_h5ad(file_obj_proc)  # update with processed object



50006B




<<< INITIALIZING SPATIAL CLASS OBJECT >>>

[34mINFO    [0m reading                                                                                                   
         [35m/mnt/cho_lab/bbdata2/outputs/TUQ97N/CHO-007/output-XETG00189__0022407__50006B-TUQ97N-EA__20240411__205514/[0m
         [95mcell_feature_matrix.h5[0m                                                                                    


Counts: Initial


	Observations: 116837

	Genes: 469







 AnnData object with n_obs × n_vars = 116837 × 469
    obs: 'cell_id', 'transcript_counts', 'control_probe_counts', 'control_codeword_counts', 'unassigned_codeword_counts', 'deprecated_codeword_counts', 'total_counts', 'cell_area', 'nucleus_area', 'region', 'z_level', 'nucleus_count', 'cell_labels', 'Sample'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'spatialdata_attrs', 'spatial', 'original_ix'
    obsm: 'spatial'
    layers: 'counts' 

                      gene_ids    feature_types   ge

# Workspace

In [104]:
self.adata.shapes["cell_circles"]["radius"].isna().sum()

KeyError: 'cell_circles'

In [106]:
sdata

SpatialData object
├── Images
│     └── 'morphology_focus': DataTree[cyx] (5, 16631, 17416), (5, 8316, 8708), (5, 4158, 4354), (5, 2079, 2177), (5, 1040, 1089)
├── Labels
│     ├── 'cell_labels': DataTree[yx] (16631, 17416), (8316, 8708), (4158, 4354), (2079, 2177), (1040, 1089)
│     └── 'nucleus_labels': DataTree[yx] (16631, 17416), (8316, 8708), (4158, 4354), (2079, 2177), (1040, 1089)
├── Points
│     └── 'transcripts': DataFrame with shape: (<Delayed>, 11) (3D points)
└── Shapes
      ├── 'cell_boundaries': GeoDataFrame shape: (8733, 1) (2D shapes)
      └── 'nucleus_boundaries': GeoDataFrame shape: (8646, 1) (2D shapes)
with coordinate systems:
    ▸ 'global', with elements:
        morphology_focus (Images), cell_labels (Labels), nucleus_labels (Labels), transcripts (Points), cell_boundaries (Shapes), nucleus_boundaries (Shapes)

In [109]:
sdata

SpatialData object
├── Images
│     └── 'morphology_focus': DataTree[cyx] (5, 16631, 17416), (5, 8316, 8708), (5, 4158, 4354), (5, 2079, 2177), (5, 1040, 1089)
├── Labels
│     ├── 'cell_labels': DataTree[yx] (16631, 17416), (8316, 8708), (4158, 4354), (2079, 2177), (1040, 1089)
│     └── 'nucleus_labels': DataTree[yx] (16631, 17416), (8316, 8708), (4158, 4354), (2079, 2177), (1040, 1089)
├── Points
│     └── 'transcripts': DataFrame with shape: (<Delayed>, 11) (3D points)
└── Shapes
      ├── 'cell_boundaries': GeoDataFrame shape: (8733, 1) (2D shapes)
      └── 'nucleus_boundaries': GeoDataFrame shape: (8646, 1) (2D shapes)
with coordinate systems:
    ▸ 'global', with elements:
        morphology_focus (Images), cell_labels (Labels), nucleus_labels (Labels), transcripts (Points), cell_boundaries (Shapes), nucleus_boundaries (Shapes)

'cell_id'

In [121]:
sdata

SpatialData object
├── Images
│     └── 'morphology_focus': DataTree[cyx] (5, 16631, 17416), (5, 8316, 8708), (5, 4158, 4354), (5, 2079, 2177), (5, 1040, 1089)
├── Labels
│     ├── 'cell_labels': DataTree[yx] (16631, 17416), (8316, 8708), (4158, 4354), (2079, 2177), (1040, 1089)
│     └── 'nucleus_labels': DataTree[yx] (16631, 17416), (8316, 8708), (4158, 4354), (2079, 2177), (1040, 1089)
├── Points
│     └── 'transcripts': DataFrame with shape: (<Delayed>, 11) (3D points)
└── Shapes
      ├── 'cell_boundaries': GeoDataFrame shape: (8733, 1) (2D shapes)
      └── 'nucleus_boundaries': GeoDataFrame shape: (8646, 1) (2D shapes)
with coordinate systems:
    ▸ 'global', with elements:
        morphology_focus (Images), cell_labels (Labels), nucleus_labels (Labels), transcripts (Points), cell_boundaries (Shapes), nucleus_boundaries (Shapes)

In [140]:
self.rna[self.rna.obs["cell_id"].isin(sdata.shapes["cell_boundaries"].index)]

View of AnnData object with n_obs × n_vars = 5215 × 469
    obs: 'cell_id', 'transcript_counts', 'control_probe_counts', 'control_codeword_counts', 'unassigned_codeword_counts', 'deprecated_codeword_counts', 'total_counts', 'cell_area', 'nucleus_area', 'region', 'z_level', 'nucleus_count', 'cell_labels', 'Sample', 'Sample ID', 'Patient', 'Status', 'Slide Id', 'Project', 'Location', 'Stricture', 'GRID ID', 'Inflamed', 'Procedure Date', 'Age', 'Sex', 'Race', 'Hispanic', 'Diagnosis', 'Project.1', 'Procedure', 'Disease_Status', 'Date Collected', 'Date Sectioned', 'Date Hybridization', 'Storage 4c', 'Created By', 'Created', 'Storage Status', 'Location.1', 'Storage Row', 'Storage Col', 'Checked Out By', 'out_file', 'Condition', 'file_path', 'n_counts', 'log_counts', 'n_genes', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'assay_protein', 'col_gene_symbols', 'col_cell_type', 'col_sample_id', 'col_batch', 'col_subject', 'col_condition', 'col_num_umis', 'col_segment', '

In [136]:
self.adata

SpatialData object
├── Images
│     └── 'morphology_focus': DataTree[cyx] (5, 34128, 51148), (5, 17064, 25574), (5, 8532, 12787), (5, 4266, 6393), (5, 2133, 3196)
├── Labels
│     ├── 'cell_labels': DataTree[yx] (34128, 51148), (17064, 25574), (8532, 12787), (4266, 6393), (2133, 3196)
│     └── 'nucleus_labels': DataTree[yx] (34128, 51148), (17064, 25574), (8532, 12787), (4266, 6393), (2133, 3196)
├── Points
│     └── 'transcripts': DataFrame with shape: (<Delayed>, 11) (3D points)
├── Shapes
│     ├── 'cell_boundaries': GeoDataFrame shape: (116837, 1) (2D shapes)
│     └── 'nucleus_boundaries': GeoDataFrame shape: (114734, 1) (2D shapes)
└── Tables
      └── 'table': AnnData (78976, 469)
with coordinate systems:
    ▸ 'global', with elements:
        morphology_focus (Images), cell_labels (Labels), nucleus_labels (Labels), transcripts (Points), cell_boundaries (Shapes), nucleus_boundaries (Shapes)

In [None]:
self.adata.get_annotated_regions(self.adata.table)

In [134]:
sdata = self.crop(file_coord)  # crop data to coordinates
sdata.pl.render_labels("cell_labels").pl.show()  # plot

# self.adata = sdata
# self.write(file_obj_crop)  # write cropped

AttributeError: 'SpatialData' object has no attribute 'pl'

In [119]:
file_obj_crop

'/mnt/cho_lab/disk2/elizabeth/data/shared-xenium-library/outputs/TUQ97N/nebraska/objects_cropped/serosa/Uninflamed-50006B_serosa.h5ad'

In [31]:
dir(self.adata)

['__class__',
 '__contains__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_change_table_annotation_target',
 '_check_element_not_on_disk_with_different_type',
 '_element_type_and_name_from_element_path',
 '_element_type_from_element_name',
 '_filter_tables',
 '_find_element',
 '_gen_elements',
 '_gen_repr',
 '_gen_spatial_element_values',
 '_get_groups_for_element',
 '_group_for_element_exists',
 '_images',
 '_labels',
 '_non_empty_elements',
 '_path',
 '_points',
 '_query',
 '_set_table_annotation_target',
 '_shapes',
 '_shared_keys',
 '_symmetric_difference_with_zarr_store',
 '_tables',
 '_validate_can_safely_write_to_path',
 '

In [33]:
self.adata

SpatialData object
├── Images
│     └── 'morphology_focus': DataTree[cyx] (5, 34128, 51148), (5, 17064, 25574), (5, 8532, 12787), (5, 4266, 6393), (5, 2133, 3196)
├── Labels
│     ├── 'cell_labels': DataTree[yx] (34128, 51148), (17064, 25574), (8532, 12787), (4266, 6393), (2133, 3196)
│     └── 'nucleus_labels': DataTree[yx] (34128, 51148), (17064, 25574), (8532, 12787), (4266, 6393), (2133, 3196)
├── Points
│     └── 'transcripts': DataFrame with shape: (<Delayed>, 11) (3D points)
├── Shapes
│     ├── 'cell_boundaries': GeoDataFrame shape: (116837, 1) (2D shapes)
│     ├── 'cell_circles': GeoDataFrame shape: (116837, 2) (2D shapes)
│     └── 'nucleus_boundaries': GeoDataFrame shape: (114734, 1) (2D shapes)
└── Tables
      └── 'table': AnnData (78976, 469)
with coordinate systems:
    ▸ 'global', with elements:
        morphology_focus (Images), cell_labels (Labels), nucleus_labels (Labels), transcripts (Points), cell_boundaries (Shapes), cell_circles (Shapes), nucleus_boundaries (Sha

In [72]:
self.adata.shapes["cell_circles"][self.adata.shapes["cell_circles"].radius.isna()].iloc[0]

geometry    POINT (195.42831420898438 3911.943359375)
radius                                            NaN
Name: aaafgjnh-1, dtype: object

In [75]:
cell_circles = self.adata.shapes["cell_circles"].copy()

Unnamed: 0_level_0,geometry,radius
cell_id,Unnamed: 1_level_1,Unnamed: 2_level_1
aaacnfei-1,POINT (228.882 3886.67),2.509118
aaaddime-1,POINT (243.282 3913.385),3.259164
aaadjknl-1,POINT (239.285 3898.489),3.371881
aaaekipn-1,POINT (233.972 3899.172),2.154692
aaaemmpi-1,POINT (203.626 3904.753),3.532188
...,...,...
oindhmhj-1,POINT (6890.228 3650.433),2.045175
oindmibp-1,POINT (6903.345 3731.109),3.056615
oinecfni-1,POINT (6895.435 3653.535),2.107479
oineebdc-1,POINT (6930.453 3651.84),1.818226


In [77]:
self.adata.shapes["cell_circles"]

Unnamed: 0_level_0,geometry,radius
cell_id,Unnamed: 1_level_1,Unnamed: 2_level_1
aaacnfei-1,POINT (228.882 3886.67),2.509118
aaaddime-1,POINT (243.282 3913.385),3.259164
aaadjknl-1,POINT (239.285 3898.489),3.371881
aaaekipn-1,POINT (233.972 3899.172),2.154692
aaaemmpi-1,POINT (203.626 3904.753),3.532188
...,...,...
oindhmhj-1,POINT (6890.228 3650.433),2.045175
oindmibp-1,POINT (6903.345 3731.109),3.056615
oinecfni-1,POINT (6895.435 3653.535),2.107479
oineebdc-1,POINT (6930.453 3651.84),1.818226


In [81]:
import spatialdata

cell_circles = spatialdata.deepcopy(self.adata.shapes["cell_circles"])

In [82]:
from spatialdata.transformations.transformations import Affine, Identity, Scale
from spatialdata_io._constants._constants import XeniumKeys

In [83]:
        transform = Scale([1.0 / 0.2125, 1.0 / 0.2125], axes=("x", "y"))
        radii = np.sqrt(self.rna.obs[XeniumKeys.CELL_NUCLEUS_AREA].to_numpy() / np.pi)

In [97]:
self.rna.obs.iloc[:, 1:12]

Unnamed: 0,transcript_counts,control_probe_counts,control_codeword_counts,unassigned_codeword_counts,deprecated_codeword_counts,total_counts,cell_area,nucleus_area,region,z_level,nucleus_count
0,243,0,0,0,0,243.0,71.346878,19.778438,cell_circles,0.0,1.0
1,260,0,0,0,0,260.0,80.197503,33.370470,cell_circles,0.0,1.0
2,254,0,0,0,0,254.0,89.183597,35.718595,cell_circles,0.0,1.0
3,180,0,0,0,0,180.0,58.748283,14.585469,cell_circles,0.0,1.0
4,274,0,0,0,0,274.0,127.069692,39.195626,cell_circles,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...
116812,138,0,0,0,0,138.0,135.604224,11.560000,cell_circles,12.0,1.0
116815,97,0,0,0,0,97.0,114.832348,14.043594,cell_circles,12.0,1.0
116816,104,0,0,0,0,104.0,140.661724,23.255470,cell_circles,11.0,1.0
116817,53,0,0,0,0,53.0,49.265471,49.265471,cell_circles,12.0,1.0


In [98]:
self.rna.obs[XeniumKeys.CELL_NUCLEUS_AREA]

0         19.778438
1         33.370470
2         35.718595
3         14.585469
4         39.195626
            ...    
116812    11.560000
116815    14.043594
116816    23.255470
116817    49.265471
116818    17.385157
Name: nucleus_area, Length: 78976, dtype: float64

In [76]:
self.adata.shapes["cell_circles"] = self.adata.shapes["cell_circles"].drop("radius", axis=1)

ValueError: Column `radius` not found. Please use ShapesModel.parse() to construct data that is guaranteed to be valid.

In [154]:
self.adata

SpatialData object
├── Images
│     └── 'morphology_focus': DataTree[cyx] (5, 34128, 51148), (5, 17064, 25574), (5, 8532, 12787), (5, 4266, 6393), (5, 2133, 3196)
├── Labels
│     ├── 'cell_labels': DataTree[yx] (34128, 51148), (17064, 25574), (8532, 12787), (4266, 6393), (2133, 3196)
│     └── 'nucleus_labels': DataTree[yx] (34128, 51148), (17064, 25574), (8532, 12787), (4266, 6393), (2133, 3196)
├── Points
│     └── 'transcripts': DataFrame with shape: (<Delayed>, 11) (3D points)
├── Shapes
│     ├── 'cell_boundaries': GeoDataFrame shape: (116837, 1) (2D shapes)
│     └── 'nucleus_boundaries': GeoDataFrame shape: (114734, 1) (2D shapes)
└── Tables
      └── 'table': AnnData (78976, 469)
with coordinate systems:
    ▸ 'global', with elements:
        morphology_focus (Images), cell_labels (Labels), nucleus_labels (Labels), transcripts (Points), cell_boundaries (Shapes), nucleus_boundaries (Shapes)

In [151]:

XeniumKeys.CELL_ID


In [152]:
sdata

SpatialData object
├── Images
│     └── 'morphology_focus': DataTree[cyx] (5, 16631, 17416), (5, 8316, 8708), (5, 4158, 4354), (5, 2079, 2177), (5, 1040, 1089)
├── Labels
│     ├── 'cell_labels': DataTree[yx] (16631, 17416), (8316, 8708), (4158, 4354), (2079, 2177), (1040, 1089)
│     └── 'nucleus_labels': DataTree[yx] (16631, 17416), (8316, 8708), (4158, 4354), (2079, 2177), (1040, 1089)
├── Points
│     └── 'transcripts': DataFrame with shape: (<Delayed>, 11) (3D points)
├── Shapes
│     ├── 'cell_boundaries': GeoDataFrame shape: (8733, 1) (2D shapes)
│     └── 'nucleus_boundaries': GeoDataFrame shape: (8646, 1) (2D shapes)
└── Tables
      └── 'table': AnnData (5215, 469)
with coordinate systems:
    ▸ 'global', with elements:
        morphology_focus (Images), cell_labels (Labels), nucleus_labels (Labels), transcripts (Points), cell_boundaries (Shapes), nucleus_boundaries (Shapes)

In [35]:

queried_points = sdata_crop["my_points"].index.compute()
sdata_crop["my_table"] = sdata["my_table"][queried_points].copy()

'cell_circles'

In [43]:
[i[1] for i in self.adata._gen_elements()]

['morphology_focus',
 'nucleus_labels',
 'cell_labels',
 'transcripts',
 'nucleus_boundaries',
 'cell_boundaries',
 'cell_circles']

In [56]:
found_regions

NameError: name 'found_regions' is not defined

In [59]:
set(table.obs["region_key"].unique().tolist())

KeyError: 'region_key'

In [62]:
    # Scratch check: collect the unique `cell_id` values from the table and
    # take their symmetric difference with a one-element list of element names.
    # NOTE(review): the `spatialdata_attrs` printed elsewhere in this notebook
    # give `region_key` as 'region' and `instance_key` as 'cell_id', so the
    # column holding region names is presumably `table.obs["region"]`; as
    # written, cell IDs are compared against an element name, so nearly
    # everything lands in the difference -- confirm before relying on it.
    found_regions = set(table.obs["cell_id"].unique().tolist())
    target_element_set = ["cell_boundaries"]
    symmetric_difference = found_regions.symmetric_difference(target_element_set)

In [64]:
len(symmetric_difference)

78977

In [127]:
self.adata

SpatialData object
├── Images
│     └── 'morphology_focus': DataTree[cyx] (5, 34128, 51148), (5, 17064, 25574), (5, 8532, 12787), (5, 4266, 6393), (5, 2133, 3196)
├── Labels
│     ├── 'cell_labels': DataTree[yx] (34128, 51148), (17064, 25574), (8532, 12787), (4266, 6393), (2133, 3196)
│     └── 'nucleus_labels': DataTree[yx] (34128, 51148), (17064, 25574), (8532, 12787), (4266, 6393), (2133, 3196)
├── Points
│     └── 'transcripts': DataFrame with shape: (<Delayed>, 11) (3D points)
├── Shapes
│     ├── 'cell_boundaries': GeoDataFrame shape: (116837, 1) (2D shapes)
│     └── 'nucleus_boundaries': GeoDataFrame shape: (114734, 1) (2D shapes)
└── Tables
      └── 'table': AnnData (78976, 469)
with coordinate systems:
    ▸ 'global', with elements:
        morphology_focus (Images), cell_labels (Labels), nucleus_labels (Labels), transcripts (Points), cell_boundaries (Shapes), nucleus_boundaries (Shapes)

In [131]:
self.adata

In [132]:
self.adata._change_table_annotation_target(self.adata.table, "cell_labels")

ValueError: Mismatch(es) found between regions in region column in obs and target element: cell_circles, cell_labels

In [51]:
table.uns.get("spatialdata_attrs")

{'instance_key': 'cell_id', 'region': 'cell_circles', 'region_key': 'region'}

In [129]:
self.adata

SpatialData object
├── Images
│     └── 'morphology_focus': DataTree[cyx] (5, 34128, 51148), (5, 17064, 25574), (5, 8532, 12787), (5, 4266, 6393), (5, 2133, 3196)
├── Labels
│     ├── 'cell_labels': DataTree[yx] (34128, 51148), (17064, 25574), (8532, 12787), (4266, 6393), (2133, 3196)
│     └── 'nucleus_labels': DataTree[yx] (34128, 51148), (17064, 25574), (8532, 12787), (4266, 6393), (2133, 3196)
├── Points
│     └── 'transcripts': DataFrame with shape: (<Delayed>, 11) (3D points)
├── Shapes
│     ├── 'cell_boundaries': GeoDataFrame shape: (116837, 1) (2D shapes)
│     └── 'nucleus_boundaries': GeoDataFrame shape: (114734, 1) (2D shapes)
└── Tables
      └── 'table': AnnData (78976, 469)
with coordinate systems:
    ▸ 'global', with elements:
        morphology_focus (Images), cell_labels (Labels), nucleus_labels (Labels), transcripts (Points), cell_boundaries (Shapes), nucleus_boundaries (Shapes)

In [128]:
self.adata.set_table_annotates_spatialelement("table", region="cell_labels")

ValueError: Mismatch(es) found between regions in region column in obs and target element: cell_circles, cell_labels

In [None]:
# One-off version of the crop step, using whatever `file_coord` and
# `file_obj_crop` were last set to in the notebook session.
sdata = self.crop(file_coord)  # crop data to coordinates
sdata.pl.render_labels("cell_labels").pl.show()  # plot

# NOTE: this replaces the object held by `self` with the cropped one; reload
# the sample before cropping another region from the full data.
self.adata = sdata
self.write(file_obj_crop)  # write cropped

In [None]:
# element = self.adata.shapes["cell_circles"]
# buffered_df = element.copy()
# buffered = to_polygons(element)

In [None]:
# n_cells = {}
# for s in libs:  # iterate samples
#     print(f"\n\n{'*' * 40}\n{s}\n{'*' * 40}\n\n")
#     fff = os.path.join(dir_data, np.array(files)[np.where([
#         s == os.path.basename(x).split("__")[2].split("-")[0]
#         for x in files])[0][0]])  # sample's Xenium data directory path
#     lib = metadata.reset_index().set_index(cso).loc[s][col_sample]
#     file_obj_proc = os.path.join(out_dir, f"{lib}{input_suffix}.h5ad")
#     self = cr.Spatial(fff, library_id=lib)  # load original data
#     n_obs = self.rna.obs.shape[0]
#     self.update_from_h5ad(file_obj_proc)  # update with processed object)
#     self.rna.obs.loc[:, "n_obs_raw"] = n_obs
#     self.write(file_obj_proc)
#     n_cells[s] = pd.Series([n_obs, self.rna.obs.shape[0]], index=pd.Index([
#         "Raw", "Processed"], name="Source"))
# n_cells = pd.concat(n_cells).unstack("Source")
# n_cells.to_excel("/home/elizabeth/elizabeth/projects/senescence/meta.xlsx")
# n_cells

In [None]:
%%time

# Annotate each cropped (sample, region) object and run spatial analyses.
# Reads the annotation workbook once, then reloads every sample/region pair.
file_annotations = os.path.join(
    out_dir, "annotation_dictionaries/annotations_all.xlsx")
# Keep only the `col_ann` column (two-level row index), drop missing entries
fmr = pd.read_excel(file_annotations, index_col=[0, 1])[
    col_ann].dropna().astype(str)  # annotation mapping
# Annotation column name: `col_ann` + "_" + the part of `col_leiden` after
# "leiden_"
c_ann = col_ann + "_" + col_leiden.split("leiden_")[1]

for s in libs:  # iterate samples
    print(f"\n\n{'=' * 80}\n{s}\n{'=' * 80}\n\n")
    # First entry of `files` whose directory name carries this sample ID
    fff = os.path.join(dir_data, np.array(files)[np.where([
        s == os.path.basename(x).split("__")[2].split("-")[0]
        for x in files])[0][0]])  # sample's Xenium data directory path
    lib = metadata.reset_index().set_index(cso).loc[s][col_sample]
    # NOTE(review): file_obj_proc is not used in the visible part of this cell
    file_obj_proc = os.path.join(out_dir, f"{lib}{input_suffix}.h5ad")
    for coord_suffix in regions:
        print(f"\n\n\t\t{'*' * 40}\n\t\t{s}\n\t\t{'*' * 40}\n\n")
        # A fresh object is created for every region
        self = cr.Spatial(fff, library_id=lib, col_cell_type=c_ann)  # load
        self.update_from_h5ad(os.path.join(
            out_new, "objects_cropped", coord_suffix,
            f"{lib}_{coord_suffix}.h5ad"))  # update with cropped object
        # NOTE(review): `i_x` is not defined anywhere in the visible notebook;
        # presumably it should select this sample's rows from the first
        # index level of `fmr` -- confirm the intended key before running.
        self.annotate_clusters(fmr.loc[i_x], col_cell_type=col_leiden,
                               col_annotation=c_ann, copy=False)  # annotate
        _ = self.calculate_centrality(n_jobs=sc.settings.n_jobs)
        _, fig = self.calculate_neighborhood(figsize=(60, 30))