In [1]:
#imports
import spatialdata
import dask_image
import dask.array as da
import geopandas as gpd
import anndata as ad
import napari_spatialdata
import time



In [36]:
for package in [spatialdata, gpd, ad, napari_spatialdata]:
    print(f"{package.__name__}: {package.__version__}")

# Create a sdata object for testing

In [2]:
#paths
path_to_mIF = "/Volumes/Extreme SSD/P26_BCCN/P26E07_E08_E09_SOPA_Gates/processed_images/991.ome.tif"
path_to_mask = "/Volumes/Extreme SSD/P26_BCCN/P26E07_E08_E09_SOPA_Gates/expanded-masks/991.tif"
path_to_shapes = "../data/geojsons/Primary/processed/20241129_1442_Primary991_harmonized.geojson"
# path_to_imaging_table = "/Volumes/Extreme SSD/P26_BCCN/P26E07_E08_E09_SOPA_Gates/adatas/20241002_1218_991_phenotyped.h5ad"
path_to_proteomics = "../data/proteomic/20241201_170620_filtered_imputed_outerjoin_mergefirst.h5ad"

In [3]:
# initiate spatialdata object
sdata = spatialdata.SpatialData()

In [4]:
#parse image
image = dask_image.imread.imread(path_to_mIF)
image = image.rechunk(chunks=(1, 5000, 5000))
parsed_image = spatialdata.models.Image2DModel.parse(image)
sdata['image'] = parsed_image

In [5]:
#parse mask
mask = dask_image.imread.imread(path_to_mask)
mask = da.squeeze(mask)
assert mask.ndim == 2
mask = mask.rechunk(chunks=(5000, 5000))
sdata['mask'] = spatialdata.models.Labels2DModel.parse(mask)

In [6]:
# parse shapes 
gdf = gpd.read_file(path_to_shapes)
# make column matching proteomic table the index
gdf = gdf.set_index("harmonized_class")
sdata["Primary_contours"] = spatialdata.models.ShapesModel.parse(gdf)

In [7]:
#load proteomic table
adata = ad.read_h5ad(path_to_proteomics)
adata.obs["region"] = "Primary_contours"
adata.obs["region"] = adata.obs["region"].astype("category")
table = spatialdata.models.TableModel.parse(adata, region = "Primary_contours", region_key = "region", instance_key = "shape_key")
sdata["proteomics"] = table

In [8]:
def switch_adat_var_index(adata, new_index):
    """
    Created by Jose Nimo on 2023-07-01
    Lastest modified by Jose Nimo on 2024-11-16

    Description:
    Switch the index of adata.var to a new index. Useful for switching between gene names and protein names.

    Arg:
        adata: anndata object
        new_index: pandas series, new index to switch to
    Returns:
        adata: anndata object, with the new index
    """
    adata_copy = adata.copy()

    adata_copy.var[adata_copy.var.index.name] = adata_copy.var.index
    adata_copy.var.set_index(new_index, inplace=True)
    adata_copy.var.index.name = new_index
    
    return adata_copy

In [9]:
sdata['proteomics'] = switch_adat_var_index(sdata['proteomics'], "Genes")

In [10]:
sdata['proteomics'].var.sort_index(inplace=True) #much easier to find a gene like this

In [11]:
#save the spatialdata object
import time
datetime = time.strftime("%Y%m%d_%H%M")
time_start = time.time()
sdata.write(f"../data/sdata/{datetime}_sdata_fullres.zarr")
time_end = time.time()
print(f"Time to save: {time_end - time_start}")

In [21]:
# create subset of the spatialdata object
sdata_subset = spatialdata.bounding_box_query(
    sdata,
    axes = ["y", "x"],
    min_coordinate = [22886, 36125],
    max_coordinate = [43045, 61625],
    target_coordinate_system = "global",
    filter_table = False,
)

In [3]:
sdata = spatialdata.read_zarr("../data/sdata/20241201_1730_sdata_fullres.zarr")

  compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)
  utils.warn_names_duplicates("var")


In [6]:
sdata_subset = sdata.query.bounding_box(
    axes = ["y", "x"],
    min_coordinate = [22886, 36125],
    max_coordinate = [43045, 61625],
    target_coordinate_system = "global",
    filter_table = False,
)

In [7]:
sdata_subset

SpatialData object
├── Images
│     └── 'image': DataArray[cyx] (15, 20159, 25500)
├── Labels
│     └── 'mask': DataArray[yx] (20159, 25500)
├── Shapes
│     └── 'Primary_contours': GeoDataFrame shape: (347, 6) (2D shapes)
└── Tables
      └── 'proteomics': AnnData (18, 4531)
with coordinate systems:
    ▸ 'global', with elements:
        image (Images), mask (Labels), Primary_contours (Shapes)

In [10]:
sdata['image']

Unnamed: 0,Array,Chunk
Bytes,45.45 GiB,23.84 MiB
Shape,"(15, 44470, 73167)","(1, 5000, 5000)"
Dask graph,2025 chunks in 2 graph layers,2025 chunks in 2 graph layers
Data type,uint8 numpy.ndarray,uint8 numpy.ndarray
"Array Chunk Bytes 45.45 GiB 23.84 MiB Shape (15, 44470, 73167) (1, 5000, 5000) Dask graph 2025 chunks in 2 graph layers Data type uint8 numpy.ndarray",73167  44470  15,

Unnamed: 0,Array,Chunk
Bytes,45.45 GiB,23.84 MiB
Shape,"(15, 44470, 73167)","(1, 5000, 5000)"
Dask graph,2025 chunks in 2 graph layers,2025 chunks in 2 graph layers
Data type,uint8 numpy.ndarray,uint8 numpy.ndarray


In [9]:
path = r"../data/sdata/20241201_1730_sdata_subset.zarr"
sdata_subset.write(path, overwrite=True)

ValueError: Attempt to save array to zarr with irregular chunking, please call `arr.rechunk(...)` first.

In [None]:
interactive = napari_spatialdata.Interactive(sdata_subset)
interactive.run()

In [8]:
datetime = time.strftime("%Y%m%d_%H%M")
sdata_subset.write(f"../data/sdata/{datetime}_sdata_subset.zarr")

ValueError: Attempt to save array to zarr with irregular chunking, please call `arr.rechunk(...)` first.