In [1]:
import shutil
from pathlib import Path
from hest import iter_hest
import pandas as pd

# Organise Xenium Prime Run into HEST Format

In [2]:
def organise_xenium(run, slide, roi, source_dir, dest_root):
    """
    Organise XeniumReader outputs into HEST flat-format directories.

    Every known output file that exists in ``source_dir`` is copied into the
    matching modality folder under ``dest_root`` and renamed after the sample
    ID ``XeniumPR{run}S{slide}ROI{roi}``. Files that are absent are skipped.
    If one of the required files (``aligned_fullres_HE.tif`` or
    ``aligned_adata.h5ad``) is absent, a ``[WARN]`` line naming it is printed
    so that an incomplete sample does not pass unnoticed.

    Args:
        run (int): run number (prime run index)
        slide (int): slide number
        roi (int): ROI number
        source_dir (Path | str): folder where XeniumReader outputs live
        dest_root (Path | str): root HEST dataset directory; the modality
            sub-folders are created here if they do not exist yet

    Returns:
        str: the sample ID used to name the copied files.
    """
    # Accept plain strings as well as Path objects.
    source_dir = Path(source_dir)
    dest_root = Path(dest_root)

    sample_id = f"XeniumPR{run}S{slide}ROI{roi}"

    # Create modality-based folders if they do not exist yet. Folders that
    # receive no file here are still created.
    for folder in ("wsis", "st", "metadata", "thumbnails",
                   "spatial_plots", "tissue_seg", "pixel_size_vis",
                   "patches", "patches_vis", "cellvit_seg", "xenium_seg"):
        (dest_root / folder).mkdir(parents=True, exist_ok=True)

    # (source file name, destination folder, destination file name, required)
    copy_plan = [
        # Required files
        ("aligned_fullres_HE.tif", "wsis", f"{sample_id}.tif", True),
        ("aligned_adata.h5ad", "st", f"{sample_id}.h5ad", True),
        # Currently disabled:
        # ("aligned_cells.h5ad", "st", f"{sample_id}_cells.h5ad", False),
        # Metadata
        ("metrics.json", "metadata", f"{sample_id}.json", False),
        # Optional QC/visuals
        ("downscaled_fullres.jpeg", "thumbnails",
         f"{sample_id}_thumbnail.jpeg", False),
        ("spatial_plots.png", "spatial_plots",
         f"{sample_id}_overlay.png", False),
    ]

    missing_required = []
    for src_name, folder, dest_name, required in copy_plan:
        src_path = source_dir / src_name
        if src_path.exists():
            shutil.copy(src_path, dest_root / folder / dest_name)
        elif required:
            missing_required.append(src_name)

    if missing_required:
        print(f"[WARN] Sample {sample_id}: required file(s) missing from "
              f"{source_dir}: {', '.join(missing_required)}")

    print(f"[INFO] Organised sample {sample_id} into HEST format at {dest_root}")
    return sample_id

### Test code

In [5]:
# XeniumReader output folder for one sample: Prime Run 1, Slide 1, ROI 1
src = Path("/project/simmons_hts/kxu/hest/xenium_data/xenium_prime_run1/slide1/ROI1")

# HEST dataset root that receives the organised files
dest_root = Path("/project/simmons_hts/kxu/hest/data")

# Single-sample trial run; the returned sample ID is shown as the cell output.
organise_xenium(source_dir=src, dest_root=dest_root, run=1, slide=1, roi=1)

[INFO] Organised sample XeniumPR1S1ROI1 into HEST format at /project/simmons_hts/kxu/hest/data


'XeniumPR1S1ROI1'

### Loop over all samples

In [19]:
# Runtime noted by the original author: about 2 min.
root = Path("/project/simmons_hts/kxu/hest/xenium_data/xenium_prime_run1")

# HEST dataset root that receives the organised files
dest_root = Path("/project/simmons_hts/kxu/hest/data")

# Visit every slide/ROI combination (slides 1-2, ROI1-ROI8); combinations
# whose folder does not exist under `root` are skipped.
for slide in (1, 2):
    slide_dir = root / f"slide{slide}"
    for roi in range(1, 9):
        src = slide_dir / f"ROI{roi}"
        if not src.exists():
            continue
        organise_xenium(run=1, slide=slide, roi=roi,
                        source_dir=src, dest_root=dest_root)

[INFO] Organised sample XeniumPR1S1ROI1 into HEST format at /project/simmons_hts/kxu/hest/data
[INFO] Organised sample XeniumPR1S1ROI2 into HEST format at /project/simmons_hts/kxu/hest/data
[INFO] Organised sample XeniumPR1S1ROI3 into HEST format at /project/simmons_hts/kxu/hest/data
[INFO] Organised sample XeniumPR1S1ROI4 into HEST format at /project/simmons_hts/kxu/hest/data
[INFO] Organised sample XeniumPR1S1ROI5 into HEST format at /project/simmons_hts/kxu/hest/data
[INFO] Organised sample XeniumPR1S1ROI6 into HEST format at /project/simmons_hts/kxu/hest/data
[INFO] Organised sample XeniumPR1S1ROI7 into HEST format at /project/simmons_hts/kxu/hest/data
[INFO] Organised sample XeniumPR1S1ROI8 into HEST format at /project/simmons_hts/kxu/hest/data
[INFO] Organised sample XeniumPR1S2ROI1 into HEST format at /project/simmons_hts/kxu/hest/data
[INFO] Organised sample XeniumPR1S2ROI2 into HEST format at /project/simmons_hts/kxu/hest/data
[INFO] Organised sample XeniumPR1S2ROI3 into HEST 

In [6]:
# Check that the organised dataset can be read back through HEST.
print('load hest...')

# Only a two-sample subset is iterated here.
subset_ids = ['XeniumPR1S1ROI1', 'XeniumPR1S1ROI2']
for st in iter_hest("/project/simmons_hts/kxu/hest/data", id_list=subset_ids):
    print(st)

load hest...
<hest.HESTData.HESTData object at 0x73ef8d7aa490>
        'pixel_size' is 0.4353514884015406
        'wsi' is <width=8851, height=8585, backend=CuImageWSI>
        'shapes': []
<hest.HESTData.HESTData object at 0x73ef7f7c73d0>
        'pixel_size' is 0.43561023341205224
        'wsi' is <width=7408, height=10434, backend=CuImageWSI>
        'shapes': []
