#### save hest processed metadata with patch count in "/project/simmons_hts/kxu/hest/hest_directory.csv"

In [22]:
import pandas as pd
from pathlib import Path
import h5py
from gutdecoder.wrappers.patching_wrapper import *
from gutdecoder.utils.create_metadata import *

# Load metadata

## Create Metadata for Xenium

In [23]:
metadata_path = Path("/project/simmons_hts/kxu/hest/xenium_directory.xlsx")
metadata_xe = pd.read_excel(metadata_path)

# Convert Slide_ID to integer (handles NaNs safely)
#metadata_df["Slide_ID"] = pd.to_numeric(metadata_df["Slide_ID"], errors="coerce").astype("Int64")


# Rename column
metadata_xe = metadata_xe.rename(columns={"directory": "directory_xenium_output"})
metadata_xe = metadata_xe.rename(columns={"run": "run_id"})
metadata_xe = metadata_xe.rename(columns={"PostHnE": "wsi"})

# Clean up Slide column: keep only numeric part
metadata_xe["Slide"] = metadata_xe["Slide"].str.replace("SLIDE", "", regex=False)

# Clean column names
metadata_xe.columns = (
    metadata_xe.columns
    .str.strip()               # remove leading/trailing spaces
    .str.lower()               # make all lowercase
    .str.replace(" ", "_")     # replace spaces with underscores
)

metadata_xe

Unnamed: 0,sample_id,technology,panel,panel_name,patient_id,run_name,run_id,slide,slide_id,roi,...,location,directory,czi,wsi,alignment,directory_xenium_output,rds,alignment_note,crop_100_um,segmentation_target_pxl_size
0,XeniumPR1S1ROI1,10x Xenium,5k,,CAM006,RUNTrexBIO,PR1,1,43739.0,1.0,...,Colon,/project/simmons_hts/jpark/1_project/0_xenium/...,,CAM006_Xenium5K_post_HnE.ome.tif,CAM006_Xenium5K_post_HnE_matrix.csv,/project/simmons_hts/shared/20_11_2024_xenium_...,/project/simmons_hts/jpark/1_project/1_objects...,,"{""type"": ""strip"", ""side"": ""right"", ""size"": 0.1...",
1,XeniumPR1S1ROI2,10x Xenium,5k,,TIP877,RUNTrexBIO,PR1,1,43739.0,2.0,...,Colon,/project/simmons_hts/jpark/1_project/0_xenium/...,,TIP877_Xenium5K_post_HnE.ome.tif,TIP877_Xenium5K_post_HnE_matrix.csv,/project/simmons_hts/shared/20_11_2024_xenium_...,/project/simmons_hts/jpark/1_project/1_objects...,,,
2,XeniumPR1S1ROI3,10x Xenium,5k,,GI9389,RUNTrexBIO,PR1,1,43739.0,3.0,...,Colon,/project/simmons_hts/jpark/1_project/0_xenium/...,,GI9389_Xenium5K_post_HnE.ome.tif,GI9389_Xenium5K_post_HnE_matrix.csv,/project/simmons_hts/shared/20_11_2024_xenium_...,/project/simmons_hts/jpark/1_project/1_objects...,,"{""type"":""corner"", ""corner"":""top-left"", ""width""...",
3,XeniumPR1S1ROI4,10x Xenium,5k,,GI9077,RUNTrexBIO,PR1,1,43739.0,4.0,...,Colon,/project/simmons_hts/jpark/1_project/0_xenium/...,,GI9077_Xenium5K_post_HnE.ome.tif,GI9077_Xenium5K_post_HnE_matrix.csv,/project/simmons_hts/shared/20_11_2024_xenium_...,/project/simmons_hts/jpark/1_project/1_objects...,,"{""type"":""corner"", ""corner"":""bottom-left"", ""wid...",
4,XeniumPR1S1ROI5,10x Xenium,5k,,GI9612,RUNTrexBIO,PR1,1,43739.0,5.0,...,Colon,/project/simmons_hts/jpark/1_project/0_xenium/...,,GI9612_Xenium5K_post_HnE.ome.tif,GI9612_Xenium5K_post_HnE_matrix.csv,/project/simmons_hts/shared/20_11_2024_xenium_...,/project/simmons_hts/jpark/1_project/1_objects...,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
138,XeniumR7S2ROI8,10x Xenium,480,hColon_v1,,RUN1_KK,R7,2,37812.0,8.0,...,,,,,XeniumPR5S2ROI7_alignment_files/matrix.csv,/project/Coeliac_Group/shared/xenium/20240812_...,,,,
139,,,,,,,,,,,...,,,,,XeniumPR5S2ROI8_alignment_files/matrix.csv,,,,,
140,,,,,,,,,,,...,,,,,XeniumPR5S2ROI9_alignment_files/matrix.csv,,,,,
141,,,,,,,,,,,...,,,,,XeniumPR5S2ROI10_alignment_files/matrix.csv,,,,,


In [24]:
# Drop rows with NA in critical columns (after cleaning)
required_cols = [
    "patient_id",
    "run_id",
    "slide",
    "slide_id",
    "roi",
    "directory",
    "directory_xenium_output",
    "wsi",
    "alignment"
]

metadata_xe = metadata_xe.dropna(subset=required_cols)

# Reorder columns so SampleID is first
cols = ["sample_id"] + [c for c in metadata_xe.columns if c != "sample_id"]
metadata_xe = metadata_xe[cols]

metadata_xe["roi"] = metadata_xe["roi"].astype(int)
metadata_xe["slide_id"] = metadata_xe["slide_id"].astype(int)

subset_cols = [
    "sample_id", 
    "roi", 
    "slide",
    "patient_id",
    'sample_code',
    'panel',
    'technology',
    'run_id',
    'directory',
    'wsi',
    'alignment',
    'alignment_note',
    'rds',
    'slide_id',
    'sample_type',
    'location',
]

metadata_xe = metadata_xe[subset_cols]
metadata_xe

Unnamed: 0,sample_id,roi,slide,patient_id,sample_code,panel,technology,run_id,directory,wsi,alignment,alignment_note,rds,slide_id,sample_type,location
0,XeniumPR1S1ROI1,1,1,CAM006,XEN_ST_5K_SLIDE 1,5k,10x Xenium,PR1,/project/simmons_hts/jpark/1_project/0_xenium/...,CAM006_Xenium5K_post_HnE.ome.tif,CAM006_Xenium5K_post_HnE_matrix.csv,,/project/simmons_hts/jpark/1_project/1_objects...,43739,Healthy,Colon
1,XeniumPR1S1ROI2,2,1,TIP877,XEN_ST_5K_SLIDE 1,5k,10x Xenium,PR1,/project/simmons_hts/jpark/1_project/0_xenium/...,TIP877_Xenium5K_post_HnE.ome.tif,TIP877_Xenium5K_post_HnE_matrix.csv,,/project/simmons_hts/jpark/1_project/1_objects...,43739,UC inflamed,Colon
2,XeniumPR1S1ROI3,3,1,GI9389,XEN_ST_5K_SLIDE 1,5k,10x Xenium,PR1,/project/simmons_hts/jpark/1_project/0_xenium/...,GI9389_Xenium5K_post_HnE.ome.tif,GI9389_Xenium5K_post_HnE_matrix.csv,,/project/simmons_hts/jpark/1_project/1_objects...,43739,UC inflamed,Colon
3,XeniumPR1S1ROI4,4,1,GI9077,XEN_ST_5K_SLIDE 1,5k,10x Xenium,PR1,/project/simmons_hts/jpark/1_project/0_xenium/...,GI9077_Xenium5K_post_HnE.ome.tif,GI9077_Xenium5K_post_HnE_matrix.csv,,/project/simmons_hts/jpark/1_project/1_objects...,43739,UC inflamed,Colon
4,XeniumPR1S1ROI5,5,1,GI9612,XEN_ST_5K_SLIDE 1,5k,10x Xenium,PR1,/project/simmons_hts/jpark/1_project/0_xenium/...,GI9612_Xenium5K_post_HnE.ome.tif,GI9612_Xenium5K_post_HnE_matrix.csv,,/project/simmons_hts/jpark/1_project/1_objects...,43739,Healthy,Colon
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117,XeniumR6S2ROI7,7,2,JR_50621_22,XEN_ST_SLIDE 10_S7,480,10x Xenium,R6,/project/simmons_hts/kxu/xenium/he/480/RUN6,XeniumR6S2ROI7.ome.tif,XeniumR6S2ROI7_alignment_files/matrix.csv,,,31265,Diverticular fistula,Colon
118,XeniumR6S2ROI8,8,2,JR_18170_21,XEN_ST_SLIDE 10_S8,480,10x Xenium,R6,/project/simmons_hts/kxu/xenium/he/480/RUN6,XeniumR6S2ROI8.ome.tif,XeniumR6S2ROI8_alignment_files/matrix.csv,,,31265,Diverticular fistula,Colon
119,XeniumR6S2ROI9,9,2,JR_8610_23,XEN_ST_SLIDE 10_S9,480,10x Xenium,R6,/project/simmons_hts/kxu/xenium/he/480/RUN6,XeniumR6S2ROI9.ome.tif,XeniumR6S2ROI9_alignment_files/matrix.csv,,,31265,CD fistula,Perianal
120,XeniumR6S2ROI10,10,2,JR_20687_20,XEN_ST_SLIDE 10_S10,480,10x Xenium,R6,/project/simmons_hts/kxu/xenium/he/480/RUN6,XeniumR6S2ROI10.ome.tif,XeniumR6S2ROI10_alignment_files/matrix.csv,,,31265,Diverticular fistula,Perianal


## Create Metadata for Visium

In [25]:
metadata_path = Path("/project/simmons_hts/kxu/hest/visium_directory.xlsx")
metadata_vi = pd.read_excel(metadata_path)
metadata_vi

# Clean column names
metadata_vi.columns = (
    metadata_vi.columns
    .str.strip()               # remove leading/trailing spaces
    .str.lower()               # make all lowercase
    .str.replace(" ", "_")     # replace spaces with underscores
)

metadata_vi

required_cols = [
    "patient_id",
    "run_id",
    "slide",
    "slide_id",
    "roi",
    "directory",
]

metadata_vi = metadata_vi.dropna(subset=required_cols)
metadata_vi["roi"] = metadata_vi["roi"].astype(int)
metadata_vi["slide"] = metadata_vi["slide"].astype(int)

subset_cols = [
    "sample_id", 
    "roi", 
    "slide",
    "patient_id",
    'sample_code',
    'sample_name',
    'panel',
    'technology',
    'run_id',
    'directory',
    'rds',
    'slide_id',
    'sample_type',
    'location',
    'phenotype_montreal',
    'matched_xenium'
]

metadata_vi = metadata_vi[subset_cols]

metadata_vi

Unnamed: 0,sample_id,roi,slide,patient_id,sample_code,sample_name,panel,technology,run_id,directory,rds,slide_id,sample_type,location,phenotype_montreal,matched_xenium
0,VisiumR1S1ROI1,1,1,GI 7051,R1A5,GI_7051_INFLAMMATORY,whole transcriptome,10x Visium,R1,/ceph/project/simmons_hts/shared/FFPE_ST_23_07...,/ceph/project/simmons_hts/aantanav/_r_projects...,V12M15-091-A1,CD inflamed,Ileum,Inflammatory (B1),
1,VisiumR1S1ROI2,2,1,GI 6968,R1A6,GI_6968_HEALTHY,whole transcriptome,10x Visium,R1,/ceph/project/simmons_hts/shared/FFPE_ST_23_07...,/ceph/project/simmons_hts/aantanav/_r_projects...,V12M15-091-B1,CD,Ileum,,
2,VisiumR1S1ROI3,3,1,GI 7595,R1A7,GI_7595_STRICTURING,whole transcriptome,10x Visium,R1,/ceph/project/simmons_hts/shared/FFPE_ST_23_07...,/ceph/project/simmons_hts/aantanav/_r_projects...,V12M15-091-C1,CD stricture,Ileum,Stricturing (B2),
3,VisiumR1S1ROI4,4,1,TIP 535,R1A8?,TIP_535_FISTULATING,whole transcriptome,10x Visium,R1,/ceph/project/simmons_hts/shared/FFPE_ST_23_07...,/ceph/project/simmons_hts/aantanav/_r_projects...,V12M15-091-D1,CD fistula,Ileum,Fistulating (B3),XEN_ST_SLIDE 6_S9
4,VisiumR2S1ROI1,1,1,GI 6966,R2B4,GI_6966_INFLAMMATORY,whole transcriptome,10x Visium,R2,/ceph/project/simmons_hts/shared/CM_ST_12_10_2...,/ceph/project/simmons_hts/aantanav/_r_projects...,V12Y31-028-D1,CD inflamed,Ileum,Inflammatory (B1),
5,VisiumR2S1ROI2,2,1,GI 7738,?,GI_7738_JR_STRICTURING,whole transcriptome,10x Visium,R2,/ceph/project/simmons_hts/shared/CM_ST_12_10_2...,/ceph/project/simmons_hts/aantanav/_r_projects...,V12Y31-028-A1,CD stricture,Ileum,Stricturing (B2),
6,VisiumR2S1ROI3,3,1,TIP 473 JR,?,TIP_473_JR_FISTULATING,whole transcriptome,10x Visium,R2,/ceph/project/simmons_hts/shared/CM_ST_12_10_2...,/ceph/project/simmons_hts/aantanav/_r_projects...,V12Y31-028-C1,CD fistula,Ileocolic,Fistulating (B3),
7,VisiumR2S1ROI4,4,1,TIP 633 JR,?,TIP_633_JR_HEALTHY,whole transcriptome,10x Visium,R2,/ceph/project/simmons_hts/shared/CM_ST_12_10_2...,/ceph/project/simmons_hts/aantanav/_r_projects...,V12Y31-028-B1,Healthy,Ileum,,
8,VisiumR2S2ROI1,1,2,TIP 522,R2B7,TIP_522_HEALTHY,whole transcriptome,10x Visium,R2,/ceph/project/simmons_hts/shared/CM_ST_12_10_2...,/ceph/project/simmons_hts/aantanav/_r_projects...,V12U21-275-C1,Healthy,Ileum,,
9,VisiumR2S2ROI2,2,2,TIP 559,R2B5,TIP_559_FISTULATING_1,whole transcriptome,10x Visium,R2,/ceph/project/simmons_hts/shared/CM_ST_12_10_2...,/ceph/project/simmons_hts/aantanav/_r_projects...,V12U21-275-A1,CD fistula,Ileocolic,Fistulating (B3),


## Combine Visium & Xenium medatada

In [26]:
metadata = pd.concat([metadata_xe, metadata_vi], axis=0, ignore_index=True)
metadata

Unnamed: 0,sample_id,roi,slide,patient_id,sample_code,panel,technology,run_id,directory,wsi,alignment,alignment_note,rds,slide_id,sample_type,location,sample_name,phenotype_montreal,matched_xenium
0,XeniumPR1S1ROI1,1,1,CAM006,XEN_ST_5K_SLIDE 1,5k,10x Xenium,PR1,/project/simmons_hts/jpark/1_project/0_xenium/...,CAM006_Xenium5K_post_HnE.ome.tif,CAM006_Xenium5K_post_HnE_matrix.csv,,/project/simmons_hts/jpark/1_project/1_objects...,43739,Healthy,Colon,,,
1,XeniumPR1S1ROI2,2,1,TIP877,XEN_ST_5K_SLIDE 1,5k,10x Xenium,PR1,/project/simmons_hts/jpark/1_project/0_xenium/...,TIP877_Xenium5K_post_HnE.ome.tif,TIP877_Xenium5K_post_HnE_matrix.csv,,/project/simmons_hts/jpark/1_project/1_objects...,43739,UC inflamed,Colon,,,
2,XeniumPR1S1ROI3,3,1,GI9389,XEN_ST_5K_SLIDE 1,5k,10x Xenium,PR1,/project/simmons_hts/jpark/1_project/0_xenium/...,GI9389_Xenium5K_post_HnE.ome.tif,GI9389_Xenium5K_post_HnE_matrix.csv,,/project/simmons_hts/jpark/1_project/1_objects...,43739,UC inflamed,Colon,,,
3,XeniumPR1S1ROI4,4,1,GI9077,XEN_ST_5K_SLIDE 1,5k,10x Xenium,PR1,/project/simmons_hts/jpark/1_project/0_xenium/...,GI9077_Xenium5K_post_HnE.ome.tif,GI9077_Xenium5K_post_HnE_matrix.csv,,/project/simmons_hts/jpark/1_project/1_objects...,43739,UC inflamed,Colon,,,
4,XeniumPR1S1ROI5,5,1,GI9612,XEN_ST_5K_SLIDE 1,5k,10x Xenium,PR1,/project/simmons_hts/jpark/1_project/0_xenium/...,GI9612_Xenium5K_post_HnE.ome.tif,GI9612_Xenium5K_post_HnE_matrix.csv,,/project/simmons_hts/jpark/1_project/1_objects...,43739,Healthy,Colon,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150,VisiumR6S1ROI4,4,1,JR_23234_23,R6B12,whole transcriptome,10x Visium,R6,/ceph/project/simmons_hts/shared/27_07_2023_CD...,,,,/ceph/project/simmons_hts/aantanav/_r_projects...,V12D12-290-D1,CD fistula,Enterocutaneous,B12,Fistulating (B3),
151,VisiumR6S2ROI1,1,2,JR_18076_22,R6C1,whole transcriptome,10x Visium,R6,/ceph/project/simmons_hts/shared/27_07_2023_CD...,,,,/ceph/project/simmons_hts/aantanav/_r_projects...,V12D12-291-A1,CD fistula,Anal,C1,Fistulating (B3) A2L2B1p,
152,VisiumR6S2ROI2,2,2,BAY_105338_20,R6C2,whole transcriptome,10x Visium,R6,/ceph/project/simmons_hts/shared/27_07_2023_CD...,,,,/ceph/project/simmons_hts/aantanav/_r_projects...,V12D12-291-B1,CD fistula,Transsphincteric,C2,Fistulating (B3),
153,VisiumR6S2ROI3,3,2,BAY_104603_20/JR_20291_22,R6C3,whole transcriptome,10x Visium,R6,/ceph/project/simmons_hts/shared/27_07_2023_CD...,,,,/ceph/project/simmons_hts/aantanav/_r_projects...,V12D12-291-C1,CD fistula,Ileocaecal/Perianal,C3,Fistulating (B3)/Fistulating (B3) A2L1B2p,


# add patch number to metadata

## Xenium

In [27]:
# 2) Prepare specs similar to your previous code; you can pass multiple paths
specs = [
    # 100um: note two slides each have different prefixes in your original code,
    # so we specify prefixes per path.
    {
        "paths": [
            "/project/simmons_hts/kxu/hest/xenium_data/XeniumPR1/slide1",
            "/project/simmons_hts/kxu/hest/xenium_data/XeniumPR1/slide2",
            "/project/simmons_hts/kxu/hest/xenium_data/XeniumPR2/slide1",
            "/project/simmons_hts/kxu/hest/xenium_data/XeniumPR3/slide1",
        ],
        "prefixes": ["XeniumPR1S1", "XeniumPR1S2","XeniumPR2S1","XeniumPR3S1"],
        "rename_col": "num_patches_100um"
    },
    
     # 50um (standard)
    {
        "paths": [
            "/project/simmons_hts/kxu/hest/xenium_data/XeniumPR1_50um/slide1",
            "/project/simmons_hts/kxu/hest/xenium_data/XeniumPR1_50um/slide2",
        ],
        "prefixes": ["XeniumPR1S1", "XeniumPR1S2"],
        "rename_col": "num_patches_50um",
    },
    
# 50um with 0.25 um px
    {
        "paths": [
            "/project/simmons_hts/kxu/hest/xenium_data/XeniumPR1_50um_0.25_um_px/slide1",
            "/project/simmons_hts/kxu/hest/xenium_data/XeniumPR1_50um_0.25_um_px/slide2",
        ],
        "prefixes": ["XeniumPR1S1", "XeniumPR1S2"],
        "rename_col": "num_patches_50um_0.25_um_px",
    },

    # 25um (standard)
    {
        "paths": [
            "/project/simmons_hts/kxu/hest/xenium_data/XeniumPR1_25um/slide1",
            "/project/simmons_hts/kxu/hest/xenium_data/XeniumPR1_25um/slide2",
        ],
        "prefixes": ["XeniumPR1S1", "XeniumPR1S2"],
        "rename_col": "num_patches_25um",
    },

    # 25um with 0.125 um px
    {
        "paths": [
            "/project/simmons_hts/kxu/hest/xenium_data/XeniumPR1_25um_0.125_um_px/slide1",
            "/project/simmons_hts/kxu/hest/xenium_data/XeniumPR1_25um_0.125_um_px/slide2",
            
        ],
        "prefixes": ["XeniumPR1S1", "XeniumPR1S2"],
        "rename_col": "num_patches_25um_0.125_um_px",
    },

    # cell-level 100um (cell crops)
    {
        "paths": [
            "/project/simmons_hts/kxu/hest/xenium_data/XeniumPR1_cell/slide1",
            "/project/simmons_hts/kxu/hest/xenium_data/XeniumPR1_cell/slide2",
            "/project/simmons_hts/kxu/hest/xenium_data/XeniumPR2_cell/slide1",
            "/project/simmons_hts/kxu/hest/xenium_data/XeniumPR3_cell/slide1",
        ],
        "prefixes": ["XeniumPR1S1", "XeniumPR1S2", "XeniumPR2S1", "XeniumPR3S1"],
        "rename_col": "num_patches_cell_100um",
    },
]

# 3) Run the builder
out = build_merged_counts(metadata, specs, count_func=count_patches, save_csv=None, auto_prefix=True, verbose=True)

merged_df = out["merged"]
per_metric = out["per_metric"]

# 4) Inspect / save
merged_df


  sample_id  num_patches
0      ROI1          684
1      ROI2          482
2      ROI3         1168
3      ROI4         1253
4      ROI5          893
5      ROI6         2665
6      ROI7         2419
7      ROI8         3291

Total patches across all samples: 12855
  sample_id  num_patches
0      ROI1         2799
1      ROI2         2383
2      ROI3         1514
3      ROI4         2674
4      ROI5          892
5      ROI6         1631
6      ROI7         1868

Total patches across all samples: 13761
  sample_id  num_patches
0      ROI1         2850
1      ROI2         2352
2      ROI3         1597
3      ROI4         1592
4      ROI5         2725
5      ROI6          720
6      ROI7          565
7      ROI8         1362

Total patches across all samples: 13763
  sample_id  num_patches
0      ROI1         2625
1      ROI2          672
2      ROI3         2044
3      ROI4         2045
4      ROI5         1476
5      ROI6         2054
6      ROI7          687
7      ROI8         1114

T

Unnamed: 0,sample_id,roi,slide,patient_id,sample_code,panel,technology,run_id,directory,wsi,...,location,sample_name,phenotype_montreal,matched_xenium,num_patches_100um,num_patches_50um,num_patches_50um_0.25_um_px,num_patches_25um,num_patches_25um_0.125_um_px,num_patches_cell_100um
0,XeniumPR1S1ROI1,1,1,CAM006,XEN_ST_5K_SLIDE 1,5k,10x Xenium,PR1,/project/simmons_hts/jpark/1_project/0_xenium/...,CAM006_Xenium5K_post_HnE.ome.tif,...,Colon,,,,684.0,2727.0,2603.0,10534.0,9950.0,608.0
1,XeniumPR1S1ROI2,2,1,TIP877,XEN_ST_5K_SLIDE 1,5k,10x Xenium,PR1,/project/simmons_hts/jpark/1_project/0_xenium/...,TIP877_Xenium5K_post_HnE.ome.tif,...,Colon,,,,482.0,1886.0,1838.0,7393.0,7130.0,429.0
2,XeniumPR1S1ROI3,3,1,GI9389,XEN_ST_5K_SLIDE 1,5k,10x Xenium,PR1,/project/simmons_hts/jpark/1_project/0_xenium/...,GI9389_Xenium5K_post_HnE.ome.tif,...,Colon,,,,1168.0,4627.0,4502.0,18007.0,17368.0,930.0
3,XeniumPR1S1ROI4,4,1,GI9077,XEN_ST_5K_SLIDE 1,5k,10x Xenium,PR1,/project/simmons_hts/jpark/1_project/0_xenium/...,GI9077_Xenium5K_post_HnE.ome.tif,...,Colon,,,,1253.0,5010.0,4903.0,19937.0,19360.0,1064.0
4,XeniumPR1S1ROI5,5,1,GI9612,XEN_ST_5K_SLIDE 1,5k,10x Xenium,PR1,/project/simmons_hts/jpark/1_project/0_xenium/...,GI9612_Xenium5K_post_HnE.ome.tif,...,Colon,,,,893.0,3520.0,3289.0,13692.0,12449.0,756.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150,VisiumR6S1ROI4,4,1,JR_23234_23,R6B12,whole transcriptome,10x Visium,R6,/ceph/project/simmons_hts/shared/27_07_2023_CD...,,...,Enterocutaneous,B12,Fistulating (B3),,,,,,,
151,VisiumR6S2ROI1,1,2,JR_18076_22,R6C1,whole transcriptome,10x Visium,R6,/ceph/project/simmons_hts/shared/27_07_2023_CD...,,...,Anal,C1,Fistulating (B3) A2L2B1p,,,,,,,
152,VisiumR6S2ROI2,2,2,BAY_105338_20,R6C2,whole transcriptome,10x Visium,R6,/ceph/project/simmons_hts/shared/27_07_2023_CD...,,...,Transsphincteric,C2,Fistulating (B3),,,,,,,
153,VisiumR6S2ROI3,3,2,BAY_104603_20/JR_20291_22,R6C3,whole transcriptome,10x Visium,R6,/ceph/project/simmons_hts/shared/27_07_2023_CD...,,...,Ileocaecal/Perianal,C3,Fistulating (B3)/Fistulating (B3) A2L1B2p,,,,,,,


## count patches for XeniumPR4 & PR5 (100um)

In [28]:
ROOT = Path("/project/simmons_hts/kxu/hest/xenium_data")

for i in range(4, 6):  
    run_name = f"XeniumPR{i}"
    base = ROOT / run_name

    for slide_idx, slide_name in enumerate(["slide1", "slide2"], start=1):
        slide_path = base / slide_name
        if not slide_path.exists():
            print(f"Skipping {slide_path} (not found)")
            continue

        print(f"Counting patches in {slide_path}...")
        df_counts = count_patches(str(slide_path), save_csv='patch_counts.csv')
        if df_counts is None:
            continue

        # ensure we have 'sample_id' and 'num_patches'
        if "sample_id" not in df_counts.columns:
            df_counts = df_counts.reset_index()
        if "num_patches" not in df_counts.columns:
            raise ValueError(f"'num_patches' column missing in {slide_path}")

        prefix = f"{run_name}S{slide_idx}"
        df_counts["sample_id"] = prefix + df_counts["sample_id"].astype(str)
        display(df_counts)

        # align and add to existing merged_df
        for _, row in df_counts.iterrows():
            sid = row["sample_id"]
            num = row["num_patches"]
            if sid in merged_df["sample_id"].values:
                merged_df.loc[merged_df["sample_id"] == sid, "num_patches_100um"] = num

print("✅ Updated merged_df with patch counts (added to existing num_patches).")

Counting patches in /project/simmons_hts/kxu/hest/xenium_data/XeniumPR4/slide1...
  sample_id  num_patches
0      ROI1          796
1     ROI10          564
2      ROI2          719
3      ROI3          716
4      ROI4          730
5      ROI5          477
6      ROI6          439
7      ROI7          725
8      ROI8          605
9      ROI9          399

Total patches across all samples: 6170
[INFO] Saved counts to /project/simmons_hts/kxu/hest/xenium_data/XeniumPR4/slide1/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,XeniumPR4S1ROI1,796
1,XeniumPR4S1ROI10,564
2,XeniumPR4S1ROI2,719
3,XeniumPR4S1ROI3,716
4,XeniumPR4S1ROI4,730
5,XeniumPR4S1ROI5,477
6,XeniumPR4S1ROI6,439
7,XeniumPR4S1ROI7,725
8,XeniumPR4S1ROI8,605
9,XeniumPR4S1ROI9,399


Counting patches in /project/simmons_hts/kxu/hest/xenium_data/XeniumPR4/slide2...
  sample_id  num_patches
0      ROI1          551
1     ROI10          395
2      ROI2          607
3      ROI3          716
4      ROI4          791
5      ROI5          860
6      ROI6          778
7      ROI7          758
8      ROI8          432
9      ROI9          514

Total patches across all samples: 6402
[INFO] Saved counts to /project/simmons_hts/kxu/hest/xenium_data/XeniumPR4/slide2/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,XeniumPR4S2ROI1,551
1,XeniumPR4S2ROI10,395
2,XeniumPR4S2ROI2,607
3,XeniumPR4S2ROI3,716
4,XeniumPR4S2ROI4,791
5,XeniumPR4S2ROI5,860
6,XeniumPR4S2ROI6,778
7,XeniumPR4S2ROI7,758
8,XeniumPR4S2ROI8,432
9,XeniumPR4S2ROI9,514


Counting patches in /project/simmons_hts/kxu/hest/xenium_data/XeniumPR5/slide1...
  sample_id  num_patches
0      ROI1          435
1     ROI10          640
2      ROI2          218
3      ROI3          639
4      ROI4          552
5      ROI5          665
6      ROI6          663
7      ROI7          370
8      ROI8          966
9      ROI9          709

Total patches across all samples: 5857
[INFO] Saved counts to /project/simmons_hts/kxu/hest/xenium_data/XeniumPR5/slide1/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,XeniumPR5S1ROI1,435
1,XeniumPR5S1ROI10,640
2,XeniumPR5S1ROI2,218
3,XeniumPR5S1ROI3,639
4,XeniumPR5S1ROI4,552
5,XeniumPR5S1ROI5,665
6,XeniumPR5S1ROI6,663
7,XeniumPR5S1ROI7,370
8,XeniumPR5S1ROI8,966
9,XeniumPR5S1ROI9,709


Counting patches in /project/simmons_hts/kxu/hest/xenium_data/XeniumPR5/slide2...
   sample_id  num_patches
0       ROI1          475
1      ROI10          467
2      ROI11          703
3       ROI2          340
4       ROI3          440
5       ROI4          706
6       ROI5          866
7       ROI6          528
8       ROI7          593
9       ROI8          431
10      ROI9          581

Total patches across all samples: 6130
[INFO] Saved counts to /project/simmons_hts/kxu/hest/xenium_data/XeniumPR5/slide2/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,XeniumPR5S2ROI1,475
1,XeniumPR5S2ROI10,467
2,XeniumPR5S2ROI11,703
3,XeniumPR5S2ROI2,340
4,XeniumPR5S2ROI3,440
5,XeniumPR5S2ROI4,706
6,XeniumPR5S2ROI5,866
7,XeniumPR5S2ROI6,528
8,XeniumPR5S2ROI7,593
9,XeniumPR5S2ROI8,431


✅ Updated merged_df with patch counts (added to existing num_patches).


## count patches for Xenium 480 panel (100um)

In [29]:
from pathlib import Path
import pandas as pd

ROOT = Path("/project/simmons_hts/kxu/hest/xenium_data")

for i in range(1, 7):  # XeniumR1 .. XeniumR6
    run_name = f"XeniumR{i}"
    base = ROOT / run_name

    for slide_idx, slide_name in enumerate(["slide1", "slide2"], start=1):
        slide_path = base / slide_name
        if not slide_path.exists():
            print(f"Skipping {slide_path} (not found)")
            continue

        print(f"Counting patches in {slide_path}...")
        df_counts = count_patches(str(slide_path), save_csv='patch_counts.csv')
        if df_counts is None:
            continue

        # ensure we have 'sample_id' and 'num_patches'
        if "sample_id" not in df_counts.columns:
            df_counts = df_counts.reset_index()
        if "num_patches" not in df_counts.columns:
            raise ValueError(f"'num_patches' column missing in {slide_path}")

        prefix = f"{run_name}S{slide_idx}"
        df_counts["sample_id"] = prefix + df_counts["sample_id"].astype(str)
        display(df_counts)

        # align and add to existing merged_df
        for _, row in df_counts.iterrows():
            sid = row["sample_id"]
            num = row["num_patches"]
            if sid in merged_df["sample_id"].values:
                merged_df.loc[merged_df["sample_id"] == sid, "num_patches_100um"] = num

print("✅ Updated merged_df with patch counts (added to existing num_patches).")

Counting patches in /project/simmons_hts/kxu/hest/xenium_data/XeniumR1/slide1...
  sample_id  num_patches
0      ROI2         1145
1      ROI3           11
2      ROI4         3816
3      ROI5         1200
4      ROI7         1336

Total patches across all samples: 7508
[INFO] Saved counts to /project/simmons_hts/kxu/hest/xenium_data/XeniumR1/slide1/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,XeniumR1S1ROI2,1145
1,XeniumR1S1ROI3,11
2,XeniumR1S1ROI4,3816
3,XeniumR1S1ROI5,1200
4,XeniumR1S1ROI7,1336


Counting patches in /project/simmons_hts/kxu/hest/xenium_data/XeniumR1/slide2...
  sample_id  num_patches
0     ROI10          787
1     ROI11          756
2     ROI12         2493

Total patches across all samples: 4036
[INFO] Saved counts to /project/simmons_hts/kxu/hest/xenium_data/XeniumR1/slide2/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,XeniumR1S2ROI10,787
1,XeniumR1S2ROI11,756
2,XeniumR1S2ROI12,2493


Counting patches in /project/simmons_hts/kxu/hest/xenium_data/XeniumR2/slide1...
  sample_id  num_patches
0      ROI1         1860
1      ROI2         3368
2      ROI3         1079
3      ROI4         1581
4      ROI7         2742

Total patches across all samples: 10630
[INFO] Saved counts to /project/simmons_hts/kxu/hest/xenium_data/XeniumR2/slide1/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,XeniumR2S1ROI1,1860
1,XeniumR2S1ROI2,3368
2,XeniumR2S1ROI3,1079
3,XeniumR2S1ROI4,1581
4,XeniumR2S1ROI7,2742


Counting patches in /project/simmons_hts/kxu/hest/xenium_data/XeniumR2/slide2...
  sample_id  num_patches
0     ROI10         1697
1     ROI13         2051
2     ROI14         5846
3      ROI8         1342
4      ROI9         1688

Total patches across all samples: 12624
[INFO] Saved counts to /project/simmons_hts/kxu/hest/xenium_data/XeniumR2/slide2/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,XeniumR2S2ROI10,1697
1,XeniumR2S2ROI13,2051
2,XeniumR2S2ROI14,5846
3,XeniumR2S2ROI8,1342
4,XeniumR2S2ROI9,1688


Counting patches in /project/simmons_hts/kxu/hest/xenium_data/XeniumR3/slide1...
  sample_id  num_patches
0      ROI3         3887
1      ROI4          622
2      ROI5         1297
3      ROI6         2460
4      ROI7          943

Total patches across all samples: 9209
[INFO] Saved counts to /project/simmons_hts/kxu/hest/xenium_data/XeniumR3/slide1/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,XeniumR3S1ROI3,3887
1,XeniumR3S1ROI4,622
2,XeniumR3S1ROI5,1297
3,XeniumR3S1ROI6,2460
4,XeniumR3S1ROI7,943


Counting patches in /project/simmons_hts/kxu/hest/xenium_data/XeniumR3/slide2...
  sample_id  num_patches
0     ROI10         2148
1     ROI11         3248

Total patches across all samples: 5396
[INFO] Saved counts to /project/simmons_hts/kxu/hest/xenium_data/XeniumR3/slide2/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,XeniumR3S2ROI10,2148
1,XeniumR3S2ROI11,3248


Skipping /project/simmons_hts/kxu/hest/xenium_data/XeniumR4/slide1 (not found)
Skipping /project/simmons_hts/kxu/hest/xenium_data/XeniumR4/slide2 (not found)
Counting patches in /project/simmons_hts/kxu/hest/xenium_data/XeniumR5/slide1...
  sample_id  num_patches
0      ROI1         6319
1      ROI2         3063
2      ROI3          804
3      ROI4         1381
4      ROI5         4693
5      ROI6         1469

Total patches across all samples: 17729
[INFO] Saved counts to /project/simmons_hts/kxu/hest/xenium_data/XeniumR5/slide1/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,XeniumR5S1ROI1,6319
1,XeniumR5S1ROI2,3063
2,XeniumR5S1ROI3,804
3,XeniumR5S1ROI4,1381
4,XeniumR5S1ROI5,4693
5,XeniumR5S1ROI6,1469


Counting patches in /project/simmons_hts/kxu/hest/xenium_data/XeniumR5/slide2...
  sample_id  num_patches
0     ROI10         3799
1     ROI11         1506
2      ROI7         5161
3      ROI8         2534
4      ROI9          703

Total patches across all samples: 13703
[INFO] Saved counts to /project/simmons_hts/kxu/hest/xenium_data/XeniumR5/slide2/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,XeniumR5S2ROI10,3799
1,XeniumR5S2ROI11,1506
2,XeniumR5S2ROI7,5161
3,XeniumR5S2ROI8,2534
4,XeniumR5S2ROI9,703


Counting patches in /project/simmons_hts/kxu/hest/xenium_data/XeniumR6/slide1...
  sample_id  num_patches
0      ROI1         1199
1      ROI2         1483
2      ROI3          856
3      ROI4         1338
4      ROI5         6500
5      ROI6         4190

Total patches across all samples: 15566
[INFO] Saved counts to /project/simmons_hts/kxu/hest/xenium_data/XeniumR6/slide1/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,XeniumR6S1ROI1,1199
1,XeniumR6S1ROI2,1483
2,XeniumR6S1ROI3,856
3,XeniumR6S1ROI4,1338
4,XeniumR6S1ROI5,6500
5,XeniumR6S1ROI6,4190


Counting patches in /project/simmons_hts/kxu/hest/xenium_data/XeniumR6/slide2...
  sample_id  num_patches
0     ROI10          665
1     ROI11         8006
2      ROI7         4212
3      ROI8         2016
4      ROI9         3130

Total patches across all samples: 18029
[INFO] Saved counts to /project/simmons_hts/kxu/hest/xenium_data/XeniumR6/slide2/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,XeniumR6S2ROI10,665
1,XeniumR6S2ROI11,8006
2,XeniumR6S2ROI7,4212
3,XeniumR6S2ROI8,2016
4,XeniumR6S2ROI9,3130


✅ Updated merged_df with patch counts (added to existing num_patches).


In [31]:
merged_df.iloc[60:80,0:20]

Unnamed: 0,sample_id,roi,slide,patient_id,sample_code,panel,technology,run_id,directory,wsi,alignment,alignment_note,rds,slide_id,sample_type,location,sample_name,phenotype_montreal,matched_xenium,num_patches_100um
60,XeniumPR5S1ROI10,10,1,GI10430,SLIDE3_S10,5k,10x Xenium,PR5,/project/simmons_hts/kxu/xenium/he/5k/RUN5,XeniumPR5S1ROI10.ome.tif,XeniumPR5S1ROI10_alignment_files/matrix.csv,,,60528,Active coeliac,Deuodenum,,,,640.0
61,XeniumPR5S2ROI1,1,2,GI9927,SLIDE4_S1,5k,10x Xenium,PR5,/project/simmons_hts/kxu/xenium/he/5k/RUN5,XeniumPR5S2ROI1.ome.tif,XeniumPR5S2ROI1_alignment_files/matrix.csv,,,60530,Active coeliac,Deuodenum,,,,475.0
62,XeniumPR5S2ROI2,2,2,GI10902,SLIDE4_S2,5k,10x Xenium,PR5,/project/simmons_hts/kxu/xenium/he/5k/RUN5,XeniumPR5S2ROI2.ome.tif,XeniumPR5S2ROI2_alignment_files/matrix.csv,,,60530,Active coeliac,Deuodenum,,,,340.0
63,XeniumPR5S2ROI3,3,2,GI10941,SLIDE4_S3,5k,10x Xenium,PR5,/project/simmons_hts/kxu/xenium/he/5k/RUN5,XeniumPR5S2ROI3.ome.tif,XeniumPR5S2ROI3_alignment_files/matrix.csv,bubbles,,60530,Treated coeliac,Deuodenum,,,,440.0
64,XeniumPR5S2ROI4,4,2,GI10361,SLIDE4_S4,5k,10x Xenium,PR5,/project/simmons_hts/kxu/xenium/he/5k/RUN5,XeniumPR5S2ROI4.ome.tif,XeniumPR5S2ROI4_alignment_files/matrix.csv,,,60530,Treated coeliac,Deuodenum,,,,706.0
65,XeniumPR5S2ROI5,5,2,GI10857,SLIDE4_S5,5k,10x Xenium,PR5,/project/simmons_hts/kxu/xenium/he/5k/RUN5,XeniumPR5S2ROI5.ome.tif,XeniumPR5S2ROI5_alignment_files/matrix.csv,,,60530,Non-coeliac control,Deuodenum,,,,866.0
66,XeniumPR5S2ROI6,6,2,GI10748,SLIDE4_S6,5k,10x Xenium,PR5,/project/simmons_hts/kxu/xenium/he/5k/RUN5,XeniumPR5S2ROI6.ome.tif,XeniumPR5S2ROI6_alignment_files/matrix.csv,,,60530,Non-coeliac control,Deuodenum,,,,528.0
67,XeniumPR5S2ROI7,7,2,GI11159,SLIDE4_S7,5k,10x Xenium,PR5,/project/simmons_hts/kxu/xenium/he/5k/RUN5,XeniumPR5S2ROI7.ome.tif,XeniumPR5S2ROI7_alignment_files/matrix.csv,,,60530,Potential coeliac,Deuodenum,,,,593.0
68,XeniumPR5S2ROI8,8,2,GI10806,SLIDE4_S8,5k,10x Xenium,PR5,/project/simmons_hts/kxu/xenium/he/5k/RUN5,XeniumPR5S2ROI8.ome.tif,XeniumPR5S2ROI8_alignment_files/matrix.csv,,,60530,Non-coeliac control,Deuodenum,,,,431.0
69,XeniumPR5S2ROI9,9,2,GI9160,SLIDE4_S9,5k,10x Xenium,PR5,/project/simmons_hts/kxu/xenium/he/5k/RUN5,XeniumPR5S2ROI9.ome.tif,XeniumPR5S2ROI9_alignment_files/matrix.csv,,,60530,Active coeliac,Deuodenum,,,,581.0


# Count Visium Patches

In [32]:
from pathlib import Path
import pandas as pd

ROOT = Path("/project/simmons_hts/kxu/hest/visium_data")

for i in range(1, 7): 
    run_name = f"VisiumR{i}"
    base = ROOT / run_name

    for slide_idx, slide_name in enumerate(["slide1", "slide2"], start=1):
        slide_path = base / slide_name
        if not slide_path.exists():
            print(f"Skipping {slide_path} (not found)")
            continue

        print(f"Counting patches in {slide_path}...")
        df_counts = count_patches(str(slide_path), save_csv='patch_counts.csv')
        if df_counts is None:
            continue

        # ensure we have 'sample_id' and 'num_patches'
        if "sample_id" not in df_counts.columns:
            df_counts = df_counts.reset_index()
        if "num_patches" not in df_counts.columns:
            raise ValueError(f"'num_patches' column missing in {slide_path}")

        prefix = f"{run_name}S{slide_idx}"
        df_counts["sample_id"] = prefix + df_counts["sample_id"].astype(str)
        display(df_counts)

        # align and add to existing merged_df
        for _, row in df_counts.iterrows():
            sid = row["sample_id"]
            num = row["num_patches"]
            if sid in merged_df["sample_id"].values:
                merged_df.loc[merged_df["sample_id"] == sid, "num_patches_100um"] = num

print("✅ Updated merged_df with patch counts (added to existing num_patches).")

Counting patches in /project/simmons_hts/kxu/hest/visium_data/VisiumR1/slide1...
  sample_id  num_patches
0      ROI1         1713
1      ROI2         1579
2      ROI3         1658
3      ROI4         1077

Total patches across all samples: 6027
[INFO] Saved counts to /project/simmons_hts/kxu/hest/visium_data/VisiumR1/slide1/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,VisiumR1S1ROI1,1713
1,VisiumR1S1ROI2,1579
2,VisiumR1S1ROI3,1658
3,VisiumR1S1ROI4,1077


Skipping /project/simmons_hts/kxu/hest/visium_data/VisiumR1/slide2 (not found)
Counting patches in /project/simmons_hts/kxu/hest/visium_data/VisiumR2/slide1...
  sample_id  num_patches
0      ROI1         2939
1      ROI2         4544
2      ROI3         4597
3      ROI4         4617

Total patches across all samples: 16697
[INFO] Saved counts to /project/simmons_hts/kxu/hest/visium_data/VisiumR2/slide1/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,VisiumR2S1ROI1,2939
1,VisiumR2S1ROI2,4544
2,VisiumR2S1ROI3,4597
3,VisiumR2S1ROI4,4617


Counting patches in /project/simmons_hts/kxu/hest/visium_data/VisiumR2/slide2...
  sample_id  num_patches
0      ROI1         2731
1      ROI2         3812
2      ROI3         2790
3      ROI4         3922

Total patches across all samples: 13255
[INFO] Saved counts to /project/simmons_hts/kxu/hest/visium_data/VisiumR2/slide2/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,VisiumR2S2ROI1,2731
1,VisiumR2S2ROI2,3812
2,VisiumR2S2ROI3,2790
3,VisiumR2S2ROI4,3922


Counting patches in /project/simmons_hts/kxu/hest/visium_data/VisiumR3/slide1...
  sample_id  num_patches
0      ROI1         2810
1      ROI2         4208
2      ROI3         3886
3      ROI4         3817

Total patches across all samples: 14721
[INFO] Saved counts to /project/simmons_hts/kxu/hest/visium_data/VisiumR3/slide1/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,VisiumR3S1ROI1,2810
1,VisiumR3S1ROI2,4208
2,VisiumR3S1ROI3,3886
3,VisiumR3S1ROI4,3817


Skipping /project/simmons_hts/kxu/hest/visium_data/VisiumR3/slide2 (not found)
Counting patches in /project/simmons_hts/kxu/hest/visium_data/VisiumR4/slide1...
  sample_id  num_patches
0      ROI1         3807
1      ROI2         4609
2      ROI3         4922
3      ROI4         4270

Total patches across all samples: 17608
[INFO] Saved counts to /project/simmons_hts/kxu/hest/visium_data/VisiumR4/slide1/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,VisiumR4S1ROI1,3807
1,VisiumR4S1ROI2,4609
2,VisiumR4S1ROI3,4922
3,VisiumR4S1ROI4,4270


Skipping /project/simmons_hts/kxu/hest/visium_data/VisiumR4/slide2 (not found)
Counting patches in /project/simmons_hts/kxu/hest/visium_data/VisiumR5/slide1...
  sample_id  num_patches
0      ROI1         4317
1      ROI2         2193
2      ROI3         4128
3      ROI4         2524

Total patches across all samples: 13162
[INFO] Saved counts to /project/simmons_hts/kxu/hest/visium_data/VisiumR5/slide1/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,VisiumR5S1ROI1,4317
1,VisiumR5S1ROI2,2193
2,VisiumR5S1ROI3,4128
3,VisiumR5S1ROI4,2524


Counting patches in /project/simmons_hts/kxu/hest/visium_data/VisiumR5/slide2...
  sample_id  num_patches
0      ROI1         4053
1      ROI2         2347
2      ROI3         1191

Total patches across all samples: 7591
[INFO] Saved counts to /project/simmons_hts/kxu/hest/visium_data/VisiumR5/slide2/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,VisiumR5S2ROI1,4053
1,VisiumR5S2ROI2,2347
2,VisiumR5S2ROI3,1191


Counting patches in /project/simmons_hts/kxu/hest/visium_data/VisiumR6/slide1...
  sample_id  num_patches
0      ROI1         2438
1      ROI2         3518
2      ROI3         3579
3      ROI4         4213

Total patches across all samples: 13748
[INFO] Saved counts to /project/simmons_hts/kxu/hest/visium_data/VisiumR6/slide1/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,VisiumR6S1ROI1,2438
1,VisiumR6S1ROI2,3518
2,VisiumR6S1ROI3,3579
3,VisiumR6S1ROI4,4213


Counting patches in /project/simmons_hts/kxu/hest/visium_data/VisiumR6/slide2...
  sample_id  num_patches
0      ROI1         3021
1      ROI2         2892
2      ROI3         3693
3      ROI4         4824

Total patches across all samples: 14430
[INFO] Saved counts to /project/simmons_hts/kxu/hest/visium_data/VisiumR6/slide2/patch_counts.csv


Unnamed: 0,sample_id,num_patches
0,VisiumR6S2ROI1,3021
1,VisiumR6S2ROI2,2892
2,VisiumR6S2ROI3,3693
3,VisiumR6S2ROI4,4824


✅ Updated merged_df with patch counts (added to existing num_patches).


In [33]:
merged_df

Unnamed: 0,sample_id,roi,slide,patient_id,sample_code,panel,technology,run_id,directory,wsi,...,location,sample_name,phenotype_montreal,matched_xenium,num_patches_100um,num_patches_50um,num_patches_50um_0.25_um_px,num_patches_25um,num_patches_25um_0.125_um_px,num_patches_cell_100um
0,XeniumPR1S1ROI1,1,1,CAM006,XEN_ST_5K_SLIDE 1,5k,10x Xenium,PR1,/project/simmons_hts/jpark/1_project/0_xenium/...,CAM006_Xenium5K_post_HnE.ome.tif,...,Colon,,,,684.0,2727.0,2603.0,10534.0,9950.0,608.0
1,XeniumPR1S1ROI2,2,1,TIP877,XEN_ST_5K_SLIDE 1,5k,10x Xenium,PR1,/project/simmons_hts/jpark/1_project/0_xenium/...,TIP877_Xenium5K_post_HnE.ome.tif,...,Colon,,,,482.0,1886.0,1838.0,7393.0,7130.0,429.0
2,XeniumPR1S1ROI3,3,1,GI9389,XEN_ST_5K_SLIDE 1,5k,10x Xenium,PR1,/project/simmons_hts/jpark/1_project/0_xenium/...,GI9389_Xenium5K_post_HnE.ome.tif,...,Colon,,,,1168.0,4627.0,4502.0,18007.0,17368.0,930.0
3,XeniumPR1S1ROI4,4,1,GI9077,XEN_ST_5K_SLIDE 1,5k,10x Xenium,PR1,/project/simmons_hts/jpark/1_project/0_xenium/...,GI9077_Xenium5K_post_HnE.ome.tif,...,Colon,,,,1253.0,5010.0,4903.0,19937.0,19360.0,1064.0
4,XeniumPR1S1ROI5,5,1,GI9612,XEN_ST_5K_SLIDE 1,5k,10x Xenium,PR1,/project/simmons_hts/jpark/1_project/0_xenium/...,GI9612_Xenium5K_post_HnE.ome.tif,...,Colon,,,,893.0,3520.0,3289.0,13692.0,12449.0,756.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150,VisiumR6S1ROI4,4,1,JR_23234_23,R6B12,whole transcriptome,10x Visium,R6,/ceph/project/simmons_hts/shared/27_07_2023_CD...,,...,Enterocutaneous,B12,Fistulating (B3),,4213.0,,,,,
151,VisiumR6S2ROI1,1,2,JR_18076_22,R6C1,whole transcriptome,10x Visium,R6,/ceph/project/simmons_hts/shared/27_07_2023_CD...,,...,Anal,C1,Fistulating (B3) A2L2B1p,,3021.0,,,,,
152,VisiumR6S2ROI2,2,2,BAY_105338_20,R6C2,whole transcriptome,10x Visium,R6,/ceph/project/simmons_hts/shared/27_07_2023_CD...,,...,Transsphincteric,C2,Fistulating (B3),,2892.0,,,,,
153,VisiumR6S2ROI3,3,2,BAY_104603_20/JR_20291_22,R6C3,whole transcriptome,10x Visium,R6,/ceph/project/simmons_hts/shared/27_07_2023_CD...,,...,Ileocaecal/Perianal,C3,Fistulating (B3)/Fistulating (B3) A2L1B2p,,3693.0,,,,,


# save metadata

In [34]:
output_path = Path("/project/simmons_hts/kxu/hest/hest_directory.csv")
merged_df.to_csv(output_path, index=False)
print(f"✔ Saved merged metadata to {output_path}")

✔ Saved merged metadata to /project/simmons_hts/kxu/hest/hest_directory.csv


# Create BROAD metadata 

In [None]:
patch_count_broad = count_patches("/project/simmons_hts/kxu/hest/xenium_data/broad/", save_csv="patch_counts.csv")
patch_count_broad = patch_count_broad.rename(columns={'num_patches': 'num_patches_100um'})
patch_count_broad

In [None]:
import numpy as np
patch_count_broad['disease'] = np.where(
    patch_count_broad['sample_id'].str.contains("UC", case=False, na=False),
    "ulcerative colitis",
    np.where(
        patch_count_broad['sample_id'].str.contains("DC", case=False, na=False),
        "diverticulitis",
        np.nan  # or keep as None/empty if it doesn’t match
    ))

In [99]:
patch_count_broad['inflamed'] = np.where(
    patch_count_broad['sample_id'].str.contains("_NI", case=False, na=False),
    False,
    np.where(
        patch_count_broad['sample_id'].str.contains("_I", case=False, na=False),
        True,
        np.nan  # or None if you prefer
    )
)

In [100]:
# extract the digits from sample_id and assign to new column
patch_count_broad['patient_id'] = patch_count_broad['sample_id'].str.extract(r'(\d+)')

In [101]:
patch_count_broad

Unnamed: 0,sample_id,num_patches_100um,disease,inflamed,patient_id
0,DC5,5644,diverticulitis,,5
1,UC1_I,6549,ulcerative colitis,1.0,1
2,UC1_NI,4518,ulcerative colitis,0.0,1
3,UC6_I,6966,ulcerative colitis,1.0,6
4,UC6_NI,4812,ulcerative colitis,0.0,6
5,UC7_I,3607,ulcerative colitis,1.0,7
6,UC9_I,7335,ulcerative colitis,1.0,9


## add cell_centered patches

In [103]:
def count_patches_broad(patches_dir):
    """
    Count patches per sample, print total, and save results as CSV.

    Args:
        patches_dir (str or Path): folder containing .h5 patch files
        csv_path (str or Path): path to save CSV file
    Returns:
        pandas.DataFrame
    """
    patches_dir = Path(patches_dir)
    results = []

    for h5_file in patches_dir.glob("*.h5"):
        sample_id = h5_file.stem
        with h5py.File(h5_file, "r") as f:
            num_patches = f["img"].shape[0]
        results.append({"sample_id": sample_id, "num_patches_cell_centered": num_patches})

    df = pd.DataFrame(results)
    total_patches = df["num_patches_cell_centered"].sum()

    print(f"\nTotal patches across all samples: {total_patches}")

    return df

# Usage
hest_root = "/project/simmons_hts/kxu/hest/eval/data/broad_cell_centered"
patches_dir = Path(hest_root) / "patches"
patch_counts = count_patches_broad(patches_dir)
patch_counts



Total patches across all samples: 1062602


Unnamed: 0,sample_id,num_patches_cell_centered
0,UC7_I,144704
1,DC5,140368
2,UC9_I,196937
3,UC1_I,202534
4,UC6_I,196537
5,UC1_NI,80037
6,UC6_NI,101485


In [104]:
# Merge metadata_df on SampleID
merged_df_broad = pd.merge(
    patch_counts,
    patch_count_broad,
    on="sample_id",
    how="right"   # or "left" if you want to keep all metadata rows
)
merged_df_broad

Unnamed: 0,sample_id,num_patches_cell_centered,num_patches_100um,disease,inflamed,patient_id
0,DC5,140368,5644,diverticulitis,,5
1,UC1_I,202534,6549,ulcerative colitis,1.0,1
2,UC1_NI,80037,4518,ulcerative colitis,0.0,1
3,UC6_I,196537,6966,ulcerative colitis,1.0,6
4,UC6_NI,101485,4812,ulcerative colitis,0.0,6
5,UC7_I,144704,3607,ulcerative colitis,1.0,7
6,UC9_I,196937,7335,ulcerative colitis,1.0,9


In [105]:
output_path = Path("/project/simmons_hts/kxu/hest/broad_directory.csv")
merged_df_broad.to_csv(output_path, index=False)
print(f"✔ Saved merged metadata to {output_path}")

✔ Saved merged metadata to /project/simmons_hts/kxu/hest/broad_directory.csv
