In [9]:
from pathlib import Path
import scanpy as sc
import geopandas as gpd
import pandas as pd

In [10]:
# Input root, sample list and output folder
# Root folder of the cell-segmentation results; it holds one sub-folder per F-number.
segmentation_path = Path(
    "/mnt/c/Users/jonan/Documents/1Work/RoseLab/Spatial/"
    "dietary_droject/data/cell_segmentation"
)

# F-numbers of the samples that get combined, in processing order.
Fnumbers = [
    "F07833",
    "F07834",
    "F07835",
    "F07836",
    "F07837",
    "F07838",
]

# Combined outputs go into a sibling "concatenated" folder, created up front
# (no error if it already exists).
out_dir = segmentation_path.joinpath("concatenated")
out_dir.mkdir(exist_ok=True, parents=True)

In [11]:
# 1) Read each sample's labeled AnnData, tag it with its F-number, then concatenate
adatas = []
for fnum in Fnumbers:
    h5ad_file = segmentation_path / fnum / "labeled_files" / f"{fnum}_labeled_adata.h5ad"
    tma_adata = sc.read_h5ad(h5ad_file)
    tma_adata.obs['TMA'] = fnum  # record which sample every cell came from
    adatas.append(tma_adata)

# Stack all samples into one object; the per-cell 'TMA' column set above is carried along.
# join="outer" keeps the union of variables/columns rather than only the shared ones;
# index_unique=None leaves obs_names as they are in the inputs (no key is appended).
combined_adata = sc.concat(adatas, join="outer", index_unique=None)

# Persist the combined object and report where it went.
combined_adata_file = out_dir / "combined_adata.h5ad"
combined_adata.write(combined_adata_file)
print("Wrote combined AnnData:", combined_adata_file)

Wrote combined AnnData: /mnt/c/Users/jonan/Documents/1Work/RoseLab/Spatial/dietary_droject/data/cell_segmentation/concatenated/combined_adata.h5ad


In [12]:
# 2) Read each sample's nuclei geometry, tag it with its F-number, then concatenate
gdfs = []
for fnum in Fnumbers:
    gpkg_file = segmentation_path / fnum / "labeled_files" / f"{fnum}_nuclei_geometry.gpkg"
    tma_nuclei = gpd.read_file(gpkg_file)
    tma_nuclei['TMA'] = fnum  # record which sample every row came from
    gdfs.append(tma_nuclei)

# Stack the rows under a fresh 0..n-1 index, then rebuild a GeoDataFrame that
# carries the CRS of the first input.
stacked_rows = pd.concat(gdfs, ignore_index=True)
combined_geo = gpd.GeoDataFrame(stacked_rows, crs=gdfs[0].crs)

# Persist as a GeoPackage and report where it went.
combined_geo_file = out_dir / "combined_nuclei_geometry.gpkg"
combined_geo.to_file(combined_geo_file, driver="GPKG")
print("Wrote combined geometry:", combined_geo_file)

  write(


Wrote combined geometry: /mnt/c/Users/jonan/Documents/1Work/RoseLab/Spatial/dietary_droject/data/cell_segmentation/concatenated/combined_nuclei_geometry.gpkg


In [24]:
# ───────────────────────────────────────────────────────────────────────────────
# 3) Compute QC metrics and save CSVs
def _count_cells_by(obs, column):
    """Count cells per distinct value of ``obs[column]``.

    Parameters
    ----------
    obs : pandas.DataFrame
        Per-cell annotation table (here ``combined_adata.obs``).
    column : str
        Name of the column to count by.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``column`` (the distinct values) and ``'n_cells'`` (their
        counts), in the order produced by ``value_counts`` (most frequent first).
        Cells whose value is missing are not counted, because ``value_counts``
        drops NaN by default.

    Raises
    ------
    KeyError
        If ``column`` is not present in ``obs``.
    """
    return (
        obs[column]
        .value_counts()
        .rename_axis(column)
        .reset_index(name='n_cells')
    )


# Fail before writing anything if a required column is absent, so a partial set
# of CSVs is never left behind.
qc_columns = ('mouse', 'condition', 'sample_id')
missing_columns = [col for col in qc_columns if col not in combined_adata.obs.columns]
if missing_columns:
    raise KeyError(f"combined_adata.obs is missing QC column(s): {missing_columns}")

# how many cells per mouse
mouse_counts = _count_cells_by(combined_adata.obs, 'mouse')
mouse_counts.to_csv(out_dir / "cell_counts_by_mouse.csv", index=False)

# how many cells per condition
cond_counts = _count_cells_by(combined_adata.obs, 'condition')
cond_counts.to_csv(out_dir / "cell_counts_by_condition.csv", index=False)

# how many cells per sample_id
sample_counts = _count_cells_by(combined_adata.obs, 'sample_id')
sample_counts.to_csv(out_dir / "cell_counts_by_sample.csv", index=False)

print("Wrote QC metrics CSVs to:", out_dir)

Wrote QC metrics CSVs to: /mnt/c/Users/jonan/Documents/1Work/RoseLab/Spatial/dietary_droject/data/cell_segmentation/concatenated


In [23]:
# Sanity check: list the distinct values of the 'mouse' column in the combined obs table.
combined_adata.obs['mouse'].unique()

['9_KDRT', '8_CRRT', '28_LFRT', '5_RT', '3_RT', ..., '31_RT', '34_LGIRT', '36_CRRT', '38_CRRT', '32_RT']
Length: 20
Categories (20, object): ['2_KDRT', '3_RT', '5_RT', '7_CRRT', ..., '35_LGIRT', '36_CRRT', '37_CRRT', '38_CRRT']

In [19]:
# Display the concatenated geometry table to inspect its columns and rows.
combined_geo

Unnamed: 0,id,area,cx,cy,mouse,sample_id,condition,geometry,TMA
0,ID_1,164.211547,5027.710544,11477.946399,5_RT,F07833,RT,"POLYGON ((5034.709 11478, 5034.51 11479.295, 5...",F07833
1,ID_2,168.253242,10183.824168,6428.559879,28_LFRT,F07833,LFRT,"POLYGON ((10190.207 6428, 10190.009 6429.195, ...",F07833
2,ID_3,150.134230,4619.768704,11232.465983,5_RT,F07833,RT,"POLYGON ((4626.401 11232, 4626.364 11233.266, ...",F07833
3,ID_4,111.751713,4967.216777,11319.182248,5_RT,F07833,RT,"POLYGON ((4973.224 11319, 4973.036 11320.2, 49...",F07833
4,ID_5,157.579249,4797.994817,5101.094167,8_CRRT,F07833,CRRT,"POLYGON ((4804.751 5101, 4804.636 5102.32, 480...",F07833
...,...,...,...,...,...,...,...,...,...
1643235,ID_1648497,71.467875,2612.162089,11240.026260,34_LGIRT,F07838,LGIRT,"POLYGON ((2617.926 11240, 2617.892 11241.172, ...",F07838
1643236,ID_1648498,65.584925,9056.226602,12299.657790,36_CRRT,F07838,CRRT,"POLYGON ((9059.798 12298, 9059.88 12298.771, 9...",F07838
1643237,ID_1648499,78.907557,4761.607527,9648.393308,34_LGIRT,F07838,LGIRT,"POLYGON ((4766.198 9648, 4765.875 9648.771, 47...",F07838
1643238,ID_1648500,89.583303,8810.156684,10467.002247,36_CRRT,F07838,CRRT,"POLYGON ((8816.387 10466, 8816.618 10466.919, ...",F07838
