In [2]:
import os
from pathlib import Path

import geopandas as gpd
import pandas as pd
import scanpy as sc

In [3]:
# Paths and samples
# The segmentation root defaults to the original workstation location, but can
# be redirected on another machine by setting CAR_T_SEGMENTATION_DIR instead of
# editing this cell.
DEFAULT_SEGMENTATION_DIR = (
    "/mnt/c/Users/jonan/Documents/1Work/RoseLab/Spatial/"
    "CAR_T/data/cell_segmentation"
)
segmentation_path = Path(
    os.environ.get("CAR_T_SEGMENTATION_DIR", DEFAULT_SEGMENTATION_DIR)
)
# Stop here if the root is missing: otherwise mkdir(parents=True) below would
# quietly build an empty directory tree at the wrong location and the loaders
# would only fail later with a less obvious "file not found".
if not segmentation_path.is_dir():
    raise FileNotFoundError(
        f"Segmentation directory not found: {segmentation_path}"
    )

# Identifiers used as per-sample sub-directory names under segmentation_path
# and as the value of the 'TMA' label attached to each loaded table.
Fnumbers = ["F07839","F07840"]
out_dir = segmentation_path / "concatenated"
out_dir.mkdir(parents=True, exist_ok=True)

In [4]:
# 1) Load & concatenate AnnData
# Read one labeled AnnData per Fnumber and tag every cell with its source.
adatas = []
for fnum in Fnumbers:
    ad = sc.read_h5ad(segmentation_path / fnum / "labeled_files" / f"{fnum}_labeled_adata.h5ad")
    ad.obs['TMA'] = fnum
    adatas.append(ad)

# Keep the original obs_names when they are already unique across samples.
# If two samples reuse the same cell names, fall back to "<name>-<Fnumber>"
# so the combined object never ends up with duplicate index labels.
all_obs_names = adatas[0].obs_names.append([a.obs_names for a in adatas[1:]])
obs_suffix_delimiter = None if all_obs_names.is_unique else "-"
if obs_suffix_delimiter is not None:
    print(
        "Warning: obs_names overlap between samples; "
        "appending '-<Fnumber>' to every obs_name to keep them unique"
    )

# stitch them together (preserves each ad.obs['TMA'])
combined_adata = sc.concat(
    adatas,
    join="outer",                        # union of var and obs columns
    keys=Fnumbers,                       # only used for the suffix when one is needed
    index_unique=obs_suffix_delimiter,   # None -> obs_names are left untouched
)
# save
combined_adata_path = out_dir / "combined_adata.h5ad"
combined_adata.write(combined_adata_path)
print("Wrote combined AnnData:", combined_adata_path)

Wrote combined AnnData: /mnt/c/Users/jonan/Documents/1Work/RoseLab/Spatial/CAR_T/data/cell_segmentation/concatenated/combined_adata.h5ad


In [5]:
# 2) Load & concatenate GeoDataFrames
# Read the nuclei geometry of every sample and tag each row with its Fnumber.
gdfs = []
for fnum in Fnumbers:
    gpkg_path = segmentation_path / fnum / "labeled_files" / f"{fnum}_nuclei_geometry.gpkg"
    g = gpd.read_file(gpkg_path).assign(TMA=fnum)
    gdfs.append(g)

# Stack all rows under a fresh 0..n-1 index; the CRS of the first table is
# applied to the combined table.
stacked_geo = pd.concat(gdfs, ignore_index=True)
combined_geo = gpd.GeoDataFrame(stacked_geo, crs=gdfs[0].crs)

combined_geo_path = out_dir / "combined_nuclei_geometry.gpkg"
combined_geo.to_file(combined_geo_path, driver="GPKG")
print("Wrote combined geometry:", combined_geo_path)

  write(


Wrote combined geometry: /mnt/c/Users/jonan/Documents/1Work/RoseLab/Spatial/CAR_T/data/cell_segmentation/concatenated/combined_nuclei_geometry.gpkg


In [6]:
# ───────────────────────────────────────────────────────────────────────────────
# 3) Compute QC metrics and save CSVs
def count_cells_by(obs, column):
    """Count cells per distinct value of ``obs[column]``.

    Parameters
    ----------
    obs : pandas.DataFrame
        Per-cell annotation table (e.g. ``adata.obs``).
    column : str
        Name of the column to group by.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``column`` (the label) and ``n_cells`` (its count),
        in the order produced by ``Series.value_counts``.

    Raises
    ------
    KeyError
        If ``column`` is not present in ``obs``.
    """
    labels = obs[column]
    # value_counts() skips missing labels, so report them instead of letting
    # the table silently add up to fewer cells than the data contains.
    n_missing = int(labels.isna().sum())
    if n_missing:
        print(f"Warning: {n_missing} cells have no '{column}' label and are not counted")
    return (
        labels
        .value_counts()
        .rename_axis(column)
        .reset_index(name='n_cells')
    )


# how many cells per mouse
mouse_counts = count_cells_by(combined_adata.obs, 'mouse')
mouse_counts.to_csv(out_dir / "cell_counts_by_mouse.csv", index=False)

# how many cells per condition
cond_counts = count_cells_by(combined_adata.obs, 'condition')
cond_counts.to_csv(out_dir / "cell_counts_by_condition.csv", index=False)

# how many cells per sample_id
sample_counts = count_cells_by(combined_adata.obs, 'sample_id')
sample_counts.to_csv(out_dir / "cell_counts_by_sample.csv", index=False)

print("Wrote QC metrics CSVs to:", out_dir)

Wrote QC metrics CSVs to: /mnt/c/Users/jonan/Documents/1Work/RoseLab/Spatial/CAR_T/data/cell_segmentation/concatenated


In [9]:
# Show the distinct values of the 'condition' column in the combined AnnData.
combined_adata.obs['condition'].unique()

['RTCyT72', 'RTCyPSCA', 'CyPSCA', 'NoTx', 'CyT72']
Categories (5, object): ['CyPSCA', 'CyT72', 'NoTx', 'RTCyPSCA', 'RTCyT72']

In [8]:
# Display the combined nuclei geometry table (the notebook truncates long tables).
combined_geo

Unnamed: 0,id,area,cx,cy,mouse,condition,mouse_num,sample_id,geometry,TMA
0,ID_1,280.221639,1897.516561,1410.757634,CyPSCA_1_1,CyPSCA,1_1,F07839,"POLYGON ((1907.427 1410, 1907.252 1411.84, 190...",F07839
1,ID_2,208.208112,1451.096575,969.849422,CyPSCA_1_1,CyPSCA,1_1,F07839,"POLYGON ((1460.078 970, 1459.024 971.795, 1458...",F07839
2,ID_3,140.847965,1083.826704,1502.442681,CyPSCA_1_1,CyPSCA,1_1,F07839,"POLYGON ((1089.396 1502, 1089.217 1503.038, 10...",F07839
3,ID_4,295.491891,1225.403479,1327.376710,CyPSCA_1_1,CyPSCA,1_1,F07839,"POLYGON ((1235.982 1328, 1234.698 1330.128, 12...",F07839
4,ID_5,275.968497,1922.803774,1864.119806,CyPSCA_1_1,CyPSCA,1_1,F07839,"POLYGON ((1932.349 1864, 1932.091 1865.609, 19...",F07839
...,...,...,...,...,...,...,...,...,...,...
246385,ID_246584,211.002387,10696.210716,13695.730901,RTCyT72_2_4,RTCyT72,2_4,F07840,"POLYGON ((10703.625 13694, 10703.954 13695.582...",F07840
246386,ID_246585,61.056324,11483.207339,13615.239497,RTCyT72_2_4,RTCyT72,2_4,F07840,"POLYGON ((11486.985 13616, 11486.732 13616.543...",F07840
246387,ID_246586,107.477445,10600.269713,13682.471674,RTCyT72_2_4,RTCyT72,2_4,F07840,"POLYGON ((10605.099 13678, 10605.65 13678.726,...",F07840
246388,ID_246587,508.468466,10707.627158,13801.351509,RTCyT72_2_4,RTCyT72,2_4,F07840,"POLYGON ((10719.812 13802, 10719.665 13803.922...",F07840
