In [None]:
# Segmentation Method Benchmarking: General Statistics and Beyond

This notebook provides a step-by-step approach to calculating various metrics for benchmarking segmentation methods in single-cell transcriptomics. We will start with general statistics and proceed to advanced metrics, including F1 purity, neighborhood entropy, the mutually exclusive co-expression rate (MECR), and contamination.

## 0. Setup: Import Required Packages and Define Paths



In [None]:
import pandas as pd
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path
from itertools import combinations
from matplotlib.backends.backend_pdf import PdfPages

# --- Paths and method labels -------------------------------------------------
benchmarks_path      = Path('/dkfz/cluster/gpu/data/OE0606/elihei/segger_experiments/data_tidy/benchmarks/xe_rep1_bc')
# NOTE(review): 'resutls' looks like a typo for 'results'. The directory name is
# kept as-is so outputs already written there stay reachable; rename deliberately.
output_path          = benchmarks_path / 'resutls'
output_path.mkdir(parents = True, exist_ok=True)
# Order matters: these labels are zipped positionally with `segmentations` below.
segmentation_methods = ['segger', 'segger_n0', 'segger_n1', 'Baysor', '10X', '10X-nucleus']

# --- Load one AnnData object per segmentation method -------------------------
adata_segger  = sc.read(benchmarks_path / 'adata_segger.h5ad')
adata_baysor  = sc.read(benchmarks_path / 'adata_baysor.h5ad')
adata_10X     = sc.read(benchmarks_path / 'adata_10X.h5ad')
adata_10X_nuc = sc.read(benchmarks_path / 'adata_10X_nuc.h5ad')

# Split the segger cells by the '-nx' suffix of their obs names:
# names ending in '-nx' go to the n0 subset, all others to the n1 subset.
cells_n1 = [i for i in adata_segger.obs_names if not i.endswith('-nx')]
cells_n0 = [i for i in adata_segger.obs_names if i.endswith('-nx')]
adata_segger_n1 = adata_segger[cells_n1,:]
adata_segger_n0 = adata_segger[cells_n0,:]

# NOTE(review): this path is relative, so it resolves against the kernel's working
# directory (unlike the absolute `benchmarks_path` above) -- confirm this is intended.
scRNAseq = sc.read_h5ad(Path('data_tidy') /  "BC_atlas_xe.h5ad")
genes = scRNAseq.var_names

# --- Shared QC thresholds -----------------------------------------------------
# Upper area bound comes from the 10X cell segmentation; lower area bound and the
# transcript-count bound come from the 10X nucleus segmentation.
max_area = adata_10X.obs.cell_area.max()
min_area = adata_10X_nuc.obs.cell_area.min()
min_transcripts = adata_10X_nuc.obs.transcripts.min()

# --- Apply the same filter to every method ------------------------------------
# The comparisons are strict, so cells sitting exactly on a threshold are dropped.
segmentations_unfiltered = [adata_segger, adata_segger_n0, adata_segger_n1, adata_baysor, adata_10X, adata_10X_nuc]
segmentations = [
    x[(x.obs.cell_area > min_area) & (x.obs.cell_area < max_area) & (x.obs.transcripts > min_transcripts)]
    for x in segmentations_unfiltered
]

# Build the name -> AnnData mapping AFTER filtering. Previously the dict was created
# from the unfiltered list, so lookups by method name silently bypassed the QC filter
# while iteration over `segmentations` did not.
segmentations_dict = dict(zip(segmentation_methods, segmentations))

## 1. General Statistics

### Number of cells