# AnnData Inspection, Analysis & MapMyCells Export

This notebook provides a comprehensive workflow for:
1. **Inspecting** an AnnData `.h5ad` file structure (obs, var, layers, obsm, uns, etc.)
2. **Analyzing** the counts layer to verify data type (raw UMI counts vs normalized)
3. **Exporting** a simplified AnnData suitable for MapMyCells or other downstream tools

---

## 0. Configuration & Imports

In [None]:
# ============================================================
# USER CONFIGURATION - Edit these paths and options as needed
# ============================================================

# Path to the input AnnData (.h5ad) file. The export file name is derived from
# this path by replacing the extension with "_for_MapMyCells.h5ad".
INPUT_H5AD = "/scicore/home/doetsch/kaiser0001/Single_cell_paper/Output_dir_Single_cell_paper/Single_cell_clustering/10_Downstream_Analysis_All_Samples/final_output_object.h5ad"

# Name of the layer holding the counts; it is both analyzed (section 4)
# and written out as X of the exported object (section 5).
COUNTS_LAYER = "counts"

# === Counts Analysis Options ===
ANALYSIS_SEED = 123           # Seed passed to np.random.default_rng for the cell subsample
ANALYSIS_N_CELLS = 1000       # Cells to sample; capped at the number of rows in the layer

# === MapMyCells Export Options ===
# If None: use adata.obs_names as barcodes
# If set to a column name (e.g. "barcode_cellranger"): use that obs column
# instead; a KeyError is raised when the column does not exist.
BARCODE_SOURCE_OBS_COLUMN = None

# Gene name conversion to mouse-style casing (e.g., "CD74" -> "Cd74")
CONVERT_GENE_NAMES_TO_MOUSE_CASE = False

# Which gene names the conversion touches (only read when the conversion
# above is enabled; any other value raises ValueError):
#   "all"          -> convert every gene name
#   "only_allcaps" -> convert only genes for which str.isupper() is true
MOUSE_CASE_APPLY_MODE = "all"

# If gene names contain duplicates, make them unique by appending _2, _3, ...
# The duplicate check runs whether or not case conversion is enabled.
MAKE_GENE_NAMES_UNIQUE_IF_NEEDED = True

# If True, an existing output file is removed before writing; if False,
# an existing output file raises FileExistsError.
OVERWRITE_OUTPUT = True

# Print a preview of the exported var table (first OUTPUT_VAR_N_PREVIEW rows)
PRINT_OUTPUT_VAR = True
OUTPUT_VAR_N_PREVIEW = 20

In [None]:
# ============================================================
# Imports
# ============================================================

from __future__ import annotations

import os
import numpy as np
import pandas as pd
import anndata as ad
import scipy.sparse as sp
import h5py
from collections.abc import Mapping, Sequence

# Report the versions of the core libraries for reproducibility.
for _pkg_label, _pkg_module in (("anndata", ad), ("numpy", np), ("pandas", pd)):
    print(f"{_pkg_label} version: {_pkg_module.__version__}")

---
## 1. Helper Functions

Utility functions used throughout the notebook for inspection, analysis, and export.

In [None]:
# ============================================================
# Helper functions for AnnData inspection
# ============================================================

def _dtype_of(x):
    """Get dtype of array-like object (works for backed arrays, sparse, memmaps)."""
    try:
        return str(x.dtype)
    except Exception:
        return type(x).__name__


def _shape_of(x):
    """Get shape of array-like object."""
    try:
        return tuple(x.shape)
    except Exception:
        return None


def _summarize_mapping(m, max_items=200):
    """Summarize keys of a mapping, truncating if too many."""
    keys = list(m.keys())
    if len(keys) > max_items:
        keys = keys[:max_items] + ["..."]
    return keys


def _print_uns(obj, indent=0, max_depth=6, max_list_items=30):
    """Recursively print the structure of uns (unstructured annotations)."""
    pad = " " * indent
    if indent // 2 >= max_depth:
        print(f"{pad}… (max depth reached)")
        return

    if isinstance(obj, Mapping):
        for k in list(obj.keys()):
            v = obj[k]
            if isinstance(v, (Mapping, Sequence)) and not isinstance(v, (str, bytes, bytearray)):
                print(f"{pad}{k}: {type(v).__name__}")
                _print_uns(v, indent=indent + 2, max_depth=max_depth, max_list_items=max_list_items)
            else:
                s = repr(v)
                if len(s) > 120:
                    s = s[:117] + "..."
                print(f"{pad}{k}: {type(v).__name__} = {s}")
    elif isinstance(obj, Sequence) and not isinstance(obj, (str, bytes, bytearray)):
        n = len(obj)
        print(f"{pad}[list/tuple] len={n}")
        for i, v in enumerate(obj[:max_list_items]):
            if isinstance(v, (Mapping, Sequence)) and not isinstance(v, (str, bytes, bytearray)):
                print(f"{pad}- [{i}] {type(v).__name__}")
                _print_uns(v, indent=indent + 2, max_depth=max_depth, max_list_items=max_list_items)
            else:
                s = repr(v)
                if len(s) > 120:
                    s = s[:117] + "..."
                print(f"{pad}- [{i}] {type(v).__name__} = {s}")
        if n > max_list_items:
            print(f"{pad}… ({n - max_list_items} more items)")
    else:
        s = repr(obj)
        if len(s) > 120:
            s = s[:117] + "..."
        print(f"{pad}{type(obj).__name__} = {s}")

In [None]:
# ============================================================
# Helper functions for counts layer analysis
# ============================================================

def summarize_values(x, label):
    """
    Summarize numerical values with statistics useful for determining
    whether data represents raw counts or normalized values.

    Non-finite entries (NaN, +/-inf) are dropped before any statistic is
    computed. Nothing is returned; the summary is printed.

    Parameters
    ----------
    x : np.ndarray
        Array of values to summarize (any array-like accepted by
        ``np.asarray`` works).
    label : str
        Label for the summary output
    """
    x = np.asarray(x)
    if x.size == 0:
        print(f"{label}: empty")
        return

    n_total = x.size
    x = x[np.isfinite(x)]
    # The emptiness check must also happen after filtering: an input made
    # only of NaN/inf would otherwise crash in min()/max() and in the
    # percentage divisions below.
    if x.size == 0:
        print(f"{label}: no finite values ({n_total} non-finite dropped)")
        return

    print(f"{label}: n={x.size}")
    print(f"  min={x.min():.6g}  max={x.max():.6g}  mean={x.mean():.6g}  median={np.median(x):.6g}")
    print(f"  p1={np.quantile(x, 0.01):.6g}  p99={np.quantile(x, 0.99):.6g}")

    # Check for integer-like values
    frac = np.abs(x - np.round(x))
    nonint = np.sum(frac > 1e-6)
    neg = np.sum(x < -1e-12)
    between01 = np.sum((x > 1e-12) & (x < 1 - 1e-6))

    print(f"  non-integer-like (|x-round(x)|>1e-6): {nonint}  ({100*nonint/x.size:.3f}%)")
    print(f"  negatives: {neg}  ({100*neg/x.size:.3f}%)")
    print(f"  values in (0,1): {between01}  ({100*between01/x.size:.3f}%)")

In [None]:
# ============================================================
# Helper functions for MapMyCells export
# ============================================================

def mouse_case(name: str) -> str:
    """Return ``name`` with its first character uppercased and the rest lowercased."""
    if name:
        head, tail = name[0], name[1:]
        return head.upper() + tail.lower()
    # Empty (falsy) names are handed back untouched.
    return name


def make_unique(names: list[str]) -> list[str]:
    """
    Make names unique by appending _2, _3, ... for duplicates.

    The first occurrence of a name is kept as is. Each repeat receives the
    lowest suffix ``_k`` (k >= 2) that is not already present in the output,
    so a generated name can never collide with a name that was emitted
    earlier (e.g. ``["a", "a_2", "a"]`` -> ``["a", "a_2", "a_3"]``).

    Parameters
    ----------
    names : list[str]
        Input names, possibly containing duplicates.

    Returns
    -------
    list[str]
        Names in the original order, all distinct.
    """
    used: set[str] = set()
    last_suffix: dict[str, int] = {}
    out: list[str] = []
    for n in names:
        candidate = n
        if candidate in used:
            # Resume from the last suffix tried for this base name and skip
            # any candidate that has already been emitted.
            k = last_suffix.get(n, 1)
            while candidate in used:
                k += 1
                candidate = f"{n}_{k}"
            last_suffix[n] = k
        used.add(candidate)
        out.append(candidate)
    return out


def ensure_csr(x) -> sp.csr_matrix:
    """Return ``x`` as a CSR matrix; an input that is already CSR is returned unchanged."""
    if sp.isspmatrix_csr(x):
        result = x
    elif sp.isspmatrix(x):
        # Another sparse matrix format: convert it.
        result = x.tocsr()
    else:
        # Anything else is handed to the CSR constructor.
        result = sp.csr_matrix(x)
    return result


def assert_csr_equal(a: sp.csr_matrix, b: sp.csr_matrix, label: str = "X") -> None:
    """
    Raise ``AssertionError`` unless ``a`` and ``b`` are CSR matrices with the
    same shape, nnz, indptr, indices and data.

    ``label`` prefixes every error message. Returns ``None`` on success.
    """
    both_csr = sp.isspmatrix_csr(a) and sp.isspmatrix_csr(b)
    if not both_csr:
        raise AssertionError(f"{label}: not CSR on both sides (types: {type(a)} vs {type(b)})")

    # Cheap scalar properties first.
    for what, left, right in (("shape", a.shape, b.shape), ("nnz", a.nnz, b.nnz)):
        if left != right:
            raise AssertionError(f"{label}: {what} mismatch {left} vs {right}")

    # Then the structural arrays.
    if not np.array_equal(a.indptr, b.indptr):
        raise AssertionError(f"{label}: indptr differs")
    if not np.array_equal(a.indices, b.indices):
        raise AssertionError(f"{label}: indices differ")

    # Finally the stored values.
    if np.array_equal(a.data, b.data):
        return
    delta = a.data.astype(np.float64) - b.data.astype(np.float64)
    diff = np.max(np.abs(delta))
    raise AssertionError(f"{label}: data differs (max abs diff {diff})")

---
## 2. Load AnnData File (Backed Mode)

Load the h5ad file in read-only backed mode so that the main matrix `X` is read from disk on demand instead of being loaded into memory.

In [None]:
# ============================================================
# Load AnnData in backed (read-only) mode
# ============================================================

# Fail early with a clear message when the configured path does not exist.
if not os.path.exists(INPUT_H5AD):
    raise FileNotFoundError(f"Input file not found: {INPUT_H5AD}")

print(f"Loading AnnData (backed='r'): {INPUT_H5AD}")
adata = ad.read_h5ad(INPUT_H5AD, backed="r")
print(
    "Loaded successfully!",
    f"  Shape: {adata.n_obs} cells × {adata.n_vars} genes",
    sep="\n",
)

---
## 3. AnnData Structure Inspection

Comprehensive inspection of the AnnData object structure including all slots.

### 3.1 Basic Information

In [None]:
# ============================================================
# Basic AnnData information
# ============================================================

# Header, file path, AnnData repr and dimensions, one item per line.
print(
    "=== BASIC ===",
    f"path: {INPUT_H5AD}",
    f"AnnData: {adata}",
    f"n_obs x n_vars: {adata.n_obs} x {adata.n_vars}",
    sep="\n",
)

### 3.2 Main Matrix (X)

In [None]:
# ============================================================
# Main expression matrix (X)
# ============================================================

# Look X up once and describe it by type, shape and dtype.
x_matrix = adata.X
print("=== X ===")
for x_field, x_value in (
    ("type", type(x_matrix).__name__),
    ("shape", _shape_of(x_matrix)),
    ("dtype", _dtype_of(x_matrix)),
):
    print(f"X {x_field}: {x_value}")

### 3.3 Observation (Cell) Annotations

In [None]:
# ============================================================
# Observation (cell) annotations
# ============================================================

# Column overview of the cell annotation table (at most 80 names listed).
obs_cols = list(adata.obs.columns)
obs_more = " ..." if len(obs_cols) > 80 else ""

print("=== obs (cell annotations) ===")
print(f"obs shape: {adata.obs.shape} | columns: {len(obs_cols)}")
print(f"obs columns (first 80): {obs_cols[:80]}{obs_more}")

In [None]:
# ============================================================
# Preview obs dataframe
# ============================================================

print("obs head (first 5 rows, first 10 columns):")
# Positional slices clamp to the table size, so no explicit min() is needed.
display(adata.obs.iloc[:5, :10])

### 3.4 Variable (Gene) Annotations

In [None]:
# ============================================================
# Variable (gene) annotations
# ============================================================

# Column overview of the gene annotation table (at most 80 names listed).
var_cols = list(adata.var.columns)
var_more = " ..." if len(var_cols) > 80 else ""

print("=== var (gene annotations) ===")
print(f"var shape: {adata.var.shape} | columns: {len(var_cols)}")
print(f"var columns (first 80): {var_cols[:80]}{var_more}")

In [None]:
# ============================================================
# Preview var dataframe
# ============================================================

print("var head (first 10 rows):")
# First ten rows of the gene annotation table.
display(adata.var.iloc[:10])

### 3.5 Multidimensional Observation Annotations (obsm)

In [None]:
# ============================================================
# obsm - embeddings, dimensionality reductions, etc.
# ============================================================

print("=== obsm ===")
if len(adata.obsm) > 0:
    # One line per entry: key, container type, shape and dtype.
    for obsm_key, obsm_val in adata.obsm.items():
        print(f"{obsm_key}: type={type(obsm_val).__name__} shape={_shape_of(obsm_val)} dtype={_dtype_of(obsm_val)}")
else:
    print("(none)")

### 3.6 Multidimensional Variable Annotations (varm)

In [None]:
# ============================================================
# varm - gene-level multidimensional annotations
# ============================================================

print("=== varm ===")
if len(adata.varm) > 0:
    # One line per entry: key, container type, shape and dtype.
    for varm_key, varm_val in adata.varm.items():
        print(f"{varm_key}: type={type(varm_val).__name__} shape={_shape_of(varm_val)} dtype={_dtype_of(varm_val)}")
else:
    print("(none)")

### 3.7 Layers (Alternative Expression Matrices)

In [None]:
# ============================================================
# layers - alternative expression matrices (counts, normalized, etc.)
# ============================================================

print("=== layers (ALL) ===")
if len(adata.layers) > 0:
    # One line per layer: name, container type, shape and dtype.
    for layer_key, layer_val in adata.layers.items():
        print(f"{layer_key}: type={type(layer_val).__name__} shape={_shape_of(layer_val)} dtype={_dtype_of(layer_val)}")
else:
    print("(none)")

### 3.8 Pairwise Observation Annotations (obsp)

In [None]:
# ============================================================
# obsp - pairwise cell annotations (e.g., connectivity graphs)
# ============================================================

print("=== obsp ===")
if len(adata.obsp) > 0:
    # One line per entry: key, container type, shape and dtype.
    for obsp_key, obsp_val in adata.obsp.items():
        print(f"{obsp_key}: type={type(obsp_val).__name__} shape={_shape_of(obsp_val)} dtype={_dtype_of(obsp_val)}")
else:
    print("(none)")

### 3.9 Pairwise Variable Annotations (varp)

In [None]:
# ============================================================
# varp - pairwise gene annotations
# ============================================================

print("=== varp ===")
if len(adata.varp) > 0:
    # One line per entry: key, container type, shape and dtype.
    for varp_key, varp_val in adata.varp.items():
        print(f"{varp_key}: type={type(varp_val).__name__} shape={_shape_of(varp_val)} dtype={_dtype_of(varp_val)}")
else:
    print("(none)")

### 3.10 Raw Data Slot

In [None]:
# ============================================================
# raw - original data before filtering/normalization
# ============================================================

print("=== raw ===")
raw_slot = adata.raw
if raw_slot is None:
    print("(raw is None)")
else:
    print(f"raw.X type: {type(raw_slot.X).__name__}")
    try:
        print(f"raw shape: {raw_slot.n_obs} x {raw_slot.n_vars}")
    except Exception as exc:
        # Best-effort line: keep inspecting, but say why the shape is missing
        # instead of silently dropping it.
        print(f"raw shape: unavailable ({type(exc).__name__}: {exc})")
    print(f"raw.var columns (first 80): {list(raw_slot.var.columns)[:80]}")

### 3.11 Unstructured Annotations (uns)

In [None]:
# ============================================================
# uns - unstructured annotations (top-level keys)
# ============================================================

# Top-level keys of uns (at most 200 listed).
uns_keys = list(adata.uns.keys())
uns_more = " ..." if len(uns_keys) > 200 else ""

print("=== uns (top-level keys) ===")
print(f"uns keys (first 200): {uns_keys[:200]}{uns_more}")

In [None]:
# ============================================================
# uns - recursive preview of nested structure
# ============================================================

print("=== uns (recursive preview) ===")
# indent=0 is the helper's default; the depth limit is stated explicitly.
_print_uns(adata.uns, max_depth=6)

---
## 4. Counts Layer Analysis

Analyze the counts layer to determine if it contains raw UMI counts or normalized/corrected values.

**Interpretation guide:**
- **Raw UMI counts**: Almost all values are integers, no negatives, essentially none in (0,1)
- **Corrected/normalized**: Many fractional values; often lots in (0,1); sometimes negatives (method-dependent)

In [None]:
# ============================================================
# Analyze counts layer using h5py for efficient random sampling
# ============================================================

# Seeded generator; the sampling cell below draws the cell subset from it.
rng = np.random.default_rng(ANALYSIS_SEED)

with h5py.File(INPUT_H5AD, "r") as f:
    # Check if layer exists
    if "layers" not in f or COUNTS_LAYER not in f["layers"]:
        available_layers = list(f["layers"].keys()) if "layers" in f else "none"
        raise KeyError(f"Layer '{COUNTS_LAYER}' not found. Available layers: {available_layers}")

    g = f["layers"][COUNTS_LAYER]
    enc = g.attrs.get("encoding-type", None)
    if isinstance(enc, bytes):
        # Attribute values may come back as bytes; normalise for comparison.
        enc = enc.decode("utf-8")
    shape = g.attrs.get("shape", None)

    print(f"File: {INPUT_H5AD}")
    print(f"Layer '{COUNTS_LAYER}' encoding-type: {enc}")
    print(f"shape (from attrs): {shape}")

    # Everything below reads the layer as row-compressed sparse storage.
    # Validate that explicitly so other layouts fail with a clear message
    # instead of an opaque KeyError/TypeError (or a silent misreading).
    sparse_parts = ("data", "indices", "indptr")
    if not isinstance(g, h5py.Group) or any(part not in g for part in sparse_parts):
        raise ValueError(
            f"Layer '{COUNTS_LAYER}' is not stored as a sparse matrix group "
            f"with {sparse_parts}; this analysis needs CSR storage."
        )
    if enc == "csc_matrix":
        raise ValueError(
            f"Layer '{COUNTS_LAYER}' is stored as CSC; its indptr runs over columns, "
            "so per-cell sampling via indptr is not valid."
        )
    if shape is None:
        raise ValueError(f"Layer '{COUNTS_LAYER}' has no 'shape' attribute.")

    data = g["data"]
    indices = g["indices"]
    indptr = g["indptr"]

    nnz = int(data.shape[0])
    n_rows = int(shape[0])
    n_cols = int(shape[1])

    print(f"CSR: n_rows={n_rows}  n_cols={n_cols}  nnz={nnz}")
    print(f"  data dtype={data.dtype}  indices dtype={indices.dtype}  indptr dtype={indptr.dtype}")

In [None]:
# ============================================================
# Sample random cells and collect nonzero values
# ============================================================

with h5py.File(INPUT_H5AD, "r") as f:
    g = f["layers"][COUNTS_LAYER]
    enc = g.attrs.get("encoding-type", None)
    if isinstance(enc, bytes):
        enc = enc.decode("utf-8")
    if enc == "csc_matrix":
        # With CSC storage indptr indexes columns, so the slices taken below
        # would be genes rather than cells.
        raise ValueError(
            f"Layer '{COUNTS_LAYER}' is stored as CSC; per-cell sampling requires CSR."
        )
    shape = g.attrs.get("shape", None)
    data = g["data"]

    n_rows = int(shape[0])

    # Select random subset of cells
    n_subset = min(ANALYSIS_N_CELLS, n_rows)
    cell_idx = rng.choice(n_rows, size=n_subset, replace=False)
    cell_idx.sort()
    print(f"Using random subset of {n_subset} cells for analysis")

    # Read indptr once and index it in memory: it has one entry per row
    # (plus one), which is far cheaper than two single-element HDF5 reads
    # per sampled cell.
    indptr = np.asarray(g["indptr"][:], dtype=np.int64)
    indptr_starts = indptr[cell_idx]
    indptr_ends = indptr[cell_idx + 1]

    # Per-cell number of stored values and per-cell totals
    row_nnz = indptr_ends - indptr_starts
    row_sums = np.zeros(n_subset, dtype=np.float64)

    # Collect all stored (nonzero) values from the subset cells
    all_values = []
    for i, (start, end) in enumerate(zip(indptr_starts, indptr_ends)):
        if end == start:
            continue  # empty row: total stays 0.0
        row_data = np.asarray(data[start:end], dtype=np.float64)
        all_values.append(row_data)
        row_sums[i] = float(row_data.sum())

    all_values = np.concatenate(all_values) if all_values else np.array([], dtype=np.float64)
    print(f"Total nonzero values in subset: {all_values.size}")

In [None]:
# ============================================================
# Summarize nonzero values from sampled cells
# ============================================================

# Separator line, then the value statistics for the sampled cells.
nonzero_rule = "=" * 60
print(f"\n{nonzero_rule}")
summarize_values(all_values, f"Nonzero values from {n_subset} cells")

In [None]:
# ============================================================
# Row sparsity statistics
# ============================================================

# Spread of the number of stored values per sampled cell.
nnz_min, nnz_median, nnz_max = row_nnz.min(), np.median(row_nnz), row_nnz.max()
print("\nRow sparsity (subset):")
print(f"  row nnz: min={nnz_min}  median={nnz_median:.6g}  max={nnz_max}")

In [None]:
# ============================================================
# Per-cell totals (library size proxy)
# ============================================================

# Separator line, then statistics of the per-cell sums.
totals_rule = "=" * 60
print(f"\n{totals_rule}")
summarize_values(row_sums, f"Per-cell totals ({n_subset} cells)")

In [None]:
# ============================================================
# Interpretation guide
# ============================================================

# Reading aid for the statistics printed above.
guide_lines = [
    "\n" + "=" * 60,
    "HOW TO INTERPRET:",
    "  Raw UMI counts: almost all values are integers, no negatives,",
    "                  essentially none in (0,1).",
    "  Corrected/normalized: many fractional values; often lots in (0,1);",
    "                        sometimes negatives (method-dependent).",
]
print("\n".join(guide_lines))

---
## 5. Export Simplified AnnData for MapMyCells

Create a minimal AnnData suitable for MapMyCells or similar tools:
- `X` = counts layer (CSR sparse)
- `obs` = barcodes only (as obs_names)
- `var` = gene names only (stored in `adata.var['gene_name']` and as var_names)

In [None]:
# ============================================================
# Define output path
# ============================================================

# Output path = input path with its extension swapped for the export suffix.
base = os.path.splitext(INPUT_H5AD)[0]
OUTPUT_H5AD = f"{base}_for_MapMyCells.h5ad"

print(f"Output will be written to: {OUTPUT_H5AD}")

# Refuse to continue when the file exists and overwriting is disabled.
if not OVERWRITE_OUTPUT and os.path.exists(OUTPUT_H5AD):
    raise FileExistsError(f"Output exists and OVERWRITE_OUTPUT=False: {OUTPUT_H5AD}")

In [None]:
# ============================================================
# Extract barcodes
# ============================================================

# Choose where barcodes come from: a named obs column or the obs index.
if BARCODE_SOURCE_OBS_COLUMN is not None:
    if BARCODE_SOURCE_OBS_COLUMN not in adata.obs.columns:
        raise KeyError(
            f"BARCODE_SOURCE_OBS_COLUMN='{BARCODE_SOURCE_OBS_COLUMN}' not in adata.obs columns"
        )
    barcode_values = adata.obs[BARCODE_SOURCE_OBS_COLUMN]
    barcode_source = f"obs['{BARCODE_SOURCE_OBS_COLUMN}']"
else:
    barcode_values = adata.obs_names
    barcode_source = "obs_names"

# Either source is converted to a NumPy array of strings.
barcodes = barcode_values.astype(str).to_numpy()

n_barcodes = len(barcodes)
if n_barcodes != adata.n_obs:
    raise ValueError(f"Barcode length mismatch: {len(barcodes)} vs n_obs={adata.n_obs}")

print(f"Barcode source: {barcode_source}")
print(f"Number of barcodes: {n_barcodes}")
print(f"Sample barcodes: {barcodes[:5]}")

In [None]:
# ============================================================
# Extract and optionally convert gene names
# ============================================================

# Keep the original names; work on a copy.
genes_orig = adata.var_names.astype(str).to_numpy()
genes = genes_orig.copy()

if CONVERT_GENE_NAMES_TO_MOUSE_CASE:
    if MOUSE_CASE_APPLY_MODE not in {"all", "only_allcaps"}:
        raise ValueError("MOUSE_CASE_APPLY_MODE must be 'all' or 'only_allcaps'")

    print(f"Converting gene names to mouse case (mode={MOUSE_CASE_APPLY_MODE})...")
    convert_every_name = MOUSE_CASE_APPLY_MODE == "all"
    # Slice assignment rewrites the existing array in place.
    genes[:] = [
        mouse_case(gene_name) if (convert_every_name or gene_name.isupper()) else gene_name
        for gene_name in genes
    ]

has_duplicate_genes = len(set(genes.tolist())) != len(genes)
if MAKE_GENE_NAMES_UNIQUE_IF_NEEDED and has_duplicate_genes:
    print("WARNING: Gene names are not unique after conversion. Making unique with suffixes _2, _3, ...")
    genes = np.asarray(make_unique(genes.tolist()), dtype=str)

print(f"Number of genes: {len(genes)}")
print(f"Sample genes: {genes[:10]}")

In [None]:
# ============================================================
# Load counts layer into memory
# ============================================================

# The configured layer must exist before anything is pulled from it.
if COUNTS_LAYER not in adata.layers:
    raise KeyError(f"Layer '{COUNTS_LAYER}' not present. Available layers: {list(adata.layers.keys())}")

print(f"Loading counts layer '{COUNTS_LAYER}' into memory (sparse CSR expected)...")
counts = ensure_csr(adata.layers[COUNTS_LAYER])

# The layer has to line up with the object's cells x genes dimensions.
expected_counts_shape = (adata.n_obs, adata.n_vars)
if counts.shape != expected_counts_shape:
    raise ValueError(f"counts layer shape {counts.shape} != (n_obs,n_vars)=({adata.n_obs},{adata.n_vars})")

print(f"Counts matrix: {counts.shape}, nnz={counts.nnz}, dtype={counts.dtype}")

In [None]:
# ============================================================
# Build minimal AnnData
# ============================================================

print("Building minimal AnnData...")

# obs carries only the barcode index; var carries the gene index plus a
# 'gene_name' column holding the same names.
barcode_index = pd.Index(barcodes, name="barcode")
gene_index = pd.Index(genes, name="gene")

obs_new = pd.DataFrame(index=barcode_index)
var_new = pd.DataFrame(index=gene_index)
var_new["gene_name"] = genes

adata_export = ad.AnnData(X=counts, obs=obs_new, var=var_new)

print(f"New AnnData: {adata_export}")

In [None]:
# ============================================================
# Write simplified h5ad
# ============================================================

# Remove a previous export first when overwriting is enabled.
if OVERWRITE_OUTPUT and os.path.exists(OUTPUT_H5AD):
    os.remove(OUTPUT_H5AD)

print(f"Writing simplified h5ad: {OUTPUT_H5AD}")
adata_export.write_h5ad(OUTPUT_H5AD, compression="gzip")
print("Write complete!")
print("Write complete!")

### 5.1 Validate Export

In [None]:
# ============================================================
# Reload and validate output
# ============================================================

print("Reloading output and validating...")
adata_reloaded = ad.read_h5ad(OUTPUT_H5AD)

# Dimensions must equal those of the source object.
reloaded_dims = (adata_reloaded.n_obs, adata_reloaded.n_vars)
source_dims = (adata.n_obs, adata.n_vars)
if reloaded_dims != source_dims:
    raise AssertionError(f"n_obs/n_vars mismatch: ({adata_reloaded.n_obs},{adata_reloaded.n_vars}) vs ({adata.n_obs},{adata.n_vars})")
print("✓ Dimensions match")

# Barcodes: the reloaded obs index against the exported barcodes.
reloaded_barcodes = adata_reloaded.obs_names.astype(str).to_numpy()
if not np.array_equal(reloaded_barcodes, barcodes.astype(str)):
    raise AssertionError("Barcode validation failed: output obs_names differ from expected barcodes")
print("✓ Barcodes match")

# Gene names: the column must exist and match the exported names.
if "gene_name" not in adata_reloaded.var.columns:
    raise AssertionError("Gene validation failed: 'gene_name' column missing in output adata.var")
print("✓ gene_name column present")

reloaded_genes = adata_reloaded.var["gene_name"].astype(str).to_numpy()
if not np.array_equal(reloaded_genes, genes.astype(str)):
    raise AssertionError("Gene validation failed: output var['gene_name'] differs from expected")
print("✓ Gene names match")

# Counts: the reloaded X against the in-memory counts matrix.
x_out = ensure_csr(adata_reloaded.X)
assert_csr_equal(counts, x_out, label="counts/X")
print("✓ Counts matrix matches")

In [None]:
# ============================================================
# Export summary
# ============================================================

# Final report of what was exported and where.
summary_lines = [
    "\n" + "=" * 60,
    "SUCCESS ✅",
    f"  Barcode source: {barcode_source}",
    f"  Input n_obs x n_vars: {adata.n_obs} x {adata.n_vars}",
    f"  Output file: {OUTPUT_H5AD}",
    f"  Output X: CSR nnz={x_out.nnz}, dtype={x_out.data.dtype}",
    f"  Gene conversion: {CONVERT_GENE_NAMES_TO_MOUSE_CASE} (mode={MOUSE_CASE_APPLY_MODE})",
]
print("\n".join(summary_lines))

In [None]:
# ============================================================
# Preview output var dataframe
# ============================================================

if PRINT_OUTPUT_VAR:
    out_var = adata_reloaded.var

    print("\n=== OUTPUT adata.var INFO ===")
    print(f"adata_reloaded.var shape: {out_var.shape}")
    print(f"adata_reloaded.var columns: {list(out_var.columns)}")
    print(f"\nHead ({OUTPUT_VAR_N_PREVIEW}):")
    display(out_var.head(OUTPUT_VAR_N_PREVIEW))

    # Slicing clamps to the index length, so no explicit min() is needed.
    print("\nvar_names preview:")
    print(list(adata_reloaded.var_names[:OUTPUT_VAR_N_PREVIEW]))

    # Heuristic: how many of the first 10,000 names are entirely uppercase.
    vn = adata_reloaded.var_names.astype(str)
    upper_flags = [gene_label.isupper() for gene_label in vn[:10000]]
    allcaps = np.sum(upper_flags)
    print(f"\nHeuristic (first 10k genes): all-caps var_names count = {allcaps}")

---
## 6. Cleanup

In [None]:
# ============================================================
# Close backed file handle
# ============================================================

# Closing the backed file is best-effort: a failure must not abort the
# notebook, but it is reported rather than silently ignored.
try:
    adata.file.close()
except Exception as exc:
    print(f"Could not close backed file handle ({type(exc).__name__}: {exc})")
else:
    print("Backed file handle closed.")

print("\nNotebook complete!")