In [2]:
import anndata
import xarray as xr
import pandas as pd
import numpy as np
from dask import delayed, compute
from glob import glob

# Assume all .h5ad files are in a directory
file_list = glob(r"D:\Mouse\Matrices\*.h5ad")

# Determine the union of parcellation indices and subclasses (for Isocortex cells)
all_parcellation_indices = set()
all_subclasses = set()

for f in file_list:
    adata = anndata.read_h5ad(f, backed='r')
    # Filter for cells with parcellation_division == 'Isocortex.'
    mask = adata.obs['parcellation_division'] == 'Isocortex'
    # Use the filtered view for the union
    adata_filtered = adata[mask, :]
    all_parcellation_indices.update(adata_filtered.obs['parcellation_index'].unique())
    all_subclasses.update(adata_filtered.obs['subclass'].unique())

all_parcellation_indices = sorted(list(all_parcellation_indices))
all_subclasses = sorted(list(all_subclasses))
print(f'processed {len(file_list)} files')

# Assume gene symbols are the same in all files; load from the first file
adata = anndata.read_h5ad(file_list[0], backed='r')
gene_symbols = adata.var_names.tolist()
n_genes = len(gene_symbols)
print(f"Loaded gene symbols with length {len(gene_symbols)}")


processed 53 files
Loaded gene symbols with length 8460


In [3]:
print(len(all_parcellation_indices), len(all_subclasses), n_genes)

239 104 8460


In [None]:
import os

def process_file_in_chunks(file_path, gene_symbols, union_parcellation_indices, union_subclasses, chunk_size=500):
    """
    Process one h5ad file in cell chunks.
    Accumulate gene expression sums and counts per group (parcellation_index and subclass)
    to compute the average expression for each gene.
    """
    adata = anndata.read_h5ad(file_path, backed='r')
    n_cells = adata.n_obs

    # Preallocate arrays for accumulating sums and counts.
    sum_array = np.zeros((n_genes, len(union_parcellation_indices), len(union_subclasses)), dtype=np.float64)
    count_array = np.zeros((n_genes, len(union_parcellation_indices), len(union_subclasses)), dtype=np.int64)

    for start in range(0, n_cells, chunk_size):
        end = min(start + chunk_size, n_cells)
        # Load chunk from backed file into memory.
        chunk = adata[start:end, :].to_memory()

        # Filter for Isocortex cells.
        mask = chunk.obs['parcellation_division'] == 'Isocortex'
        if mask.sum() == 0:
            continue
        chunk_filt = chunk[mask, :]
        expr_data = chunk_filt.X

        # Create a DataFrame for this chunk using gene_symbols as column names.
        df = pd.DataFrame(expr_data, index=chunk_filt.obs_names, columns=gene_symbols)

        # Add grouping metadata.
        df["subclass"] = chunk_filt.obs["subclass"].values
        df["parcellation_index"] = chunk_filt.obs["parcellation_index"].values

        # Drop metadata columns to get only gene expression data.
        df_genes = df[gene_symbols]

        # Group by subclass and parcellation_index and compute sum.
        grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
        # For count, we can use groupby and take the size of each group.
        grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()

        # Loop through each group in this chunk.
        for (subclass, parcellation_index), gene_sum in grouped_sum.iterrows():
            try:
                p_idx = union_parcellation_indices.index(parcellation_index)
                c_idx = union_subclasses.index(subclass)
            except ValueError:
                continue  # Skip if the label is not in our union
            # Add the gene expression sum.
            sum_array[:, p_idx, c_idx] += gene_sum.values
            # Add the count (same for all genes; note grouped_count is a Series indexed by (subclass, parcellation_index)).
            count_array[:, p_idx, c_idx] += grouped_count.loc[(subclass, parcellation_index)]

    # Compute the mean for each gene where count > 0.
    mean_array = np.full((n_genes, len(union_parcellation_indices), len(union_subclasses)), np.nan)
    valid = count_array > 0
    mean_array[valid] = sum_array[valid] / count_array[valid]

    # Create an xarray.DataArray with proper labels.
    xr_da = xr.DataArray(
        mean_array,
        dims=["gene_symbol", "parcellation_index", "subclass"],
        coords={
            "gene_symbol": gene_symbols,
            "parcellation_index": union_parcellation_indices,
            "subclass": union_subclasses
        }
    )
    return xr_da

# Step 5: Process each file, save the result to disk, and keep track of the filenames.
section_files = []
for i, file_path in enumerate(file_list):
    print(f"Processing {file_path}...")
    xr_da = process_file_in_chunks(file_path, gene_symbols, all_parcellation_indices, all_subclasses, chunk_size=500)
    # Save the xarray DataArray to a NetCDF file.
    section_filename = f"processed_section_{i}.nc"
    xr_da.to_netcdf(section_filename)
    section_files.append(section_filename)


Processing D:\Mouse\Matrices\C57BL6J-638850.05.h5ad...
Processing D:\Mouse\Matrices\C57BL6J-638850.06.h5ad...
Processing D:\Mouse\Matrices\C57BL6J-638850.08.h5ad...
Processing D:\Mouse\Matrices\C57BL6J-638850.09.h5ad...
Processing D:\Mouse\Matrices\C57BL6J-638850.10.h5ad...
Processing D:\Mouse\Matrices\C57BL6J-638850.11.h5ad...
Processing D:\Mouse\Matrices\C57BL6J-638850.12.h5ad...
Processing D:\Mouse\Matrices\C57BL6J-638850.13.h5ad...
Processing D:\Mouse\Matrices\C57BL6J-638850.14.h5ad...
Processing D:\Mouse\Matrices\C57BL6J-638850.15.h5ad...
Processing D:\Mouse\Matrices\C57BL6J-638850.16.h5ad...
Processing D:\Mouse\Matrices\C57BL6J-638850.17.h5ad...
Processing D:\Mouse\Matrices\C57BL6J-638850.18.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.19.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.24.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.25.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.26.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.27.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.28.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.29.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.30.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.31.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.32.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.33.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.35.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.36.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.37.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.38.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.39.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.40.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.42.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.43.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.44.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.45.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.46.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.47.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.48.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.49.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.50.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.51.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.52.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.54.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.55.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.56.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.57.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.58.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.59.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.60.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.61.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.62.h5ad...


  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_index"]]).size()
  grouped_sum = df_genes.groupby([df["subclass"], df["parcellation_index"]]).sum()
  grouped_count = df_genes.groupby([df["subclass"], df["parcellation_ind

Processing D:\Mouse\Matrices\C57BL6J-638850.64.h5ad...
Processing D:\Mouse\Matrices\C57BL6J-638850.66.h5ad...
Processing D:\Mouse\Matrices\C57BL6J-638850.67.h5ad...


In [1]:
import os
import xarray as xr
from glob import glob

def merge_files(file_list, group_size):
    """
    Merge files in groups of group_size.
    For each group, open the files lazily, concatenate them along a temporary dimension,
    take the mean over that dimension, save the resulting 3D DataArray to a new NetCDF file,
    and delete the original files in the group.
    
    Returns a new list of file names.
    """
    new_files = []
    for i in range(0, len(file_list), group_size):
        group = file_list[i:i+group_size]
        # Open each file lazily with dask (adjust chunk sizes if needed)
        arrays = [xr.open_dataarray(f, chunks={"gene_symbol": -1,
                                                 "parcellation_index": -1,
                                                 "subclass": -1}) 
                  for f in group]
        # Concatenate along a temporary dimension, then average along that dimension.
        concat_arr = xr.concat(arrays, dim="temp")
        avg_arr = concat_arr.mean(dim="temp")
        out_file = f"merged_round{group_size}_{i}.nc"
        # for when the file already exists
        counter = 0
        while os.path.exists(out_file):
            counter += 1
            out_file = f"merged_round{group_size}_{i}_{counter}.nc"

        avg_arr.to_netcdf(out_file)
        new_files.append(out_file)
        # Close the lazy arrays and remove the original files.
        for arr in arrays:
            arr.close()
        for f in group:
            os.remove(f)
            print(f"Deleted temporary file {f}")
    return new_files

file_list = glob(r"D:\Mouse\Notebooks\*.nc")

# Hierarchical merging:
while len(file_list) > 1:
    print(f"Starting hierarchical merge. Initial file count: {len(file_list)}")
    file_list = merge_files(file_list, group_size=2)
    print(f"After merging with group size 3, {len(file_list)} files remain.")

print("Final merged file:", file_list[0])


Starting hierarchical merge. Initial file count: 28
Deleted temporary file D:\Mouse\Notebooks\mergedb_2_0.nc
Deleted temporary file D:\Mouse\Notebooks\merged_2_0.nc
Deleted temporary file D:\Mouse\Notebooks\merged_2_10.nc
Deleted temporary file D:\Mouse\Notebooks\merged_2_12.nc
Deleted temporary file D:\Mouse\Notebooks\merged_2_14.nc
Deleted temporary file D:\Mouse\Notebooks\merged_2_16.nc
Deleted temporary file D:\Mouse\Notebooks\merged_2_18.nc
Deleted temporary file D:\Mouse\Notebooks\merged_2_2.nc
Deleted temporary file D:\Mouse\Notebooks\merged_2_20.nc
Deleted temporary file D:\Mouse\Notebooks\merged_2_22.nc
Deleted temporary file D:\Mouse\Notebooks\merged_2_24.nc
Deleted temporary file D:\Mouse\Notebooks\merged_2_26.nc
Deleted temporary file D:\Mouse\Notebooks\merged_2_28.nc
Deleted temporary file D:\Mouse\Notebooks\merged_2_30.nc
Deleted temporary file D:\Mouse\Notebooks\merged_2_32.nc
Deleted temporary file D:\Mouse\Notebooks\merged_2_34.nc
Deleted temporary file D:\Mouse\Notebo

In [13]:
import xarray as xr

# Open the final file (assumes it's saved as final_data.nc)
final_data = xr.open_dataarray("final_data.nc")

# # Print a summary of the DataArray (shows dimensions, coordinates, etc.)
# print(final_data)

# Inspect the coordinates for clarity:
print("Dimensions:", final_data.dims)
print("Coordinates:", final_data.coords)

# view first column of the data as numpy array
print(final_data[0:10, 10:15, 0].values)


Dimensions: ('gene_symbol', 'parcellation_index', 'subclass')
Coordinates: Coordinates:
  * gene_symbol         (gene_symbol) object 68kB 'ENSMUSG00000026676' ... 'E...
  * parcellation_index  (parcellation_index) int32 956B 2 8 29 ... 1290 1291
  * subclass            (subclass) object 832B '001 CLA-EPd-CTX Car3 Glut' .....
[[0.47557068        nan 0.32928467 0.28772065 0.3972168 ]
 [0.1040144         nan 0.04470825 0.0197962  0.06651306]
 [0.44015694        nan 0.24813843 0.25791598 0.38989258]
 [0.24117851        nan 0.11322021 0.14618603 0.18463135]
 [1.60424161        nan 1.12147522 0.81159732 1.50390625]
 [3.61724854        nan 2.25854492 2.32458693 3.52050781]
 [0.19246483        nan 0.09188843 0.0838238  0.09854126]
 [2.89416504        nan 1.94482422 1.86554639 2.74511719]
 [4.58517456        nan 3.53491211 2.42676638 5.00683594]
 [5.14294434        nan 4.87304688 4.63930635 5.20507812]]
