In [2]:
import scanpy as sc
from scipy.spatial import cKDTree
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

import os
import re
import pandas as pd
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
from typing import Dict
import scanpy as sc

In [3]:
# Scanpy session setup: maximum log verbosity, version banner, figure defaults.
sc.settings.verbosity = 3  # levels: 0 = errors, 1 = warnings, 2 = info, 3 = hints
sc.logging.print_header()  # emits the scanpy / dependency version line shown below the cell
sc.settings.set_figure_params(facecolor="white", dpi=100)

scanpy==1.10.1 anndata==0.10.7 umap==0.5.5 numpy==1.26.4 scipy==1.13.0 pandas==2.2.2 scikit-learn==1.4.2 statsmodels==0.14.1 igraph==0.11.4 pynndescent==0.5.12


In [4]:
# Lookup from fine-grained anatomical region name -> coarse label
# ('PFC', 'STR' or 'BF').  The table is written group by group so each label
# appears once; the comprehension flattens the groups into a plain dict whose
# insertion order is all 'PFC' names, then 'STR', then 'BF'.
# Region-name strings are kept exactly as-is, including irregular ones such as
# 'Anterior cingulate area ventral part 6a' (no "layer") -- presumably they
# mirror the upstream annotation names; verify before normalising them.
# NOTE(review): 'Median preoptic nucleus' is listed under 'BF' in hippo_map
# but is absent here -- confirm that this is intentional.
pfc_map = {
    region_name: group_label
    for group_label, region_names in (
        ('PFC', (
            'Primary motor area',
            'Primary motor area Layer 1',
            'Primary motor area Layer 2/3',
            'Primary motor area Layer 5',
            'Primary motor area Layer 6a',
            'Primary motor area Layer 6b',
            'Secondary motor area',
            'Secondary motor area layer 1',
            'Secondary motor area layer 2/3',
            'Secondary motor area layer 5',
            'Secondary motor area layer 6a',
            'Secondary motor area layer 6b',
            'Anterior cingulate area',
            'Anterior cingulate area layer 1',
            'Anterior cingulate area layer 2/3',
            'Anterior cingulate area layer 5',
            'Anterior cingulate area layer 6a',
            'Anterior cingulate area layer 6b',
            'Anterior cingulate area dorsal part',
            'Anterior cingulate area dorsal part layer 1',
            'Anterior cingulate area dorsal part layer 2/3',
            'Anterior cingulate area dorsal part layer 5',
            'Anterior cingulate area dorsal part layer 6a',
            'Anterior cingulate area dorsal part layer 6b',
            'Anterior cingulate area ventral part',
            'Anterior cingulate area ventral part layer 1',
            'Anterior cingulate area ventral part layer 2/3',
            'Anterior cingulate area ventral part layer 5',
            'Anterior cingulate area ventral part 6a',
            'Anterior cingulate area ventral part 6b',
            'Prelimbic area',
            'Prelimbic area layer 1',
            'Prelimbic area layer 2',
            'Prelimbic area layer 2/3',
            'Prelimbic area layer 5',
            'Prelimbic area layer 6a',
            'Prelimbic area layer 6b',
        )),
        ('STR', (
            'Striatum dorsal region',
            'Caudoputamen',
            'Striatum ventral region',
            'Nucleus accumbens',
            'Islands of Calleja',
            'Major island of Calleja',
            'Olfactory tubercle layers 1-3',
            'Olfactory tubercle molecular layer',
            'Olfactory tubercle pyramidal layer',
            'Olfactory tubercle polymorph layer',
            'Lateral strip of striatum',
            'Lateral septal complex',
            'Lateral septal nucleus',
            'Lateral septal nucleus caudal (caudodorsal) part',
            'Lateral septal nucleus rostral (rostroventral) part',
            'Lateral septal nucleus ventral part',
            'Septofimbrial nucleus',
            'Septohippocampal nucleus',
            'Striatum-like amygdalar nuclei',
            'Anterior amygdalar area',
            'Bed nucleus of the accessory olfactory tract',
            'Central amygdalar nucleus',
            'Central amygdalar nucleus capsular part',
            'Central amygdalar nucleus lateral part',
            'Central amygdalar nucleus medial part',
            'Intercalated amygdalar nucleus',
            'Medial amygdalar nucleus',
            'Medial amygdalar nucleus anterodorsal part',
            'Medial amygdalar nucleus anteroventral part',
            'Medial amygdalar nucleus posterodorsal part',
            'Medial amygdalar nucleus posterodorsal part sublayer a',
            'Medial amygdalar nucleus posterodorsal part sublayer b',
            'Medial amygdalar nucleus posterodorsal part sublayer c',
            'Medial amygdalar nucleus posteroventral part',
            'Triangular nucleus of septum',
            'fimbria',
            'column of fornix',
        )),
        ('BF', (
            'Substantia innominata',
            'Magnocellular nucleus',
            'Diagonal band nucleus',
            'Hypothalamus',
            'Periventricular zone',
            'Supraoptic nucleus',
            'Accessory supraoptic group',
            'Nucleus circularis',
            'Paraventricular hypothalamic nucleus',
            'Paraventricular hypothalamic nucleus magnocellular division',
            'Paraventricular hypothalamic nucleus magnocellular division anterior magnocellular part',
            'Paraventricular hypothalamic nucleus magnocellular division medial magnocellular part',
            'Paraventricular hypothalamic nucleus magnocellular division posterior magnocellular part',
            'Paraventricular hypothalamic nucleus magnocellular division posterior magnocellular part lateral zone',
            'Paraventricular hypothalamic nucleus magnocellular division posterior magnocellular part medial zone',
            'Paraventricular hypothalamic nucleus parvicellular division',
            'Paraventricular hypothalamic nucleus parvicellular division anterior parvicellular part',
            'Paraventricular hypothalamic nucleus parvicellular division medial parvicellular part dorsal zone',
            'Paraventricular hypothalamic nucleus parvicellular division periventricular part',
            'Periventricular hypothalamic nucleus anterior part',
            'Periventricular hypothalamic nucleus intermediate part',
            'Arcuate hypothalamic nucleus',
            'Periventricular region',
            'Anterodorsal preoptic nucleus',
            'Anterior hypothalamic area',
            'Anteroventral preoptic nucleus',
            'Anteroventral periventricular nucleus',
            'Dorsomedial nucleus of the hypothalamus',
            'Dorsomedial nucleus of the hypothalamus anterior part',
            'Dorsomedial nucleus of the hypothalamus posterior part',
            'Dorsomedial nucleus of the hypothalamus ventral part',
            'Medial preoptic area',
            'Vascular organ of the lamina terminalis',
            'Posterodorsal preoptic nucleus',
            'Parastrial nucleus',
            'Suprachiasmatic preoptic nucleus',
            'Periventricular hypothalamic nucleus posterior part',
            'Periventricular hypothalamic nucleus preoptic part',
            'Subparaventricular zone',
            'Suprachiasmatic nucleus',
            'Subfornical organ',
            'Ventromedial preoptic nucleus',
            'Ventrolateral preoptic nucleus',
            'Hypothalamic medial zone',
            'Anterior hypothalamic nucleus',
            'Anterior hypothalamic nucleus anterior part',
            'Anterior hypothalamic nucleus central part',
            'Anterior hypothalamic nucleus dorsal part',
            'Anterior hypothalamic nucleus posterior part',
            'Mammillary body',
            'Lateral mammillary nucleus',
            'Medial mammillary nucleus',
            'Medial mammillary nucleus median part',
            'Medial mammillary nucleus lateral part',
            'Medial mammillary nucleus medial part',
            'Medial mammillary nucleus posterior part',
            'Medial mammillary nucleus dorsal part',
            'Supramammillary nucleus',
            'Supramammillary nucleus lateral part',
            'Supramammillary nucleus medial part',
            'Tuberomammillary nucleus',
            'Tuberomammillary nucleus dorsal part',
            'Tuberomammillary nucleus ventral part',
            'Medial preoptic nucleus',
            'Medial preoptic nucleus central part',
            'Medial preoptic nucleus lateral part',
            'Medial preoptic nucleus medial part',
            'Dorsal premammillary nucleus',
            'Ventral premammillary nucleus',
            'Paraventricular hypothalamic nucleus descending division',
            'Paraventricular hypothalamic nucleus descending division dorsal parvicellular part',
            'Paraventricular hypothalamic nucleus descending division forniceal part',
            'Paraventricular hypothalamic nucleus descending division lateral parvicellular part',
            'Paraventricular hypothalamic nucleus descending division medial parvicellular part ventral zone',
            'Ventromedial hypothalamic nucleus',
            'Ventromedial hypothalamic nucleus anterior part',
            'Ventromedial hypothalamic nucleus central part',
            'Ventromedial hypothalamic nucleus dorsomedial part',
            'Ventromedial hypothalamic nucleus ventrolateral part',
            'Posterior hypothalamic nucleus',
            'Hypothalamic lateral zone',
            'Lateral hypothalamic area',
            'Lateral preoptic area',
            'Preparasubthalamic nucleus',
            'Parasubthalamic nucleus',
            'Perifornical nucleus',
            'Retrochiasmatic area',
            'Subthalamic nucleus',
            'Tuberal nucleus',
            'Zona incerta',
            'Dopaminergic A13 group',
            'Fields of Forel',
            'Median eminence',
        )),
    )
    for region_name in region_names
}

# Lookup from fine-grained anatomical region name -> coarse label
# ('RS', 'ECT', 'HPF', 'BF' or 'TH').  The table is written group by group so
# each label is spelled once per run; the comprehension flattens the groups
# into a plain dict.  'BF' deliberately appears as two separate runs (before
# and after the 'TH' run) so the resulting insertion order is unchanged.
# Region-name strings are kept exactly as-is, including irregular ones such as
# 'Medial visual arealayer 5' and 'Ectorhinal area/Layer 1' -- presumably they
# mirror the upstream annotation names; verify before normalising them.
hippo_map = {
    region_name: group_label
    for group_label, region_names in (
        ('RS', (
            'Retrosplenial area',
            'Retrosplenial area lateral agranular part',
            'Retrosplenial area lateral agranular part layer 1',
            'Retrosplenial area lateral agranular part layer 2/3',
            'Retrosplenial area lateral agranular part layer 5',
            'Retrosplenial area lateral agranular part layer 6a',
            'Retrosplenial area lateral agranular part layer 6b',
            'Mediomedial anterior visual area',
            'Mediomedial anterior visual area layer 1',
            'Mediomedial anterior visual area layer 2/3',
            'Mediomedial anterior visual area layer 4',
            'Mediomedial anterior visual arealayer 5',
            'Mediomedial anterior visual area layer 6a',
            'Mediomedial anterior visual area layer 6b',
            'Mediomedial posterior visual area',
            'Mediomedial posterior visual area layer 1',
            'Mediomedial posterior visual area layer 2/3',
            'Mediomedial posterior visual area layer 4',
            'Mediomedial posterior visual arealayer 5',
            'Mediomedial posterior visual area layer 6a',
            'Mediomedial posterior visual area layer 6b',
            'Medial visual area',
            'Medial visual area layer 1',
            'Medial visual area layer 2/3',
            'Medial visual area layer 4',
            'Medial visual arealayer 5',
            'Medial visual area layer 6a',
            'Medial visual area layer 6b',
            'Retrosplenial area dorsal part',
            'Retrosplenial area dorsal part layer 1',
            'Retrosplenial area dorsal part layer 2/3',
            'Retrosplenial area dorsal part layer 4',
            'Retrosplenial area dorsal part layer 5',
            'Retrosplenial area dorsal part layer 6a',
            'Retrosplenial area dorsal part layer 6b',
            'Retrosplenial area ventral part',
            'Retrosplenial area ventral part layer 1',
            'Retrosplenial area ventral part layer 2',
            'Retrosplenial area ventral part layer 2/3',
            'Retrosplenial area ventral part layer 5',
            'Retrosplenial area ventral part layer 6a',
            'Retrosplenial area ventral part layer 6b',
        )),
        ('ECT', (
            'Ectorhinal area',
            'Ectorhinal area/Layer 1',
            'Ectorhinal area/Layer 2/3',
            'Ectorhinal area/Layer 5',
            'Ectorhinal area/Layer 6a',
            'Ectorhinal area/Layer 6b',
            'Perirhinal area',
            'Perirhinal area layer 1',
            'Perirhinal area layer 2/3',
            'Perirhinal area layer 5',
            'Perirhinal area layer 6a',
            'Perirhinal area layer 6b',
            'Entorhinal area',
            'Entorhinal area lateral part',
            'Entorhinal area lateral part layer 1',
            'Entorhinal area lateral part layer 2',
            'Entorhinal area lateral part layer 2/3',
            'Entorhinal area lateral part layer 2a',
            'Entorhinal area lateral part layer 2b',
            'Entorhinal area lateral part layer 3',
            'Entorhinal area lateral part layer 4',
            'Entorhinal area lateral part layer 4/5',
            'Entorhinal area lateral part layer 5',
            'Entorhinal area lateral part layer 5/6',
            'Entorhinal area lateral part layer 6a',
            'Entorhinal area lateral part layer 6b',
            'Entorhinal area medial part dorsal zone',
            'Entorhinal area medial part dorsal zone layer 1',
            'Entorhinal area medial part dorsal zone layer 2',
            'Entorhinal area medial part dorsal zone layer 2a',
            'Entorhinal area medial part dorsal zone layer 2b',
            'Entorhinal area medial part dorsal zone layer 3',
            'Entorhinal area medial part dorsal zone layer 4',
            'Entorhinal area medial part dorsal zone layer 5',
            'Entorhinal area medial part dorsal zone layer 5/6',
            'Entorhinal area medial part dorsal zone layer 6',
            'Entorhinal area medial part ventral zone',
            'Entorhinal area medial part ventral zone layer 1',
            'Entorhinal area medial part ventral zone layer 2',
            'Entorhinal area medial part ventral zone layer 3',
            'Entorhinal area medial part ventral zone layer 4',
            'Entorhinal area medial part ventral zone layer 5/6',
        )),
        ('HPF', (
            'Hippocampal formation',
            'Hippocampal region',
            "Ammon's horn",
            'Field CA1',
            'Field CA1 stratum lacunosum-moleculare',
            'Field CA1 stratum oriens',
            'Field CA1 pyramidal layer',
            'Field CA1 stratum radiatum',
            'Field CA2',
            'Field CA2 stratum lacunosum-moleculare',
            'Field CA2 stratum oriens',
            'Field CA2 pyramidal layer',
            'Field CA2 stratum radiatum',
            'Field CA3',
            'Field CA3 stratum lacunosum-moleculare',
            'Field CA3 stratum lucidum',
            'Field CA3 stratum oriens',
            'Field CA3 pyramidal layer',
            'Field CA3 stratum radiatum',
            'Dentate gyrus',
            'Dentate gyrus molecular layer',
            'Dentate gyrus polymorph layer',
            'Dentate gyrus granule cell layer',
            'Dentate gyrus subgranular zone',
            'Dentate gyrus crest',
            'Dentate gyrus crest molecular layer',
            'Dentate gyrus crest polymorph layer',
            'Dentate gyrus crest granule cell layer',
            'Dentate gyrus lateral blade',
            'Dentate gyrus lateral blade molecular layer',
            'Dentate gyrus lateral blade polymorph layer',
            'Dentate gyrus lateral blade granule cell layer',
            'Dentate gyrus medial blade',
            'Dentate gyrus medial blade molecular layer',
            'Dentate gyrus medial blade polymorph layer',
            'Dentate gyrus medial blade granule cell layer',
            'Fasciola cinerea',
            'Induseum griseum',
            'Retrohippocampal region',
            'Parasubiculum',
            'Parasubiculum layer 1',
            'Parasubiculum layer 2',
            'Parasubiculum layer 3',
            'Postsubiculum',
            'Postsubiculum layer 1',
            'Postsubiculum layer 2',
            'Postsubiculum layer 3',
            'Presubiculum',
            'Presubiculum layer 1',
            'Presubiculum layer 2',
            'Presubiculum layer 3',
            'Subiculum',
            'Subiculum dorsal part',
            'Subiculum dorsal part molecular layer',
            'Subiculum dorsal part pyramidal layer',
            'Subiculum dorsal part stratum radiatum',
            'Subiculum ventral part',
            'Subiculum ventral part molecular layer',
            'Subiculum ventral part pyramidal layer',
            'Subiculum ventral part stratum radiatum',
            'Prosubiculum',
            'Prosubiculum dorsal part',
            'Prosubiculum dorsal part molecular layer',
            'Prosubiculum dorsal part pyramidal layer',
            'Prosubiculum dorsal part stratum radiatum',
            'Prosubiculum ventral part',
            'Prosubiculum ventral part molecular layer',
            'Prosubiculum ventral part pyramidal layer',
            'Prosubiculum ventral part stratum radiatum',
            'Hippocampo-amygdalar transition area',
            'Area prostriata',
        )),
        # First 'BF' run: sits between the 'HPF' and 'TH' runs in insertion order.
        ('BF', (
            'Substantia innominata',
            'Magnocellular nucleus',
            'Diagonal band nucleus',
        )),
        ('TH', (
            'Thalamus',
            'Thalamus sensory-motor cortex related',
            'Ventral group of the dorsal thalamus',
            'Ventral anterior-lateral complex of the thalamus',
            'Ventral medial nucleus of the thalamus',
            'Ventral posterior complex of the thalamus',
            'Ventral posterolateral nucleus of the thalamus',
            'Ventral posterolateral nucleus of the thalamus parvicellular part',
            'Ventral posteromedial nucleus of the thalamus',
            'Ventral posteromedial nucleus of the thalamus parvicellular part',
            'Posterior triangular thalamic nucleus',
            'Subparafascicular nucleus',
            'Subparafascicular nucleus magnocellular part',
            'Subparafascicular nucleus parvicellular part',
            'Subparafascicular area',
            'Peripeduncular nucleus',
            'Geniculate group dorsal thalamus',
            'Medial geniculate complex',
            'Medial geniculate complex dorsal part',
            'Medial geniculate complex ventral part',
            'Medial geniculate complex medial part',
            'Dorsal part of the lateral geniculate complex',
            'Dorsal part of the lateral geniculate complex shell',
            'Dorsal part of the lateral geniculate complex core',
            'Dorsal part of the lateral geniculate complex ipsilateral zone',
            'Thalamus polymodal association cortex related',
            'Lateral group of the dorsal thalamus',
            'Lateral posterior nucleus of the thalamus',
            'Posterior complex of the thalamus',
            'Posterior limiting nucleus of the thalamus',
            'Suprageniculate nucleus',
            'Ethmoid nucleus of the thalamus',
            'Retroethmoid nucleus',
            'Anterior group of the dorsal thalamus',
            'Anteroventral nucleus of thalamus',
            'Anteromedial nucleus',
            'Anteromedial nucleus dorsal part',
            'Anteromedial nucleus ventral part',
            'Anterodorsal nucleus',
            'Interanteromedial nucleus of the thalamus',
            'Interanterodorsal nucleus of the thalamus',
            'Lateral dorsal nucleus of thalamus',
            'Medial group of the dorsal thalamus',
            'Intermediodorsal nucleus of the thalamus',
            'Mediodorsal nucleus of thalamus',
            'Mediodorsal nucleus of the thalamus central part',
            'Mediodorsal nucleus of the thalamus lateral part',
            'Mediodorsal nucleus of the thalamus medial part',
            'Submedial nucleus of the thalamus',
            'Perireunensis nucleus',
            'Midline group of the dorsal thalamus',
            'Paraventricular nucleus of the thalamus',
            'Parataenial nucleus',
            'Nucleus of reuniens',
            'Xiphoid thalamic nucleus',
            'Intralaminar nuclei of the dorsal thalamus',
            'Rhomboid nucleus',
            'Central medial nucleus of the thalamus',
            'Paracentral nucleus',
            'Central lateral nucleus of the thalamus',
            'Parafascicular nucleus',
            'Posterior intralaminar thalamic nucleus',
            'Reticular nucleus of the thalamus',
            'Geniculate group ventral thalamus',
            'Intergeniculate leaflet of the lateral geniculate complex',
            'Intermediate geniculate nucleus',
            'Ventral part of the lateral geniculate complex',
            'Ventral part of the lateral geniculate complex lateral zone',
            'Ventral part of the lateral geniculate complex medial zone',
            'Subgeniculate nucleus',
            'Epithalamus',
            'Medial habenula',
            'Lateral habenula',
            'Pineal body',
        )),
        # Second 'BF' run: follows the 'TH' run in insertion order.
        ('BF', (
            'Hypothalamus',
            'Periventricular zone',
            'Supraoptic nucleus',
            'Accessory supraoptic group',
            'Nucleus circularis',
            'Paraventricular hypothalamic nucleus',
            'Paraventricular hypothalamic nucleus magnocellular division',
            'Paraventricular hypothalamic nucleus magnocellular division anterior magnocellular part',
            'Paraventricular hypothalamic nucleus magnocellular division medial magnocellular part',
            'Paraventricular hypothalamic nucleus magnocellular division posterior magnocellular part',
            'Paraventricular hypothalamic nucleus magnocellular division posterior magnocellular part lateral zone',
            'Paraventricular hypothalamic nucleus magnocellular division posterior magnocellular part medial zone',
            'Paraventricular hypothalamic nucleus parvicellular division',
            'Paraventricular hypothalamic nucleus parvicellular division anterior parvicellular part',
            'Paraventricular hypothalamic nucleus parvicellular division medial parvicellular part dorsal zone',
            'Paraventricular hypothalamic nucleus parvicellular division periventricular part',
            'Periventricular hypothalamic nucleus anterior part',
            'Periventricular hypothalamic nucleus intermediate part',
            'Arcuate hypothalamic nucleus',
            'Periventricular region',
            'Anterodorsal preoptic nucleus',
            'Anterior hypothalamic area',
            'Anteroventral preoptic nucleus',
            'Anteroventral periventricular nucleus',
            'Dorsomedial nucleus of the hypothalamus',
            'Dorsomedial nucleus of the hypothalamus anterior part',
            'Dorsomedial nucleus of the hypothalamus posterior part',
            'Dorsomedial nucleus of the hypothalamus ventral part',
            'Median preoptic nucleus',
            'Medial preoptic area',
            'Vascular organ of the lamina terminalis',
            'Posterodorsal preoptic nucleus',
            'Parastrial nucleus',
            'Suprachiasmatic preoptic nucleus',
            'Periventricular hypothalamic nucleus posterior part',
            'Periventricular hypothalamic nucleus preoptic part',
            'Subparaventricular zone',
            'Suprachiasmatic nucleus',
            'Subfornical organ',
            'Ventromedial preoptic nucleus',
            'Ventrolateral preoptic nucleus',
            'Hypothalamic medial zone',
            'Anterior hypothalamic nucleus',
            'Anterior hypothalamic nucleus anterior part',
            'Anterior hypothalamic nucleus central part',
            'Anterior hypothalamic nucleus dorsal part',
            'Anterior hypothalamic nucleus posterior part',
            'Mammillary body',
            'Lateral mammillary nucleus',
            'Medial mammillary nucleus',
            'Medial mammillary nucleus median part',
            'Medial mammillary nucleus lateral part',
            'Medial mammillary nucleus medial part',
            'Medial mammillary nucleus posterior part',
            'Medial mammillary nucleus dorsal part',
            'Supramammillary nucleus',
            'Supramammillary nucleus lateral part',
            'Supramammillary nucleus medial part',
            'Tuberomammillary nucleus',
            'Tuberomammillary nucleus dorsal part',
            'Tuberomammillary nucleus ventral part',
            'Medial preoptic nucleus',
            'Medial preoptic nucleus central part',
            'Medial preoptic nucleus lateral part',
            'Medial preoptic nucleus medial part',
            'Dorsal premammillary nucleus',
            'Ventral premammillary nucleus',
            'Paraventricular hypothalamic nucleus descending division',
            'Paraventricular hypothalamic nucleus descending division dorsal parvicellular part',
            'Paraventricular hypothalamic nucleus descending division forniceal part',
            'Paraventricular hypothalamic nucleus descending division lateral parvicellular part',
            'Paraventricular hypothalamic nucleus descending division medial parvicellular part ventral zone',
            'Ventromedial hypothalamic nucleus',
            'Ventromedial hypothalamic nucleus anterior part',
            'Ventromedial hypothalamic nucleus central part',
            'Ventromedial hypothalamic nucleus dorsomedial part',
            'Ventromedial hypothalamic nucleus ventrolateral part',
            'Posterior hypothalamic nucleus',
            'Hypothalamic lateral zone',
            'Lateral hypothalamic area',
            'Lateral preoptic area',
            'Preparasubthalamic nucleus',
            'Parasubthalamic nucleus',
            'Perifornical nucleus',
            'Retrochiasmatic area',
            'Subthalamic nucleus',
            'Tuberal nucleus',
            'Zona incerta',
            'Dopaminergic A13 group',
            'Fields of Forel',
            'Median eminence',
        )),
    )
    for region_name in region_names
}




merged_map = {'Primary motor area': 'PFC',
 'Primary motor area Layer 1': 'PFC',
 'Primary motor area Layer 2/3': 'PFC',
 'Primary motor area Layer 5': 'PFC',
 'Primary motor area Layer 6a': 'PFC',
 'Primary motor area Layer 6b': 'PFC',
 'Secondary motor area': 'PFC',
 'Secondary motor area layer 1': 'PFC',
 'Secondary motor area layer 2/3': 'PFC',
 'Secondary motor area layer 5': 'PFC',
 'Secondary motor area layer 6a': 'PFC',
 'Secondary motor area layer 6b': 'PFC',
 'Anterior cingulate area': 'PFC',
 'Anterior cingulate area layer 1': 'PFC',
 'Anterior cingulate area layer 2/3': 'PFC',
 'Anterior cingulate area layer 5': 'PFC',
 'Anterior cingulate area layer 6a': 'PFC',
 'Anterior cingulate area layer 6b': 'PFC',
 'Anterior cingulate area dorsal part': 'PFC',
 'Anterior cingulate area dorsal part layer 1': 'PFC',
 'Anterior cingulate area dorsal part layer 2/3': 'PFC',
 'Anterior cingulate area dorsal part layer 5': 'PFC',
 'Anterior cingulate area dorsal part layer 6a': 'PFC',
 'Anterior cingulate area dorsal part layer 6b': 'PFC',
 'Anterior cingulate area ventral part': 'PFC',
 'Anterior cingulate area ventral part layer 1': 'PFC',
 'Anterior cingulate area ventral part layer 2/3': 'PFC',
 'Anterior cingulate area ventral part layer 5': 'PFC',
 'Anterior cingulate area ventral part 6a': 'PFC',
 'Anterior cingulate area ventral part 6b': 'PFC',
 'Prelimbic area': 'PFC',
 'Prelimbic area layer 1': 'PFC',
 'Prelimbic area layer 2': 'PFC',
 'Prelimbic area layer 2/3': 'PFC',
 'Prelimbic area layer 5': 'PFC',
 'Prelimbic area layer 6a': 'PFC',
 'Prelimbic area layer 6b': 'PFC',
 'Striatum dorsal region': 'STR',
 'Caudoputamen': 'STR',
 'Striatum ventral region': 'STR',
 'Nucleus accumbens': 'STR',
 'Islands of Calleja': 'STR',
 'Major island of Calleja': 'STR',
 'Olfactory tubercle layers 1-3': 'STR',
 'Olfactory tubercle molecular layer': 'STR',
 'Olfactory tubercle pyramidal layer': 'STR',
 'Olfactory tubercle polymorph layer': 'STR',
 'Lateral strip of striatum': 'STR',
 'Lateral septal complex': 'STR',
 'Lateral septal nucleus': 'STR',
 'Lateral septal nucleus caudal (caudodorsal) part': 'STR',
 'Lateral septal nucleus rostral (rostroventral) part': 'STR',
 'Lateral septal nucleus ventral part': 'STR',
 'Septofimbrial nucleus': 'STR',
 'Septohippocampal nucleus': 'STR',
 'Striatum-like amygdalar nuclei': 'STR',
 'Anterior amygdalar area': 'STR',
 'Bed nucleus of the accessory olfactory tract': 'STR',
 'Central amygdalar nucleus': 'STR',
 'Central amygdalar nucleus capsular part': 'STR',
 'Central amygdalar nucleus lateral part': 'STR',
 'Central amygdalar nucleus medial part': 'STR',
 'Intercalated amygdalar nucleus': 'STR',
 'Medial amygdalar nucleus': 'STR',
 'Medial amygdalar nucleus anterodorsal part': 'STR',
 'Medial amygdalar nucleus anteroventral part': 'STR',
 'Medial amygdalar nucleus posterodorsal part': 'STR',
 'Medial amygdalar nucleus posterodorsal part sublayer a': 'STR',
 'Medial amygdalar nucleus posterodorsal part sublayer b': 'STR',
 'Medial amygdalar nucleus posterodorsal part sublayer c': 'STR',
 'Medial amygdalar nucleus posteroventral part': 'STR',
 'Triangular nucleus of septum': 'STR',
 'fimbria': 'STR',
 'column of fornix': 'STR',
 'Substantia innominata': 'BF',
 'Magnocellular nucleus': 'BF',
 'Diagonal band nucleus': 'BF',
 'Hypothalamus': 'BF',
 'Periventricular zone': 'BF',
 'Supraoptic nucleus': 'BF',
 'Accessory supraoptic group': 'BF',
 'Nucleus circularis': 'BF',
 'Paraventricular hypothalamic nucleus': 'BF',
 'Paraventricular hypothalamic nucleus magnocellular division': 'BF',
 'Paraventricular hypothalamic nucleus magnocellular division anterior magnocellular part': 'BF',
 'Paraventricular hypothalamic nucleus magnocellular division medial magnocellular part': 'BF',
 'Paraventricular hypothalamic nucleus magnocellular division posterior magnocellular part': 'BF',
 'Paraventricular hypothalamic nucleus magnocellular division posterior magnocellular part lateral zone': 'BF',
 'Paraventricular hypothalamic nucleus magnocellular division posterior magnocellular part medial zone': 'BF',
 'Paraventricular hypothalamic nucleus parvicellular division': 'BF',
 'Paraventricular hypothalamic nucleus parvicellular division anterior parvicellular part': 'BF',
 'Paraventricular hypothalamic nucleus parvicellular division medial parvicellular part dorsal zone': 'BF',
 'Paraventricular hypothalamic nucleus parvicellular division periventricular part': 'BF',
 'Periventricular hypothalamic nucleus anterior part': 'BF',
 'Periventricular hypothalamic nucleus intermediate part': 'BF',
 'Arcuate hypothalamic nucleus': 'BF',
 'Periventricular region': 'BF',
 'Anterodorsal preoptic nucleus': 'BF',
 'Anterior hypothalamic area': 'BF',
 'Anteroventral preoptic nucleus': 'BF',
 'Anteroventral periventricular nucleus': 'BF',
 'Dorsomedial nucleus of the hypothalamus': 'BF',
 'Dorsomedial nucleus of the hypothalamus anterior part': 'BF',
 'Dorsomedial nucleus of the hypothalamus posterior part': 'BF',
 'Dorsomedial nucleus of the hypothalamus ventral part': 'BF',
 'Medial preoptic area': 'BF',
 'Vascular organ of the lamina terminalis': 'BF',
 'Posterodorsal preoptic nucleus': 'BF',
 'Parastrial nucleus': 'BF',
 'Suprachiasmatic preoptic nucleus': 'BF',
 'Periventricular hypothalamic nucleus posterior part': 'BF',
 'Periventricular hypothalamic nucleus preoptic part': 'BF',
 'Subparaventricular zone': 'BF',
 'Suprachiasmatic nucleus': 'BF',
 'Subfornical organ': 'BF',
 'Ventromedial preoptic nucleus': 'BF',
 'Ventrolateral preoptic nucleus': 'BF',
 'Hypothalamic medial zone': 'BF',
 'Anterior hypothalamic nucleus': 'BF',
 'Anterior hypothalamic nucleus anterior part': 'BF',
 'Anterior hypothalamic nucleus central part': 'BF',
 'Anterior hypothalamic nucleus dorsal part': 'BF',
 'Anterior hypothalamic nucleus posterior part': 'BF',
 'Mammillary body': 'BF',
 'Lateral mammillary nucleus': 'BF',
 'Medial mammillary nucleus': 'BF',
 'Medial mammillary nucleus median part': 'BF',
 'Medial mammillary nucleus lateral part': 'BF',
 'Medial mammillary nucleus medial part': 'BF',
 'Medial mammillary nucleus posterior part': 'BF',
 'Medial mammillary nucleus dorsal part': 'BF',
 'Supramammillary nucleus': 'BF',
 'Supramammillary nucleus lateral part': 'BF',
 'Supramammillary nucleus medial part': 'BF',
 'Tuberomammillary nucleus': 'BF',
 'Tuberomammillary nucleus dorsal part': 'BF',
 'Tuberomammillary nucleus ventral part': 'BF',
 'Medial preoptic nucleus': 'BF',
 'Medial preoptic nucleus central part': 'BF',
 'Medial preoptic nucleus lateral part': 'BF',
 'Medial preoptic nucleus medial part': 'BF',
 'Dorsal premammillary nucleus': 'BF',
 'Ventral premammillary nucleus': 'BF',
 'Paraventricular hypothalamic nucleus descending division': 'BF',
 'Paraventricular hypothalamic nucleus descending division dorsal parvicellular part': 'BF',
 'Paraventricular hypothalamic nucleus descending division forniceal part': 'BF',
 'Paraventricular hypothalamic nucleus descending division lateral parvicellular part': 'BF',
 'Paraventricular hypothalamic nucleus descending division medial parvicellular part ventral zone': 'BF',
 'Ventromedial hypothalamic nucleus': 'BF',
 'Ventromedial hypothalamic nucleus anterior part': 'BF',
 'Ventromedial hypothalamic nucleus central part': 'BF',
 'Ventromedial hypothalamic nucleus dorsomedial part': 'BF',
 'Ventromedial hypothalamic nucleus ventrolateral part': 'BF',
 'Posterior hypothalamic nucleus': 'BF',
 'Hypothalamic lateral zone': 'BF',
 'Lateral hypothalamic area': 'BF',
 'Lateral preoptic area': 'BF',
 'Preparasubthalamic nucleus': 'BF',
 'Parasubthalamic nucleus': 'BF',
 'Perifornical nucleus': 'BF',
 'Retrochiasmatic area': 'BF',
 'Subthalamic nucleus': 'BF',
 'Tuberal nucleus': 'BF',
 'Zona incerta': 'BF',
 'Dopaminergic A13 group': 'BF',
 'Fields of Forel': 'BF',
 'Median eminence': 'BF',
 'Retrosplenial area': 'RS',
 'Retrosplenial area lateral agranular part': 'RS',
 'Retrosplenial area lateral agranular part layer 1': 'RS',
 'Retrosplenial area lateral agranular part layer 2/3': 'RS',
 'Retrosplenial area lateral agranular part layer 5': 'RS',
 'Retrosplenial area lateral agranular part layer 6a': 'RS',
 'Retrosplenial area lateral agranular part layer 6b': 'RS',
 'Mediomedial anterior visual area': 'RS',
 'Mediomedial anterior visual area layer 1': 'RS',
 'Mediomedial anterior visual area layer 2/3': 'RS',
 'Mediomedial anterior visual area layer 4': 'RS',
 'Mediomedial anterior visual arealayer 5': 'RS',
 'Mediomedial anterior visual area layer 6a': 'RS',
 'Mediomedial anterior visual area layer 6b': 'RS',
 'Mediomedial posterior visual area': 'RS',
 'Mediomedial posterior visual area layer 1': 'RS',
 'Mediomedial posterior visual area layer 2/3': 'RS',
 'Mediomedial posterior visual area layer 4': 'RS',
 'Mediomedial posterior visual arealayer 5': 'RS',
 'Mediomedial posterior visual area layer 6a': 'RS',
 'Mediomedial posterior visual area layer 6b': 'RS',
 'Medial visual area': 'RS',
 'Medial visual area layer 1': 'RS',
 'Medial visual area layer 2/3': 'RS',
 'Medial visual area layer 4': 'RS',
 'Medial visual arealayer 5': 'RS',
 'Medial visual area layer 6a': 'RS',
 'Medial visual area layer 6b': 'RS',
 'Retrosplenial area dorsal part': 'RS',
 'Retrosplenial area dorsal part layer 1': 'RS',
 'Retrosplenial area dorsal part layer 2/3': 'RS',
 'Retrosplenial area dorsal part layer 4': 'RS',
 'Retrosplenial area dorsal part layer 5': 'RS',
 'Retrosplenial area dorsal part layer 6a': 'RS',
 'Retrosplenial area dorsal part layer 6b': 'RS',
 'Retrosplenial area ventral part': 'RS',
 'Retrosplenial area ventral part layer 1': 'RS',
 'Retrosplenial area ventral part layer 2': 'RS',
 'Retrosplenial area ventral part layer 2/3': 'RS',
 'Retrosplenial area ventral part layer 5': 'RS',
 'Retrosplenial area ventral part layer 6a': 'RS',
 'Retrosplenial area ventral part layer 6b': 'RS',
 'Ectorhinal area': 'ECT',
 'Ectorhinal area/Layer 1': 'ECT',
 'Ectorhinal area/Layer 2/3': 'ECT',
 'Ectorhinal area/Layer 5': 'ECT',
 'Ectorhinal area/Layer 6a': 'ECT',
 'Ectorhinal area/Layer 6b': 'ECT',
 'Perirhinal area': 'ECT',
 'Perirhinal area layer 1': 'ECT',
 'Perirhinal area layer 2/3': 'ECT',
 'Perirhinal area layer 5': 'ECT',
 'Perirhinal area layer 6a': 'ECT',
 'Perirhinal area layer 6b': 'ECT',
 'Entorhinal area': 'ECT',
 'Entorhinal area lateral part': 'ECT',
 'Entorhinal area lateral part layer 1': 'ECT',
 'Entorhinal area lateral part layer 2': 'ECT',
 'Entorhinal area lateral part layer 2/3': 'ECT',
 'Entorhinal area lateral part layer 2a': 'ECT',
 'Entorhinal area lateral part layer 2b': 'ECT',
 'Entorhinal area lateral part layer 3': 'ECT',
 'Entorhinal area lateral part layer 4': 'ECT',
 'Entorhinal area lateral part layer 4/5': 'ECT',
 'Entorhinal area lateral part layer 5': 'ECT',
 'Entorhinal area lateral part layer 5/6': 'ECT',
 'Entorhinal area lateral part layer 6a': 'ECT',
 'Entorhinal area lateral part layer 6b': 'ECT',
 'Entorhinal area medial part dorsal zone': 'ECT',
 'Entorhinal area medial part dorsal zone layer 1': 'ECT',
 'Entorhinal area medial part dorsal zone layer 2': 'ECT',
 'Entorhinal area medial part dorsal zone layer 2a': 'ECT',
 'Entorhinal area medial part dorsal zone layer 2b': 'ECT',
 'Entorhinal area medial part dorsal zone layer 3': 'ECT',
 'Entorhinal area medial part dorsal zone layer 4': 'ECT',
 'Entorhinal area medial part dorsal zone layer 5': 'ECT',
 'Entorhinal area medial part dorsal zone layer 5/6': 'ECT',
 'Entorhinal area medial part dorsal zone layer 6': 'ECT',
 'Entorhinal area medial part ventral zone': 'ECT',
 'Entorhinal area medial part ventral zone layer 1': 'ECT',
 'Entorhinal area medial part ventral zone layer 2': 'ECT',
 'Entorhinal area medial part ventral zone layer 3': 'ECT',
 'Entorhinal area medial part ventral zone layer 4': 'ECT',
 'Entorhinal area medial part ventral zone layer 5/6': 'ECT',
 'Hippocampal formation': 'HPF',
 'Hippocampal region': 'HPF',
 "Ammon's horn": 'HPF',
 'Field CA1': 'HPF',
 'Field CA1 stratum lacunosum-moleculare': 'HPF',
 'Field CA1 stratum oriens': 'HPF',
 'Field CA1 pyramidal layer': 'HPF',
 'Field CA1 stratum radiatum': 'HPF',
 'Field CA2': 'HPF',
 'Field CA2 stratum lacunosum-moleculare': 'HPF',
 'Field CA2 stratum oriens': 'HPF',
 'Field CA2 pyramidal layer': 'HPF',
 'Field CA2 stratum radiatum': 'HPF',
 'Field CA3': 'HPF',
 'Field CA3 stratum lacunosum-moleculare': 'HPF',
 'Field CA3 stratum lucidum': 'HPF',
 'Field CA3 stratum oriens': 'HPF',
 'Field CA3 pyramidal layer': 'HPF',
 'Field CA3 stratum radiatum': 'HPF',
 'Dentate gyrus': 'HPF',
 'Dentate gyrus molecular layer': 'HPF',
 'Dentate gyrus polymorph layer': 'HPF',
 'Dentate gyrus granule cell layer': 'HPF',
 'Dentate gyrus subgranular zone': 'HPF',
 'Dentate gyrus crest': 'HPF',
 'Dentate gyrus crest molecular layer': 'HPF',
 'Dentate gyrus crest polymorph layer': 'HPF',
 'Dentate gyrus crest granule cell layer': 'HPF',
 'Dentate gyrus lateral blade': 'HPF',
 'Dentate gyrus lateral blade molecular layer': 'HPF',
 'Dentate gyrus lateral blade polymorph layer': 'HPF',
 'Dentate gyrus lateral blade granule cell layer': 'HPF',
 'Dentate gyrus medial blade': 'HPF',
 'Dentate gyrus medial blade molecular layer': 'HPF',
 'Dentate gyrus medial blade polymorph layer': 'HPF',
 'Dentate gyrus medial blade granule cell layer': 'HPF',
 'Fasciola cinerea': 'HPF',
 'Induseum griseum': 'HPF',
 'Retrohippocampal region': 'HPF',
 'Parasubiculum': 'HPF',
 'Parasubiculum layer 1': 'HPF',
 'Parasubiculum layer 2': 'HPF',
 'Parasubiculum layer 3': 'HPF',
 'Postsubiculum': 'HPF',
 'Postsubiculum layer 1': 'HPF',
 'Postsubiculum layer 2': 'HPF',
 'Postsubiculum layer 3': 'HPF',
 'Presubiculum': 'HPF',
 'Presubiculum layer 1': 'HPF',
 'Presubiculum layer 2': 'HPF',
 'Presubiculum layer 3': 'HPF',
 'Subiculum': 'HPF',
 'Subiculum dorsal part': 'HPF',
 'Subiculum dorsal part molecular layer': 'HPF',
 'Subiculum dorsal part pyramidal layer': 'HPF',
 'Subiculum dorsal part stratum radiatum': 'HPF',
 'Subiculum ventral part': 'HPF',
 'Subiculum ventral part molecular layer': 'HPF',
 'Subiculum ventral part pyramidal layer': 'HPF',
 'Subiculum ventral part stratum radiatum': 'HPF',
 'Prosubiculum': 'HPF',
 'Prosubiculum dorsal part': 'HPF',
 'Prosubiculum dorsal part molecular layer': 'HPF',
 'Prosubiculum dorsal part pyramidal layer': 'HPF',
 'Prosubiculum dorsal part stratum radiatum': 'HPF',
 'Prosubiculum ventral part': 'HPF',
 'Prosubiculum ventral part molecular layer': 'HPF',
 'Prosubiculum ventral part pyramidal layer': 'HPF',
 'Prosubiculum ventral part stratum radiatum': 'HPF',
 'Hippocampo-amygdalar transition area': 'HPF',
 'Area prostriata': 'HPF',
 'Substantia innominata': 'BF',
 'Magnocellular nucleus': 'BF',
 'Diagonal band nucleus': 'BF',
 'Thalamus': 'TH',
 'Thalamus sensory-motor cortex related': 'TH',
 'Ventral group of the dorsal thalamus': 'TH',
 'Ventral anterior-lateral complex of the thalamus': 'TH',
 'Ventral medial nucleus of the thalamus': 'TH',
 'Ventral posterior complex of the thalamus': 'TH',
 'Ventral posterolateral nucleus of the thalamus': 'TH',
 'Ventral posterolateral nucleus of the thalamus parvicellular part': 'TH',
 'Ventral posteromedial nucleus of the thalamus': 'TH',
 'Ventral posteromedial nucleus of the thalamus parvicellular part': 'TH',
 'Posterior triangular thalamic nucleus': 'TH',
 'Subparafascicular nucleus': 'TH',
 'Subparafascicular nucleus magnocellular part': 'TH',
 'Subparafascicular nucleus parvicellular part': 'TH',
 'Subparafascicular area': 'TH',
 'Peripeduncular nucleus': 'TH',
 'Geniculate group dorsal thalamus': 'TH',
 'Medial geniculate complex': 'TH',
 'Medial geniculate complex dorsal part': 'TH',
 'Medial geniculate complex ventral part': 'TH',
 'Medial geniculate complex medial part': 'TH',
 'Dorsal part of the lateral geniculate complex': 'TH',
 'Dorsal part of the lateral geniculate complex shell': 'TH',
 'Dorsal part of the lateral geniculate complex core': 'TH',
 'Dorsal part of the lateral geniculate complex ipsilateral zone': 'TH',
 'Thalamus polymodal association cortex related': 'TH',
 'Lateral group of the dorsal thalamus': 'TH',
 'Lateral posterior nucleus of the thalamus': 'TH',
 'Posterior complex of the thalamus': 'TH',
 'Posterior limiting nucleus of the thalamus': 'TH',
 'Suprageniculate nucleus': 'TH',
 'Ethmoid nucleus of the thalamus': 'TH',
 'Retroethmoid nucleus': 'TH',
 'Anterior group of the dorsal thalamus': 'TH',
 'Anteroventral nucleus of thalamus': 'TH',
 'Anteromedial nucleus': 'TH',
 'Anteromedial nucleus dorsal part': 'TH',
 'Anteromedial nucleus ventral part': 'TH',
 'Anterodorsal nucleus': 'TH',
 'Interanteromedial nucleus of the thalamus': 'TH',
 'Interanterodorsal nucleus of the thalamus': 'TH',
 'Lateral dorsal nucleus of thalamus': 'TH',
 'Medial group of the dorsal thalamus': 'TH',
 'Intermediodorsal nucleus of the thalamus': 'TH',
 'Mediodorsal nucleus of thalamus': 'TH',
 'Mediodorsal nucleus of the thalamus central part': 'TH',
 'Mediodorsal nucleus of the thalamus lateral part': 'TH',
 'Mediodorsal nucleus of the thalamus medial part': 'TH',
 'Submedial nucleus of the thalamus': 'TH',
 'Perireunensis nucleus': 'TH',
 'Midline group of the dorsal thalamus': 'TH',
 'Paraventricular nucleus of the thalamus': 'TH',
 'Parataenial nucleus': 'TH',
 'Nucleus of reuniens': 'TH',
 'Xiphoid thalamic nucleus': 'TH',
 'Intralaminar nuclei of the dorsal thalamus': 'TH',
 'Rhomboid nucleus': 'TH',
 'Central medial nucleus of the thalamus': 'TH',
 'Paracentral nucleus': 'TH',
 'Central lateral nucleus of the thalamus': 'TH',
 'Parafascicular nucleus': 'TH',
 'Posterior intralaminar thalamic nucleus': 'TH',
 'Reticular nucleus of the thalamus': 'TH',
 'Geniculate group ventral thalamus': 'TH',
 'Intergeniculate leaflet of the lateral geniculate complex': 'TH',
 'Intermediate geniculate nucleus': 'TH',
 'Ventral part of the lateral geniculate complex': 'TH',
 'Ventral part of the lateral geniculate complex lateral zone': 'TH',
 'Ventral part of the lateral geniculate complex medial zone': 'TH',
 'Subgeniculate nucleus': 'TH',
 'Epithalamus': 'TH',
 'Medial habenula': 'TH',
 'Lateral habenula': 'TH',
 'Pineal body': 'TH',
 'Hypothalamus': 'BF',
 'Periventricular zone': 'BF',
 'Supraoptic nucleus': 'BF',
 'Accessory supraoptic group': 'BF',
 'Nucleus circularis': 'BF',
 'Paraventricular hypothalamic nucleus': 'BF',
 'Paraventricular hypothalamic nucleus magnocellular division': 'BF',
 'Paraventricular hypothalamic nucleus magnocellular division anterior magnocellular part': 'BF',
 'Paraventricular hypothalamic nucleus magnocellular division medial magnocellular part': 'BF',
 'Paraventricular hypothalamic nucleus magnocellular division posterior magnocellular part': 'BF',
 'Paraventricular hypothalamic nucleus magnocellular division posterior magnocellular part lateral zone': 'BF',
 'Paraventricular hypothalamic nucleus magnocellular division posterior magnocellular part medial zone': 'BF',
 'Paraventricular hypothalamic nucleus parvicellular division': 'BF',
 'Paraventricular hypothalamic nucleus parvicellular division anterior parvicellular part': 'BF',
 'Paraventricular hypothalamic nucleus parvicellular division medial parvicellular part dorsal zone': 'BF',
 'Paraventricular hypothalamic nucleus parvicellular division periventricular part': 'BF',
 'Periventricular hypothalamic nucleus anterior part': 'BF',
 'Periventricular hypothalamic nucleus intermediate part': 'BF',
 'Arcuate hypothalamic nucleus': 'BF',
 'Periventricular region': 'BF',
 'Anterodorsal preoptic nucleus': 'BF',
 'Anterior hypothalamic area': 'BF',
 'Anteroventral preoptic nucleus': 'BF',
 'Anteroventral periventricular nucleus': 'BF',
 'Dorsomedial nucleus of the hypothalamus': 'BF',
 'Dorsomedial nucleus of the hypothalamus anterior part': 'BF',
 'Dorsomedial nucleus of the hypothalamus posterior part': 'BF',
 'Dorsomedial nucleus of the hypothalamus ventral part': 'BF',
 'Median preoptic nucleus': 'BF',
 'Medial preoptic area': 'BF',
 'Vascular organ of the lamina terminalis': 'BF',
 'Posterodorsal preoptic nucleus': 'BF',
 'Parastrial nucleus': 'BF',
 'Suprachiasmatic preoptic nucleus': 'BF',
 'Periventricular hypothalamic nucleus posterior part': 'BF',
 'Periventricular hypothalamic nucleus preoptic part': 'BF',
 'Subparaventricular zone': 'BF',
 'Suprachiasmatic nucleus': 'BF',
 'Subfornical organ': 'BF',
 'Ventromedial preoptic nucleus': 'BF',
 'Ventrolateral preoptic nucleus': 'BF',
 'Hypothalamic medial zone': 'BF',
 'Anterior hypothalamic nucleus': 'BF',
 'Anterior hypothalamic nucleus anterior part': 'BF',
 'Anterior hypothalamic nucleus central part': 'BF',
 'Anterior hypothalamic nucleus dorsal part': 'BF',
 'Anterior hypothalamic nucleus posterior part': 'BF',
 'Mammillary body': 'BF',
 'Lateral mammillary nucleus': 'BF',
 'Medial mammillary nucleus': 'BF',
 'Medial mammillary nucleus median part': 'BF',
 'Medial mammillary nucleus lateral part': 'BF',
 'Medial mammillary nucleus medial part': 'BF',
 'Medial mammillary nucleus posterior part': 'BF',
 'Medial mammillary nucleus dorsal part': 'BF',
 'Supramammillary nucleus': 'BF',
 'Supramammillary nucleus lateral part': 'BF',
 'Supramammillary nucleus medial part': 'BF',
 'Tuberomammillary nucleus': 'BF',
 'Tuberomammillary nucleus dorsal part': 'BF',
 'Tuberomammillary nucleus ventral part': 'BF',
 'Medial preoptic nucleus': 'BF',
 'Medial preoptic nucleus central part': 'BF',
 'Medial preoptic nucleus lateral part': 'BF',
 'Medial preoptic nucleus medial part': 'BF',
 'Dorsal premammillary nucleus': 'BF',
 'Ventral premammillary nucleus': 'BF',
 'Paraventricular hypothalamic nucleus descending division': 'BF',
 'Paraventricular hypothalamic nucleus descending division dorsal parvicellular part': 'BF',
 'Paraventricular hypothalamic nucleus descending division forniceal part': 'BF',
 'Paraventricular hypothalamic nucleus descending division lateral parvicellular part': 'BF',
 'Paraventricular hypothalamic nucleus descending division medial parvicellular part ventral zone': 'BF',
 'Ventromedial hypothalamic nucleus': 'BF',
 'Ventromedial hypothalamic nucleus anterior part': 'BF',
 'Ventromedial hypothalamic nucleus central part': 'BF',
 'Ventromedial hypothalamic nucleus dorsomedial part': 'BF',
 'Ventromedial hypothalamic nucleus ventrolateral part': 'BF',
 'Posterior hypothalamic nucleus': 'BF',
 'Hypothalamic lateral zone': 'BF',
 'Lateral hypothalamic area': 'BF',
 'Lateral preoptic area': 'BF',
 'Preparasubthalamic nucleus': 'BF',
 'Parasubthalamic nucleus': 'BF',
 'Perifornical nucleus': 'BF',
 'Retrochiasmatic area': 'BF',
 'Subthalamic nucleus': 'BF',
 'Tuberal nucleus': 'BF',
 'Zona incerta': 'BF',
 'Dopaminergic A13 group': 'BF',
 'Fields of Forel': 'BF',
 'Median eminence': 'BF'}

In [5]:
# your_project/analysis/spatial_analysis.py

import pandas as pd
import numpy as np
import scanpy as sc
import anndata as ad
from sklearn.utils import check_random_state
from scipy.sparse import csr_matrix
from libpysal.weights import WSP
from esda.moran import Moran
from typing import List, Dict
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
from scipy.spatial.distance import pdist, squareform
from anndata import AnnData
import concurrent.futures
import warnings

import logging

# Configure root logging for the helpers in this cell: records at INFO level and
# above, each rendered as "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


def concatenate_and_intersect(
    adata_list: list[ad.AnnData], 
    key: str = None
) -> ad.AnnData:
    """
    Concatenates AnnData objects along observations, keeping only their shared variables.

    Args:
        adata_list: Non-empty list of AnnData objects to concatenate.
        key: Optional name forwarded to `ad.concat` as `label`, i.e. the `obs`
            column in which the origin of each observation is recorded.

    Returns:
        Concatenated AnnData object restricted to the variables present in every input.

    Raises:
        ValueError: If `adata_list` is empty.
    """
    if not adata_list:
        raise ValueError("The list of AnnData objects is empty.")

    # Successively narrow the variable names down to those shared by all inputs.
    common_vars = adata_list[0].var_names
    for adata in adata_list[1:]:
        common_vars = np.intersect1d(common_vars, adata.var_names)

    # `Index.isin` is a vectorized membership test; it avoids scanning
    # `common_vars` once per variable name as a Python-level `in` would.
    filtered_adatas = [
        adata[:, adata.var_names.isin(common_vars)] for adata in adata_list
    ]

    # Every filtered object now holds only the shared variables, so the outer
    # join has nothing extra to add.
    return ad.concat(filtered_adatas, axis=0, join='outer', merge='same', label=key)



def hierarchical_sample(
    adata: ad.AnnData,
    groupby_cols: list[str],
    n_samples: int | float | None = None,
    fraction: float | None = None,
    random_state: int | np.random.RandomState | None = None,
) -> ad.AnnData:
    """
    Randomly samples cells within every combination of several categorical `obs` columns.

    Cells are grouped by all columns in `groupby_cols` jointly, so each leaf group is
    one observed combination of values, and the requested number or fraction of cells
    is drawn (without replacement) from every leaf group. Rows excluded by the pandas
    `groupby` defaults (e.g. missing values in a grouping column) are never sampled.

    Args:
        adata: The AnnData object to sample from.
        groupby_cols: Column names from `adata.obs` that define the grouping hierarchy.
                      A single column name given as a string is also accepted.
        n_samples: The number of cells to draw from each leaf group; groups smaller
                   than this are kept whole. A non-integer value is interpreted as
                   the fraction of each leaf group to keep.
        fraction: The fraction of cells to keep from each leaf group.
                  Mutually exclusive with `n_samples`.
        random_state: Seed or `RandomState` used for all draws, for reproducibility.

    Returns:
        A new AnnData object containing the sampled cells.

    Raises:
        ValueError: If neither or both of `n_samples` and `fraction` are given, or if
            `groupby_cols` is empty.
    """
    import numbers

    if (n_samples is None) == (fraction is None):
        raise ValueError("Must specify exactly one of 'n_samples' or 'fraction'.")

    group_keys = [groupby_cols] if isinstance(groupby_cols, str) else list(groupby_cols)
    if not group_keys:
        raise ValueError("'groupby_cols' must contain at least one column name.")

    rng = check_random_state(random_state)

    # Resolve the request into either a per-group count or a per-group fraction.
    # `numbers.Integral` also covers NumPy integer scalars, which are not `int`.
    if fraction is not None:
        frac, count = fraction, None
    elif isinstance(n_samples, numbers.Integral):
        frac, count = None, int(n_samples)
    else:
        frac, count = float(n_samples), None

    sampled_labels = []
    # Grouping on all keys at once makes every column, including the first one,
    # take part in defining the leaf groups.
    for _, group in adata.obs.groupby(group_keys, observed=True):
        if frac is not None:
            picked = group.sample(frac=frac, random_state=rng)
        else:
            picked = group.sample(n=min(count, len(group)), random_state=rng)
        sampled_labels.extend(picked.index)

    if not sampled_labels:
        # Nothing was drawn (e.g. no cells, or fraction rounded to zero everywhere).
        return adata[:0, :].copy()

    return adata[pd.Index(sampled_labels), :].copy()


def subset_anndata(
    adata: 'anndata.AnnData', 
    conditions: dict
) -> 'anndata.AnnData':
    """
    Returns a copy of `adata` restricted to the cells whose `.obs` values equal every requested value.

    Args:
        adata: The AnnData object to filter.
        conditions: Mapping from `adata.obs` column name to the value that column
            must equal. All entries must hold for a cell to be kept; an empty
            mapping keeps every cell.

    Returns:
        A new AnnData object holding only the cells that satisfy all conditions.
    """
    import anndata as ad
    import pandas as pd

    obs = adata.obs

    # Every cell is selected initially; each condition then narrows the selection.
    keep = pd.Series([True] * adata.shape[0], index=obs.index)
    for column, wanted in conditions.items():
        matches = obs[column] == wanted
        keep = keep & matches

    selected = adata[keep, :]
    return selected.copy()


def compute_distance_matrix(embedding: np.ndarray) -> np.ndarray:
    """
    Builds the full Euclidean distance matrix between the rows of an embedding.

    Args:
        embedding: 2D array with one point per row and one coordinate per column.

    Returns:
        Square 2D array whose entry (i, j) is the Euclidean distance between
        row i and row j of `embedding`.
    """
    # `pdist` yields the condensed (upper-triangle) form; expand it to a square matrix.
    condensed_distances = pdist(embedding, metric='euclidean')
    return squareform(condensed_distances)

def compute_weight_matrix_from_distances(distance_matrix: np.ndarray) -> np.ndarray:
    """
    Converts a pairwise distance matrix into inverse-distance weights.

    Args:
        distance_matrix: 2D array of pairwise distances. It is not modified.

    Returns:
        A new 2D array holding `1 / distance` for every pair, with the diagonal
        (self-distance) forced to zero. Off-diagonal zero distances are left as
        `inf`, since only the diagonal is reset.
    """
    # Division by zero is expected on the diagonal, so silence that warning only.
    with np.errstate(divide='ignore'):
        inverse_distances = 1 / distance_matrix

    # A point carries no weight towards itself.
    np.fill_diagonal(inverse_distances, 0)
    return inverse_distances

def scale_values(values: np.ndarray, scaling_method: str, apply_log: bool, log_before_scaling: bool) -> np.ndarray:
    """
    Applies an optional `log1p` transform and an optional scaler to a vector of values.

    Args:
        values: The values to transform.
        scaling_method: Which scaler to fit on the values: 'minmax', 'standard' or
            'robust'. Any other value (including None) leaves the values unscaled.
        apply_log: Whether a `log1p` transform is applied at all.
        log_before_scaling: When `apply_log` is set, whether the transform happens
            before scaling (True) or after it (False).

    Returns:
        The transformed values; flattened to 1D when a scaler was applied.
    """
    log_first = apply_log and log_before_scaling
    log_last = apply_log and not log_before_scaling

    if log_first:
        values = np.log1p(values)

    if scaling_method in ('minmax', 'standard', 'robust'):
        scaler_classes = {
            'minmax': MinMaxScaler,
            'standard': StandardScaler,
            'robust': RobustScaler,
        }
        # Scalers expect a 2D column, so reshape in and flatten back out.
        as_column = values.reshape(-1, 1)
        values = scaler_classes[scaling_method]().fit_transform(as_column).flatten()

    if log_last:
        values = np.log1p(values)

    return values

def neighbor_compute_moran_i(sub_adata: AnnData, value_key: str, category: str, use_embedding: bool = False, embedding_key: str = None, scaling_method: str = None, apply_log: bool = False, log_before_scaling: bool = False, connectivity_key: str = 'connectivities') -> dict:
    """
    Computes Moran's I spatial autocorrelation for a subset of cells.

    The spatial weights come either from inverse pairwise distances in an embedding
    (when `use_embedding` is set and `embedding_key` is given) or from a stored
    connectivity matrix in `sub_adata.obsp`.

    Args:
        sub_adata: Subset of AnnData object for specific cell type.
        value_key: The key in `sub_adata.obs` containing the values to analyze.
        category: The categorical variable in `sub_adata.obs` to group by.
        use_embedding: Whether to use embedding for distance calculation.
        embedding_key: The key in `sub_adata.obsm` for the embedding. If it is None,
            the connectivity matrix is used even when `use_embedding` is True.
        scaling_method: The method to use for scaling ('minmax', 'standard', 'robust', or None).
        apply_log: Whether to apply log transformation.
        log_before_scaling: Whether to apply log transformation before scaling.
        connectivity_key: The key in `sub_adata.obsp` containing the connectivities matrix.

    Returns:
        A dictionary with the category value of the first cell, "Moran's I",
        "P-value" (the `p_norm` attribute of the Moran result) and "num_cell".

    Raises:
        KeyError: If the requested embedding or connectivity key is missing.
    """
    try:
        if use_embedding and embedding_key is not None:
            embedding = sub_adata.obsm[embedding_key]
            distance_matrix = compute_distance_matrix(embedding)
            weight_matrix = compute_weight_matrix_from_distances(distance_matrix)
            weights = WSP(csr_matrix(weight_matrix))
        else:
            weights = WSP(sub_adata.obsp[connectivity_key])
    except KeyError as e:
        raise KeyError(f"Key error: {e}") from e

    values = sub_adata.obs[value_key].values
    values = scale_values(values, scaling_method, apply_log, log_before_scaling)
    moran = Moran(values, weights.to_W())
    return {
        category: sub_adata.obs[category].unique()[0],
        "Moran's I": moran.I,
        "P-value": moran.p_norm,
        "num_cell": len(values)
    }

def neighbor_process_cell_type(adata: AnnData, cell_type: str, value_key: str, category: str, use_embedding: bool = False, embedding_key: str = None, scaling_method: str = None, apply_log: bool = False, log_before_scaling: bool = False, connectivity_key: str = 'connectivities') -> pd.DataFrame:
    """
    Processes a specific cell type to compute Moran's I.

    Args:
        adata: The AnnData object to analyze.
        cell_type: The specific cell type to process.
        value_key: The key in `adata.obs` containing the values to analyze.
        category: The categorical variable in `adata.obs` to group by.
        use_embedding: Whether to use embedding for distance calculation.
        embedding_key: The key in `adata.obsm` for the embedding.
        scaling_method: The method to use for scaling ('minmax', 'standard', 'robust', or None).
        apply_log: Whether to apply log transformation.
        log_before_scaling: Whether to apply log transformation before scaling.
        connectivity_key: The key in `adata.obsp` containing the connectivities matrix.

    Returns:
        A one-row DataFrame with Moran's I results for the cell type, or an empty
        DataFrame when the cell type has 10 cells or fewer.
    """
    mask = adata.obs[category] == cell_type
    num_cell = int(mask.sum())
    # Groups of 10 cells or fewer are skipped.
    if num_cell > 10:
        sub_adata = adata[mask].copy()
        moranI_data = neighbor_compute_moran_i(
            sub_adata, value_key, category, use_embedding, embedding_key,
            scaling_method, apply_log, log_before_scaling,
            connectivity_key=connectivity_key,
        )
        return pd.DataFrame([moranI_data])
    return pd.DataFrame()


def compute_neighbor_moran_i_by_category(
    adata: AnnData, 
    value_key: str, 
    category: str = "celltype", 
    connectivity_key: str = 'connectivities',
    use_embedding: bool = False,
    embedding_key: str = None,
    scaling_method: str = None,
    apply_log: bool = False,
    log_before_scaling: bool = False,
    max_workers: int = None,
    specific_celltype: list = None  # Restrict the analysis to these cell types
) -> pd.DataFrame:
    """
    Computes Moran's I spatial autocorrelation for each cell type, or for selected cell types, in parallel.

    Args:
        adata: The AnnData object to analyze.
        value_key: The key in `adata.obs` containing the values to analyze.
        category: The categorical variable in `adata.obs` to group by.
        connectivity_key: The key in `adata.obsp` containing the connectivities matrix.
        use_embedding: Whether to use embedding for distance calculation.
        embedding_key: The key in `adata.obsm` for the embedding.
        scaling_method: The method to use for scaling ('minmax', 'standard', 'robust', or None).
        apply_log: Whether to apply log transformation.
        log_before_scaling: Whether to apply log transformation before scaling.
        max_workers: The maximum number of threads to use for parallel processing.
        specific_celltype: Cell types to analyze, as a list or a single name given
            as a string. If None or empty, every value of `category` is analyzed.

    Returns:
        A DataFrame indexed by `category` and sorted by descending Moran's I, or an
        empty DataFrame when no cell type produced a result.
    """
    logging.info(f"Starting Moran's I computation with value key '{value_key}'.")

    # Determine the cell types to analyze
    if specific_celltype is not None and len(specific_celltype) > 0:
        # A bare string is one cell type; iterating it would yield single characters.
        if isinstance(specific_celltype, str):
            specific_celltype = [specific_celltype]
        logging.info(f"Analyzing specific cell type: {specific_celltype}")
        top_level_types = list(specific_celltype)
    else:
        top_level_types = list(adata.obs[category].unique())
        logging.info(f"Identified {len(top_level_types)} unique cell types in category '{category}'.")

    # Define a function to process each cell type, to be used with the thread pool
    def process_cell_type(cell_type):
        logging.info(f"Processing cell type: {cell_type}")
        cell_type_df = neighbor_process_cell_type(
            adata, cell_type, value_key, category,
            use_embedding, embedding_key, scaling_method, apply_log, log_before_scaling,
            connectivity_key=connectivity_key,
        )
        if cell_type_df.empty:
            logging.warning(f"No data available for cell type: {cell_type} (insufficient number of cells or other issues).")
        return cell_type_df

    # Use ThreadPoolExecutor to parallelize the execution if analyzing multiple cell types
    if len(top_level_types) > 1:
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            results = list(executor.map(process_cell_type, top_level_types))
    else:
        # Zero or one cell type: process directly without a thread pool
        results = [process_cell_type(cell_type) for cell_type in top_level_types]

    # Concatenate all non-empty results in a single pass
    non_empty_results = [cell_type_df for cell_type_df in results if not cell_type_df.empty]

    if not non_empty_results:
        logging.warning("No data was processed successfully. Returning an empty DataFrame.")
        return pd.DataFrame()

    result_df = pd.concat(non_empty_results)
    result_df = result_df.set_index(category)
    result_df = result_df.sort_values("Moran's I", ascending=False)
    logging.info("Completed Moran's I computation.")

    return result_df





def identify_nearby_cells(
    merge_adata: ad.AnnData,
    label_column: str = 'celltype',
    target_col: str = 'datatype',
    targed_label: str = 'sn',
    threshold: float = 0.1,
    new_label_col: str = 'nearby_label'
) -> ad.AnnData:
    """
    Identifies and labels nearby cells in an AnnData object based on a threshold in the neighbor graph.

    For every value of `label_column`, cells whose `target_col` equals `targed_label`
    act as sources. A cell with the same label but a different `target_col` value is
    "nearby" when its summed connectivity to those sources (read from
    `merge_adata.obsp['connectivities']`) exceeds `threshold`.

    Side effect: `merge_adata.obs[new_label_col]` is written on the input object, set to
    'near_<celltype>' for nearby cells and 'unlabeled' for all others.

    Args:
        merge_adata: The AnnData object containing cells with connectivity information.
        label_column: The column name in `merge_adata.obs` which contains the labels for the target cells.
        target_col: The column name in `merge_adata.obs` representing labeled and unlabeled cells.
        targed_label: The label in `target_col` to consider as the source of nearby cells.
        threshold: The threshold for considering a cell "near" based on the neighbor graph connectivity.
        new_label_col: The column name to store new labels for nearby cells.

    Returns:
        A new AnnData object containing only the nearby cells that were identified
        (with zero cells if none were found).
    """
    obs = merge_adata.obs
    # The graph does not change between cell types, so look it up once.
    neighbor_graph = merge_adata.obsp['connectivities']

    # Labels are collected positionally and written back in one assignment.
    nearby_labels = np.full(merge_adata.shape[0], 'unlabeled', dtype=object)
    nearby_groups = []

    for celltype in obs[label_column].unique():
        print(f"Processing celltype: {celltype}")
        sn_mask = (obs[target_col] == targed_label) & (obs[label_column] == celltype)
        cellbin_mask = (obs[target_col] != targed_label) & (obs[label_column] == celltype)
        sn_indices = np.where(sn_mask)[0]
        cellbin_indices = np.where(cellbin_mask)[0]

        if len(sn_indices) == 0 or len(cellbin_indices) == 0:
            print(f"No labeled or unlabeled cells found for celltype: {celltype}")
            continue

        # Total connectivity from all source cells of this type to every cell.
        neighbor_sums = np.asarray(neighbor_graph[sn_indices].sum(axis=0)).flatten()
        nearby_cellbin_indices = cellbin_indices[neighbor_sums[cellbin_indices] > threshold]

        if len(nearby_cellbin_indices) > 0:
            nearby_groups.append(nearby_cellbin_indices)
            nearby_labels[nearby_cellbin_indices] = f'near_{celltype}'
        else:
            print(f"No nearby unlabeled cells found for celltype: {celltype}")

    merge_adata.obs[new_label_col] = nearby_labels

    if not nearby_groups:
        # `np.concatenate` rejects an empty list; return an empty selection instead.
        print("Total nearby unlabeled cells found: 0")
        return merge_adata[:0].copy()

    all_nearby_cellbin_indices = np.concatenate(nearby_groups)
    print(f"Total nearby unlabeled cells found: {len(all_nearby_cellbin_indices)}")

    return merge_adata[all_nearby_cellbin_indices].copy()


In [6]:
import pandas as pd
import concurrent.futures

def read_csv_files_concurrently(file_dict):
    """
    Reads multiple CSV files concurrently and returns a dictionary of DataFrames.

    The files are read in a thread pool, but the returned dictionary always lists
    the keys in the same order as `file_dict`, regardless of which read finishes first.

    :param file_dict: Dictionary where the key is a variable name and the value is the path to the CSV file.
    :return: Dictionary where the key is the variable name and the value is the corresponding DataFrame.
    :raises Exception: Whatever `pd.read_csv` raised for the first failing file in input order.
    """
    def load_csv(key, path):
        print(f"Reading {key}")
        return pd.read_csv(path)

    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Submit everything up front so the reads overlap ...
        futures = {key: executor.submit(load_csv, key, path) for key, path in file_dict.items()}
        # ... then collect in submission order to keep the result order deterministic.
        return {key: future.result() for key, future in futures.items()}

def list_files_matching_criteria(directory: str, condition: str = None, regex: str = None, separator: str = "_", id_index: int = 0) -> dict:
    """
    Lists directory entries matching a condition or regular expression, keyed by an ID taken from the name.

    Args:
        directory: The directory whose entries (as returned by os.listdir) are searched.
        condition: A Python expression, evaluated once per entry with the entry name bound
                   to 'file'; entries for which it is truthy are kept. It is run with eval(),
                   so it must come from a trusted source. Takes precedence over ``regex``.
        regex: A regular expression; entries where ``re.search`` finds a match are kept.
               Only used when ``condition`` is empty or None.
        separator: The separator used to split the filenames.
        id_index: The index of the split result to be used as the dictionary key.

    Returns:
        A dictionary where the keys are the specified parts of the filenames (split by the separator)
        and the values are the entry names joined onto ``directory``. If several entries yield
        the same key, the last one in sorted order is kept and a notice is printed.

    Raises:
        IndexError: If ``id_index`` is out of range for a selected filename's split parts.
    """
    files = sorted(os.listdir(directory))

    # Filter files based on condition or regex
    if condition:
        # SECURITY: eval() executes arbitrary code. Never pass text from an untrusted source.
        # The expression sees module globals plus the single local name 'file'.
        filtered_files = [file for file in files if eval(condition, globals(), {"file": file})]
    elif regex:
        pattern = re.compile(regex)
        filtered_files = [file for file in files if pattern.search(file)]
    else:
        filtered_files = files

    # Build the mapping while remembering which filenames produced each key, so that
    # silent overwrites caused by a too-coarse separator/id_index can be reported.
    paths = {}
    sources = {}
    for file in filtered_files:
        parts = file.split(separator)
        try:
            key = parts[id_index]
        except IndexError:
            raise IndexError(
                f"id_index {id_index} is out of range for {file!r} split by {separator!r} "
                f"({len(parts)} part(s))"
            ) from None
        paths[key] = os.path.join(directory, file)
        sources.setdefault(key, []).append(file)

    for key, names in sources.items():
        if len(names) > 1:
            print(
                f"Warning: ID {key!r} is shared by {len(names)} files "
                f"({', '.join(names)}); keeping {names[-1]!r}"
            )

    return paths


def load_data_in_parallel(file_paths: dict, load_function: callable, max_workers: int = 10) -> dict:
    """
    Loads data from multiple files in parallel worker processes using a specified load function.

    Args:
        file_paths: A dictionary where the keys are identifiers and the values are file paths.
        load_function: The function to use for loading data from each file path. It is called
                       as ``load_function(path)`` in a worker process, so it and its return
                       value must be picklable.
        max_workers: Maximum number of worker processes (default 10, the previous fixed value).
                     ``None`` lets ProcessPoolExecutor choose its own default.

    Returns:
        A dictionary where the keys are identifiers and the values are the loaded data, in the
        order of ``file_paths``. Entries whose load raised an exception are reported on stdout
        and omitted from the result.
    """
    data = {}
    if not file_paths:
        # Nothing to load: skip creating a process pool altogether.
        return data

    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(load_function, path): key for key, path in file_paths.items()}
        # Iterate in submission order so the result keeps the order of file_paths.
        for future, key in futures.items():
            try:
                data[key] = future.result()
            except Exception as exc:
                # Best effort: one unreadable file must not discard the others.
                print(f"Error loading file {key}: {exc}")
    return data

# True Run

In [7]:
# Directory scanned below for the per-region cell-bin .h5ad files.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
reg_path = "/home1/jijh/st_project/cellbin_analysis/annotated_cell_bins/regress_harmony/"

In [8]:
# Keep only entries whose name contains both 'coorect' and 'h5ad'. The spelling 'coorect'
# is deliberate: it matches the on-disk filenames
# (e.g. HZ12M01_coorect_BF_regress_harmony_cellbin.h5ad), so do not "fix" it here.
# separator="." with the default id_index=0 keys each path by the text before the first dot.
reg_paths = list_files_matching_criteria(reg_path, "'coorect' in file and 'h5ad' in file", separator=".")

In [9]:
# Re-key every path as "<field 0>_<field 2>" of its underscore-split name; for a key such as
# "HZ12M01_coorect_BF_regress_harmony_cellbin" this yields "HZ12M01_BF".
region_paths = {}
for key, path in reg_paths.items():
    parts = key.split("_")
    region_paths[f"{parts[0]}_{parts[2]}"] = path

In [10]:
# Show the re-keyed mapping as a visual sanity check before loading.
region_paths

{'HZ12M01_BF': '/home1/jijh/st_project/cellbin_analysis/annotated_cell_bins/regress_harmony/HZ12M01_coorect_BF_regress_harmony_cellbin.h5ad',
 'HZ12M01_PFC': '/home1/jijh/st_project/cellbin_analysis/annotated_cell_bins/regress_harmony/HZ12M01_coorect_PFC_regress_harmony_cellbin.h5ad',
 'HZ12M01_STR': '/home1/jijh/st_project/cellbin_analysis/annotated_cell_bins/regress_harmony/HZ12M01_coorect_STR_regress_harmony_cellbin.h5ad',
 'HZ12M02_BF': '/home1/jijh/st_project/cellbin_analysis/annotated_cell_bins/regress_harmony/HZ12M02_coorect_BF_regress_harmony_cellbin.h5ad',
 'HZ12M02_ECT': '/home1/jijh/st_project/cellbin_analysis/annotated_cell_bins/regress_harmony/HZ12M02_coorect_ECT_regress_harmony_cellbin.h5ad',
 'HZ12M02_HPF': '/home1/jijh/st_project/cellbin_analysis/annotated_cell_bins/regress_harmony/HZ12M02_coorect_HPF_regress_harmony_cellbin.h5ad',
 'HZ12M02_RS': '/home1/jijh/st_project/cellbin_analysis/annotated_cell_bins/regress_harmony/HZ12M02_coorect_RS_regress_harmony_cellbin.h5ad'

In [11]:
# Read every listed .h5ad with sc.read_h5ad through the process-pool helper
# load_data_in_parallel; the result maps each region_paths key to the loaded object.
# Files whose load raises are printed by the helper and are absent from the dict.
region_datas = load_data_in_parallel(region_paths, sc.read_h5ad)

In [12]:
# For each loaded dataset, add an obs column "new_fine": a string copy of "fine" in which
# every cell whose raw "Chat" value is greater than zero is relabelled "ChN".
for key, adata in region_datas.items():
    try:
        print(key)
        # Built before obs is touched, so a failure here leaves the dataset unchanged.
        # Assumes adata.raw.X is a sparse matrix (needs .toarray()) — TODO confirm.
        mask = adata.raw[:, "Chat"].X.toarray().flatten() > 0
        adata.obs["new_fine"] = adata.obs["fine"].astype(str)
        adata.obs.loc[mask, "new_fine"] = "ChN"
    except Exception as exc:
        # Best effort: report the failing dataset and continue with the next one.
        print(f"{key}: {exc}")

HZ12M01_BF
HZ12M01_PFC
HZ12M01_STR
HZ12M02_BF
HZ12M02_ECT
HZ12M02_HPF
HZ12M02_RS
HZ12M02_TH
HZ12M03_BF
HZ12M03_PFC
HZ12M03_STR
HZ12M04_BF
HZ12M04_ECT
HZ12M04_HPF
HZ12M04_RS
HZ12M04_TH
HZ12M05_BF
HZ12M05_PFC
HZ12M05_STR
HZ12M06_BF
HZ12M06_ECT
HZ12M06_HPF
HZ12M06_RS
HZ12M06_TH
HZ2M01_BF
HZ2M01_PFC
HZ2M01_STR
HZ2M02_BF
HZ2M02_ECT
HZ2M02_HPF
HZ2M02_RS
HZ2M02_TH
HZ2M03_BF
HZ2M03_PFC
HZ2M03_STR
HZ2M04_BF
HZ2M04_ECT
HZ2M04_HPF
HZ2M04_RS
HZ2M04_TH
HZ2M05_BF
HZ2M05_PFC
HZ2M05_STR
HZ2M06_BF
HZ2M06_ECT
HZ2M06_HPF
HZ2M06_RS
HZ2M06_TH
HZ3M01_BF
HZ3M01_PFC
HZ3M01_STR
HZ3M02_BF
HZ3M02_ECT
HZ3M02_HPF
HZ3M02_RS
HZ3M02_TH
HZ3M03_BF
HZ3M03_PFC
HZ3M03_STR
HZ3M04_BF
HZ3M04_ECT
HZ3M04_HPF
HZ3M04_RS
HZ3M04_TH
HZ3M05_BF
HZ3M05_PFC
HZ3M05_STR
HZ3M06_BF
HZ3M06_ECT
HZ3M06_HPF
HZ3M06_RS
HZ3M06_TH
HZ4M03_BF
HZ4M03_PFC
HZ4M03_STR
HZ4M05_BF
HZ4M05_PFC
HZ4M05_STR
HZ4MP101_BF
HZ4MP101_PFC
HZ4MP101_STR
HZ4MP102_BF
HZ4MP102_ECT
HZ4MP102_HPF
HZ4MP102_RS
HZ4MP102_TH
HZ4MP202_BF
HZ4MP202_ECT
HZ4MP202_HPF
HZ4MP202_RS
HZ4MP2