Comparative analysis of border vs. high-confidence hippocampal mappings

In [None]:
import os
import sys
from pathlib import Path
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors 
import matplotlib.patches as mpatches
import seaborn as sns
import session_info

In [None]:
import matplotlib as mpl
# Font type 42 makes the PDF backend embed TrueType fonts, which keeps text
# editable when the exported figures are opened in a vector-graphics editor.
mpl.rcParams['pdf.fonttype'] = 42

In [None]:
# Default figure size, (width, height) in inches, for figures that do not set their own.
plt.rcParams["figure.figsize"] = (4, 4)

In [None]:
# Default figure resolution in dots per inch.
plt.rcParams["figure.dpi"] = 500

In [None]:
#plt.rcParams["font.size"] = 24

#### Import data

In [None]:
# Root of the project tree; every other location is derived from it.
base_dir = Path("/path/to/project")

# Input locations
data_dir = base_dir / "data/h5ad"
csv_dir = base_dir / "data/mapmycells"

# Output locations (output_dir is the same folder as the h5ad inputs)
output_dir = data_dir
outs = base_dir / "results"
fig_dir = outs / "figures"

# Create each output directory, including missing parents; existing ones are left alone.
for directory in (output_dir, outs, fig_dir):
    directory.mkdir(parents=True, exist_ok=True)

In [None]:
# Directory scanpy writes to when a plotting call is given `save=...`.
sc.settings.figdir = fig_dir

In [None]:
# Load the AnnData object from the h5ad input directory.
adata = sc.read_h5ad(data_dir / "03_neurons-clean-scvi.h5ad")

In [None]:
# Per-sample experimental design, keyed by the value found in obs['sample_id'].
metadata = {
    '1': {'group': 'Sham-GFP', 'group_id': 'A', 'condition': 'Sham', 'treatment': 'GFP', 'side': 'Ipsilateral'},
    '3': {'group': 'Sham-VEGFC', 'group_id': 'B', 'condition': 'Sham', 'treatment': 'VEGFC', 'side': 'Ipsilateral'},
    '5': {'group': 'TBI-GFP', 'group_id': 'C', 'condition': 'TBI', 'treatment': 'GFP', 'side': 'Ipsilateral'},
    '6': {'group': 'TBI-GFP', 'group_id': 'D', 'condition': 'TBI', 'treatment': 'GFP', 'side': 'Contralateral'},
    '7': {'group': 'TBI-VEGFC', 'group_id': 'E', 'condition': 'TBI', 'treatment': 'VEGFC', 'side': 'Ipsilateral'},
    '8': {'group': 'TBI-VEGFC', 'group_id': 'F', 'condition': 'TBI', 'treatment': 'VEGFC', 'side': 'Contralateral'},
}

# Broadcast each design field onto the cells through their sample_id.
# Sample ids that are not keys of `metadata` end up as NaN in the new columns.
sample_ids = adata.obs['sample_id']
for field in ('group', 'group_id', 'condition', 'treatment', 'side'):
    lookup = {sample: info[field] for sample, info in metadata.items()}
    adata.obs[field] = sample_ids.map(lookup)

# Cell output: number of cells per experimental group.
adata.obs['group'].value_counts()

In [None]:
# Cell output: number of cells per group_id.
adata.obs.group_id.value_counts()

In [None]:
# Snapshot X at each processing stage so every representation stays accessible.
# NOTE(review): the 'counts' layer assumes X holds raw counts on load — confirm.
adata.layers['counts'] = adata.X.copy()
# Library-size normalisation with scanpy's default target, modifying X in place.
sc.pp.normalize_total(adata)
adata.layers['normalized'] = adata.X.copy()
# log1p transform of the normalised values, again in place.
sc.pp.log1p(adata)
adata.layers['log1p'] = adata.X.copy()
# Store a copy of the log-normalised object in .raw.
adata.raw = adata.copy()

In [None]:
# Keep a copy of the cell_type column as it was loaded, under the name cell_class.
adata.obs['cell_class'] = adata.obs['cell_type'].copy()

In [None]:
# MapMyCells hierarchical-mapping export; the first four lines of the file are
# skipped before the header row is read.
mapmycells_csv = csv_dir / '03_neurons-clean_10xWholeMouseBrain(CCN20230722)_HierarchicalMapping_UTC_1749244668001.csv'
labels_df = pd.read_csv(mapmycells_csv, skiprows=4)

#### Leiden Clustering

In [None]:
# Neighbour graph built on the "X_scVI" representation rather than on X.
sc.pp.neighbors(adata, use_rep="X_scVI")
# UMAP embedding and Leiden clusters (stored in obs['leiden']), both computed after the graph.
sc.tl.umap(adata)
sc.tl.leiden(adata, resolution=1, key_added="leiden")

In [None]:
# UMAP coloured by Leiden cluster.
sc.pl.umap(adata, color="leiden")

### Merge mapmycells annotations

In [None]:
# Print the first identifiers from both sources to compare their format before joining on them.
print(adata.obs_names[:5])
print(labels_df['cell_id'].head())

In [None]:
# Index the MapMyCells table by cell id so each column can serve as a lookup.
# (Re-running this cell fails: 'cell_id' is no longer a column after the first run.)
labels_df = labels_df.set_index('cell_id')

# Annotation columns copied onto adata.obs, one block per taxonomy level.
columns_to_map = [
    'class_label', 'class_name', 'class_bootstrapping_probability',
    'subclass_label', 'subclass_name', 'subclass_bootstrapping_probability',
    'supertype_label', 'supertype_name', 'supertype_bootstrapping_probability',
    'cluster_label', 'cluster_name', 'cluster_alias', 'cluster_bootstrapping_probability',
]

# Look each barcode up in the MapMyCells column; barcodes missing from the
# table receive NaN.
cell_ids = adata.obs_names
for col in columns_to_map:
    annotation = labels_df[col]
    adata.obs[col] = cell_ids.map(annotation)

#print(adata.obs[columns_to_map].head())

In [None]:
# Cell output: AnnData summary.
adata

In [None]:
# UMAP panels coloured by the bootstrapping probability at three taxonomy levels.
sc.pl.umap(
    adata,
    color=[
        'supertype_bootstrapping_probability',
        'subclass_bootstrapping_probability',
        'class_bootstrapping_probability',
    ],
    cmap='viridis',
    wspace=0.4,
)

In [None]:
# Cell output: number of cells per mapped subclass name.
adata.obs['subclass_name'].value_counts()

### assign labels to clusters 

In [None]:
def _most_frequent(labels):
    """Return the value that occurs most often in *labels*."""
    return labels.value_counts().idxmax()


# Majority subclass_name within every Leiden cluster.
majority_subclass_per_cluster = adata.obs.groupby('leiden')['subclass_name'].agg(_most_frequent)

# Every cell takes the majority label of its cluster.
adata.obs['cell_type'] = adata.obs['leiden'].map(majority_subclass_per_cluster)
print(adata.obs[['leiden', 'cell_type']].head())

In [None]:
# Make cell_type categorical and drop categories that no cell uses.
adata.obs['cell_type'] = (
    adata.obs['cell_type'].astype('category').cat.remove_unused_categories()
)

# Order the categories in the order value_counts() returns them.
cell_type_counts = adata.obs['cell_type'].value_counts()
ordered_categories = cell_type_counts.index.tolist()
adata.obs['cell_type'] = adata.obs['cell_type'].cat.reorder_categories(
    ordered_categories, ordered=True
)

# One husl colour per category, stored as hex strings in adata.uns['cell_type_colors'].
husl_colors = sns.color_palette('husl', n_colors=len(ordered_categories))
adata.uns['cell_type_colors'] = list(map(mcolors.to_hex, husl_colors))

sc.pl.umap(
    adata,
    color=['leiden', 'Slc17a6', 'Slc17a7', 'Gad1', 'Gad2', 'cell_type'],
)

In [None]:
# Cell output: number of cells per assigned cell type.
adata.obs.cell_type.value_counts()

In [None]:
# Two-column table (cell_type, count), largest count first.
cell_type_counts = (
    adata.obs['cell_type']
    .value_counts()
    .reset_index()
    .set_axis(['cell_type', 'count'], axis='columns')
    .sort_values('count', ascending=False)
)

# Horizontal bars: one per cell type, length = number of cells.
plt.figure(figsize=(10, 8))
sns.barplot(data=cell_type_counts, x='count', y='cell_type', palette='viridis')

plt.title('Cell type composition (MapMyCells subclass labels)', fontsize=16)
plt.ylabel('Cell type', fontsize=14)
plt.xlabel('Number of cells', fontsize=14)
plt.yticks(fontsize=12)
plt.xticks(fontsize=12)

plt.tight_layout()
plt.show()

### Annotate hippocampal vs border/input

In [None]:
# Subclass names counted as hippocampal "Glut" populations.
hippocampus_core = [
    '037 DG Glut',
    '016 CA1-ProS Glut',
    '025 CA2-FC-IG Glut',
    '017 CA3 Glut',
    '023 SUB-ProS Glut',
    '033 NP SUB Glut',
    # Listed without the numeric taxonomy prefix every other entry carries;
    # assign_region() compares names with that prefix stripped, so both the
    # bare and the prefixed form of this subclass are recognised.
    'HPF CR Glut',
]

# Subclass names counted as hippocampal "Gaba" populations.
hippocampus_interneurons = [
    '053 Sst Gaba',
    '052 Pvalb Gaba',
    '050 Lamp5 Lhx6 Gaba',
    '046 Vip Gaba',
    '049 Lamp5 Gaba',
]


def _strip_taxonomy_prefix(name):
    """Return *name* without a leading all-digit token such as ``'037 '``."""
    prefix, sep, remainder = name.partition(' ')
    if sep and prefix.isdigit():
        return remainder
    return name


# Prefix-free names of every hippocampal subclass, built once for O(1) lookups.
_hippocampal_subclass_names = frozenset(
    _strip_taxonomy_prefix(name)
    for name in hippocampus_core + hippocampus_interneurons
)


def assign_region(cell_type):
    """Classify a subclass name as hippocampal or border/input.

    Matching ignores a leading numeric taxonomy prefix, so ``'037 DG Glut'``
    and ``'DG Glut'`` are treated as the same subclass.

    Parameters
    ----------
    cell_type
        Subclass name. Values that are not strings (for example NaN) are
        never considered hippocampal.

    Returns
    -------
    str
        ``'Hippocampal neurons'`` when the name is listed in
        ``hippocampus_core`` or ``hippocampus_interneurons``, otherwise
        ``'Border/Input neurons'``.
    """
    if (
        isinstance(cell_type, str)
        and _strip_taxonomy_prefix(cell_type) in _hippocampal_subclass_names
    ):
        return 'Hippocampal neurons'
    return 'Border/Input neurons'

# Label every cell by region from its cell type and store the result as a categorical.
adata.obs['region_assignment'] = (
    adata.obs['cell_type'].apply(assign_region).astype('category')
)


# Independent copy holding only the cells labelled 'Hippocampal neurons'.
hippocampal_mask = adata.obs['region_assignment'] == 'Hippocampal neurons'
adata_hippocampus_only = adata[hippocampal_mask].copy()

# UMAP of that subset with the cell-type names drawn on the embedding.
sc.pl.umap(
    adata_hippocampus_only,
    color='cell_type',
    legend_loc='on data',
    title='Hippocampus - Cell Types',
)



# Fixed colour per region label, passed as the palette to the plot below.
region_palette = {
    'Hippocampal neurons': '#1f77b4',      # blue
    'Border/Input neurons': '#d62728',   # red
}

# UMAP of all cells coloured by region label.
sc.pl.umap(adata, color='region_assignment', palette=region_palette, title='Region Assignment (All Cells)')

# Number of cells per region label as a two-column table.
region_counts = (
    adata.obs['region_assignment']
    .value_counts()
    .reset_index()
    .set_axis(['region_assignment', 'count'], axis='columns')
)

# Vertical bars, coloured with the fixed region palette.
plt.figure(figsize=(7, 5))
sns.barplot(data=region_counts, y='count', x='region_assignment', palette=region_palette)

plt.title('Region Assignment - Cell Counts')
plt.ylabel('Number of Cells')
plt.xlabel('Region Assignment')
plt.xticks(rotation=20)
plt.tight_layout()
plt.show()

In [None]:
# UMAP panels coloured by hemisphere side and by region label.
sc.pl.umap(adata, color = ['side', 'region_assignment'])

#### Compositional breakdown -- input/output

In [None]:
# Keep ipsilateral cells only.
# NOTE(review): this rebinds `adata` itself, so contralateral cells are absent from
# every statement that runs afterwards; reload the object to get them back.
adata = adata[adata.obs['side'] == 'Ipsilateral'].copy()
# Cells per (group, region), then the same table with each row (group) divided by its total.
composition_counts = pd.crosstab(adata.obs['group'], adata.obs['region_assignment'])
composition_props = composition_counts.div(composition_counts.sum(axis=1), axis=0)

# `display` is supplied by the IPython/Jupyter session; it is not imported in this file.
display(composition_counts)
display(composition_props)

# Cell output: the type of the proportions table.
type(composition_props)

In [None]:
# Region labels in the order value_counts() returns them.
cell_type_counts = adata.obs['region_assignment'].value_counts()
cell_types = list(cell_type_counts.index)

# One "Paired" colour per label, converted to hex strings.
palette = sns.color_palette("Paired", n_colors=len(cell_types))
cell_type_colors = list(map(matplotlib.colors.to_hex, palette))

In [None]:
# Keep only labels that are columns of composition_props, so column selection by this list cannot fail.
cell_types = [ct for ct in cell_types if ct in composition_props.columns]

In [None]:
# pd.to_numeric is applied to every column before plotting.
composition_props = composition_props.apply(pd.to_numeric)

# Colours are paired with the labels positionally.
cell_type_color_dict = dict(zip(cell_types, cell_type_colors))
bar_colors = [cell_type_color_dict[name] for name in cell_types]

# Stacked bars: one bar per group, segments in the order given by `cell_types`.
ax = composition_props[cell_types].plot.bar(
    stacked=True,
    figsize=(4, 4),
    color=bar_colors,
)

ax.set_ylabel('Proportion', fontsize=12)
ax.set_xlabel('', fontsize=12)
# NOTE(review): the bars show region_assignment categories, yet the title says "Subtype".
ax.set_title('Subtype Composition by Group', fontsize=14)

# The legend is removed from this panel.
ax.get_legend().remove()
sns.despine()
plt.tight_layout()

save_path = fig_dir / "anatomical_assignment_groups_freq.pdf"
ax.figure.savefig(save_path, bbox_inches='tight')
plt.show()

In [None]:
# pd.to_numeric is applied to every column before plotting.
composition_counts = composition_counts.apply(pd.to_numeric)

# Colours are paired with the labels positionally.
cell_type_color_dict = dict(zip(cell_types, cell_type_colors))
bar_colors = [cell_type_color_dict[name] for name in cell_types]

# Stacked bars of absolute cell numbers: one bar per group.
ax = composition_counts[cell_types].plot.bar(
    stacked=True,
    figsize=(4, 4),
    color=bar_colors,
)

ax.set_ylabel('Number of Cells', fontsize=12)
ax.set_xlabel('', fontsize=12)
# NOTE(review): the bars show region_assignment categories, yet the title says "Subtype".
ax.set_title('Subtype Composition by Group (Counts)', fontsize=14)

# The legend is removed from this panel.
ax.get_legend().remove()

sns.despine()
plt.tight_layout()

save_path = fig_dir / "anatomical_assignment_groups_count.pdf"
ax.figure.savefig(save_path, bbox_inches='tight')
plt.show()

In [None]:
# Take the legend entries from whichever axes `ax` currently refers to.
handles, labels = ax.get_legend_handles_labels()

# Small figure with hidden axes that holds nothing but the legend.
fig_legend, ax_legend = plt.subplots(figsize=(2, 2))
ax_legend.set_axis_off()

legend = ax_legend.legend(handles, labels, loc='center', frameon=False, fontsize=10)

# Save the legend-only figure with a transparent background.
legend_path = fig_dir / "anatomical_assignment_groups_legend.pdf"
fig_legend.savefig(legend_path, transparent=True, bbox_inches='tight')
plt.show()

#### Compositional breakdown -- subtype

In [None]:
# NOTE(review): despite the `_border` suffix, this subset keeps the cells labelled
# 'Hippocampal neurons' — confirm which population is intended here.
adata_border = adata[adata.obs['region_assignment'] == 'Hippocampal neurons'].copy()

# Cells per (group, cell type) within that subset, then each row (group) divided by its total.
composition_counts = pd.crosstab(adata_border.obs['group'], adata_border.obs['cell_type'])
composition_props = composition_counts.div(composition_counts.sum(axis=1), axis=0)
# `display` is supplied by the IPython/Jupyter session; it is not imported in this file.
display(composition_counts)
display(composition_props)

# Cell output: the type of the proportions table.
type(composition_props)

In [None]:
# Cell types in the order value_counts() returns them, taken from `adata`
# (not from the subset tabulated in composition_counts).
cell_type_counts = adata.obs['cell_type'].value_counts()
cell_types = cell_type_counts.index.tolist()

# One "Paired" colour per cell type, converted to hex strings.
palette = sns.color_palette("Paired", n_colors=len(cell_types))
cell_type_colors = [matplotlib.colors.to_hex(c) for c in palette]

In [None]:
# Cell output: the per-cell annotation table.
adata.obs

In [None]:
# Keep only cell types that are columns of composition_props, so column selection by this list cannot fail.
cell_types = [ct for ct in cell_types if ct in composition_props.columns]

In [None]:
# pd.to_numeric is applied to every column before plotting.
composition_props = composition_props.apply(pd.to_numeric)

# Colours are paired with the cell types positionally.
cell_type_color_dict = dict(zip(cell_types, cell_type_colors))
bar_colors = [cell_type_color_dict[name] for name in cell_types]

# Stacked bars: one bar per group, segments in the order given by `cell_types`.
ax = composition_props[cell_types].plot.bar(
    stacked=True,
    figsize=(4, 4),
    color=bar_colors,
)

ax.set_ylabel('Proportion', fontsize=12)
ax.set_xlabel('', fontsize=12)
ax.set_title('Subtype Composition by Group', fontsize=14)

# The legend is removed from this panel.
ax.get_legend().remove()
sns.despine()
plt.tight_layout()

save_path = fig_dir / "subtype_groups.pdf"
ax.figure.savefig(save_path, bbox_inches='tight')
plt.show()

In [None]:
# Take the legend entries from whichever axes `ax` currently refers to.
handles, labels = ax.get_legend_handles_labels()

# Small figure with hidden axes that holds nothing but the legend.
fig_legend, ax_legend = plt.subplots(figsize=(2, 2))
ax_legend.set_axis_off()

legend = ax_legend.legend(handles, labels, loc='center', frameon=False, fontsize=10)

# Save the legend-only figure with a transparent background.
legend_path = fig_dir / "subtype_groups_legend.pdf"
fig_legend.savefig(legend_path, transparent=True, bbox_inches='tight')
plt.show()

#### Visualize gene expression by anatomical assignment and treatment group

In [None]:
# Gene panel for the expression plots.
genes = ['Arpp21', 'R3hdm1', 'Rorb', 'Cux1', 'Cux2', 'Brinp3', 'Mef2c', 'Zbtb20']

In [None]:
# Matrix plot of the gene panel per (region, group); every gene is scaled
# separately (standard_scale='var').
sc.pl.matrixplot(
    adata,
    var_names=genes,
    groupby=['region_assignment', 'group'],
    standard_scale='var',
    colorbar_title='Scaled to gene',
    save="cortical_genes.pdf",
)

In [None]:
# Dot plot of the gene panel per (region, group); every gene is scaled
# separately (standard_scale='var').
sc.pl.dotplot(
    adata,
    var_names=genes,
    groupby=['region_assignment', 'group'],
    standard_scale='var',
    colorbar_title='Scaled to gene',
    save="cortical_genes.pdf",
)