In [1]:
import os
import pandas as pd
from pathlib import Path
import numpy as np
import anndata
import time
import matplotlib.pyplot as plt
import json

from abc_atlas_access.abc_atlas_cache.abc_project_cache import AbcProjectCache

# Allow up to 500 columns to be shown when a wide DataFrame is displayed.
pd.set_option('display.max_columns', 500)

In [21]:
# Release tag; selects the manifest loaded below and names the output folder later on.
version = '20251031'
# Relative path handed to the cache as its cache directory.
download_base = Path('../../../data/allen-brain-cell-atlas-staging')
# NOTE(review): staging bucket with auth_required=True — presumably needs valid
# credentials in the environment; confirm before sharing this notebook.
abc_cache = AbcProjectCache.from_cache_dir(
    download_base,
    s3_bucket='allen-brain-cell-atlas-staging',
    auth_required=True,
)

# Load the manifest that belongs to the chosen release.
abc_cache.load_manifest(f'releases/{version}/manifest.json')

Read in the two DataFrames from the Consensus WMB integrated taxonomy that we'll need: the cluster annotation terms and the cluster annotation term sets, equivalent to those of the WMB and WHB taxonomies.

In [34]:
# Metadata directory of the taxonomy; also used as a sub-directory of the HTML output path.
taxonomy_dir = 'Consensus-WMB-integrated-taxonomy'

In [35]:
# List the metadata files available for the taxonomy (uses the shared
# `taxonomy_dir` setting instead of repeating the directory name).
abc_cache.list_metadata_files(taxonomy_dir)

['HY-EA-Glut-GABA_cell_2d_embedding_coordinates',
 'MB-GABA_cell_2d_embedding_coordinates',
 'MB-Glut-Dopa-Sero_cell_2d_embedding_coordinates',
 'NN-IMN_cell_2d_embedding_coordinates',
 'P-MY-CB-GABA_cell_2d_embedding_coordinates',
 'P-MY-CB-Glut_cell_2d_embedding_coordinates',
 'Pallium-Glut_cell_2d_embedding_coordinates',
 'Subpallium-GABA_cell_2d_embedding_coordinates',
 'TH-EPI-Glut_cell_2d_embedding_coordinates',
 'cell_2d_embedding_coordinates',
 'cell_to_cluster_membership',
 'cluster',
 'cluster_annotation_term',
 'cluster_annotation_term_set',
 'cluster_to_cluster_annotation_membership']

In [36]:
# Annotation terms of the taxonomy (all term sets in one table).
term = abc_cache.get_metadata_dataframe(
    directory=taxonomy_dir,
    file_name='cluster_annotation_term',
)
term

Unnamed: 0,label,name,cluster_annotation_term_set_label,cluster_annotation_term_set_name,color_hex_triplet,term_order,term_set_order,parent_term_label,parent_term_name,parent_term_set_label
0,CS20251031_NEUR_0004,Chol,CCN20251031_NEUR,neurotransmitter,#73E785,4,0,,,
1,CS20251031_NEUR_0012,Chol-Dopa,CCN20251031_NEUR,neurotransmitter,#B8EC68,12,0,,,
2,CS20251031_NEUR_0008,Dopa,CCN20251031_NEUR,neurotransmitter,#fcf04b,8,0,,,
3,CS20251031_NEUR_0002,GABA,CCN20251031_NEUR,neurotransmitter,#FF3358,2,0,,,
4,CS20251031_NEUR_0006,GABA-Chol,CCN20251031_NEUR,neurotransmitter,#000080,6,0,,,
...,...,...,...,...,...,...,...,...,...,...
8590,CS20251031_CLUS_6717,6717 Pit_Six1_NN 5,CCN20251031_LEVEL_4,cluster,#FFF570,6717,5,CS20251031_SUPT_1385,1385 Pit_Six1_NN 5,CCN20251031_LEVEL_3
8591,CS20251031_CLUS_6718,6718 Pit_Six1_NN 5,CCN20251031_LEVEL_4,cluster,#FFFF00,6718,5,CS20251031_SUPT_1385,1385 Pit_Six1_NN 5,CCN20251031_LEVEL_3
8592,CS20251031_CLUS_6719,6719 Pit_Six1_NN 6,CCN20251031_LEVEL_4,cluster,#FFFBB1,6719,5,CS20251031_SUPT_1386,1386 Pit_Six1_NN 6,CCN20251031_LEVEL_3
8593,CS20251031_CLUS_6720,6720 Pit_Six1_NN 6,CCN20251031_LEVEL_4,cluster,#FFF2CD,6720,5,CS20251031_SUPT_1386,1386 Pit_Six1_NN 6,CCN20251031_LEVEL_3


In [37]:
# Term sets of the taxonomy, indexed by their label.
term_sets = abc_cache.get_metadata_dataframe(
    directory=taxonomy_dir,
    file_name='cluster_annotation_term_set',
)
term_sets = term_sets.set_index('label')
term_sets

Unnamed: 0_level_0,name,description,order,parent_term_set_label
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CCN20251031_NEUR,neurotransmitter,neurotransmitter,0,
CCN20251031_LEVEL_0,neighborhood,neighborhood,1,
CCN20251031_LEVEL_1,class,class,2,CCN20251031_LEVEL_0
CCN20251031_LEVEL_2,subclass,subclass,3,CCN20251031_LEVEL_1
CCN20251031_LEVEL_3,supertype,supertype,4,CCN20251031_LEVEL_2
CCN20251031_LEVEL_4,cluster,cluster,5,CCN20251031_LEVEL_3


In [38]:
# For every parent term, take the first child entry (per column, in the order
# rows appear in `term`); terms without a parent are left out.
child_columns = ['label', 'name', 'term_order', 'cluster_annotation_term_set_name']
filtered = term[term['parent_term_label'].notna()]
first_child = filtered.groupby('parent_term_label')[child_columns].first()
first_child

Unnamed: 0_level_0,label,name,term_order,cluster_annotation_term_set_name
parent_term_label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CS20251031_CLAS_0001,CS20251031_SUBC_0001,001 L2/3-IT-RSP_Glut,1,subclass
CS20251031_CLAS_0002,CS20251031_SUBC_0032,032 L6b-EPd_Glut,32,subclass
CS20251031_CLAS_0003,CS20251031_SUBC_0040,040 OB_Eomes:Sall1_Glut,40,subclass
CS20251031_CLAS_0004,CS20251031_SUBC_0043,043 DG_Glut,43,subclass
CS20251031_CLAS_0005,CS20251031_SUBC_0046,046 OB_Meis2:Thsd7b_Gaba,46,subclass
...,...,...,...,...
CS20251031_SUPT_1382,CS20251031_CLUS_6713,6713 Pit_Six1_NN 2,6713,cluster
CS20251031_SUPT_1383,CS20251031_CLUS_6715,6715 Pit_Six1_NN 3,6715,cluster
CS20251031_SUPT_1384,CS20251031_CLUS_6716,6716 Pit_Six1_NN 4,6716,cluster
CS20251031_SUPT_1385,CS20251031_CLUS_6717,6717 Pit_Six1_NN 5,6717,cluster


In [39]:
# Attach each parent term's first child (label and term-set name) to `term`.
# Mapping on the `label` column leaves `term`'s index untouched, so a failure
# part-way through cannot leave the frame indexed by label and the cell can
# always be re-run.
missing_parents = first_child.index.difference(term['label'])
assert missing_parents.empty, f'parent terms missing from term table: {list(missing_parents)}'
term['first_child_label'] = term['label'].map(first_child['label'])
term['first_child_term_set_name'] = term['label'].map(first_child['cluster_annotation_term_set_name'])

In [40]:
# Show only the terms that received a first child.
term[term['first_child_label'].notna()]

Unnamed: 0,label,name,cluster_annotation_term_set_label,cluster_annotation_term_set_name,color_hex_triplet,term_order,term_set_order,parent_term_label,parent_term_name,parent_term_set_label,first_child_label,first_child_term_set_name
22,CS20251031_NEIGH_0003,HY-EA-Glut-GABA,CCN20251031_LEVEL_0,neighborhood,#FF6600,3,1,,,,CS20251031_CLAS_0011,class
23,CS20251031_NEIGH_0006,MB-GABA,CCN20251031_LEVEL_0,neighborhood,#9EF01A,6,1,,,,CS20251031_CLAS_0021,class
24,CS20251031_NEIGH_0004,MB-Glut-Dopa-Sero,CCN20251031_LEVEL_0,neighborhood,#006200,4,1,,,,CS20251031_CLAS_0015,class
25,CS20251031_NEIGH_0009,NN-IMN,CCN20251031_LEVEL_0,neighborhood,#03045E,9,1,,,,CS20251031_CLAS_0036,class
26,CS20251031_NEIGH_0008,P-MY-CB-GABA,CCN20251031_LEVEL_0,neighborhood,#0096C7,8,1,,,,CS20251031_CLAS_0031,class
...,...,...,...,...,...,...,...,...,...,...,...,...
1869,CS20251031_SUPT_1382,1382 Pit_Six1_NN 2,CCN20251031_LEVEL_3,supertype,#DD889D,1382,4,CS20251031_SUBC_0414,414 Pit_Six1_NN,CCN20251031_LEVEL_2,CS20251031_CLUS_6713,cluster
1870,CS20251031_SUPT_1383,1383 Pit_Six1_NN 3,CCN20251031_LEVEL_3,supertype,#BB999F,1383,4,CS20251031_SUBC_0414,414 Pit_Six1_NN,CCN20251031_LEVEL_2,CS20251031_CLUS_6715,cluster
1871,CS20251031_SUPT_1384,1384 Pit_Six1_NN 4,CCN20251031_LEVEL_3,supertype,#995E2E,1384,4,CS20251031_SUBC_0414,414 Pit_Six1_NN,CCN20251031_LEVEL_2,CS20251031_CLUS_6716,cluster
1872,CS20251031_SUPT_1385,1385 Pit_Six1_NN 5,CCN20251031_LEVEL_3,supertype,#866000,1385,4,CS20251031_SUBC_0414,414 Pit_Six1_NN,CCN20251031_LEVEL_2,CS20251031_CLUS_6717,cluster


In [41]:
# Build a table with one row per cluster alias and one column per term set,
# holding the name of the term the cluster belongs to at that level.
membership = abc_cache.get_metadata_dataframe(
    directory=taxonomy_dir,
    file_name='cluster_to_cluster_annotation_membership',
)
names_by_level = (
    membership
    .groupby(['cluster_alias', 'cluster_annotation_term_set_name'])['cluster_annotation_term_name']
    .first()
    .unstack()
)
pivot = (
    names_by_level[term_sets['name']]  # order columns
    .fillna('Other')
    .sort_values(['neighborhood', 'class', 'subclass', 'supertype', 'cluster'])
)
# Re-assign the column labels from a plain list so the column index carries no name.
cols = pivot.columns.to_list()
pivot.columns = cols
pivot

Unnamed: 0_level_0,neurotransmitter,neighborhood,class,subclass,supertype,cluster
cluster_alias,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
6562,GABA,HY-EA-Glut-GABA,011 CNU-HYa GABA,090 MEA-BST_Lhx6:Nfib_Gaba,0376 MEA-BST_Lhx6:Nfib_Gaba 1,1543 MEA-BST_Lhx6:Nfib_Gaba 1
6567,GABA,HY-EA-Glut-GABA,011 CNU-HYa GABA,090 MEA-BST_Lhx6:Nfib_Gaba,0376 MEA-BST_Lhx6:Nfib_Gaba 1,1544 MEA-BST_Lhx6:Nfib_Gaba 1
6576,GABA,HY-EA-Glut-GABA,011 CNU-HYa GABA,090 MEA-BST_Lhx6:Nfib_Gaba,0376 MEA-BST_Lhx6:Nfib_Gaba 1,1545 MEA-BST_Lhx6:Nfib_Gaba 1
6578,GABA,HY-EA-Glut-GABA,011 CNU-HYa GABA,090 MEA-BST_Lhx6:Nfib_Gaba,0376 MEA-BST_Lhx6:Nfib_Gaba 1,1546 MEA-BST_Lhx6:Nfib_Gaba 1
6579,GABA,HY-EA-Glut-GABA,011 CNU-HYa GABA,090 MEA-BST_Lhx6:Nfib_Gaba,0376 MEA-BST_Lhx6:Nfib_Gaba 1,1547 MEA-BST_Lhx6:Nfib_Gaba 1
...,...,...,...,...,...,...
11156,Glut,TH-EPI-Glut,019 TH Glut,188 MRN-PAG_Neurod2_Glut,0716 MRN-PAG_Neurod2_Glut 2,3226 MRN-PAG_Neurod2_Glut 2
11157,Glut,TH-EPI-Glut,019 TH Glut,188 MRN-PAG_Neurod2_Glut,0716 MRN-PAG_Neurod2_Glut 2,3227 MRN-PAG_Neurod2_Glut 2
11158,Glut,TH-EPI-Glut,019 TH Glut,188 MRN-PAG_Neurod2_Glut,0716 MRN-PAG_Neurod2_Glut 2,3228 MRN-PAG_Neurod2_Glut 2
11067,Glut,TH-EPI-Glut,019 TH Glut,188 MRN-PAG_Neurod2_Glut,0717 MRN-PAG_Neurod2_Glut 3,3229 MRN-PAG_Neurod2_Glut 3


In [42]:
# Per term set: the rows of `term` belonging to that set, indexed by term name,
# so an attribute can be read as lookup[term_set_name].loc[term_name, attribute].
# Built with a comprehension so no notebook-level names (such as `filtered`
# from the parent-term cell) are rebound as a side effect.
lookup = {
    tag: term[term['cluster_annotation_term_set_name'] == tag].set_index('name')
    for tag in term_sets['name']
}

Helper functions to look up a term attribute and format a cell in the HTML table

In [43]:
def get_value(c, n, v) :
    """Return attribute ``v`` of the term named ``n`` in term set ``c``.

    Reads the notebook-level ``lookup`` dict, which maps a term-set name to a
    DataFrame of that set's terms indexed by term name.

    Parameters
    ----------
    c : str
        Term-set name (key of ``lookup``).
    n : str
        Term name (index label in ``lookup[c]``).
    v : str
        Column to read.

    Raises
    ------
    KeyError
        If the term set, term name or column is unknown.
    """
    # Single label-based lookup; avoids building the whole row first.
    return lookup[c].loc[n, v]

def format_cell (df,c,add_id=False,add_plus=False,add_minus=False) :
    """Render column ``c`` of ``df`` as one HTML fragment string per row.

    Each fragment is the concatenation, in this order, of:

    * (``add_id``) an empty ``<div>`` whose id is the term label,
    * a colour circle ``<div>`` using the term's ``color_hex_triplet``,
    * the term name,
    * (``add_plus``) a ``[+]`` link to
      ``<first_child_term_set_name>.html#<first_child_label>``,
    * (``add_minus``) a ``[-]`` link to
      ``<cluster_annotation_term_set_name>.html#<label>``.

    Term attributes are fetched with ``get_value(c, name, attribute)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing column ``c`` with term names; not modified.
    c : str
        Column / term-set name.
    add_id, add_plus, add_minus : bool
        Switch the optional parts on.

    Returns
    -------
    pandas.Series
        Object-dtype Series of HTML strings sharing ``df``'s index.
    """
    names = list(df[c])

    def attribute(v):
        # Attribute `v` of every row's term, in row order.
        return [get_value(c, x, v) for x in names]

    # Per-part lists (each of len(df)), kept in concatenation order:
    # id, circle, name, plus, minus.
    fragments = []

    if add_id :
        fragments.append(['<div id="%s"></div>' % x for x in attribute('label')])

    fragments.append(['<div class="circle" style="background-color:%s"></div>' % x
                      for x in attribute('color_hex_triplet')])
    fragments.append(['<div class="celltext">%s</div>' % x for x in names])

    if add_plus :
        pattern = '<div class="celltext"><a href="%s.html#%s">[+]</a></div>'
        fragments.append([pattern % pair
                          for pair in zip(attribute('first_child_term_set_name'),
                                          attribute('first_child_label'))])

    if add_minus :
        pattern = '<div class="celltext"><a href="%s.html#%s">[-]</a></div>'
        fragments.append([pattern % pair
                          for pair in zip(attribute('cluster_annotation_term_set_name'),
                                          attribute('label'))])

    # Join the parts row by row with plain string operations instead of a
    # row-wise DataFrame.apply over a scratch frame.
    cells = [''.join(parts) for parts in zip(*fragments)]
    return pd.Series(cells, index=df.index, dtype=object)


Helper function to create the HTML document for one term set

In [44]:
def create_html(df, ts, file, title):
    """Write ``df`` to ``file`` as a styled, cross-linked HTML table page.

    Every column of ``df`` whose name appears in the notebook-level
    ``term_sets['name']`` is rendered with ``format_cell``:

    * the column equal to ``ts`` gets an anchor id per row, plus a ``[+]``
      link unless it is 'cluster' or 'neurotransmitter';
    * every other column gets a ``[-]`` link, again except 'cluster' and
      'neurotransmitter'.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to render; not modified.
    ts : str
        Name of the term set this page is about.
    file : str or path-like
        Output path; overwritten if it already exists.
    title : str
        Text placed in the page ``<title>``.
    """
    no_links = ('cluster', 'neurotransmitter')

    # apply formatter to each term set
    df_formatted = df.copy()
    for tag in term_sets['name'] :
        if tag not in df_formatted.columns :
            continue
        is_page_level = (tag == ts)
        linkable = tag not in no_links
        df_formatted[tag] = format_cell(
            df, tag,
            add_id=is_page_level,
            add_plus=is_page_level and linkable,
            add_minus=(not is_page_level) and linkable,
        )

    # escape=False: the cells already hold the HTML produced by format_cell.
    table = df_formatted.to_html(index=False, na_rep='',
                        render_links=True,escape=False,
                        classes="mystyle")

    # Title and table are substituted in a single pass, so braces or percent
    # signs inside either value are never re-interpreted as placeholders.
    template = '''
    <html>
    <head>
    <meta charset="utf-8"/>
    <title>{title}</title>
    <link rel="stylesheet" type="text/css" href="../../simple_style.css"/>
    </head>
    <body>
    {table}
    </body>
    </html>
    '''

    # OUTPUT AN HTML FILE (explicit encoding keeps the result platform independent)
    with open(file, 'w', encoding='utf-8') as f:
        f.write(template.format(title=title, table=table))

In [45]:
# Write the data to the _static directory of the abc_atlas_access so that links work properly in the jupyter-book/sphinx page.
# Pages are grouped per taxonomy and release: ../../_static/<taxonomy_dir>/<version>/.
output_directory = os.path.join('../../_static', taxonomy_dir, version)
# exist_ok=True: re-running the cell does not fail when the directory already exists.
os.makedirs(output_directory, exist_ok=True)

In [46]:
# Neighborhood page: one row per distinct neighborhood.
# (`df_supertype` is simply the working-table name reused by every page cell.)
page_columns = ['neighborhood']
df_supertype = pivot[page_columns].drop_duplicates()

file = os.path.join(output_directory, 'neighborhood.html')
title = 'Consensus WMB integrated taxonomy: cell type neighborhood'
create_html(df_supertype, 'neighborhood', file, title)
print(len(df_supertype))

9


In [47]:
# Class page: one row per distinct (neighborhood, class) combination.
# (`df_supertype` is simply the working-table name reused by every page cell.)
page_columns = ['neighborhood', 'class']
df_supertype = pivot[page_columns].drop_duplicates()

file = os.path.join(output_directory, 'class.html')
title = 'Consensus WMB integrated taxonomy: cell type class'
create_html(df_supertype, 'class', file, title)
print(len(df_supertype))

43


In [48]:
# Subclass page: one row per distinct (neighborhood, class, subclass) combination.
# (`df_supertype` is simply the working-table name reused by every page cell.)
page_columns = ['neighborhood', 'class', 'subclass']
df_supertype = pivot[page_columns].drop_duplicates()

file = os.path.join(output_directory,'subclass.html')
title = 'Consensus WMB integrated taxonomy: cell type subclass'
create_html(df_supertype, 'subclass', file, title)
print(len(df_supertype))

414


In [49]:
# Supertype page: one row per distinct combination down to the supertype level.
page_columns = ['neighborhood', 'class', 'subclass', 'supertype']
df_supertype = pivot[page_columns].drop_duplicates()

file = os.path.join(output_directory,'supertype.html')
title = 'Consensus WMB integrated taxonomy: cell type supertype'
create_html(df_supertype, 'supertype', file, title)
print(len(df_supertype))

1386


In [50]:
# Cluster page: the full hierarchy per cluster, with the neurotransmitter as last column.
# (`df_supertype` is simply the working-table name reused by every page cell.)
page_columns = ['neighborhood', 'class', 'subclass', 'supertype', 'cluster', 'neurotransmitter']
df_supertype = pivot[page_columns].drop_duplicates()

file = os.path.join(output_directory,'cluster.html')
title = 'Consensus WMB integrated taxonomy: cell type cluster'
create_html(df_supertype, 'cluster', file, title)
print(len(df_supertype))

6721


In [51]:
# Neurotransmitter page: one row per distinct neurotransmitter term.
# (`df_supertype` is simply the working-table name reused by every page cell.)
page_columns = ['neurotransmitter']
df_supertype = pivot[page_columns].drop_duplicates()

file = os.path.join(output_directory,'neurotransmitter.html')
title = 'Consensus WMB integrated taxonomy: cell type neurotransmitter'
create_html(df_supertype, 'neurotransmitter', file, title)
print(len(df_supertype))

22
