In [1]:
import os
import pandas as pd
from pathlib import Path
import numpy as np
import anndata
import time
import matplotlib.pyplot as plt
import json

from abc_atlas_access.abc_atlas_cache.abc_project_cache import AbcProjectCache

pd.set_option('display.max_columns', 500)

In [2]:
# Release whose manifest is loaded below; also used later to name the output directory.
version = '20250531'
# Directory handed to the cache (relative to this notebook) — presumably where
# downloaded files are stored; confirm against the AbcProjectCache documentation.
download_base = Path('../../../data/abc_atlas')
abc_cache = AbcProjectCache.from_s3_cache(download_base)
# Pin the cache to the manifest of the release chosen above.
abc_cache.load_manifest(f'releases/{version}/manifest.json')

Read in the two DataFrames from the HMBA-BG taxonomy that we'll need: the cluster annotation terms and the cluster annotation term sets, equivalent to those used for the WMB and WHB taxonomies.

In [4]:
# List the metadata files available for the HMBA-BG taxonomy.
abc_cache.list_metadata_files('HMBA-BG-taxonomy-CCN20250428')

['abbreviation_term',
 'cell_2d_embedding_coordinates',
 'cell_to_cluster_membership',
 'cluster',
 'cluster_annotation_term',
 'cluster_annotation_term_set',
 'cluster_annotation_to_abbreviation_map',
 'cluster_to_cluster_annotation_membership']

In [5]:
# One row per annotation term (all levels of the taxonomy), including its
# colour, ordering and parent term.
term = abc_cache.get_metadata_dataframe(
    directory='HMBA-BG-taxonomy-CCN20250428',
    file_name='cluster_annotation_term'
)
term

Unnamed: 0,label,name,cluster_annotation_term_set_label,cluster_annotation_term_set_name,color_hex_triplet,term_order,term_set_order,parent_term_label,parent_term_name,parent_term_set_label
0,CS20250428_NEIGH_0001,Nonneuron,CCN20250428_LEVEL_0,Neighborhood,#f2ca7d,1,0,,,
1,CS20250428_NEIGH_0000,Glut Sero Dopa,CCN20250428_LEVEL_0,Neighborhood,#91f4bb,2,0,,,
2,CS20250428_NEIGH_0002,Subpallium GABA,CCN20250428_LEVEL_0,Neighborhood,#19613b,3,0,,,
3,CS20250428_NEIGH_0003,Subpallium GABA-Glut,CCN20250428_LEVEL_0,Neighborhood,#7e1d19,4,0,,,
4,CS20250428_CLASS_0000,Astro-Epen,CCN20250428_LEVEL_1,Class,#6ec0da,1,1,CS20250428_NEIGH_0001,Nonneuron,CCN20250428_LEVEL_0
...,...,...,...,...,...,...,...,...,...,...
1543,CS20250428_CLUST_0671,Macaque-490,CCN20250428_LEVEL_4,Cluster,#a2b102,1430,4,CS20250428_GROUP_0016,GPi Shell,CCN20250428_LEVEL_3
1544,CS20250428_CLUST_0672,Macaque-491,CCN20250428_LEVEL_4,Cluster,#e5de92,1431,4,CS20250428_GROUP_0016,GPi Shell,CCN20250428_LEVEL_3
1545,CS20250428_CLUST_0673,Macaque-492,CCN20250428_LEVEL_4,Cluster,#e1d71e,1432,4,CS20250428_GROUP_0016,GPi Shell,CCN20250428_LEVEL_3
1546,CS20250428_CLUST_1771,Marmoset-859,CCN20250428_LEVEL_4,Cluster,#1e3a5b,1433,4,CS20250428_GROUP_0016,GPi Shell,CCN20250428_LEVEL_3


In [6]:
# One row per term set (taxonomy level), keyed by the term set label.
term_sets = abc_cache.get_metadata_dataframe(
    directory='HMBA-BG-taxonomy-CCN20250428',
    file_name='cluster_annotation_term_set'
)
term_sets = term_sets.set_index('label')
term_sets

Unnamed: 0_level_0,name,description,order
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CCN20250428_LEVEL_0,Neighborhood,Neighborhood,0
CCN20250428_LEVEL_1,Class,Class,1
CCN20250428_LEVEL_2,Subclass,Subclass,2
CCN20250428_LEVEL_3,Group,Group,3
CCN20250428_LEVEL_4,Cluster,Cluster,4


In [7]:
# Keep only terms that have a parent, then take the first child listed for
# each parent; the [+] links in the HTML pages point at this child.
filtered = term.dropna(subset=['parent_term_label'])
first_child = (
    filtered
    .groupby('parent_term_label')[
        ['label', 'name', 'term_order', 'cluster_annotation_term_set_name']
    ]
    .first()
)
first_child

Unnamed: 0_level_0,label,name,term_order,cluster_annotation_term_set_name
parent_term_label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CS20250428_CLASS_0000,CS20250428_SUBCL_0000,Astrocyte,1,Subclass
CS20250428_CLASS_0001,CS20250428_SUBCL_0002,CN LAMP5-CXCL14 GABA,23,Subclass
CS20250428_CLASS_0002,CS20250428_SUBCL_0014,CN GABA-Glut,36,Subclass
CS20250428_CLASS_0003,CS20250428_SUBCL_0006,CN MEIS2 GABA,30,Subclass
CS20250428_CLASS_0005,CS20250428_SUBCL_0003,CN LAMP5-LHX6 GABA,17,Subclass
...,...,...,...,...
CS20250428_SUBCL_0033,CS20250428_GROUP_0043,STR SST-CHODL GABA,29,Group
CS20250428_SUBCL_0035,CS20250428_GROUP_0062,VLMC,17,Group
CS20250428_SUBCL_0036,CS20250428_GROUP_0068,OB FRMD7 GABA,60,Group
CS20250428_SUBCL_0037,CS20250428_GROUP_0063,ZI-HTH GABA,46,Group


In [8]:
# Attach each parent's first child (label and term set name) to the parent's
# own row. Rows are addressed by term label, so index on it for the assignment
# and restore the default index afterwards.
term_by_label = term.set_index('label')
parent_labels = first_child.index
term_by_label.loc[parent_labels, 'first_child_label'] = first_child['label']
term_by_label.loc[parent_labels, 'first_child_term_set_name'] = (
    first_child['cluster_annotation_term_set_name']
)
term = term_by_label.reset_index()

In [9]:
# Show the terms that received a first child, i.e. every non-leaf term.
term[term['first_child_label'].notna()]

Unnamed: 0,label,name,cluster_annotation_term_set_label,cluster_annotation_term_set_name,color_hex_triplet,term_order,term_set_order,parent_term_label,parent_term_name,parent_term_set_label,first_child_label,first_child_term_set_name
0,CS20250428_NEIGH_0001,Nonneuron,CCN20250428_LEVEL_0,Neighborhood,#f2ca7d,1,0,,,,CS20250428_CLASS_0000,Class
1,CS20250428_NEIGH_0000,Glut Sero Dopa,CCN20250428_LEVEL_0,Neighborhood,#91f4bb,2,0,,,,CS20250428_CLASS_0007,Class
2,CS20250428_NEIGH_0002,Subpallium GABA,CCN20250428_LEVEL_0,Neighborhood,#19613b,3,0,,,,CS20250428_CLASS_0005,Class
3,CS20250428_NEIGH_0003,Subpallium GABA-Glut,CCN20250428_LEVEL_0,Neighborhood,#7e1d19,4,0,,,,CS20250428_CLASS_0002,Class
4,CS20250428_CLASS_0000,Astro-Epen,CCN20250428_LEVEL_1,Class,#6ec0da,1,1,CS20250428_NEIGH_0001,Nonneuron,CCN20250428_LEVEL_0,CS20250428_SUBCL_0000,Subclass
...,...,...,...,...,...,...,...,...,...,...,...,...
108,CS20250428_GROUP_0051,STR D1D2 Hybrid MSN,CCN20250428_LEVEL_3,Group,#9467bd,58,3,CS20250428_SUBCL_0030,STR Hybrid MSN,CCN20250428_LEVEL_2,CS20250428_CLUST_0457,Cluster
109,CS20250428_GROUP_0024,OT D1 ICj,CCN20250428_LEVEL_3,Group,#ff7f0e,59,3,CS20250428_SUBCL_0021,OT Granular GABA,CCN20250428_LEVEL_2,CS20250428_CLUST_0336,Cluster
110,CS20250428_GROUP_0068,OB FRMD7 GABA,CCN20250428_LEVEL_3,Group,#cd0753,60,3,CS20250428_SUBCL_0036,ACx MEIS2 GABA,CCN20250428_LEVEL_2,CS20250428_CLUST_0025,Cluster
111,CS20250428_GROUP_0067,OB Dopa-GABA,CCN20250428_LEVEL_3,Group,#902f6b,61,3,CS20250428_SUBCL_0036,ACx MEIS2 GABA,CCN20250428_LEVEL_2,CS20250428_CLUST_0024,Cluster


In [11]:
# Build one row per cluster with one column per taxonomy level, holding the
# name of the term the cluster belongs to at that level.
membership = abc_cache.get_metadata_dataframe(
    directory='HMBA-BG-taxonomy-CCN20250428',
    file_name='cluster_to_cluster_annotation_membership'
)
pivot = (
    membership
    .groupby(['cluster_alias', 'cluster_annotation_term_set_name'])['cluster_annotation_term_name']
    .first()
    .unstack()
)
pivot = pivot[term_sets['name']] # order columns
pivot = pivot.fillna('Other')
pivot = pivot.sort_values(['Neighborhood', 'Class', 'Subclass', 'Group', 'Cluster'])
# Re-assigning the columns from a plain list drops the name of the column index.
cols = pivot.columns.to_list()
pivot.columns = cols
pivot

Unnamed: 0_level_0,Neighborhood,Class,Subclass,Group,Cluster
cluster_alias,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Human-128,Glut Sero Dopa,F M Glut,F Glut,BF SKOR1 Glut,Human-128
Human-129,Glut Sero Dopa,F M Glut,F Glut,BF SKOR1 Glut,Human-129
Human-130,Glut Sero Dopa,F M Glut,F Glut,BF SKOR1 Glut,Human-130
Human-423,Glut Sero Dopa,F M Glut,F Glut,BF SKOR1 Glut,Human-423
Human-426,Glut Sero Dopa,F M Glut,F Glut,BF SKOR1 Glut,Human-426
...,...,...,...,...,...
Macaque-490,Subpallium GABA-Glut,CN GABA-Glut,CN GABA-Glut,GPi Shell,Macaque-490
Macaque-491,Subpallium GABA-Glut,CN GABA-Glut,CN GABA-Glut,GPi Shell,Macaque-491
Macaque-492,Subpallium GABA-Glut,CN GABA-Glut,CN GABA-Glut,GPi Shell,Macaque-492
Marmoset-859,Subpallium GABA-Glut,CN GABA-Glut,CN GABA-Glut,GPi Shell,Marmoset-859


In [12]:
# Map each term set name to the terms of that set, indexed by term name, so
# that a term's attributes can be looked up from (term set name, term name).
lookup = {
    tag: term[term['cluster_annotation_term_set_name'] == tag].set_index('name')
    for tag in term_sets['name']
}

Helper functions to look up a term attribute and to format a cell in the HTML table.

In [13]:
def get_value(c, n, v) :
    """Return attribute ``v`` of the term named ``n`` in term set ``c``.

    ``c`` is a key of the module-level ``lookup`` dict (a term set name),
    ``n`` is a term name used as row label in that term set's frame, and
    ``v`` is the column to read. A ``KeyError`` is raised when any of the
    three is unknown.
    """
    terms_in_set = lookup[c]
    term_row = terms_in_set.loc[n]
    return term_row[v]

def format_cell (df,c,add_id=False,add_plus=False,add_minus=False) :
    """Build the HTML fragment for every cell of column ``c``.

    Parameters
    ----------
    df : DataFrame whose column ``c`` holds term names of term set ``c``.
    c : term set name; both the column to format and the key passed to
        ``get_value`` for attribute lookups.
    add_id : prepend an empty ``<div>`` whose id is the term label, so the
        cell can be targeted by an anchor link.
    add_plus : append a ``[+]`` link to the term's first child, on the page
        named after the child's term set.
    add_minus : append a ``[-]`` link to the term itself, on the page named
        after the term's own term set.

    Returns
    -------
    The result of a row-wise ``apply`` over ``df.index``: for each row the
    concatenation, in order, of the id, colour circle, name, plus and minus
    fragments (disabled fragments are empty strings).
    """
    term_names = df[c]

    def attribute(term_name, field) :
        # All lookups are made in the term set that this column represents.
        return get_value(c, term_name, field)

    # Columns are created in the order in which they are concatenated below.
    fragments = pd.DataFrame(index=df.index)

    fragments['id'] = (
        ['<div id="%s"></div>' % attribute(x, 'label') for x in term_names]
        if add_id else ''
    )

    fragments['circle'] = [
        '<div class="circle" style="background-color:%s"></div>' % attribute(x, 'color_hex_triplet')
        for x in term_names
    ]

    fragments['name'] = ['<div class="celltext">%s</div>' % x for x in term_names]

    fragments['plus'] = (
        ['<div class="celltext"><a href="%s.html#%s">[+]</a></div>'
         % (attribute(x, 'first_child_term_set_name'), attribute(x, 'first_child_label'))
         for x in term_names]
        if add_plus else ''
    )

    fragments['minus'] = (
        ['<div class="celltext"><a href="%s.html#%s">[-]</a></div>'
         % (attribute(x, 'cluster_annotation_term_set_name'), attribute(x, 'label'))
         for x in term_names]
        if add_minus else ''
    )

    def concatenate(row) :
        return ''.join(row.values.astype(str))

    return fragments.apply(concatenate, axis=1)


Helper function to create the HTML document for one level of the taxonomy.

In [20]:
def create_html(df, ts, file, title):
    """Write one taxonomy level as a standalone HTML table.

    Every column of ``df`` whose name is a term set name (taken from the
    module-level ``term_sets['name']``) is replaced by the HTML produced by
    ``format_cell``:

    * the column equal to ``ts`` gets an anchor id and, unless it is the
      'Cluster' column, a ``[+]`` link to the first child term;
    * every other column except 'Cluster' gets a ``[-]`` link back to the
      term on its own level's page.

    Parameters
    ----------
    df : DataFrame with one column per taxonomy level to show; not modified.
    ts : name of the term set this page is about.
    file : path of the HTML file to write (overwritten if it exists).
    title : text placed in the page's ``<title>`` element.
    """
    # 'Cluster' is the finest level: it has no children to expand and no page
    # of its own to collapse to. Compared with != / == on purpose — the
    # expression ('Cluster') is a plain string, so `in` would test substrings.
    leaf_term_set = 'Cluster'

    # apply formatter to each term set
    df_formatted = df.copy()

    for tag in term_sets['name'] :
        if tag not in df_formatted.columns :
            continue

        is_page_level = (tag == ts)
        is_leaf = (tag == leaf_term_set)

        df_formatted[tag] = format_cell(
            df, tag,
            add_id=is_page_level,
            add_plus=is_page_level and not is_leaf,
            add_minus=(not is_page_level) and not is_leaf,
        )

    # escape=False because the cells already contain the HTML built above.
    output = df_formatted.to_html(index=False, na_rep='',
                        render_links=True,escape=False,
                        classes="mystyle")

    # Title and table are substituted in a single pass so that braces or
    # percent signs inside either of them are never re-interpreted.
    html_template = '''
    <html>
    <head>
    <meta charset="utf-8"/>
    <title>{title}</title>
    <link rel="stylesheet" type="text/css" href="../../simple_style.css"/>
    </head>
    <body>
    {table}
    </body>
    </html>
    '''

    # OUTPUT AN HTML FILE
    # Explicit encoding: the platform default may not be able to encode every
    # term name and would not match the charset declared in the page.
    with open(file, 'w', encoding='utf-8') as f:
        f.write(html_template.format(title=title, table=output))

In [31]:
# Write the pages under the _static directory of abc_atlas_access, in a
# subdirectory named after the taxonomy and the release version, so that links
# work properly in the jupyter-book/sphinx page.
output_directory = os.path.join('../../_static', 'HMBA-BG-taxonomy-CCN20250428', version)
# exist_ok=True lets this cell be re-run when the directory already exists.
os.makedirs(output_directory, exist_ok=True)

In [32]:
# Neighborhood page: one row per distinct Neighborhood.
df_supertype = pivot[['Neighborhood']].drop_duplicates()

title = 'HMBA-BG-taxonomy-CCN20250428: cell type Neighborhood'
file = os.path.join(output_directory, 'Neighborhood.html')
create_html(df_supertype, ts='Neighborhood', file=file, title=title)
print(len(df_supertype))

4


In [33]:
# Class page: one row per distinct (Neighborhood, Class) combination.
df_supertype = pivot[['Neighborhood', 'Class']].drop_duplicates()

title = 'HMBA-BG-taxonomy-CCN20250428: cell type Class'
file = os.path.join(output_directory, 'Class.html')
create_html(df_supertype, ts='Class', file=file, title=title)
print(len(df_supertype))

12


In [34]:
# Subclass page: one row per distinct (Neighborhood, Class, Subclass) combination.
df_supertype = pivot[['Neighborhood', 'Class', 'Subclass']].drop_duplicates()

title = 'HMBA-BG-taxonomy-CCN20250428: cell type Subclass'
file = os.path.join(output_directory,'Subclass.html')
create_html(df_supertype, ts='Subclass', file=file, title=title)
print(len(df_supertype))

36


In [35]:
# Group page: one row per distinct combination of the four coarsest levels.
df_supertype = pivot[['Neighborhood', 'Class', 'Subclass', 'Group']].drop_duplicates()

title = 'HMBA-BG-taxonomy-CCN20250428: cell type Group'
file = os.path.join(output_directory,'Group.html')
create_html(df_supertype, ts='Group', file=file, title=title)
print(len(df_supertype))

61


In [36]:
# Cluster page: one row per distinct combination of all five levels.
df_supertype = pivot[['Neighborhood', 'Class', 'Subclass', 'Group', 'Cluster']].drop_duplicates()

title = 'HMBA-BG-taxonomy-CCN20250428: cell type Cluster'
file = os.path.join(output_directory,'Cluster.html')
create_html(df_supertype, ts='Cluster', file=file, title=title)
print(len(df_supertype))

1435
