In [2]:
import os
import pandas as pd
from pathlib import Path
import numpy as np
import anndata
import time
import matplotlib.pyplot as plt
import json

from abc_atlas_access.abc_atlas_cache.abc_project_cache import AbcProjectCache

# Allow up to 500 columns to be shown when a DataFrame is displayed.
pd.set_option('display.max_columns', 500)

In [6]:
# Release whose manifest is loaded below; also used later to name the output directory.
version = '20260131'
# Local directory handed to the cache as its cache dir.
download_base = Path('../../data/allen-brain-cell-atlas-staging')
# Cache pointed at the staging bucket; auth_required=True is passed, so
# credentials are presumably needed to read it.
abc_cache = AbcProjectCache.from_cache_dir(
    download_base,
    s3_bucket='allen-brain-cell-atlas-staging',
    auth_required=True,
)

# Select the manifest that belongs to the chosen release.
abc_cache.load_manifest(f'releases/{version}/manifest.json')

type.compare_manifests('releases/20260131/manifest.json', 'releases/20260228/manifest.json')
To load another version of the dataset, run
type.load_manifest('releases/20260228/manifest.json')


Read in the two DataFrames from the Developing Mouse taxonomy that we'll need to create the cluster annotation term and term set tables, equivalent to those of the WMB and WHB taxonomies.

In [7]:
# Metadata directory of the taxonomy; every metadata read below uses it.
taxonomy_dir = 'Developing-Mouse-taxonomy'

In [8]:
# List the metadata files available under the taxonomy directory.
abc_cache.list_metadata_files(taxonomy_dir)

['cell_2d_embedding_coordinates',
 'cell_to_cluster_membership',
 'cluster',
 'cluster_annotation_term',
 'cluster_annotation_term_set',
 'cluster_to_cluster_annotation_membership']

In [9]:
# Annotation terms of the taxonomy: one row per term, with its term set,
# color, ordering and parent term.
term = abc_cache.get_metadata_dataframe(
    directory=taxonomy_dir,
    file_name='cluster_annotation_term',
)
term

cluster_annotation_term.csv: 100%|██████████| 129k/129k [00:00<00:00, 690kMB/s]  


Unnamed: 0,label,name,cluster_annotation_term_set_label,cluster_annotation_term_set_name,color_hex_triplet,term_order,term_set_order,parent_term_label,parent_term_name,parent_term_set_label,CCN20230722_label
0,CS20260131_CLAS_009,Astro-Epen,CCN20260131_LEVEL_0,class,#594a26,9,0,,,,CS20230722_CLAS_30
1,CS20260131_CLAS_013,CNU-MGE GABA,CCN20260131_LEVEL_0,class,#450099,13,0,,,,CS20230722_CLAS_08
2,CS20260131_CLAS_002,CR Glut,CCN20260131_LEVEL_0,class,#919900,2,0,,,,
3,CS20260131_CLAS_011,CTX-CGE GABA,CCN20260131_LEVEL_0,class,#CCFF33,11,0,,,,CS20230722_CLAS_06
4,CS20260131_CLAS_012,CTX-MGE GABA,CCN20260131_LEVEL_0,class,#f954ee,12,0,,,,CS20230722_CLAS_07
...,...,...,...,...,...,...,...,...,...,...,...
912,CS20260131_SCLU_051,RG_5,CCN20260131_LEVEL_3,subcluster,#9e9ac8,51,3,CS20260131_CLUS_004,RG,CCN20260131_LEVEL_2,
913,CS20260131_SCLU_052,RG_6,CCN20260131_LEVEL_3,subcluster,#bcbddc,52,3,CS20260131_CLUS_004,RG,CCN20260131_LEVEL_2,
914,CS20260131_SCLU_053,RG_7,CCN20260131_LEVEL_3,subcluster,#dadaeb,53,3,CS20260131_CLUS_004,RG,CCN20260131_LEVEL_2,
915,CS20260131_SCLU_054,RG_8,CCN20260131_LEVEL_3,subcluster,#636363,54,3,CS20260131_CLUS_004,RG,CCN20260131_LEVEL_2,


In [10]:
# Term sets (the levels of the taxonomy), indexed by their label.
term_sets_raw = abc_cache.get_metadata_dataframe(
    directory=taxonomy_dir,
    file_name='cluster_annotation_term_set',
)
term_sets = term_sets_raw.set_index('label')
term_sets

cluster_annotation_term_set.csv: 100%|██████████| 268/268 [00:00<00:00, 2.77kMB/s]


Unnamed: 0_level_0,name,description,order,parent_term_set_label
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CCN20260131_LEVEL_0,class,class,0,
CCN20260131_LEVEL_1,subclass,subclass,1,CCN20260131_LEVEL_0
CCN20260131_LEVEL_2,cluster,cluster,2,CCN20260131_LEVEL_1
CCN20260131_LEVEL_3,subcluster,subcluster,3,CCN20260131_LEVEL_2


In [11]:
# Keep only terms that have a parent, then record, for every parent label,
# the first child listed in the term table.
has_parent = term['parent_term_label'].notna()
child_columns = ['label', 'name', 'term_order', 'cluster_annotation_term_set_name']
filtered = term[has_parent]
first_child = filtered.groupby('parent_term_label')[child_columns].first()
first_child

Unnamed: 0_level_0,label,name,term_order,cluster_annotation_term_set_name
parent_term_label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CS20260131_CLAS_001,CS20260131_SCLA_001,NEC,1,subclass
CS20260131_CLAS_002,CS20260131_SCLA_003,CR Glut,3,subclass
CS20260131_CLAS_003,CS20260131_SCLA_004,RG,4,subclass
CS20260131_CLAS_004,CS20260131_SCLA_006,IP IT,6,subclass
CS20260131_CLAS_005,CS20260131_SCLA_008,IMN IT,8,subclass
...,...,...,...,...
CS20260131_SCLA_036,CS20260131_CLUS_139,5304_Peri NN_1,139,cluster
CS20260131_SCLA_037,CS20260131_CLUS_141,5306_SMC NN_1,141,cluster
CS20260131_SCLA_038,CS20260131_CLUS_143,5309_Endo NN_1,143,cluster
CS20260131_SCLA_039,CS20260131_CLUS_146,5312_Microglia NN_1,146,cluster


In [28]:
# Scratch check: `in` on a list compares whole elements (no substring match),
# so 'cluster' is not found in ['subcluster'].
'cluster' in ['subcluster']

False

In [12]:
# Attach to every term the label and term set name of its first child.
# `first_child` is indexed by parent label, so mapping each term's own label
# through it yields the child values; terms without children get NaN.
#
# This replaces an in-place set_index / .loc / reset_index round trip: if that
# failed part-way, `term` stayed indexed by 'label' and the cell could not be
# re-run. Here `term` keeps its index and column order, so the cell is
# idempotent.

# Every parent label must itself be a term, otherwise its child would be dropped silently.
missing_parents = first_child.index.difference(term['label'])
assert missing_parents.empty, (
    f'parent terms missing from the term table: {list(missing_parents)}'
)

term['first_child_label'] = term['label'].map(first_child['label'])
term['first_child_term_set_name'] = term['label'].map(
    first_child['cluster_annotation_term_set_name']
)

In [13]:
# Show the terms that have at least one child.
term[term['first_child_label'].notna()]

Unnamed: 0,label,name,cluster_annotation_term_set_label,cluster_annotation_term_set_name,color_hex_triplet,term_order,term_set_order,parent_term_label,parent_term_name,parent_term_set_label,CCN20230722_label,first_child_label,first_child_term_set_name
0,CS20260131_CLAS_009,Astro-Epen,CCN20260131_LEVEL_0,class,#594a26,9,0,,,,CS20230722_CLAS_30,CS20260131_SCLA_020,subclass
1,CS20260131_CLAS_013,CNU-MGE GABA,CCN20260131_LEVEL_0,class,#450099,13,0,,,,CS20230722_CLAS_08,CS20260131_SCLA_033,subclass
2,CS20260131_CLAS_002,CR Glut,CCN20260131_LEVEL_0,class,#919900,2,0,,,,,CS20260131_SCLA_003,subclass
3,CS20260131_CLAS_011,CTX-CGE GABA,CCN20260131_LEVEL_0,class,#CCFF33,11,0,,,,CS20230722_CLAS_06,CS20260131_SCLA_023,subclass
4,CS20260131_CLAS_012,CTX-MGE GABA,CCN20260131_LEVEL_0,class,#f954ee,12,0,,,,CS20230722_CLAS_07,CS20260131_SCLA_029,subclass
...,...,...,...,...,...,...,...,...,...,...,...,...,...
198,CS20260131_CLUS_005,IP nonIT,CCN20260131_LEVEL_2,cluster,#F207c0,5,2,CS20260131_SCLA_005,IP nonIT,CCN20260131_LEVEL_1,,CS20260131_SCLU_077,subcluster
199,CS20260131_CLUS_101,MGE GABA,CCN20260131_LEVEL_2,cluster,#B2182B,101,2,CS20260131_SCLA_028,MGE GABA,CCN20260131_LEVEL_1,,CS20260131_SCLU_596,subcluster
200,CS20260131_CLUS_100,MGE GABA RG,CCN20260131_LEVEL_2,cluster,#e7969c,100,2,CS20260131_SCLA_027,MGE GABA RG,CCN20260131_LEVEL_1,,CS20260131_SCLU_590,subcluster
201,CS20260131_CLUS_001,NEC,CCN20260131_LEVEL_2,cluster,#9e9ac8,1,2,CS20260131_SCLA_001,NEC,CCN20260131_LEVEL_1,,CS20260131_SCLU_001,subcluster


In [14]:
membership = abc_cache.get_metadata_dataframe(
    directory=taxonomy_dir,
    file_name='cluster_to_cluster_annotation_membership',
)

# One row per cluster alias and one column per term set, holding the term name.
pivot = (
    membership
    .groupby(['cluster_alias', 'cluster_annotation_term_set_name'])['cluster_annotation_term_name']
    .first()
    .unstack()
)
pivot = pivot[term_sets['name']]  # order columns
pivot = pivot.fillna('Other')
pivot = pivot.sort_values(['class', 'subclass', 'cluster', 'subcluster'])

# Re-assign the column labels from a plain list so the columns axis carries no name.
cols = pivot.columns.to_list()
pivot.columns = cols
pivot

cluster_to_cluster_annotation_membership.csv: 100%|██████████| 189k/189k [00:00<00:00, 865kMB/s]  


Unnamed: 0_level_0,class,subclass,cluster,subcluster
cluster_alias,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
476,Astro-Epen,Astro-TE NN,5218_Astro-TE NN_1,5218_Astro-TE NN_1_1
477,Astro-Epen,Astro-TE NN,5218_Astro-TE NN_1,5218_Astro-TE NN_1_2
478,Astro-Epen,Astro-TE NN,5219_Astro-TE NN_1,5219_Astro-TE NN_1_1
479,Astro-Epen,Astro-TE NN,5220_Astro-TE NN_1,5220_Astro-TE NN_1_1
480,Astro-Epen,Astro-TE NN,5225_Astro-TE NN_3,5225_Astro-TE NN_3_1
...,...,...,...,...
166,nonIT Glut,L6b/CT ENT Glut,421_L6b/CT ENT Glut_4,421_L6b/CT ENT Glut_4_5
167,nonIT Glut,L6b/CT ENT Glut,421_L6b/CT ENT Glut_4,421_L6b/CT ENT Glut_4_6
168,nonIT Glut,L6b/CT ENT Glut,421_L6b/CT ENT Glut_4,421_L6b/CT ENT Glut_4_7
169,nonIT Glut,L6b/CT ENT Glut,421_L6b/CT ENT Glut_4,421_L6b/CT ENT Glut_4_8


In [None]:
# For each term set name, the terms of that set indexed by term name.
lookup = {
    set_name: term[term['cluster_annotation_term_set_name'] == set_name].set_index('name')
    for set_name in term_sets['name']
}

Helper functions to look up a term attribute and to format a cell in the HTML table

In [16]:
def get_value(c, n, v):
    """Return attribute ``v`` of the term named ``n`` in term set ``c``.

    The value is read from the module-level ``lookup`` dictionary, which maps a
    term set name to a table indexed by term name.
    """
    term_table = lookup[c]
    term_row = term_table.loc[n]
    return term_row[v]

def format_cell(df, c, add_id=False, add_plus=False, add_minus=False):
    """Render every value of column ``c`` as the HTML snippet of one table cell.

    Each cell is the concatenation, in this order, of an optional anchor
    ``<div>``, a colored circle, the term name, an optional ``[+]`` link and an
    optional ``[-]`` link. Term attributes are read through ``get_value``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose column ``c`` holds term names.
    c : str
        Column of ``df`` to format; also passed to ``get_value`` as the term set.
    add_id : bool
        Prepend an empty ``<div id="...">`` carrying the term label, so that
        links can target the row.
    add_plus : bool
        Append a ``[+]`` link to
        ``<first_child_term_set_name>.html#<first_child_label>``. A term whose
        first child is missing (NaN) gets no link instead of a broken
        ``nan.html#nan`` one.
    add_minus : bool
        Append a ``[-]`` link to ``<cluster_annotation_term_set_name>.html#<label>``.

    Returns
    -------
    pandas.Series
        One HTML string per row of ``df``, indexed like ``df``.
    """
    names = list(df[c])

    circle_pattern = '<div class="circle" style="background-color:%s"></div>'
    circles = [circle_pattern % get_value(c, x, 'color_hex_triplet') for x in names]

    name_pattern = '<div class="celltext">%s</div>'
    texts = [name_pattern % x for x in names]

    ids = [''] * len(names)
    if add_id:
        id_pattern = '<div id="%s"></div>'
        ids = [id_pattern % get_value(c, x, 'label') for x in names]

    pluses = [''] * len(names)
    if add_plus:
        plus_pattern = '<div class="celltext"><a href="%s.html#%s">[+]</a></div>'

        def _plus_link(term_name):
            # A term without children has NaN here; emit nothing rather than a dead link.
            child_set = get_value(c, term_name, 'first_child_term_set_name')
            child_label = get_value(c, term_name, 'first_child_label')
            if pd.isna(child_set) or pd.isna(child_label):
                return ''
            return plus_pattern % (child_set, child_label)

        pluses = [_plus_link(x) for x in names]

    minuses = [''] * len(names)
    if add_minus:
        minus_pattern = '<div class="celltext"><a href="%s.html#%s">[-]</a></div>'
        minuses = [minus_pattern % (get_value(c, x, 'cluster_annotation_term_set_name'),
                                    get_value(c, x, 'label')) for x in names]

    # Build the Series directly (instead of a row-wise DataFrame.apply) so an
    # empty input still yields an empty Series.
    cells = [''.join(parts) for parts in zip(ids, circles, texts, pluses, minuses)]
    return pd.Series(cells, index=df.index, dtype=object)


Helper function to create the HTML document

In [40]:
def create_html(df, ts, file, title, leaf_term_set='subcluster'):
    """Write ``df`` as a styled HTML table to ``file``.

    Every column of ``df`` whose name is a term set name (taken from the
    module-level ``term_sets`` table) is rendered with ``format_cell``:

    * the column equal to ``ts`` gets row anchors and, unless it is the leaf
      term set, ``[+]`` links;
    * every other term set column gets ``[-]`` links.

    Parameters
    ----------
    df : pandas.DataFrame
        Table of term names, one column per term set to show.
    ts : str
        Name of the term set this page is about.
    file : str or path-like
        Destination of the HTML file; it is overwritten if it exists.
    title : str
        Text of the page ``<title>``.
    leaf_term_set : str
        Term set that gets no ``[+]`` links. Defaults to ``'subcluster'``.
    """
    # apply formatter to each term set
    df_formatted = df.copy()

    for tag in term_sets['name']:
        if tag not in df_formatted.columns:
            continue

        is_page_term_set = (tag == ts)
        add_id = is_page_term_set
        add_plus = is_page_term_set and tag != leaf_term_set
        add_minus = (not is_page_term_set) and tag != ''

        df_formatted[tag] = format_cell(df, tag, add_id, add_plus, add_minus)

    table = df_formatted.to_html(index=False, na_rep='',
                                 render_links=True, escape=False,
                                 classes="mystyle")

    # One template filled in a single str.format call: the substituted values
    # are not re-scanned, so braces or percent signs in the title or in the
    # table cannot break the formatting. The stylesheet link lives inside
    # <head>, and nothing is written after </html>.
    template = '''
    <html>
    <head>
    <title>{title}</title>
    <link rel="stylesheet" type="text/css" href="../../simple_style.css"/>
    </head>
    <body>
    {table}
    </body>
    </html>
    '''

    # OUTPUT AN HTML FILE
    # Explicit encoding so the result does not depend on the platform default.
    with open(file, 'w', encoding='utf-8') as f:
        f.write(template.format(title=title, table=table))

In [41]:
# The pages go under the _static directory of abc_atlas_access so that the links
# resolve in the jupyter-book/sphinx page.
static_root = '../../_static'
output_directory = os.path.join(static_root, taxonomy_dir, version)
os.makedirs(output_directory, exist_ok=True)

In [42]:
# Class page: one row per distinct class.
df_supertype = pivot[['class']].drop_duplicates()

title = 'Developing Mouse - Visual Cortex: cell type class'
file = os.path.join(output_directory, 'class.html')
create_html(df_supertype, 'class', file, title)
print(len(df_supertype))

15


In [43]:
# Subclass page: one row per distinct (class, subclass) pair.
df_supertype = pivot[['class', 'subclass']].drop_duplicates()

title = 'Developing Mouse - Visual Cortex: cell type subclass'
file = os.path.join(output_directory, 'subclass.html')
create_html(df_supertype, 'subclass', file, title)
print(len(df_supertype))

40


In [44]:
# Cluster page: one row per distinct (class, subclass, cluster) combination.
df_supertype = pivot[['class', 'subclass', 'cluster']].drop_duplicates()

title = 'Developing Mouse - Visual Cortex: cell type cluster'
file = os.path.join(output_directory, 'cluster.html')
create_html(df_supertype, 'cluster', file, title)
print(len(df_supertype))

148


In [45]:
# Subcluster page: one row per distinct path through all four levels.
df_supertype = pivot[['class', 'subclass', 'cluster', 'subcluster']].drop_duplicates()

title = 'Developing Mouse - Visual Cortex: cell type subcluster'
file = os.path.join(output_directory, 'subcluster.html')
create_html(df_supertype, 'subcluster', file, title)
print(len(df_supertype))

714
