In [2]:
import os
import pandas as pd
from pathlib import Path
import numpy as np
import anndata
import time
import matplotlib.pyplot as plt
import json

from abc_atlas_access.abc_atlas_cache.abc_project_cache import AbcProjectCache

# Raise the pandas display limit so wide metadata tables are shown without column truncation.
pd.set_option('display.max_columns', 500)

In [3]:
# Data release to work with; also used later to name the output directory.
version = '20250331'
download_base = Path('../../../data/abc_atlas')

# Manifest describing the files of the chosen release.
manifest_path = f'releases/{version}/manifest.json'

abc_cache = AbcProjectCache.from_s3_cache(download_base)
# NOTE(review): the manifest path is appended to the cache's list of known
# manifests before loading — presumably because this release is not in the
# list the cache discovered on its own; confirm this is still required.
abc_cache.list_manifest_file_names.append(manifest_path)
abc_cache.load_manifest(manifest_path)

releases/20250131/manifest.json
which is newer than the most recent manifest file you have previously been working with
releases/20250331/manifest.json
It is possible that some data files have changed between these two data releases, which will force you to re-download those data files (currently downloaded files will not be overwritten). To continue using releases/20250331/manifest.json, run
type.load_manifest('releases/20250331/manifest.json')


Read in the two DataFrames from the SEAAD taxonomy that we'll need to create cluster annotation terms and term sets equivalent to those of the WMB and WHB taxonomies.

In [5]:
# Show which metadata tables are available in the 'SEAAD-taxonomy' directory.
abc_cache.list_metadata_files('SEAAD-taxonomy')

['cluster_annotation_term',
 'cluster_annotation_term_set',
 'cluster_to_cluster_annotation_membership']

In [12]:
# One row per annotation term (class, subclass, supertype, ...).
term = abc_cache.get_metadata_dataframe(directory='SEAAD-taxonomy', file_name='cluster_annotation_term')
term

Unnamed: 0,name,cluster_annotation_term_set_label,parent_term_label,parent_term_set_label,term_order,color_hex_triplet,cluster_annotation_term_set_name,label,parent_term_name
0,Neuronal: GABAergic,CCN20230508_CLAS,,,1,#F05A28,class,CS20230508_CLAS_0001,
1,Neuronal: Glutamatergic,CCN20230508_CLAS,,,2,#00ADF8,class,CS20230508_CLAS_0002,
2,Non-neuronal and Non-neural,CCN20230508_CLAS,,,3,#808080,class,CS20230508_CLAS_0003,
3,Lamp5 Lhx6,CCN20230508_SUBC,CS20230508_CLAS_0001,CCN20230508_CLAS,1,#935F50,subclass,CS20230508_SUBC_0001,Neuronal: GABAergic
4,Lamp5,CCN20230508_SUBC,CS20230508_CLAS_0001,CCN20230508_CLAS,2,#DA808C,subclass,CS20230508_SUBC_0002,Neuronal: GABAergic
...,...,...,...,...,...,...,...,...,...
164,Micro-PVM_4-SEAAD,CCN20230508_SUPT,CS20230508_SUBC_0024,CCN20230508_SUBC,138,#ABC6AD,supertype,CS20230508_SUPT_0138,Microglia-PVM
165,Lymphocyte,CCN20230508_SUPT,CS20230508_SUBC_0024,CCN20230508_SUBC,139,#4EAF59,supertype,CS20230508_SUPT_0139,Microglia-PVM
166,unchanged,CCN20230508_ACPS,,,1,#F7F7F7,abundancechangecps,CS20230508_ACPS_0001,
167,decrease,CCN20230508_ACPS,,,0,#2166AC,abundancechangecps,CS20230508_ACPS_0000,


In [10]:
# One row per term set, keyed by the term-set label.
term_sets = (
    abc_cache
    .get_metadata_dataframe(directory='SEAAD-taxonomy', file_name='cluster_annotation_term_set')
    .set_index('label')
)
term_sets

Unnamed: 0_level_0,name,parent_term_set_label,term_set_order
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CCN20230508_CLAS,class,,0
CCN20230508_SUBC,subclass,CCN20230508_CLAS,1
CCN20230508_SUPT,supertype,CCN20230508_SUBC,2
CCN20230508_ACPS,abundancechangecps,,3


In [18]:
# Only terms that have a parent can be somebody's child.
has_parent = term['parent_term_label'].notna()
filtered = term[has_parent]

# For every parent label, take the first child in table row order
# (groupby(...).first() picks the first non-null value per column).
child_columns = ['label', 'name', 'term_order', 'cluster_annotation_term_set_name']
first_child = filtered.groupby('parent_term_label')[child_columns].first()
first_child

Unnamed: 0_level_0,label,name,term_order,cluster_annotation_term_set_name
parent_term_label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CS20230508_CLAS_0001,CS20230508_SUBC_0001,Lamp5 Lhx6,1,subclass
CS20230508_CLAS_0002,CS20230508_SUBC_0010,L2/3 IT,10,subclass
CS20230508_CLAS_0003,CS20230508_SUBC_0019,Astrocyte,19,subclass
CS20230508_SUBC_0001,CS20230508_SUPT_0001,Lamp5_Lhx6_1,1,supertype
CS20230508_SUBC_0002,CS20230508_SUPT_0002,Lamp5_1,2,supertype
CS20230508_SUBC_0003,CS20230508_SUPT_0008,Pax6_1,8,supertype
CS20230508_SUBC_0004,CS20230508_SUPT_0012,Sncg_3,12,supertype
CS20230508_SUBC_0005,CS20230508_SUPT_0019,Vip_14,19,supertype
CS20230508_SUBC_0006,CS20230508_SUPT_0035,Sst Chodl_1,35,supertype
CS20230508_SUBC_0007,CS20230508_SUPT_0036,Sst_1,36,supertype


In [19]:
# Attach to every parent term the label and term-set name of its first child.
# `first_child` is indexed by parent label, so mapping each term's own label
# through it fills parents and leaves NaN on terms without children.
# This replaces an in-place set_index/reset_index round trip, which reordered
# the columns and was not safe to re-run; column assignment via map gives the
# same values and is idempotent.
term['first_child_label'] = term['label'].map(first_child['label'])
term['first_child_term_set_name'] = term['label'].map(first_child['cluster_annotation_term_set_name'])

In [20]:
# Show the terms that received a first-child annotation (i.e. the parent terms).
term[term['first_child_label'].notna()]

Unnamed: 0,label,index,name,cluster_annotation_term_set_label,parent_term_label,parent_term_set_label,term_order,color_hex_triplet,cluster_annotation_term_set_name,parent_term_name,first_child_label,first_child_term_set_name
0,CS20230508_CLAS_0001,0,Neuronal: GABAergic,CCN20230508_CLAS,,,1,#F05A28,class,,CS20230508_SUBC_0001,subclass
1,CS20230508_CLAS_0002,1,Neuronal: Glutamatergic,CCN20230508_CLAS,,,2,#00ADF8,class,,CS20230508_SUBC_0010,subclass
2,CS20230508_CLAS_0003,2,Non-neuronal and Non-neural,CCN20230508_CLAS,,,3,#808080,class,,CS20230508_SUBC_0019,subclass
3,CS20230508_SUBC_0001,3,Lamp5 Lhx6,CCN20230508_SUBC,CS20230508_CLAS_0001,CCN20230508_CLAS,1,#935F50,subclass,Neuronal: GABAergic,CS20230508_SUPT_0001,supertype
4,CS20230508_SUBC_0002,4,Lamp5,CCN20230508_SUBC,CS20230508_CLAS_0001,CCN20230508_CLAS,2,#DA808C,subclass,Neuronal: GABAergic,CS20230508_SUPT_0002,supertype
5,CS20230508_SUBC_0003,5,Pax6,CCN20230508_SUBC,CS20230508_CLAS_0001,CCN20230508_CLAS,3,#71238C,subclass,Neuronal: GABAergic,CS20230508_SUPT_0008,supertype
6,CS20230508_SUBC_0004,6,Sncg,CCN20230508_SUBC,CS20230508_CLAS_0001,CCN20230508_CLAS,4,#DF70FF,subclass,Neuronal: GABAergic,CS20230508_SUPT_0012,supertype
7,CS20230508_SUBC_0005,7,Vip,CCN20230508_SUBC,CS20230508_CLAS_0001,CCN20230508_CLAS,5,#A45FBF,subclass,Neuronal: GABAergic,CS20230508_SUPT_0019,supertype
8,CS20230508_SUBC_0006,8,Sst Chodl,CCN20230508_SUBC,CS20230508_CLAS_0001,CCN20230508_CLAS,6,#B1B10C,subclass,Neuronal: GABAergic,CS20230508_SUPT_0035,supertype
9,CS20230508_SUBC_0007,9,Sst,CCN20230508_SUBC,CS20230508_CLAS_0001,CCN20230508_CLAS,7,#FF9900,subclass,Neuronal: GABAergic,CS20230508_SUPT_0036,supertype


In [22]:
membership = abc_cache.get_metadata_dataframe(
    directory='SEAAD-taxonomy',
    file_name='cluster_to_cluster_annotation_membership',
)

# One row per cluster, one column per term set, cells hold the term name.
pivot = (
    membership
    .groupby(['cluster_alias', 'cluster_annotation_term_set_name'])['cluster_annotation_term_name']
    .first()
    .unstack()
)

# Order the columns like the term sets, fill gaps, and sort the rows by hierarchy.
pivot = (
    pivot[term_sets['name'].to_list()]
    .fillna('Other')
    .sort_values(['class', 'subclass', 'supertype'])
)

# Re-assigning a plain list drops the name carried by the columns index.
pivot.columns = pivot.columns.to_list()
pivot

Unnamed: 0_level_0,class,subclass,supertype,abundancechangecps
cluster_alias,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Chandelier_1,Neuronal: GABAergic,Chandelier,Chandelier_1,unchanged
Chandelier_2,Neuronal: GABAergic,Chandelier,Chandelier_2,unchanged
Lamp5_1,Neuronal: GABAergic,Lamp5,Lamp5_1,unchanged
Lamp5_2,Neuronal: GABAergic,Lamp5,Lamp5_2,unchanged
Lamp5_3,Neuronal: GABAergic,Lamp5,Lamp5_3,decrease
...,...,...,...,...
Oligo_4,Non-neuronal and Non-neural,Oligodendrocyte,Oligo_4,unchanged
Pericyte_1,Non-neuronal and Non-neural,VLMC,Pericyte_1,unchanged
Pericyte_2-SEAAD,Non-neuronal and Non-neural,VLMC,Pericyte_2-SEAAD,unchanged
SMC-SEAAD,Non-neuronal and Non-neural,VLMC,SMC-SEAAD,unchanged


In [23]:
# Per term set: a table of that set's terms indexed by term name, so that
# attributes (color, label, ...) can be looked up from a name in the pivot table.
lookup = {
    set_name: term[term['cluster_annotation_term_set_name'] == set_name].copy().set_index('name')
    for set_name in term_sets['name']
}

Helper functions to look up a term attribute and format a cell in the HTML table

In [24]:
def get_value(c, n, v) :
    """Return attribute `v` of the term named `n` within term set `c`.

    Reads the module-level `lookup` dict, whose values are term tables
    indexed by term name. Raises KeyError if the term set, the term name
    or the attribute is not present.
    """
    term_table = lookup[c]
    term_row = term_table.loc[n]
    return term_row[v]

def format_cell (df,c,add_id=False,add_plus=False,add_minus=False) :
    """Build the HTML cell content for column `c` of `df`.

    Every cell is a concatenation of <div> fragments in the order
    id, circle, name, plus, minus:

    * id     - anchor div carrying the term label (only if `add_id`)
    * circle - color swatch using the term's `color_hex_triplet`
    * name   - the term name as text
    * plus   - "[+]" link to the term's first child (only if `add_plus`)
    * minus  - "[-]" link to the term itself on its own term-set page
               (only if `add_minus`)

    Returns a Series of HTML strings indexed like `df`. Term attributes are
    resolved through `get_value`, so every value in `df[c]` must be a known
    term name of term set `c`.
    """
    term_names = df[c]

    def fill(template, *attributes):
        # One rendered fragment per row, substituting the requested term attributes.
        return [template % tuple(get_value(c, term_name, attribute) for attribute in attributes)
                for term_name in term_names]

    fragments = pd.DataFrame(index=df.index)

    fragments['circle'] = fill('<div class="circle" style="background-color:%s"></div>',
                               'color_hex_triplet')

    fragments['name'] = ['<div class="celltext">%s</div>' % term_name for term_name in term_names]

    # Optional fragments default to the empty string so the final join is uniform.
    fragments['id'] = fill('<div id="%s"></div>', 'label') if add_id else ''

    fragments['plus'] = ''
    if add_plus :
        fragments['plus'] = fill('<div class="celltext"><a href="%s.html#%s">[+]</a></div>',
                                 'first_child_term_set_name', 'first_child_label')

    fragments['minus'] = ''
    if add_minus :
        fragments['minus'] = fill('<div class="celltext"><a href="%s.html#%s">[-]</a></div>',
                                  'cluster_annotation_term_set_name', 'label')

    ordered = fragments[['id','circle','name','plus','minus']]
    return ordered.apply(lambda row: ''.join(row.values.astype(str)), axis=1)


Helper function to create the HTML document

In [35]:
def create_html(df, ts, file, title):
    """Render a taxonomy table as a standalone HTML page and write it to `file`.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns are term-set names (e.g. 'class', 'subclass')
        and whose cells are term names.
    ts : str
        Name of the term set this page is about. Cells of that column get an
        anchor id and, unless the set is 'supertype' or 'abundancechangecps',
        a "[+]" link to the first child term. Cells of the other term-set
        columns (except 'abundancechangecps') get a "[-]" link back to the
        term on its own page.
    file : str or path-like
        Destination of the HTML file (overwritten if it exists).
    title : str
        Text placed in the page's <title> element.

    Notes
    -----
    Relies on the module-level `term_sets` table and on `format_cell`.
    """
    # Term sets that never link to another page.
    no_plus = ('supertype', 'abundancechangecps')
    # Must be a real tuple: ('abundancechangecps') without the trailing comma
    # is just a string, which turns `in` into a substring test.
    no_minus = ('abundancechangecps',)

    # apply formatter to each term set
    df_formatted = df.copy()

    for tag in term_sets['name'] :
        if tag not in df_formatted.columns :
            continue

        is_page_term_set = (tag == ts)
        add_id = is_page_term_set
        add_plus = is_page_term_set and tag not in no_plus
        add_minus = (not is_page_term_set) and tag not in no_minus

        df_formatted[tag] = format_cell(df, tag, add_id, add_plus, add_minus)

    output = df_formatted.to_html(index=False, na_rep='',
                        render_links=True, escape=False,
                        classes="mystyle")

    # Single-pass templating: title and table are both passed as format
    # arguments, so braces or percent signs inside them cannot break rendering.
    html_string = '''
    <html>
    <head>
    <meta charset="utf-8"/>
    <title>{title}</title>
    <link rel="stylesheet" type="text/css" href="../../simple_style.css"/>
    </head>
    <body>
    {table}
    </body>
    </html>
    '''

    # OUTPUT AN HTML FILE
    # Explicit encoding keeps the output identical across platforms and
    # matches the charset declared in the document head.
    with open(file, 'w', encoding='utf-8') as f:
        f.write(html_string.format(title=title, table=output))

In [28]:
# Write the pages into the _static directory of abc_atlas_access so that the links
# resolve correctly in the jupyter-book/sphinx page.
output_directory = os.path.join('../../_static', 'SEAAD-taxonomy', version)
Path(output_directory).mkdir(parents=True, exist_ok=True)

In [29]:
# Class page: one row per distinct class.
df_class = pivot[['class']].drop_duplicates()

file = os.path.join(output_directory, 'class.html')
title = 'SEAAD-taxonomy: cell type classes'
create_html(df_class, 'class', file, title)
print(len(df_class))

3


In [30]:
# Subclass page: one row per distinct (class, subclass) pair.
df_subclass = pivot[['class', 'subclass']].drop_duplicates()

file = os.path.join(output_directory, 'subclass.html')
title = 'SEAAD-taxonomy: cell type subclasses'
create_html(df_subclass, 'subclass', file, title)
print(len(df_subclass))

24


In [36]:
# Supertype page: one row per distinct supertype with its full lineage and
# its abundance-change annotation.
df_supertype = pivot[['class', 'subclass', 'supertype', 'abundancechangecps']].drop_duplicates()

file = os.path.join(output_directory, 'supertype.html')
title = 'SEAAD-taxonomy: cell type supertypes'
create_html(df_supertype, 'supertype', file, title)
print(len(df_supertype))

139
