In [1]:
import os
import pandas as pd
import numpy as np
import anndata
import time
import matplotlib.pyplot as plt
import json
import requests

In [2]:
# Local root directory under which the release files are expected to be found.
download_base = '../../../abc_download_root'

url = 'https://allen-brain-cell-atlas.s3-us-west-2.amazonaws.com/releases/20230630/manifest.json'
# Bound the wait and fail loudly on an HTTP error, so that an error page is
# never handed to the JSON parser as if it were the manifest.
response = requests.get(url, timeout=60)
response.raise_for_status()
manifest = response.json()

# Metadata file listing for the Allen-CCF-2020 directory of the manifest.
metadata = manifest['file_listing']['Allen-CCF-2020']['metadata']

Read in parcellation annotation term set dataframe

In [3]:
# Resolve the term-set CSV through the manifest and load it indexed by label.
csv_entry = metadata['parcellation_term_set']['files']['csv']
rpath = csv_entry['relative_path']
file = os.path.join(download_base, rpath)
term_sets = pd.read_csv(file).set_index('label')
term_sets

Unnamed: 0_level_0,description,name,parcellation_terminology_label,term_set_order,parent_term_set_label
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AllenCCF-Ontology-2017-ORGA,organ,organ,AllenCCF-Ontology-2017,0,
AllenCCF-Ontology-2017-CATE,mouse brain anatomical catergories,category,AllenCCF-Ontology-2017,1,AllenCCF-Ontology-2017-ORGA
AllenCCF-Ontology-2017-DIVI,mouse brain divisions,division,AllenCCF-Ontology-2017,2,AllenCCF-Ontology-2017-CATE
AllenCCF-Ontology-2017-STRU,mouse brain structures,structure,AllenCCF-Ontology-2017,3,AllenCCF-Ontology-2017-DIVI
AllenCCF-Ontology-2017-SUBS,mouse brain substructures,substructure,AllenCCF-Ontology-2017,4,AllenCCF-Ontology-2017-STRU


Read in parcellation term membership dataframe

In [4]:
# Resolve the term-membership CSV through the manifest and load it,
# ordered by term set first and by term within each set.
csv_entry = metadata['parcellation_to_parcellation_term_membership']['files']['csv']
rpath = csv_entry['relative_path']
file = os.path.join(download_base, rpath)
term = pd.read_csv(file).sort_values(by=['term_set_order','term_order'])

term.head(8)

Unnamed: 0,parcellation_label,parcellation_term_label,parcellation_term_set_label,parcellation_index,voxel_count,volume_mm3,color_hex_triplet,red,green,blue,parcellation_term_name,parcellation_term_acronym,parcellation_term_set_name,term_set_order,term_order,parent_term_label
0,AllenCCF-Annotation-2020-997,AllenCCF-Ontology-2017-997,AllenCCF-Ontology-2017-ORGA,987,3564320,3.56432,#FFFFFF,255,255,255,brain,brain,organ,0,0,
1,AllenCCF-Annotation-2020-68,AllenCCF-Ontology-2017-997,AllenCCF-Ontology-2017-ORGA,63,256520,0.25652,#FFFFFF,255,255,255,brain,brain,organ,0,0,
6,AllenCCF-Annotation-2020-667,AllenCCF-Ontology-2017-997,AllenCCF-Ontology-2017-ORGA,657,232608,0.232608,#FFFFFF,255,255,255,brain,brain,organ,0,0,
11,AllenCCF-Annotation-2020-526157192,AllenCCF-Ontology-2017-997,AllenCCF-Ontology-2017-ORGA,1288,371584,0.371584,#FFFFFF,255,255,255,brain,brain,organ,0,0,
16,AllenCCF-Annotation-2020-526157196,AllenCCF-Ontology-2017-997,AllenCCF-Ontology-2017-ORGA,1289,112222,0.112222,#FFFFFF,255,255,255,brain,brain,organ,0,0,
21,AllenCCF-Annotation-2020-526322264,AllenCCF-Ontology-2017-997,AllenCCF-Ontology-2017-ORGA,1290,942,0.000942,#FFFFFF,255,255,255,brain,brain,organ,0,0,
26,AllenCCF-Annotation-2020-320,AllenCCF-Ontology-2017-997,AllenCCF-Ontology-2017-ORGA,310,1328982,1.328982,#FFFFFF,255,255,255,brain,brain,organ,0,0,
31,AllenCCF-Annotation-2020-943,AllenCCF-Ontology-2017-997,AllenCCF-Ontology-2017-ORGA,933,3689292,3.689292,#FFFFFF,255,255,255,brain,brain,organ,0,0,


Find and store first child for each term

In [5]:
# Keep only rows that name a parent term, then, per parent, take the first
# non-null value of each selected column (in the current row order of `term`).
has_parent = term['parent_term_label'].notna()
filtered = term[has_parent]
child_columns = ['parcellation_term_label','parcellation_term_name','term_order','parcellation_term_set_name']
first_child = filtered.groupby('parent_term_label')[child_columns].first()
first_child

Unnamed: 0_level_0,parcellation_term_label,parcellation_term_name,term_order,parcellation_term_set_name
parent_term_label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ABC-Ontology-2023-AQ-structure,ABC-Ontology-2023-AQ-substructure,cerebral aqueduct,884,substructure
ABC-Ontology-2023-CB-structure,ABC-Ontology-2023-CB-substructure,"Cerebellum, unassigned",706,substructure
ABC-Ontology-2023-CTXsp-structure,ABC-Ontology-2023-CTXsp-substructure,"Cortical subplate, unassigned",368,substructure
ABC-Ontology-2023-HPF-structure,ABC-Ontology-2023-HPF-substructure,"Hippocampal formation, unassigned",302,substructure
ABC-Ontology-2023-HY-structure,ABC-Ontology-2023-HY-substructure,"Hypothalamus, unassigned",474,substructure
...,...,...,...,...
AllenCCF-Ontology-2017-989,ABC-Ontology-2023-FN-substructure,Fastigial nucleus,758,substructure
AllenCCF-Ontology-2017-991,AllenCCF-Ontology-2017-768,cerebrum related,358,structure
AllenCCF-Ontology-2017-993,AllenCCF-Ontology-2017-656,"Secondary motor area, layer 1",11,substructure
AllenCCF-Ontology-2017-997,ABC-Ontology-2023-brain-category,"brain, unassigned",0,category


In [6]:
# Temporarily index by term label so the per-parent values in `first_child`
# align onto every row carrying that label; then restore a plain row index.
term = term.set_index('parcellation_term_label')
for target, source in [('first_child_label', 'parcellation_term_label'),
                       ('first_child_term_set_name', 'parcellation_term_set_name')]:
    term.loc[first_child.index, target] = first_child[source]
term = term.reset_index()

In [7]:
# Spot-check: last few rows that received a first-child label.
term.dropna(subset=['first_child_label']).tail(5)

Unnamed: 0,parcellation_term_label,parcellation_label,parcellation_term_set_label,parcellation_index,voxel_count,volume_mm3,color_hex_triplet,red,green,blue,parcellation_term_name,parcellation_term_acronym,parcellation_term_set_name,term_set_order,term_order,parent_term_label,first_child_label,first_child_term_set_name
2747,ABC-Ontology-2023-AQ-structure,AllenCCF-Annotation-2020-140,AllenCCF-Ontology-2017-STRU,135,405256,0.405256,#AAAAAA,170,170,170,cerebral aqueduct,AQ,structure,3,366,AllenCCF-Ontology-2017-140,ABC-Ontology-2023-AQ-substructure,substructure
2748,ABC-Ontology-2023-V4-structure,AllenCCF-Annotation-2020-145,AllenCCF-Ontology-2017-STRU,140,499566,0.499566,#AAAAAA,170,170,170,"fourth ventricle, unassigned",V4-unassigned,structure,3,367,AllenCCF-Ontology-2017-145,ABC-Ontology-2023-V4-substructure,substructure
2749,AllenCCF-Ontology-2017-153,AllenCCF-Annotation-2020-153,AllenCCF-Ontology-2017-STRU,148,461680,0.46168,#AAAAAA,170,170,170,lateral recess,V4r,structure,3,368,AllenCCF-Ontology-2017-145,ABC-Ontology-2023-V4r-substructure,substructure
2750,ABC-Ontology-2023-c-structure,AllenCCF-Annotation-2020-164,AllenCCF-Ontology-2017-STRU,159,432,0.000432,#AAAAAA,170,170,170,"central canal, spinal cord/medulla",c,structure,3,369,AllenCCF-Ontology-2017-164,ABC-Ontology-2023-c-substructure,substructure
2751,ABC-Ontology-2023-unassigned-structure,AllenCCF-Annotation-2020-0,AllenCCF-Ontology-2017-STRU,0,697783214,697.783214,#000000,0,0,0,unassigned,unassigned,structure,3,370,ABC-Ontology-2023-unassigned-division,ABC-Ontology-2023-unassigned-substructure,substructure


Create pivot of term order

In [8]:
# One row per parcellation_index, one column per term set name; each cell holds
# the first term_order found for that (index, term set) pair.
order_by_set = term.groupby(['parcellation_index','parcellation_term_set_name'])['term_order'].first()
df = order_by_set.unstack()
df = df[term_sets['name']]  # put the columns in term-set order

sort_keys = ['organ','category','division','structure','substructure']
df_sorted = df.sort_values(sort_keys)
# Replace the column index with a plain list of the same labels.
df_sorted.columns = list(df_sorted.columns)
df_sorted

Unnamed: 0_level_0,organ,category,division,structure,substructure
parcellation_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
987,0,0,0,0,0
63,0,1,1,1,1
657,0,1,1,1,2
1288,0,1,1,1,3
1289,0,1,1,1,4
...,...,...,...,...,...
135,0,3,22,366,884
140,0,3,23,367,885
148,0,3,23,368,886
159,0,3,24,369,887


In [9]:
# Same pivot as above, shown before the columns and rows are reordered.
(
    term
    .groupby(['parcellation_index','parcellation_term_set_name'])['term_order']
    .first()
    .unstack()
)

parcellation_term_set_name,category,division,organ,structure,substructure
parcellation_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,4,25,1,370,888
1,1,8,0,179,518
2,1,1,0,7,40
5,2,17,0,350,820
6,1,10,0,239,615
...,...,...,...,...,...
1323,1,9,0,233,603
1324,1,9,0,233,604
1325,1,9,0,233,605
1326,1,9,0,233,606


Create a dictionary of lookup dataframes, one per term set, keyed by term set name

In [10]:
# Columns dropped before de-duplicating the rows of each term set.
per_parcellation_columns = ['parcellation_label','parcellation_index','voxel_count','volume_mm3']

# lookup[<term set name>] -> dataframe of that set's terms, indexed by term_order.
lookup = {}
for tag in term_sets['name']:
    in_term_set = term['parcellation_term_set_name'] == tag
    lookup[tag] = (
        term[in_term_set]
        .drop(per_parcellation_columns, axis=1)
        .drop_duplicates()
        .set_index('term_order')
    )

Helper functions to look up a term attribute and format a cell in the HTML table

In [11]:
def get_value( c, n, v ) :
    """Return column `v` of the row labelled `n` in the lookup table `lookup[c]`.

    c: key into the module-level `lookup` dictionary.
    n: row label in that table's index.
    v: column name to read from the selected row.
    """
    table = lookup[c]
    row = table.loc[n]
    return row[v]

def format_cell (df,c,add_id=False,add_plus=False,add_minus=False) :
    """Build the HTML snippet shown in each cell of column `c`.

    Every value in df[c] is passed to get_value(c, value, ...) to fetch the
    attributes of the corresponding term; the pieces are concatenated per row.

    df: dataframe whose column `c` holds the row keys used for the lookup.
    c: column name in `df`, also used as the lookup key passed to get_value.
    add_id: if True, prepend an empty <div> whose id is the term label
        (an anchor target for the [+]/[-] links).
    add_plus: if True, append a [+] link built from the term's
        first_child_term_set_name and first_child_label.
    add_minus: if True, append a [-] link built from the term's own
        parcellation_term_set_name and parcellation_term_label.

    Returns a Series of HTML strings sharing the index of `df`.
    """

    divs = pd.DataFrame(index=df.index)

    # Colour swatch.
    pattern = '<div class="circle" style="background-color:%s"></div>'
    divs['circle'] = [ pattern % get_value(c,x,'color_hex_triplet') for x in df[c] ]

    # Acronym as visible text, full name as tooltip. HTML attributes are
    # separated by whitespace only; the original comma after class="celltext"
    # was malformed markup.
    pattern = '<div class="celltext" title="%s">%s</div>'
    divs['name'] = [ pattern % (get_value(c,x,'parcellation_term_name'),
                                get_value(c,x,'parcellation_term_acronym')) for x in df[c] ]

    divs['id'] = ''
    if add_id :
        pattern = '<div id="%s"></div>'
        divs['id'] = [ pattern % get_value(c,x,'parcellation_term_label') for x in df[c] ]

    divs['plus'] = ''
    if add_plus :
        pattern = '<div class="celltext"><a href="%s.html#%s">[+]</a></div>'
        divs['plus'] = [ pattern % (get_value(c,x,'first_child_term_set_name'),
                                    get_value(c,x,'first_child_label')) for x in df[c] ]

    divs['minus'] = ''
    if add_minus :
        pattern = '<div class="celltext"><a href="%s.html#%s">[-]</a></div>'
        divs['minus'] = [ pattern % (get_value(c,x,'parcellation_term_set_name'),
                                    get_value(c,x,'parcellation_term_label')) for x in df[c] ]

    # Fixed left-to-right order of the pieces inside the cell.
    cols = ['id','circle','name','plus','minus']
    output = divs[cols].apply(lambda row: ''.join(row.values.astype(str)), axis=1)
    return output


Helper function to create the HTML document

In [12]:
def create_html (df,ts,file,title) :
    """Render `df` as an HTML table and write it to `file` as a full page.

    Each column of `df` whose name appears in term_sets['name'] is replaced by
    the output of format_cell for that column.

    df: dataframe to render; left unmodified (a copy is formatted).
    ts: name of the term set this page is about. Its column gets anchor ids,
        and [+] links unless it is 'substructure'; every other column except
        'organ' gets [-] links.
    file: path of the HTML file to write (overwritten if it exists).
    title: text placed in the page's <title> element.
    """

    # apply formatter to each term set
    df_formatted = df.copy()

    for tag in term_sets['name'] :
        if tag not in df_formatted.columns :
            continue

        is_page_term_set = (tag == ts)
        df_formatted[tag] = format_cell(
            df, tag,
            add_id=is_page_term_set,
            add_plus=is_page_term_set and tag not in ['substructure'],
            add_minus=(not is_page_term_set) and tag not in ['organ'],
        )

    # escape=False: the cells already contain the HTML built by format_cell.
    output = df_formatted.to_html(index=False, na_rep='',
                        render_links=True,escape=False,
                        classes="mystyle")

    # Title and table are substituted in a single pass, so braces in the title
    # are kept literally instead of being parsed as str.format fields.
    html_string = '''
    <html>
    <head>
    <meta charset="utf-8">
    <title>%s</title>
    <link rel="stylesheet" type="text/css" href="../simple_style.css"/>
    </head>
    <body>
    %s
    </body>
    </html>
    ''' % (title, output)

    # OUTPUT AN HTML FILE
    # Explicit encoding so the bytes written match the declared charset on every platform.
    with open(file, 'w', encoding='utf-8') as f:
        f.write(html_string)

In [13]:
# Directory (relative to the working directory) that receives the generated pages.
output_directory = 'Allen-CCF-2020'
os.makedirs(name=output_directory, exist_ok=True)

In [14]:
# Division page: one row per distinct division.
subset = df_sorted[['division']].drop_duplicates()

file = os.path.join(output_directory, 'division.html')
title = 'Allen-CCF-2020: anatomical divisions'
create_html(df=subset, ts='division', file=file, title=title)
print(subset.shape[0])

26


In [15]:
# Substructure page: one row per distinct (division, structure, substructure).
subset = df_sorted[['division','structure','substructure']].drop_duplicates()

file = os.path.join(output_directory, 'substructure.html')
title = 'Allen-CCF-2020: anatomical substructures'
create_html(df=subset, ts='substructure', file=file, title=title)
print(subset.shape[0])

681


In [16]:
# Structure page: one row per distinct (division, structure).
subset = df_sorted[['division','structure']].drop_duplicates()

file = os.path.join(output_directory,'structure.html')
# Plural, matching the titles of the division and substructure pages.
title = 'Allen-CCF-2020: anatomical structures'
create_html(subset, 'structure',file, title)
print(len(subset))

358
