In [1]:
import os
import pandas as pd
import numpy as np
import anndata
import time
import matplotlib.pyplot as plt
import json
import requests

In [2]:
# Root of the local ABC atlas download; manifest relative paths are joined onto it.
download_base = '../../../abc_download_root'

url = 'https://allen-brain-cell-atlas.s3-us-west-2.amazonaws.com/releases/20230630/manifest.json'

# Bound the request and fail on HTTP errors, so a bad download is reported
# as such instead of hanging or surfacing later as a JSON decode error.
response = requests.get(url, timeout=60)
response.raise_for_status()
manifest = response.json()

# Metadata file listing for the Allen-CCF-2020 directory of this release.
metadata = manifest['file_listing']['Allen-CCF-2020']['metadata']

Read in parcellation annotation term set dataframe

In [3]:
# Resolve the term set CSV inside the local download and load it keyed by label.
rpath = metadata['parcellation_term_set']['files']['csv']['relative_path']
file = os.path.join(download_base, rpath)
term_sets = pd.read_csv(file).set_index('label')
term_sets

Read in parcellation term membership dataframe

In [4]:
# Load the parcellation-to-term membership table, ordered by term set and
# then by term order within each set.
rpath = metadata['parcellation_to_parcellation_term_membership']['files']['csv']['relative_path']
file = os.path.join(download_base, rpath)
term = pd.read_csv(file).sort_values(['term_set_order', 'term_order'])

term.head(8)

Find and store first child for each term

In [5]:
# Keep only rows that have a parent, then take the first row of each parent
# group (groups follow the row order of `term`).
has_parent = term['parent_term_label'].notna()
filtered = term.loc[has_parent]
first_child = (
    filtered
    .groupby('parent_term_label')[[
        'parcellation_term_label',
        'parcellation_term_name',
        'term_order',
        'parcellation_term_set_name',
    ]]
    .first()
)
first_child

In [6]:
# Load the per-term table, keyed by term label.
rpath = metadata['parcellation_term']['files']['csv']['relative_path']
file = os.path.join(download_base, rpath)
info = pd.read_csv(file).set_index('label')
info.head(5)

In [7]:
# Temporarily key the membership table by term label so the per-parent
# first-child columns and the term identifier can be attached by label.
term = term.set_index('parcellation_term_label')
term.loc[first_child.index,'first_child_label'] = first_child['parcellation_term_label']
term.loc[first_child.index,'first_child_term_set_name'] = first_child['parcellation_term_set_name']
# Pull `identifier` from the per-term table, then restore the label as a column.
term = term.join(info['identifier']).reset_index()

In [8]:
# Preview the last few rows that received a first child.
term.loc[term['first_child_label'].notna()].tail(5)

Create pivot of term order

In [9]:
# One row per parcellation_index, one column per term set; each value is the
# term_order of the first matching membership row.
df = (
    term
    .groupby(['parcellation_index', 'parcellation_term_set_name'])['term_order']
    .first()
    .unstack()
)
df = df[term_sets['name']] # order columns
df_sorted = df.sort_values(['organ','category','division','structure','substructure'])
# Replace the named column index with a plain list of labels.
df_sorted.columns = df_sorted.columns.tolist()
df_sorted

In [10]:
# Same pivot as above, shown with the column order produced by unstack().
(
    term
    .groupby(['parcellation_index', 'parcellation_term_set_name'])['term_order']
    .first()
    .unstack()
)

Create a lookup dataframe for each term set, keyed by term set name

In [11]:
# Build one attribute table per term set: drop the per-parcellation columns,
# collapse the now-identical rows and index what is left by term_order.
lookup = {}
for tag in term_sets['name'] :
    in_term_set = term['parcellation_term_set_name'] == tag
    lookup[tag] = (
        term.loc[in_term_set]
        .drop(columns=['parcellation_label','parcellation_index','voxel_count','volume_mm3'])
        .drop_duplicates()
        .set_index('term_order')
    )

Helper functions to look up a term attribute and format a cell in the HTML table

In [12]:
def get_value( c, n, v ) :
    """Return attribute `v` of the term stored under index `n` in `lookup[c]`.

    c -- key into the module-level `lookup` dict (a term set name)
    n -- index label of the term inside that table
    v -- name of the column to read
    """
    term_row = lookup[c].loc[n]
    return term_row[v]

def format_cell (df,c,add_id=False,add_plus=False,add_minus=False,add_name=False,add_atlas=False) :
    """Render column `c` of `df` as HTML snippets, one string per row.

    Every value in `df[c]` is used as a key into `lookup[c]` (via `get_value`)
    to fetch the attributes of the corresponding term.

    Parameters
    ----------
    df : DataFrame whose column `c` holds the keys to format.
    c : column name; also the key into `lookup`.
    add_id : prepend an empty `<div id=...>` anchor carrying the term label.
    add_plus : append a `[+]` link to the term's first child; skipped for
        terms that have no first child.
    add_minus : append a `[-]` link to the term's anchor on its own term set page.
    add_name : append the full term name as visible text.
    add_atlas : append an `[A]` link to the online atlas, built from the part
        of `identifier` after the colon (a missing identifier maps to `0`).

    Returns
    -------
    Series of HTML strings sharing the index of `df`, each the concatenation
    id + circle + name + desc + plus + minus + atlas.
    """
    keys = list(df[c])
    blank = [''] * len(keys)

    def attribute (name) :
        # One lookup per row for the requested term attribute.
        return [ get_value(c,x,name) for x in keys ]

    term_names = attribute('parcellation_term_name')

    pattern = '<div class="circle" style="background-color:%s"></div>'
    circle = [ pattern % color for color in attribute('color_hex_triplet') ]

    # No comma between attributes: `class="celltext", title=...` is invalid HTML.
    pattern = '<div class="celltext" title="%s"><b>%s</b></div>'
    name = [ pattern % pair for pair in zip(term_names, attribute('parcellation_term_acronym')) ]

    desc = blank
    if add_name :
        pattern = '<div class="celltext">%s</div>'
        desc = [ pattern % text for text in term_names ]

    anchor = blank
    if add_id :
        pattern = '<div id="%s"></div>'
        anchor = [ pattern % label for label in attribute('parcellation_term_label') ]

    plus = blank
    if add_plus :
        pattern = '<div class="celltext"><a href="%s.html#%s">[+]</a></div>'
        # A term without children has NaN here; emit nothing rather than a
        # dead "nan.html#nan" link.
        plus = [ '' if pd.isna(child) else pattern % (child_set, child)
                 for child_set, child in zip(attribute('first_child_term_set_name'),
                                             attribute('first_child_label')) ]

    minus = blank
    if add_minus :
        pattern = '<div class="celltext"><a href="%s.html#%s">[-]</a></div>'
        minus = [ pattern % pair for pair in zip(attribute('parcellation_term_set_name'),
                                                 attribute('parcellation_term_label')) ]

    atlas = blank
    if add_atlas :
        pattern = '<div class="celltext"><a href="https://atlas.brain-map.org/atlas?atlas=602630314&structure=%s">[A]</a></div>'
        # Missing identifiers fall back to 'MBA:0'; a plain list avoids writing
        # a string into what may be an all-NaN float Series.
        identifiers = [ 'MBA:0' if pd.isna(ident) else ident for ident in attribute('identifier') ]
        atlas = [ pattern % ident.split(':')[1] for ident in identifiers ]

    parts = zip(anchor, circle, name, desc, plus, minus, atlas)
    output = pd.Series([ ''.join(row) for row in parts ], index=df.index, dtype=object)
    return output


Helper function to create the HTML document

In [13]:
def create_html (df,ts,file,title) :
    """Write `df` to `file` as a standalone HTML page.

    Each column of `df` whose name appears in `term_sets['name']` is replaced
    by HTML snippets from `format_cell`. The column equal to `ts` is the page's
    focus column.

    Parameters
    ----------
    df : DataFrame to render; it is copied, not modified.
    ts : name of the focus column. It gets an id anchor, the term name, an
        atlas link and, unless it is 'substructure', a [+] link.
    file : path of the HTML file to write (overwritten if present).
    title : text placed in the page's <title> element.

    Columns other than `ts` get a [-] link, except 'organ'.
    """
    df_formatted = df.copy()

    for tag in term_sets['name'] :
        if tag not in df_formatted.columns :
            continue

        is_focus = (tag == ts)
        df_formatted[tag] = format_cell(
            df, tag,
            add_id=is_focus,
            add_plus=is_focus and tag != 'substructure',
            add_minus=(not is_focus) and tag != 'organ',
            add_name=is_focus,
            add_atlas=is_focus,
        )

    table = df_formatted.to_html(index=False, na_rep='',
                        render_links=True,escape=False,
                        classes="mystyle")

    # Title and table are substituted in one pass, so braces in the title are
    # not treated as format fields. The stylesheet link sits inside <head> and
    # nothing follows </html>.
    html_template = '''
    <html>
    <head>
    <meta charset="utf-8"/>
    <title>{title}</title>
    <link rel="stylesheet" type="text/css" href="../simple_style.css"/>
    </head>
    <body>
    {table}
    </body>
    </html>
    '''

    # OUTPUT AN HTML FILE
    # Explicit encoding, matching the declared charset, so the output does not
    # depend on the platform default.
    with open(file, 'w', encoding='utf-8') as f:
        f.write(html_template.format(title=title, table=table))

In [14]:
# Directory (relative to the notebook) that receives the generated pages;
# created on first run and reused afterwards.
output_directory = 'Allen-CCF-2020'
os.makedirs(output_directory, exist_ok=True)

In [15]:
# Division page: one row per distinct division.
subset = df_sorted[['division']].drop_duplicates()

title = 'Allen-CCF-2020: anatomical divisions'
file = os.path.join(output_directory, 'division.html')
create_html(subset, 'division', file, title)
print(len(subset))

In [16]:
# Substructure page: one row per distinct (division, structure, substructure).
subset = df_sorted[['division', 'structure', 'substructure']].drop_duplicates()

title = 'Allen-CCF-2020: anatomical substructures'
file = os.path.join(output_directory, 'substructure.html')
create_html(subset, 'substructure', file, title)
print(len(subset))

In [17]:
# Structure page: one row per distinct (division, structure).
subset = df_sorted[['division', 'structure']].drop_duplicates()

file = os.path.join(output_directory, 'structure.html')
# Plural, matching the titles of the division and substructure pages.
title = 'Allen-CCF-2020: anatomical structures'
create_html(subset, 'structure', file, title)
print(len(subset))