In [1]:
import html
import json
import os
import time

import anndata
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests

In [2]:
# Local directory that the manifest's relative file paths are joined onto.
download_base = '../../../abc_download_root'

# Release manifest for the 20230630 release.
url = 'https://allen-brain-cell-atlas.s3-us-west-2.amazonaws.com/releases/20230630/manifest.json'

# Fail fast: the timeout keeps this cell from hanging indefinitely on a stalled
# connection, and raise_for_status() reports an HTTP error here instead of
# letting an error page surface later as a JSON decode error or a KeyError.
response = requests.get(url, timeout=60)
response.raise_for_status()
manifest = response.json()

# Metadata file listing for the whole-mouse-brain (WMB) taxonomy.
taxonomy_metadata = manifest['file_listing']['WMB-taxonomy']['metadata']

Read in cluster annotation term set dataframe

In [3]:
# Resolve the term-set CSV inside the local download root and load it.
term_set_entry = taxonomy_metadata['cluster_annotation_term_set']
rpath = term_set_entry['files']['csv']['relative_path']
file = os.path.join(download_base, rpath)
term_sets = pd.read_csv(file)
term_sets

Unnamed: 0,label,description,name,order
0,CCN20230504_NEUR,Clusters are assigned based on the average exp...,neurotransmitter,0
1,CCN20230504_DIVI,A very broad definition of cell types. Cluster...,division,1
2,CCN20230504_CLAS,The top level of cell type definition in the m...,class,2
3,CCN20230504_SUBC,The coarse level of cell type definition in th...,subclass,3
4,CCN20230504_SUPT,The second finest level of cell type definitio...,supertype,4
5,CCN20230504_CLUS,The finest level of cell type definition in th...,cluster,5


Read in cluster annotation term dataframe. Spike in a row to handle unassigned neurotransmitter clusters.

In [4]:
# Load every annotation term (all term sets share this one table).
term_entry = taxonomy_metadata['cluster_annotation_term']
rpath = term_entry['files']['csv']['relative_path']
file = os.path.join(download_base, rpath)
term = pd.read_csv(file)

# Placeholder "Other" term for the neurotransmitter term set. Only these three
# columns are populated; every other column of the spiked-in row stays empty.
other_values = {
    'name': 'Other',
    'cluster_annotation_term_set_name': 'neurotransmitter',
    'color_hex_triplet': '#ebebeb',
}
other_term = pd.DataFrame(columns=term.columns)
for column, value in other_values.items():
    other_term.loc[0, column] = value

term = pd.concat([term, other_term], ignore_index=True)

Read in the cluster annotation term name pivot table and sort it

In [5]:
# Load the pivoted membership table (one row per cluster, one column per term set).
pivot_entry = taxonomy_metadata['cluster_to_cluster_annotation_membership_pivoted']
rpath = pivot_entry['files']['csv']['relative_path']
file = os.path.join(download_base, rpath)

# Missing values in ANY column become 'Other'; only the neurotransmitter term
# set has a matching 'Other' term spiked into the term table.
df = pd.read_csv(file).fillna('Other')

# Order rows from the coarsest to the finest annotation level.
df_sorted = df.sort_values(by=['class', 'subclass', 'supertype', 'cluster'])
df_sorted

Unnamed: 0,cluster_alias,neurotransmitter,division,class,subclass,supertype,cluster
125,128,Glut,1 Pallium glutamatergic,01 IT-ET Glut,001 Car3 Glut,0001 Car3 Glut_1,0001 Car3 Glut_1
126,129,Glut,1 Pallium glutamatergic,01 IT-ET Glut,001 Car3 Glut,0001 Car3 Glut_1,0002 Car3 Glut_1
127,130,Glut,1 Pallium glutamatergic,01 IT-ET Glut,001 Car3 Glut,0001 Car3 Glut_1,0003 Car3 Glut_1
140,143,Glut,1 Pallium glutamatergic,01 IT-ET Glut,001 Car3 Glut,0001 Car3 Glut_1,0004 Car3 Glut_1
128,131,Glut,1 Pallium glutamatergic,01 IT-ET Glut,001 Car3 Glut,0002 Car3 Glut_2,0005 Car3 Glut_2
...,...,...,...,...,...,...,...
5168,5279,Other,7 Immune,32 Immune,305 DC NN,1041 DC NN,5196 DC NN
5164,5275,Other,7 Immune,32 Immune,306 Lymphoid NN,1042 B cells NN,5197 B cells NN
5161,5272,Other,7 Immune,32 Immune,306 Lymphoid NN,1043 ILC NN,5198 ILC NN
5163,5274,Other,7 Immune,32 Immune,306 Lymphoid NN,1044 NK cells NN,5199 NK cells NN


Create a lookup dataframe, indexed by term name, for each term set

In [6]:
# Map each term-set name to the rows of `term` belonging to it, indexed by
# term name so a term can be fetched with `.loc[name]`.
lookup = {}
for tag in term_sets['name']:
    print(tag)
    in_term_set = term['cluster_annotation_term_set_name'] == tag
    lookup[tag] = term.loc[in_term_set].set_index('name')

neurotransmitter
division
class
subclass
supertype
cluster


Helper functions to look up a color by term set and term name, and to format a cell in the HTML table

In [7]:
def get_color(c, n):
    """Return the ``color_hex_triplet`` value of term ``n`` in term set ``c``.

    ``c`` is a key of the module-level ``lookup`` dict and ``n`` is a label of
    that dataframe's index. A ``KeyError`` propagates if either is unknown.
    """
    term_table = lookup[c]
    return term_table.loc[n, 'color_hex_triplet']
                            
def format_cell (df,c) :
    """Build the HTML snippet for every value in column ``c`` of ``df``.

    Each snippet is a colored circle followed by the term name. The color comes
    from ``get_color(c, value)``, looked up with the raw (unescaped) value.

    The term name is HTML-escaped before interpolation: the table is rendered
    with ``escape=False`` so these snippets survive, which means a raw ``&``,
    ``<`` or ``>`` in a name would otherwise corrupt the page markup.

    Returns a list of strings, one per row of ``df[c]``, in row order.
    """
    pattern = '<div class="circle" style="background-color:%s"></div><div class="celltext">%s</div>'
    return [
        pattern % (get_color(c, x), html.escape(str(x), quote=False))
        for x in df[c]
    ]


Helper function to create the HTML document

In [8]:
def create_html (df,file,title) :
    """Write ``df`` to ``file`` as a standalone HTML page.

    Every column of ``df`` whose name matches a term-set name (the ``name``
    column of the module-level ``term_sets``) is replaced by the snippets
    returned by ``format_cell``; other columns are rendered unchanged.

    Parameters
    ----------
    df : dataframe to render; it is not modified.
    file : path of the HTML file to write (overwritten if it exists).
    title : text placed in the page's ``<title>`` element, inserted verbatim.
    """
    # apply formatter to each term set
    df_formatted = df.copy()
    for tag in term_sets['name'] :
        if tag in df_formatted.columns :
            df_formatted[tag] = format_cell(df,tag)

    # escape=False keeps the markup produced by format_cell intact
    table = df_formatted.to_html(index=False, na_rep='',
                        render_links=True,escape=False,
                        classes="mystyle")

    # Title and table are substituted in a single pass, so braces or percent
    # signs inside either value are never re-interpreted as placeholders.
    page_template = '''
    <html>
    <head>
    <meta charset="utf-8"/>
    <title>{title}</title>
    <link rel="stylesheet" type="text/css" href="simple_style.css"/>
    </head>
    <body>
    {table}
    </body>
    </html>
    '''

    # OUTPUT AN HTML FILE
    # Explicit encoding: output must not depend on the platform default and
    # must match the declared charset.
    with open(file, 'w', encoding='utf-8') as f:
        f.write(page_template.format(title=title, table=table))

In [9]:
# Full table: one row per cluster, with all columns of the sorted pivot table.
file = 'WMB-taxonomy_cluster_annotation_clusters.html'
title = 'WMB-taxonomy cluster annotations (clusters)'  # fixed typo: was "taxonmy"
create_html(df_sorted, file, title)
print(len(df_sorted))

5196


In [10]:
# One row per distinct (class, subclass, supertype) combination.
df_supertype = df_sorted[['class','subclass','supertype']].drop_duplicates()

file = 'WMB-taxonomy_cluster_annotation_supertypes.html'
title = 'WMB-taxonomy cluster annotations (supertypes)'  # fixed typo: was "taxonmy"
create_html(df_supertype, file, title)
print(len(df_supertype))

1115


In [11]:
# One row per distinct (class, subclass) combination. Named for what it holds
# rather than reusing the supertype frame's name.
df_subclass = df_sorted[['class','subclass']].drop_duplicates()

file = 'WMB-taxonomy_cluster_annotation_subclasses.html'
title = 'WMB-taxonomy cluster annotations (subclasses)'  # fixed typo: was "taxonmy"
create_html(df_subclass, file, title)
print(len(df_subclass))

307


In [12]:
# One row per distinct class. Named for what it holds rather than reusing the
# supertype frame's name.
df_class = df_sorted[['class']].drop_duplicates()

file = 'WMB-taxonomy_cluster_annotation_classes.html'
title = 'WMB-taxonomy cluster annotations (classes)'  # fixed typo: was "taxonmy"
create_html(df_class, file, title)
print(len(df_class))

32
