In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import requests
import pathlib

In [2]:
# Release tag used to build the manifest path, and the root of the local download.
version = '20231030'
download_base = '../../abc_download_root'

# True: read the manifest from download_base. False: fetch it over HTTPS.
use_local_cache = True
manifest_path = 'releases/%s/manifest.json' % version

if not use_local_cache :
    url = 'https://allen-brain-cell-atlas.s3-us-west-2.amazonaws.com/' + manifest_path
    # Bound the wait and fail on HTTP errors, so that a missing release is
    # reported as an HTTP error instead of a JSON decoding error raised while
    # parsing the server's error page.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    manifest = response.json()
else :
    file = os.path.join(download_base,manifest_path)
    with open(file,'rb') as f:
        manifest = json.load(f)

# Metadata file listing of the SEAAD-taxonomy directory; the cells below look up
# each table's relative path in it.
metadata = manifest['file_listing']['SEAAD-taxonomy']['metadata']

In [3]:
# Derived "view" tables live in <download_base>/<metadata relative_path>/views.
metadata_relative_path = manifest['directory_listing']['SEAAD-taxonomy']['directories']['metadata']['relative_path']
view_directory = pathlib.Path(os.path.join(download_base, metadata_relative_path, 'views'))

# When cache_views is True the views folder is created if it is missing.
cache_views = True
if cache_views:
    os.makedirs(view_directory, exist_ok=True)

In [4]:
# Read the term-set table listed in the manifest and key it by its 'label' column.
term_set_csv = os.path.join(
    download_base,
    metadata['cluster_annotation_term_set']['files']['csv']['relative_path'],
)
term_set = pd.read_csv(term_set_csv).set_index('label')
term_set

Unnamed: 0_level_0,name,parent_term_set_label,term_set_order
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CCN20230508_CLAS,class,,0
CCN20230508_SUBC,subclass,CCN20230508_CLAS,1
CCN20230508_SUPT,supertype,CCN20230508_SUBC,2
CCN20230508_ACPS,abundancechangecps,,3


In [5]:
# Read the annotation-term table and key it by its 'label' column.
term_csv = os.path.join(
    download_base,
    metadata['cluster_annotation_term']['files']['csv']['relative_path'],
)
# keep_default_na=False: empty fields are kept as empty strings instead of NaN.
term = pd.read_csv(term_csv, keep_default_na=False).set_index('label')
term.head(5)

Unnamed: 0_level_0,name,cluster_annotation_term_set_label,parent_term_label,parent_term_set_label,term_order,color_hex_triplet,cluster_annotation_term_set_name,parent_term_name
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CCN20230508_CLAS_0001,Neuronal: GABAergic,CCN20230508_CLAS,,,1,#F05A28,class,
CCN20230508_CLAS_0002,Neuronal: Glutamatergic,CCN20230508_CLAS,,,2,#00ADF8,class,
CCN20230508_CLAS_0003,Non-neuronal and Non-neural,CCN20230508_CLAS,,,3,#808080,class,
CCN20230508_SUBC_0001,Lamp5 Lhx6,CCN20230508_SUBC,CCN20230508_CLAS_0001,CCN20230508_CLAS,1,#935F50,subclass,Neuronal: GABAergic
CCN20230508_SUBC_0002,Lamp5,CCN20230508_SUBC,CCN20230508_CLAS_0001,CCN20230508_CLAS,2,#DA808C,subclass,Neuronal: GABAergic


In [6]:
# Count the non-null term names within each term set.
term_count = (
    term
    .groupby('cluster_annotation_term_set_name')[['name']]
    .count()
    .rename(columns={'name': 'count'})
)
term_count

Unnamed: 0_level_0,count
cluster_annotation_term_set_name,Unnamed: 1_level_1
abundancechangecps,3
class,3
subclass,24
supertype,139


In [7]:
# Read the cluster-to-term membership table (one row per cluster/term pairing).
membership_csv = os.path.join(
    download_base,
    metadata['cluster_to_cluster_annotation_membership']['files']['csv']['relative_path'],
)
# keep_default_na=False: empty fields are kept as empty strings instead of NaN.
membership = pd.read_csv(membership_csv, keep_default_na=False)
membership

Unnamed: 0,cluster_alias,cluster_annotation_term_name,cluster_annotation_term_label,cluster_annotation_term_set_label,cluster_annotation_term_set_name,color_hex_triplet
0,Lamp5_Lhx6_1,Neuronal: GABAergic,CCN20230508_CLAS_0001,CCN20230508_CLAS,class,#F05A28
1,Lamp5_1,Neuronal: GABAergic,CCN20230508_CLAS_0001,CCN20230508_CLAS,class,#F05A28
2,Lamp5_2,Neuronal: GABAergic,CCN20230508_CLAS_0001,CCN20230508_CLAS,class,#F05A28
3,Lamp5_3,Neuronal: GABAergic,CCN20230508_CLAS_0001,CCN20230508_CLAS,class,#F05A28
4,Lamp5_4,Neuronal: GABAergic,CCN20230508_CLAS_0001,CCN20230508_CLAS,class,#F05A28
...,...,...,...,...,...,...
551,Micro-PVM_2,unchanged,CCN20230508_ACPS_0001,CCN20230508_ACPS,abundancechangecps,#F7F7F7
552,Micro-PVM_2_3-SEAAD,unchanged,CCN20230508_ACPS_0001,CCN20230508_ACPS,abundancechangecps,#F7F7F7
553,Micro-PVM_3-SEAAD,increase,CCN20230508_ACPS_0002,CCN20230508_ACPS,abundancechangecps,#B2182B
554,Micro-PVM_4-SEAAD,unchanged,CCN20230508_ACPS_0001,CCN20230508_ACPS,abundancechangecps,#F7F7F7


In [8]:
# Pivot the membership table: one row per cluster_alias, one column per term set,
# each cell holding the name of the term the cluster belongs to in that set.
#
# No drop_duplicates() here: the groupby/unstack result already has exactly one
# row per cluster_alias, and drop_duplicates() compares column values only
# (the index is ignored). It could therefore only remove a distinct cluster
# whose annotations happen to equal another cluster's.
pivot = (
    membership
    .groupby(['cluster_alias', 'cluster_annotation_term_set_name'])['cluster_annotation_term_name']
    .first()
    .unstack()
)
pivot

cluster_annotation_term_set_name,abundancechangecps,class,subclass,supertype
cluster_alias,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Astro_1,unchanged,Non-neuronal and Non-neural,Astrocyte,Astro_1
Astro_2,increase,Non-neuronal and Non-neural,Astrocyte,Astro_2
Astro_3,unchanged,Non-neuronal and Non-neural,Astrocyte,Astro_3
Astro_4,unchanged,Non-neuronal and Non-neural,Astrocyte,Astro_4
Astro_5,unchanged,Non-neuronal and Non-neural,Astrocyte,Astro_5
...,...,...,...,...
Vip_23,unchanged,Neuronal: GABAergic,Vip,Vip_23
Vip_4,unchanged,Neuronal: GABAergic,Vip,Vip_4
Vip_5,unchanged,Neuronal: GABAergic,Vip,Vip_5
Vip_6,unchanged,Neuronal: GABAergic,Vip,Vip_6


In [9]:
# Pivot the membership table on colour: one row per cluster_alias, one
# '<term set name>_color' column per term set. Clusters with no entry for a
# term set get the fallback colour '#f9f9f9'.
#
# No drop_duplicates() here: the groupby/unstack result already has exactly one
# row per cluster_alias, and drop_duplicates() compares column values only
# (the index is ignored). It could therefore only remove a distinct cluster
# whose colours happen to equal another cluster's.
color = (
    membership
    .groupby(['cluster_alias', 'cluster_annotation_term_set_name'])['color_hex_triplet']
    .first()
    .unstack()
    .fillna('#f9f9f9')
)
color.columns = ['%s_color' % term_set_name for term_set_name in color.columns]
color

Unnamed: 0_level_0,abundancechangecps_color,class_color,subclass_color,supertype_color
cluster_alias,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Astro_1,#F7F7F7,#808080,#665C47,#D1C9BA
Astro_2,#B2182B,#808080,#665C47,#AAA395
Astro_3,#F7F7F7,#808080,#665C47,#847D71
Astro_4,#F7F7F7,#808080,#665C47,#5E574C
Astro_5,#F7F7F7,#808080,#665C47,#383228
...,...,...,...,...
Vip_23,#F7F7F7,#F05A28,#A45FBF,#492C56
Vip_4,#F7F7F7,#F05A28,#A45FBF,#CC9EE1
Vip_5,#F7F7F7,#F05A28,#A45FBF,#C598D9
Vip_6,#F7F7F7,#F05A28,#A45FBF,#BE92D2


In [10]:
# Write both pivoted views into the views folder when caching is enabled.
if cache_views:
    views_to_cache = (
        ('cluster_to_cluster_annotation_membership_pivoted.csv', pivot),
        ('cluster_to_cluster_annotation_membership_color.csv', color),
    )
    for view_filename, view_frame in views_to_cache:
        csv_path = os.path.join(view_directory, view_filename)
        view_frame.to_csv(csv_path)
