In [1]:
import html
import os
from pathlib import Path

import pandas as pd

from abc_atlas_access.abc_atlas_cache.abc_project_cache import AbcProjectCache

In [2]:
# Release tag; it selects which manifest file is loaded below.
version = '20250531'

# Relative path of the local directory handed to the cache.
download_base = Path('../../../data/abc_atlas')
abc_cache = AbcProjectCache.from_s3_cache(download_base)

manifest_path = f'releases/{version}/manifest.json'
abc_cache.load_manifest(manifest_path)

In [4]:
# Fetch the 'abbreviation_term' metadata table of the HMBA-BG taxonomy and
# show it as the cell output.
abbreviation_term = abc_cache.get_metadata_dataframe(
    file_name='abbreviation_term',
    directory='HMBA-BG-taxonomy-CCN20250428',
)
abbreviation_term

Unnamed: 0,abbreviation_term,abbreviation_meaning,abbreviation_type,primary_identifier,secondary_identifier
0,ACx,allocortex,anatomical,DHBA:10292,
1,ADARB2,adenosine deaminase RNA specific B2 (inactive),gene,ENSEMBL:ENSG00000185736,
2,AMY,amygdaloid complex,anatomical,DHBA:10361,
3,Astro,astrocyte,cell_type,CL:0000127,
4,Astrocyte,astrocyte,cell_type,CL:0000127,
...,...,...,...,...,...
100,VIP,vasoactive intestinal peptide,gene,ENSEMBL:ENSG00000146469,
101,VLMC,vascular leptomeningeal cell,cell_type,CL:4023051,
102,VTR,ventral tegmental region of midbrain,anatomical,DHBA:146034938,
103,Vascular,vascular system,anatomical,UBERON:0007798,


Helper functions that turn ontology identifiers into clickable HTML links.

In [19]:
def create_clickable_ident_org_link(id):
    """Wrap an identifier in an HTML anchor pointing at identifiers.org.

    Parameters
    ----------
    id : str
        Identifier, inserted verbatim into both the URL and the link text.

    Returns
    -------
    str
        An ``<a>`` element with ``target="_blank"`` whose href is
        ``https://identifiers.org/<id>``.
    """
    return f'<a href="https://identifiers.org/{id}" target="_blank">{id}</a>'

def create_clickable_dhba_link(id):
    """Build an HTML link to the brain-map.org atlas viewer for a ``prefix:value`` id.

    Parameters
    ----------
    id : str
        Identifier containing exactly one ``:``; the part after the colon is
        used as the ``structure`` query parameter.

    Returns
    -------
    str
        An ``<a>`` element with ``target="_blank"``, or ``id`` unchanged when
        the part after the colon starts with ``'AA'``.

    Raises
    ------
    ValueError
        If ``id`` does not split into exactly two parts on ``:``.
    """
    _prefix, structure = id.split(':')

    # Ids whose value part begins with 'AA' are returned as plain text, not linked.
    if structure.startswith('AA'):
        return id

    return f'<a href="https://atlas.brain-map.org/atlas?atlas=265297126&structure={structure}" target="_blank">{id}</a>'

def parse_abbreviation_term(id_val):
    """Convert one identifier cell into link markup.

    Parameters
    ----------
    id_val : str or missing value
        Identifier taken from an identifier column.

    Returns
    -------
    str
        ``""`` for a missing value; otherwise the result of
        ``create_clickable_dhba_link`` when the value starts with ``'DHBA'``
        and of ``create_clickable_ident_org_link`` for anything else.
    """
    if pd.isna(id_val):
        return ""

    # Pick the link builder from the identifier prefix, then apply it.
    link_builder = (
        create_clickable_dhba_link
        if id_val.startswith('DHBA')
        else create_clickable_ident_org_link
    )
    return link_builder(id_val)

Helper function to write the abbreviation table to an HTML file.

In [20]:
def create_output_html(df, file, title):
    """Render an abbreviation table as a standalone HTML page and write it to disk.

    The ``primary_identifier`` and ``secondary_identifier`` columns are passed
    through ``parse_abbreviation_term`` and each ``abbreviation_term`` value is
    wrapped in ``<b>`` tags. The caller's DataFrame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing at least the columns ``abbreviation_term``,
        ``primary_identifier`` and ``secondary_identifier``.
    file : str or path-like
        Destination of the HTML file; opened in write mode, so an existing
        file is overwritten.
    title : str
        Text for the page ``<title>``; HTML-escaped before insertion.
    """
    # Build a new frame rather than assigning into `df`: converting the
    # identifier columns in place would make a second call on the same frame
    # wrap the already-generated markup in another link.
    linked = df.assign(
        primary_identifier=df['primary_identifier'].apply(parse_abbreviation_term),
        secondary_identifier=df['secondary_identifier'].apply(parse_abbreviation_term),
    )

    formatter = {'abbreviation_term': lambda x: '<b>' + x + '</b>'}
    # escape=False so the generated <a>/<b> markup is emitted as HTML, not text.
    table = linked.to_html(index=False, na_rep='',
                           render_links=True, escape=False,
                           classes="mystyle", formatters=formatter)

    page_template = '''
    <html>
    <head>
    <meta charset="utf-8"/>
    <title>{title}</title>
    <link rel="stylesheet" type="text/css" href="../../_static/simple_style.css"/>
    </head>
    <body>
    {table}
    </body>
    </html>
    '''
    # One formatting pass fills both fields, so '%', '{' or '}' inside the
    # title or the table can no longer be misread as placeholders.
    page = page_template.format(title=html.escape(str(title)), table=table)

    # Explicit encoding so the result does not depend on the platform default
    # and matches the charset declared in the page head.
    with open(file, 'w', encoding='utf-8') as f:
        f.write(page)


Create the abbreviation list HTML for HMBA-BG

In [21]:
# Taxonomy name, used both as the metadata directory in the cache and as the
# local output folder, so the two cannot drift apart.
taxonomy_directory = 'HMBA-BG-taxonomy-CCN20250428'

# Sort into a new frame instead of using inplace=True, so the object returned
# by the cache is never modified and re-running this cell is idempotent.
terms = abc_cache.get_metadata_dataframe(
    directory=taxonomy_directory,
    file_name='abbreviation_term',
).sort_values('abbreviation_term')
print(len(terms))

output_directory = taxonomy_directory
os.makedirs(output_directory, exist_ok=True)
output_file = os.path.join(output_directory, 'abbreviation_list.html')
create_output_html(terms, output_file, f'{taxonomy_directory}: abbreviation list')

105
