In [4]:
import os
import pandas as pd
from pathlib import Path

from abc_atlas_access.abc_atlas_cache.abc_project_cache import AbcProjectCache

In [5]:
# Release identifier interpolated into the manifest path below.
version = '20250531'

# Directory handed to the cache constructor
# (presumably where downloaded files are stored -- confirm against abc_atlas_access).
download_base = Path('../../../data/abc_atlas')

# Build the project cache and load the manifest for the chosen release.
abc_cache = AbcProjectCache.from_s3_cache(download_base)
abc_cache.load_manifest('releases/{}/manifest.json'.format(version))

Helper function to format a clickable ENSEMBL id link

In [10]:
def create_clickable_ENSEMBL_id(id):
    """Return an HTML anchor that links an ENSEMBL identifier to identifiers.org.

    Parameters
    ----------
    id : str or missing value
        Either a bare identifier, or a string starting with ``ENSEMBL`` and
        followed by ``:`` and the identifier. Non-string values are converted
        with ``str``. (The name shadows the builtin ``id``; it is kept so
        existing keyword callers continue to work.)

    Returns
    -------
    str
        ``""`` when ``id`` is missing according to ``pd.isna``; otherwise an
        ``<a>`` element whose href is ``https://identifiers.org/ENSEMBL:<id>``
        and whose link text is the identifier without the ``ENSEMBL:`` prefix.
    """
    if pd.isna(id):
        return ""

    id = str(id)
    if id.startswith('ENSEMBL'):
        # Take the piece after the first ':' (same result as before for
        # prefixed ids). A value with no ':' is left untouched instead of
        # raising IndexError.
        pieces = id.split(':')
        if len(pieces) > 1:
            id = pieces[1]

    return f'<a href="https://identifiers.org/ENSEMBL:{id}" target="_blank">{id}</a>'

Helper function to format a clickable NCBI id link

In [7]:
def create_clickable_NCBI_id(id):
    """Return an HTML anchor that links an identifier to identifiers.org.

    The value is appended directly after ``https://identifiers.org/`` with no
    prefix added, and is also used as the link text. Missing values (as
    judged by pandas) produce an empty string.
    """
    if pd.isna(id):
        return ""
    return f'<a href="https://identifiers.org/{id}" target="_blank">{id}</a>'

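Helper functions to write a gene list as an HTML page (one for tables whose gene_identifier is an ENSEMBL id, one for tables whose gene_identifier is an NCBI id)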

In [8]:
def _write_gene_list_html(df, file, title, identifier_formatter, optional_formatters):
    """Render ``df`` as an HTML table and write a complete page to ``file``.

    Parameters
    ----------
    df : pandas.DataFrame
        Gene table. Must contain a ``gene_identifier`` column.
    file : str or path-like
        Destination path; an existing file is overwritten.
    title : str
        Text placed in the page ``<title>``. Inserted verbatim (not escaped).
    identifier_formatter : callable
        Applied to every value of ``gene_identifier`` to produce its link.
    optional_formatters : dict
        Maps a column name to a callable; each is applied only when that
        column is present in ``df``.
    """
    # Work on a copy: the caller's frame must not end up holding HTML
    # anchors, otherwise a second call on the same frame double-wraps links.
    table = df.copy()
    table['gene_identifier'] = table['gene_identifier'].apply(identifier_formatter)
    for column, formatter in optional_formatters.items():
        if column in table.columns:
            table[column] = table[column].apply(formatter)

    # escape=False keeps the generated <a>/<b> markup intact in the output.
    table_html = table.to_html(
        index=False,
        na_rep='',
        render_links=True,
        escape=False,
        classes="mystyle",
        formatters={'gene_symbol': lambda symbol: f'<b>{symbol}</b>'},
    )

    # Build the page by direct interpolation. The earlier "%"-then-.format()
    # approach failed when the title contained '%', '{' or '}'. The stray '.'
    # that used to follow </html> is dropped.
    page = (
        '\n'
        '    <html>\n'
        f'    <head><title>{title}</title></head>\n'
        '    <link rel="stylesheet" type="text/css" href="../../_static/simple_style.css"/>\n'
        '    <body>\n'
        f'    {table_html}\n'
        '    </body>\n'
        '    </html>\n'
    )

    # Explicit encoding so the result does not depend on the platform default.
    with open(file, 'w', encoding='utf-8') as f:
        f.write(page)


def create_output_html( df, file, title ) :
    """Write a gene list page for a table keyed by ENSEMBL identifiers.

    ``gene_identifier`` values become ENSEMBL links; when a
    ``mapped_ncbi_identifier`` column is present its values become NCBI
    links. ``df`` itself is not modified. Returns ``None``.
    """
    _write_gene_list_html(
        df, file, title,
        identifier_formatter=create_clickable_ENSEMBL_id,
        optional_formatters={'mapped_ncbi_identifier': create_clickable_NCBI_id},
    )


def create_output_ncbi_html( df, file, title ) :
    """Write a gene list page for a table keyed by NCBI identifiers.

    ``gene_identifier`` values become NCBI links; when an
    ``ensembl_gene_identifier`` column is present its values become ENSEMBL
    links. ``df`` itself is not modified. Returns ``None``.
    """
    _write_gene_list_html(
        df, file, title,
        identifier_formatter=create_clickable_NCBI_id,
        optional_formatters={'ensembl_gene_identifier': create_clickable_ENSEMBL_id},
    )


Create gene list html for WMB-10X

In [13]:
# The dataset directory name doubles as the local output folder.
output_directory = 'WMB-10X'

# Load the gene metadata table and order it by gene symbol.
gene = abc_cache.get_metadata_dataframe(
    directory=output_directory,
    file_name='gene',
).sort_values('gene_symbol')
print(len(gene))

# Create the folder if needed and write <output_directory>/gene_list.html.
os.makedirs(output_directory, exist_ok=True)
output_file = os.path.join(output_directory, 'gene_list.html')
create_output_html(gene, output_file, 'WMB-10X: gene list')

32285


Create gene list html for WHB-10Xv3

In [14]:
# The dataset directory name doubles as the local output folder.
output_directory = 'WHB-10Xv3'

# Load the gene metadata table and order it by gene symbol.
gene = abc_cache.get_metadata_dataframe(
    directory=output_directory,
    file_name='gene',
).sort_values('gene_symbol')
print(len(gene))

# Create the folder if needed and write <output_directory>/gene_list.html.
os.makedirs(output_directory, exist_ok=True)
output_file = os.path.join(output_directory, 'gene_list.html')
create_output_html(gene, output_file, 'WHB-10Xv3: gene list')

gene.csv: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.23M/4.23M [00:00<00:00, 8.67MMB/s]


59357


Create gene list html for MERFISH-C57BL6J-638850

In [8]:
# The dataset directory name doubles as the local output folder.
output_directory = 'MERFISH-C57BL6J-638850'
gene = abc_cache.get_metadata_dataframe(directory=output_directory, file_name='gene')

# Keep only rows whose identifier does not contain the text 'Blank'.
pred = [not ('Blank' in identifier) for identifier in gene['gene_identifier']]

# Filter, order by gene symbol, then restrict to the columns shown on the page.
gene = (
    gene[pred]
    .copy()
    .sort_values('gene_symbol')
    [['gene_identifier', 'gene_symbol', 'name', 'transcript_identifier', 'mapped_ncbi_identifier']]
)
print(len(gene))

# Create the folder if needed and write <output_directory>/gene_list.html.
os.makedirs(output_directory, exist_ok=True)
output_file = os.path.join(output_directory, 'gene_list.html')
create_output_html(gene, output_file, 'MERFISH-C57BL6J-638850: gene list')

500


In [18]:
# The dataset directory name doubles as the local output folder.
output_directory = 'MERFISH-C57BL6J-638850-imputed'
gene = abc_cache.get_metadata_dataframe(directory=output_directory, file_name='gene')

# Keep only rows whose identifier does not contain the text 'Blank'.
pred = [not ('Blank' in identifier) for identifier in gene['gene_identifier']]

# Filter, order by gene symbol, then restrict to the columns shown on the page.
gene = (
    gene[pred]
    .copy()
    .sort_values('gene_symbol')
    [['gene_identifier', 'gene_symbol', 'name', 'mapped_ncbi_identifier']]
)
print(len(gene))

# Create the folder if needed and write <output_directory>/gene_list.html.
os.makedirs(output_directory, exist_ok=True)
output_file = os.path.join(output_directory, 'gene_list.html')
create_output_html(gene, output_file, 'MERFISH-C57BL6J-638850-imputed: gene list')

8460


Create gene list html for Zhuang-ABCA-1

In [19]:
# The dataset directory name doubles as the local output folder.
output_directory = 'Zhuang-ABCA-1'

# Load the gene metadata table and order it by gene symbol.
# (A stray bare `gene` expression that sat mid-cell was removed: it was not
# the last statement, so it displayed nothing and had no effect.)
gene = abc_cache.get_metadata_dataframe(
    directory=output_directory,
    file_name='gene',
).sort_values('gene_symbol')
print(len(gene))

# Create the folder if needed and write <output_directory>/gene_list.html.
os.makedirs(output_directory, exist_ok=True)
output_file = os.path.join(output_directory, 'gene_list.html')
create_output_html(gene, output_file, 'Zhuang-ABCA-1: gene list')

gene.csv: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 84.7k/84.7k [00:00<00:00, 540kMB/s]


1122


Create gene list html for ASAP-PMDBS-10X

In [7]:
# The dataset directory name doubles as the local output folder.
output_directory = 'ASAP-PMDBS-10X'

# Load the gene metadata table and order it by gene symbol.
gene = abc_cache.get_metadata_dataframe(
    directory=output_directory,
    file_name='gene',
).sort_values('gene_symbol')
print(len(gene))

# Create the folder if needed and write <output_directory>/gene_list.html.
os.makedirs(output_directory, exist_ok=True)
output_file = os.path.join(output_directory, 'gene_list.html')
create_output_html(gene, output_file, 'ASAP-PMDBS-10X: gene list')

36601


HMBA-BG aligned gene list

In [14]:
# The dataset directory name doubles as the local output folder.
output_directory = 'HMBA-10xMultiome-BG-Aligned'

# Load the gene metadata table, then order it by gene symbol.
gene = abc_cache.get_metadata_dataframe(directory=output_directory, file_name='gene')
gene = gene.sort_values('gene_symbol')
print(len(gene))

# This table goes through the NCBI variant of the page writer.
os.makedirs(output_directory, exist_ok=True)
output_file = os.path.join(output_directory, 'gene_list.html')
create_output_ncbi_html(gene, output_file, 'HMBA-10xMultiome-BG-Aligned: gene list')

16630


HMBA-BG Human gene list

In [13]:
# The three HMBA-BG species tables share one dataset directory / output folder.
output_directory = 'HMBA-10xMultiome-BG'

# Load the human gene table, then order it by gene symbol.
gene = abc_cache.get_metadata_dataframe(directory=output_directory, file_name='human_gene')
gene = gene.sort_values('gene_symbol')
print(len(gene))

# The human table goes through the ENSEMBL variant of the page writer.
os.makedirs(output_directory, exist_ok=True)
output_file = os.path.join(output_directory, 'human_gene_list.html')
create_output_html(gene, output_file, 'HMBA-10xMultiome-BG Human: gene list')

36601


HMBA-BG Macaque gene list

In [11]:
# The three HMBA-BG species tables share one dataset directory / output folder.
output_directory = 'HMBA-10xMultiome-BG'

# Load the macaque gene table, then order it by gene symbol.
gene = abc_cache.get_metadata_dataframe(directory=output_directory, file_name='macaque_gene')
gene = gene.sort_values('gene_symbol')
print(len(gene))

# The macaque table goes through the NCBI variant of the page writer.
os.makedirs(output_directory, exist_ok=True)
output_file = os.path.join(output_directory, 'macaque_gene_list.html')
create_output_ncbi_html(gene, output_file, 'HMBA-10xMultiome-BG Macaque: gene list')

35219


HMBA-BG Marmoset gene list

In [12]:
# The three HMBA-BG species tables share one dataset directory / output folder.
output_directory = 'HMBA-10xMultiome-BG'

# Load the marmoset gene table, then order it by gene symbol.
gene = abc_cache.get_metadata_dataframe(directory=output_directory, file_name='marmoset_gene')
gene = gene.sort_values('gene_symbol')
print(len(gene))

# The marmoset table goes through the NCBI variant of the page writer.
os.makedirs(output_directory, exist_ok=True)
output_file = os.path.join(output_directory, 'marmoset_gene_list.html')
create_output_ncbi_html(gene, output_file, 'HMBA-10xMultiome-BG Marmoset: gene list')

35787
