In [6]:
import os
import pandas as pd
from pathlib import Path
import numpy as np
import anndata
import time
import matplotlib.pyplot as plt

from abc_atlas_access.abc_atlas_cache.abc_project_cache import AbcProjectCache

In [17]:
# Release tag; interpolated into the manifest path loaded below.
version = '20230830'

# Root directory handed to the cache (relative to the notebook's working
# directory) -- presumably where downloaded files are stored; confirm.
download_base = Path('../../../abc_download_root')
abc_cache = AbcProjectCache.from_s3_cache(download_base)
# Select the manifest for the release chosen above.
abc_cache.load_manifest(f'releases/{version}/manifest.json')    

type.compare_manifests('releases/20230630/manifest.json', 'releases/20231215/manifest.json')
To load another version of the dataset, run
type.load_manifest('releases/20231215/manifest.json')


Helper function to format a clickable ENSEMBL id link

In [8]:
def create_clickable_ENSEMBL_id(id):
    """Return an HTML anchor linking an ENSEMBL id to identifiers.org.

    Parameters
    ----------
    id : str
        ENSEMBL gene identifier (without the ``ENSEMBL:`` prefix).

    Returns
    -------
    str
        ``<a>`` element that opens the identifiers.org page in a new tab,
        or an empty string when ``id`` is missing (NaN / None), matching
        the behaviour of ``create_clickable_NCBI_id``.
    """
    # A missing id would otherwise render as a live link to "ENSEMBL:nan".
    if not pd.notna(id):
        return ""
    return '''<a href="https://identifiers.org/ENSEMBL:{id}" target="_blank">{id}</a>'''.format(id=id)

Helper function to format a clickable NCBI id link

In [9]:
def create_clickable_NCBI_id(id):
    """Return an HTML anchor linking an NCBI id to identifiers.org.

    A missing value (as judged by ``pd.notna``) yields an empty string so
    the table cell is left blank.
    """
    # Guard clause: nothing to link when the identifier is absent.
    if not pd.notna(id):
        return ""

    link_template = '<a href="https://identifiers.org/{id}" target="_blank">{id}</a>'
    return link_template.format(id=id)

Helper function to write a gene list table to an HTML file

In [10]:
def create_output_html( df, file, title ) :
    """Write a gene table to ``file`` as a standalone HTML page.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to render. Must contain a 'gene_identifier' column; a
        'mapped_ncbi_identifier' column, when present, is turned into
        links as well. The frame itself is left unmodified.
    file : str or path-like
        Destination path; an existing file is overwritten.
    title : str
        Text placed in the page's ``<title>`` element.
    """
    # Work on a copy so the caller's frame keeps its raw identifiers
    # instead of being overwritten with HTML anchors.
    table_df = df.copy()

    table_df['gene_identifier'] = table_df['gene_identifier'].apply(create_clickable_ENSEMBL_id)
    if 'mapped_ncbi_identifier' in table_df.columns :
        table_df['mapped_ncbi_identifier'] = table_df['mapped_ncbi_identifier'].apply(create_clickable_NCBI_id)

    formatter = {'gene_symbol': lambda x: '<b>' + x + '</b>'}
    # escape=False is required so the anchors/bold tags built above are
    # emitted as markup rather than as literal text.
    output = table_df.to_html(index=False, na_rep='',
                              render_links=True,escape=False,
                              classes="mystyle",formatters=formatter)

    # Title and table are substituted in a single .format() call: values
    # passed as arguments are never re-parsed, so braces in either are safe.
    html_template = '''
    <html>
    <head>
    <meta charset="utf-8"/>
    <title>{title}</title>
    <link rel="stylesheet" type="text/css" href="../../_static/simple_style.css"/>
    </head>
    <body>
    {table}
    </body>
    </html>
    '''

    # OUTPUT AN HTML FILE
    # Explicit encoding keeps the output independent of the platform default.
    with open(file, 'w', encoding='utf-8') as f:
        f.write(html_template.format(title=title, table=output))


Create gene list html for WMB-10X

In [11]:
# Load the WMB-10X gene metadata, ordered by gene symbol.
gene = (
    abc_cache
    .get_metadata_dataframe(directory='WMB-10X', file_name='gene')
    .sort_values('gene_symbol')
)
print(len(gene))

# Write the table to WMB-10X/gene_list.html, creating the folder if needed.
output_directory = 'WMB-10X'
os.makedirs(output_directory, exist_ok=True)
output_file = os.path.join(output_directory, 'gene_list.html')
create_output_html(gene, output_file, 'WMB-10X: gene list')

gene.csv: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 2.30M/2.30M [00:00<00:00, 3.47MMB/s]


32285


Create gene list html for MERFISH-C57BL6J-638850

In [12]:
# Load the MERFISH-C57BL6J-638850 gene metadata.
gene = abc_cache.get_metadata_dataframe(directory='MERFISH-C57BL6J-638850', file_name='gene')

# Keep only rows whose identifier does not contain 'Blank'. The vectorised
# string test with na=False treats a missing identifier as "not blank"
# instead of raising TypeError like a Python-level `in` check would.
pred = ~gene['gene_identifier'].str.contains('Blank', regex=False, na=False)
gene = (
    gene.loc[pred, ['gene_identifier','gene_symbol','name','transcript_identifier','mapped_ncbi_identifier']]
    .sort_values('gene_symbol')  # returns a new frame; no in-place mutation
)
print(len(gene))


# Write the table to MERFISH-C57BL6J-638850/gene_list.html.
output_directory = 'MERFISH-C57BL6J-638850'
os.makedirs( output_directory, exist_ok=True)
output_file = os.path.join( output_directory, 'gene_list.html')
create_output_html( gene, output_file, 'MERFISH-C57BL6J-638850: gene list')

gene.csv: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 48.4k/48.4k [00:00<00:00, 436kMB/s]

500





Create gene list html for Zhuang-ABCA-1

In [18]:
# Display the directory names known to the loaded manifest, to look up the
# name to pass as `directory=` in the next cell.
abc_cache.list_directories

['Allen-CCF-2020',
 'MERFISH-C57BL6J-638850',
 'MERFISH-C57BL6J-638850-CCF',
 'MERFISH-C57BL6J-638850-sections',
 'WMB-10X',
 'WMB-10XMulti',
 'WMB-10Xv2',
 'WMB-10Xv3',
 'WMB-neighborhoods',
 'WMB-taxonomy',
 'Zhuang-ABCA-1',
 'Zhuang-ABCA-1-CCF',
 'Zhuang-ABCA-2',
 'Zhuang-ABCA-2-CCF',
 'Zhuang-ABCA-3',
 'Zhuang-ABCA-3-CCF',
 'Zhuang-ABCA-4',
 'Zhuang-ABCA-4-CCF']

In [19]:
# Load the Zhuang-ABCA-1 gene metadata, ordered by gene symbol.
# sort_values returns a new frame, so nothing is mutated in place.
gene = abc_cache.get_metadata_dataframe(directory='Zhuang-ABCA-1', file_name='gene')
gene = gene.sort_values('gene_symbol')
print(len(gene))

# Write the table to Zhuang-ABCA-1/gene_list.html, creating the folder if needed.
output_directory = 'Zhuang-ABCA-1'
os.makedirs( output_directory, exist_ok=True)
output_file = os.path.join( output_directory, 'gene_list.html')
create_output_html( gene, output_file, 'Zhuang-ABCA-1: gene list')

gene.csv: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 84.7k/84.7k [00:00<00:00, 377kMB/s]

1122



