In [1]:
import os
import pandas as pd
import numpy as np
import anndata
import time
import matplotlib.pyplot as plt
import json
import requests

In [2]:
# Local root directory under which the release files referenced by the
# manifest are expected to be found (joined with each file's relative_path).
download_base = '../../../abc_download_root'

# Fetch the release manifest describing every file in the 20230630 release.
url = 'https://allen-brain-cell-atlas.s3-us-west-2.amazonaws.com/releases/20230630/manifest.json'
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error page into an immediate, explicit
# failure instead of a confusing JSON/KeyError further down the notebook.
response = requests.get(url, timeout=60)
response.raise_for_status()
manifest = response.json()
    

Helper function to format a clickable ENSEMBL id link

In [3]:
def create_clickable_ENSEMBL_id(id):
    """Return an HTML anchor pointing at the identifiers.org page for `id`.

    Parameters
    ----------
    id : object
        Identifier inserted (via default string formatting) both into the
        link target and the visible link text.

    Returns
    -------
    str
        An ``<a ... target="_blank">`` element, so the link opens in a new tab.
    """
    href = f"https://identifiers.org/ENSEMBL:{id}"
    return f'<a href="{href}" target="_blank">{id}</a>'

Helper function to write a gene table to a styled HTML page

In [4]:
def create_output_html( df, file, title ) :
    """Write a gene table to `file` as a standalone HTML page.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to render. Must contain a 'gene_identifier' column, whose values
        are turned into clickable links; 'gene_symbol' values are rendered in
        bold. The frame itself is not modified.
    file : str or path-like
        Destination of the HTML page; an existing file is overwritten.
    title : str
        Text placed in the page's ``<title>`` element.

    Raises
    ------
    KeyError
        If `df` has no 'gene_identifier' column.
    """
    # Build the linked column on a derived frame: assigning into `df` directly
    # would replace the caller's identifiers with anchor markup, and a second
    # call on the same frame would then wrap the links a second time.
    linked = df.assign(
        gene_identifier=df['gene_identifier'].apply(create_clickable_ENSEMBL_id)
    )
    formatter = {'gene_symbol': lambda x: '<b>' + x + '</b>'}
    # escape=False is required so the anchor and <b> markup built above is
    # emitted as HTML rather than as escaped text.
    table = linked.to_html(index=False, na_rep='',
                           render_links=True, escape=False,
                           classes="mystyle", formatters=formatter)

    # Single-pass template: filling title and table together means braces or
    # percent signs inside the title can no longer break the formatting.
    page_template = '''
    <html>
    <head>
    <meta charset="utf-8"/>
    <title>{title}</title>
    <link rel="stylesheet" type="text/css" href="simple_style.css"/>
    </head>
    <body>
    {table}
    </body>
    </html>
    '''

    # Explicit encoding so the output does not depend on the platform default
    # and matches the charset declared in the page head.
    with open(file, 'w', encoding='utf-8') as f:
        f.write(page_template.format(title=title, table=table))


Create gene list html for WMB-10X

In [5]:
# Locate the WMB-10X gene table through the release manifest.
metadata = manifest['file_listing']['WMB-10X']['metadata']
rpath = metadata['gene']['files']['csv']['relative_path']
input_file = os.path.join(download_base, rpath)

# Load the table ordered by gene symbol. The chained sort replaces the
# in-place one, and the bare `gene` expression that used to sit mid-cell was
# dropped because only a cell's last expression is displayed.
gene = pd.read_csv(input_file).sort_values('gene_symbol')
print(len(gene))

output_file = 'WMB-10x_gene-list.html'
create_output_html(gene, output_file, 'WMB-10x gene list')

32285


Create gene list html for MERFISH-C57BL6J-638850 (entries whose identifier contains "Blank" are excluded)

In [6]:
# Locate the MERFISH gene table through the release manifest.
metadata = manifest['file_listing']['MERFISH-C57BL6J-638850']['metadata']
rpath = metadata['gene']['files']['csv']['relative_path']
input_file = os.path.join(download_base, rpath)

gene = pd.read_csv(input_file)

# Keep only rows whose identifier does not contain 'Blank', restrict the
# table to the columns shown in the page, and order it by gene symbol.
pred = ~gene['gene_identifier'].str.contains('Blank', regex=False)
display_columns = ['gene_identifier', 'gene_symbol', 'name', 'transcript_identifier']
gene = gene.loc[pred, display_columns].sort_values('gene_symbol')
print(len(gene))

output_file = 'MERFISH-C57BL6J-638850_gene-list.html'
create_output_html(gene, output_file, 'MERFISH-C57BL6J-638850 gene list')

500
