# Table 1. Thalamus + Zona Incerta subset of the CCN20230722 whole mouse brain taxonomy.

In [1]:
import pandas as pd

from IPython.display import display, HTML

import sys
sys.path.append('/code/')
from thalamus_merfish_analysis import abc_load as abc

from matplotlib import rcParams
# fonttype 42 makes the PS/PDF backends embed fonts as TrueType (Type 42)
# instead of matplotlib's default Type 3 fonts
rcParams['ps.fonttype'] = 42
rcParams['pdf.fonttype'] = 42
# global default font size for figure text
rcParams['font.size'] = 7

import matplotlib.pyplot as plt
import seaborn as sns

get_ipython().run_line_magic('matplotlib', 'inline') 

## Load .obs data

In [2]:
# define path for saving outputs
# (relative path, so it resolves against the kernel's working directory;
#  none of the save steps that use it create the directory, so it must already exist)
results_dir = '../../results'

In [3]:
# combined metadata for the full dataset; this is where the taxonomy
# color columns are taken from
obs_wmb = abc.get_combined_metadata(drop_unused=False)

# .obs-style metadata for the thalamus subset
obs_th = abc.load_standard_thalamus(data_structure='obs')

# one color column per taxonomy level
color_cols = [
    'neurotransmitter_color',
    'class_color',
    'subclass_color',
    'supertype_color',
    'cluster_color',
]

# attach the color columns to the thalamus subset (join aligns on the index)
obs_th_color = obs_th.join(obs_wmb.loc[:, color_cols])

## Convert .obs dataframe to taxonomy table

In [4]:
# subset df to just taxonomy (+color) columns, keeping one row per unique cluster
taxonomy_levels = ['neurotransmitter','class', 'subclass', 'supertype', 'cluster']
obs_tax = obs_th_color.drop_duplicates(subset='cluster')[taxonomy_levels+color_cols]

# count the number of cells per unique cluster & add as column to the taxonomy df
cells_per_cluster_counts = (
    obs_th_color
    .groupby('cluster', observed=True)
    .size()
    .reset_index(name='num_cells_in_cluster')
)
obs_tax = obs_tax.merge(cells_per_cluster_counts, on='cluster')

# clean up the taxonomy columns for sorting
for col in taxonomy_levels:
    # Remove leading/trailing whitespace and store the labels as plain strings.
    # Plain column assignment is used instead of `.loc[:, col] = ...`: depending
    # on the pandas version, the `.loc` form either replaces the column or writes
    # into the existing Categorical, where it raises if stripping changes a label.
    # Converting to strings also discards any categories not present in the
    # TH+ZI dataset.
    obs_tax[col] = obs_tax[col].str.strip()
    # extract the ID # b/c lexically sorting the str object doesn't yield correct order
    # (raw string so `\d` is a regex digit class, not an invalid Python escape).
    # Labels without digits (e.g. neurotransmitter 'GABA') get NaN and therefore
    # do not influence the ordering.
    obs_tax[col+'_id'] = obs_tax[col].str.extract(r'(\d+)', expand=False).astype(float)

# sort by ID #s, from the coarsest taxonomy level to the finest
num_cols = [x+'_id' for x in taxonomy_levels]
taxonomy_df = obs_tax.sort_values(by=num_cols, ascending=True, ignore_index=True)
# clean up the ID columns; they were only needed as sort keys
taxonomy_df = taxonomy_df.drop(columns=num_cols)

display(taxonomy_df.head(5))

Unnamed: 0,neurotransmitter,class,subclass,supertype,cluster,neurotransmitter_color,class_color,subclass_color,supertype_color,cluster_color,num_cells_in_cluster
0,GABA,12 HY GABA,093 RT-ZI Gnb3 Gaba,0431 RT-ZI Gnb3 Gaba_1,1580 RT-ZI Gnb3 Gaba_1,#FF3358,#FF6600,#CCB65C,#99FFFC,#CC807A,72
1,GABA,12 HY GABA,093 RT-ZI Gnb3 Gaba,0431 RT-ZI Gnb3 Gaba_1,1581 RT-ZI Gnb3 Gaba_1,#FF3358,#FF6600,#CCB65C,#99FFFC,#FFBB73,128
2,GABA,12 HY GABA,093 RT-ZI Gnb3 Gaba,0431 RT-ZI Gnb3 Gaba_1,1582 RT-ZI Gnb3 Gaba_1,#FF3358,#FF6600,#CCB65C,#99FFFC,#66483D,181
3,GABA,12 HY GABA,093 RT-ZI Gnb3 Gaba,0431 RT-ZI Gnb3 Gaba_1,1583 RT-ZI Gnb3 Gaba_1,#FF3358,#FF6600,#CCB65C,#99FFFC,#81CC3D,78
4,GABA,12 HY GABA,093 RT-ZI Gnb3 Gaba,0431 RT-ZI Gnb3 Gaba_1,1584 RT-ZI Gnb3 Gaba_1,#FF3358,#FF6600,#CCB65C,#99FFFC,#8FFF26,47


In [5]:
# Write the sorted taxonomy table to the results directory (row index omitted)
taxonomy_df.to_csv(
    f'{results_dir}/thalamus_zi_taxonomy_CCN20230722.csv',
    index=False,
)

# A copy was also written once to the package resources folder; kept for
# reference only, it does not need to be run again:
# taxonomy_df.to_csv(f'../thalamus_merfish_analysis/resources/taxonomy_CCN20230722_th_zi_only.csv', index=False)

## Format taxonomy df as HTML table with colored markers

In [6]:
def create_abc_atlas_html_table(df):
    ''' Create an HTML table with the same custom formatting & dynamic color
    markers as the ABC Atlas taxonomy resource html page.
    
    See: https://alleninstitute.github.io/abc_atlas_access/_static/WMB-taxonomy/20230830/cluster.html#CS20230722_CLUS_0001

    Parameters
    ----------
    df : pd.DataFrame
        Table to render. Every column `<name>` that has a sibling column
        `<name>_color` is rendered as a colored circle followed by the cell
        text. Columns whose name ends in `_color` are not shown themselves.
        All other columns are shown as-is, with text values HTML-escaped.

    Returns
    -------
    str
        A `<style>` block followed by the `<table>` markup, so the styling
        travels with the table when the string is written to a file.
    '''
    # local import keeps the function self-contained
    from html import escape

    # CSS intended to mirror the ABC Atlas resource page:
    # even rows are shaded gray, odd rows keep the default background
    # (no :hover rule is defined here)
    styles = """
    <style>
    .mystyle {
        font-size: 11pt; 
        font-family: Arial;
        border-collapse: collapse; 
        border: 1px solid silver;
    }
    .mystyle td {
        padding: 5px;
    }
    .mystyle th {
        text-align: left;
        padding: 5px;
    }
    .mystyle tr:nth-child(even) {
        background: #E0E0E0;
    }
    .circle {
        background-color: blue;
        height: 20px;
        width: 20px;
        border-radius: 50%;
        display: inline-block;
    }
    .celltext {
        padding-left: 5px;
        display: inline-block;
    }
    </style>
    """

    def escape_text(value):
        ''' HTML-escape str values; return any other value unchanged. '''
        return escape(value) if isinstance(value, str) else value

    def format_cell(value, color):
        ''' Set the circle color and text value for each cell using the defined 
        ABC Atlas taxonomy style.

        Both pieces are escaped because the table is rendered with
        escape=False (required so the <span> markup survives).
        '''
        return (f'<span class="circle" style="background-color: {escape(str(color))};"></span>'
                f'<span class="celltext">{escape(str(value))}</span>')

    # apply custom HTML formatting to each cell in the df
    # (start from df's index so zero-row inputs and list assignment both work)
    formatted_df = pd.DataFrame(index=df.index)
    for col in df.columns:
        # color columns only feed the markers; they are never displayed
        if isinstance(col, str) and col.endswith('_color'):
            continue
        color_col = f'{col}_color'
        if color_col in df.columns:
            # add colored circle to each column that has a corresponding color column
            formatted_df[col] = [
                format_cell(value, color)
                for value, color in zip(df[col], df[color_col])
            ]
        elif any(isinstance(value, str) for value in df[col]):
            # plain text column: escape it, since to_html will not
            formatted_df[col] = [escape_text(value) for value in df[col]]
        else:
            # otherwise, just the cell value (keeps numeric dtype/formatting)
            formatted_df[col] = df[col]

    # convert df to html table via pandas; escape=False keeps the <span> markup intact
    html_table = formatted_df.to_html(escape=False, classes='mystyle')

    # prepend style to the HTML table so it's stored with the html file
    # (alternatively, could be saved to a separate .css file and linked to the html file)
    full_html = styles + html_table
    return full_html

In [7]:
# convert taxonomy df to a HTML table formatted the same as the ABC Atlas resource
html_table = create_abc_atlas_html_table(taxonomy_df)

# save as html file to results
# (encoding is pinned so the written bytes do not depend on the platform's locale default)
with open(f'{results_dir}/thalamus_zi_taxonomy_CCN20230722_table.html', 'w', encoding='utf-8') as f:
    f.write(html_table)

## Display full taxonomy table in HTML format

In [8]:
# show the HTML table inline
# (wrapping the string in HTML() makes the notebook render it as markup
#  rather than printing the raw text)
display(HTML(html_table))

Unnamed: 0,neurotransmitter,class,subclass,supertype,cluster,num_cells_in_cluster
0,GABA,12 HY GABA,093 RT-ZI Gnb3 Gaba,0431 RT-ZI Gnb3 Gaba_1,1580 RT-ZI Gnb3 Gaba_1,72
1,GABA,12 HY GABA,093 RT-ZI Gnb3 Gaba,0431 RT-ZI Gnb3 Gaba_1,1581 RT-ZI Gnb3 Gaba_1,128
2,GABA,12 HY GABA,093 RT-ZI Gnb3 Gaba,0431 RT-ZI Gnb3 Gaba_1,1582 RT-ZI Gnb3 Gaba_1,181
3,GABA,12 HY GABA,093 RT-ZI Gnb3 Gaba,0431 RT-ZI Gnb3 Gaba_1,1583 RT-ZI Gnb3 Gaba_1,78
4,GABA,12 HY GABA,093 RT-ZI Gnb3 Gaba,0431 RT-ZI Gnb3 Gaba_1,1584 RT-ZI Gnb3 Gaba_1,47
5,GABA,12 HY GABA,093 RT-ZI Gnb3 Gaba,0431 RT-ZI Gnb3 Gaba_1,1585 RT-ZI Gnb3 Gaba_1,42
6,GABA,12 HY GABA,093 RT-ZI Gnb3 Gaba,0432 RT-ZI Gnb3 Gaba_2,1586 RT-ZI Gnb3 Gaba_2,104
7,GABA,12 HY GABA,093 RT-ZI Gnb3 Gaba,0432 RT-ZI Gnb3 Gaba_2,1587 RT-ZI Gnb3 Gaba_2,49
8,GABA,12 HY GABA,093 RT-ZI Gnb3 Gaba,0432 RT-ZI Gnb3 Gaba_2,1588 RT-ZI Gnb3 Gaba_2,238
9,GABA,12 HY GABA,093 RT-ZI Gnb3 Gaba,0432 RT-ZI Gnb3 Gaba_2,1589 RT-ZI Gnb3 Gaba_2,20
