In [1]:
import os
import pandas as pd
import numpy as np
import anndata
import time

In [2]:
# Location of the metadata release this notebook reads from.
input_base = '/allen/programs/celltypes/workgroups/rnaseqanalysis/lydian/ABC_handoff'
input_directory = os.path.join(
    input_base,
    'metadata',
    '20230630',
    'MERFISH-C57BL6J-638850',
)

# Sub-directory holding the pre-joined "view" CSVs that later cells load.
cache_views = True
view_directory = os.path.join( input_directory, 'views' )
if cache_views :
    # exist_ok=True: re-running this cell is harmless when the directory is already there.
    os.makedirs( view_directory, exist_ok=True )

In [7]:
# Load the per-cell metadata view, keyed by cell label.
# cell_label and neurotransmitter are forced to str so pandas does not infer another dtype.
file = os.path.join( input_directory, 'views', 'cell_metadata_with_cluster_annotation.csv' )
cell = pd.read_csv(
    file,
    dtype={"cell_label":str,"neurotransmitter":str},
).set_index('cell_label')

# Keep only the rows whose low_quality_mapping flag is exactly False.
pred = cell['low_quality_mapping'].eq(False)
cell = cell.loc[pred]

In [8]:
# Show which metadata columns are available on the filtered cell table.
cell.columns

Index(['brain_section_label', 'cluster_alias', 'average_correlation_score',
       'matrix_prefix', 'donor_label', 'low_quality_mapping', 'donor_genotype',
       'donor_sex', 'x', 'y', 'z', 'neurotransmitter', 'division', 'class',
       'subclass', 'supertype', 'cluster'],
      dtype='object')

In [9]:
# Load the example-gene expression view, keyed by cell label so it can be joined onto `cell`.
file = os.path.join( input_directory, 'views', 'example_genes_all_cells_expression.csv' )
exp = pd.read_csv(
    file,
    dtype={"cell_label":str},
).set_index('cell_label')

In [14]:
def aggregate_by_metadata( df, gnames, value ) :
    """Average the selected columns within each group of a metadata column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding both the grouping column and the columns to average.
    gnames : str or sequence of str
        Column name(s) to average. A single name may be passed as a plain
        string; any other iterable (list, tuple, Index) is converted to a
        list. Must contain at least one name, because the result is sorted
        by the first one.
    value : str
        Name of the column to group by.

    Returns
    -------
    pandas.DataFrame
        One row per group (indexed by the group key) and one column per
        entry in ``gnames``, holding the group means, sorted in descending
        order of the first column in ``gnames``.
    """
    # Normalise to a list: a bare string would select a Series (which
    # sort_values(by=...) rejects), and a tuple is not accepted as a
    # multi-column selector by groupby in current pandas.
    gnames = [gnames] if isinstance(gnames, str) else list(gnames)

    grouped = df.groupby(value)[gnames].mean()
    grouped = grouped.sort_values(by=gnames[0],ascending=False)
    return grouped

In [15]:
# Genes summarised per neurotransmitter type in the next cell.
ntgenes = [
    'Slc17a7', 'Slc17a6', 'Slc17a8',
    'Slc32a1', 'Slc6a5', 'Slc6a3', 'Slc6a4',
]
# Pull just those expression columns and attach them to the cell metadata.
# Both tables are indexed by cell_label; the left join keeps every row of `cell`.
filtered = exp.loc[:, ntgenes]
joined = cell.join( filtered, how='left' )

In [16]:
# Mean expression of each gene in `ntgenes` per neurotransmitter type,
# rendered as a heat-mapped table (sorted by the first gene in the list).
agg = aggregate_by_metadata( df=joined, gnames=ntgenes, value='neurotransmitter' )
agg.style.background_gradient( cmap='Reds' )

Unnamed: 0_level_0,Slc17a7,Slc17a6,Slc17a8,Slc32a1,Slc6a5,Slc6a3,Slc6a4
neurotransmitter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Glut,5.45857,1.465431,0.19853,0.644799,0.1686,0.065161,0.060973
Glut-GABA,1.584636,1.415066,3.167689,4.636293,0.139741,0.203079,0.19931
Chol,1.336436,1.825008,0.908047,0.775578,0.348834,0.102714,0.12668
GABA,1.292743,0.330417,0.189557,4.356985,0.13847,0.073594,0.071125
Dopa,1.124439,1.19346,0.167337,2.669397,0.107171,4.27548,0.121355
GABA-Glyc,0.551766,0.866311,0.15279,5.134373,4.611229,0.072027,0.069877
Hist,0.414171,0.273116,0.09279,0.203777,0.042676,0.019365,0.037957
Nora,0.350148,2.256105,0.242355,0.509979,0.421445,0.078692,0.047854
Sero,0.249213,0.516299,2.996847,0.809066,0.279549,0.087234,6.667489


In [17]:
# Repeat the join-and-aggregate steps for a single example gene.
exgenes = ['Tac2']
filtered = exp.loc[:, exgenes]
joined = cell.join( filtered, how='left' )

# Mean Tac2 expression per neurotransmitter type, highest first.
agg = aggregate_by_metadata( df=joined, gnames=exgenes, value='neurotransmitter' )
agg.style.background_gradient( cmap='Reds' )

Unnamed: 0_level_0,Tac2
neurotransmitter,Unnamed: 1_level_1
Glut-GABA,0.996132
Chol,0.86014
GABA,0.245813
GABA-Glyc,0.206716
Glut,0.183282
Hist,0.172152
Nora,0.13576
Dopa,0.13044
Sero,0.121097


In [18]:
# Mean expression of the example gene(s) per class; keep only the 8 highest rows.
agg = aggregate_by_metadata( df=joined, gnames=exgenes, value='class' ).iloc[:8]
agg.style.background_gradient( cmap='Reds' )

Unnamed: 0_level_0,Tac2
class,Unnamed: 1_level_1
08 MH-LH Glut,3.684035
04 CGE GABA,1.193551
14 CNU-HYa GABA,0.688624
11 HY GABA,0.528664
10 HY MM Glut,0.428825
15 HY Glut,0.364134
03 MOB-DG-IMN,0.223259
21 P GABA,0.222121


In [19]:
# Mean expression of the example gene(s) per subclass; keep only the 15 highest rows.
agg = aggregate_by_metadata( df=joined, gnames=exgenes, value='subclass' ).iloc[:15]
agg.style.background_gradient( cmap='Reds' )

Unnamed: 0_level_0,Tac2
subclass,Unnamed: 1_level_1
105 BST Tac2 Gaba,4.452095
063 MH Tac2 Glut,4.409363
095 CEA-BST Crh Gaba,2.967356
258 SPVC Nmu Glut,2.448923
276 MOB-mi Frmd7 Gaba,2.177203
037 Sncg Gaba,2.050399
036 Vip Gaba,1.967017
094 CEA-AAA-BST Ebf1 Gaba,1.946971
113 PVHd-DMH Lhx6 Gaba,1.541354
121 ARH-PVp Tbx3 Glut,1.319788
