In [1]:
import os
import pandas as pd
import numpy as np
import anndata
import time

In [2]:
# Locations of the metadata release this notebook reads from.
input_base = '/allen/programs/celltypes/workgroups/rnaseqanalysis/lydian/ABC_handoff'
input_directory = os.path.join(input_base, 'metadata', '20230630', '10x-scRNA-seq')
view_directory = os.path.join(input_directory, 'views')

# When enabled, make sure the views directory exists before anything uses it.
cache_views = True
if cache_views:
    os.makedirs(view_directory, exist_ok=True)

In [3]:
# Load the per-cell metadata view and key its rows by cell_label.
# The neurotransmitter column is forced to string dtype on read.
file = os.path.join(input_directory, 'views', 'cell_metadata_with_cluster_annotation.csv')
cell = pd.read_csv(file, dtype={"neurotransmitter": str}).set_index('cell_label')

In [15]:
# Show the column labels available in the cell metadata table.
cell.columns

Index(['library_label', 'anatomical_division_label', 'cluster_alias',
       'library_method', 'region_of_interest_acronym', 'donor_label',
       'donor_genotype', 'donor_sex', 'matrix_prefix', 'neurotransmitter',
       'division', 'class', 'subclass', 'supertype', 'cluster'],
      dtype='object')

In [4]:
# Load the example-gene expression view and key its rows by cell_label,
# matching the index used for the cell metadata table.
file = os.path.join(input_directory, 'views', 'example_genes_all_cells_expression.csv')
exp = pd.read_csv(file).set_index('cell_label')

In [6]:
def aggregate_by_metadata(df, gnames, value):
    """Average the selected columns within each group of a metadata column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing both the grouping column and the columns to average.
    gnames : str or iterable of str
        Name(s) of the columns to average. A single name given as a string
        is treated as a one-element list, so the result is always a DataFrame.
    value : str
        Name of the column whose distinct values define the groups.

    Returns
    -------
    pandas.DataFrame
        One row per group and one column per entry of ``gnames`` holding the
        group mean, ordered by the first entry of ``gnames``, largest first.
    """
    # A bare string would otherwise select a Series (which cannot be sorted
    # with ``by=``) and ``gnames[0]`` would pick its first character.
    if isinstance(gnames, str):
        gnames = [gnames]
    else:
        gnames = list(gnames)

    grouped = df.groupby(value)[gnames].mean()
    return grouped.sort_values(by=gnames[0], ascending=False)

In [13]:
# Genes summarised in the tables below (neurotransmitter-related, going by
# the variable name and the grouping used next).
ntgenes = [
    'Slc17a7', 'Slc17a6', 'Slc17a8',
    'Slc32a1', 'Slc6a5', 'Slc18a3',
    'Slc6a3', 'Slc6a4', 'Slc6a2',
]

# Keep only those expression columns, then attach them to the cell metadata;
# both tables are indexed by cell_label, so the join aligns on that index.
filtered = exp[ntgenes]
joined = cell.join(filtered, how='left')

In [14]:
# Mean expression of each selected gene per neurotransmitter label,
# shaded so higher means appear darker red.
agg = aggregate_by_metadata(df=joined, gnames=ntgenes, value='neurotransmitter')
agg.style.background_gradient(cmap='Reds')

Unnamed: 0_level_0,Slc17a7,Slc17a6,Slc17a8,Slc32a1,Slc6a5,Slc18a3,Slc6a3,Slc6a4,Slc6a2
neurotransmitter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Glut,7.43567,3.032979,0.135912,0.127681,0.017977,0.009214,0.014403,0.014875,0.006944
Chol,6.083425,5.332758,0.730629,0.729178,0.037216,5.248576,0.174872,0.028319,0.014578
Glut-GABA,0.862483,2.86638,4.654191,7.400881,0.030228,0.063402,0.195864,0.030718,0.015725
GABA,0.757635,0.213226,0.064546,6.879469,0.033206,0.051181,0.030635,0.015862,0.004453
Nora,0.300822,7.599333,0.064817,0.191974,0.230401,0.031913,0.018423,0.087096,7.322662
GABA-Glyc,0.159951,0.526999,0.108015,7.890645,6.704141,0.089104,0.014543,0.028067,0.021336
Hist,0.132376,0.449004,0.140583,0.354488,0.02427,0.007497,0.01182,0.10733,0.006131
Dopa,0.076645,2.617096,0.06827,3.42414,0.018269,0.001419,6.555475,0.04644,0.005224
Sero,0.041932,0.698158,5.741428,0.306555,0.098213,0.002359,0.033511,9.817377,0.013302


In [16]:
# Same summary, grouped by dissection region of interest instead.
agg = aggregate_by_metadata(df=joined, gnames=ntgenes, value='region_of_interest_acronym')
agg.style.background_gradient(cmap='Reds')

Unnamed: 0_level_0,Slc17a7,Slc17a6,Slc17a8,Slc32a1,Slc6a5,Slc18a3,Slc6a3,Slc6a4,Slc6a2
region_of_interest_acronym,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AUD,8.311963,1.430157,0.127051,0.868285,0.001329,0.012193,0.000529,0.004815,0.00319
SSp,8.307015,1.844414,0.185158,0.410914,0.001462,0.011789,0.001344,0.001775,0.002975
MOp,7.938808,0.946706,0.168024,1.242897,0.002824,0.031415,0.002057,0.011413,0.004929
VIS,7.800689,1.483229,0.108854,1.13906,0.00249,0.021208,0.001659,0.0144,0.004219
TEa-PERI-ECT,7.713175,0.824319,0.126016,1.57342,0.00348,0.01289,0.002249,0.010423,0.004436
SS-GU-VISC,7.277509,1.401445,0.114201,0.86931,0.003143,0.015487,0.004876,0.004939,0.003183
PL-ILA-ORB,7.207855,0.823795,0.124314,1.255048,0.004302,0.017105,0.01423,0.0133,0.005079
VIS-PTLp,7.182527,1.488889,0.107182,0.791263,0.002367,0.018489,0.002838,0.004652,0.004844
RSP,6.788763,2.806705,0.066405,0.708547,0.005078,0.020982,0.002477,0.004072,0.004865
MO-FRP,6.779459,0.764872,0.124955,1.057193,0.004817,0.023999,0.003662,0.00484,0.004914


In [18]:
# Repeat the per-neurotransmitter summary for a single example gene.
# NOTE(review): `filtered` and `joined` are rebound here, so the ntgenes
# summaries above will not re-run until their own join cell is executed again.
exgenes = ['Tac2']
filtered = exp[exgenes]
joined = cell.join(filtered, how='left')
agg = aggregate_by_metadata(df=joined, gnames=exgenes, value='neurotransmitter')
agg.style.background_gradient(cmap='Reds')

Unnamed: 0_level_0,Tac2
neurotransmitter,Unnamed: 1_level_1
Chol,5.148534
Glut-GABA,1.944914
GABA,0.601072
Glut,0.099
GABA-Glyc,0.093852
Dopa,0.035979
Hist,0.034632
Sero,0.008437
Nora,0.0


In [20]:
# Top 8 regions of interest by mean expression of the example gene.
agg = aggregate_by_metadata(
    df=joined, gnames=exgenes, value='region_of_interest_acronym'
).head(8)
agg.style.background_gradient(cmap='Reds')

Unnamed: 0_level_0,Tac2
region_of_interest_acronym,Unnamed: 1_level_1
AI,0.41807
TEa-PERI-ECT,0.38745
HY,0.33102
MOp,0.31559
VIS,0.303723
PL-ILA-ORB,0.303604
PAL,0.303496
MO-FRP,0.302619


In [9]:
# Top 8 classes by mean expression of the example gene.
agg = aggregate_by_metadata(
    df=joined, gnames=exgenes, value='class'
).head(8)
agg.style.background_gradient(cmap='Reds')

Unnamed: 0_level_0,Tac2
class,Unnamed: 1_level_1
08 MH-LH Glut,5.037336
04 CGE GABA,2.621625
14 CNU-HYa GABA,0.551792
11 HY GABA,0.453483
10 HY MM Glut,0.399869
15 HY Glut,0.388255
19 MY Glut,0.259901
06 CNU GABA,0.233099


In [11]:
# Top 15 subclasses by mean expression of the example gene.
agg = aggregate_by_metadata(
    df=joined, gnames=exgenes, value='subclass'
).head(15)
agg.style.background_gradient(cmap='Reds')

Unnamed: 0_level_0,Tac2
subclass,Unnamed: 1_level_1
063 MH Tac2 Glut,6.698351
105 BST Tac2 Gaba,6.227521
258 SPVC Nmu Glut,5.207479
276 MOB-mi Frmd7 Gaba,4.961338
037 Sncg Gaba,4.184213
036 Vip Gaba,4.138091
095 CEA-BST Crh Gaba,3.569873
121 ARH-PVp Tbx3 Glut,3.021044
053 MSN D1 Sema5a Gaba,1.932273
113 PVHd-DMH Lhx6 Gaba,1.733651
