# Checking Eraslan et al.

In [167]:
import scanpy as sc
import decoupler as dc
import pandas as pd
import sys
import anndata
import numpy as np

Read in the Eraslan et al. data (the h5ad file from CELLxGENE).

In [168]:
# Study to load and the root folder that holds one sub-folder per study.
dataset = 'eraslan'
rt = '/home/ec2-user/curation/'

# h5ad file name(s) inside each study folder. Most entries use 'ad.h5ad';
# a few have study-specific file names.
# NOTE: 'linna_kuosmanen' maps to a list of two files, so the string join
# below only works for the single-file entries.
filenames = {
    'amrute': 'ad.h5ad',
    'brener': 'ad.h5ad',
    'chaffin': 'human_dcm_hcm_scportal_03.17.2022.h5ad',
    'eraslan': 'ad.h5ad',
    'hill': 'ad.h5ad',
    'litvinukova': 'ad.h5ad',
    'kanemaru': 'ad.h5ad',
    'kuppe': 'ad.h5ad',
    'reichart': 'ad.h5ad',
    'selewa': 'ad.h5ad',
    'knight_schrijver': 'ad.h5ad',
    'sim': 'ad.h5ad',
    'simonson': 'ICM_scportal_05.24.2022.h5ad',
    'tucker': 'healthy_human_4chamber_map_unnormalized_V4.h5ad',
    'linna_kuosmanen': ['carebank.h5ad', 'periheart.h5ad'],
}

# Build <rt><dataset>/<file name> and read it.
h5ad_path = ''.join((rt, dataset, '/', filenames[dataset]))
ad_raw = sc.read_h5ad(h5ad_path)


Index `var` (and, in the next cell, `raw.var`) by the `feature_name` column so that genes can be selected by name (e.g. `XIST`).

In [169]:
# Re-index the gene annotations by `feature_name` so genes can be selected by
# that name (later cells slice with 'XIST' and 'UTY').
# set_index moves the column into the index, so running this cell a second
# time used to raise KeyError. Checking the index name makes the cell safe to
# re-run, while a fresh run without a `feature_name` column still fails loudly.
if ad_raw.var.index.name != 'feature_name':
    ad_raw.var.set_index('feature_name', inplace=True)


In [170]:
# Re-index the `.raw` gene annotations by `feature_name` as well; the
# pseudobulk step below is called with use_raw=True.
# set_index moves the column into the index, so running this cell a second
# time used to raise KeyError. Checking the index name makes the cell safe to
# re-run, while a fresh run without a `feature_name` column still fails loudly.
if ad_raw.raw.var.index.name != 'feature_name':
    ad_raw.raw.var.set_index('feature_name', inplace=True)


Pseudobulk (mean expression) per donor, tissue, and cell type.

In [171]:
# Aggregate cells into pseudobulk profiles: donors are the samples, grouped
# by tissue and cell type, summarised with mode='mean'.
# NOTE(review): min_cells=0 / min_counts=0 presumably disable the filtering of
# small groups, and use_raw=True presumably reads from `ad_raw.raw` -- confirm
# against the decoupler documentation.
pseudobulk_options = dict(
    sample_col='donor_id',
    groups_col=['tissue', 'cell_type'],
    mode='mean',
    min_cells=0,
    min_counts=0,
    use_raw=True,
)
ad = dc.get_pseudobulk(ad_raw, **pseudobulk_options)


In [172]:
# Keep only the pseudobulk profiles whose `sex` annotation is 'male'.
is_male = ad.obs['sex'] == 'male'
ad = ad[is_male]

In [154]:
# Disabled inspection of the donor IDs (left commented out):
# ad.obs['donor_id']

In [188]:
# Per-profile metadata to show next to the expression values. Copied so that
# columns can be added without modifying `ad.obs`.
obs_columns = ['donor_id', 'psbulk_n_cells', 'tissue', 'cell_type', 'sex']
df = ad.obs.loc[:, obs_columns].copy()

In [189]:
# Attach the pseudobulk expression of XIST and UTY to the metadata table:
# 'mean' holds XIST, 'meanuty' holds UTY.
for column, gene in (('mean', 'XIST'), ('meanuty', 'UTY')):
    df[column] = np.array(ad[:, gene].X)

Rank the male samples (donor / tissue / cell type combinations) by mean XIST expression and look at the highest ones. They are predominantly Schwann cells, but there are some surprises...

In [190]:
# Rows ordered by the 'mean' column, highest first, renumbered from 0;
# display the first 50.
xist_ranked = df.sort_values(by='mean', ascending=False).reset_index(drop=True)
xist_ranked.iloc[:50]

Unnamed: 0,donor_id,psbulk_n_cells,tissue,cell_type,sex,mean,meanuty
0,GTEX-1CAMR,4.0,skin of leg,basal cell of epidermis,male,5.75,0.0
1,GTEX-1CAMR,32.0,skin of leg,epithelial cell of sweat gland,male,1.53125,0.15625
2,GTEX-1HSMQ,8.0,lingula of left lung,B cell,male,0.5,0.125
3,GTEX-1HSMQ,199.0,lingula of left lung,bronchial epithelial cell,male,0.477387,0.190955
4,GTEX-1HSMQ,472.0,lingula of left lung,fibroblast,male,0.358051,0.139831
5,GTEX-1HSMQ,8708.0,lingula of left lung,epithelial cell of alveolus of lung,male,0.343362,0.265848
6,GTEX-1HSMQ,198.0,lingula of left lung,macrophage,male,0.257576,0.166667
7,GTEX-1HSMQ,13.0,prostate gland,Schwann cell,male,0.230769,0.0
8,GTEX-144GM,500.0,esophagus muscularis mucosa,Schwann cell,male,0.218,0.194
9,GTEX-1I1GU,44.0,prostate gland,Schwann cell,male,0.204545,0.090909


Many of these come from a single donor and a single sample: the lingula of the left lung of GTEX-1HSMQ. In the other GTEX-1HSMQ samples, XIST is at roughly the noise floor or exactly zero (Schwann cells aside). This is somewhat hard to explain.

In [191]:
# Rebuild `df` for a single donor: metadata plus XIST ('mean') and UTY
# ('meanuty') expression of that donor's pseudobulk profiles.
donor_mask = ad.obs['donor_id'] == 'GTEX-1HSMQ'
obs_columns = ['donor_id', 'psbulk_n_cells', 'tissue', 'cell_type', 'sex']
df = ad[donor_mask].obs[obs_columns].copy()
for column, gene in (('mean', 'XIST'), ('meanuty', 'UTY')):
    df[column] = np.array(ad[donor_mask, gene].X)

In [192]:
# Rows ordered by the 'mean' column, highest first, renumbered from 0;
# display the first 60.
donor_ranked = df.sort_values(by='mean', ascending=False).reset_index(drop=True)
donor_ranked.iloc[:60]

Unnamed: 0,donor_id,psbulk_n_cells,tissue,cell_type,sex,mean,meanuty
0,GTEX-1HSMQ,8.0,lingula of left lung,B cell,male,0.5,0.125
1,GTEX-1HSMQ,199.0,lingula of left lung,bronchial epithelial cell,male,0.477387,0.190955
2,GTEX-1HSMQ,472.0,lingula of left lung,fibroblast,male,0.358051,0.139831
3,GTEX-1HSMQ,8708.0,lingula of left lung,epithelial cell of alveolus of lung,male,0.343362,0.265848
4,GTEX-1HSMQ,198.0,lingula of left lung,macrophage,male,0.257576,0.166667
5,GTEX-1HSMQ,13.0,prostate gland,Schwann cell,male,0.230769,0.0
6,GTEX-1HSMQ,199.0,esophagus muscularis mucosa,Schwann cell,male,0.165829,0.120603
7,GTEX-1HSMQ,308.0,lingula of left lung,respiratory basal cell,male,0.142857,0.37013
8,GTEX-1HSMQ,2900.0,lingula of left lung,alveolar macrophage,male,0.133448,0.365862
9,GTEX-1HSMQ,8.0,gastrocnemius,Schwann cell,male,0.125,0.5


In [193]:
# Same ordering by the 'mean' column, highest first; display everything from
# row 60 onwards.
donor_ranked_tail = df.sort_values(by='mean', ascending=False).reset_index(drop=True)
donor_ranked_tail.iloc[60:]

Unnamed: 0,donor_id,psbulk_n_cells,tissue,cell_type,sex,mean,meanuty
60,GTEX-1HSMQ,8.0,esophagus muscularis mucosa,mature NK T cell,male,0.0,0.0
61,GTEX-1HSMQ,7.0,esophagus muscularis mucosa,neuron,male,0.0,0.714286
62,GTEX-1HSMQ,12.0,esophagus muscularis mucosa,professional antigen presenting cell,male,0.0,0.666667
63,GTEX-1HSMQ,18.0,gastrocnemius,T cell,male,0.0,0.111111
64,GTEX-1HSMQ,117.0,gastrocnemius,contractile cell,male,0.0,0.230769
65,GTEX-1HSMQ,6.0,gastrocnemius,dendritic cell,male,0.0,1.0
66,GTEX-1HSMQ,62.0,gastrocnemius,endothelial cell of lymphatic vessel,male,0.0,0.33871
67,GTEX-1HSMQ,360.0,gastrocnemius,endothelial cell of vascular tree,male,0.0,0.258333
68,GTEX-1HSMQ,1196.0,gastrocnemius,fast muscle cell,male,0.0,0.346154
69,GTEX-1HSMQ,89.0,gastrocnemius,macrophage,male,0.0,0.359551
