In [1]:
import pandas as pd
import scanpy as sc
import numpy as np

In [2]:
# Read the AnnData object from disk (per the original note, this file holds
# raw counts; they are normalized later in the notebook).
h5ad_path = '../data/Tcell_PDL1.h5ad'
adata = sc.read_h5ad(h5ad_path)

In [3]:
# Sub_Cluster label to analyse; it selects the cells below and is also
# embedded in the TooManyCells input path and the Compass output file names.
cluster = 't_Tact-XIST'

In [4]:
# Keep only the cells whose 'Sub_Cluster' annotation equals `cluster`.
# Boolean indexing yields a *view* of the loaded object (the repr below says
# so), not an independent copy.
adata = adata[adata.obs['Sub_Cluster'].eq(cluster)]
adata

View of AnnData object with n_obs × n_vars = 2365 × 27085
    obs: 'CellName', 'ID', 'Sample', 'Patient', 'group', 'Tissue', 'Origin', 'Response', 'Timeline', 'Treatment', 'batch', 'n_genes', 'percent_mito', 'percent_hsp', 'percent_ig', 'percent_rp', 'n_counts', 'leiden', 'Louvain', 'myleiden', 'defcls', 'Global_Cluster', 'Sub_Cluster', 'Global_tSNE_1', 'Global_tSNE_2', 'Global_UMAP_1', 'Global_UMAP_2', 'Sub_tSNE_1', 'Sub_tSNE_2', 'Sub_UMAP_1', 'Sub_UMAP_2'

In [5]:
# Read the TooManyCells assignment table for the selected sub-cluster.
# Only the 'cell' and 'cluster' columns are loaded, and the first of them
# becomes the index (presumably 'cell', i.e. the cell identifier -- confirm
# against the CSV header).
# NOTE(review): this path starts at './data' while the h5ad file is read from
# '../data' -- confirm both resolve from the notebook's working directory.
tmc_csv = './data/TMC_result/' + cluster + '_cluster.csv'
ann = pd.read_csv(tmc_csv, usecols=['cell', 'cluster'], index_col=0)

In [6]:
# Attach the TooManyCells cluster id to each cell's metadata. The join aligns
# on the index, so any cell missing from `ann` would receive a missing value.
# Columns left behind by an earlier execution of this cell are dropped first;
# without that, re-running the cell makes pandas raise a column-overlap error.
obs_without_tmc = adata.obs.drop(columns=ann.columns, errors='ignore')
adata.obs = obs_without_tmc.join(ann)

In [7]:
# Collapse the non-responder-enriched TooManyCells clusters into one label, 0.
# The result is assigned back to the column instead of calling
# `replace(..., inplace=True)` on `adata.obs['cluster']`: that form mutates a
# temporary Series, which pandas warns about and which leaves the DataFrame
# untouched once copy-on-write is active.
nr_enriched_clusters = [20, 24, 25, 26, 33, 41, 42]
adata.obs['cluster'] = adata.obs['cluster'].replace(nr_enriched_clusters, 0)

In [8]:
# Sorted distinct cluster labels present after the relabelling step.
cluster_labels = adata.obs['cluster'].to_numpy()
np.unique(cluster_labels)

array([ 0,  4,  5,  7,  8, 11, 12, 14, 15, 21, 29, 30, 32, 36, 37, 40])

In [9]:
# Add a 'divide' column: 'NR_E' for cells whose cluster id is 0 (the merged
# non-responder-enriched label), 'non-NR_E' for every other cell.
in_nr_enriched = adata.obs['cluster'] == 0
adata.obs['divide'] = np.where(in_nr_enriched, 'NR_E', 'non-NR_E')

In [10]:
# Rescale each cell's counts to a common total of one million.
# The return value is not captured, so this relies on scanpy updating
# `adata` in place.
per_cell_total = 1e6
sc.pp.normalize_total(adata, target_sum=per_cell_total)

In [11]:
# Write the expression matrix of the 'NR_E' cells as a tab-separated file.
# The table is transposed before writing, so the file has one column per cell.
nr_e_cells = adata[adata.obs['divide'] == 'NR_E']
nr_e_expr = nr_e_cells.to_df().T
nr_e_expr.to_csv('./data/Compass_input/' + cluster + '_NR_E.tsv', sep='\t')

In [12]:
# Write the expression matrix of the 'non-NR_E' cells as a tab-separated file.
# The table is transposed before writing, so the file has one column per cell.
non_nr_e_cells = adata[adata.obs['divide'] == 'non-NR_E']
non_nr_e_expr = non_nr_e_cells.to_df().T
non_nr_e_expr.to_csv('./data/Compass_input/' + cluster + '_non-NR_E.tsv', sep='\t')