# Immune exclusion signature scoring and T cell quantification
- Corresponds to Figure R1-4

In [None]:
import scanpy as sc
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy import stats
import seaborn as sn

In [None]:
import sys

import anndata as ad

# sys must be imported before it is used to extend the module search path;
# the path entry makes the project-local helper module below importable
sys.path.append("../resources/scRNA/")
import zc_function as zc

In [None]:
# make output directory
import os

# makedirs also creates the parent "scRNA_out" when missing; exist_ok=True
# removes the check-then-create race and makes this cell safe to re-run
os.makedirs("scRNA_out/FFPE_scRNA/", exist_ok=True)

In [None]:
def convert_cell_type( dat, orig_obs_col = "celltype_annotation", new_obs_col = "Cell_Type"):
    """Convert cell type annotation string format

    Strips everything up to and including the first underscore from each
    annotation, eg '2_Goblet cells' --> 'Goblet cells'. Only the first
    underscore is treated as the separator, so underscores inside the cell
    type name itself are kept. An annotation without any underscore is
    copied unchanged.

    @param dat: the anndata object intended to be modified (modified in place)
    @param orig_obs_col: original obs column where the format wants to be changed
    @param new_obs_col: the new obs column where the formatted annotation will be stored

    return the same anndata object with the new_obs_col added"""

    # Iterate over the values directly instead of indexing the Series with
    # integers: integer keys on a label-indexed Series rely on a deprecated
    # positional fallback and fail on an integer index that does not start at 0.
    converted = []
    for annotation in dat.obs[orig_obs_col]:
        _prefix, separator, cell_type = annotation.partition("_")
        # no separator found -> keep the annotation as it is
        converted.append(cell_type if separator else annotation)

    # a plain list is assigned by position, so row order is preserved
    dat.obs[new_obs_col] = converted

    return dat

## check signatures in the entire dataset

In [None]:
dat = sc.read("../data/scRNA/outer_combined_all4_dat.h5ad")

In [None]:
dat

In [None]:
#dat.X.sum(axis = 1) #check if is raw

In [None]:
dat.raw = dat.copy() # save raw layer

In [None]:
dat.layers['arcsin'] = dat.raw.X.copy() # prep for an arcsin layer

In [None]:
sc.pp.normalize_total(dat, layer = 'arcsin') # normalize library size

In [None]:
dat.layers['arcsin'] = np.arcsinh(dat.layers['arcsin'])

### get signatures

In [None]:
immune_exclusive = ["DDR1", "TGFBI", "PAK4", "DPEP1"]

### make signature scores

In [None]:
dat.X.sum(axis = 1) #check if data is raw

In [None]:
dat.raw = dat.copy()

In [None]:
#dat.raw.X.sum(axis = 1)

In [None]:
zc.normalization(dat) #normalize library size, arcsin transform and z-score scaling within genes

In [None]:
dat.X.sum(axis = 1)

In [None]:
sc.tl.score_genes(dat, gene_list=immune_exclusive, score_name='IES', use_raw = True) # use raw layer

In [None]:
sc.tl.score_genes(dat, gene_list=immune_exclusive, score_name='IES_norm', use_raw = False) #use normalized layer

In [None]:
sc.pl.umap(dat, color = ['Cell_Type', 'SampleId'  ], ncols=1)

In [None]:
sc.pl.umap(dat, color = immune_exclusive, use_raw = False, ncols=2, vmax = 5) # plot individual gene expression level

In [None]:
sc.pl.umap(dat, color = ['IES_norm'  , 'IES'], ncols=1, vmax = 2, vmin = 0, s= 2)

examine the distribution of the signature scores (raw and normalized)

In [None]:
dat.obs["IES_norm"].hist(bins = 100)

In [None]:
dat.obs["IES"].hist(bins = 100)

make a dictionary where keys are major cell types and values are cell type annotations

In [None]:
# major cell type -> list of the cell type annotations that belong to it
cell_category = {
    "Epithelial": [
        'Goblet cells', 'Enterocytes', 'Cholangiocytes', 'Basal cells',
        'Crypt cells', 'Epithelial cells', 'Ductal cells',
    ],
    "Immune": [
        'Macrophages', 'T memory cells', 'T cells', 'Plasma cells',
        'B cells memory', 'B cells', 'Mast cells', 'NK cells', 'Neutrophils',
        'Plasmacytoid dendritic cells', 'Dendritic cells', 'Alveolar macrophages',
    ],
    "Mesenchymal": ['Fibroblasts', 'Pericytes'],
    "Neuronal": ['Enteric glia cells', 'Schwann cells', 'Enteric neurons'],
    "Endothelial": ['Endothelial cells'],
    "Other": ['Smooth muscle cells', 'Adipocytes'],
}

reverse the key and values to make new adata obs column

In [None]:
# invert the mapping: each cell type annotation -> its major cell type
cell_category_rev = {
    annotation: major_type
    for major_type, annotations in cell_category.items()
    for annotation in annotations
}
        

In [None]:
#cell_category_rev

In [None]:
#dat.obs.Cell_Type.unique().shape

In [None]:
dat.obs["Major_cell_type"] = [cell_category_rev[k] for k in dat.obs.Cell_Type]

In [None]:
dat.obs.Major_cell_type.unique()

## Check immune signatures in T cells

### extract T cells

In [None]:
dat = sc.read("../data/scRNA/outer_combined_all4_dat.h5ad",) # re-read in the data if needed

In [None]:
dat.obs.Major_cell_type.unique()

In [None]:
sc.pl.umap(dat, color = ['Cell_Type','SampleId'], ncols=1)

In [None]:
dat.obs.Cell_Type.unique()

In [None]:
# extract T cells based on cell type annotation
# .copy() turns the subset into an independent AnnData instead of a view of dat,
# so the later assignments to tc (.raw, .X) do not depend on implicit view-to-copy conversion
tc = dat[np.isin( dat.obs.Cell_Type, ['T memory cells', 'T cells'] ) ].copy()

In [None]:
tc

In [None]:
tc.X.sum(axis = 1) # check if raw

In [None]:
tc.raw = tc.copy()

In [None]:
zc.normalization(tc)

In [None]:
sc.pl.umap(tc, color = ['Cell_Type', 'CD8A','SampleId'], ncols=1)

## calculate CD8+ T cell abundance in each sample 

In [None]:
dat.raw.X.sum(axis = 1)

In [None]:
zc.normalization(dat)

In [None]:
#check CD8A expression 
sc.pl.umap(dat, color = ['CD8A', 'SampleId','Cell_Type','CD8T_core9'], use_raw = False, ncols=1, vmin = 0, vmax = 8)

### check histogram

In [None]:
cd8 = dat.to_df()['CD8A'] # CD8A gene expression per cell

In [None]:
cd8.shape

In [None]:
cd8_positive = cd8[cd8>-0.2]
cd8_positive.shape

In [None]:
cd8_positive.hist(bins = 100 )

In [None]:
# use otsu
zc.get_otsu_threshold(cd8)

Will use 0 as the threshold, since the Otsu threshold is too large based on the histogram above.

In [None]:
# A cell is flagged CD8+ when its CD8A value is above 0 AND it is annotated as a T cell.
# Both conditions are evaluated as positional arrays: integer keys on the
# label-indexed Series (cd8[i], Cell_Type[i]) rely on a deprecated pandas
# fallback, and a per-cell np.isin call is needlessly slow.
cd8a_above_zero = cd8.to_numpy() > 0
annotated_t_cell = dat.obs["Cell_Type"].isin(['T cells', 'T memory cells']).to_numpy()
# keep the labels as the strings 'True' / 'False', as the downstream cells expect
is_cd8 = np.where(cd8a_above_zero & annotated_t_cell, 'True', 'False').tolist()

In [None]:
len(is_cd8)

In [None]:
#is_cd8

In [None]:
dat.obs["is_CD8"] = is_cd8

In [None]:
sc.pl.umap(dat, color = ['CD8A', 'SampleId','Cell_Type','is_CD8'], use_raw = False, ncols=1, vmin = 0, vmax = 8)

In [None]:
cd8_df = pd.concat([dat.obs.SampleId, dat.obs.is_CD8], axis = 1)

In [None]:
cd8_df.head()

In [None]:
bool_convert_dict = {'True':1, 'False':0}

In [None]:
cd8_df['is_CD8_int'] = [bool_convert_dict[k] for k in cd8_df["is_CD8"]]

In [None]:
cd8_df["is_CD8_int"].sum()

In [None]:
cd8_samples = cd8_df[["is_CD8_int", 'SampleId' ] ].groupby(by = 'SampleId').sum()

In [None]:
cd8_samples.head()

In [None]:
# per-sample mean of the 0/1 is_CD8_int indicator, i.e. the fraction of cells
# flagged CD8+ in each sample (a value in [0, 1]; it is not multiplied by 100)
cd8_samples["CD8_cell_pct"] = cd8_df[["is_CD8_int", 'SampleId' ] ].groupby(by = 'SampleId').mean()

## save data

In [None]:
dat.X.sum(axis = 1)

In [None]:
dat.X = dat.raw.X # SAVE ONLY RAW COUNTS

In [None]:
dat

In [None]:
dat.write("../data/scRNA/outer_combined_all4_dat.h5ad", compression = 'gzip')

In [None]:
tc.X = tc.raw.X

In [None]:
tc.X.sum(axis = 1)

In [None]:
tc.write('../data/scRNA/combined_all4_tcell_dat.h5ad', compression = 'gzip' )

In [None]:
cd8_samples.to_csv("scRNA_out/FFPE_scRNA/cd8_cell_sample_wise.csv", header = True, index = True)