# Immune signature expression in T cells
- score the T cell signatures from the Fernande-Garcia paper and the SD5 table in the T cell subset of the combined-all_4 dataset
- correlate the scores with single-cell Epithelial - stromal scores

In [None]:
import scanpy as sc
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy import stats

In [None]:
import seaborn as sn

In [None]:
import anndata as ad
import sys; sys.path.append("../resources/scRNA/")
import zc_function as zc

In [None]:
# make output directory
# os.makedirs creates the parent "scRNA_out" as needed, and exist_ok=True makes
# the cell safe to re-run without a separate existence check.
import os

os.makedirs("scRNA_out/FFPE_scRNA/", exist_ok=True)

## load data

In [None]:
tcell = sc.read("../data/scRNA/combined_all4_tcell_dat.h5ad")

In [None]:
tcell # check anndata

In [None]:
#tcell.X.sum(axis = 1) #check if raw
# Store a copy of the current tcell object in .raw; the score_genes calls
# further down are run with use_raw = True and therefore read from it.
tcell.raw = tcell.copy()

keep selected obs columns

In [None]:
# Keep only the selected per-cell metadata columns, in this order.
tcell.obs = tcell.obs.loc[
    :,
    [
        'SampleId',
        'percent.mt',
        'exp_num',
        'active_ident',
        'Cell_Type',
        'n_genes_by_counts',
        'total_counts',
        'pct_counts_in_top_200_genes',
        'pct_counts_in_top_500_genes',
        'CD8T_core9',
        'CD8T_cytokine',
        'Major_cell_type',
        'leiden',
        'session_id',
    ],
]

In [None]:
list( tcell.obs.SampleId.unique() ) 

### check absent samples (10096_s2 and 10180_01_s2 )
- confirmed that these 2 samples do not have T cells 

In [None]:
dat = sc.read("../data/scRNA/outer_combined_all4_dat.h5ad")

In [None]:
dat

In [None]:
dat.obs.Cell_Type.unique()

In [None]:
# Subset the full dataset to cells labelled with either T cell type.
tc = dat[dat.obs.Cell_Type.isin(['T memory cells', 'T cells']).to_numpy()]

In [None]:
tc.obs.SampleId.unique()

In [None]:
dat.obs.SampleId.unique()

### load signatures

In [None]:
fern_found = pd.read_csv("../resources/scRNA/fernande_garcia_found_in_var.csv", header = 0, index_col = None)

In [None]:
fern_found.head()

In [None]:
sd5 = pd.read_excel("../resources/scRNA/SD5_immune_signatures.xlsx", engine = 'openpyxl', skiprows=1)
sd5

In [None]:
# Replace the sheet's column headers with signature names. The assignment is
# positional, so the list must match the number and order of columns in sd5.
sd5.columns = [
    'Resting0_Treg',
    'Resting1_CD4 NV/CM',
    'Resting2_CD4/CD8',
    'Activated0_IFN Response',
    'Activated1_Proliferation',
    'Activated2_CD8 Cytotoxic',
    'Activated3_CD8 Cytokine',
]

In [None]:
sd5

### load IES sum table

In [None]:
ies_sumtab = pd.read_csv("../resources/scRNA/sc_st_IES_sumtab.csv", header = 0, index_col = 0 ) # immune exclusion score summary table
ies_sumtab.head()

- table derived from the scRNA-seq data IES gene score expression (scIES) and spatial transcriptomic data (stIES) 
- E-S denotes 'Epithelial expression minus stromal expression'

## score signatures (use_raw = True) 

In [None]:
fern_found.shape

In [None]:
fern_found.iloc[:,0].dropna()

### check if all genes can be found in the data gene space
- all genes in fern_found and sd5 are found in the data

In [None]:
# For every signature column in fern_found, keep the genes that are present in
# tcell.var_names and report how many are missing.
found_dict = dict()

for signature, column in fern_found.items():
    # Drop NaN entries (presumably padding for shorter gene lists).
    genes = column.dropna()
    # Compute the membership mask once and reuse it for both subsets.
    in_var = genes.isin(tcell.var_names)

    print(signature)
    missing = genes[~in_var]

    if len(missing) > 0:
        print(f"{len(missing)}/{len(genes)} not found")

    found_dict[signature] = genes[in_var]

In [None]:
# For every signature column in sd5, keep the genes that are present in
# tcell.var_names and report how many are missing.
found_dict_sd5 = dict()

for signature, column in sd5.items():
    # Drop NaN entries (presumably padding for shorter gene lists).
    genes = column.dropna()
    # Compute the membership mask once and reuse it for both subsets.
    in_var = genes.isin(tcell.var_names)

    print(signature)
    missing = genes[~in_var]

    if len(missing) > 0:
        print(f"{len(missing)}/{len(genes)} not found")

    found_dict_sd5[signature] = genes[in_var]
    

### score

In [None]:
# score fern: one tcell.obs column per signature, named after the signature
for signature, genes in found_dict.items():
    sc.tl.score_genes(tcell, gene_list=genes, score_name=signature, use_raw=True)

In [None]:
# score sd5: one tcell.obs column per signature, named after the signature
for signature, genes in found_dict_sd5.items():
    sc.tl.score_genes(tcell, gene_list=genes, score_name=signature, use_raw=True)

In [None]:
tcell

In [None]:
sc.pl.umap(tcell, color = ['SampleId', 'Oxphos' , 'Cell_Type'])

In [None]:
score_names = ['Aerobic glycolysis', 'Oxphos', 'Oxphos vs. Glycolysis_Up', 'Oxphos vs. Glycolysis_Down', 'FA oxidation', 'FA synthesis', 'FA oxidation vs. synthesis_Up', 'FA oxidation vs. synthesis_Down', 'Glutamine catabolism_Up', 'Glutamine catabolism_Down', 'Polyamine synthesis_Up', 'Polyamine synthesis_Down', 'Metionine cycle', 'NEAA synthesis', 'AA uptake', 'Mevalonate pathway', 'PI3K-AKT-mTOR-MYC signaling_Up', 'PI3K-AKT-mTOR-MYC signaling_Down', 'Cytokine production', 'Resting0_Treg', 'Resting1_CD4 NV/CM', 'Resting2_CD4/CD8', 'Activated0_IFN Response', 'Activated1_Proliferation', 'Activated2_CD8 Cytotoxic', 'Activated3_CD8 Cytokine']

In [None]:
len(score_names)

## Correlate the immune signatures with IES 

In [None]:
# dataframe for signature scores
# Take an explicit copy: a "SampleId" column is assigned into score_df in the
# next cell, and writing into a frame selected from tcell.obs would otherwise
# raise pandas' SettingWithCopyWarning.
score_df = tcell.obs[score_names].copy()

In [None]:
score_df["SampleId"] = tcell.obs["SampleId"]

In [None]:
score_df.head()

In [None]:
# Average every signature score over the cells of each sample; the result is
# indexed by SampleId.
# NOTE(review): if SampleId is a categorical column, categories with no cells
# may appear as all-NaN rows unless observed=True is passed — confirm.
sample_score_df = score_df.groupby(by = 'SampleId').mean() # pseudo-bulk from single cell level to sample level

add information from the IES summary table to sample_score_df

In [None]:
# Column assignment aligns on the index: values are matched by row label, and
# samples missing from ies_sumtab get NaN.
# NOTE(review): assumes ies_sumtab (read with index_col = 0) is indexed by the
# same SampleId labels as sample_score_df — confirm.
sample_score_df['scE-S'] = ies_sumtab["sc_E-S"] # single cell epithelial - stromal gene expression 

In [None]:
sample_score_df['cd8_high'] = ies_sumtab["cd8_high"]

In [None]:
sample_score_df['scE-S']

In [None]:
# Copy of the sample-level table without sample '10096_s3'.
# NOTE(review): sample_score_df2 is not referenced by any later cell visible
# here; the box plots and the saved CSV use sample_score_df, which still
# contains this sample — confirm whether the outlier was meant to be excluded.
sample_score_df2 = sample_score_df.drop('10096_s3', axis = 0 ) # outlier

In [None]:
ies_sumtab

## box plots for selected signatures

In [None]:
sample_score_df.columns

In [None]:
sn.boxplot(data = sample_score_df, y = 'scE-S',x = 'cd8_high', palette = 'Set2', order = [True, False])

In [None]:
sn.boxplot(data = sample_score_df, y =  'Activated2_CD8 Cytotoxic',x = 'cd8_high', palette = 'Set2', order = [True, False])

In [None]:
sn.boxplot(data = sample_score_df, y = 'Activated3_CD8 Cytokine',x = 'cd8_high', palette = 'Set2', order = [True, False])

In [None]:
sn.boxplot(data = ies_sumtab, y = 'CD8_cell_pct',x = 'cd8_high', palette = 'Set2', order = [True, False])

## stat annotation

In [None]:
from statannotations.Annotator import Annotator

In [None]:
def make_stat_plot(hue_plot_params, data, pairs, test):
    """Draw a seaborn box plot and annotate it with a statistical test.

    Parameters
    ----------
    hue_plot_params : dict
        Keyword arguments forwarded to both ``sn.boxplot`` and ``Annotator``
        (e.g. ``x``, ``y``, ``order``, ``palette``, ``data``).
    data : DataFrame
        Data to plot. Used only when ``hue_plot_params`` has no ``'data'``
        entry; an entry in ``hue_plot_params`` takes precedence, so existing
        callers that pass it in both places behave as before.
    pairs : list of tuple
        Groups to compare, passed to ``Annotator``.
    test : str
        Name of the statistical test, passed to ``Annotator.configure``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # Previously `data` was accepted but never used; make it the fallback.
    plot_params = {'data': data, **hue_plot_params}

    # Create new plot
    _, ax = plt.subplots()

    # Plot with seaborn
    ax = sn.boxplot(ax=ax, **plot_params)

    # Add annotations, using the same parameters as the plot itself
    annotator = Annotator(ax, pairs, **plot_params)
    annotator.configure(test=test).apply_and_annotate()

    plt.show()

In [None]:
pairs =  [(True, False)] # groups to be compared with stat tests

In [None]:
# 'Activated3_CD8 Cytokine' sample-level score, cd8_high True vs False.
hue_plot_params = dict(
    data=sample_score_df,
    y='Activated3_CD8 Cytokine',
    x='cd8_high',
    palette='Set2',
    order=[True, False],
)

make_stat_plot(hue_plot_params, sample_score_df, pairs, test='t-test_ind')

In [None]:
# 'Activated2_CD8 Cytotoxic' sample-level score, cd8_high True vs False.
hue_plot_params = dict(
    data=sample_score_df,
    y='Activated2_CD8 Cytotoxic',
    x='cd8_high',
    palette='Set2',
    order=[True, False],
)

make_stat_plot(hue_plot_params, sample_score_df, pairs, test='t-test_ind')

In [None]:
# 'scE-S' (epithelial minus stromal) value, cd8_high True vs False.
hue_plot_params = dict(
    data=sample_score_df,
    y='scE-S',
    x='cd8_high',
    palette='Set2',
    order=[True, False],
)

make_stat_plot(hue_plot_params, sample_score_df, pairs, test='t-test_ind')

In [None]:
# 'CD8_cell_pct' taken directly from ies_sumtab, cd8_high True vs False.
hue_plot_params = dict(
    data=ies_sumtab,
    y='CD8_cell_pct',
    x='cd8_high',
    palette='Set2',
    order=[True, False],
)

make_stat_plot(hue_plot_params, ies_sumtab, pairs, test='t-test_ind')

## Save result

In [None]:
tcell.write("scRNA_out/FFPE_scRNA/combined_all4_tcell_dat.h5ad")

In [None]:
sample_score_df.to_csv("scRNA_out/FFPE_scRNA/sample_immune_sig_score.csv", header = True, index = True) 