# Immune Exclusion Signature (IES) validation

1. score IES in all cells from VUMC and Broad datasets
2. perform refNMF to identify immune cell state scores
3. look for correlation between IES and TL2/TL3 abundance

In [None]:
# make output directories (parents are created as needed; safe to re-run)
import os
os.makedirs("scRNA_out/immexcl/", exist_ok=True)

In [None]:
import numpy as np
import scanpy as sc
import seaborn as sns
import pandas as pd
import sys; sys.path.append("../resources/ST/")
from visium_utils import deconvolve_cnmf

In [None]:
sc.set_figure_params(transparent=True, dpi_save=400)
sc.settings.figdir = "scRNA_out/immexcl/"

---
### Define gene signatures

In [None]:
# signature table; column names are the signature names used as dict keys below.
# Repo-relative path, matching the other "../resources/..." references in this notebook.
g = pd.read_csv("../resources/gene_signatures.csv")
# pad missing entries with 0 so they can be filtered out when building gene lists
g = g.fillna(0)
g.columns

In [None]:
# signature name -> gene list, skipping the 0 padding written by fillna(0)
genes = {
    sig_name: [entry for entry in g[sig_name] if entry != 0]
    for sig_name in g.keys()
}

In [None]:
len(genes.keys())

In [None]:
# exclude the iCMS2 and iCMS3 Up/Down lists
# we're going to concatenate them into two respective signature scores
# NOTE(review): the original comment also listed CBC and RSC as excluded, but the
# filter below only removes the four iCMS keys — confirm whether CBC/RSC should be dropped too.
genes_short = [key for key in genes.keys() if key not in [
    'iCMS2_Up','iCMS2_Down','iCMS3_Up','iCMS3_Down'
]]
len(genes_short)

---
### Define T cell activity gene signatures

In [None]:
# T cell signature table (Szabo et al.); column names are the signature names.
# Repo-relative path, matching the other "../resources/..." references in this notebook.
g = pd.read_csv("../resources/signatures/szabo_natcomm_signatures.csv")
# pad missing entries with 0 so they can be filtered out when building gene lists
g = g.fillna(0)
g.columns

In [None]:
# signature name -> gene list, skipping the 0 padding written by fillna(0)
T_genes = {
    sig_name: [entry for entry in g[sig_name] if entry != 0]
    for sig_name in g.keys()
}

In [None]:
len(T_genes.keys())

In [None]:
# T cell signature table (Fernandez-Garcia et al.); column names are the signature names.
# Repo-relative path, matching the other "../resources/..." references in this notebook.
g = pd.read_csv("../resources/signatures/fernandez-garcia_cellrep_signatures.csv")
# pad missing entries with 0 so they can be filtered out when building gene lists
g = g.fillna(0)
g.columns

In [None]:
# add the signatures of the currently loaded table to T_genes (same-named keys are overwritten)
T_genes.update({
    sig_name: [entry for entry in g[sig_name] if entry != 0]
    for sig_name in g.keys()
})

In [None]:
len(T_genes.keys())

---
### Read in samples

In [None]:
v = sc.read("../data/scRNA/VUMC_HTAN_DIS_EPI_V2.h5ad")
v.obs.rename(columns={"Immune Exclusion":"IES"}, inplace=True)
# where Patient is missing ("nan" after the str cast), fall back to the HTAN specimen ID
patient_ids = v.obs.Patient.astype(str).replace("nan",np.nan)
specimen_ids = v.obs["HTAN Specimen ID"].astype(str).replace("nan",np.nan)
v.obs["Patient"] = patient_ids.fillna(specimen_ids)
v.obs["Compartment"] = "Epithelium"
v

In [None]:
v2 = sc.read("../data/scRNA/abnormal_epithelium.h5ad")
# where Patient is missing ("nan" after the str cast), fall back to the HTAN specimen ID
patient_ids = v2.obs.Patient.astype(str).replace("nan",np.nan)
specimen_ids = v2.obs["HTAN Specimen ID"].astype(str).replace("nan",np.nan)
v2.obs["Patient"] = patient_ids.fillna(specimen_ids)
v2.obs["Compartment"] = "Epithelium"
v2

In [None]:
v_stroma = sc.read("../data/scRNA/VUMC_HTAN_VAL_DIS_NONEPI_V2.h5ad")
# use the HTAN specimen ID as patient identifier.
# Bracket assignment always writes a column; attribute assignment (obs.Patient = ...)
# would only set a Python attribute if "Patient" were not already a column.
v_stroma.obs["Patient"] = v_stroma.obs["HTAN Specimen ID"].values
v_stroma.obs["Compartment"] = "Stroma"
v_stroma

In [None]:
broad = sc.read("../data/scRNA/Broad_Epi_CRC_NoNormal_ReFiltered_Counts.h5ad")
# patient ID = text before the first underscore of each obs name
obs_name_parts = broad.obs_names.str.split("_")
broad.obs["Patient"] = obs_name_parts.str[0]
broad.obs["Compartment"] = "Epithelium"
broad

In [None]:
broad_stroma = sc.read("../data/scRNA/Broad_NonEpi_sub.h5ad")
# patient ID = text before the first underscore of each obs name
obs_name_parts = broad_stroma.obs_names.str.split("_")
broad_stroma.obs["Patient"] = obs_name_parts.str[0]
broad_stroma.obs["Compartment"] = "Stroma"
broad_stroma

---
### Calculate gene signatures in Broad dataset

In [None]:
%%time
for a in [broad, broad_stroma]:
    # score gene signatures
    for sig in list(genes.keys()):
        try:
            print(sig)
            sc.tl.score_genes(a, genes[sig], score_name=sig)
        except:
            print("{} failed!".format(sig))
    
    # score Stem_Cell_Index from Gil Vasquez, et al. using CBC and RSC lists
    try:
        print("Stem Cell Index")
        sc.tl.score_genes(
            a,
            gene_list=genes["RSC"],
            gene_pool=genes["CBC"]+genes["RSC"],
            ctrl_size=len(genes["CBC"]),
            score_name="Stem Cell Index",
        )
    except:
        print("Stem_Cell_Index failed!")
        
    # score iCMS2 & iCMS3 using both of their respective lists (Up and Down)
    try:
        print("iCMS2")
        sc.tl.score_genes(
            a,
            gene_list=genes["iCMS2_Up"],
            gene_pool=genes["iCMS2_Down"]+genes["iCMS2_Up"],
            ctrl_size=len(genes["iCMS2_Down"]),
            score_name="iCMS2",
        )
    except:
        print("iCMS2 failed!")
    try:
        print("iCMS3")
        sc.tl.score_genes(
            a,
            gene_list=genes["iCMS3_Up"],
            gene_pool=genes["iCMS3_Down"]+genes["iCMS3_Up"],
            ctrl_size=len(genes["iCMS3_Down"]),
            score_name="iCMS3",
        )
    except:
        print("iCMS3 failed!")

---
### Calculate T cell gene signatures

In [None]:
%%time
for a in [v, v2, v_stroma, broad, broad_stroma]:
    # score gene signatures
    for sig in [x for x in list(T_genes.keys()) if x.endswith("_Up")]:#list(T_genes.keys()):
        try:
            if sig.endswith("_Down"):
                pass
            elif sig.endswith("_Up"):
                if sig.split("_")[0] in a.obs.columns:
                    print("Overwriting {}".format(sig))
                    a.obs.drop(columns=[sig.split("_")[0]], inplace=True)
                print(sig.split("_")[0])
                sc.tl.score_genes(
                    a,
                    gene_list=T_genes[sig],
                    gene_pool=T_genes[sig.replace("_Up","_Down")] + T_genes[sig],
                    ctrl_size=len(T_genes[sig.replace("_Up","_Down")]),
                    score_name=sig.split("_")[0],
                )
            
            else:
                if sig in a.obs.columns:
                    print("Overwriting {}".format(sig))
                    a.obs.drop(columns=[sig], inplace=True)
                print(sig)
                sc.tl.score_genes(a, T_genes[sig], score_name=sig)
        except:
            print("{} failed!".format(sig))

In [None]:
# signatures that were successfully scored for T cell states
# (these names are used as .obs column names further down)
# NOTE(review): 'Metionine cycle' presumably mirrors the spelling of the signature
# table header — verify before "correcting" it, since the column name must match.
T_sigs_final = [
    'CD8 Activation',
    'CD4 reg resting',
    'CD4 NV/CM resting',
    'CD4/CD8 resting',
    'IFN response activated',
    'Proliferation activated',
    'CD8 cytotoxic activated',
    'CD8 cytokine activated',
    'Aerobic glycolysis',
    'Oxphos',
    'FA oxidation',
    'FA synthesis',
    'Metionine cycle',
    'NEAA synthesis',
    'AA uptake',
    'Mevalonate pathway',
    'Cytokine production',
    'Oxphos vs. Glycolysis',
    'FA oxidation vs. synthesis',
    'Polyamine synthesis',
    'PI3K-AKT-mTOR-MYC signaling',
]

---
### Now infer cNMF usage scores for cells by factorizing with reference consensus gene spectra

In [None]:
import kitchen.ingredients as k

In [None]:
# how to rename usage columns: refNMF state names listed in component order,
# i.e. the first entry belongs to "usage_1" and the last to "usage_30"
_usage_state_names = [
    "STM", "END1", "BL1", "FIB1", "CRC1", "MYE1", "TL1", "MYE2", "CRC2", "CT",
    "SSC", "CRC3", "EE1", "MYE3", "PLA", "FIB2", "MYE4", "GOB", "MAS", "MYE5",
    "CRC4", "ABS", "TUF", "FIB3", "FIB4", "TL2", "END2", "TL3", "EE2", "BL2",
]
rename_dict = {
    "usage_{}".format(component): state
    for component, state in enumerate(_usage_state_names, start=1)
}

In [None]:
# deconvolve `v` with refNMF against the VUMC cNMF reference (k=30)
# NOTE(review): presumably deconvolve_cnmf adds usage_1..usage_30 columns to .obs,
# which the rename below maps to state names — confirm against visium_utils.
v, spectra, spectra_ref, niter = deconvolve_cnmf(adata=v, cnmf_dir="../data/scRNA/VUMC_NMF/", k=30)
v.obs.rename(columns=rename_dict, inplace=True)
# plot refNMF fractions next to the annotated cell type
# (the commented-out kwargs are disabled options; `s` is not defined in this notebook)
_ = k.plot_embedding(
    v,
    colors=["Cell_Type"] + list(rename_dict.values()),
    show_clustering=False,
    #save_to="{}_VUMCrefNMF30.png".format(s),
    #vmin=a.obs[list(rename_dict.values())].values.min(),
    #vmax=a.obs[list(rename_dict.values())].values.max(),
    ncols=5,
    cmap="viridis",
    figsize_scale=0.7,
)

In [None]:
# deconvolve `v2` with refNMF against the VUMC cNMF reference (k=30)
# NOTE(review): presumably deconvolve_cnmf adds usage_1..usage_30 columns to .obs,
# which the rename below maps to state names — confirm against visium_utils.
v2, spectra, spectra_ref, niter = deconvolve_cnmf(adata=v2, cnmf_dir="../data/scRNA/VUMC_NMF/", k=30)
v2.obs.rename(columns=rename_dict, inplace=True)
# plot refNMF fractions next to the annotated cell type
# (the commented-out kwargs are disabled options; `s` is not defined in this notebook)
_ = k.plot_embedding(
    v2,
    colors=["Cell_Type"] + list(rename_dict.values()),
    show_clustering=False,
    #save_to="{}_VUMCrefNMF30.png".format(s),
    #vmin=a.obs[list(rename_dict.values())].values.min(),
    #vmax=a.obs[list(rename_dict.values())].values.max(),
    ncols=5,
    cmap="viridis",
    figsize_scale=0.7,
)

In [None]:
# deconvolve `v_stroma` with refNMF against the VUMC cNMF reference (k=30)
# NOTE(review): presumably deconvolve_cnmf adds usage_1..usage_30 columns to .obs,
# which the rename below maps to state names — confirm against visium_utils.
v_stroma, spectra, spectra_ref, niter = deconvolve_cnmf(adata=v_stroma, cnmf_dir="../data/scRNA/VUMC_NMF/", k=30)
v_stroma.obs.rename(columns=rename_dict, inplace=True)
# plot refNMF fractions next to the annotated cell type
# (the commented-out kwargs are disabled options; `s` is not defined in this notebook)
_ = k.plot_embedding(
    v_stroma,
    colors=["Cell_Type"] + list(rename_dict.values()),
    show_clustering=False,
    #save_to="{}_VUMCrefNMF30.png".format(s),
    #vmin=a.obs[list(rename_dict.values())].values.min(),
    #vmax=a.obs[list(rename_dict.values())].values.max(),
    ncols=5,
    cmap="viridis",
    figsize_scale=0.7,
)

In [None]:
# plot cell type, Exhaustion/Cytotoxicity and the T cell signature scores for the VUMC stroma
# (not the refNMF fractions; the commented-out kwargs are disabled options)
_ = k.plot_embedding(
    v_stroma,
    colors=["Cell_Type","Exhaustion","Cytotoxicity"] + list(T_sigs_final),
    show_clustering=False,
    #save_to="{}_VUMCrefNMF30.png".format(s),
    #vmin=a.obs[list(rename_dict.values())].values.min(),
    #vmax=a.obs[list(rename_dict.values())].values.max(),
    ncols=5,
    cmap="viridis",
    figsize_scale=0.7,
)

In [None]:
# deconvolve `broad` with refNMF against the VUMC cNMF reference (k=30)
# NOTE(review): presumably deconvolve_cnmf adds usage_1..usage_30 columns to .obs,
# which the rename below maps to state names — confirm against visium_utils.
broad, spectra, spectra_ref, niter = deconvolve_cnmf(adata=broad, cnmf_dir="../data/scRNA/VUMC_NMF/", k=30)
broad.obs.rename(columns=rename_dict, inplace=True)
# plot refNMF fractions only (Cell_Type is plotted in a separate cell for this dataset)
# (the commented-out kwargs are disabled options; `s` is not defined in this notebook)
_ = k.plot_embedding(
    broad,
    colors=list(rename_dict.values()),
    show_clustering=False,
    #save_to="{}_VUMCrefNMF30.png".format(s),
    #vmin=a.obs[list(rename_dict.values())].values.min(),
    #vmax=a.obs[list(rename_dict.values())].values.max(),
    ncols=5,
    cmap="viridis",
    figsize_scale=0.7,
)

In [None]:
# plot the annotated cell type for the Broad epithelium
# (not the refNMF fractions; the commented-out kwargs are disabled options)
_ = k.plot_embedding(
    broad,
    colors=["Cell_Type"],
    show_clustering=False,
    #save_to="{}_VUMCrefNMF30.png".format(s),
    #vmin=a.obs[list(rename_dict.values())].values.min(),
    #vmax=a.obs[list(rename_dict.values())].values.max(),
    ncols=5,
    cmap="viridis",
    figsize_scale=0.7,
)

In [None]:
# deconvolve `broad_stroma` with refNMF against the VUMC cNMF reference (k=30)
# NOTE(review): presumably deconvolve_cnmf adds usage_1..usage_30 columns to .obs,
# which the rename below maps to state names — confirm against visium_utils.
broad_stroma, spectra, spectra_ref, niter = deconvolve_cnmf(adata=broad_stroma, cnmf_dir="../data/scRNA/VUMC_NMF/", k=30)
broad_stroma.obs.rename(columns=rename_dict, inplace=True)
# plot refNMF fractions next to the annotated cell type
# (the commented-out kwargs are disabled options; `s` is not defined in this notebook)
_ = k.plot_embedding(
    broad_stroma,
    colors=["Cell_Type"] + list(rename_dict.values()),
    show_clustering=False,
    #save_to="{}_VUMCrefNMF30.png".format(s),
    #vmin=a.obs[list(rename_dict.values())].values.min(),
    #vmax=a.obs[list(rename_dict.values())].values.max(),
    ncols=5,
    cmap="viridis",
    figsize_scale=0.7,
)

In [None]:
# plot cell type, Exhaustion/Cytotoxicity and the T cell signature scores for the Broad stroma
# (not the refNMF fractions; the commented-out kwargs are disabled options)
_ = k.plot_embedding(
    broad_stroma,
    colors=["Cell_Type","Exhaustion","Cytotoxicity"] + T_sigs_final,
    show_clustering=False,
    #save_to="{}_VUMCrefNMF30.png".format(s),
    #vmin=a.obs[list(rename_dict.values())].values.min(),
    #vmax=a.obs[list(rename_dict.values())].values.max(),
    ncols=5,
    cmap="viridis",
    figsize_scale=0.7,
)

---
Drop signature columns that overlap with cell types we'll be calling

In [None]:
# errors="ignore" keeps this cell re-runnable once the columns are already gone
v.obs.drop(columns=["B cell","Macrophage"], inplace=True, errors="ignore")

In [None]:
# errors="ignore" keeps this cell re-runnable once the columns are already gone
v2.obs.drop(columns=["B cell","Macrophage"], inplace=True, errors="ignore")

In [None]:
# errors="ignore" keeps this cell re-runnable once the columns are already gone
v_stroma.obs.drop(columns=["B cell","Macrophage"], inplace=True, errors="ignore")

In [None]:
# errors="ignore" keeps this cell re-runnable once the columns are already gone
broad_stroma.obs.drop(columns=["B cell","Macrophage"], inplace=True, errors="ignore")

In [None]:
# errors="ignore" keeps this cell re-runnable once the columns are already gone
broad.obs.drop(columns=["B cell","Macrophage"], inplace=True, errors="ignore")

---
### Call B cells

In [None]:
# label B-lineage cells by refNMF usage (>= 0.3); later assignments overwrite earlier
# ones, so a cell passing several thresholds ends up with the last label (BL1 < BL2 < PLA)
broad_stroma.obs["B cell"] = np.nan
# object dtype up front: writing strings into a float column is a deprecated silent upcast
broad_stroma.obs["B cell"] = broad_stroma.obs["B cell"].astype(object)
broad_stroma.obs.loc[broad_stroma.obs.BL1 >= 0.3, "B cell"] = "BL1"
broad_stroma.obs.loc[broad_stroma.obs.BL2 >= 0.3, "B cell"] = "BL2"
broad_stroma.obs.loc[broad_stroma.obs.PLA >= 0.3, "B cell"] = "PLA"
broad_stroma.obs["B cell"].value_counts()

In [None]:
# label B-lineage cells by refNMF usage (>= 0.3); later assignments overwrite earlier
# ones, so a cell passing several thresholds ends up with the last label (BL1 < BL2 < PLA)
v_stroma.obs["B cell"] = np.nan
# object dtype up front: writing strings into a float column is a deprecated silent upcast
v_stroma.obs["B cell"] = v_stroma.obs["B cell"].astype(object)
v_stroma.obs.loc[v_stroma.obs.BL1 >= 0.3, "B cell"] = "BL1"
v_stroma.obs.loc[v_stroma.obs.BL2 >= 0.3, "B cell"] = "BL2"
v_stroma.obs.loc[v_stroma.obs.PLA >= 0.3, "B cell"] = "PLA"
v_stroma.obs["B cell"].value_counts()

---
### Call Macrophages

In [None]:
# label macrophages by refNMF usage (>= 0.3); later assignments overwrite earlier
# ones, so a cell passing several thresholds ends up with the last label (MYE1 < MYE2 < MYE3)
broad_stroma.obs["Macrophage"] = np.nan
# object dtype up front: writing strings into a float column is a deprecated silent upcast
broad_stroma.obs["Macrophage"] = broad_stroma.obs["Macrophage"].astype(object)
broad_stroma.obs.loc[broad_stroma.obs.MYE1 >= 0.3, "Macrophage"] = "MYE1"
broad_stroma.obs.loc[broad_stroma.obs.MYE2 >= 0.3, "Macrophage"] = "MYE2"
broad_stroma.obs.loc[broad_stroma.obs.MYE3 >= 0.3, "Macrophage"] = "MYE3"
broad_stroma.obs["Macrophage"].value_counts()

In [None]:
# label macrophages by refNMF usage (>= 0.3); later assignments overwrite earlier
# ones, so a cell passing several thresholds ends up with the last label (MYE1 < MYE2 < MYE3)
v_stroma.obs["Macrophage"] = np.nan
# object dtype up front: writing strings into a float column is a deprecated silent upcast
v_stroma.obs["Macrophage"] = v_stroma.obs["Macrophage"].astype(object)
v_stroma.obs.loc[v_stroma.obs.MYE1 >= 0.3, "Macrophage"] = "MYE1"
v_stroma.obs.loc[v_stroma.obs.MYE2 >= 0.3, "Macrophage"] = "MYE2"
v_stroma.obs.loc[v_stroma.obs.MYE3 >= 0.3, "Macrophage"] = "MYE3"
v_stroma.obs["Macrophage"].value_counts()

---
### Call Neutrophils

In [None]:
# label neutrophils as cells with MYE4 usage >= 0.3; all other cells stay NaN
broad_stroma.obs["Neutrophil"] = np.nan
# object dtype up front: writing strings into a float column is a deprecated silent upcast
broad_stroma.obs["Neutrophil"] = broad_stroma.obs["Neutrophil"].astype(object)
broad_stroma.obs.loc[broad_stroma.obs.MYE4 >= 0.3, "Neutrophil"] = "MYE4"
broad_stroma.obs["Neutrophil"].value_counts()

In [None]:
# label neutrophils as cells with MYE4 usage >= 0.3; all other cells stay NaN
v_stroma.obs["Neutrophil"] = np.nan
# object dtype up front: writing strings into a float column is a deprecated silent upcast
v_stroma.obs["Neutrophil"] = v_stroma.obs["Neutrophil"].astype(object)
v_stroma.obs.loc[v_stroma.obs.MYE4 >= 0.3, "Neutrophil"] = "MYE4"
v_stroma.obs["Neutrophil"].value_counts()

---
### Call TL1 cells

In [None]:
# label T helper cells as cells with TL1 usage >= 0.3; all other cells stay NaN
broad_stroma.obs["T helper"] = np.nan
# object dtype up front: writing strings into a float column is a deprecated silent upcast
broad_stroma.obs["T helper"] = broad_stroma.obs["T helper"].astype(object)
broad_stroma.obs.loc[broad_stroma.obs.TL1 >= 0.3, "T helper"] = "TL1"
broad_stroma.obs["T helper"].value_counts()

In [None]:
# label T helper cells as cells with TL1 usage >= 0.3; all other cells stay NaN
v_stroma.obs["T helper"] = np.nan
# object dtype up front: writing strings into a float column is a deprecated silent upcast
v_stroma.obs["T helper"] = v_stroma.obs["T helper"].astype(object)
v_stroma.obs.loc[v_stroma.obs.TL1 >= 0.3, "T helper"] = "TL1"
v_stroma.obs["T helper"].value_counts()

---
### Call TL2 and TL3 cells

In [None]:
# label cytotoxic T cells by refNMF usage (>= 0.3); TL2 is assigned last, so a cell
# passing both thresholds is labelled TL2
broad_stroma.obs["T cytotoxic"] = np.nan
# object dtype up front: writing strings into a float column is a deprecated silent upcast
broad_stroma.obs["T cytotoxic"] = broad_stroma.obs["T cytotoxic"].astype(object)
broad_stroma.obs.loc[broad_stroma.obs.TL3 >= 0.3, "T cytotoxic"] = "TL3"
broad_stroma.obs.loc[broad_stroma.obs.TL2 >= 0.3, "T cytotoxic"] = "TL2"
broad_stroma.obs["T cytotoxic"].value_counts()

In [None]:
# label cytotoxic T cells by refNMF usage (>= 0.3); TL2 is assigned last, so a cell
# passing both thresholds is labelled TL2
v_stroma.obs["T cytotoxic"] = np.nan
# object dtype up front: writing strings into a float column is a deprecated silent upcast
v_stroma.obs["T cytotoxic"] = v_stroma.obs["T cytotoxic"].astype(object)
v_stroma.obs.loc[v_stroma.obs.TL3 >= 0.3, "T cytotoxic"] = "TL3"
v_stroma.obs.loc[v_stroma.obs.TL2 >= 0.3, "T cytotoxic"] = "TL2"
v_stroma.obs["T cytotoxic"].value_counts()

---
## Prep for dotplots

In [None]:
# make the project-local plotting helpers importable
import sys; sys.path.append("../resources/")
# NOTE(review): `cody_heatmap` and `plt`, both used further down, are not imported
# explicitly anywhere in this notebook; presumably they arrive through this star
# import — confirm, and prefer explicit imports.
from dotplot_utils import *
sns.set_style("white")

In [None]:
sample_key = pd.read_csv("../resources/ST/visium_sample_key.csv", index_col=0)

In [None]:
# colour for each sample/tumour label used as a plotting hue
cmap_dict = {
    # Tumor Type
    'SSL/HP': "#c4a4e1",
    'MSI': "#7a4fa3",
    'MSS': "#ffc101",
    'TA/TVA': "#fee799",
    'NL': "#1f77b4",
    'AD': "tab:orange",
    'CRC': "tab:red",
    'HM': "#7a4fa3",
    'CIN+': "#ffc101",
    'MSI-H': "#7a4fa3",
    # this one's global
    "nan": "#ffffff",
}

---
# refNMF Cell States

In [None]:
# obs columns carried over into the combined object: refNMF usages, sample
# metadata, cell-type calls and T cell signature scores
obs_keep = list(rename_dict.values()) + [
    "IES","Patient","Cell_Type","Tumor_Type","Polyp_Type","MMRStatusTumor","Sample_Classification",
    "Compartment","IES+","T cytotoxic","T helper","Macrophage","Neutrophil","B cell","Cytotoxicity","Exhaustion"
] + T_sigs_final

a_comb = None
for a in [v, v2, v_stroma, broad, broad_stroma]:
    # keep only two genes: the combined object is used for its .obs table
    slim = a[:,:2].copy()
    slim.obs = slim.obs.loc[:, slim.obs.columns.isin(obs_keep)]
    # turn categorical columns into plain strings before concatenating
    for cat_col in slim.obs.columns[slim.obs.dtypes == "category"].tolist():
        slim.obs[cat_col] = slim.obs[cat_col].astype(str)

    # strip everything except X and obs
    del slim.var
    del slim.obsm
    del slim.varm
    del slim.uns

    a_comb = (
        slim.copy()
        if a_comb is None
        else a_comb.concatenate(slim, join="outer", batch_key=None, fill_value=0)
    )

In [None]:
# Tumor_Type: fill gaps from MMRStatusTumor first, then from the harmonised Polyp_Type
mmr_status = a_comb.obs.MMRStatusTumor.astype(str).replace("nan",np.nan)
a_comb.obs["Tumor_Type"] = a_comb.obs["Tumor_Type"].astype(str).replace("nan",np.nan).fillna(mmr_status)

# collapse polyp subtypes into the two combined labels
a_comb.obs["Polyp_Type"] = a_comb.obs["Polyp_Type"].replace({
    "TA":"TA/TVA",
    "TVA":"TA/TVA",
    "HP":"SSL/HP",
    "SSL":"SSL/HP",
})

polyp_type = a_comb.obs["Polyp_Type"].astype(str).replace("nan",np.nan)
a_comb.obs["Tumor_Type"] = a_comb.obs["Tumor_Type"].fillna(polyp_type)
a_comb.obs["Tumor_Type"] = a_comb.obs["Tumor_Type"].replace("UNC","AD")

In [None]:
a_comb.obs.Tumor_Type.value_counts()

In [None]:
# Collapse Sample_Classification to three labels. The assignments run in order,
# so a later rule overrides an earlier one for cells matching both.
# adenoma-type lesions (by Tumor_Type or by existing SER/UNC classification) -> "AD"
a_comb.obs.loc[(a_comb.obs.Tumor_Type.isin(["TA/TVA","SSL/HP","UNC"]))|(a_comb.obs.Sample_Classification.isin(["SER","UNC"])), "Sample_Classification"] = "AD"
# MSS / MSI / MSI-H (in either column) -> "CRC"
a_comb.obs.loc[(a_comb.obs.Tumor_Type.isin(["MSS","MSI","MSI-H"]))|(a_comb.obs.Sample_Classification.isin(["MSS","MSI","MSI-H"])), "Sample_Classification"] = "CRC"
# normal tissue -> "NL"
a_comb.obs.loc[a_comb.obs.Tumor_Type.isin(["NL"]), "Sample_Classification"] = "NL"
a_comb.obs.Sample_Classification = a_comb.obs.Sample_Classification.astype(str)
# anything still unclassified ("nan" after the str cast) defaults to "AD"
a_comb.obs.Sample_Classification = a_comb.obs.Sample_Classification.replace("nan","AD")

In [None]:
a_comb.obs.Sample_Classification.value_counts()

In [None]:
a_comb.obs.loc[a_comb.obs.Cell_Type=="T", "Cell_Type"] = "TL"
a_comb.obs.loc[a_comb.obs.Cell_Type=="B", "Cell_Type"] = "BL"

In [None]:
a_comb.obs.Cell_Type = a_comb.obs.Cell_Type.astype(str)
a_comb.obs.Cell_Type.value_counts()

In [None]:
fig, ax = plt.subplots(1,1,figsize=(10,5))
sc.pl.violin(a_comb, ["IES"], groupby="Cell_Type", rotation=90, ax=ax)

In [None]:
fig, ax = plt.subplots(1,1,figsize=(7,5))
sc.pl.violin(a_comb, ["IES"], groupby="Tumor_Type", rotation=90, ax=ax)

In [None]:
fig, ax = plt.subplots(1,1,figsize=(5,5))
sc.pl.violin(a_comb, ["IES"], groupby="Sample_Classification", rotation=90, ax=ax)

In [None]:
# split refNMF cell states by compartment; each list is kept in alphabetical order
nmf_epi = sorted([
    'STM', 'CRC1', 'CRC2', 'CT', 'SSC', 'CRC3',
    'EE1', 'GOB', 'CRC4', 'ABS', 'TUF', 'EE2',
])

nmf_stroma = sorted([
    'END1', 'FIB1', 'FIB2', 'FIB3', 'FIB4', 'END2',
])

nmf_immune = sorted([
    'BL1', 'MYE1', 'TL1', 'MYE2', 'MYE3', 'PLA',
    'MYE4', 'MAS', 'MYE5', 'TL2', 'TL3', 'BL2',
])

# groups of .obs features for the cell-type heatmap (group label -> feature names)
marker_states_dict = {
    "Activity Signatures": ["IES","Exhaustion"] + T_sigs_final,
    "Epithelial": nmf_epi,
    "Stromal": nmf_stroma,
    "Immune": nmf_immune,
}

In [None]:
# heatmap/dotplot of signature scores and refNMF usages per annotated cell type
cody_heatmap(
    a_comb,
    groupby="Cell_Type",
    # flatten the per-group feature lists into one list, in dict order
    features=sum(marker_states_dict.values(), []),
    cluster_vars=False,
    vars_dict=marker_states_dict,
    groupby_order=None,
    groupby_colordict=None,
    cluster_obs=True,
    figsize=(7.4, 12),
    save="scRNA_out/immexcl/celltype_VUMCrefNMF30_dotplot.png",
    dpi=400,
    cmap="Greys",
    size_title="Fraction of cells\nin group (%)",
)

In [None]:
# copy of Cell_Type in which the three cE01-cE03 stem/TA-like clusters are merged
# into a single "CSC" label; every other cell type keeps its name
a_comb.obs["Cell_Type_collapsed"] = a_comb.obs["Cell_Type"].astype(str)
a_comb.obs.loc[a_comb.obs.Cell_Type.isin(["cE01 (Stem/TA-like)","cE02 (Stem/TA-like/Immature Goblet)","cE03 (Stem/TA-like prolif)"]), "Cell_Type_collapsed"] = "CSC"

In [None]:
a_comb.obs.Cell_Type_collapsed.value_counts().sort_index()

In [None]:
fig, ax = plt.subplots(1,1,figsize=(10,5))
sc.pl.violin(
    a_comb,
    ["IES"],
    groupby="Cell_Type_collapsed",
    order=[
        # malignant epi
        "ASC",
        "CSC",
        "SSC",
        # NL epi
        "ABS",
        "CT",
        "EE",
        "GOB",
        "STM",
        "TAC",
        "TUF",
        # stroma
        "END",
        "FIB",
        # immune
        "BL",
        "MAS",
        "MYE",
        "PLA",
        "TL",
    ],
    rotation=90,
    ax=ax,
    save="IES.png",
)

In [None]:
a_comb.obs[["B cell","Neutrophil","T cytotoxic","T helper","Macrophage"]] = a_comb.obs[["B cell","Neutrophil","T cytotoxic","T helper","Macrophage"]].replace("nan",np.nan)

In [None]:
a_comb.write("scRNA_out/immexcl/scRNA_combined_IES_refNMF.h5ad", compression="gzip")

In [None]:
a_comb = sc.read("scRNA_out/immexcl/scRNA_combined_IES_refNMF.h5ad")

In [None]:
# CIN_status: Tumor_Type with "AD" folded into "TA/TVA" and "MSI" renamed to "MSI-H"
# NOTE(review): this column is not referenced again in the visible notebook; the
# "*_CINstatus.png" figures below are coloured by "Tumor_Type" while their hue_order
# uses the labels produced here — confirm which column those plots should use.
a_comb.obs["CIN_status"] = a_comb.obs.Tumor_Type.astype(str)
a_comb.obs.loc[a_comb.obs.Tumor_Type.isin(["TA/TVA","AD"]), "CIN_status"] = "TA/TVA"
a_comb.obs.loc[a_comb.obs.Tumor_Type=="MSI", "CIN_status"] = "MSI-H"
# turn the "nan" strings from the str cast back into real missing values
a_comb.obs["CIN_status"] = a_comb.obs["CIN_status"].replace("nan",np.nan)

In [None]:
a_comb.obs.CIN_status.value_counts()

---
# Define functions to use for plots below

In [None]:
def cellpct_v_IES(adata, celltype, colorby, palette, hue_order, save_to, color_box=False):
    """
    Plot per-patient abundance of a called cell type against mean epithelial IES.

    For each patient, the number of cells with a non-null value in
    `adata.obs[celltype]` is expressed as a percentage of that patient's stromal
    cells and plotted against the mean "IES" of the patient's epithelial cells
    (left panel, scatter). Patients are also split into "IES+" / "IES-" at a
    threshold of mean + 1 SD of "IES" over all stromal cells (dashed line) and
    the same percentage is summarised per group (right panel, box plot).

    Only patients with more than one epithelial and more than one stromal cell,
    and with non-missing count, IES and `colorby` values, are plotted.

    Relies on `pd`, `np`, `sns` and `plt` being available in the notebook namespace.

    Parameters
    ----------
    adata : AnnData-like
        Object whose `.obs` table has the columns "Patient", "Compartment"
        (with values "Epithelium" / "Stroma"), "IES", `celltype` and `colorby`.
    celltype : str
        `.obs` column holding the cell-type call (NaN for cells not called).
    colorby : str
        `.obs` column used as scatter hue (and box hue if `color_box`).
    palette : dict
        Mapping from `colorby` values to colours.
    hue_order : list of str
        Order of `colorby` levels for the scatter plot.
    save_to : str
        Path the figure is written to (400 dpi).
    color_box : bool, default False
        If True, the box plot is also split by `colorby`.

    Returns
    -------
    None
    """
    obs = adata.obs
    epi_obs = obs.loc[obs.Compartment=="Epithelium"]
    stroma_obs = obs.loc[obs.Compartment=="Stroma"]
    pct_col = "pct_{}".format(celltype.replace(" ", ""))

    # called cells per patient (count() skips NaN, i.e. cells without a call)
    t_counts = pd.DataFrame(obs.groupby("Patient")[celltype].count().sort_values())
    # mean IES over each patient's epithelial cells
    ies_counts = pd.DataFrame(epi_obs.groupby("Patient")["IES"].mean().sort_values())
    counts_df = t_counts.merge(ies_counts, left_index=True, right_index=True, how="inner")
    # attach the colouring variable for each patient
    counts_df = counts_df.reset_index().merge(obs[["Patient",colorby]].drop_duplicates(), on="Patient", how="left").set_index("Patient")
    counts_df = counts_df.dropna()
    # epithelial cells
    cell_totals = epi_obs.groupby("Patient")["Patient"].count().reset_index(name="total_epi_cells")
    counts_df = counts_df.reset_index().merge(cell_totals, on="Patient", how="left").set_index("Patient")
    # now stromal cells
    cell_totals = stroma_obs.groupby("Patient")["Patient"].count().reset_index(name="total_stroma_cells")
    counts_df = counts_df.reset_index().merge(cell_totals, on="Patient", how="left").set_index("Patient")
    # filter to samples with stromal and epi cells
    counts_df = counts_df.loc[(counts_df.total_stroma_cells > 1)&(counts_df.total_epi_cells > 1)].copy()
    # calc percentages
    counts_df[pct_col] = round((counts_df[celltype] / counts_df["total_stroma_cells"])*100, 3)

    # IES thresh: mean + 1 SD of IES over all stromal cells
    stroma_ies = stroma_obs["IES"]
    ies_thresh = stroma_ies.mean() + stroma_ies.std()
    counts_df["IES_status"] = np.nan
    # object dtype up front: writing strings into a float column is a deprecated silent upcast
    counts_df["IES_status"] = counts_df["IES_status"].astype(object)
    counts_df.loc[counts_df.IES >= ies_thresh, "IES_status"] = "IES+"
    counts_df.loc[counts_df.IES < ies_thresh, "IES_status"] = "IES-"

    # plot
    fig, axes = plt.subplots(1, 2, sharey=True, figsize=(5, 3.5), width_ratios=(3,1))
    axes[0].axvline(ies_thresh, ls="--", lw=1.8, alpha=0.7, c="k")
    sns.scatterplot(
        data=counts_df,
        x="IES",
        y=pct_col,
        hue=colorby,
        hue_order=hue_order,
        palette=palette,
        s=50,
        alpha=0.7,
        ax=axes[0],
    )

    PROPS = {
        'boxprops':{'facecolor':'none', 'edgecolor':'k'},
        'medianprops':{'color':'k'},
        'whiskerprops':{'color':'k'},
        'capprops':{'color':'k'}
    }
    sns.boxplot(
        data=counts_df,
        x="IES_status",
        y=pct_col,
        order=["IES-","IES+"],
        hue=colorby if color_box else None,
        dodge=True,
        orient="v",
        fliersize=5,
        linewidth=1.8,
        ax=axes[1],
        saturation=1,
        showcaps=False,
        **PROPS,
    )

    # one figure-level legend replaces the per-axes legends
    lgd = fig.legend(bbox_to_anchor=(0.95, 0.88), loc="upper left", fontsize="small", frameon=False)
    tit = fig.suptitle(celltype, fontsize=18)
    # an axes has no legend when nothing was drawn with a hue, so guard the removal
    legend_axes = [axes[0], axes[1]] if color_box else [axes[0]]
    for legend_ax in legend_axes:
        ax_legend = legend_ax.get_legend()
        if ax_legend is not None:
            ax_legend.remove()
    axes[0].set_ylabel("% Stromal Cells (scRNA)")
    axes[1].set_xlabel("")
    axes[1].set_ylabel("")

    sns.despine()
    plt.tight_layout()
    plt.savefig(save_to, dpi=400, bbox_extra_artists=(lgd,tit), bbox_inches='tight')

In [None]:
def Tcellsig_v_IES(adata, signature, colorby, palette, hue_order, save_to, color_box=False):
    """
    Plot per-patient mean T cell signature score against mean epithelial IES.

    For each patient, the mean of `adata.obs[signature]` over cells with
    Cell_Type == "TL" is plotted against the mean "IES" of the patient's
    epithelial cells (left panel, scatter). Patients are also split into
    "IES+" / "IES-" at a threshold of mean + 1 SD of "IES" over all stromal
    cells (dashed line) and the signature is summarised per group (right panel,
    box plot). Patients with a missing signature mean, IES mean or `colorby`
    value are dropped.

    All data are taken from `adata` (the previous version read the notebook
    global `a_comb` for the signature and colouring tables).

    Relies on `pd`, `np`, `sns` and `plt` being available in the notebook namespace.

    Parameters
    ----------
    adata : AnnData-like
        Object whose `.obs` table has the columns "Patient", "Compartment"
        (with values "Epithelium" / "Stroma"), "Cell_Type", "IES",
        `signature` and `colorby`.
    signature : str
        `.obs` column holding the signature score; also used as title and y label.
    colorby : str
        `.obs` column used as scatter hue (and box hue if `color_box`).
    palette : dict
        Mapping from `colorby` values to colours.
    hue_order : list of str
        Order of `colorby` levels for the scatter plot.
    save_to : str
        Path the figure is written to (400 dpi).
    color_box : bool, default False
        If True, the box plot is also split by `colorby`.

    Returns
    -------
    None
    """
    obs = adata.obs
    stroma_obs = obs.loc[obs.Compartment=="Stroma"]

    # mean signature score over each patient's T lymphocytes
    t_counts = pd.DataFrame(obs.loc[obs.Cell_Type=="TL"].groupby("Patient")[signature].mean().sort_values())
    # mean IES over each patient's epithelial cells
    ies_counts = pd.DataFrame(obs.loc[obs.Compartment=="Epithelium"].groupby("Patient")["IES"].mean().sort_values())
    counts_df = t_counts.merge(ies_counts, left_index=True, right_index=True, how="inner")
    # attach the colouring variable for each patient
    counts_df = counts_df.reset_index().merge(obs[["Patient",colorby]].drop_duplicates(), on="Patient", how="left").set_index("Patient")
    counts_df = counts_df.dropna()

    # IES thresh: mean + 1 SD of IES over all stromal cells
    stroma_ies = stroma_obs["IES"]
    ies_thresh = stroma_ies.mean() + stroma_ies.std()
    counts_df["IES_status"] = np.nan
    # object dtype up front: writing strings into a float column is a deprecated silent upcast
    counts_df["IES_status"] = counts_df["IES_status"].astype(object)
    counts_df.loc[counts_df.IES >= ies_thresh, "IES_status"] = "IES+"
    counts_df.loc[counts_df.IES < ies_thresh, "IES_status"] = "IES-"

    # plot
    fig, axes = plt.subplots(1, 2, sharey=True, figsize=(5, 3.5), width_ratios=(3,1))
    axes[0].axvline(ies_thresh, ls="--", lw=1.8, alpha=0.7, c="k")
    sns.scatterplot(
        data=counts_df,
        x="IES",
        y=signature,
        hue=colorby,
        hue_order=hue_order,
        palette=palette,
        s=50,
        alpha=0.7,
        ax=axes[0],
    )

    PROPS = {
        'boxprops':{'facecolor':'none', 'edgecolor':'k'},
        'medianprops':{'color':'k'},
        'whiskerprops':{'color':'k'},
        'capprops':{'color':'k'}
    }
    sns.boxplot(
        data=counts_df,
        x="IES_status",
        y=signature,
        order=["IES-","IES+"],
        hue=colorby if color_box else None,
        dodge=True,
        orient="v",
        fliersize=5,
        linewidth=1.8,
        ax=axes[1],
        saturation=1,
        showcaps=False,
        **PROPS,
    )

    # one figure-level legend replaces the per-axes legends
    lgd = fig.legend(bbox_to_anchor=(0.95, 0.88), loc="upper left", fontsize="small", frameon=False)
    tit = fig.suptitle(signature, fontsize=18)
    # an axes has no legend when nothing was drawn with a hue, so guard the removal
    legend_axes = [axes[0], axes[1]] if color_box else [axes[0]]
    for legend_ax in legend_axes:
        ax_legend = legend_ax.get_legend()
        if ax_legend is not None:
            ax_legend.remove()
    axes[0].set_ylabel(signature)
    axes[1].set_xlabel("")
    axes[1].set_ylabel("")

    sns.despine()
    plt.tight_layout()
    plt.savefig(save_to, dpi=400, bbox_extra_artists=(lgd,tit), bbox_inches='tight')

---
# Correlating IES with TL2/3 cells by IES value

In [None]:
cellpct_v_IES(
    adata=a_comb,
    celltype="T cytotoxic",
    colorby="Sample_Classification",
    palette=cmap_dict,
    hue_order=["NL","AD","CRC"],
    save_to="scRNA_out/immexcl/pctCD8_vs_IESscore.png",
    color_box=False,
)

In [None]:
# NOTE(review): this figure is saved as "*_CINstatus.png" and its hue_order uses the
# "TA/TVA" / "MSI-H" labels, but it is coloured by "Tumor_Type" rather than the
# "CIN_status" column built earlier — confirm which column is intended. The same
# pattern repeats in every "*_CINstatus.png" call in this notebook.
cellpct_v_IES(
    adata=a_comb,
    celltype="T cytotoxic",
    colorby="Tumor_Type",
    palette=cmap_dict,
    hue_order=["NL","TA/TVA","MSS","SSL/HP","MSI-H"],
    save_to="scRNA_out/immexcl/pctCD8_vs_IESscore_CINstatus.png",
    color_box=False,
)

---
# Correlating IES with TL1 cells by IES value

In [None]:
cellpct_v_IES(
    adata=a_comb,
    celltype="T helper",
    colorby="Sample_Classification",
    palette=cmap_dict,
    hue_order=["NL","AD","CRC"],
    save_to="scRNA_out/immexcl/pctCD4_vs_IESscore.png",
    color_box=False,
)

In [None]:
cellpct_v_IES(
    adata=a_comb,
    celltype="T helper",
    colorby="Tumor_Type",
    palette=cmap_dict,
    hue_order=["NL","TA/TVA","MSS","SSL/HP","MSI-H"],
    save_to="scRNA_out/immexcl/pctCD4_vs_IESscore_CINstatus.png",
    color_box=False,
)

---
# Correlating IES with Neutrophils by IES value

In [None]:
cellpct_v_IES(
    adata=a_comb,
    celltype="Neutrophil",
    colorby="Sample_Classification",
    palette=cmap_dict,
    hue_order=["NL","AD","CRC"],
    save_to="scRNA_out/immexcl/pctNeutrophil_vs_IESscore.png",
    color_box=False,
)

In [None]:
cellpct_v_IES(
    adata=a_comb,
    celltype="Neutrophil",
    colorby="Tumor_Type",
    palette=cmap_dict,
    hue_order=["NL","TA/TVA","MSS","SSL/HP","MSI-H"],
    save_to="scRNA_out/immexcl/pctNeutrophil_vs_IESscore_CINstatus.png",
    color_box=False,
)

---
# Correlating IES with Macrophages by IES value

In [None]:
cellpct_v_IES(
    adata=a_comb,
    celltype="Macrophage",
    colorby="Sample_Classification",
    palette=cmap_dict,
    hue_order=["NL","AD","CRC"],
    save_to="scRNA_out/immexcl/pctMacrophage_vs_IESscore.png",
    color_box=False,
)

In [None]:
cellpct_v_IES(
    adata=a_comb,
    celltype="Macrophage",
    colorby="Tumor_Type",
    palette=cmap_dict,
    hue_order=["NL","TA/TVA","MSS","SSL/HP","MSI-H"],
    save_to="scRNA_out/immexcl/pctMacrophage_vs_IESscore_CINstatus.png",
    color_box=False,
)

---
# Correlating IES with B cells by IES value

In [None]:
cellpct_v_IES(
    adata=a_comb,
    celltype="B cell",
    colorby="Sample_Classification",
    palette=cmap_dict,
    hue_order=["NL","AD","CRC"],
    save_to="scRNA_out/immexcl/pctBcell_vs_IESscore.png",
    color_box=False,
)

In [None]:
cellpct_v_IES(
    adata=a_comb,
    celltype="B cell",
    colorby="Tumor_Type",
    palette=cmap_dict,
    hue_order=["NL","TA/TVA","MSS","SSL/HP","MSI-H"],
    save_to="scRNA_out/immexcl/pctBcell_vs_IESscore_CINstatus.png",
    color_box=False,
)

---
# Correlating IES with Exhaustion by IES value

In [None]:
Tcellsig_v_IES(
    adata=a_comb,
    signature="Exhaustion",
    colorby="Sample_Classification",
    palette=cmap_dict,
    hue_order=["NL","AD","CRC"],
    save_to="scRNA_out/immexcl/exhaustion_vs_IESscore.png",
    color_box=False,
)

In [None]:
Tcellsig_v_IES(
    adata=a_comb,
    signature="Exhaustion",
    colorby="Tumor_Type",
    palette=cmap_dict,
    hue_order=["NL","TA/TVA","MSS","SSL/HP","MSI-H"],
    save_to="scRNA_out/immexcl/exhaustion_vs_IESscore_CINstatus.png",
    color_box=False,
)

---
# Correlating IES with Cytotoxicity by IES value

In [None]:
Tcellsig_v_IES(
    adata=a_comb,
    signature="Cytotoxicity",
    colorby="Sample_Classification",
    palette=cmap_dict,
    hue_order=["NL","AD","CRC"],
    save_to="scRNA_out/immexcl/cytotoxicity_vs_IESscore.png",
    color_box=False,
)

In [None]:
Tcellsig_v_IES(
    adata=a_comb,
    signature="Cytotoxicity",
    colorby="Tumor_Type",
    palette=cmap_dict,
    hue_order=["NL","TA/TVA","MSS","SSL/HP","MSI-H"],
    save_to="scRNA_out/immexcl/cytotoxicity_vs_IESscore_CINstatus.png",
    color_box=False,
)

---
# Correlating IES with `CD8 Activation` by IES value

In [None]:
Tcellsig_v_IES(
    adata=a_comb,
    signature="CD8 Activation",
    colorby="Sample_Classification",
    palette=cmap_dict,
    hue_order=["NL","AD","CRC"],
    save_to="scRNA_out/immexcl/CD8Activation_vs_IESscore.png",
    color_box=False,
)

In [None]:
Tcellsig_v_IES(
    adata=a_comb,
    signature="CD8 Activation",
    colorby="Tumor_Type",
    palette=cmap_dict,
    hue_order=["NL","TA/TVA","MSS","SSL/HP","MSI-H"],
    save_to="scRNA_out/immexcl/CD8Activation_vs_IESscore_CINstatus.png",
    color_box=False,
)

---
# Correlating IES with `CD4 reg resting` by IES value

In [None]:
Tcellsig_v_IES(
    adata=a_comb,
    signature="CD4 reg resting",
    colorby="Sample_Classification",
    palette=cmap_dict,
    hue_order=["NL","AD","CRC"],
    save_to="scRNA_out/immexcl/CD4regresting_vs_IESscore.png",
    color_box=False,
)

In [None]:
Tcellsig_v_IES(
    adata=a_comb,
    signature="CD4 reg resting",
    colorby="Tumor_Type",
    palette=cmap_dict,
    hue_order=["NL","TA/TVA","MSS","SSL/HP","MSI-H"],
    save_to="scRNA_out/immexcl/CD4regresting_vs_IESscore_CINstatus.png",
    color_box=False,
)

---
# Correlating IES with `CD4 NV/CM resting` by IES value

In [None]:
Tcellsig_v_IES(
    adata=a_comb,
    signature="CD4 NV/CM resting",
    colorby="Sample_Classification",
    palette=cmap_dict,
    hue_order=["NL","AD","CRC"],
    save_to="scRNA_out/immexcl/CD4NVCMresting_vs_IESscore.png",
    color_box=False,
)

In [None]:
Tcellsig_v_IES(
    adata=a_comb,
    signature="CD4 NV/CM resting",
    colorby="Tumor_Type",
    palette=cmap_dict,
    hue_order=["NL","TA/TVA","MSS","SSL/HP","MSI-H"],
    save_to="scRNA_out/immexcl/CD4NVCMresting_vs_IESscore_CINstatus.png",
    color_box=False,
)

---
# Correlating IES with `CD4/CD8 resting` by IES value

In [None]:
Tcellsig_v_IES(
    adata=a_comb,
    signature="CD4/CD8 resting",
    colorby="Sample_Classification",
    palette=cmap_dict,
    hue_order=["NL","AD","CRC"],
    save_to="scRNA_out/immexcl/CD4CD8resting_vs_IESscore.png",
    color_box=False,
)

In [None]:
Tcellsig_v_IES(
    adata=a_comb,
    signature="CD4/CD8 resting",
    colorby="Tumor_Type",
    palette=cmap_dict,
    hue_order=["NL","TA/TVA","MSS","SSL/HP","MSI-H"],
    save_to="scRNA_out/immexcl/CD4CD8resting_vs_IESscore_CINstatus.png",
    color_box=False,
)

---
# Correlating IES with `IFN response activated` by IES value

In [None]:
Tcellsig_v_IES(
    adata=a_comb,
    signature="IFN response activated",
    colorby="Sample_Classification",
    palette=cmap_dict,
    hue_order=["NL","AD","CRC"],
    save_to="scRNA_out/immexcl/IFNresponseactivated_vs_IESscore.png",
    color_box=False,
)

In [None]:
Tcellsig_v_IES(
    adata=a_comb,
    signature="IFN response activated",
    colorby="Tumor_Type",
    palette=cmap_dict,
    hue_order=["NL","TA/TVA","MSS","SSL/HP","MSI-H"],
    save_to="scRNA_out/immexcl/IFNresponseactivated_vs_IESscore_CINstatus.png",
    color_box=False,
)

---
# Correlating IES with `Proliferation activated` by IES value

In [None]:
Tcellsig_v_IES(
    adata=a_comb,
    signature="Proliferation activated",
    colorby="Sample_Classification",
    palette=cmap_dict,
    hue_order=["NL","AD","CRC"],
    save_to="scRNA_out/immexcl/Proliferationactivated_vs_IESscore.png",
    color_box=False,
)

In [None]:
Tcellsig_v_IES(
    adata=a_comb,
    signature="Proliferation activated",
    colorby="Tumor_Type",
    palette=cmap_dict,
    hue_order=["NL","TA/TVA","MSS","SSL/HP","MSI-H"],
    save_to="scRNA_out/immexcl/Proliferationactivated_vs_IESscore_CINstatus.png",
    color_box=False,
)

---
# Correlating IES with `CD8 cytotoxic activated` by IES value

In [None]:
Tcellsig_v_IES(
    adata=a_comb,
    signature="CD8 cytotoxic activated",
    colorby="Sample_Classification",
    palette=cmap_dict,
    hue_order=["NL","AD","CRC"],
    save_to="scRNA_out/immexcl/CD8cytotoxicactivated_vs_IESscore.png",
    color_box=False,
)

In [None]:
Tcellsig_v_IES(
    adata=a_comb,
    signature="CD8 cytotoxic activated",
    colorby="Tumor_Type",
    palette=cmap_dict,
    hue_order=["NL","TA/TVA","MSS","SSL/HP","MSI-H"],
    save_to="scRNA_out/immexcl/CD8cytotoxicactivated_vs_IESscore_CINstatus.png",
    color_box=False,
)

---
# Correlating IES with `CD8 cytokine activated` by IES value

In [None]:
Tcellsig_v_IES(
    adata=a_comb,
    signature="CD8 cytokine activated",
    colorby="Sample_Classification",
    palette=cmap_dict,
    hue_order=["NL","AD","CRC"],
    save_to="scRNA_out/immexcl/CD8cytokineactivated_vs_IESscore.png",
    color_box=False,
)

In [None]:
Tcellsig_v_IES(
    adata=a_comb,
    signature="CD8 cytokine activated",
    colorby="Tumor_Type",
    palette=cmap_dict,
    hue_order=["NL","TA/TVA","MSS","SSL/HP","MSI-H"],
    save_to="scRNA_out/immexcl/CD8cytokineactivated_vs_IESscore_CINstatus.png",
    color_box=False,
)

---
# Loop through rest

In [None]:
# remaining signatures to plot in a loop: the same entries as T_sigs_final after
# its first eight, which are plotted in individual cells above
T_sigs_final2 = [
 'Aerobic glycolysis',
 'Oxphos',
 'FA oxidation',
 'FA synthesis',
 'Metionine cycle',
 'NEAA synthesis',
 'AA uptake',
 'Mevalonate pathway',
 'Cytokine production',
 'Oxphos vs. Glycolysis',
 'FA oxidation vs. synthesis',
 'Polyamine synthesis',
 'PI3K-AKT-mTOR-MYC signaling',
]

In [None]:
# two figures per signature: coloured by sample classification, then by tumour type
# (colouring column, hue order, output path template)
plot_variants = [
    ("Sample_Classification", ["NL","AD","CRC"], "scRNA_out/immexcl/{}_vs_IESscore.png"),
    ("Tumor_Type", ["NL","TA/TVA","MSS","SSL/HP","MSI-H"], "scRNA_out/immexcl/{}_vs_IESscore_CINstatus.png"),
]

for sig in T_sigs_final2:
    print("starting {}".format(sig))
    # file names use the signature name with spaces removed
    sig_tag = sig.replace(" ","")
    for group_col, group_order, path_template in plot_variants:
        Tcellsig_v_IES(
            adata=a_comb,
            signature=sig,
            colorby=group_col,
            palette=cmap_dict,
            hue_order=group_order,
            save_to=path_template.format(sig_tag),
            color_box=False,
        )