In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import MILWRM.ST as st

# some stuff to make this notebook work better with Scanpy
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns

In [None]:
# make output directories
import os

# makedirs creates the parent "scRNA_out" together with the nested folder, and
# exist_ok=True makes this cell safe to re-run without a separate (racy)
# os.path.exists check before each mkdir.
os.makedirs("scRNA_out/gene_signatures/", exist_ok=True)

In [None]:
# Plot configuration. The call order is kept as-is because both scanpy and
# seaborn adjust global plotting settings.
figure_dir = "scRNA_out/gene_signatures/"
sc.set_figure_params(dpi_save=400, transparent=True)
sns.set_style("white")
sc.settings.figdir = figure_dir  # scanpy figure directory

---
### Define gene signatures

In [None]:
# Load the signature table and replace missing cells with 0 in one step.
g = pd.read_csv("../resources/gene_signatures.csv").fillna(0)
# show the column labels of the loaded table
g.columns

In [None]:
# Map each column of `g` to the list of its entries, skipping the 0
# placeholders that stand in for missing cells.
# A comprehension keeps the loop variable local: the previous `for k in ...`
# loop left a global `k` behind, which overwrites the
# `import kitchen.ingredients as k` alias if this cell is re-run after that
# import.
genes = {
    signature: [gene for gene in g[signature] if gene != 0]
    for signature in g.columns
}

In [None]:
# number of gene lists held in `genes`
len(genes)

In [None]:
# Leave out the four iCMS Up/Down component lists: they are combined into
# the two "iCMS2" / "iCMS3" signature scores when the samples are scored.
# (Every other key of `genes` is kept.)
icms_component_lists = {"iCMS2_Up", "iCMS2_Down", "iCMS3_Up", "iCMS3_Down"}
genes_short = [name for name in genes if name not in icms_component_lists]
len(genes_short)

In [None]:
# recombine signatures into useful categories

# Epithelial / stromal panel. Each entry is passed as a `colors` key when
# the signature overlays are plotted.
signatures_epi_stroma = [
    "IFN Stimulated EPI",
    "iCMS2", "iCMS3",
    "CBC",  # Gil Vasquez, et al. 2022
    "RSC",  # Gil Vasquez, et al. 2022
    "Basal", "Squamous", "Mesenchymal",
    "Glandular", "Ciliated", "Alveolar",
    "AC",  # astrocyte
    "OPC",  # oligodendrocyte progenitor
    "NPC",  # neural progenitor
    "IES",  # custom epithelial-intrinsic exclusion signature
]

# Myeloid panel; entries are used as `colors` keys in the overlay plots.
signatures_myeloid = [
    "Myeloid",
    "cDC2", "cDC1",
    "Macrophage",
    "Classical Monocytes",
    "M1", "M2",
    "Neutrophils",
    "Costimulatory MYE",
    "Stimulatory DCs",
    "IFN Stimulated MYE",
]

# Lymphoid panel; entries are used as `colors` keys in the overlay plots.
signatures_lymphoid = [
    "T cell", "T reg",
    "T cell CD4", "T cell CD8",
    "T reg Resting",
    "T reg Suppressive",
    "T reg Tissue_Homing",
    "T reg Cytokines",
    "T reg Activation",
    "TH1", "TH2", "TH17",
    "T cell Exhaustion",
    "TRM",  # tissue-resident memory T cells
    "NK cell",
    "B cell",
    "Plasma cell",
]

# Activity panel; entries are used as `colors` keys in the overlay plots.
signatures_activity = [
    "EMT", "pEMT",
    "Senescence", "SASP",
    "Interferon", "Hypoxia", "Oxphos",
    "Stress", "Stress Response",
    "Bacterial Response",
    "Fibrosis",
    "Cytotoxicity", "Exhaustion",
    "Cytokines", "Chemokines",
    "MHC",
    "Fetal", "Stem", "Metaplasia",
    "Proliferation", "Translation", "Cycle",
    "Metal",
]

# Curated cross-category selection, grouped by the category each entry
# comes from.
signatures_curated = [
    # activity
    "pEMT", "Metaplasia", "Stem", "CytoTRACE",
    "MHC", "Fibrosis", "Bacterial Response",
    # epi/stroma
    "IFN Stimulated EPI",
    "iCMS2", "iCMS3",
    "CBC",  # Gil Vasquez, et al. 2022
    "RSC",  # Gil Vasquez, et al. 2022
    "Squamous",
    "IES",  # custom epithelial-intrinsic exclusion signature
    # lymphoid
    "T reg Suppressive",
    "T cell CD4", "T cell CD8",
    "T cell Exhaustion",
    # myeloid
    "IFN Stimulated MYE",
    "Neutrophils",
]

# Reduced selection, grouped by category.
# NOTE(review): presumably the panel set for figures 1 and 2 — confirm.
signatures_fig_1_2 = [
    # activity
    "Metaplasia", "Stem", "CytoTRACE",
    # epi/stroma
    "IFN Stimulated EPI",
    "iCMS2", "iCMS3",
    "CBC",  # Gil Vasquez, et al. 2022
    "RSC",  # Gil Vasquez, et al. 2022
    # lymphoid
    "T reg Suppressive",
    "T cell CD4", "T cell CD8",
    "T cell Exhaustion",
    # myeloid
    "IFN Stimulated MYE",
    "Neutrophils",
]

---
### Read in samples

In [None]:
# Load the four input objects (read in the order listed) and bind them to
# nl_epi, epi, stroma and broad respectively.
nl_epi, epi, stroma, broad = [
    sc.read(h5ad_path)
    for h5ad_path in (
        "../data/scRNA/VUMC_HTAN_DIS_EPI_V2.h5ad",
        "../data/scRNA/abnormal_epithelium.h5ad",
        "../data/scRNA/VUMC_HTAN_VAL_DIS_NONEPI_V2.h5ad",
        "../data/scRNA/Broad_Epi_sub.h5ad",
    )
]

In [None]:
%%time
# Score every gene list on each dataset; each score is stored under the name
# passed as `score_name`. Scoring is best-effort: a failing signature is
# reported (with the reason) and the loop moves on to the next one.
for adata in [broad, nl_epi, epi, stroma]:
    # score gene signatures
    # NOTE(review): this iterates over every list in `genes`, so the four
    # iCMS Up/Down component lists are also scored on their own — switch to
    # `genes_short` if only the combined iCMS scores are wanted.
    for sig in list(genes.keys()):
        print(sig)
        try:
            sc.tl.score_genes(adata, genes[sig], score_name=sig)
        except Exception as err:
            # `Exception` (not a bare `except:`) so Ctrl-C / kernel interrupt
            # still stops the cell; the reason is printed instead of hidden
            print("{} failed!".format(sig))
            print("    {}: {}".format(type(err).__name__, err))

    # (disabled) score Stem_Cell_Index from Gil Vasquez, et al. using CBC and RSC lists
    #try:
    #    print("Stem Cell Index")
    #    sc.tl.score_genes(
    #        adata,
    #        gene_list=genes["RSC"],
    #        gene_pool=genes["CBC"]+genes["RSC"],
    #        ctrl_size=len(genes["CBC"]),
    #        score_name="Stem Cell Index",
    #    )
    #except:
    #    print("Stem_Cell_Index failed!")

    # score iCMS2 & iCMS3 using both of their respective lists (Up and Down):
    # the Up list is scored with the Up + Down genes as the gene pool
    for icms in ("iCMS2", "iCMS3"):
        print(icms)
        try:
            # lookups stay inside the try so a missing list is reported as a
            # failure for this score rather than aborting the whole cell
            up_genes = genes[icms + "_Up"]
            down_genes = genes[icms + "_Down"]
            sc.tl.score_genes(
                adata,
                gene_list=up_genes,
                gene_pool=down_genes + up_genes,
                ctrl_size=len(down_genes),
                score_name=icms,
            )
        except Exception as err:
            print("{} failed!".format(icms))
            print("    {}: {}".format(type(err).__name__, err))

---
### Get CytoTRACE values

In [None]:
# Combined VUMC object; its obs "Compartment" column is used in the following
# cells to pull out per-compartment subsets.
a_comb = sc.read("../data/scRNA/VUMC_COMBINED.h5ad")

In [None]:
# CytoTRACE table written by step1, indexed by its first CSV column; the
# column labels are replaced with the single label "CytoTRACE".
cyto = pd.read_csv(
    "../step1/scRNA_out/Broad_Epi_sub_CytoTRACE.csv", index_col=0
).set_axis(["CytoTRACE"], axis=1)
cyto

In [None]:
# Attach CytoTRACE to the combined object's obs, matched on the obs index.
# Rows with no entry in `cyto` get NaN, as with the left merge this replaces.
# Assigning a column (instead of `obs = obs.merge(...)`) keeps the cell
# re-runnable: a repeated merge would create CytoTRACE_x / CytoTRACE_y
# columns and break the `"CytoTRACE"` lookups in the cells that follow.
a_comb.obs["CytoTRACE"] = cyto["CytoTRACE"].reindex(a_comb.obs.index).values

In [None]:
# number of rows per Compartment label in the combined object
a_comb.obs["Compartment"].value_counts()

In [None]:
# independent copy of the rows labelled "Normal_Epithelium"
is_normal_epi = a_comb.obs["Compartment"] == "Normal_Epithelium"
tmp = a_comb[is_normal_epi, :].copy()

In [None]:
# Copy CytoTRACE onto nl_epi for the obs names it shares with `tmp`; every
# other row keeps NaN. The shared names are computed once so both sides of
# the assignment are indexed in the same order.
shared_cells = list(set(tmp.obs_names) & set(nl_epi.obs_names))
nl_epi.obs["CytoTRACE"] = np.nan
nl_epi.obs.loc[shared_cells, "CytoTRACE"] = tmp.obs.loc[shared_cells, "CytoTRACE"].values

In [None]:
# independent copy of the rows labelled "Abnormal_Epithelium"
is_abnormal_epi = a_comb.obs["Compartment"] == "Abnormal_Epithelium"
tmp = a_comb[is_abnormal_epi, :].copy()

In [None]:
# Copy CytoTRACE onto epi for the obs names it shares with `tmp`; every
# other row keeps NaN. The shared names are computed once so both sides of
# the assignment are indexed in the same order.
shared_cells = list(set(tmp.obs_names) & set(epi.obs_names))
epi.obs["CytoTRACE"] = np.nan
epi.obs.loc[shared_cells, "CytoTRACE"] = tmp.obs.loc[shared_cells, "CytoTRACE"].values

In [None]:
# independent copy of the rows labelled "Stroma"
is_stroma = a_comb.obs["Compartment"] == "Stroma"
tmp = a_comb[is_stroma, :].copy()

In [None]:
# Copy CytoTRACE onto stroma for the obs names it shares with `tmp`; every
# other row keeps NaN. The shared names are computed once so both sides of
# the assignment are indexed in the same order.
shared_cells = list(set(tmp.obs_names) & set(stroma.obs_names))
stroma.obs["CytoTRACE"] = np.nan
stroma.obs.loc[shared_cells, "CytoTRACE"] = tmp.obs.loc[shared_cells, "CytoTRACE"].values

---
### Plot gene signatures

In [None]:
import kitchen.ingredients as k

In [None]:
%%time
# One entry per overlay figure: (signature list, output path template).
# "{}" in the template is filled with the dataset label. This replaces four
# copy-pasted plot_embedding calls that differed only in these two values.
signature_panels = [
    # EPI/STROMA
    (signatures_epi_stroma, "scRNA_out/gene_signatures/VUMC_{}_signatures_epi_stroma.png"),
    # MYE
    (signatures_myeloid, "scRNA_out/gene_signatures/VUMC_{}_signatures_myeloid.png"),
    # LYMPH
    (signatures_lymphoid, "scRNA_out/gene_signatures/VUMC_{}_signatures_lymphoid.png"),
    # ACT
    (signatures_activity, "scRNA_out/gene_signatures/VUMC_{}_signatures_activity.png"),
]

for adata, name in zip([nl_epi, epi, stroma], ["NLEPI", "EPI", "STROMA"]):
    for panel_signatures, path_template in signature_panels:
        # plot signature overlays for this panel; Cell_Type and Tumor_Type
        # are always drawn first, followed by the panel's signatures
        p = k.plot_embedding(
            adata,
            colors=["Cell_Type", "Tumor_Type"] + panel_signatures,
            save_to=path_template.format(name),
            ncols=5,
            figsize_scale=0.6,
            show_clustering=False,
            cmap="viridis",
        )

---
### Write updated anndata objects

In [None]:
# Writes nl_epi back to the same path it was read from, so the input file is
# overwritten with the updated object (gzip-compressed).
nl_epi.write("../data/scRNA/VUMC_HTAN_DIS_EPI_V2.h5ad", compression="gzip")

In [None]:
# Writes epi back to the same path it was read from, so the input file is
# overwritten with the updated object (gzip-compressed).
epi.write("../data/scRNA/abnormal_epithelium.h5ad", compression="gzip")

In [None]:
# Writes stroma back to the same path it was read from, so the input file is
# overwritten with the updated object (gzip-compressed).
stroma.write("../data/scRNA/VUMC_HTAN_VAL_DIS_NONEPI_V2.h5ad", compression="gzip")

In [None]:
# Writes broad back to the same path it was read from, so the input file is
# overwritten with the updated object (gzip-compressed).
broad.write("../data/scRNA/Broad_Epi_sub.h5ad", compression="gzip")