In [None]:
import pandas as pd
import scanpy as sc
import MILWRM.ST as st

# some stuff to make this notebook work better with Scanpy
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns; sns.set_style("white")

---
### Define gene signatures

In [None]:
# Load the gene-signature table and replace empty cells with 0 in one step;
# the trailing expression displays the column labels.
g = pd.read_csv("../../resources/gene_signatures.csv").fillna(0)
g.columns

In [None]:
# Build {column label -> list of entries}, leaving out the 0 placeholders
# that stand in for empty cells.
genes = {}
for k, column in g.items():
    genes[k] = [x for x in column if x != 0]

In [None]:
# number of signature lists loaded
len(genes)

In [None]:
# The CBC/RSC and iCMS2/iCMS3 Up/Down lists are only inputs to combined scores,
# so they are left out of the short list of signature names.
combined_only = {'iCMS2_Up', 'iCMS2_Down', 'iCMS3_Up', 'iCMS3_Down', 'CBC', 'RSC'}
genes_short = [key for key in genes if key not in combined_only]
len(genes_short)

In [None]:
# Signature names grouped into plotting categories.
# Each entry is used as a feature name / panel label in the overlay plots.

# --- epithelial / stromal -------------------------------------------------
signatures_epi_stroma = [
    "IFN stimulated EPI",
    "iCMS2",
    "iCMS3",
    "Stem cell index",  # Gil Vasquez, et al. 2022
    "Basal",
    "Squamous",
    "Mesenchymal",
    "Glandular",
    "Ciliated",
    "Alveolar",
    "AC",  # astrocyte
    "OPC",  # oligodendrocyte progenitor
    "NPC",  # neural progenitor
    "IES",  # custom epithelial-intrinsic exclusion signature
]

# --- myeloid --------------------------------------------------------------
signatures_myeloid = [
    "Myeloid",
    "cDC2",
    "cDC1",
    "Macrophage",
    "Classical monocytes",
    "M1",
    "M2",
    "Neutrophils",
    "Costimulatory MYE",
    "Stimulatory DCs",
    "IFN stimulated MYE",
]

# --- lymphoid -------------------------------------------------------------
signatures_lymphoid = [
    "T cell",
    "T reg",
    "T cell CD4",
    "T cell CD8",
    "T reg resting",
    "T reg suppressive",
    "T reg tissue homing",
    "T reg cytokines",
    "T reg activation",
    "TH1",
    "TH2",
    "TH17",
    "T cell exhaustion",
    "TRM",  # tissue-resident memory T cells
    "NK cell",
    "B cell",
    "Plasma cell",
]

# --- activity / cell state ------------------------------------------------
signatures_activity = [
    "EMT",
    "pEMT",
    "Senescence",
    "SASP",
    "Interferon",
    "Hypoxia",
    "Oxphos",
    "Stress",
    "Stress response",
    "Bacterial response",
    "Fibrosis",
    "Cytotoxicity",
    "Exhaustion",
    "Cytokines",
    "Chemokines",
    "MHC",
    "Fetal",
    "Stem",
    "Metaplasia",
    "Proliferation",
    "Translation",
    "Cycle",
    "Metal",
    "CytoTRACE",
    # "CNV score",  (currently disabled)
]

# --- curated subset drawn from the categories above -----------------------
signatures_curated = [
    # activity
    "pEMT",
    "Metaplasia",
    "Stem",
    "CytoTRACE",
    "IES",
    # "CNV score",  (currently disabled)
    "MHC",
    "Fibrosis",
    "Bacterial response",
    # epi/stroma
    "IFN stimulated EPI",
    "iCMS2",
    "iCMS3",
    "Stem cell index",  # Gil Vasquez, et al. 2022
    "Squamous",
    # lymphoid
    "T reg suppressive",
    "T cell CD4",
    "T cell CD8",
    "T cell exhaustion",
    # myeloid
    "IFN stimulated MYE",
    "Neutrophils",
]

---
### Read in key dataframe with sample information

In [None]:
# The first CSV column becomes the index; the loops below iterate over it.
sample_key = pd.read_csv(
    "../../resources/visium_sample_key.csv",
    index_col=0,
)

In [None]:
# Re-score every gene signature on each sample, render the signature overlay
# figures, and write the updated AnnData back to its master .h5ad file.

# Paired scores: (score name, target gene list, reference gene list).
# The target list is scored against a gene pool made of both lists, with the
# control set sized to the reference list.
# NOTE(review): the plotting lists use 'Stem cell index' (lower-case) while this
# score is stored as "Stem Cell Index" -- confirm which column name is intended.
paired_scores = [
    ("Stem Cell Index", "RSC", "CBC"),  # Gil Vasquez, et al.
    ("iCMS2", "iCMS2_Up", "iCMS2_Down"),
    ("iCMS3", "iCMS3_Up", "iCMS3_Down"),
]

# Overlay figures: (file-name suffix, features to plot, number of columns).
overlay_panels = [
    ("epi_stroma", signatures_epi_stroma, 5),
    ("myeloid", signatures_myeloid, 4),
    ("lymphoid", signatures_lymphoid, 5),
    ("activity", signatures_activity, 5),
]

for s in sample_key.index:
    print("Starting {}:".format(s), end="\n")
    master_path = "datasets/{}_master.h5ad".format(s)
    a = sc.read(master_path)  # read in anndata

    # drop existing gene signature columns; errors="ignore" keeps a sample that
    # lacks some of these columns (never scored, or a signature that failed
    # previously) from aborting the whole loop with a KeyError
    a.obs.drop(
        columns=list(genes.keys()) + ["iCMS2", "iCMS3", "Stem Cell Index"],
        inplace=True,
        errors="ignore",
    )

    # score gene signatures (best effort: one failing signature must not stop
    # the rest, but Ctrl-C / kernel interrupt must still propagate)
    for sig in list(genes.keys()):
        try:
            print(sig)
            sc.tl.score_genes(a, genes[sig], score_name=sig)
        except Exception as err:
            print("{} failed! ({}: {})".format(sig, type(err).__name__, err))

    # score Stem Cell Index, iCMS2 and iCMS3 from their paired lists
    for score_name, target_key, reference_key in paired_scores:
        try:
            print(score_name)
            sc.tl.score_genes(
                a,
                gene_list=genes[target_key],
                gene_pool=genes[reference_key] + genes[target_key],
                ctrl_size=len(genes[reference_key]),
                score_name=score_name,
            )
        except Exception as err:
            print("{} failed! ({}: {})".format(score_name, type(err).__name__, err))

    # plot signature overlays (EPI/STROMA, MYE, LYMPH, ACT)
    for panel_name, panel_features, panel_ncols in overlay_panels:
        # Scoring above is best effort, so only plot the columns that exist;
        # otherwise one failed signature would crash here before the save.
        available = [f for f in panel_features if f in a.obs.columns]
        missing = [f for f in panel_features if f not in a.obs.columns]
        if missing:
            print("\tMissing from .obs, skipped in {} overlay: {}".format(panel_name, missing))
        if not available:
            continue
        p = st.assemble_pita(
            a,
            features=available,
            label=available,
            use_rep=".obs",
            save_to="gene_signatures/{}_{}_signatures_{}.png".format(
                sample_key.loc[s, "sample_key_short"],
                sample_key.loc[s, "block_name"],
                panel_name,
            ),
            ncols=panel_ncols,
            histo="hires",
            cmap="viridis",
        )

    # save to master anndata object
    print("\tSaving to {}".format(master_path), end="\n\n")
    a.write(master_path, compression="gzip")

---
### IES: Immune Exclusion Signature (_DDR1_, _TGFBI_, _PAK4_, _DPEP1_)

In [None]:
# Re-score only the IES signature on each sample, redraw the epi/stroma overlay
# figure, and write the updated AnnData back to its master .h5ad file.
for s in sample_key.index:
    print("Starting {}:".format(s), end="\n")
    master_path = "datasets/{}_master.h5ad".format(s)
    a = sc.read(master_path)  # read in anndata

    # score gene signatures (best effort; Ctrl-C / kernel interrupt still
    # propagates because only Exception subclasses are caught)
    for sig in ["IES"]:
        try:
            print(sig)
            sc.tl.score_genes(a, genes[sig], score_name=sig)
        except Exception as err:
            print("{} failed! ({}: {})".format(sig, type(err).__name__, err))

    # plot signature overlay, restricted to columns present in .obs so that a
    # failed score does not crash the plot before the save
    available = [f for f in signatures_epi_stroma if f in a.obs.columns]
    missing = [f for f in signatures_epi_stroma if f not in a.obs.columns]
    if missing:
        print("\tMissing from .obs, skipped in epi_stroma overlay: {}".format(missing))
    if available:
        p = st.assemble_pita(
            a,
            features=available,
            label=available,
            use_rep=".obs",
            save_to="gene_signatures/{}_{}_signatures_epi_stroma.png".format(
                sample_key.loc[s, "sample_key_short"],
                sample_key.loc[s, "block_name"],
            ),
            ncols=5,
            histo="hires",
            cmap="viridis",
        )

    # save to master anndata object
    print("\tSaving to {}".format(master_path), end="\n\n")
    a.write(master_path, compression="gzip")