In [None]:
import sys; sys.path.append("../resources/")
from dotplot_utils import *
import pandas as pd
import seaborn as sns
# make this notebook work better with Scanpy
import warnings; warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Create the output directory tree used by every plot in this notebook.
# os.makedirs builds "ST_out" and the nested plots folder in a single call, and
# exist_ok=True keeps the cell safe to re-run (no check-then-create race).
import os
os.makedirs("ST_out/plots_overview/", exist_ok=True)

In [None]:
# Scanpy plotting defaults: figures are written to the overview-plots folder,
# saved at 400 dpi with a transparent background.
sc.settings.figdir = "ST_out/plots_overview/"
sc.set_figure_params(
    transparent=True,
    dpi_save=400,
)

---
## Read in key dataframe with sample information

In [None]:
# Sample key table; its index (first CSV column) holds the sample IDs used
# below to locate each "<sample>_master.h5ad" file.
sample_key = pd.read_csv(
    "../resources/ST/visium_sample_key.csv",
    index_col=0,
)

---

In [None]:
# Load the master AnnData object of every sample listed in the sample key.
outs = []
for s in sample_key.index:
    h5ad_path = "../data/ST/{}_master.h5ad".format(s)
    a = sc.read(h5ad_path)
    print("Read adata from {}".format(h5ad_path))

    # collect the per-sample objects for concatenation
    outs.append(a)

# every sample in the key must have produced exactly one AnnData object
assert len(outs) == len(sample_key), "Check length of outs"

In [None]:
# Combine the per-sample AnnData objects into one.
# join="outer" keeps the union of variables (missing entries filled with 0) and
# the sample IDs from the key are used as batch categories.
# NOTE(review): AnnData.concatenate is deprecated upstream in favour of
# anndata.concat — worth migrating once the batch/index naming is confirmed.
reference, others = outs[0], outs[1:]
a_comb = reference.concatenate(
    others,
    join="outer",
    fill_value=0,
    batch_categories=list(sample_key.index),
)

# reset .obsm and .var on the combined object; .var is re-populated further
# down from the master gene list
del a_comb.obsm
del a_comb.var

In [None]:
# IEX threshold = mean + one standard deviation of the IEX score over all
# observations annotated as smooth muscle.
smooth_muscle_iex = a_comb.obs.loc[
    (a_comb.obs.pathology_annotation == "smooth_muscle"), "IEX"
]
ies_thresh = smooth_muscle_iex.mean() + smooth_muscle_iex.std()

In [None]:
# display the IEX threshold (smooth-muscle mean + 1 SD) computed in the previous cell
ies_thresh

In [None]:
# number of smooth-muscle-annotated observations the threshold is based on
is_smooth_muscle = a_comb.obs.pathology_annotation == "smooth_muscle"
len(a_comb.obs.loc[is_smooth_muscle, "IEX"])

In [None]:
# Collapse the "CNV clone" labels into tissue domains:
#   clones 1/2/3/1A/1B -> "Tumor", "S" -> "Stroma", "E" -> "Edge".
# The column is built as a standalone Series and assigned back once. The
# previous `obs[col].replace(..., inplace=True)` acted on a temporary object
# and does not update the DataFrame under pandas Copy-on-Write (the warning
# about it is a FutureWarning, which this notebook silences).
tumor_clones = ["1", "2", "3", "1A", "1B"]
cnv_clone = a_comb.obs["CNV clone"]
cnv_domain = cnv_clone.astype(str)  # missing labels are kept as the string "nan"
cnv_domain = cnv_domain.mask(cnv_clone.isin(tumor_clones), "Tumor")
cnv_domain = cnv_domain.replace({"S": "Stroma", "E": "Edge"})
a_comb.obs["CNV_clone_domain"] = cnv_domain.astype("category")

In [None]:
# Grouped evolution label: "B" and "N" are merged into "N/B", every other
# label is kept, and missing values stay missing (not a "nan" category).
evolution = a_comb.obs["Evolution"]
evolution_grouped = evolution.astype(str)
evolution_grouped = evolution_grouped.mask(evolution.isin(["B", "N"]), "N/B")
# astype(str) turned missing values into the text "nan"; turn them back into NaN
evolution_grouped = evolution_grouped.mask(evolution_grouped == "nan", np.nan)
a_comb.obs["Evolution_grouped"] = evolution_grouped.astype("category")

In [None]:
# Save the mean CNV score per patient, computed over observations whose
# "CNV clone" label is not "S".
# NOTE(review): the earlier comment said "maximum" but the code takes the
# mean — confirm which statistic is intended.
not_stroma = a_comb.obs["CNV clone"] != "S"
cnv_score_by_patient = (
    a_comb.obs.loc[not_stroma, :].groupby(["Patient"])["CNV score"].mean()
)
pd.DataFrame(cnv_score_by_patient).to_csv("../resources/ST/cnv_score_per_patient.csv")

#### read list of genes detected in all Visium samples with their ENSEMBL IDs

In [None]:
# gene table indexed by its first CSV column; merged into a_comb.var in the next cell
tmp = pd.read_csv(
    "../resources/ST/master_visium_genes_list.csv",
    index_col=0,
)

In [None]:
# Attach the gene table to .var by index; the left join keeps every variable
# of a_comb, including those absent from the table.
a_comb.var = a_comb.var.merge(
    tmp,
    how="left",
    left_index=True,
    right_index=True,
)

#### Adjust 'Variants' to a per-patient 'Mut. Burden' (stored as 'TMB', the maximum 'Variants' value per patient) to avoid highly-weighted mutations from polyps with large tissue area

In [None]:
# Per-patient mutational burden: the largest "Variants" value observed for
# each patient (0 when a patient has no value at all).
tmb_per_patient = (
    a_comb.obs.groupby("Patient")["Variants"]
    .max()
    .fillna(0)
    .rename("TMB")
    .to_frame()
)
# Broadcast the per-patient value back onto every observation.
# Any "TMB" column left over from an earlier run of this cell is dropped first;
# otherwise the merge would create "TMB_x"/"TMB_y" and the integer cast in the
# next cell would fail on re-run.
a_comb.obs = a_comb.obs.drop(columns="TMB", errors="ignore").merge(
    tmb_per_patient,
    left_on="Patient",
    right_index=True,
    how="left",
)

In [None]:
# cast the merged TMB column to integer dtype
tmb_as_int = a_comb.obs["TMB"].astype(int)
a_comb.obs["TMB"] = tmb_as_int

---
## Define gene signatures

In [None]:
# Gene signatures regrouped into the categories used for plotting.

# epithelial / stromal programmes
signatures_epi_stroma = [
    "IFN stimulated EPI", "iCMS2", "iCMS3",
    # "Stem cell index",  # Gil Vasquez, et al. 2022
    "Basal", "Squamous", "Mesenchymal", "Glandular", "Ciliated", "Alveolar",
    "AC",   # astrocyte
    "OPC",  # oligodendrocyte progenitor
    "NPC",  # neural progenitor
    "IEX",  # custom epithelial-intrinsic exclusion signature
]

# myeloid populations and states
signatures_myeloid = [
    "Myeloid", "cDC2", "cDC1", "Macrophage", "Classical monocytes",
    "M1", "M2", "Neutrophils",
    "Costimulatory MYE", "Stimulatory DCs", "IFN stimulated MYE",
]

# lymphoid populations and states
signatures_lymphoid = [
    "T cell", "T reg", "T cell CD4", "T cell CD8",
    "T reg resting", "T reg suppressive", "T reg tissue homing",
    "T reg cytokines", "T reg activation",
    "TH1", "TH2", "TH17",
    "T cell exhaustion",
    "TRM",  # tissue-resident memory T cells
    "NK cell", "B cell", "Plasma cell",
]

# pathway / activity scores
signatures_activity = [
    "EMT", "pEMT", "Senescence", "SASP", "Interferon", "Hypoxia", "Oxphos",
    "Stress", "Stress response", "Bacterial response", "Fibrosis",
    "Cytotoxicity", "Exhaustion", "Cytokines", "Chemokines", "MHC",
    "Fetal", "Stem", "Metaplasia", "Proliferation", "Translation", "Cycle",
    "Metal", "CytoTRACE", "CNV score",
]

# hand-picked subset drawn from the four categories above
signatures_curated = [
    # activity
    "pEMT", "Metaplasia", "Stem", "CytoTRACE", "IEX", "CNV score", "MHC",
    "Fibrosis", "Bacterial response",
    # epi/stroma
    "IFN stimulated EPI", "iCMS2", "iCMS3",
    # "Stem cell index",  # Gil Vasquez, et al. 2022
    "Squamous",
    # lymphoid
    "T reg suppressive", "T cell CD4", "T cell CD8", "T cell exhaustion",
    # myeloid
    "IFN stimulated MYE", "Neutrophils",
]

# figure 1 panel
signatures_fig_1 = [
    "Stem", "T cell CD4", "iCMS2", "iCMS3",
    "Metaplasia", "CytoTRACE", "CNV score", "T cell CD8",
]

# figure 2 panel: the figure 1 features followed by four additional ones
signatures_fig_2 = signatures_fig_1 + [
    "IFN stimulated MYE", "T reg suppressive", "TMB", "T cell exhaustion",
]

---
## Rename things and set up for plotting

In [None]:
# Rename obs columns that end in "_MxIF": the new name is the part of the old
# name before its first underscore. `new_mxif` is reused later as a feature list.
is_mxif_column = a_comb.obs.columns.str.endswith("_MxIF")
old_mxif = list(a_comb.obs.columns[is_mxif_column])
new_mxif = [column.split("_")[0] for column in old_mxif]
a_comb.obs = a_comb.obs.rename(columns=dict(zip(old_mxif, new_mxif)))

In [None]:
# Strip the "_VUMCrefNMF30" suffix from the cell-state columns of the master obs table.
refnmf_suffix = "_VUMCrefNMF30"
et = [column for column in a_comb.obs.columns if column.endswith(refnmf_suffix)]
et_new = [column.replace(refnmf_suffix, "") for column in et]
a_comb.obs = a_comb.obs.rename(columns=dict(zip(et, et_new)))

In [None]:
# display the combined AnnData summary to check the renamed obs columns
a_comb

In [None]:
# Colour lookup shared by all categorical annotations (label -> colour).
cmap_dict = {
    # Tumor Type
    "SSL/HP": "#c4a4e1",
    "MSI-H": "#7a4fa3",
    "MSS": "#ffc101",
    "TA/TVA": "#fee799",
    "NL": "#1f77b4",
    "HM": "#7a4fa3",
    "CIN+": "#ffc101",
    # Tumor Location
    "Cecum": "#1f4e79",
    "Ascending": "#2e74b7",
    "Hepatic Flexure": "#bdd6ef",
    "Transverse": "#ff717a",
    "Descending": "#fe0001",
    "Sigmoid": "#c00101",
    # this one's global
    "nan": "#ffffff",
    # These are black and white for T and F
    "T": "#000000",
    "F": "#ffffff",
    # evolution
    "N": "tab:blue",
    "B": "tab:green",
    "L": "tab:orange",
    "N/B": "tab:blue",
    # CNV clone domain
    "Tumor": "#000000",
    "Stroma": "tab:pink",
    "Edge": "tab:red",
}

# Stage and grade levels each get one shade of the seaborn "Reds" palette,
# assigned in the order the levels are listed.
stage_levels = ["AD", "I", "II", "III/IV"]
grade_levels = ["G1", "G2", "G3"]
stage_colordict = dict(
    zip(stage_levels, sns.color_palette("Reds", len(stage_levels)).as_hex())
)
grade_colordict = dict(
    zip(grade_levels, sns.color_palette("Reds", len(grade_levels)).as_hex())
)
cmap_dict.update(stage_colordict)
cmap_dict.update(grade_colordict)

In [None]:
# colour each patient name by the colour of its "CIN Status" entry in the sample key
patient_colordict = {
    patient: cmap_dict[cin_status]
    for patient, cin_status in zip(sample_key.patient_name, sample_key["CIN Status"])
}

In [None]:
# Eight colours sampled from the "plasma" colormap, one per domain label D0..D7.
# pyplot.get_cmap is used because matplotlib.cm.get_cmap was deprecated in
# Matplotlib 3.7 and removed in 3.9; pyplot.get_cmap accepts the same
# (name, lut) arguments on both old and new versions.
cmap = plt.get_cmap("plasma", 8)
mw_colordict = {
    "D{}".format(i): matplotlib.colors.rgb2hex(cmap(i)) for i in range(cmap.N)
}

---
# Overview plots

Summaries of global signatures, cell states, and archetypes by tumor type, MILWRM domain, and patient

In [None]:
# Figure heights, one per feature set; each is passed as the second element of
# `figsize` (presumably inches, as in matplotlib — confirm in dotplot_utils).

# gene-signature panels
signatures_fig_1_height = 3
signatures_fig_2_height = 3.2
signatures_curated_height = 6
signatures_myeloid_height = 3
signatures_lymphoid_height = 4.7
signatures_epi_stroma_height = 4.1
signatures_activity_height = 7

# other feature sets
mxif_height = 8.2
refNMF_height = 8
archetypes_height = 2.7

In [None]:
# Figure widths, one per grouping variable; each is passed as the first
# element of `figsize`.

# tumour-level annotations
tumor_type_width = 3.2
tumor_class_width = 3.2
tumor_loc_width = 3.7
tumor_stage_width = 3.2
tumor_grade_width = 3.0

# clone / evolution annotations
evolution_width = 3.0
evolution_grouped_width = 2.5
cnv_domain_width = 3.0

# remaining groupings
milwrm_width = 4.1
pathology_width = 4.0
patient_width = 8.7

---
# Gene Signatures + MxIF Intensities

In [None]:
# Dot-plot heatmaps for every feature set x grouping combination.

# one row per feature set: (label used in the file name, features, figure height)
feature_sets = [
    ("signatures_activity", signatures_activity, signatures_activity_height),
    ("signatures_epi_stroma", signatures_epi_stroma, signatures_epi_stroma_height),
    ("signatures_myeloid", signatures_myeloid, signatures_myeloid_height),
    ("signatures_lymphoid", signatures_lymphoid, signatures_lymphoid_height),
    ("signatures_curated", signatures_curated, signatures_curated_height),
    ("signatures_fig_1", signatures_fig_1, signatures_fig_1_height),
    ("signatures_fig_2", signatures_fig_2, signatures_fig_2_height),
    ("MxIF", new_mxif, mxif_height),
]

# one row per grouping:
# (label used in the file name, obs column, figure width,
#  fixed group order (None -> cluster_obs=True), colour lookup)
groupings = [
    ("VUMCrefNMF30_MILWRM", "MILWRM Domain", milwrm_width, None, mw_colordict),
    ("tumortype", "Tumor Type", tumor_type_width, None, cmap_dict),
    ("tumorclass", "CIN Status", tumor_class_width, None, cmap_dict),
    ("tumorloc", "Tumor Location", tumor_loc_width, ["Cecum","Ascending","Hepatic Flexure","Transverse","Descending","Sigmoid"], cmap_dict),
    ("tumorstage", "Tumor Stage", tumor_stage_width, ["NL","AD","I","II","III/IV"], cmap_dict),
    ("tumorgrade", "Tumor Grade", tumor_grade_width, ["NL","G1","G2","G3"], cmap_dict),
    ("pathology_annotation", "pathology_annotation", pathology_width, None, None),
    ("evolution", "Evolution", evolution_width, ["L","N","B"], cmap_dict),
    ("evolution_grouped", "Evolution_grouped", evolution_grouped_width, ["N/B","L"], cmap_dict),
    ("CNVclonedomain", "CNV_clone_domain", cnv_domain_width, ["Stroma","Edge","Tumor"], cmap_dict),
    ("patient", "Patient", patient_width, None, patient_colordict),
]

for features_name, features_list, height in feature_sets:
    for position, (group_name, group, width, groupby_order, groupby_colordict) in enumerate(groupings):
        # The first grouping hierarchically clusters the features; every later
        # grouping reuses the feature order returned by that first call.
        is_first_group = position == 0
        # NOTE(review): the first plot uses a different legend title ("cells or
        # spots") from all the others ("spots") — confirm this is intended.
        returned = cody_heatmap(
            a_comb,
            groupby=group,
            features=features_list if is_first_group else features_ordered,
            cluster_vars=is_first_group,
            vars_dict=None,
            groupby_order=groupby_order,
            groupby_colordict=groupby_colordict,
            cluster_obs=groupby_order is None,
            figsize=(width, height),
            save="ST_out/plots_overview/{}_{}_dotplot.png".format(group_name, features_name),
            dpi=400,
            cmap="Greys",
            size_title=(
                "Fraction of cells or\nspots in group (%)"
                if is_first_group
                else "Fraction of spots\nin group (%)"
            ),
        )
        if is_first_group:
            features_ordered = returned

In [None]:
# Figure 1 patient panel: same plot as in the loop above, but with a fixed
# patient order and a fixed feature order (no clustering on either axis).
group_name, group = "patient", "Patient"
features_name, features_list = "signatures_fig_1", signatures_fig_1
height = signatures_fig_1_height  # not used below: the figure height is hard-coded to 2.4
width = patient_width
groupby_colordict = patient_colordict

# explicit left-to-right patient order
groupby_order = [
    "SR00001", "HTA11_01938", "HTA11_07862", "HTA11_10711", "PAT05785",
    "PAT06439", "PAT15211", "PAT30884", "PAT59600", "PAT59667",
    "PAT71397", "PAT71662", "PAT73899", "PAT74143", "SG00003",
    "SG00004", "HTA11_06134", "HTA11_07663", "HTA11_08622_A", "HTA11_08622_B",
    "PAT00222", "PAT01586", "PAT01587", "PAT33430", "PAT40364",
    "PAT54273", "PAT59460", "PAT73458", "SG00001", "SG00002",
]

# explicit feature order (same entries, same order, as signatures_fig_1)
features_ordered = [
    "Stem", "T cell CD4", "iCMS2", "iCMS3",
    "Metaplasia", "CytoTRACE", "CNV score", "T cell CD8",
]

output_png = "ST_out/plots_overview/{}_{}_dotplot.png".format(group_name, features_name)
cody_heatmap(
    a_comb,
    groupby=group,
    features=features_ordered,
    cluster_vars=False,
    vars_dict=None,
    groupby_order=groupby_order,
    groupby_colordict=groupby_colordict,
    cluster_obs=groupby_order is None,
    figsize=(width, 2.4),
    save=output_png,
    dpi=400,
    cmap="Greys",
    size_title="Fraction of spots\nin group (%)",
)

---
# Combined markers

In [None]:
# Combined marker panel: MxIF stains plus two groups of gene signatures,
# shown as three labelled sections of one heatmap.
mxif = [
    "HLAA", "NAKATPASE", "PANCK", "OLFM4",
    "PCNA", "PEGFR", "COLLAGEN", "SMA",
]

epi_sigs = [
    "CNV score", "CytoTRACE", "Stem", "Metaplasia", "Hypoxia",
    "Oxphos", "Stress response", "Bacterial response", "Fibrosis", "EMT",
]

str_sigs = [
    "Neutrophils", "M1", "M2", "T reg",
    "T cell CD4", "T cell CD8", "Cytotoxicity", "T cell exhaustion",
]

# section label -> features in that section
marker_states_dict = {"MxIF": mxif, "Activity": epi_sigs, "Immune": str_sigs}

# figure height used for the combined panel
custom_height = 7.3

In [None]:
# Remove the variable named "OLFM4" from the expression matrix.
# NOTE(review): presumably so that "OLFM4" resolves to the MxIF obs column of
# the same name in the plots below — confirm.
keep_variable = a_comb.var_names != "OLFM4"
a_comb = a_comb[:, keep_variable].copy()

In [None]:
# Remove the variable named "PCNA" from the expression matrix.
# NOTE(review): presumably so that "PCNA" resolves to the MxIF obs column of
# the same name in the plots below — confirm.
keep_variable = a_comb.var_names != "PCNA"
a_comb = a_comb[:, keep_variable].copy()

In [None]:
# Combined MxIF / activity / immune panel, drawn once per grouping variable.

# one row per grouping:
# (label used in the file name, obs column, figure width,
#  fixed group order (None -> cluster_obs=True), colour lookup)
groupings = [
    ("VUMCrefNMF30_MILWRM", "MILWRM Domain", milwrm_width, None, mw_colordict),
    ("tumortype", "Tumor Type", 4, None, cmap_dict),
    ("tumorloc", "Tumor Location", tumor_loc_width, ["Cecum","Ascending","Hepatic Flexure","Transverse","Descending","Sigmoid"], cmap_dict),
    ("tumorstage", "Tumor Stage", tumor_stage_width, ["NL","AD","I","II","III/IV"], cmap_dict),
    ("tumorgrade", "Tumor Grade", tumor_grade_width, ["NL","G1","G2","G3"], cmap_dict),
    ("pathology_annotation", "pathology_annotation", pathology_width, None, None),
    ("evolution", "Evolution", evolution_width, ["L","N","B"], cmap_dict),
    ("evolution_grouped", "Evolution_grouped", evolution_grouped_width, ["N/B","L"], cmap_dict),
    ("CNVclonedomain", "CNV_clone_domain", cnv_domain_width, ["Stroma","Edge","Tumor"], cmap_dict),
    ("patient", "Patient", patient_width, None, patient_colordict),
]

for group_name, group, width, groupby_order, groupby_colordict in groupings:
    # flatten the sections of marker_states_dict into a single feature list
    flat_features = [
        feature for section in marker_states_dict.values() for feature in section
    ]
    cody_heatmap(
        a_comb,
        groupby=group,
        features=flat_features,
        cluster_vars=False,
        vars_dict=marker_states_dict,
        groupby_order=groupby_order,
        groupby_colordict=groupby_colordict,
        cluster_obs=groupby_order is None,
        figsize=(width, custom_height),
        save="ST_out/plots_overview/{}_epivsstroma_dotplot.png".format(group_name),
        dpi=400,
        cmap="Greys",
        size_title="Fraction of spots\nin group (%)",
    )

---
# refNMF Cell States

In [None]:
# refNMF cell states split by compartment; each list is sorted alphabetically.
nmf_epi = sorted([
    "STM", "CRC1", "CRC2", "CT", "SSC", "CRC3",
    "EE1", "GOB", "CRC4", "ABS", "TUF", "EE2",
])

nmf_stroma = sorted(["END1", "FIB1", "FIB2", "FIB3", "FIB4", "END2"])

nmf_immune = sorted([
    "BL1", "MYE1", "TL1", "MYE2", "MYE3", "PLA",
    "MYE4", "MAS", "MYE5", "TL2", "TL3", "BL2",
])

# compartment label -> cell states in that compartment
marker_states_dict = {
    "Epithelial": nmf_epi,
    "Stromal": nmf_stroma,
    "Immune": nmf_immune,
}

In [None]:
# refNMF cell-state panel, drawn once per grouping variable.

# one row per grouping:
# (label used in the file name, obs column, figure width,
#  fixed group order (None -> cluster_obs=True), colour lookup)
groupings = [
    ("VUMCrefNMF30_MILWRM", "MILWRM Domain", milwrm_width, None, mw_colordict),
    ("tumortype", "Tumor Type", 4, None, cmap_dict),
    ("tumorclass", "CIN Status", 4, None, cmap_dict),
    ("tumorloc", "Tumor Location", tumor_loc_width, ["Cecum","Ascending","Hepatic Flexure","Transverse","Descending","Sigmoid"], cmap_dict),
    ("tumorstage", "Tumor Stage", tumor_stage_width, ["NL","AD","I","II","III/IV"], cmap_dict),
    ("tumorgrade", "Tumor Grade", tumor_grade_width, ["NL","G1","G2","G3"], cmap_dict),
    ("pathology_annotation", "pathology_annotation", pathology_width, None, None),
    ("evolution", "Evolution", evolution_width, ["L","N","B"], cmap_dict),
    ("evolution_grouped", "Evolution_grouped", evolution_grouped_width, ["N/B","L"], cmap_dict),
    ("CNVclonedomain", "CNV_clone_domain", cnv_domain_width, ["Stroma","Edge","Tumor"], cmap_dict),
    ("patient", "Patient", patient_width, None, patient_colordict),
]

for group_name, group, width, groupby_order, groupby_colordict in groupings:
    # flatten the compartments of marker_states_dict into a single feature list
    flat_features = [
        state for compartment in marker_states_dict.values() for state in compartment
    ]
    cody_heatmap(
        a_comb,
        groupby=group,
        features=flat_features,
        cluster_vars=False,
        vars_dict=marker_states_dict,
        groupby_order=groupby_order,
        groupby_colordict=groupby_colordict,
        cluster_obs=groupby_order is None,
        figsize=(width, refNMF_height),
        save="ST_out/plots_overview/{}_VUMCrefNMF30_dotplot.png".format(group_name),
        dpi=400,
        cmap="Greys",
        size_title="Fraction of spots\nin group (%)",
    )

---
# Gene and cell state summaries for immune exclusion analysis

In [None]:
# Remove the "DPEP1" column from obs ("DPEP1" is requested as a feature in the
# immune-exclusion panel below).
# errors="ignore" makes the cell re-runnable: the earlier in-place drop raised
# KeyError on a second run because the column was already gone.
a_comb.obs = a_comb.obs.drop(columns=["DPEP1"], errors="ignore")

In [None]:
# Convert X to a dense array.
# toarray() yields a plain numpy ndarray, whereas todense() yields np.matrix,
# a class NumPy no longer recommends. The hasattr guard turns a re-run (X
# already dense) into a no-op instead of an AttributeError.
if hasattr(a_comb.X, "toarray"):
    a_comb.X = a_comb.X.toarray()

In [None]:
# Store an independent copy of the current X as the "raw_counts" layer.
# NOTE(review): whether X holds raw counts at this point is not visible in
# this notebook — confirm against the upstream processing.
x_snapshot = a_comb.X.copy()
a_comb.layers["raw_counts"] = x_snapshot

In [None]:
# Immune-exclusion summary panel: section label -> features shown in that
# section, listed in display order.
custom_dict = dict([
    ("PPT", ["CNV score", "TMB"]),
    ("Excl.", ["IEX", "DDR1", "TGFBI", "PAK4", "DPEP1", "Fibrosis"]),
    ("uEnv.", ["FIB2", "FIB3", "MYE2"]),
    ("Inf.", [
        "T cell CD4", "T reg suppressive", "TL1", "T cell CD8",
        "TL2", "TL3", "MYE4", "MYE5",
    ]),
    ("Act.", ["Oxphos", "Hypoxia", "pEMT"]),
    ("MSS", ["iCMS2", "Stem", "CRC2"]),
    ("MSI-H", ["iCMS3", "SSC", "Metaplasia"]),
    ("NL", ["GOB", "ABS", "CT"]),
])

# figure height used for this panel
custom_dict_height = 7.5

In [None]:
# Immune-exclusion panel, drawn once per grouping variable.
features_name = "immexcl"
features_list = custom_dict
height = custom_dict_height

# one row per grouping:
# (label used in the file name, obs column, figure width,
#  fixed group order (None -> cluster_obs=True), colour lookup)
groupings = [
    ("VUMCrefNMF30_MILWRM", "MILWRM Domain", milwrm_width, None, mw_colordict),
    ("tumortype", "Tumor Type", 3.8, None, cmap_dict),
    ("tumorclass", "CIN Status", 3.8, None, cmap_dict),
    ("tumorloc", "Tumor Location", tumor_loc_width, ["Cecum","Ascending","Hepatic Flexure","Transverse","Descending","Sigmoid"], cmap_dict),
    ("tumorstage", "Tumor Stage", tumor_stage_width, ["NL","AD","I","II","III/IV"], cmap_dict),
    ("tumorgrade", "Tumor Grade", tumor_grade_width, ["NL","G1","G2","G3"], cmap_dict),
    ("patient", "Patient", patient_width, None, patient_colordict),
    ("pathology_annotation", "pathology_annotation", pathology_width, None, None),
    ("CNVclonedomain", "CNV_clone_domain", cnv_domain_width, ["Stroma","Edge","Tumor"], cmap_dict),
    ("evolution", "Evolution", evolution_width, ["L","N","B"], cmap_dict),
    ("evolution_grouped", "Evolution_grouped", evolution_grouped_width, ["N/B","L"], cmap_dict),
]

for group_name, group, width, groupby_order, groupby_colordict in groupings:
    # flatten the sections of custom_dict into a single feature list
    flat_features = [
        feature for section in custom_dict.values() for feature in section
    ]
    cody_heatmap(
        a_comb,
        groupby=group,
        features=flat_features,
        cluster_vars=False,
        vars_dict=custom_dict,
        groupby_order=groupby_order,
        groupby_colordict=groupby_colordict,
        cluster_obs=groupby_order is None,
        figsize=(width, height),
        save="ST_out/plots_overview/{}_{}_dotplot.png".format(group_name, features_name),
        dpi=400,
        cmap="Greys",
        size_title="Fraction of spots\nin group (%)",
    )

---
# MILWRM proportion plot

In [None]:
# Switch seaborn to its "white" style for the figures drawn below.
# (seaborn is also imported in the first cell; the import is repeated here so
# this cell still runs on its own.)
import seaborn as sns
sns.set_style("white")

In [None]:
# Per-patient MILWRM domain proportions: one row per patient (in order of first
# appearance in obs), one column per domain.
# The per-patient Series are collected in a list and concatenated once. This
# avoids repeatedly concatenating onto an initially empty DataFrame (quadratic,
# and deprecated behaviour in recent pandas) and avoids creating a full AnnData
# view per patient just to read a single obs column.
patient_labels = a_comb.obs["Patient"]
domain_fractions = []
for pat in patient_labels.unique():
    patient_domains = a_comb.obs.loc[patient_labels == pat, "MILWRM Domain"]
    domain_fractions.append(patient_domains.value_counts(normalize=True, sort=False))
# columns = patients after concat; transpose so that rows = patients
df_count = pd.concat(domain_fractions, axis=1).T.reset_index(drop=True)

In [None]:
# Label the rows with the patient IDs (same order as used to build the table)
# and treat domains absent from a patient as a proportion of 0.
patient_ids = list(a_comb.obs.Patient.unique())
df_count = df_count.set_axis(patient_ids, axis=0).fillna(0)

In [None]:
# keep the eight domain columns D0..D7, in that order
domain_columns = ["D{}".format(i) for i in range(8)]
df_count = df_count.loc[:, domain_columns]

In [None]:
# Stacked bar chart of domain proportions, one bar per patient.
ax = df_count.plot.bar(stacked=True, cmap="plasma", figsize=(7, 4))
ax.legend(loc="best", bbox_to_anchor=(1, 1))
ax.set_ylabel("MILWRM Domain Proportion")
ax.set_ylim((0, 1))

# thin black outline around the tick-label text
label_outline = [pe.withStroke(linewidth=0.2, foreground="k")]
ax.set_xticklabels(ax.get_xticklabels(), path_effects=label_outline)

# colour each patient label with that patient's colour from patient_colordict
for tick_label in ax.xaxis.get_ticklabels():
    tick_label.set_color(patient_colordict[tick_label.get_text()])

plt.savefig(
    "ST_out/plots_overview/patient_VUMCrefNMF30_MILWRM_proportions.png",
    dpi=400,
    bbox_inches="tight",
)

---
# Immune Exclusion Signature barplots

In [None]:
import sys; sys.path.append("../../../utility_scripts/")
from boxplot_utils import *

In [None]:
# IEX grouped by "CIN Status", using all observations in a_comb
boxplots_group(
    a_comb,
    obs=["CIN Status"],
    colors=["IEX"],
    outdir="ST_out/plots_overview/",
    cmap_dict=cmap_dict,
    figsize=(4, 4),
    sig=False,
)

In [None]:
# Independent copy of a_comb without the observations whose "CNV clone" label
# is "S" or "E".
stroma_or_edge = a_comb.obs["CNV clone"].isin(["S", "E"])
a_comb_epi = a_comb[~stroma_or_edge, :].copy()

In [None]:
# Make "Tumor Type" categorical with an explicit category order.
# reorder_categories returns a new Series rather than modifying in place, so
# the result has to be assigned back; previously it was discarded and the call
# had no effect on a_comb_epi.
a_comb_epi.obs["Tumor Type"] = (
    a_comb_epi.obs["Tumor Type"]
    .astype("category")
    .cat.reorder_categories(["MSI-H", "MSS", "NL", "SSL/HP", "TA/TVA"])
)

In [None]:
# IEX grouped by "Tumor Type", restricted to a_comb_epi (no "S"/"E" clones)
boxplots_group(
    a_comb_epi,
    obs=["Tumor Type"],
    colors=["IEX"],
    outdir="ST_out/plots_overview/",
    cmap_dict=cmap_dict,
    figsize=(4, 4),
    sig=False,
)

In [None]:
# IEX grouped by "Tumor Type" on all observations, taller figure, sig=True.
# NOTE(review): same outdir / obs / colors as the a_comb_epi call in the
# previous cell — confirm that boxplots_group does not overwrite that file.
boxplots_group(
    a_comb,
    obs=["Tumor Type"],
    colors=["IEX"],
    outdir="ST_out/plots_overview/",
    cmap_dict=cmap_dict,
    figsize=(4, 6),
    sig=True,
)

In [None]:
# TMB grouped by "Tumor Type", using all observations in a_comb
boxplots_group(
    a_comb,
    obs=["Tumor Type"],
    colors=["TMB"],
    outdir="ST_out/plots_overview/",
    cmap_dict=cmap_dict,
    figsize=(4, 4),
    sig=False,
)

---
# Tissue Archetypes

In [None]:
# Tissue archetypes split by compartment; each list is sorted alphabetically.
at_epi = sorted(["A3_Endocrine", "A4_MUC", "A5_ABS", "A8_Stem"])
at_stroma = sorted(["A6_SM"])
at_immune = sorted(["A1_Innate", "A2_Cytotoxic", "A7_Adaptive"])

# section label -> archetypes; stromal and immune archetypes share one section
archetypes_dict = {
    "Epi.": at_epi,
    "Non-Epi.": [*at_stroma, *at_immune],
}

In [None]:
# Tissue-archetype panel, drawn once per grouping variable.

# one row per grouping:
# (label used in the file name, obs column, figure width,
#  fixed group order (None -> cluster_obs=True), colour lookup)
groupings = [
    ("tumortype", "Tumor Type", 4, None, cmap_dict),
    ("tumorloc", "Tumor Location", tumor_loc_width, ["Cecum","Ascending","Hepatic Flexure","Transverse","Descending","Sigmoid"], cmap_dict),
    ("tumorstage", "Tumor Stage", tumor_stage_width, ["NL","AD","I","II","III/IV"], cmap_dict),
    ("tumorgrade", "Tumor Grade", tumor_grade_width, ["NL","G1","G2","G3"], cmap_dict),
    ("patient", "Patient", patient_width, None, patient_colordict),
    ("pathology_annotation", "pathology_annotation", pathology_width, None, None),
    ("VUMCrefNMF30_MILWRM", "MILWRM Domain", milwrm_width, None, mw_colordict),
    ("evolution", "Evolution", evolution_width, ["L","N","B"], cmap_dict),
]

for group_name, group, width, groupby_order, groupby_colordict in groupings:
    # flatten the sections of archetypes_dict into a single feature list
    flat_features = [
        archetype for section in archetypes_dict.values() for archetype in section
    ]
    cody_heatmap(
        a_comb,
        groupby=group,
        features=flat_features,
        cluster_vars=False,
        vars_dict=archetypes_dict,
        groupby_order=groupby_order,
        groupby_colordict=groupby_colordict,
        cluster_obs=groupby_order is None,
        figsize=(width, archetypes_height),
        save="ST_out/plots_overview/{}_archetypes_dotplot.png".format(group_name),
        dpi=400,
        cmap="Greys",
        size_title="Fraction of spots\nin group (%)",
    )