In [None]:
import sys; sys.path.append("../resources/")
from dotplot_utils import *
from boxplot_utils import *
import pandas as pd
import seaborn as sns
# make this notebook work better with Scanpy
import warnings; warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Make the output directories for this notebook.
# os.makedirs creates the parent "ST_out" along with "ST_out/infercnv/", and
# exist_ok=True keeps the cell safe to re-run (no check-then-create race).
import os

os.makedirs("ST_out/infercnv/", exist_ok=True)

In [None]:
# Figure defaults handed to scanpy: transparent background and 400 dpi for saved files.
# NOTE(review): `sc` is not imported explicitly in this notebook; presumably it
# arrives through one of the star imports above — confirm.
sc.set_figure_params(transparent=True, dpi_save=400)
# Directory scanpy should use for figures it saves.
sc.settings.figdir = "ST_out/infercnv/"

---
## Read in the sample key (dataframe of per-sample metadata)

In [None]:
# Load the sample key. The first CSV column becomes the index; the cells below
# iterate over that index to locate each sample's .h5ad file.
sample_key = pd.read_csv("../resources/ST/visium_sample_key.csv", index_col=0)

---

In [None]:
# Read one master AnnData file per sample listed in the key, keeping the
# objects in the same order as the rows of `sample_key`.
outs = []
for s in sample_key.index:
    h5ad_path = "../data/ST/{}_master.h5ad".format(s)
    a = sc.read(h5ad_path)
    print("Read adata from {}".format(h5ad_path))
    outs.append(a)

# every row of the key must have produced exactly one object
assert len(outs) == len(sample_key), "Check length of outs"

In [None]:
# Copy each sample's "Evolution" value from the key onto every spot of that
# sample. This relies on `outs` being in the same order as `sample_key.index`
# (outs[i] is paired with the i-th index entry).
for i, x in enumerate(sample_key.index):
    outs[i].obs["Evolution"] = sample_key.loc[x, "Evolution"]

In [None]:
# concatenate anndata objects
# All per-sample objects are merged into a single AnnData with:
#   join="outer"      - outer join across samples
#   batch_categories  - the sample IDs from the key, in the same order as `outs`
#   fill_value=0      - fill value passed for entries that the outer join leaves empty
# NOTE(review): AnnData.concatenate is, as far as I know, deprecated in favor of
# anndata.concat — confirm against the installed anndata version before upgrading.
a_comb = outs[0].concatenate(
    outs[1:],
    join="outer",
    batch_categories=list(sample_key.index),
    fill_value=0,
)
# Discard .obsm and .var on the combined object; nothing below in this section
# reads them. (presumably to drop per-sample annotations that are not
# comparable after merging — TODO confirm)
del a_comb.obsm
del a_comb.var

In [None]:
# Collapse the per-spot "CNV clone" labels into coarse domains:
#   clone labels "1", "2", "3", "1A", "1B" -> "Tumor"
#   "S" -> "Stroma", "E" -> "Edge"
# Any other label is kept as its string form.
a_comb.obs["CNV_clone_domain"] = a_comb.obs["CNV clone"].astype(str)
a_comb.obs.loc[a_comb.obs["CNV clone"].isin(["1", "2", "3", "1A", "1B"]), "CNV_clone_domain"] = "Tumor"
# Assign the result of .replace() back to the column. Calling
# .replace(..., inplace=True) on `obs[col]` is a chained in-place operation
# that acts on a temporary object and is not guaranteed to update `obs`
# (it never does under pandas Copy-on-Write).
a_comb.obs["CNV_clone_domain"] = (
    a_comb.obs["CNV_clone_domain"]
    .replace({"S":"Stroma","E":"Edge"})
    .astype("category")
)

In [None]:
# Coarser version of "Evolution": the "B" and "N" groups are pooled into a
# single "N/B" level, every other value keeps its string form.
evolution_grouped = a_comb.obs["Evolution"].astype(str)
evolution_grouped[a_comb.obs["Evolution"].isin(["B", "N"])] = "N/B"
a_comb.obs["Evolution_grouped"] = evolution_grouped.astype("category")

In [None]:
# Set "CIN Status" to "HM" for every sample whose tumor_type is "SSL/HP"
# (overwrites whatever the key held for those rows).
# presumably "HM" stands for hypermutated — TODO confirm
sample_key.loc[sample_key.tumor_type=="SSL/HP","CIN Status"] = "HM"

In [None]:
# Unique patient names with at least one "CIN+" sample. A missing "CIN Status"
# is treated as "CIN+" (via fillna) before the comparison.
CIN_PATs = sample_key.loc[sample_key["CIN Status"].fillna("CIN+")=="CIN+","patient_name"].unique()

In [None]:
# Display the CIN+ patient names for a visual check.
CIN_PATs

In [None]:
# Unique patient names with at least one "HM" sample. Missing "CIN Status"
# values are filled with "CIN+" first, so they never count as "HM".
HM_PATs = sample_key.loc[sample_key["CIN Status"].fillna("CIN+")=="HM","patient_name"].unique()

In [None]:
# Display the HM patient names for a visual check.
HM_PATs

In [None]:
# Per-spot CIN status, built in layers. The statements are order-dependent:
# each later assignment overrides the earlier ones for the spots it matches.
# 1) default: every spot starts as "CIN+"
a_comb.obs["CIN_status"] = "CIN+"
# 2) spots from patients listed in HM_PATs become "HM"
a_comb.obs.loc[a_comb.obs.Patient.isin(HM_PATs), "CIN_status"] = "HM"
# 3) spots whose tumor type is SSL/HP, TA/TVA or NL become "CIN-", even if
#    step 2 marked them "HM"
# NOTE(review): the sample key flags SSL/HP samples as "HM", yet SSL/HP spots
# end up "CIN-" here — confirm this precedence is intended.
a_comb.obs.loc[a_comb.obs["Tumor Type"].isin(["SSL/HP", "TA/TVA", "NL"]), "CIN_status"] = "CIN-"
# 4) hard-coded exception: clone "1B" of patient PAT33430 is forced to "CIN+"
a_comb.obs.loc[(a_comb.obs["CNV clone"] == "1B") & (a_comb.obs.Patient=="PAT33430"), "CIN_status"] = "CIN+"
a_comb.obs["CIN_status"] = a_comb.obs["CIN_status"].astype("category")

In [None]:
# Display the CIN_status levels that are present after the assignments above.
a_comb.obs["CIN_status"].cat.categories

In [None]:
# Tumor type label refined by CIN status. Starts from the plain "Tumor Type"
# string and relabels three subsets, applied in the order listed below.
msi_and_cin_pos = (a_comb.obs["Tumor Type"] == "MSI-H") & (a_comb.obs.CIN_status == "CIN+")
pat33430_clone_1b = (a_comb.obs.Patient == "PAT33430") & (a_comb.obs["CNV clone"] == "1B")
mss_and_hm = (a_comb.obs["Tumor Type"] == "MSS") & (a_comb.obs.CIN_status == "HM")

a_comb.obs["Tumor_Type_CIN"] = a_comb.obs["Tumor Type"].astype(str)
for spot_mask, refined_label in [
    (msi_and_cin_pos, "MSI-H (CIN+)"),
    # hard-coded exception: this clone is labelled regardless of its tumor type
    (pat33430_clone_1b, "MSI-H (CIN+)"),
    (mss_and_hm, "MSS (HM)"),
]:
    a_comb.obs.loc[spot_mask, "Tumor_Type_CIN"] = refined_label
a_comb.obs["Tumor_Type_CIN"] = a_comb.obs["Tumor_Type_CIN"].astype("category")

In [None]:
# Number of spots per refined tumor type label.
a_comb.obs["Tumor_Type_CIN"].value_counts()

In [None]:
# Tumor type with "MSI-H" and "MSS" pooled into a single "CRC" level; all
# other tumor types (including "NL") keep their original label.
tumor_vs_nl = a_comb.obs["Tumor Type"].astype(str)
tumor_vs_nl[a_comb.obs["Tumor Type"].isin(["MSI-H", "MSS"])] = "CRC"
a_comb.obs["Tumor_vs_NL"] = tumor_vs_nl.astype("category")

In [None]:
# Number of spots per Tumor_vs_NL level.
a_comb.obs["Tumor_vs_NL"].value_counts()

---
## Rename `obs` columns and set up color maps for plotting

In [None]:
# rename obs columns with MxIF stains in them
old_mxif = list(a_comb.obs.columns[a_comb.obs.columns.str.endswith("_MxIF")])
new_mxif = list(a_comb.obs.columns[a_comb.obs.columns.str.endswith("_MxIF")].str.split("_").str[0])
a_comb.obs.rename(columns=dict(zip(old_mxif,new_mxif)), inplace=True)

In [None]:
# rename cell state columns in master anndata.obs
et = list(a_comb.obs.columns[a_comb.obs.columns.str.endswith("_VUMCrefNMF30")])
et_new = [x.replace("_VUMCrefNMF30", "") for x in et]
a_comb.obs.rename(columns=dict(zip(et, et_new)), inplace=True)

In [None]:
# Display the combined AnnData summary to check the renamed obs columns.
a_comb

In [None]:
def _reds_by_level(levels):
    """Pair each level, in the order given, with one step of seaborn's "Reds" palette (hex)."""
    return dict(zip(levels, sns.color_palette("Reds", len(levels)).as_hex()))


# Category label -> color lookup shared by the plots below.
cmap_dict = {
    # Tumor Type
    "SSL/HP": "#c4a4e1",
    "MSI-H": "#7a4fa3",
    "MSS": "#ffc101",
    "TA/TVA": "#fee799",
    "NL": "#1f77b4",
    "MSI-H (CIN+)": "#7a4fa3",
    "MSS (HM)": "#ffc101",
    # Tumor Location
    "Cecum": "#1f4e79",
    "Ascending": "#2e74b7",
    "Hepatic Flexure": "#bdd6ef",
    "Transverse": "#ff717a",
    "Descending": "#fe0001",
    "Sigmoid": "#c00101",
    # this one's global
    "nan": "#ffffff",
    # These are black and white for T and F
    "T": "#000000",
    "F": "#ffffff",
    # evolution
    "N": "tab:blue",
    "B": "tab:green",
    "L": "tab:orange",
    "N/B": "tab:blue",
    # CNV clone domain
    "Tumor": "#000000",
    "Stroma": "tab:pink",
    "Edge": "tab:red",
    "CRC": "#ffffff",
}

# Ordered scales: each gets its own light-to-dark run of the "Reds" palette.
stage_colordict = _reds_by_level(["AD", "I", "II", "III/IV"])
grade_colordict = _reds_by_level(["G1", "G2", "G3"])
CIN_colordict = _reds_by_level(["HM", "CIN-", "CIN+"])

# Fold the ordered scales into the shared lookup (later entries win on key clashes).
for extra_colors in (stage_colordict, grade_colordict, CIN_colordict):
    cmap_dict.update(extra_colors)

In [None]:
# Color per patient, taken from the color of that patient's tumor type in
# the sample key (if a patient appears on several rows, the last row wins).
patient_colordict = {
    patient: cmap_dict[tumor_type]
    for patient, tumor_type in zip(sample_key.patient_name, sample_key.tumor_type)
}

In [None]:
# Eight evenly spaced colors from the "plasma" colormap, one per label D0..D7.
# matplotlib.cm.get_cmap was deprecated in matplotlib 3.7 and removed in 3.9,
# so use the colormap registry first and fall back only on older releases that
# lack the registry or Colormap.resampled.
try:
    cmap = matplotlib.colormaps["plasma"].resampled(8)
except AttributeError:
    cmap = matplotlib.cm.get_cmap("plasma", 8)
mw_colordict = dict(zip(["D0","D1","D2","D3","D4","D5","D6","D7"], [matplotlib.colors.rgb2hex(cmap(i)) for i in range(cmap.N)]))

---

In [None]:
# Independent copy of the combined object without the spots whose CNV clone
# is "S"; every other label (including "E") is kept.
stroma_spots = a_comb.obs["CNV clone"].isin(["S"])
a_comb_epi = a_comb[~stroma_spots, :].copy()

In [None]:
# Make "Tumor Type" categorical and pin its category order.
# reorder_categories returns a new Series rather than modifying in place, so
# the result has to be assigned back; calling it as a bare statement discards
# the reordering. It raises ValueError if the listed categories do not match
# the ones present exactly.
a_comb_epi.obs["Tumor Type"] = (
    a_comb_epi.obs["Tumor Type"]
    .astype("category")
    .cat.reorder_categories(["MSI-H","MSS","NL","SSL/HP","TA/TVA"])
)

In [None]:
# CNV score grouped by Tumor Type on the stroma-excluded object, with sig=False.
# boxplots_group comes from the local plotting utilities (star import);
# presumably `sig` toggles significance annotations — TODO confirm.
boxplots_group(
    a_comb_epi,
    outdir="ST_out/infercnv/",
    obs=["Tumor Type"],
    colors=["CNV score"],
    figsize=(4,4),
    sig=False,
    cmap_dict=cmap_dict,
    titles=["ST (tumor region spots)"],
)

In [None]:
# CNV score grouped by Tumor Type with sig=True, on a taller figure.
# NOTE(review): this call plots `a_comb` (all spots, stroma included) while the
# title says "tumor region spots" — confirm whether `a_comb_epi` was intended.
# NOTE(review): same outdir/obs/colors as the previous cell; check whether the
# saved file overwrites the earlier one.
boxplots_group(
    a_comb,
    outdir="ST_out/infercnv/",
    obs=["Tumor Type"],
    colors=["CNV score"],
    figsize=(4,6),
    sig=True,
    cmap_dict=cmap_dict,
    titles=["ST (tumor region spots)"],
)

In [None]:
# Number of stroma-excluded spots per tumor type (the group sizes behind the boxplot).
a_comb_epi.obs["Tumor Type"].value_counts()

In [None]:
# Pin the category order of Tumor_Type_CIN. reorder_categories requires the
# list to match the existing categories exactly.
tumor_type_cin_order = ["NL","SSL/HP","TA/TVA","MSS (HM)","MSI-H","MSS","MSI-H (CIN+)"]
a_comb_epi.obs["Tumor_Type_CIN"] = a_comb_epi.obs["Tumor_Type_CIN"].cat.reorder_categories(tumor_type_cin_order)

In [None]:
# CNV score grouped by the CIN-refined tumor type, stroma-excluded spots, sig=False.
boxplots_group(
    a_comb_epi,
    outdir="ST_out/infercnv/",
    obs=["Tumor_Type_CIN"],
    colors=["CNV score"],
    figsize=(4,4),
    sig=False,
    cmap_dict=cmap_dict,
    titles=["ST (tumor region spots)"],
)

In [None]:
# Number of stroma-excluded spots per CIN-refined tumor type.
a_comb_epi.obs.Tumor_Type_CIN.value_counts()

In [None]:
# Pin the category order of CIN_status. reorder_categories requires the list
# to match the existing categories exactly.
cin_status_order = ["CIN-","HM","CIN+"]
a_comb_epi.obs["CIN_status"] = a_comb_epi.obs["CIN_status"].cat.reorder_categories(cin_status_order)

In [None]:
# CNV score grouped by CIN status, stroma-excluded spots, sig=False.
boxplots_group(
    a_comb_epi,
    outdir="ST_out/infercnv/",
    obs=["CIN_status"],
    colors=["CNV score"],
    figsize=(4,4),
    sig=False,
    cmap_dict=cmap_dict,
    titles=["ST (tumor region spots)"],
)

In [None]:
# Number of stroma-excluded spots per CIN status.
a_comb_epi.obs.CIN_status.value_counts()

In [None]:
# Display the Tumor_vs_NL levels present in the stroma-excluded object.
a_comb_epi.obs["Tumor_vs_NL"].cat.categories

In [None]:
# CNV score grouped by Tumor_vs_NL, stroma-excluded spots, sig=False.
boxplots_group(
    a_comb_epi,
    outdir="ST_out/infercnv/",
    obs=["Tumor_vs_NL"],
    colors=["CNV score"],
    figsize=(4,4),
    sig=False,
    cmap_dict=cmap_dict,
    titles=["ST (tumor region spots)"],
)

In [None]:
# Same Tumor_vs_NL plot with sig=True on a taller figure.
# NOTE(review): identical outdir/obs/colors to the previous cell; check whether
# the saved file overwrites the sig=False version.
boxplots_group(
    a_comb_epi,
    outdir="ST_out/infercnv/",
    obs=["Tumor_vs_NL"],
    colors=["CNV score"],
    figsize=(4,6),
    sig=True,
    cmap_dict=cmap_dict,
    titles=["ST (tumor region spots)"],
)

In [None]:
# Number of stroma-excluded spots per Tumor_vs_NL level.
a_comb_epi.obs.Tumor_vs_NL.value_counts()

In [None]:
# CNV score grouped by Evolution with sig=True.
# NOTE(review): this call plots `a_comb` (all spots, stroma included) while the
# title says "tumor region spots" — confirm whether `a_comb_epi` was intended.
boxplots_group(
    a_comb,
    outdir="ST_out/infercnv/",
    obs=["Evolution"],
    colors=["CNV score"],
    figsize=(4,6),
    sig=True,
    cmap_dict=cmap_dict,
    titles=["ST (tumor region spots)"],
)

In [None]:
# CNV score grouped by Evolution_grouped ("N" and "B" pooled as "N/B") with sig=True.
# NOTE(review): this call plots `a_comb` (all spots, stroma included) while the
# title says "tumor region spots" — confirm whether `a_comb_epi` was intended.
boxplots_group(
    a_comb,
    outdir="ST_out/infercnv/",
    obs=["Evolution_grouped"],
    colors=["CNV score"],
    figsize=(4,6),
    sig=True,
    cmap_dict=cmap_dict,
    titles=["ST (tumor region spots)"],
)