In [None]:
import sys; sys.path.append("../resources/")
from dotplot_utils import *
import pandas as pd
import seaborn as sns
# make this notebook work better with Scanpy
import warnings; warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
import catheat
import matplotlib.pyplot as plt

In [None]:
# make output directories
import os

# makedirs creates the parent "ST_out" when needed, and exist_ok=True makes
# re-running this cell a no-op instead of a check-then-create race.
os.makedirs("ST_out/plots_overview/", exist_ok=True)

In [None]:
# Scanpy figure defaults: transparent backgrounds, 400 dpi for saved figures.
# NOTE(review): `sc` is never imported explicitly in this notebook; presumably
# it arrives through `from dotplot_utils import *` — confirm.
sc.set_figure_params(transparent=True, dpi_save=400)
# Scanpy writes its saved plots into the overview directory created above.
sc.settings.figdir = "ST_out/plots_overview/"

---
## Read in key dataframe with sample information

In [None]:
# Full sample sheet: one row per Visium sample, indexed by the first CSV column.
sample_key_orig = pd.read_csv("../resources/ST/visium_sample_key.csv", index_col=0)
# A block is identified by its name together with its tumor type.
sample_key_orig["block_type"] = sample_key_orig["block_name"] + "_" + sample_key_orig["tumor_type"]

# Number of Visium samples per block_type, as a one-column frame.
tmp = sample_key_orig["block_type"].value_counts().to_frame()
tmp.columns = ["# Visium"]

# Attach the counts, then keep a single row per block_type.
sample_key = (
    sample_key_orig
    .merge(tmp, left_on="block_type", right_index=True, how="left")
    .drop_duplicates("block_type")
)

In [None]:
# CNV scores from the step5 inferCNV output; the first CSV column becomes the
# index, which is later matched against sample_key.patient_name.
cnv_scores = pd.read_csv("../step5/ST_out/infercnv/cnv_score_per_patient.csv", index_col=0)

In [None]:
# Pseudobulk WES variant counts, annotated with the block each sample belongs to.
muts = (
    pd.read_csv("../step1/WES_out/pseudobulk_variants_per_sample.csv")
    .merge(
        sample_key[["patient_name", "block_name"]],
        left_on="Tumor_Sample_Barcode",
        right_on="patient_name",
        how="left",
    )
    # barcodes that match no patient_name are used as the block name themselves
    .assign(block_name=lambda df: df["block_name"].fillna(df["Tumor_Sample_Barcode"]))
    # drop the PAT71397 row that the merge assigned to block WD86055
    .loc[lambda df: ~((df["patient_name"] == "PAT71397") & (df["block_name"] == "WD86055"))]
    .reset_index()
    .copy()
)

In [None]:
# Pseudobulk CRC driver-mutation table, one "<gene> Mut." column per input
# column, annotated with the block each sample belongs to.
crc_muts = (
    pd.read_csv("../step1/WES_out/CRC_mutations_pseudobulk.csv", index_col=0)
    .rename(columns=lambda x: x + " Mut.")
    .reset_index()
    .merge(
        sample_key[["patient_name", "block_name"]],
        left_on="index",
        right_on="patient_name",
        how="left",
    )
    # sample ids that match no patient_name are used as the block name themselves
    .assign(block_name=lambda df: df["block_name"].fillna(df["index"]))
    # drop the PAT71397 row that the merge assigned to block WD86055
    .loc[lambda df: ~((df["patient_name"] == "PAT71397") & (df["block_name"] == "WD86055"))]
    .reset_index()
    .copy()
)

In [None]:
# Attach CNV scores by patient (cnv_scores is indexed by the patient key).
sample_key = sample_key.merge(cnv_scores, left_on="patient_name", right_index=True, how="left")
# Attach pseudobulk variant counts by block. merge() discards the left index,
# so it is parked in an "index" column and restored afterwards.
sample_key = sample_key.reset_index().merge(
    muts[["block_name","Variants"]],
    left_on="block_name",
    right_on="block_name",
    how="left",
).set_index("index")
# Attach the four driver-mutation columns by block, with the same index round trip.
sample_key = sample_key.reset_index().merge(
    crc_muts[["block_name","APC Mut.","KRAS Mut.","TP53 Mut.","BRAF Mut."]],
    left_on="block_name",
    right_on="block_name",
    how="left",
).set_index("index")
# Block WD86055: take Variants from the row whose barcode is literally "WD86055"
# (the PAT71397/WD86055 row was removed from `muts` earlier).
# NOTE(review): .values[0] raises IndexError if that barcode is absent.
sample_key.loc[sample_key.block_name=="WD86055", "Variants"] = muts.loc[muts.Tumor_Sample_Barcode=="WD86055","Variants"].values[0]

In [None]:
# blocks with no match in the variant table are left NaN by the left merge above; count them as 0 variants
sample_key.Variants = sample_key.Variants.fillna(0)

In [None]:
# NOTE(review): hard-coded CNV score for block WD86055; where 0.031962 comes from is not shown in this notebook — confirm
sample_key.loc[sample_key.block_name=="WD86055", "CNV score"] = 0.031962

In [None]:
# Mark the sample-sheet score as "bulk"; a separate "CNV score" column is
# created on a_comb.obs further down (presumably per spot — confirm).
sample_key.rename(columns={"CNV score":"CNV score (bulk)"}, inplace=True)
sample_key

In [None]:
sample_key["CNV score (bulk)"].max()

In [None]:
sample_key["CNV score (bulk)"].min()

In [None]:
sample_key["Variants"].max()

In [None]:
sample_key["Variants"].min()

---
## Define pseudotime ordering by `LCM_ROI`

In [None]:
# Variant counts per LCM region of interest; `muts` is re-bound here and no
# longer refers to the pseudobulk table loaded earlier.
muts = pd.read_csv("../step1/WES_out/LCM_variants_per_sample.csv", index_col=0)

# Relabel "<block>_ROI<n>" barcodes. Several entries swap labels (ROI1 <-> ROI4)
# and block WD86056 is folded into WD86055 as ROI5-ROI8.
mapper = {
    "WD86055_ROI4":"WD86055_ROI1",
    "WD86055_ROI2":"WD86055_ROI2",
    "WD86055_ROI3":"WD86055_ROI3",
    "WD86055_ROI1":"WD86055_ROI4",
    "WD86056_ROI2":"WD86055_ROI5",
    "WD86056_ROI1":"WD86055_ROI6",
    "WD86056_ROI4":"WD86055_ROI7",
    "WD86056_ROI3":"WD86055_ROI8",
    "WD33475_ROI4":"WD33475_ROI3",
    "WD33475_ROI5":"WD33475_ROI4",
}
# Assign the result back instead of calling replace(inplace=True) on the
# attribute-accessed column: the chained in-place form does not update the
# frame under pandas Copy-on-Write.
muts["Tumor_Sample_Barcode"] = muts["Tumor_Sample_Barcode"].replace(mapper)

# Swap the block prefix for the patient name of that block. `.iloc[0]` takes the
# first match by position; plain `[0]` on this string-indexed Series relied on
# a deprecated positional fallback.
muts["Tumor_Sample_Barcode"] = [
    sample_key.loc[sample_key.block_name == x.split("_")[0], "patient_name"].iloc[0] + "_" + x.split("_")[1]
    for x in muts["Tumor_Sample_Barcode"]
]

# Split on the LAST underscore only, so a patient name that itself contains an
# underscore still yields exactly two columns.
muts[["Patient", "ROI"]] = muts["Tumor_Sample_Barcode"].str.rsplit("_", n=1, expand=True)

In [None]:
# Add tumor_type for MSS / MSI-H patients (other patients get NaN), collapse the
# duplicate rows the merge produces, and drop the redundant join key.
muts = (
    muts.merge(
        sample_key.loc[sample_key["tumor_type"].isin(["MSS", "MSI-H"]), ["patient_name", "tumor_type"]],
        left_on="Patient",
        right_on="patient_name",
        how="left",
    )
    .drop_duplicates()
    .drop(columns=["patient_name"])
)

In [None]:
muts

In [None]:
# Patient -> ROI labels, listed in the reverse of their row order in `muts`.
pt_orders = {}

# MSS and MSI-H subsets, each with Patient as a categorical so that its
# categories enumerate exactly the patients present in that subset.
muts_MSS = muts[muts["tumor_type"] == "MSS"].copy()
muts_MSS["Patient"] = muts_MSS["Patient"].astype("category")
muts_MSI = muts[muts["tumor_type"] == "MSI-H"].copy()
muts_MSI["Patient"] = muts_MSI["Patient"].astype("category")

# MSS patients are inserted first, then MSI-H patients.
for subset in (muts_MSS, muts_MSI):
    for pat in subset["Patient"].cat.categories:
        pt_orders[pat] = muts.loc[muts["Patient"] == pat, "ROI"].iloc[::-1].tolist()

In [None]:
# Pseudobulk variant counts again, kept separately as `muts2` because `muts`
# now holds the LCM table; the per-sample loop below uses it as the fallback.
# NOTE(review): read with index_col=0 here but without it in the earlier load
# of the same file — confirm Tumor_Sample_Barcode is still a column.
muts2 = pd.read_csv("../step1/WES_out/pseudobulk_variants_per_sample.csv", index_col=0); muts2

---

In [None]:
# Load every Visium sample, attach a variant count and a "CNV_LCM" label to its
# spots, and collect the filtered objects in `outs` (sample names in `names`).
#
# Three cases per sample:
#   1. obs has "LCM_ROI": keep spots inside an ROI, take Variants per ROI from
#      `muts`, and label spots of each ROI's dominant CNV clone.
#   2. sample 8270_12_WD84216 without LCM_ROI: skipped.
#   3. otherwise: use the bulk variant count from `muts2` when available, else 0.
outs = []
names = []

# "CNV clone" values that are not numbered tumor clones
# (presumably S = stroma, E = epithelium — confirm).
NON_CLONE_LABELS = ["S", "E"]
# an ROI needs at least this many spots of one clone to have a dominant clone
MIN_DOMINANT_SPOTS = 20

for s in sample_key_orig.index:
    h5ad_path = "../data/ST/{}_master.h5ad".format(s)
    a = sc.read(h5ad_path)
    print("Read adata from {}".format(h5ad_path))

    # any stored Variants column is replaced by the values assigned below
    if "Variants" in a.obs.columns:
        a.obs.drop(columns="Variants", inplace=True)

    if "LCM_ROI" in a.obs.columns:
        # filter to LCM ROIs
        a.obs.LCM_ROI = a.obs.LCM_ROI.astype(str)
        a = a[a.obs.LCM_ROI.str.contains("ROI"), :].copy()
        print("Filtered to {} spots within LCM ROIs".format(a.n_obs))

        # add number of detected Variants (merge drops the index, so park and restore it)
        a.obs = (
            a.obs.reset_index()
            .merge(
                muts[["Patient", "ROI", "Variants"]],
                left_on=["Patient", "LCM_ROI"],
                right_on=["Patient", "ROI"],
                how="left",
            )
            .drop(columns="ROI")
            .set_index("index")
        )

        # create CNV_LCM intersection
        a.obs["CNV_LCM"] = ""
        for ROI in a.obs.LCM_ROI.unique():
            print("{}: ".format(ROI), end="")
            try:
                in_roi = a.obs.LCM_ROI == ROI
                # spots per tumor clone within this ROI, computed once
                clone_counts = a.obs.loc[
                    in_roi & ~a.obs["CNV clone"].isin(NON_CLONE_LABELS), "CNV clone"
                ].value_counts()
                if clone_counts.empty or clone_counts.max() < MIN_DOMINANT_SPOTS:
                    print("No dominant clone!")
                else:
                    dom_clone = clone_counts.idxmax()
                    print("Dominant clone = {}".format(dom_clone))
                    # label the dominant clone's spots plus the "E" spots of this ROI
                    a.obs.loc[
                        in_roi & a.obs["CNV clone"].isin([dom_clone, "E"]), "CNV_LCM"
                    ] = sample_key_orig.loc[s, "patient_name"] + " " + ROI + " " + dom_clone
            except Exception:
                # best-effort as before, but no longer traps KeyboardInterrupt/SystemExit
                print("No dominant clone!")

        # spots that received no label are dropped
        a = a[a.obs.CNV_LCM != "", :].copy()
        print("Filtered to {} spots within CNV clones".format(a.n_obs))

        # compile training data
        outs.append(a)
        names.append(s)

    elif s == "8270_12_WD84216":
        print("Skipping 8270_12_WD84216")
    else:
        print("No LCM variants found")
        # Only the lookup is guarded: a failure further down must not silently
        # fall through to the "no bulk variants" path on an already-filtered object.
        try:
            bulk_variants = muts2.loc[
                muts2.Tumor_Sample_Barcode == sample_key_orig.loc[s, :].block_name, "Variants"
            ].values[0]
            has_bulk = True
        except Exception:
            has_bulk = False

        if has_bulk:
            a.obs["Variants"] = bulk_variants
            print("Using bulk WES variants")
            a = a[~a.obs["CNV clone"].isin(NON_CLONE_LABELS), :].copy()
        else:
            print("No bulk variants found")
            a.obs["Variants"] = 0

            # find most abundant clone and set "E" to that
            dom_clone = a.obs.loc[
                ~a.obs["CNV clone"].isin(NON_CLONE_LABELS), "CNV clone"
            ].value_counts().idxmax()
            print("Dominant clone = {}".format(dom_clone))
            a.obs.loc[a.obs["CNV clone"] == "E", "CNV clone"] = dom_clone

            # subset to epithelium
            a = a[a.obs["CNV clone"] != "S", :].copy()
        print("Filtered to {} spots within CNV clones".format(a.n_obs))

        # create CNV_LCM intersection (no ROIs here: the patient name stands in for the ROI)
        a.obs["LCM_ROI"] = sample_key_orig.loc[s, "patient_name"]
        a.obs["CNV_LCM"] = a.obs.Patient.astype(str) + " " + a.obs["CNV clone"].astype(str)

        # compile training data
        outs.append(a)
        names.append(s)

    print("\n")

In [None]:
# concatenate anndata objects
a_comb = outs[0].concatenate(
    outs[1:],
    join="outer",
    batch_categories=names,
    fill_value=0,
)
del a_comb.var
del a_comb.obsm
a_comb  # view object

#### Adjust 'Variants' to 'TMB' for plotting

In [None]:
a_comb.obs.rename(columns={"Variants":"TMB"}, inplace=True)

---
## Rename things and set up for plotting

In [None]:
# rename obs columns with MxIF stains in them
old_mxif = list(a_comb.obs.columns[a_comb.obs.columns.str.endswith("_MxIF")])
new_mxif = list(a_comb.obs.columns[a_comb.obs.columns.str.endswith("_MxIF")].str.split("_").str[0])
a_comb.obs.rename(columns=dict(zip(old_mxif,new_mxif)), inplace=True)

In [None]:
# rename cell state columns in master anndata.obs
et = list(a_comb.obs.columns[a_comb.obs.columns.str.endswith("_VUMCrefNMF30")])
et_new = [x.replace("_VUMCrefNMF30", "") for x in et]
a_comb.obs.rename(columns=dict(zip(et, et_new)), inplace=True)

In [None]:
a_comb

In [None]:
# Category label -> colour, shared by the annotation palettes below.
# The literal previously listed "T" twice (T/F flags and CNV clone domain) with
# the same value; the duplicate key is removed and the resulting dict is unchanged.
cmap_dict = {
    # Tumor Type
    'SSL/HP':"#c4a4e1",'MSI-H':"#7a4fa3",'MSS':"#ffc101",'TA/TVA':"#fee799",'NL':"#1f77b4",
    # Tumor Location
    "Cecum":"#62a32e","Ascending":"#b0dc7d","Hepatic Flexure":"#ebf6db","Transverse":"#fbe6f1","Descending":"#eeadd4","Sigmoid":"#cf4191",
    # this one's global
    "nan":"#ffffff",
    # These are black and white for T and F ("T" doubles as a CNV clone domain label)
    "T":"#000000","F":"#ffffff",
    # evolution
    "N":"tab:blue","B":"tab:green","L":"tab:orange",
    # CNV clone domain
    "S":"tab:pink","E":"tab:red"
}
# Stage and grade each get a "Reds" ramp with one colour per level.
stage_labels = ["AD","I","II","III/IV"]
grade_labels = ["G1","G2","G3"]
stage_colordict = dict(zip(stage_labels, sns.color_palette("Reds", len(stage_labels)).as_hex()))
grade_colordict = dict(zip(grade_labels, sns.color_palette("Reds", len(grade_labels)).as_hex()))
cmap_dict = {**cmap_dict, **stage_colordict, **grade_colordict}

In [None]:
# Each patient is coloured by the tumor type of its block.
patient_colordict = dict(zip(sample_key.patient_name, [cmap_dict[x] for x in sample_key.tumor_type]))

import matplotlib
# Eight evenly spaced "plasma" colours for the labels D0..D7.
# matplotlib.cm.get_cmap was deprecated in matplotlib 3.7 and removed in 3.9;
# pyplot.get_cmap takes the same (name, lut) arguments in old and new releases.
cmap = plt.get_cmap("plasma", 8)
mw_colordict = dict(zip(["D0","D1","D2","D3","D4","D5","D6","D7"], [matplotlib.colors.rgb2hex(cmap(i)) for i in range(cmap.N)]))

---
# LCM_ROI - CNV Clone intersection ordering

In [None]:
# Patients whose "CIN Status" is CIN+, NL or TA/TVA; this group is ordered by CNV score below.
# NOTE(review): HTA11_01938 is excluded by hand; the reason is not recorded here — confirm.
CIN_PATs = sample_key.loc[(sample_key["CIN Status"].isin(["CIN+","NL","TA/TVA"]))&(sample_key.patient_name!='HTA11_01938'),"patient_name"].unique()

In [None]:
CIN_PATs

In [None]:
# Patients whose "CIN Status" is HM or SSL/HP; this group is ordered by TMB below.
HM_PATs = sample_key.loc[sample_key["CIN Status"].isin(["HM","SSL/HP"]),"patient_name"].unique()

In [None]:
HM_PATs

In [None]:
a_comb.obs.rename(columns={"cnv_score":"CNV score"}, inplace=True)

In [None]:
# Mean of the numeric obs columns per CNV_LCM label, one table per patient group.
# numeric_only=True is required: obs also holds non-numeric columns (Patient,
# LCM_ROI, "CNV clone", ...), and pandas >= 2.0 raises on them instead of
# silently dropping them.

# CIN+/NL/TA-TVA patients, ordered by ascending mean CNV score
AD_CIN_adata = a_comb[a_comb.obs["Patient"].isin(list(CIN_PATs)), :].copy()
AD_CIN = (
    AD_CIN_adata.obs.groupby(["CNV_LCM"])
    .mean(numeric_only=True)
    .sort_values(["CNV score"], ascending=[True])
    .reset_index()
)

# HM/SSL-HP patients, ordered by ascending mean TMB
SER_HM_adata = a_comb[a_comb.obs["Patient"].isin(list(HM_PATs)), :].copy()
SER_HM = (
    SER_HM_adata.obs.groupby(["CNV_LCM"])
    .mean(numeric_only=True)
    .sort_values(["TMB"], ascending=[True])
    .reset_index()
)

In [None]:
# CNV_LCM labels in plotting order: the CNV-score-sorted group first, then the TMB-sorted group
AD_CIN_order = AD_CIN["CNV_LCM"].tolist()
SER_HM_order = SER_HM["CNV_LCM"].tolist()
custom_order = [*AD_CIN_order, *SER_HM_order]

In [None]:
# Each patient is coloured by the tumor type of its block.
patient_colordict = {
    patient: cmap_dict[tumor_type]
    for patient, tumor_type in zip(sample_key.patient_name, sample_key.tumor_type)
}

# A CNV_LCM label starts with the patient name, so the text before the first
# space selects the colour.
CIN_colors = {label: patient_colordict[label.split(" ")[0]] for label in AD_CIN_order}
HM_colors = {label: patient_colordict[label.split(" ")[0]] for label in SER_HM_order}

custom_colors = {label: patient_colordict[label.split(" ")[0]] for label in custom_order}

In [None]:
custom_colors

---

In [None]:
custom_df = pd.read_csv("../resources/ST/LCM-CNV_PPT_rankings.csv")

In [None]:
custom_df

In [None]:
palette = {"T":"tab:red", "F":"k"}

In [None]:
# Palette for the scatter plots below; replaces the cmap_dict defined earlier.
cmap_dict = {
    # Tumor type
    'SSL/HP': "#c4a4e1",
    'MSI-H': "#7a4fa3",
    'MSS': "#ffc101",
    'TA/TVA': "#fee799",
    'NL': "#1f77b4",
    # CIN status
    'HM': "#7a4fa3",
    'CIN+': "#ffc101",
    # Tumor location
    "Cecum": "#1f4e79",
    "Ascending": "#2e74b7",
    "Hepatic Flexure": "#bdd6ef",
    "Transverse": "#ff717a",
    "Descending": "#fe0001",
    "Sigmoid": "#c00101",
    # this one's global
    "nan": "#ffffff",
    # black and white for T and F
    "T": "#000000",
    "F": "#ffffff",
}
# Stage and grade each get a "Reds" ramp with one colour per level.
stage_colordict = {
    level: colour
    for level, colour in zip(["AD","I","II","III/IV"], sns.color_palette("Reds", 4).as_hex())
}
grade_colordict = {
    level: colour
    for level, colour in zip(["G1","G2","G3"], sns.color_palette("Reds", 3).as_hex())
}
cmap_dict.update(stage_colordict)
cmap_dict.update(grade_colordict)

In [None]:
sns.set_style("white")

In [None]:
def plot_tmb_vs_cnv(df, gene, palette, out_dir="ST_out/plots_overview"):
    """Scatter TMB_norm against CNV_norm with marker style set by one gene's mutation status.

    Replaces four copy-pasted cells that differed only in the gene name.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns "CNV_norm", "TMB_norm", "CIN status", "Tumor type"
        and "<gene> status" (values "WT" / "<gene> mut.").
    gene : str
        Gene symbol, e.g. "APC"; selects the status column and the file name.
    palette : dict
        Colour per "CIN status" / "Tumor type" value.
    out_dir : str
        Directory receiving "TMB_v_CNV_<gene>.png".

    Rows that are both CIN+ and MSI-H are drawn twice: a large marker coloured
    by CIN status, then a smaller opaque marker coloured by tumor type on top.
    """
    shared_kws = dict(
        x="CNV_norm",
        y="TMB_norm",
        style="{} status".format(gene),
        style_order=["WT", "{} mut.".format(gene)],
        palette=palette,
    )
    cin_msi = (df["CIN status"] == "CIN+") & (df["Tumor type"] == "MSI-H")

    fig, ax = plt.subplots(figsize=(4, 4.5))
    # only this first layer contributes legend entries
    sns.scatterplot(data=df.loc[~cin_msi, :], hue="CIN status", s=150, alpha=0.7, ax=ax, **shared_kws)
    sns.scatterplot(data=df.loc[cin_msi, :], hue="CIN status", s=150, alpha=0.7, legend=False, ax=ax, **shared_kws)
    sns.scatterplot(data=df.loc[cin_msi, :], hue="Tumor type", s=70, alpha=1, legend=False, ax=ax, **shared_kws)

    # draw box and identity line
    ax.plot((0.4, -0.05), (0.4, 0.4), color="k", ls="--")
    ax.plot((0.4, 0.4), (0.4, -0.05), color="k", ls="--")
    ax.plot((0.4, 1.0), (0.4, 1.0), color="k", ls="--")

    ax.legend(bbox_to_anchor=(0, 1.02, 1, .02), loc='lower left', borderaxespad=0, mode="expand", ncol=3, frameon=False)
    ax.set_xlabel("CNV Score")
    ax.set_ylabel("TMB")
    # hide ticks and tick labels on both axes (major and minor)
    ax.tick_params(
        axis="both", which="both",
        bottom=False, top=False, labelbottom=False,
        left=False, right=False, labelleft=False,
    )
    sns.despine(ax=ax)
    fig.tight_layout()
    fig.savefig("{}/TMB_v_CNV_{}.png".format(out_dir, gene))


# one figure per driver gene, in the original order
for gene in ["APC", "KRAS", "TP53", "BRAF"]:
    plot_tmb_vs_cnv(custom_df, gene, cmap_dict)

In [None]:
import matplotlib
from matplotlib.gridspec import GridSpec

In [None]:
# Two-level grouping for the marginal densities: HM and SSL/HP count as "HM", everything else as "CIN+".
custom_df["CIN_status_bin"] = (
    custom_df["CIN status"].isin(["HM", "SSL/HP"]).map({True: "HM", False: "CIN+"})
)

In [None]:
# TMB_norm vs CNV_norm coloured by PPT: purple ramp for HM / SSL-HP rows,
# yellow ramp for CIN+ / NL / TA-TVA rows, with marginal densities per group.
feature = "PPT"
cmap_CIN = matplotlib.colors.LinearSegmentedColormap.from_list("", ["lightyellow","#ffc101"])
cmap_HM = matplotlib.colors.LinearSegmentedColormap.from_list("", ["lavender","#7a4fa3"])

# 12x12 grid: scatter in the centre, densities on top/right, colorbars at bottom/left
fig = plt.figure(figsize=(5,5))
gs = GridSpec(12,12)
ax_scatter = fig.add_subplot(gs[3:11, 1:9])
ax_hist_y = fig.add_subplot(gs[3:11, 9:11])
ax_hist_x = fig.add_subplot(gs[1:3, 1:9])
ax_cbar_x = fig.add_subplot(gs[11, 1:9])
ax_cbar_y = fig.add_subplot(gs[3:11, 0])

# Both colorbars show a fixed 0-1 scale ...
norm = plt.Normalize(0, 1)
sm = plt.cm.ScalarMappable(cmap=cmap_CIN, norm=norm)
sm.set_array([])
cbar = fig.colorbar(sm, orientation="horizontal", location="bottom", ticks=[0.2, 0.4, 0.6, 0.8, 1.0], cax=ax_cbar_x)
cbar.ax.set_xlabel("CIN+ PPT")

sm = plt.cm.ScalarMappable(cmap=cmap_HM, norm=norm)
sm.set_array([])
cbar = fig.colorbar(sm, orientation="vertical", location="left", ticks=[0.2, 0.4, 0.6, 0.8, 1.0], cax=ax_cbar_y)
cbar.ax.set_ylabel("HM PPT")

# ... so the scatters must use the same norm. Without it each subset is
# autoscaled to its own min/max and the point colours no longer match the
# colorbars (the per-patient plots already pass norm=norm).
ax_scatter.scatter(
    data=custom_df.loc[custom_df["CIN status"].isin(["HM","SSL/HP"])],
    x="CNV_norm", y="TMB_norm", c=feature, s=150, alpha=0.7, cmap=cmap_HM, norm=norm, edgecolors=(0,0,0,0.2)
)
sns.kdeplot(data=custom_df, y="TMB_norm", ax=ax_hist_y, hue="CIN_status_bin", legend=False, palette=cmap_dict, lw=2, alpha=0.7, common_norm=False)

ax_scatter.scatter(
    data=custom_df.loc[custom_df["CIN status"].isin(["CIN+","NL","TA/TVA"])],
    x="CNV_norm", y="TMB_norm", c=feature, s=150, alpha=0.7, cmap=cmap_CIN, norm=norm, edgecolors=(0,0,0,0.2)
)
sns.kdeplot(data=custom_df, x="CNV_norm", ax=ax_hist_x, hue="CIN_status_bin", legend=False, palette=cmap_dict, lw=2, alpha=0.7, common_norm=False)

# draw box and identity line
ax_scatter.plot((0.4, -0.05), (0.4, 0.4), color="k", ls="--", lw=2)
ax_scatter.plot((0.4, 0.4), (0.4, -0.05), color="k", ls="--", lw=2)
ax_scatter.plot((0.4, 1.0), (0.4, 1.0), color="k", ls="--", lw=2)

for lax in [ax_scatter, ax_hist_x, ax_hist_y]:
    # hide ticks and tick labels on both axes (major and minor)
    lax.tick_params(
        axis="both", which="both",
        bottom=False, top=False, labelbottom=False,
        left=False, right=False, labelleft=False,
    )
    lax.set_xlabel("")
    lax.set_ylabel("")

sns.despine(ax=ax_scatter, left=True, bottom=True)
sns.despine(ax=ax_hist_x, left=True)
sns.despine(ax=ax_hist_y, bottom=True)
gs.update(wspace=0.18, hspace=0.18) # set the spacing between axes.
plt.savefig("ST_out/plots_overview/TMB_v_CNV_{}.png".format(feature))

In [None]:
# One TMB-vs-CNV figure per patient that has more than one row in custom_df:
# that patient's points are coloured by PPT, all other points are drawn without
# a fill value (NaN, shown via plotnonfinite=True).

# identical for every patient, so built once outside the loop
cmap_CIN = matplotlib.colors.LinearSegmentedColormap.from_list("", ["lightyellow","#ffc101"])
cmap_HM = matplotlib.colors.LinearSegmentedColormap.from_list("", ["lavender","#7a4fa3"])
# row membership does not change inside the loop (only columns are added)
hm_rows = custom_df["CIN status"].isin(["HM","SSL/HP"])
cin_rows = custom_df["CIN status"].isin(["CIN+","NL","TA/TVA"])

patient_counts = custom_df.Patient.value_counts()
for pat in patient_counts[patient_counts > 1].index:
    print("Processing {}".format(pat))
    feature = "{}_PPT".format(pat)
    # PPT for this patient's rows, NaN elsewhere; float("nan") avoids depending
    # on an `np` name that this notebook never imports explicitly
    custom_df[feature] = float("nan")
    custom_df.loc[custom_df.Patient==pat, feature] = custom_df.loc[custom_df.Patient==pat, "PPT"]

    # 12x12 grid: scatter in the centre, densities on top/right, colorbars at bottom/left
    fig = plt.figure(figsize=(5,5))
    gs = GridSpec(12,12)
    ax_scatter = fig.add_subplot(gs[3:11, 1:9])
    ax_hist_y = fig.add_subplot(gs[3:11, 9:11])
    ax_hist_x = fig.add_subplot(gs[1:3, 1:9])
    ax_cbar_x = fig.add_subplot(gs[11, 1:9])
    ax_cbar_y = fig.add_subplot(gs[3:11, 0])

    norm = plt.Normalize(0, 1)
    sm = plt.cm.ScalarMappable(cmap=cmap_CIN, norm=norm)
    sm.set_array([])
    cbar = fig.colorbar(sm, orientation="horizontal", location="bottom", ticks=[0.2, 0.4, 0.6, 0.8, 1.0], cax=ax_cbar_x)
    cbar.ax.set_xlabel("CIN+ PPT")

    sm = plt.cm.ScalarMappable(cmap=cmap_HM, norm=norm)
    sm.set_array([])
    cbar = fig.colorbar(sm, orientation="vertical", location="left", ticks=[0.2, 0.4, 0.6, 0.8, 1.0], cax=ax_cbar_y)
    cbar.ax.set_ylabel("HM PPT")

    scatter_kws = dict(
        x="CNV_norm", y="TMB_norm", c=feature, s=150, alpha=0.7,
        norm=norm, edgecolors="k", plotnonfinite=True,
    )
    if custom_df.loc[hm_rows, feature].isnull().all():
        # patient has no HM / SSL-HP rows: a single pass with the CIN ramp
        ax_scatter.scatter(data=custom_df, cmap=cmap_CIN, **scatter_kws)
    elif custom_df.loc[cin_rows, feature].isnull().all():
        # patient has no CIN+ / NL / TA-TVA rows: a single pass with the HM ramp
        ax_scatter.scatter(data=custom_df, cmap=cmap_HM, **scatter_kws)
    else:
        # patient spans both groups: each group gets its own ramp
        ax_scatter.scatter(data=custom_df.loc[hm_rows], cmap=cmap_HM, **scatter_kws)
        ax_scatter.scatter(data=custom_df.loc[cin_rows], cmap=cmap_CIN, **scatter_kws)

    sns.kdeplot(data=custom_df, y="TMB_norm", ax=ax_hist_y, hue="CIN_status_bin", legend=False, palette=cmap_dict, lw=2, alpha=0.7, common_norm=False)
    sns.kdeplot(data=custom_df, x="CNV_norm", ax=ax_hist_x, hue="CIN_status_bin", legend=False, palette=cmap_dict, lw=2, alpha=0.7, common_norm=False)

    # draw box and identity line
    ax_scatter.plot((0.4, -0.05), (0.4, 0.4), color="k", ls="--", lw=2)
    ax_scatter.plot((0.4, 0.4), (0.4, -0.05), color="k", ls="--", lw=2)
    ax_scatter.plot((0.4, 1.0), (0.4, 1.0), color="k", ls="--", lw=2)

    for lax in [ax_scatter, ax_hist_x, ax_hist_y]:
        # hide ticks and tick labels on both axes (major and minor)
        lax.tick_params(
            axis="both", which="both",
            bottom=False, top=False, labelbottom=False,
            left=False, right=False, labelleft=False,
        )
        lax.set_xlabel("")
        lax.set_ylabel("")

    sns.despine(ax=ax_scatter, left=True, bottom=True)
    sns.despine(ax=ax_hist_x, left=True)
    sns.despine(ax=ax_hist_y, bottom=True)
    gs.update(wspace=0.18, hspace=0.18) # set the spacing between axes.
    plt.savefig("ST_out/plots_overview/TMB_v_CNV_{}.png".format(feature))

---

In [None]:
def plot_cin_index(df, gene, palette, out_file, hue=None):
    """Scatter "CIN index" against "PPT" with marker style set by one gene's mutation status.

    Replaces four copy-pasted cells that differed in gene, colouring and file name.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns "PPT", "CIN index", "CIN status", "Tumor type" and
        "<gene> status" (values "WT" / "<gene> mut.").
    gene : str
        Gene symbol, e.g. "APC"; selects the status column.
    palette : dict
        Colour per value of the hue column(s).
    out_file : str
        Path of the PNG to write.
    hue : str or None
        Column used to colour every row in a single layer. With None (default)
        rows are coloured by "CIN status", and rows that are both CIN+ and
        MSI-H get an extra smaller, opaque marker coloured by "Tumor type".
    """
    shared_kws = dict(
        x="PPT",
        y="CIN index",
        style="{} status".format(gene),
        style_order=["WT", "{} mut.".format(gene)],
        palette=palette,
    )

    fig, ax = plt.subplots(figsize=(4.5, 4.5))
    if hue is None:
        cin_msi = (df["CIN status"] == "CIN+") & (df["Tumor type"] == "MSI-H")
        # only this first layer contributes legend entries
        sns.scatterplot(data=df.loc[~cin_msi, :], hue="CIN status", s=150, alpha=0.7, ax=ax, **shared_kws)
        sns.scatterplot(data=df.loc[cin_msi, :], hue="CIN status", s=150, alpha=0.7, legend=False, ax=ax, **shared_kws)
        sns.scatterplot(data=df.loc[cin_msi, :], hue="Tumor type", s=70, alpha=1, legend=False, ax=ax, **shared_kws)
    else:
        sns.scatterplot(data=df, hue=hue, s=150, alpha=0.7, ax=ax, **shared_kws)

    ax.legend(bbox_to_anchor=(0, 1.02, 1, .02), loc='lower left', borderaxespad=0, mode="expand", ncol=3, frameon=False)
    # dashed reference line at CIN index = 0
    ax.axhline(0, color="k", ls="--")
    ax.set_ylabel("CIN index")
    # hide ticks and tick labels on both axes (major and minor)
    ax.tick_params(
        axis="both", which="both",
        bottom=False, top=False, labelbottom=False,
        left=False, right=False, labelleft=False,
    )
    sns.despine(ax=ax, bottom=True)
    fig.tight_layout()
    fig.savefig(out_file)


# APC: layered default, then single-layer variants coloured by CIN status and by tumor type
plot_cin_index(custom_df, "APC", cmap_dict, "ST_out/plots_overview/CIN_index_APC.png")
plot_cin_index(custom_df, "APC", cmap_dict, "ST_out/plots_overview/CIN_index_CINstatus_APC.png", hue="CIN status")
plot_cin_index(custom_df, "APC", cmap_dict, "ST_out/plots_overview/CIN_index_tumortype_APC.png", hue="Tumor type")
# KRAS: layered default
plot_cin_index(custom_df, "KRAS", cmap_dict, "ST_out/plots_overview/CIN_index_KRAS.png")

In [None]:
# CIN index vs. PPT for every row of custom_df: marker shape encodes KRAS status, color encodes CIN status.
plt.figure(figsize=(4.5,4.5))
sns.scatterplot(
    data=custom_df,
    x="PPT", y="CIN index", style="KRAS status", hue="CIN status", s=150, alpha=0.7, palette=cmap_dict, style_order=["WT","KRAS mut."]
)
# Frameless three-column legend stretched across the strip just above the axes.
plt.legend(bbox_to_anchor=(0, 1.02, 1, .02), loc='lower left', borderaxespad=0, mode="expand", ncol=3, frameon=False)
# Dashed reference line at CIN index = 0.
plt.axhline(0, color="k", ls="--")
plt.ylabel("CIN index")
# Hide tick marks and tick labels on both axes.
plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    labelbottom=False) # labels along the bottom edge are off
plt.tick_params(
    axis='y',          # changes apply to the y-axis
    which='both',      # both major and minor ticks are affected
    left=False,      # ticks along the left edge are off
    right=False,         # ticks along the right edge are off
    labelleft=False) # labels along the left edge are off
# Drop the bottom spine in addition to seaborn's default top/right removal.
sns.despine(bottom=True)
plt.tight_layout()
plt.savefig("ST_out/plots_overview/CIN_index_CINstatus_KRAS.png")

In [None]:
# CIN index vs. PPT for every row of custom_df: marker shape encodes KRAS status, color encodes tumor type.
plt.figure(figsize=(4.5,4.5))
sns.scatterplot(
    data=custom_df,
    x="PPT", y="CIN index", style="KRAS status", hue="Tumor type", s=150, alpha=0.7, palette=cmap_dict, style_order=["WT","KRAS mut."]
)
# Frameless three-column legend stretched across the strip just above the axes.
plt.legend(bbox_to_anchor=(0, 1.02, 1, .02), loc='lower left', borderaxespad=0, mode="expand", ncol=3, frameon=False)
# Dashed reference line at CIN index = 0.
plt.axhline(0, color="k", ls="--")
plt.ylabel("CIN index")
# Hide tick marks and tick labels on both axes.
plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    labelbottom=False) # labels along the bottom edge are off
plt.tick_params(
    axis='y',          # changes apply to the y-axis
    which='both',      # both major and minor ticks are affected
    left=False,      # ticks along the left edge are off
    right=False,         # ticks along the right edge are off
    labelleft=False) # labels along the left edge are off
# Drop the bottom spine in addition to seaborn's default top/right removal.
sns.despine(bottom=True)
plt.tight_layout()
plt.savefig("ST_out/plots_overview/CIN_index_tumortype_KRAS.png")

In [None]:
# Display the rows whose CIN status is CIN+ while their tumor type is MSI-H
# (the subset the per-gene CIN index scatter plots draw as two-tone markers).
custom_df.loc[((custom_df["CIN status"]=="CIN+")&(custom_df["Tumor type"]=="MSI-H")),:]

In [None]:
# CIN index vs. PPT with marker shape = TP53 status and color = CIN status.
# Rows that are both CIN+ and MSI-H are drawn as two-tone markers in three layers.
plt.figure(figsize=(4.5,4.5))
# Layer 1: every row except the CIN+ / MSI-H ones; this call supplies the legend entries.
sns.scatterplot(
    data=custom_df.loc[~((custom_df["CIN status"]=="CIN+")&(custom_df["Tumor type"]=="MSI-H")),:],
    x="PPT", y="CIN index", style="TP53 status", hue="CIN status", s=150, alpha=0.7, palette=cmap_dict, style_order=["WT","TP53 mut."]
)
# Layer 2: the CIN+ / MSI-H rows at full size, colored by CIN status (no extra legend).
sns.scatterplot(
    data=custom_df.loc[(custom_df["CIN status"]=="CIN+")&(custom_df["Tumor type"]=="MSI-H"),:],
    x="PPT", y="CIN index", style="TP53 status", hue="CIN status", s=150, alpha=0.7, palette=cmap_dict, style_order=["WT","TP53 mut."], legend=False
)
# Layer 3: the same rows again, smaller and opaque, colored by tumor type, so the inner marker shows the MSI-H color.
sns.scatterplot(
    data=custom_df.loc[(custom_df["CIN status"]=="CIN+")&(custom_df["Tumor type"]=="MSI-H"),:],
    x="PPT", y="CIN index", style="TP53 status", hue="Tumor type", s=70, alpha=1, palette=cmap_dict, style_order=["WT","TP53 mut."], legend=False
)
# Frameless three-column legend stretched across the strip just above the axes.
plt.legend(bbox_to_anchor=(0, 1.02, 1, .02), loc='lower left', borderaxespad=0, mode="expand", ncol=3, frameon=False)
# Dashed reference line at CIN index = 0.
plt.axhline(0, color="k", ls="--")
plt.ylabel("CIN index")
# Hide tick marks and tick labels on both axes.
plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    labelbottom=False) # labels along the bottom edge are off
plt.tick_params(
    axis='y',          # changes apply to the y-axis
    which='both',      # both major and minor ticks are affected
    left=False,      # ticks along the left edge are off
    right=False,         # ticks along the right edge are off
    labelleft=False) # labels along the left edge are off
# Drop the bottom spine in addition to seaborn's default top/right removal.
sns.despine(bottom=True)
plt.tight_layout()
plt.savefig("ST_out/plots_overview/CIN_index_TP53.png")

In [None]:
# CIN index vs. PPT for every row of custom_df: marker shape encodes TP53 status, color encodes CIN status.
plt.figure(figsize=(4.5,4.5))
sns.scatterplot(
    data=custom_df,
    x="PPT", y="CIN index", style="TP53 status", hue="CIN status", s=150, alpha=0.7, palette=cmap_dict, style_order=["WT","TP53 mut."]
)
# Frameless three-column legend stretched across the strip just above the axes.
plt.legend(bbox_to_anchor=(0, 1.02, 1, .02), loc='lower left', borderaxespad=0, mode="expand", ncol=3, frameon=False)
# Dashed reference line at CIN index = 0.
plt.axhline(0, color="k", ls="--")
plt.ylabel("CIN index")
# Hide tick marks and tick labels on both axes.
plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    labelbottom=False) # labels along the bottom edge are off
plt.tick_params(
    axis='y',          # changes apply to the y-axis
    which='both',      # both major and minor ticks are affected
    left=False,      # ticks along the left edge are off
    right=False,         # ticks along the right edge are off
    labelleft=False) # labels along the left edge are off
# Drop the bottom spine in addition to seaborn's default top/right removal.
sns.despine(bottom=True)
plt.tight_layout()
plt.savefig("ST_out/plots_overview/CIN_index_CINstatus_TP53.png")

In [None]:
# CIN index vs. PPT for every row of custom_df: marker shape encodes TP53 status, color encodes tumor type.
plt.figure(figsize=(4.5,4.5))
sns.scatterplot(
    data=custom_df,
    x="PPT", y="CIN index", style="TP53 status", hue="Tumor type", s=150, alpha=0.7, palette=cmap_dict, style_order=["WT","TP53 mut."]
)
# Frameless three-column legend stretched across the strip just above the axes.
plt.legend(bbox_to_anchor=(0, 1.02, 1, .02), loc='lower left', borderaxespad=0, mode="expand", ncol=3, frameon=False)
# Dashed reference line at CIN index = 0.
plt.axhline(0, color="k", ls="--")
plt.ylabel("CIN index")
# Hide tick marks and tick labels on both axes.
plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    labelbottom=False) # labels along the bottom edge are off
plt.tick_params(
    axis='y',          # changes apply to the y-axis
    which='both',      # both major and minor ticks are affected
    left=False,      # ticks along the left edge are off
    right=False,         # ticks along the right edge are off
    labelleft=False) # labels along the left edge are off
# Drop the bottom spine in addition to seaborn's default top/right removal.
sns.despine(bottom=True)
plt.tight_layout()
plt.savefig("ST_out/plots_overview/CIN_index_tumortype_TP53.png")

In [None]:
# CIN index vs. PPT with marker shape = BRAF status and color = CIN status.
# Rows that are both CIN+ and MSI-H are drawn as two-tone markers in three layers.
plt.figure(figsize=(4.5,4.5))
# Layer 1: every row except the CIN+ / MSI-H ones; this call supplies the legend entries.
sns.scatterplot(
    data=custom_df.loc[~((custom_df["CIN status"]=="CIN+")&(custom_df["Tumor type"]=="MSI-H")),:],
    x="PPT", y="CIN index", style="BRAF status", hue="CIN status", s=150, alpha=0.7, palette=cmap_dict, style_order=["WT","BRAF mut."]
)
# Layer 2: the CIN+ / MSI-H rows at full size, colored by CIN status (no extra legend).
sns.scatterplot(
    data=custom_df.loc[(custom_df["CIN status"]=="CIN+")&(custom_df["Tumor type"]=="MSI-H"),:],
    x="PPT", y="CIN index", style="BRAF status", hue="CIN status", s=150, alpha=0.7, palette=cmap_dict, style_order=["WT","BRAF mut."], legend=False
)
# Layer 3: the same rows again, smaller and opaque, colored by tumor type, so the inner marker shows the MSI-H color.
sns.scatterplot(
    data=custom_df.loc[(custom_df["CIN status"]=="CIN+")&(custom_df["Tumor type"]=="MSI-H"),:],
    x="PPT", y="CIN index", style="BRAF status", hue="Tumor type", s=70, alpha=1, palette=cmap_dict, style_order=["WT","BRAF mut."], legend=False
)
# Frameless three-column legend stretched across the strip just above the axes.
plt.legend(bbox_to_anchor=(0, 1.02, 1, .02), loc='lower left', borderaxespad=0, mode="expand", ncol=3, frameon=False)
# Dashed reference line at CIN index = 0.
plt.axhline(0, color="k", ls="--")
plt.ylabel("CIN index")
# Hide tick marks and tick labels on both axes.
plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    labelbottom=False) # labels along the bottom edge are off
plt.tick_params(
    axis='y',          # changes apply to the y-axis
    which='both',      # both major and minor ticks are affected
    left=False,      # ticks along the left edge are off
    right=False,         # ticks along the right edge are off
    labelleft=False) # labels along the left edge are off
# Drop the bottom spine in addition to seaborn's default top/right removal.
sns.despine(bottom=True)
plt.tight_layout()
plt.savefig("ST_out/plots_overview/CIN_index_BRAF.png")

In [None]:
# CIN index vs. PPT for every row of custom_df: marker shape encodes BRAF status, color encodes CIN status.
plt.figure(figsize=(4.5,4.5))
sns.scatterplot(
    data=custom_df,
    x="PPT", y="CIN index", style="BRAF status", hue="CIN status", s=150, alpha=0.7, palette=cmap_dict, style_order=["WT","BRAF mut."]
)
# Frameless three-column legend stretched across the strip just above the axes.
plt.legend(bbox_to_anchor=(0, 1.02, 1, .02), loc='lower left', borderaxespad=0, mode="expand", ncol=3, frameon=False)
# Dashed reference line at CIN index = 0.
plt.axhline(0, color="k", ls="--")
plt.ylabel("CIN index")
# Hide tick marks and tick labels on both axes.
plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    labelbottom=False) # labels along the bottom edge are off
plt.tick_params(
    axis='y',          # changes apply to the y-axis
    which='both',      # both major and minor ticks are affected
    left=False,      # ticks along the left edge are off
    right=False,         # ticks along the right edge are off
    labelleft=False) # labels along the left edge are off
# Drop the bottom spine in addition to seaborn's default top/right removal.
sns.despine(bottom=True)
plt.tight_layout()
plt.savefig("ST_out/plots_overview/CIN_index_CINstatus_BRAF.png")

In [None]:
# CIN index vs. PPT for every row of custom_df: marker shape encodes BRAF status, color encodes tumor type.
plt.figure(figsize=(4.5,4.5))
# Dashed reference line at CIN index = 0 (drawn before the points in this cell).
plt.axhline(0, color="k", ls="--")
sns.scatterplot(
    data=custom_df,
    x="PPT", y="CIN index", style="BRAF status", hue="Tumor type", s=150, alpha=0.7, palette=cmap_dict, style_order=["WT","BRAF mut."]
)
# Frameless three-column legend stretched across the strip just above the axes.
plt.legend(bbox_to_anchor=(0, 1.02, 1, .02), loc='lower left', borderaxespad=0, mode="expand", ncol=3, frameon=False)
plt.ylabel("CIN index")
# Hide tick marks and tick labels on both axes.
plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    labelbottom=False) # labels along the bottom edge are off
plt.tick_params(
    axis='y',          # changes apply to the y-axis
    which='both',      # both major and minor ticks are affected
    left=False,      # ticks along the left edge are off
    right=False,         # ticks along the right edge are off
    labelleft=False) # labels along the left edge are off
# Drop the bottom spine in addition to seaborn's default top/right removal.
sns.despine(bottom=True)
plt.tight_layout()
plt.savefig("ST_out/plots_overview/CIN_index_tumortype_BRAF.png")

---

In [None]:
# Attach the clinical annotation columns from sample_key to each custom_df row,
# matching on (Patient, Tumor type) == (patient_name, tumor_type); unmatched rows keep NaN (left merge).
# NOTE(review): custom_df is overwritten in place, so re-running this cell merges a second time
# and the already-added columns will collide — restart and run all instead of re-running it.
# NOTE(review): if sample_key has several rows per (patient_name, tumor_type) pair this duplicates
# custom_df rows — confirm the key is unique.
custom_df = custom_df.merge(
    sample_key[["patient_name","tumor_type","tumor_stage","tumor_stage_actual","tumor_grade","tumor_loc","Evolution"]],
    left_on=["Patient","Tumor type"], right_on=["patient_name","tumor_type"], how="left")

In [None]:
# Index the table by clone ID (the CNV_LCM column is kept as a regular column too).
custom_df.index = custom_df["CNV_LCM"]
custom_df.index.name = "LCM ROI-CNV clone"
# Switch to display names for the heatmap rows. rename() silently skips keys that are
# not present, so a missing source column will not raise here.
custom_df.rename(columns={
    "CNV score":"CIN+ PPT (CNV score)",
    "TMB":"HM PPT (TMB)",
    "tumor_grade":"Tumor Grade",
    "tumor_stage":"Tumor Stage",
    "gender":"Gender",
    "ethnicity":"Ethnicity",
    "mets":"Metastasis",
    "tumor_loc":"Tumor Location",
}, inplace=True)

In [None]:
# Replace the "AD"/"NL" stage values and the "NL" grade value with the literal string "nan",
# the placeholder key that cmap_dict colors white and the label-building cells blank out.
custom_df.loc[custom_df["Tumor Stage"].isin(["AD","NL"]), "Tumor Stage"] = "nan"
custom_df.loc[custom_df["Tumor Grade"]=="NL", "Tumor Grade"] = "nan"

In [None]:
# set up colormaps
# Fixed category -> hex color lookup used by the categorical heatmaps below.
cmap_dict = {
    "SSL/HP":"#c4a4e1","MSI-H":"#7a4fa3","MSS":"#ffc101","TA/TVA":"#fee799","NL":"#1f77b4",
    "CIN+":"#ffc101","HM":"#7a4fa3",
    "Cecum":"#62a32e","Ascending":"#b0dc7d","Hepatic Flexure":"#ebf6db","Transverse":"#fbe6f1","Descending":"#eeadd4","Sigmoid":"#cf4191",
    #"Cecum":"#1f4e79","Ascending":"#2e74b7","Hepatic Flexure":"#bdd6ef","Transverse":"#ff717a","Descending":"#fe0001","Sigmoid":"#c00101",
    "lymph_node":"#000000",
    "nan":"#ffffff",
    "KRAS mut.":"#000000","APC mut.":"#000000","TP53 mut.":"#000000","BRAF mut.":"#000000",
    "WT":"#ffffff",
    "T":"#000000","F":"#ffffff",
    "N":"#1f77b4","L":"#ff7f0e","B":"#2ca02c",
    "DOD":"#ff0000","DOC":"#ff0000","AWD":"#ffff00","NED":"#008000",
}
# Three-step grey ramps for tumor stage and tumor grade.
stagecolor_dict = dict(zip(["I","II","III/IV"], sns.color_palette("Greys", 3).as_hex()))
gradecolor_dict = dict(zip(["G1","G2","G3"], sns.color_palette("Greys", 3).as_hex()))
# One grey per distinct value, light to dark in ascending value order; keys are the values cast to str.
cnvcolor_dict = dict(zip(custom_df["CIN+ PPT (CNV score)"].unique()[custom_df["CIN+ PPT (CNV score)"].unique().argsort()].astype(str), sns.color_palette("Greys", len(custom_df["CIN+ PPT (CNV score)"].unique())).as_hex()))
varcolor_dict = dict(zip(custom_df["HM PPT (TMB)"].unique()[custom_df["HM PPT (TMB)"].unique().argsort()].astype(str), sns.color_palette("Greys", len(custom_df["HM PPT (TMB)"].unique())).as_hex()))
# Later dicts win on duplicate keys, so the order here is load-bearing.
# NOTE(review): a CNV score and a TMB value with the same string form share one key (the TMB color wins),
# and a missing value stringifies to "nan", which would override the white "nan" entry above — confirm neither occurs.
cmap_dict = {**stagecolor_dict, **cmap_dict, **gradecolor_dict, **cnvcolor_dict, **varcolor_dict}

---

In [None]:
# set up box label strings
tmp = custom_df[["CIN+ PPT (CNV score)","HM PPT (TMB)","Tumor type","Tumor Location","Tumor Grade","tumor_stage_actual","APC status","KRAS status","TP53 status","BRAF status","Evolution"]].T
tmp.loc[["CIN+ PPT (CNV score)","HM PPT (TMB)","Tumor type","Tumor Location","APC status","KRAS status","TP53 status","BRAF status"]] = ""
tmp = tmp.fillna("")
tmp = tmp.replace("nan", "")
tmp = tmp.replace("F", "")
labels = np.asarray(tmp)

In [None]:
# Full categorical overview heatmap: one row per track, one column per clone,
# annotated with the text in `labels` (must match the track order used here).
fig, ax = plt.subplots(1, 1, figsize=(28,8))
# Plot the categorical columns as heatmap
catheat.heatmap(
    custom_df[
        ["CIN+ PPT (CNV score)","HM PPT (TMB)","Tumor type","Tumor Location","Tumor Grade","Tumor Stage","APC status","KRAS status","TP53 status","BRAF status","Evolution"]
    ].T.fillna("F"),  # missing values become the "F" category
    cmap=cmap_dict,
    ax=ax,
    leg_pos="right",
    square=True,
    linewidths=2,
    legend=False,
    annot=labels,
    fmt="",  # labels are already strings; no number formatting
    annot_kws={"fontsize":12},
)
plt.xlabel("")
plt.tick_params(left = False, bottom = False)
plt.tight_layout()
plt.savefig("ST_out/plots_overview/CNV_LCM_heatmap_categorized.png", dpi=800)

---
# Pared-down for Figure 3

In [None]:
# set up box label strings
tmp = custom_df[["CIN+ PPT (CNV score)","HM PPT (TMB)","Tumor type","Evolution"]].T
tmp.loc[["CIN+ PPT (CNV score)","HM PPT (TMB)","Tumor type"]] = ""
tmp = tmp.fillna("")
tmp = tmp.replace("nan", "")
tmp = tmp.replace("F", "")
labels = np.asarray(tmp)

In [None]:
# Pared-down categorical heatmap (four tracks), annotated with the text in `labels`
# (must match the track order used here).
fig, ax = plt.subplots(1, 1, figsize=(20,4))
# Plot the categorical columns as heatmap
catheat.heatmap(
    custom_df[
        ["CIN+ PPT (CNV score)","HM PPT (TMB)","Tumor type","Evolution"]
    ].T.fillna("F"),  # missing values become the "F" category
    cmap=cmap_dict,
    ax=ax,
    leg_pos="right",
    square=True,
    linewidths=2,
    legend=False,
    annot=labels,
    fmt="",  # labels are already strings; no number formatting
    annot_kws={"fontsize":12},
)
plt.xlabel("")
plt.tick_params(left = False, bottom = False)
plt.tight_layout()
plt.savefig("ST_out/plots_overview/CNV_LCM_heatmap_categorized_Fig3.png", dpi=800)

In [None]:
# set up box label strings
tmp = custom_df[["CIN+ PPT (CNV score)","HM PPT (TMB)","Tumor type","Evolution","APC status","KRAS status","TP53 status","BRAF status"]].T
tmp.loc[["CIN+ PPT (CNV score)","HM PPT (TMB)","Tumor type","APC status","KRAS status","TP53 status","BRAF status"]] = ""
tmp = tmp.fillna("")
tmp = tmp.replace("nan", "")
tmp = tmp.replace("F", "")
labels = np.asarray(tmp)

In [None]:
# Pared-down categorical heatmap plus the four driver-mutation tracks, annotated with the
# text in `labels` (must match the track order used here).
fig, ax = plt.subplots(1, 1, figsize=(20,4))
# Plot the categorical columns as heatmap
catheat.heatmap(
    custom_df[
        ["CIN+ PPT (CNV score)","HM PPT (TMB)","Tumor type","Evolution","APC status","KRAS status","TP53 status","BRAF status"]
    ].T.fillna("F"),  # missing values become the "F" category
    cmap=cmap_dict,
    ax=ax,
    leg_pos="right",
    square=True,
    linewidths=2,
    legend=False,
    annot=labels,
    fmt="",  # labels are already strings; no number formatting
    annot_kws={"fontsize":12},
)
plt.xlabel("")
plt.tick_params(left = False, bottom = False)
plt.tight_layout()
plt.savefig("ST_out/plots_overview/CNV_LCM_heatmap_categorized_Fig3_drivermuts.png", dpi=800)

---

In [None]:
# Per-clone mean of the iCMS3 / iCMS2 / IES / CRC2 columns of a_comb.obs,
# restricted to observations whose CNV_LCM label appears in custom_df.
custom_df_2 = a_comb.obs.loc[
    a_comb.obs["CNV_LCM"].isin(custom_df["CNV_LCM"].unique()),
    ["CNV_LCM","iCMS3","iCMS2","IES","CRC2"]
].groupby("CNV_LCM").mean()

# Join the means onto the clone table (default inner merge: clones without a mean row are dropped).
custom_df_2 = custom_df.merge(custom_df_2, left_on="CNV_LCM", right_index=True)

In [None]:
feature = "CRC2"

# CIN index vs. PPT for every row of custom_df_2, colored by the continuous `feature` column.
plt.figure(figsize=(4.5,4.5))
# Dashed reference line at CIN index = 0.
plt.axhline(0, color="k", ls="--")
sns.scatterplot(
    data=custom_df_2,
    x="PPT", y="CIN index", hue=feature, s=150, alpha=0.7, palette="coolwarm", legend=None,
)

# Stand-alone mappable spanning the feature's range, so a colorbar can stand in for the suppressed legend.
norm = plt.Normalize(custom_df_2[feature].min(), custom_df_2[feature].max())
cmap = sns.color_palette("coolwarm", as_cmap=True)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
# `sm` is not attached to any Axes, so name the Axes to take space from explicitly;
# recent Matplotlib releases raise a ValueError instead of falling back to the current Axes.
cbar = plt.colorbar(sm, ax=plt.gca(), orientation="horizontal", location="top")
cbar.ax.set_xlabel(feature)

plt.ylabel("CIN index")
# Hide tick marks and tick labels on both axes.
plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    labelbottom=False) # labels along the bottom edge are off
plt.tick_params(
    axis='y',          # changes apply to the y-axis
    which='both',      # both major and minor ticks are affected
    left=False,      # ticks along the left edge are off
    right=False,         # ticks along the right edge are off
    labelleft=False) # labels along the left edge are off
# Drop the bottom spine in addition to seaborn's default top/right removal.
sns.despine(bottom=True)
plt.tight_layout()
plt.savefig("ST_out/plots_overview/CIN_index_{}.png".format(feature))

In [None]:
feature = "iCMS2"

# CIN index vs. PPT for every row of custom_df_2, colored by the continuous `feature` column.
plt.figure(figsize=(4.5,4.5))
# Dashed reference line at CIN index = 0.
plt.axhline(0, color="k", ls="--")
sns.scatterplot(
    data=custom_df_2,
    x="PPT", y="CIN index", hue=feature, s=150, alpha=0.7, palette="coolwarm", legend=None,
)

# Stand-alone mappable spanning the feature's range, so a colorbar can stand in for the suppressed legend.
norm = plt.Normalize(custom_df_2[feature].min(), custom_df_2[feature].max())
cmap = sns.color_palette("coolwarm", as_cmap=True)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
# `sm` is not attached to any Axes, so name the Axes to take space from explicitly;
# recent Matplotlib releases raise a ValueError instead of falling back to the current Axes.
cbar = plt.colorbar(sm, ax=plt.gca(), orientation="horizontal", location="top")
cbar.ax.set_xlabel(feature)

plt.ylabel("CIN index")
# Hide tick marks and tick labels on both axes.
plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    labelbottom=False) # labels along the bottom edge are off
plt.tick_params(
    axis='y',          # changes apply to the y-axis
    which='both',      # both major and minor ticks are affected
    left=False,      # ticks along the left edge are off
    right=False,         # ticks along the right edge are off
    labelleft=False) # labels along the left edge are off
# Drop the bottom spine in addition to seaborn's default top/right removal.
sns.despine(bottom=True)
plt.tight_layout()
plt.savefig("ST_out/plots_overview/CIN_index_{}.png".format(feature))

In [None]:
feature = "iCMS3"

# CIN index vs. PPT for every row of custom_df_2, colored by the continuous `feature` column.
plt.figure(figsize=(4.5,4.5))
# Dashed reference line at CIN index = 0.
plt.axhline(0, color="k", ls="--")
sns.scatterplot(
    data=custom_df_2,
    x="PPT", y="CIN index", hue=feature, s=150, alpha=0.7, palette="coolwarm", legend=None,
)

# Stand-alone mappable spanning the feature's range, so a colorbar can stand in for the suppressed legend.
norm = plt.Normalize(custom_df_2[feature].min(), custom_df_2[feature].max())
cmap = sns.color_palette("coolwarm", as_cmap=True)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
# `sm` is not attached to any Axes, so name the Axes to take space from explicitly;
# recent Matplotlib releases raise a ValueError instead of falling back to the current Axes.
cbar = plt.colorbar(sm, ax=plt.gca(), orientation="horizontal", location="top")
cbar.ax.set_xlabel(feature)

plt.ylabel("CIN index")
# Hide tick marks and tick labels on both axes.
plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    labelbottom=False) # labels along the bottom edge are off
plt.tick_params(
    axis='y',          # changes apply to the y-axis
    which='both',      # both major and minor ticks are affected
    left=False,      # ticks along the left edge are off
    right=False,         # ticks along the right edge are off
    labelleft=False) # labels along the left edge are off
# Drop the bottom spine in addition to seaborn's default top/right removal.
sns.despine(bottom=True)
plt.tight_layout()
plt.savefig("ST_out/plots_overview/CIN_index_{}.png".format(feature))

In [None]:
feature = "IES"

# CIN index vs. PPT for every row of custom_df_2, colored by the continuous `feature` column.
plt.figure(figsize=(4.5,4.5))
# Dashed reference line at CIN index = 0.
plt.axhline(0, color="k", ls="--")
sns.scatterplot(
    data=custom_df_2,
    x="PPT", y="CIN index", hue=feature, s=150, alpha=0.7, palette="coolwarm", legend=None,
)

# Stand-alone mappable spanning the feature's range, so a colorbar can stand in for the suppressed legend.
norm = plt.Normalize(custom_df_2[feature].min(), custom_df_2[feature].max())
cmap = sns.color_palette("coolwarm", as_cmap=True)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
# `sm` is not attached to any Axes, so name the Axes to take space from explicitly;
# recent Matplotlib releases raise a ValueError instead of falling back to the current Axes.
cbar = plt.colorbar(sm, ax=plt.gca(), orientation="horizontal", location="top")
cbar.ax.set_xlabel(feature)

plt.ylabel("CIN index")
# Hide tick marks and tick labels on both axes.
plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    labelbottom=False) # labels along the bottom edge are off
plt.tick_params(
    axis='y',          # changes apply to the y-axis
    which='both',      # both major and minor ticks are affected
    left=False,      # ticks along the left edge are off
    right=False,         # ticks along the right edge are off
    labelleft=False) # labels along the left edge are off
# Drop the bottom spine in addition to seaborn's default top/right removal.
sns.despine(bottom=True)
plt.tight_layout()
plt.savefig("ST_out/plots_overview/CIN_index_{}.png".format(feature))

In [None]:
# Copy of the IES column that is NaN for every row except those of patient PAT71662,
# so a scatter colored by it highlights that patient only.
custom_df_2["PAT71662 IES"] = np.nan
custom_df_2.loc[custom_df_2.Patient=="PAT71662","PAT71662 IES"] = custom_df_2.loc[custom_df_2.Patient=="PAT71662","IES"]

In [None]:
feature = "PAT71662 IES"

# CIN index vs. PPT for every row of custom_df_2; only rows with a value in `feature`
# are colored by IES, the rest fall back to the colormap's "bad" color.
plt.figure(figsize=(4.5,4.5))
# Dashed reference line at CIN index = 0.
plt.axhline(0, color="k", ls="--")

# plt.get_cmap replaces plt.cm.get_cmap, which newer Matplotlib releases no longer provide.
# Work on a copy so the registered "coolwarm" colormap is not modified.
cmap1 = plt.get_cmap("coolwarm").copy()
cmap1.set_bad("lightgrey", alpha=0.7)

# plotnonfinite=True keeps the NaN rows on the plot (drawn in the "bad" color);
# vmin/vmax use the full IES range so colors are comparable across the per-patient plots.
plt.scatter(
    data=custom_df_2,
    x="PPT", y="CIN index", c=feature, s=150, alpha=0.7, cmap=cmap1, plotnonfinite=True,
    vmin=custom_df_2["IES"].min(), vmax=custom_df_2["IES"].max()
)

# Stand-alone mappable over the same IES range for the colorbar.
norm = plt.Normalize(custom_df_2["IES"].min(), custom_df_2["IES"].max())
cmap = sns.color_palette("coolwarm", as_cmap=True)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
# `sm` is not attached to any Axes, so name the Axes to take space from explicitly;
# recent Matplotlib releases raise a ValueError instead of falling back to the current Axes.
cbar = plt.colorbar(sm, ax=plt.gca(), orientation="horizontal", location="top")
cbar.ax.set_xlabel(feature)

plt.ylabel("CIN index")
# Hide tick marks and tick labels on both axes.
plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    labelbottom=False) # labels along the bottom edge are off
plt.tick_params(
    axis='y',          # changes apply to the y-axis
    which='both',      # both major and minor ticks are affected
    left=False,      # ticks along the left edge are off
    right=False,         # ticks along the right edge are off
    labelleft=False) # labels along the left edge are off
# Drop the bottom spine in addition to seaborn's default top/right removal.
sns.despine(bottom=True)
plt.tight_layout()
plt.savefig("ST_out/plots_overview/CIN_index_{}.png".format(feature.replace(" ","_")))

In [None]:
# Copy of the IES column that is NaN for every row except those of patient PAT73458,
# so a scatter colored by it highlights that patient only.
custom_df_2["PAT73458 IES"] = np.nan
custom_df_2.loc[custom_df_2.Patient=="PAT73458","PAT73458 IES"] = custom_df_2.loc[custom_df_2.Patient=="PAT73458","IES"]

In [None]:
feature = "PAT73458 IES"

# CIN index vs. PPT for every row of custom_df_2; only rows with a value in `feature`
# are colored by IES, the rest fall back to the colormap's "bad" color.
plt.figure(figsize=(4.5,4.5))
# Dashed reference line at CIN index = 0.
plt.axhline(0, color="k", ls="--")

# plt.get_cmap replaces plt.cm.get_cmap, which newer Matplotlib releases no longer provide.
# Work on a copy so the registered "coolwarm" colormap is not modified.
cmap1 = plt.get_cmap("coolwarm").copy()
cmap1.set_bad("lightgrey", alpha=0.7)

# plotnonfinite=True keeps the NaN rows on the plot (drawn in the "bad" color);
# vmin/vmax use the full IES range so colors are comparable across the per-patient plots.
plt.scatter(
    data=custom_df_2,
    x="PPT", y="CIN index", c=feature, s=150, alpha=0.7, cmap=cmap1, plotnonfinite=True,
    vmin=custom_df_2["IES"].min(), vmax=custom_df_2["IES"].max()
)

# Stand-alone mappable over the same IES range for the colorbar.
norm = plt.Normalize(custom_df_2["IES"].min(), custom_df_2["IES"].max())
cmap = sns.color_palette("coolwarm", as_cmap=True)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
# `sm` is not attached to any Axes, so name the Axes to take space from explicitly;
# recent Matplotlib releases raise a ValueError instead of falling back to the current Axes.
cbar = plt.colorbar(sm, ax=plt.gca(), orientation="horizontal", location="top")
cbar.ax.set_xlabel(feature)

plt.ylabel("CIN index")
# Hide tick marks and tick labels on both axes.
plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    labelbottom=False) # labels along the bottom edge are off
plt.tick_params(
    axis='y',          # changes apply to the y-axis
    which='both',      # both major and minor ticks are affected
    left=False,      # ticks along the left edge are off
    right=False,         # ticks along the right edge are off
    labelleft=False) # labels along the left edge are off
# Drop the bottom spine in addition to seaborn's default top/right removal.
sns.despine(bottom=True)
plt.tight_layout()
plt.savefig("ST_out/plots_overview/CIN_index_{}.png".format(feature.replace(" ","_")))

In [None]:
# Copy of the IES column that is NaN for every row except those of patient SG00001,
# so a scatter colored by it highlights that patient only.
custom_df_2["SG00001 IES"] = np.nan
custom_df_2.loc[custom_df_2.Patient=="SG00001","SG00001 IES"] = custom_df_2.loc[custom_df_2.Patient=="SG00001","IES"]

In [None]:
feature = "SG00001 IES"

# CIN index vs. PPT for every row of custom_df_2; only rows with a value in `feature`
# are colored by IES, the rest fall back to the colormap's "bad" color.
plt.figure(figsize=(4.5,4.5))
# Dashed reference line at CIN index = 0.
plt.axhline(0, color="k", ls="--")

# plt.get_cmap replaces plt.cm.get_cmap, which newer Matplotlib releases no longer provide.
# Work on a copy so the registered "coolwarm" colormap is not modified.
cmap1 = plt.get_cmap("coolwarm").copy()
cmap1.set_bad("lightgrey", alpha=0.7)

# plotnonfinite=True keeps the NaN rows on the plot (drawn in the "bad" color);
# vmin/vmax use the full IES range so colors are comparable across the per-patient plots.
plt.scatter(
    data=custom_df_2,
    x="PPT", y="CIN index", c=feature, s=150, alpha=0.7, cmap=cmap1, plotnonfinite=True,
    vmin=custom_df_2["IES"].min(), vmax=custom_df_2["IES"].max()
)

# Stand-alone mappable over the same IES range for the colorbar.
norm = plt.Normalize(custom_df_2["IES"].min(), custom_df_2["IES"].max())
cmap = sns.color_palette("coolwarm", as_cmap=True)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
# `sm` is not attached to any Axes, so name the Axes to take space from explicitly;
# recent Matplotlib releases raise a ValueError instead of falling back to the current Axes.
cbar = plt.colorbar(sm, ax=plt.gca(), orientation="horizontal", location="top")
cbar.ax.set_xlabel(feature)

plt.ylabel("CIN index")
# Hide tick marks and tick labels on both axes.
plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    labelbottom=False) # labels along the bottom edge are off
plt.tick_params(
    axis='y',          # changes apply to the y-axis
    which='both',      # both major and minor ticks are affected
    left=False,      # ticks along the left edge are off
    right=False,         # ticks along the right edge are off
    labelleft=False) # labels along the left edge are off
# Drop the bottom spine in addition to seaborn's default top/right removal.
sns.despine(bottom=True)
plt.tight_layout()
plt.savefig("ST_out/plots_overview/CIN_index_{}.png".format(feature.replace(" ","_")))

In [None]:
# Copy of the IES column that is NaN for every row except those of patient SG00002,
# so a scatter colored by it highlights that patient only.
custom_df_2["SG00002 IES"] = np.nan
custom_df_2.loc[custom_df_2.Patient=="SG00002","SG00002 IES"] = custom_df_2.loc[custom_df_2.Patient=="SG00002","IES"]

In [None]:
feature = "SG00002 IES"

# CIN index vs. PPT for every row of custom_df_2; only rows with a value in `feature`
# are colored by IES, the rest fall back to the colormap's "bad" color.
plt.figure(figsize=(4.5,4.5))
# Dashed reference line at CIN index = 0.
plt.axhline(0, color="k", ls="--")

# plt.get_cmap replaces plt.cm.get_cmap, which newer Matplotlib releases no longer provide.
# Work on a copy so the registered "coolwarm" colormap is not modified.
cmap1 = plt.get_cmap("coolwarm").copy()
cmap1.set_bad("lightgrey", alpha=0.7)

# plotnonfinite=True keeps the NaN rows on the plot (drawn in the "bad" color);
# vmin/vmax use the full IES range so colors are comparable across the per-patient plots.
plt.scatter(
    data=custom_df_2,
    x="PPT", y="CIN index", c=feature, s=150, alpha=0.7, cmap=cmap1, plotnonfinite=True,
    vmin=custom_df_2["IES"].min(), vmax=custom_df_2["IES"].max()
)

# Stand-alone mappable over the same IES range for the colorbar.
norm = plt.Normalize(custom_df_2["IES"].min(), custom_df_2["IES"].max())
cmap = sns.color_palette("coolwarm", as_cmap=True)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
# `sm` is not attached to any Axes, so name the Axes to take space from explicitly;
# recent Matplotlib releases raise a ValueError instead of falling back to the current Axes.
cbar = plt.colorbar(sm, ax=plt.gca(), orientation="horizontal", location="top")
cbar.ax.set_xlabel(feature)

plt.ylabel("CIN index")
# Hide tick marks and tick labels on both axes.
plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    labelbottom=False) # labels along the bottom edge are off
plt.tick_params(
    axis='y',          # changes apply to the y-axis
    which='both',      # both major and minor ticks are affected
    left=False,      # ticks along the left edge are off
    right=False,         # ticks along the right edge are off
    labelleft=False) # labels along the left edge are off
# Drop the bottom spine in addition to seaborn's default top/right removal.
sns.despine(bottom=True)
plt.tight_layout()
plt.savefig("ST_out/plots_overview/CIN_index_{}.png".format(feature.replace(" ","_")))

---

In [None]:
import sys; sys.path.append("../resources/ST/"); from visium_utils import *

In [None]:
import numpy as np
import pandas as pd
from scipy import stats
import seaborn as sns # For pairplots and heatmaps
import matplotlib.pyplot as plt

In [None]:
sns.set_style("white")

In [None]:
def display_correlation(df, figsize=(10,10)):
    """Draw a clustered heatmap of the Spearman correlations between the columns of `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns are correlated pairwise.
    figsize : tuple of float, default (10, 10)
        Size of the cluster-map figure, in inches.

    Returns
    -------
    pandas.DataFrame
        The Spearman correlation matrix that is plotted.
    """
    r = df.corr(method="spearman")
    # Plot the Spearman matrix itself so the figure matches both its title and the
    # returned values. clustermap creates its own figure, so the size is passed to it
    # directly instead of opening a separate (empty) figure first.
    heatmap = sns.clustermap(
        r,
        vmin=-1,
        vmax=1,
        annot=True,
        figsize=figsize,
    )
    heatmap.ax_row_dendrogram.set_visible(False)
    plt.title("Spearman Correlation")
    return(r)


def plot_data_corr(df, title, color="green"):
    """Show the correlation heatmap for `df`, then scatter its first column against every other column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to plot; the first column is the shared x variable.
    title : sequence of str
        Panel titles, indexed by column position (``title[i]`` belongs to column ``i``;
        index 0 is not used).
    color : str, default "green"
        Marker color for all panels.
    """
    r = display_correlation(df)
    n_panels = len(df.columns) - 1
    # Two rows of panels; round the column count up so an odd number of panels still fits,
    # and never request zero columns.
    n_cols = max(1, (n_panels + 1) // 2)
    fig, ax = plt.subplots(nrows=2, ncols=n_cols, figsize=(40,3), squeeze=False)
    # subplots returns a 2-D grid here; flatten it so panels can be addressed by position.
    panels = ax.ravel()
    for i in range(1,len(df.columns)):
        panel = panels[i-1]
        # x data, x label and the reported r all refer to the first column.
        panel.scatter(df.iloc[:,0],df.values[:,i],color=color)
        panel.title.set_text(title[i] +'\n r = ' +
                             "{:.2f}".format(r.values[0,i]))
        panel.set(xlabel=df.columns[0],ylabel=df.columns[i])
    # Hide grid cells that received no data.
    for unused in panels[n_panels:]:
        unused.set_visible(False)
    fig.subplots_adjust(wspace=.7)
    plt.show()

In [None]:
def corrfunc(x, y, **kws):
    """Write the Pearson correlation of `x` and `y` onto the current axes.

    The text "r = <value>" (two decimals) is placed near the top-left corner, in axes
    coordinates. Extra keyword arguments are accepted and ignored, so the function can
    be passed to a grid's ``map_*`` methods.
    """
    pearson_r = stats.pearsonr(x, y)[0]
    current_axes = plt.gca()
    text = "r = {:.2f}".format(pearson_r)
    current_axes.annotate(text, xy=(.1, .9), xycoords=current_axes.transAxes)

---

In [None]:
# Copy each gene's values from a_comb.X into a same-named a_comb.obs column so they can be
# averaged per clone alongside the other obs columns.
# NOTE(review): .todense() assumes a_comb.X is a scipy sparse matrix — confirm.
for gene in ["DDR1","TGFBI","PAK4","DPEP1"]:
    a_comb.obs[gene] = a_comb[:,gene].X.todense()

---

In [None]:
# Per-clone means of CNV score, iCMS2 and the four gene columns, restricted to observations
# whose clone has CIN status "CIN+", "NL" or "TA/TVA" in custom_df.
CIN_df = a_comb.obs.loc[
    a_comb.obs["CNV_LCM"].isin(custom_df.loc[custom_df["CIN status"].isin(["CIN+","NL","TA/TVA"]), "CNV_LCM"].unique()),
    ["CNV_LCM","CNV score","iCMS2","DDR1","TGFBI","PAK4","DPEP1"]
].groupby("CNV_LCM").mean()

In [None]:
# Pairwise view of every column of CIN_df: scatter above the diagonal, histogram on it,
# KDE contours plus a Pearson r annotation (corrfunc) below it.
g = sns.PairGrid(CIN_df, palette=["Greys"], height=1.5)
g.map_upper(plt.scatter, s=10, alpha=0.5, color="k")
g.map_diag(sns.histplot, bins=10, color="k")
g.map_lower(sns.kdeplot, cmap="Greys_d")
g.map_lower(corrfunc)
for ax in g.axes.flatten():
    # rotate x axis labels
    ax.set_xlabel(ax.get_xlabel(), rotation = 45)
    # set x labels alignment
    ax.xaxis.get_label().set_horizontalalignment('right')
    # keep y axis labels horizontal
    ax.set_ylabel(ax.get_ylabel(), rotation = 0)
    # set y labels alignment
    ax.yaxis.get_label().set_horizontalalignment('right')
plt.tight_layout()
plt.savefig("ST_out/plots_overview/immexcl_PTregression.png")

---

In [None]:
# Per-clone means of CNV score, iCMS2, IES, TL2 and "T cell CD8", restricted to observations
# whose clone has CIN status "CIN+", "NL" or "TA/TVA" in custom_df.
# This reuses (overwrites) the CIN_df name from the gene-level table built earlier.
CIN_df = a_comb.obs.loc[
    a_comb.obs["CNV_LCM"].isin(custom_df.loc[custom_df["CIN status"].isin(["CIN+","NL","TA/TVA"]), "CNV_LCM"].unique()),
    ["CNV_LCM","CNV score","iCMS2","IES","TL2","T cell CD8"]
].groupby("CNV_LCM").mean()

In [None]:
# Pairwise view of every column of CIN_df: scatter above the diagonal, histogram on it,
# KDE contours plus a Pearson r annotation (corrfunc) below it.
g = sns.PairGrid(CIN_df, palette=["Greys"], height=1.5)
g.map_upper(plt.scatter, s=10, alpha=0.5, color="k")
g.map_diag(sns.histplot, bins=10, color="k")
g.map_lower(sns.kdeplot, cmap="Greys_d")
g.map_lower(corrfunc)
for ax in g.axes.flatten():
    # rotate x axis labels
    ax.set_xlabel(ax.get_xlabel(), rotation = 45)
    # set x labels alignment
    ax.xaxis.get_label().set_horizontalalignment('right')
    # keep y axis labels horizontal
    ax.set_ylabel(ax.get_ylabel(), rotation = 0)
    # set y labels alignment
    ax.yaxis.get_label().set_horizontalalignment('right')
plt.tight_layout()
plt.savefig("ST_out/plots_overview/immexcl_vs_imminf_PTregression.png")

---

In [None]:
# Per-clone means of TMB, iCMS3, FIB3, MYE2 and "T cell CD8", restricted to observations
# whose clone has CIN status "HM" or "SSL/HP" in custom_df.
HM_df = a_comb.obs.loc[
    a_comb.obs["CNV_LCM"].isin(custom_df.loc[custom_df["CIN status"].isin(["HM","SSL/HP"]), "CNV_LCM"].unique()),
    ["CNV_LCM","TMB","iCMS3","FIB3","MYE2","T cell CD8"]
].groupby("CNV_LCM").mean()

In [None]:
# Pairwise view of every column of HM_df: scatter above the diagonal, histogram on it,
# KDE contours plus a Pearson r annotation (corrfunc) below it.
g = sns.PairGrid(HM_df, palette=["Greys"], height=1.5)
g.map_upper(plt.scatter, s=10, alpha=0.5, color="k")
g.map_diag(sns.histplot, bins=10, color="k")
g.map_lower(sns.kdeplot, cmap="Greys_d")
g.map_lower(corrfunc)
for ax in g.axes.flatten():
    # rotate x axis labels
    ax.set_xlabel(ax.get_xlabel(), rotation = 45)
    # set x labels alignment
    ax.xaxis.get_label().set_horizontalalignment('right')
    # keep y axis labels horizontal
    ax.set_ylabel(ax.get_ylabel(), rotation = 0)
    # set y labels alignment
    ax.yaxis.get_label().set_horizontalalignment('right')
plt.tight_layout()
plt.savefig("ST_out/plots_overview/immexcl_HM_PTregression.png")

---
# Prep for traDE-seq

In [None]:
from sklearn.preprocessing import MinMaxScaler

def generate_pt_df(adata, columns, pt_order, pt_column="CNV_LCM"):
    """
    Build a pseudotime table from ``adata.obs``.

    Parameters
    ----------
    adata : object with an ``.obs`` DataFrame
        Source of the observations. Only ``adata.obs`` is read.
    columns : list of str
        Columns of ``adata.obs`` to keep; each is min-max scaled to [0, 1]
        over the retained rows.
    pt_order : sequence
        Values of ``pt_column`` in pseudotime order. Rows whose ``pt_column``
        value is not in this sequence are dropped. If a value occurs more
        than once, its last position is used.
    pt_column : str, optional
        Column of ``adata.obs`` holding the pseudotime group label.

    Returns
    -------
    pandas.DataFrame
        Copy of the selected rows with ``pt_column``, the scaled ``columns``
        and an integer ``Pseudotime`` column giving each row's position in
        ``pt_order``.
    """
    columns = list(columns)
    obs = adata.obs
    # Filter on the object that was passed in, not on a notebook-level global
    keep = obs[pt_column].isin(pt_order)
    df = obs.loc[keep, [pt_column] + columns].copy()
    df[columns] = MinMaxScaler().fit_transform(df[columns])
    # Explicit label -> rank lookup; every retained label is in pt_order, so the
    # mapping is total and the result can be cast to a plain integer column.
    rank_of = {label: rank for rank, label in enumerate(pt_order)}
    df["Pseudotime"] = df[pt_column].map(rank_of).astype("int64")
    return df

In [None]:
sns.set_theme(style="ticks")
from matplotlib.lines import Line2D

def pt_plot(df, stat="frequency", figsize=(7,3), cmap="tab10", bw_adjust=1.5, save=None, dpi=400, ylim=35, show_bars=False, pt_column="CNV_LCM", **kwargs):
    """
    Draw value-weighted histograms with KDE curves along pseudotime, one per variable.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table with a ``Pseudotime`` column, a ``pt_column`` column and one
        column per variable to plot (e.g. the output of ``generate_pt_df``).
    stat : str
        Passed to ``sns.histplot`` as ``stat``.
    figsize : tuple
        Figure size passed to ``plt.subplots``.
    cmap : str or list
        Palette specification. It is resolved once with ``sns.color_palette``
        to one colour per variable and used for both the curves and the legend.
    bw_adjust : float
        KDE bandwidth adjustment (``kde_kws["bw_adjust"]``).
    save : str or None
        If truthy, the figure is tight-laid-out and saved to this path.
    dpi : int
        Resolution used when saving.
    ylim : float
        Upper y-axis limit (lower limit is 0).
    show_bars : bool
        If True the histogram bars are drawn with alpha 0.2; otherwise they
        are fully transparent and only the KDE lines are visible.
    pt_column : str
        Name of the pseudotime group column in ``df``. Its number of unique
        values sets the bin count and the x-axis extent.
    **kwargs
        Forwarded to ``sns.histplot``. A ``hue_order`` given here also
        determines the legend order.

    Returns
    -------
    None
    """
    # Melt once; the long table feeds the plot and the legend alike
    long_df = df.melt(id_vars=["Pseudotime", pt_column])
    variables = list(kwargs.pop("hue_order", long_df["variable"].unique()))
    n_groups = len(df[pt_column].unique())
    # One colour per variable, resolved a single time so that curves and legend
    # cannot disagree (matters for palettes that are resampled by n_colors)
    palette = sns.color_palette(cmap, n_colors=len(variables))

    fig, ax = plt.subplots(figsize=figsize)
    sns.despine(fig)
    # Draw on the axes created here unless the caller explicitly supplied one
    kwargs.setdefault("ax", ax)
    sns.histplot(
        data=long_df,
        x="Pseudotime",
        hue="variable",
        hue_order=variables,
        weights="value",
        stat=stat,
        multiple="layer",
        palette=palette,
        bins=n_groups,
        kde=True,
        alpha=0.2 if show_bars else 0,
        kde_kws={"bw_adjust": bw_adjust},
        line_kws={"lw": 4, "alpha": 0.8},
        legend=False,
        **kwargs,
    )
    ax.set_ylim([0, ylim])
    ax.set_xlim([-1, n_groups + 1])
    ax.set_ylabel("Weighted Frequency")

    # Hand-built legend: line handles styled like the KDE curves
    legend_handles = [Line2D([0], [0], color=color, lw=4, alpha=0.8) for color in palette]
    ax.legend(
        legend_handles,
        variables,
        loc="upper center",
        bbox_to_anchor=(0.5, 1.2),
        ncol=len(variables),
        frameon=False,
    )
    ax.tick_params(bottom=False, labelbottom=False)
    # Arrow head near the right end of the x axis (x in data units, y in axes units)
    ax.plot((n_groups + 0.75), (0), ls="", marker=">", ms=8, color="k", transform=ax.get_xaxis_transform(), clip_on=False)
    if save:
        fig.tight_layout()
        fig.savefig(save, dpi=dpi)

---

In [None]:
# CNV_LCM labels of the custom_df rows whose "CIN status" is CIN+, NL or TA/TVA,
# kept in custom_df row order
AD_CIN_order = custom_df.loc[
    custom_df["CIN status"].isin(["CIN+", "NL", "TA/TVA"]),
    "CNV_LCM",
].values

In [None]:
# Scaled cell-state columns plus rank-based Pseudotime for spots in the AD_CIN_order regions
cin_state_columns = ["iCMS2", "IES", "Fibrosis", "CRC2", "CT", "ABS", "GOB"]
CIN_df = generate_pt_df(a_comb, columns=cin_state_columns, pt_order=AD_CIN_order, pt_column="CNV_LCM")

In [None]:
# Sanity check: largest pseudotime rank assigned
CIN_df["Pseudotime"].max()

In [None]:
# Plot the scaled cell-state trends along the CIN ordering and save the figure
pt_plot(
    CIN_df,
    cmap="tab20c", figsize=(8, 4),
    bw_adjust=1.8, ylim=34,
    dpi=400, save="ST_out/plots_overview/test_PPT_regression_fits.png",
)

In [None]:
# Rebuild CIN_df with only the scaled CNV score and TMB for the tradeSeq export
CIN_df = generate_pt_df(a_comb, columns=["CNV score", "TMB"], pt_order=AD_CIN_order, pt_column="CNV_LCM")

In [None]:
# For the CIN lineage, pseudotime is the (min-max scaled) CNV score;
# one-hot lineage membership: CIN = 1, HM = 0
CIN_df["Pseudotime"] = CIN_df["CNV score"].copy()
CIN_df["CIN"], CIN_df["HM"] = 1, 0

---

In [None]:
# CNV_LCM labels of the custom_df rows whose "CIN status" is HM or SSL/HP,
# kept in custom_df row order
SER_HM_order = custom_df.loc[
    custom_df["CIN status"].isin(["HM", "SSL/HP"]),
    "CNV_LCM",
].values

In [None]:
# Scaled CNV score and TMB for spots in the SER_HM_order regions
HM_df = generate_pt_df(a_comb, columns=["CNV score", "TMB"], pt_order=SER_HM_order, pt_column="CNV_LCM")

In [None]:
# For the HM lineage, pseudotime is the (min-max scaled) TMB;
# one-hot lineage membership: HM = 1, CIN = 0
HM_df["Pseudotime"] = HM_df["TMB"].copy()
HM_df["HM"], HM_df["CIN"] = 1, 0

In [None]:
# Stack the CIN and HM lineage tables row-wise (columns are aligned by name)
tradeseq_df = pd.concat(
    [CIN_df, HM_df],
    axis=0,
)

In [None]:
# Inspect the combined CIN + HM table before it is written out
tradeseq_df

In [None]:
# Write the combined pseudotime / lineage table to CSV (index included)
tradeseq_df.to_csv("ST_out/tradeseq.csv")

---
### Regular genes first

In [None]:
# Keep only the spots whose CNV_LCM label is listed in custom_df, then show the subset
in_custom_regions = a_comb.obs["CNV_LCM"].isin(custom_df["CNV_LCM"])
a_tradeseq = a_comb[in_custom_regions, :].copy()
a_tradeseq

In [None]:
%%time
# Stash the current X in a layer so it can be restored after preprocessing
a_tradeseq.layers["raw_counts"] = a_tradeseq.X.copy()
# Preprocessing applied to a_tradeseq in this order: total-count normalization,
# log1p transform, then scaling. Return values are not captured, so these calls
# are presumably modifying a_tradeseq in place (`sc` looks like scanpy — confirm).
sc.pp.normalize_total(a_tradeseq)
sc.pp.log1p(a_tradeseq)
sc.pp.scale(a_tradeseq)

In [None]:
# Highly-variable-gene selection with n_top_genes=10000.
# NOTE(review): this runs after sc.pp.scale in the previous cell, so it operates on the
# scaled matrix rather than the log-normalized one — confirm that is intended. The cells
# visible below export genes by total_counts, not by this selection.
sc.pp.highly_variable_genes(a_tradeseq, n_top_genes=10000)

In [None]:
# Preview: per-gene totals of the "raw_counts" layer (sum over axis 0) as a flat array
np.squeeze(np.array(a_tradeseq.layers["raw_counts"].sum(axis=0)))

In [None]:
# Store the per-gene totals of the "raw_counts" layer in .var
raw_gene_totals = a_tradeseq.layers["raw_counts"].sum(axis=0)
a_tradeseq.var["total_counts"] = np.array(raw_gene_totals).squeeze()

In [None]:
# Spot-check the .var annotations of a few genes
a_tradeseq.var.loc[["DDR1", "DPEP1", "PAK4", "TGFBI"], :]

In [None]:
# Put the stashed "raw_counts" layer back into X, replacing the preprocessed values
a_tradeseq.X = a_tradeseq.layers["raw_counts"].copy()

In [None]:
# Export X for the genes whose summed raw count exceeds 80,000
high_count_genes = a_tradeseq.var["total_counts"] > 80000
a_tradeseq[:, high_count_genes].to_df().to_csv("ST_out/tradeseq_counts.csv")

---
### Now gene signatures and cell states

In [None]:
# Rebuild a_tradeseq from a_comb using the observations indexed in tradeseq_df
# (this overwrites the a_tradeseq object used for the gene-count export above)
a_tradeseq = a_comb[tradeseq_df.index,:].copy()

In [None]:
# .obs columns (signature scores, cell-type scores and cell-state usages) to export;
# the commented-out names at the end are kept for reference but not exported
columns = [
    'CytoTRACE','Cytotoxicity','Exhaustion','Cytokines','Chemokines','MHC','Fetal','Stem','Metaplasia',
    'Bacterial response','Proliferation','Translation','Neutrophils','RSC','CBC','T cell exhaustion',
    'B cell','NK cell','Plasma cell','T reg resting','T reg suppressive','T reg tissue homing','T reg cytokines',
    'T reg activation','TH1','TH2','TH17','TRM','M1','M2','Costimulatory MYE','Stimulatory DCs',
    'IFN stimulated MYE','Senescence','SASP','IFN stimulated EPI','Stress response','Fibrosis',
    'T cell','Myeloid','Stroma','T reg','T cell CD4','T cell CD8','cDC2','cDC1','Macrophage','Classical monocytes',
    'Cycle','Stress','Interferon','Hypoxia','Oxphos','Metal','Mesenchymal','pEMT','Alveolar','Basal','Squamous',
    'iCMS2','iCMS3','IES',
    'STM','END1','BL1','FIB1','CRC1','MYE1','TL1','MYE2','CRC2','CT','SSC','CRC3','EE1','MYE3','PLA','FIB2',
    'MYE4','GOB','MAS','MYE5','CRC4','ABS','TUF','FIB3','FIB4','TL2','END2','TL3','EE2','BL2',
    #'LYSOZYME','CD20','MUC5AC','CD11B','CD45','FOXP3','CD8','CD3D','SOX9','CGA','DPEP1','PEGFR','NAKATPASE',
    #'AQP5','HLAA','PSTAT3','PCNA','CD4','CD68','CEACAM5','CDX2','COLLAGEN','OLFM4','VIMENTIN','PANCK','SMA',
    #'CD27','GAMMAACTIN','BCATENIN','MUC2','ERBB2','COX2'
]

# Replace a_tradeseq with an AnnData whose X holds the min-max scaled .obs columns
# (one variable per entry of `columns`) and whose obs keeps only the sample metadata
scaler = MinMaxScaler()
a_tradeseq = sc.AnnData(
    X=scaler.fit_transform(a_tradeseq.obs.loc[:, columns]),
    obs=a_tradeseq.obs.loc[:, ['Sample', 'Patient', 'Block ID', 'Tumor Type']],
)
a_tradeseq.var_names = columns

a_tradeseq

In [None]:
# Inspect the min-max scaled signature / cell-state matrix
a_tradeseq.X

In [None]:
# Write the scaled signature / cell-state matrix (observations x columns) to CSV
a_tradeseq.to_df().to_csv("ST_out/tradeseq_signatures_cellstates.csv")