In [None]:
import sys; sys.path.append("../resources/")
from infercnv_utils import *
# make this notebook work better with Scanpy
import warnings; warnings.simplefilter(action='ignore', category=FutureWarning)
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import infercnvpy as cnv

In [None]:
# make output directories
# A single idempotent call creates "ST_out" and "ST_out/infercnv/" together;
# exist_ok=True replaces the separate exists()/mkdir() pairs and cannot fail
# if the directory appears between the check and the creation.
import os
os.makedirs("ST_out/infercnv/", exist_ok=True)

In [None]:
# Scanpy figure defaults: transparent backgrounds, saved figures rendered at 400 dpi.
sc.set_figure_params(transparent=True, dpi_save=400)
# Point scanpy's figure directory at the output folder for this notebook.
sc.settings.figdir = "ST_out/infercnv/"

In [None]:
# Fixed patient ordering, used to order the Patient categories and the rows of
# the cosine-similarity figure.
sample_order = [
    "HTA11_06134",
    "HTA11_07663",
    "HTA11_08622",
    "HTA11_01938",
    "HTA11_07862",
    "HTA11_10711",
    "PAT73458",
    "PAT71397",
    "PAT71662",
    "PAT73899",
    "PAT74143",
]

---
# Read in ST CNV samples (all patients)

In [None]:
# Load the Visium sample key; the first CSV column becomes the index.
# Later cells read its `patient_name` and `tumor_type` columns and its index.
sample_key = pd.read_csv("../resources/ST/visium_sample_key.csv", index_col=0)

In [None]:
# Label -> hex colour lookup shared by the annotation palettes in this notebook.
cmap_dict = {
    # Tumor Type
    'SSL/HP': "#c4a4e1",
    'MSI-H': "#7a4fa3",
    'MSS': "#ffc101",
    'TA/TVA': "#fee799",
    'NL': "#1f77b4",
    # Tumor Location
    "Cecum": "#1f4e79",
    "Ascending": "#2e74b7",
    "Hepatic Flexure": "#bdd6ef",
    "Transverse": "#ff717a",
    "Descending": "#fe0001",
    "Sigmoid": "#c00101",
    # this one's global
    "nan": "#ffffff",
    # These are black and white for T and F
    "T": "#000000",
    "F": "#ffffff",
}
# Sequential "Reds" ramps: one hex colour per stage label (4) and per grade label (3).
stage_colordict = dict(zip(["AD","I","II","III/IV"], sns.color_palette("Reds", 4).as_hex()))
grade_colordict = dict(zip(["G1","G2","G3"], sns.color_palette("Reds", 3).as_hex()))
# Fold the stage and grade colours into the shared lookup.
cmap_dict.update(stage_colordict)
cmap_dict.update(grade_colordict)

In [None]:
# Map every patient name in the sample key to the colour of its tumour type.
patient_colordict = {
    patient: cmap_dict[tumor_type]
    for patient, tumor_type in zip(sample_key.patient_name, sample_key.tumor_type)
}

In [None]:
# Load the ST CNV results once per unique patient in the sample key,
# collecting the returned objects in `outs` in that order.
outs = []
for pat in sample_key.patient_name.unique():
    print(pat)
    a_comb = load_cnv(
        pat,
        sample_key=sample_key,
        CNV_group="patient_name",
        infercnv_dir="../step3/ST_out/infercnv/",
        dataset_dir="../data/ST/",
    )
    outs.append(a_comb)

In [None]:
# Normalise every loaded object in place.
for out in outs:
    # Leiden labels: cast to str, then to categorical.
    out.obs["cnv_leiden"] = out.obs["cnv_leiden"].astype(str).astype("category")
    # Remap the CNV matrix and its genome partitions with remap_cnv.
    # NOTE(review): the 22-entry list is presumably the target bin count per
    # chromosome — confirm against infercnv_utils.remap_cnv.
    remapped_matrix, remapped_partitions = remap_cnv(
        out.obsm["X_cnv"],
        out.uns["cnv"],
        [17,10,9,6,7,8,7,5,6,6,9,9,2,5,5,7,9,2,12,4,2,3],
    )
    out.obsm["X_cnv"] = remapped_matrix
    out.uns["cnv"] = remapped_partitions

In [None]:
%%time
# concatenate anndata objects
# join="outer" with fill_value=0 is requested, so entries missing from an object are filled with 0.
# NOTE(review): batch labels are taken positionally from sample_key.index while
# `outs` holds one object per unique patient_name — confirm the two line up.
a_comb = outs[0].concatenate(
    outs[1:],
    join="outer",
    batch_categories=list(sample_key.index),
    fill_value=0,
)
#del a_comb.obsm
del a_comb.var

# add cnv genome partitions
# (copied from the first object; assumes all objects share the same partitions after remapping — TODO confirm)
a_comb.uns["cnv"] = outs[0].uns["cnv"]

# uniqueify clone names
# by prefixing every clone label with its patient ID and a space
a_comb.obs["CNV Clone"] = a_comb.obs["Patient"] + " " + a_comb.obs["CNV Clone"]

In [None]:
# Patient becomes categorical; its palette takes, for each category in order,
# the tumour-type colour stored in patient_colordict.
a_comb.obs["Patient"] = a_comb.obs["Patient"].astype("category")
a_comb.uns["Patient_colors"] = [
    patient_colordict[patient] for patient in a_comb.obs["Patient"].cat.categories
]

In [None]:
# Tumor Type becomes categorical, with one cmap_dict colour per category, in category order.
a_comb.obs["Tumor Type"] = a_comb.obs["Tumor Type"].astype("category")
a_comb.uns["Tumor Type_colors"] = [
    cmap_dict[tumor_type] for tumor_type in a_comb.obs["Tumor Type"].cat.categories
]

In [None]:
# subset to major clones: drop every observation whose (patient-prefixed) clone
# label contains " E" or " S", and work on an independent copy
a_comb_major = a_comb[
    ~(
        a_comb.obs["CNV Clone"].str.contains(" E")
        | a_comb.obs["CNV Clone"].str.contains(" S")
    )
].copy()

In [None]:
# Chromosome heatmap of the major-clone subset, grouped by patient with a
# dendrogram, saved with the "_ST_patient_all_remapped.png" suffix.
cnv.pl.chromosome_heatmap(
    a_comb_major,
    groupby="Patient",
    dendrogram=True,
    figsize=(12,18),
    save="_ST_patient_all_remapped.png",
)

In [None]:
%%time
# CNV-space embedding pipeline: 50-component PCA, a neighbour graph with
# k = int(sqrt(number of observations)), then Leiden clustering at resolution 0.8.
cnv.tl.pca(a_comb_major, n_comps = 50)
cnv.pp.neighbors(a_comb_major, n_neighbors=int(np.sqrt(a_comb_major.n_obs)))
cnv.tl.leiden(a_comb_major, resolution=0.8)

In [None]:
# Expose the CNV PCA coordinates under the "X_pca" key as well
# (presumably so scanpy's PCA plot picks up the CNV embedding — confirm).
a_comb_major.obsm["X_pca"] = a_comb_major.obsm["X_cnv_pca"]

In [None]:
import colorcet as cc
# Give every patient category its own glasbey colour; this overwrites whatever
# "Patient_colors" palette the major-clone object held before.
a_comb_major.uns["Patient_colors"] = sns.color_palette(
    cc.glasbey,
    n_colors=len(a_comb_major.obs["Patient"].cat.categories),
).as_hex()

In [None]:
# PCA scatter (components 1 and 2), one panel per annotation, stacked in a single column.
sc.pl.pca(
    a_comb_major,
    color=["Patient","Tumor Type", "CNV Score", "cnv_leiden"],
    components=['1,2'],
    cmap="viridis",
    ncols=1,
    size=10,
)

In [None]:
# Compute the CNV UMAP embedding for the major-clone subset (timed).
%time cnv.tl.umap(a_comb_major)

In [None]:
# Patient category -> colour currently stored in uns["Patient_colors"].
# NOTE(review): not referenced by any later cell visible in this notebook.
pat_colordict = {
    patient: colour
    for patient, colour in zip(
        a_comb_major.obs.Patient.cat.categories,
        a_comb_major.uns["Patient_colors"],
    )
}

In [None]:
# Build one obs column per patient for small-multiple UMAP panels: the column
# holds the patient ID on that patient's observations and NaN everywhere else.
# Each masked column is built in one step; the previous approach wrote strings
# into a float NaN column, an incompatible-dtype assignment that pandas
# deprecates (its FutureWarning is silenced at the top of this notebook).
patient_labels = a_comb_major.obs.Patient.astype(str)
for pat in a_comb_major.obs.Patient.cat.categories:
    print(pat)
    # Keep the label where it equals `pat`, NaN elsewhere, then make it categorical.
    a_comb_major.obs[pat] = patient_labels.where(patient_labels == pat).astype("category")
    # One colour per category, looked up in patient_colordict.
    a_comb_major.uns["{}_colors".format(pat)] = [patient_colordict[x] for x in a_comb_major.obs[pat].cat.categories]

In [None]:
# Overwrite the PAT71397 panel column with that patient's tumour type:
# Tumor Type on PAT71397 observations, NaN on all others.
a_comb_major.obs["PAT71397"] = (
    a_comb_major.obs["Tumor Type"]
    .where(a_comb_major.obs["Patient"] == "PAT71397")
    .values
)

In [None]:
# Categorical PAT71397 column with one cmap_dict colour per category, in category order.
a_comb_major.obs["PAT71397"] = a_comb_major.obs["PAT71397"].astype("category")
a_comb_major.uns["PAT71397_colors"] = [
    cmap_dict[label] for label in a_comb_major.obs["PAT71397"].cat.categories
]

In [None]:
# One UMAP panel per patient column (five panels per row), no legends or frames.
cnv.pl.umap(
    a_comb_major,
    color=a_comb_major.obs["Patient"].cat.categories.tolist(),
    ncols=5,
    size=18,
    frameon=False,
    legend_loc=None,
    save="_majorclone_patients.png",
)

In [None]:
# CNV Score restricted to PAT71397: the score on that patient's observations, NaN elsewhere.
a_comb_major.obs["PAT71397 CNV Score"] = (
    a_comb_major.obs["CNV Score"]
    .where(a_comb_major.obs["Patient"] == "PAT71397")
    .values
)

In [None]:
# CNV Clone (as strings) restricted to PAT71397: the clone label on that
# patient's observations, NaN elsewhere.
a_comb_major.obs["PAT71397 CNV Clone"] = (
    a_comb_major.obs["CNV Clone"]
    .astype(str)
    .where(a_comb_major.obs["Patient"] == "PAT71397")
    .values
)

In [None]:
# Categorical PAT71397 clone labels with a three-colour tab10 palette
# (assumes exactly three clone categories — TODO confirm).
a_comb_major.obs["PAT71397 CNV Clone"] = a_comb_major.obs["PAT71397 CNV Clone"].astype("category")
a_comb_major.uns["PAT71397 CNV Clone_colors"] = sns.color_palette("tab10", n_colors=3)

In [None]:
# Shorten the PAT71397 clone labels to bare clone numbers.
# The categories are renamed and the result assigned back to the column. An
# `inplace=True` replace on `obs[...]` mutates a column selection, which does
# not write back to the frame under pandas Copy-on-Write. Labels absent from
# the mapping are left untouched, and mapping keys that are not present are
# ignored.
a_comb_major.obs["PAT71397 CNV Clone"] = a_comb_major.obs["PAT71397 CNV Clone"].cat.rename_categories(
    {"PAT71397 1":"1", "PAT71397 2":"2", "PAT71397 3":"3"}
)

In [None]:
# Three side-by-side UMAP panels for PAT71397: CNV score, CNV clone and tumour type.
cnv.pl.umap(
    a_comb_major,
    color=["PAT71397 CNV Score", "PAT71397 CNV Clone", "PAT71397"],
    ncols=3,
    size=18,
    cmap="viridis",
    frameon=False,
    na_in_legend=False,
    save="_majorclone_PAT71397.png",
)

In [None]:
# Categorical Tumor Stage with one cmap_dict colour per category, in category order.
a_comb_major.obs["Tumor Stage"] = a_comb_major.obs["Tumor Stage"].astype("category")
a_comb_major.uns["Tumor Stage_colors"] = [
    cmap_dict[stage] for stage in a_comb_major.obs["Tumor Stage"].cat.categories
]

In [None]:
# Categorical Tumor Grade with one cmap_dict colour per category, in category order.
a_comb_major.obs["Tumor Grade"] = a_comb_major.obs["Tumor Grade"].astype("category")
a_comb_major.uns["Tumor Grade_colors"] = [
    cmap_dict[grade] for grade in a_comb_major.obs["Tumor Grade"].cat.categories
]

In [None]:
# Single-panel CNV UMAP coloured by patient.
cnv.pl.umap(
    a_comb_major,
    color=["Patient"],
    ncols=1,
    size=18,
    cmap="viridis",
    frameon=False,
    na_in_legend=False,
    save="_majorclone.png",
)

In [None]:
# Two-panel CNV UMAP: tumour type next to CNV score.
cnv.pl.umap(
    a_comb_major,
    color=["Tumor Type","CNV Score"],
    ncols=2,
    size=18,
    cmap="viridis",
    frameon=False,
    na_in_legend=False,
    save="_majorclone_tumortype_score.png",
)

---
# Read in scRNA data

In [None]:
# Read the combined scRNA object from disk and display it.
# NOTE: this rebinds `a_comb`, which previously held the concatenated ST object.
a_comb = sc.read("../data/scRNA/VUMC_COMBINED.h5ad")
a_comb

---
### Add CNV inference results

In [None]:
# Keep only observations whose Patient label is not the empty string, then display the copy.
a = a_comb[a_comb.obs["Patient"] != ""].copy()
a

In [None]:
# read in CNV matrix and put in a.obsm slot
# allow_pickle is passed as a real boolean (the string "TRUE" only worked
# because any non-empty string is truthy), and the archive is opened in a
# `with` block so its file handle is closed once the array has been read.
# NOTE: allow_pickle=True unpickles arbitrary objects — only load trusted files.
with np.load("../step1/scRNA_out/infercnv/VUMC_cnv.npz", allow_pickle=True) as cnv_npz:
    # "arr_0" is a 0-d object array; .item() unwraps the stored object.
    a.obsm["X_cnv"] = cnv_npz["arr_0"].item()
# read in CNV genomic partitions
a.uns["cnv"] = np.load("../step1/scRNA_out/infercnv/uns_cnv_VUMC.npy", allow_pickle=True).item()
# read in CNV score and leiden labels
cnv_obs = pd.read_csv("../step1/scRNA_out/infercnv/VUMC_cnv_leiden.csv", index_col=0)
# Index-on-index merge of the CNV columns onto the existing obs table.
a.obs = a.obs.merge(cnv_obs, left_index=True, right_index=True)

In [None]:
# Remap the scRNA CNV matrix and its genome partitions with the same 22-entry
# target list that is used for the ST objects.
remapped_matrix, remapped_partitions = remap_cnv(
    a.obsm["X_cnv"],
    a.uns["cnv"],
    [17,10,9,6,7,8,7,5,6,6,9,9,2,5,5,7,9,2,12,4,2,3],
)
a.obsm["X_cnv"] = remapped_matrix
a.uns["cnv"] = remapped_partitions

In [None]:
# Leiden labels: cast to str, then to categorical.
a.obs["cnv_leiden"] = a.obs["cnv_leiden"].astype(str).astype("category")

In [None]:
# Chromosome heatmap of the scRNA object, grouped by CNV Leiden cluster, with a dendrogram.
cnv.pl.chromosome_heatmap(
    a,
    groupby="cnv_leiden",
    dendrogram=True,
    figsize=(12,8),
    save="_VUMC_leiden_remapped.png",
)

In [None]:
# Make Patient categorical with its categories arranged in `sample_order`
# (the tumour-type palette assignment below is left disabled).
a.obs["Patient"] = a.obs["Patient"].astype("category").cat.reorder_categories(sample_order)
#a.uns["Patient_colors"] = [patient_colordict[x] for x in a.obs.Patient.cat.categories]

In [None]:
# Major-clone scRNA subset: drop observations whose clone label contains " E" or " S".
a_major = a[
    ~(
        a.obs["CNV Clone"].str.contains(" E")
        | a.obs["CNV Clone"].str.contains(" S")
    )
].copy()

In [None]:
# Chromosome heatmap of the major-clone scRNA subset, grouped by patient.
cnv.pl.chromosome_heatmap(
    a_major,
    groupby="Patient",
    figsize=(12,16),
    save="_VUMC_patient_remapped.png",
)

---
# Read in ST CNV samples (with matched scRNA)

In [None]:
# Reload the Visium sample key (same file and arguments as the earlier load in this notebook).
sample_key = pd.read_csv("../resources/ST/visium_sample_key.csv", index_col=0)

In [None]:
# Load ST CNV results only for the patients present in the scRNA object.
# NOTE(review): unlike the all-patient loader earlier in this notebook, no
# infercnv_dir / dataset_dir is passed, so load_cnv's defaults apply — confirm
# they point at the intended ST outputs.
outs = []
for pat in a.obs.Patient.unique():
    print(pat)
    a_comb = load_cnv(
        pat,
        sample_key=sample_key,
        CNV_group="patient_name",
    )
    outs.append(a_comb)

In [None]:
# Normalise every matched ST object in place.
for out in outs:
    # Leiden labels: cast to str, then to categorical.
    out.obs["cnv_leiden"] = out.obs["cnv_leiden"].astype(str).astype("category")
    # Remap the CNV matrix and its genome partitions with the same 22-entry
    # target list that is used for the scRNA object.
    remapped_matrix, remapped_partitions = remap_cnv(
        out.obsm["X_cnv"],
        out.uns["cnv"],
        [17,10,9,6,7,8,7,5,6,6,9,9,2,5,5,7,9,2,12,4,2,3],
    )
    out.obsm["X_cnv"] = remapped_matrix
    out.uns["cnv"] = remapped_partitions

In [None]:
%%time
# concatenate anndata objects
# join="outer" with fill_value=0 is requested, so entries missing from an object are filled with 0.
# NOTE(review): batch labels are taken positionally from sample_key.index while
# `outs` was built from a.obs.Patient.unique() — confirm length and order line up.
a_comb = outs[0].concatenate(
    outs[1:],
    join="outer",
    batch_categories=list(sample_key.index),
    fill_value=0,
)
#del a_comb.obsm
del a_comb.var

# add cnv genome partitions
# (copied from the first object; assumes all objects share the same partitions after remapping — TODO confirm)
a_comb.uns["cnv"] = outs[0].uns["cnv"]

# uniqueify clone names
# by prefixing every clone label with its patient ID and a space
a_comb.obs["CNV Clone"] = a_comb.obs["Patient"] + " " + a_comb.obs["CNV Clone"]

In [None]:
# Display the Patient categories of the concatenated ST object; the next cell
# merges the "HTA11_08622_A"/"HTA11_08622_B" labels into one patient.
a_comb.obs.Patient.cat.categories

In [None]:
# Work on plain strings, then collapse the two HTA11_08622 sample-level labels
# into the single patient ID.
a_comb.obs["Patient"] = a_comb.obs["Patient"].astype(str)
a_comb.obs["Patient"] = a_comb.obs["Patient"].mask(
    a_comb.obs["Patient"].isin(["HTA11_08622_A","HTA11_08622_B"]),
    "HTA11_08622",
)

In [None]:
# Make Patient categorical with its categories arranged in `sample_order`
# (the tumour-type palette assignment below is left disabled).
a_comb.obs["Patient"] = a_comb.obs["Patient"].astype("category").cat.reorder_categories(sample_order)
#a_comb.uns["Patient_colors"] = [patient_colordict[x] for x in a_comb.obs.Patient.cat.categories]

In [None]:
# subset to major clones: drop every observation whose (patient-prefixed) clone
# label contains " E" or " S", and work on an independent copy
a_comb_major = a_comb[
    ~(
        a_comb.obs["CNV Clone"].str.contains(" E")
        | a_comb.obs["CNV Clone"].str.contains(" S")
    )
].copy()

In [None]:
# Chromosome heatmap of the matched major-clone ST subset, grouped by patient, without a dendrogram.
cnv.pl.chromosome_heatmap(
    a_comb_major,
    groupby="Patient",
    dendrogram=False,
    figsize=(12,16),
    save="_ST_patient_remapped.png",
)

---
# Distance Calculations

In [None]:
from sklearn.metrics import pairwise_distances

In [None]:
# Use seaborn's "white" style for the figures that follow.
sns.set_style("white")
# `pe` supplies the text outline (withStroke) used on the axis labels of the similarity figure.
from matplotlib import patheffects as pe

In [None]:
# For each matched patient, compare the CNV profile of every retained ST
# observation with that of every high-CNV-score scRNA cell, and store all
# pairwise cosine similarities (1 - cosine distance) as one flat array.
dist_dict = {}
# `outs` was built by iterating a.obs.Patient.unique(), so pairing it with that
# same sequence yields the scRNA patient ID for every ST object. The ST objects'
# own Patient column can carry sample-level labels (e.g. "HTA11_08622_A"), which
# match neither a_major.obs.Patient nor sample_order.
matched_patients = list(a.obs.Patient.unique())
if len(matched_patients) != len(outs):
    raise ValueError(
        "expected one ST object per scRNA patient, got {} objects for {} patients".format(
            len(outs), len(matched_patients)
        )
    )
for pat, out in zip(matched_patients, outs):
    print(pat)
    print(out.shape)
    # ST side: drop observations whose clone label is exactly "S" or "E".
    st_tmp = out[~out.obs["CNV Clone"].isin(["S","E"]),:].copy()
    print(st_tmp.shape)
    # scRNA side: this patient's major-clone cells.
    sc_tmp = a_major[a_major.obs.Patient == pat,:]
    print(sc_tmp.shape)
    max_score = sc_tmp.obs.cnv_score.max()
    print("maximum scRNA CNV score: {}".format(max_score))
    # Keep only cells scoring at least half of the patient's maximum CNV score.
    sc_tmp = sc_tmp[sc_tmp.obs.cnv_score >= 0.5 * max_score,:].copy()
    print(sc_tmp.shape)
    dist_dict[pat] = 1 - pairwise_distances(X=st_tmp.obsm["X_cnv"], Y=sc_tmp.obsm["X_cnv"], metric="cosine").flatten()

In [None]:
# Stacked KDEs of ST-vs-scRNA cosine similarity, one row per patient in
# `sample_order`, with each row label coloured by patient_colordict.
missing_patients = [p for p in sample_order if p not in dist_dict]
if missing_patients:
    # Fail before any drawing, with the full list of patients lacking similarities.
    raise KeyError("no cosine similarities computed for: {}".format(missing_patients))

# The grid is sized by the sequence that is actually iterated (sample_order);
# squeeze=False keeps `ax` indexable even when there is a single row.
fig, ax = plt.subplots(len(sample_order), 1, figsize=(4,7), sharex=True, sharey=True, squeeze=False)
ax = ax[:, 0]
fig.subplots_adjust(hspace=0.01)
for i, title in enumerate(sample_order):
    sns.kdeplot(dist_dict[title], ax=ax[i], color="k")
    # Patient ID as a horizontal row label, outlined in black.
    ax[i].set_ylabel(
        title,
        rotation="horizontal",
        ha="right",
        fontdict={"color":patient_colordict[title]},
        path_effects=[pe.withStroke(linewidth=0.3, foreground="k")],
    )
    ax[i].set_yticklabels([])
    ax[i].set_xlim([-1,1])
    # Dashed reference line at zero similarity.
    ax[i].axvline(0.0, color="k", alpha=0.8, ls="--")
# Label the shared x axis on the bottom row explicitly.
ax[-1].set_xlabel("Cosine Similarity")
sns.despine(left=True)
plt.tight_layout()
# tight_layout re-spaces the rows, so the vertical gap is removed again afterwards.
fig.subplots_adjust(hspace=0)
plt.savefig("ST_out/infercnv/infercnv_scRNA_val_cosinesimilarity.png")