# SERPENTINE -- Tumor - Blood Matching TCRs

In [None]:
# load packages
import sys
import scanpy as sc
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.cm as cm


In [None]:
# Install a blanket "ignore" filter so no warning is printed in the
# notebook output from here on.
import warnings
warnings.simplefilter('ignore')

In [None]:
# Plotting defaults for the whole notebook: 4x4 figures, scanpy verbosity 0,
# and scanpy figure parameters (screen dpi 300, save dpi 600, PDF format).
plt.rcParams.update({'figure.figsize': (4.0, 4.0)})
sc.settings.verbosity = 0
sc.set_figure_params(
    dpi=300,
    dpi_save=600,
    format='pdf',
    figsize=(4, 4),
)

In [None]:
# Working directory of this analysis, plus the single output directory used
# both by scanpy's `save=` option and by the explicit `plt.savefig` calls.
work_dir = "/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/data/tumour_blood_overlapping_clonotypes_phenotypes"
fig_dir = "/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/figures/TFM/Fig4"
sc.settings.figdir = fig_dir

In [None]:
# Load the combined AnnData object from its .h5ad file.
adata = sc.read_h5ad(
    "/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/data/outputdata/combined/Combined_SCR_C02_TCR_tumour_blood_30-04-24.h5ad"
)

In [None]:
# UMAP coloured by the `Annotation_2.0` column, saved through scanpy.
# NOTE(review): vmin/vmax look like continuous-colour options; confirm they
# have any effect for this column.
_annotation_plot_kwargs = dict(
    color="Annotation_2.0",
    title="T Cells Annotation",
    save="_T_Cells_Annotation.png",
    vmin=0,
    vmax="p99",
    sort_order=False,
    use_raw=False,
    frameon=False,
)
sc.pl.umap(adata, **_annotation_plot_kwargs)

In [None]:
# UMAP coloured by the `cell_type` column, saved through scanpy.
sc.pl.umap(
    adata,
    color="cell_type",
    title="Basic T Cells Annotation",
    save="_Basic_T_Cells_Annotation.png",
    frameon=False,
    use_raw=False,
    sort_order=False,
    vmin=0,
    vmax="p99",
)

In [None]:
# some markers
# NOTE: the plotting call below is wrapped in a triple-quoted string, so it is
# NOT executed; the cell only evaluates that string literal. Remove the quotes
# to re-enable the marker UMAPs.
'''
sc.pl.umap(
        adata,
        color=["CD4", "CD8B", "FOXP3", "HAVCR2", "MKI67"],
        vmin=0,
        vmax="p99",  
        sort_order=False,  
        frameon=False,
        use_raw=False,
        save = "_Markers" 
)
'''

In [None]:
# Show the distinct values stored in the `cloneSize` column.
adata.obs.cloneSize.unique()

In [None]:
# Give `cloneSize` an explicit category order; this list is also what the
# colour palette is keyed on further down.
legend_order = [
    'Hyperexpanded (50 < X <= 2611)',
    'Large (20 < X <= 50)',
    'Medium (5 < X <= 20)',
    'Small (1 < X <= 5)',
    'Single (0 < X <= 1)',
    'NA',
]
adata.obs['cloneSize'] = pd.Categorical(
    values=adata.obs['cloneSize'],
    categories=legend_order,
)

In [None]:
# Clone-size palette: five 'inferno' colours sampled at 1.0, 0.75, 0.5, 0.25
# and 0.0 (in that order), followed by grey as a sixth entry.
# `plt.get_cmap` is used instead of `matplotlib.cm.get_cmap`, which was
# deprecated in Matplotlib 3.7 and removed in 3.9.
color_map = plt.get_cmap('inferno')
colorblind_vector = [color_map(i/4) for i in range(4, -1, -1)]
colorblind_vector.append("grey")
print(colorblind_vector)


In [None]:
# Pair each cloneSize label with its colour, position by position.
colorblind_dict = dict(zip(legend_order, colorblind_vector))

In [None]:
# UMAP of all cells coloured by clone-size bin, saved through scanpy.
_all_clones_kwargs = dict(
    color=["cloneSize"],
    palette=colorblind_dict,
    na_color="grey",
    frameon=False,
    title="Tumor Clonotypes",
    save="_Clonal_Expansion_all_clones_all_patients_all_timepoints.png",
)
sc.pl.umap(adata, **_all_clones_kwargs)

In [None]:
def split_umap(adata_list, split_by, ncol=2, nrow=1, categories=None, **kwargs):
    """Draw one UMAP panel per category of ``split_by`` in a single figure.

    Panel ``i`` shows ``adata_list[i]`` restricted to the cells whose
    ``obs[split_by]`` equals the ``i``-th category.

    Parameters
    ----------
    adata_list : sequence of AnnData
        One object per category, in category order.
    split_by : str
        Name of the categorical ``obs`` column to split on.
    ncol, nrow : int
        Shape of the subplot grid. Pass ``nrow=None`` to derive the number of
        rows from the number of categories.
    categories : sequence, optional
        Category values, one per panel. When omitted they are read from the
        notebook-level ``adata`` object (the previous, implicit behaviour).
    **kwargs
        Forwarded unchanged to ``sc.pl.umap``.

    Raises
    ------
    ValueError
        If there are fewer objects in ``adata_list`` than categories, or the
        grid has fewer cells than categories.
    """
    if categories is None:
        # Backward-compatible default: take the categories from the global
        # `adata`, exactly as this helper always did.
        categories = adata.obs[split_by].cat.categories
    categories = list(categories)

    if nrow is None:
        nrow = max(1, int(np.ceil(len(categories) / ncol)))

    if len(adata_list) < len(categories):
        raise ValueError(
            f"split_umap needs one AnnData per category of '{split_by}': "
            f"got {len(adata_list)} for {len(categories)} categories"
        )
    if nrow * ncol < len(categories):
        raise ValueError(
            f"a {nrow}x{ncol} grid cannot hold {len(categories)} panels"
        )

    # squeeze=False always yields a 2-D array of axes, so flatten() also
    # works for a 1x1 grid (where plt.subplots would return a bare Axes).
    _, axs = plt.subplots(nrow, ncol, figsize=(5*ncol, 3*nrow), squeeze=False)
    axs = axs.flatten()

    for ax, cat, adata_cat in zip(axs, categories, adata_list):
        sc.pl.umap(
            adata_cat[adata_cat.obs[split_by] == cat],
            ax=ax,
            show=False,
            title=cat,
            **kwargs,
        )

    plt.tight_layout()

In [None]:
# Convert the "TRUE"/"FALSE" string flags into booleans.
# Booleans map to themselves, so re-running this cell on already-converted
# columns keeps their values instead of turning every entry into NaN.
_flag_to_bool = {"TRUE": True, "FALSE": False, True: True, False: False}
for _flag_col in ("blood", "blood_pre", "blood_post"):
    adata.obs[_flag_col] = adata.obs[_flag_col].map(_flag_to_bool)

In [None]:
# Cell counts per value of the `blood` flag.
print(pd.crosstab(adata.obs['blood'], columns = "count"))
# Fraction of (non-missing) cells flagged True, computed from the data rather
# than from hand-copied counts so it cannot go stale.
# NOTE(review): assumes the former hard-coded 31466 was the number of True
# cells and 14249 the number of False cells -- confirm.
_n_blood_true = int((adata.obs['blood'] == True).sum())
_n_blood_known = int(adata.obs['blood'].notna().sum())
print(_n_blood_true / _n_blood_known)

In [None]:
# Unique `ctnt_TRB` clonotypes in the integrated object: overall, and within
# the cells flagged blood == True / blood == False.
print("Data for Integrated Object")
_obs_all = adata.obs
_n_total = _obs_all.ctnt_TRB.nunique()
_n_in_blood = _obs_all.loc[_obs_all.blood == True, "ctnt_TRB"].nunique()
_n_not_in_blood = _obs_all.loc[_obs_all.blood == False, "ctnt_TRB"].nunique()
print(f"Number of Tumor Clonotypes: {_n_total}")
print(f"Number of Tumor Clonotypes found in Blood: {_n_in_blood}")
print(f"Number of Tumor Clonotypes NOT found in Blood: {_n_not_in_blood}")
print(f"Proportion of Tumor Clonotypes in Blood: {_n_in_blood / _n_total}")

In [None]:
# Unique `ctnt_TRB` clonotypes for patient 01: overall, and within the cells
# flagged blood == True / blood == False.
print("Data for Patient 01")
_obs_p = adata.obs[adata.obs.patient == "01"]
_n_total = _obs_p.ctnt_TRB.nunique()
_n_in_blood = _obs_p.loc[_obs_p.blood == True, "ctnt_TRB"].nunique()
_n_not_in_blood = _obs_p.loc[_obs_p.blood == False, "ctnt_TRB"].nunique()
print(f"Number of Tumor Clonotypes: {_n_total}")
print(f"Number of Tumor Clonotypes found in Blood: {_n_in_blood}")
print(f"Number of Tumor Clonotypes NOT found in Blood: {_n_not_in_blood}")
print(f"Proportion of Tumor Clonotypes in Blood: {_n_in_blood / _n_total}")

In [None]:
# Unique `ctnt_TRB` clonotypes for patient 02: overall, and within the cells
# flagged blood == True / blood == False.
print("Data for Patient 02")
_obs_p = adata.obs[adata.obs.patient == "02"]
_n_total = _obs_p.ctnt_TRB.nunique()
_n_in_blood = _obs_p.loc[_obs_p.blood == True, "ctnt_TRB"].nunique()
_n_not_in_blood = _obs_p.loc[_obs_p.blood == False, "ctnt_TRB"].nunique()
print(f"Number of Tumor Clonotypes: {_n_total}")
print(f"Number of Tumor Clonotypes found in Blood: {_n_in_blood}")
print(f"Number of Tumor Clonotypes NOT found in Blood: {_n_not_in_blood}")
print(f"Proportion of Tumor Clonotypes in Blood: {_n_in_blood / _n_total}")

In [None]:
# Unique `ctnt_TRB` clonotypes for patient 03: overall, and within the cells
# flagged blood == True / blood == False.
print("Data for Patient 03")
_obs_p = adata.obs[adata.obs.patient == "03"]
_n_total = _obs_p.ctnt_TRB.nunique()
_n_in_blood = _obs_p.loc[_obs_p.blood == True, "ctnt_TRB"].nunique()
_n_not_in_blood = _obs_p.loc[_obs_p.blood == False, "ctnt_TRB"].nunique()
print(f"Number of Tumor Clonotypes: {_n_total}")
print(f"Number of Tumor Clonotypes found in Blood: {_n_in_blood}")
print(f"Number of Tumor Clonotypes NOT found in Blood: {_n_not_in_blood}")
print(f"Proportion of Tumor Clonotypes in Blood: {_n_in_blood / _n_total}")

In [None]:
# Unique `ctnt_TRB` clonotypes for patient 08: overall, and within the cells
# flagged blood == True / blood == False.
print("Data for Patient 08")
_obs_p = adata.obs[adata.obs.patient == "08"]
_n_total = _obs_p.ctnt_TRB.nunique()
_n_in_blood = _obs_p.loc[_obs_p.blood == True, "ctnt_TRB"].nunique()
_n_not_in_blood = _obs_p.loc[_obs_p.blood == False, "ctnt_TRB"].nunique()
print(f"Number of Tumor Clonotypes: {_n_total}")
print(f"Number of Tumor Clonotypes found in Blood: {_n_in_blood}")
print(f"Number of Tumor Clonotypes NOT found in Blood: {_n_not_in_blood}")
print(f"Proportion of Tumor Clonotypes in Blood: {_n_in_blood / _n_total}")

In [None]:
# Unique `ctnt_TRB` clonotypes for patient 10: overall, and within the cells
# flagged blood == True / blood == False.
print("Data for Patient 10")
_obs_p = adata.obs[adata.obs.patient == "10"]
_n_total = _obs_p.ctnt_TRB.nunique()
_n_in_blood = _obs_p.loc[_obs_p.blood == True, "ctnt_TRB"].nunique()
_n_not_in_blood = _obs_p.loc[_obs_p.blood == False, "ctnt_TRB"].nunique()
print(f"Number of Tumor Clonotypes: {_n_total}")
print(f"Number of Tumor Clonotypes found in Blood: {_n_in_blood}")
print(f"Number of Tumor Clonotypes NOT found in Blood: {_n_not_in_blood}")
print(f"Proportion of Tumor Clonotypes in Blood: {_n_in_blood / _n_total}")

In [None]:
# UMAP restricted to cells flagged blood == True, coloured by clone-size bin.
_in_blood_view = adata[adata.obs.blood == True]
sc.pl.umap(
    _in_blood_view,
    color="cloneSize",
    title="Tumor-blood Clonotypes",
    palette=colorblind_dict,
    frameon=False,
    save="_Clonal_Expansion_blood-tumor_clones_all_patients_all_timepoints.png",
)

In [None]:
# Distinct `blood_pre` values among the cells flagged blood == True.
# `unique` must be called; without the parentheses the cell only displayed
# the bound method object.
adata[adata.obs.blood == True].obs.blood_pre.unique()

In [None]:
# Tumour-blood clonotypes, one panel per `Timepoint` category: split_umap
# takes panel i from adata_list[i], so the first panel draws from the
# blood_pre == True cells and the second from the blood_post == True cells.
split_umap(adata_list=[adata[adata.obs.blood_pre == True], adata[adata.obs.blood_post == True]],
           color=["cloneSize"], 
           split_by="Timepoint",
           palette=colorblind_dict,
           frameon=False,
           legend_fontsize=8,
)
# The file name ends in ".png", so write PNG data. This call previously passed
# format="pdf", which stored a PDF under a .png name; the per-patient cells
# already use format="png".
plt.savefig(os.path.join(fig_dir, "umap" + "_Clonal_Expansion_blood-tumor_clones_split_timepoint_all_patients.png"), dpi=300, format="png", bbox_inches="tight")

In [None]:
# Patient 01: blood_pre / blood_post cells split by `Timepoint`, saved as PNG.
_is_patient = adata.obs.patient == "01"
split_umap(
    adata_list=[
        adata[(adata.obs.blood_pre == True) & _is_patient],
        adata[(adata.obs.blood_post == True) & _is_patient],
    ],
    split_by="Timepoint",
    color=["cloneSize"],
    palette=colorblind_dict,
    legend_fontsize=8,
    frameon=False,
)
plt.savefig(
    os.path.join(fig_dir, "umap_patient01_Clonal_Expansion_blood-tumor_clones_split_timepoint.png"),
    format="png",
    dpi=300,
    bbox_inches="tight",
)

In [None]:
# Patient 02: blood_pre / blood_post cells split by `Timepoint`, saved as PNG.
_is_patient = adata.obs.patient == "02"
split_umap(
    adata_list=[
        adata[(adata.obs.blood_pre == True) & _is_patient],
        adata[(adata.obs.blood_post == True) & _is_patient],
    ],
    split_by="Timepoint",
    color=["cloneSize"],
    palette=colorblind_dict,
    legend_fontsize=8,
    frameon=False,
)
plt.savefig(
    os.path.join(fig_dir, "umap_patient02_Clonal_Expansion_blood-tumor_clones_split_timepoint.png"),
    format="png",
    dpi=300,
    bbox_inches="tight",
)

In [None]:
# Patient 03: blood_pre / blood_post cells split by `Timepoint`, saved as PNG.
_is_patient = adata.obs.patient == "03"
split_umap(
    adata_list=[
        adata[(adata.obs.blood_pre == True) & _is_patient],
        adata[(adata.obs.blood_post == True) & _is_patient],
    ],
    split_by="Timepoint",
    color=["cloneSize"],
    palette=colorblind_dict,
    legend_fontsize=8,
    frameon=False,
)
plt.savefig(
    os.path.join(fig_dir, "umap_patient03_Clonal_Expansion_blood-tumor_clones_split_timepoint.png"),
    format="png",
    dpi=300,
    bbox_inches="tight",
)

In [None]:
# Patient 08: blood_pre / blood_post cells split by `Timepoint`, saved as PNG.
_is_patient = adata.obs.patient == "08"
split_umap(
    adata_list=[
        adata[(adata.obs.blood_pre == True) & _is_patient],
        adata[(adata.obs.blood_post == True) & _is_patient],
    ],
    split_by="Timepoint",
    color=["cloneSize"],
    palette=colorblind_dict,
    legend_fontsize=8,
    frameon=False,
)
plt.savefig(
    os.path.join(fig_dir, "umap_patient08_Clonal_Expansion_blood-tumor_clones_split_timepoint.png"),
    format="png",
    dpi=300,
    bbox_inches="tight",
)

In [None]:
# Patient 10: blood_pre / blood_post cells split by `Timepoint`, saved as PNG.
_is_patient = adata.obs.patient == "10"
split_umap(
    adata_list=[
        adata[(adata.obs.blood_pre == True) & _is_patient],
        adata[(adata.obs.blood_post == True) & _is_patient],
    ],
    split_by="Timepoint",
    color=["cloneSize"],
    palette=colorblind_dict,
    legend_fontsize=8,
    frameon=False,
)
plt.savefig(
    os.path.join(fig_dir, "umap_patient10_Clonal_Expansion_blood-tumor_clones_split_timepoint.png"),
    format="png",
    dpi=300,
    bbox_inches="tight",
)

# Poster/Thesis Plots

In [None]:
# load packages
import sys
import scanpy as sc
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.cm as cm

In [None]:
# Project root and the poster figure directory (used for both scanpy's
# `save=` output and `fig_dir`), followed by the poster figure parameters.
work_dir = "/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/"
fig_dir = os.path.join(work_dir, "figures", "ISCO24_poster", "Fig3")
sc.settings.figdir = fig_dir
sc.set_figure_params(
    dpi=120,
    dpi_save=600,
    format='pdf',
    frameon=False,
    figsize=(3, 3),
)

In [None]:
# Reload the combined AnnData object from its .h5ad file.
adata = sc.read_h5ad(
    "/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/data/outputdata/combined/Combined_SCR_C02_TCR_tumour_blood_30-04-24.h5ad"
)

In [None]:
# Give `cloneSize` an explicit category order; this list is also what the
# colour palette is keyed on further down.
legend_order = [
    'Hyperexpanded (50 < X <= 2611)',
    'Large (20 < X <= 50)',
    'Medium (5 < X <= 20)',
    'Small (1 < X <= 5)',
    'Single (0 < X <= 1)',
    'NA',
]
adata.obs['cloneSize'] = pd.Categorical(
    values=adata.obs['cloneSize'],
    categories=legend_order,
)

In [None]:
# Clone-size palette: five 'inferno' colours sampled at 1.0, 0.75, 0.5, 0.25
# and 0.0 (in that order), followed by grey as a sixth entry.
# `plt.get_cmap` is used instead of `matplotlib.cm.get_cmap`, which was
# deprecated in Matplotlib 3.7 and removed in 3.9.
color_map = plt.get_cmap('inferno')
colorblind_vector = [color_map(i/4) for i in range(4, -1, -1)]
colorblind_vector.append("grey")
print(colorblind_vector)


In [None]:
# Pair each cloneSize label with its colour, position by position.
colorblind_dict = dict(zip(legend_order, colorblind_vector))

In [None]:
def split_umap(adata_list, split_by, ncol=2, nrow=1, categories=None, **kwargs):
    """Draw one UMAP panel per category of ``split_by``, legend on the last panel only.

    Panel ``i`` shows ``adata_list[i]`` restricted to the cells whose
    ``obs[split_by]`` equals the ``i``-th category. Every panel except the
    last is drawn with ``legend_loc="none"``.

    Parameters
    ----------
    adata_list : sequence of AnnData
        One object per category, in category order.
    split_by : str
        Name of the categorical ``obs`` column to split on.
    ncol, nrow : int
        Shape of the subplot grid. Pass ``nrow=None`` to derive the number of
        rows from the number of categories.
    categories : sequence, optional
        Category values, one per panel. When omitted they are read from the
        notebook-level ``adata`` object (the previous, implicit behaviour).
    **kwargs
        Forwarded to ``sc.pl.umap``. A ``legend_loc`` given here applies to
        the last panel only (it used to raise a duplicate-keyword TypeError).

    Raises
    ------
    ValueError
        If there are fewer objects in ``adata_list`` than categories, or the
        grid has fewer cells than categories.
    """
    if categories is None:
        # Backward-compatible default: take the categories from the global
        # `adata`, exactly as this helper always did.
        categories = adata.obs[split_by].cat.categories
    categories = list(categories)

    if nrow is None:
        nrow = max(1, int(np.ceil(len(categories) / ncol)))

    if len(adata_list) < len(categories):
        raise ValueError(
            f"split_umap needs one AnnData per category of '{split_by}': "
            f"got {len(adata_list)} for {len(categories)} categories"
        )
    if nrow * ncol < len(categories):
        raise ValueError(
            f"a {nrow}x{ncol} grid cannot hold {len(categories)} panels"
        )

    # squeeze=False always yields a 2-D array of axes, so flatten() also
    # works for a 1x1 grid (where plt.subplots would return a bare Axes).
    _, axs = plt.subplots(nrow, ncol, figsize=(5*ncol, 3*nrow), squeeze=False)
    axs = axs.flatten()

    last_panel = len(categories) - 1
    for i, (ax, cat, adata_cat) in enumerate(zip(axs, categories, adata_list)):
        panel_kwargs = dict(kwargs)
        if i != last_panel:
            # Suppress the legend everywhere but on the final panel.
            panel_kwargs["legend_loc"] = "none"
        sc.pl.umap(
            adata_cat[adata_cat.obs[split_by] == cat],
            ax=ax,
            show=False,
            title=cat,
            **panel_kwargs,
        )

    plt.tight_layout()

In [None]:
# Poster panel: all cells coloured by clone-size bin (shown, not saved).
_poster_all_kwargs = dict(
    color="cloneSize",
    palette=colorblind_dict,
    title="Tumor Clonotypes",
    legend_fontsize=8,
    sort_order=True,
    use_raw=False,
    frameon=False,
    vmin=0,
    vmax="p99",
)
sc.pl.umap(adata, **_poster_all_kwargs)

In [None]:
# Poster panel: cells whose `blood` flag is the string "TRUE", coloured by
# clone-size bin, legend hidden, saved as PDF.
_blood_view = adata[adata.obs.blood == "TRUE"]
sc.pl.umap(
    _blood_view,
    color="cloneSize",
    palette=colorblind_dict,
    title="Tumor-Blood Clonotypes (69%)",
    legend_loc="none",
    legend_fontsize=8,
    sort_order=True,
    use_raw=False,
    frameon=False,
    vmin=0,
    vmax="p99",
    save="_Tumor-Blood_Clonotypes.pdf",
)

In [None]:
# Poster panel: cells with timepoint == "SCR", coloured by clone-size bin,
# saved as PDF.
_t0_view = adata[adata.obs.timepoint == "SCR"]
sc.pl.umap(
    _t0_view,
    color="cloneSize",
    palette=colorblind_dict,
    title="T0/-ICI",
    legend_loc="best",
    legend_fontsize=8,
    sort_order=True,
    use_raw=False,
    frameon=False,
    vmin=0,
    vmax="p99",
    save="_Tumor_Clonotypes_T0.pdf",
)

In [None]:
# Poster panel: cells with timepoint != "SCR", coloured by clone-size bin,
# saved as PDF.
_t1_view = adata[adata.obs.timepoint != "SCR"]
sc.pl.umap(
    _t1_view,
    color="cloneSize",
    palette=colorblind_dict,
    title="T1/+ICI",
    legend_fontsize=8,
    sort_order=True,
    use_raw=False,
    frameon=False,
    vmin=0,
    vmax="p99",
    save="_Tumor_Clonotypes_T1.pdf",
)

In [None]:
# Poster panel: cells flagged `blood_pre`, coloured by clone-size bin.
# In this section the object is freshly reloaded, so the flag columns still
# hold the strings "TRUE"/"FALSE" (the tumour-blood panel filters with
# blood == "TRUE"); comparing with `== True` therefore selected no cells.
# Accept both the string and the boolean form so the cell works either way.
sc.pl.umap(
    adata[adata.obs.blood_pre.isin(["TRUE", True])],
    color=["cloneSize"],
    palette=colorblind_dict,
    frameon=False,
    legend_fontsize=8,
    title="Tumor-blood Clonotypes (T0/-ICI)",
    save="_Clonal_Expansion_blood-tumor_clones_T0_all_patients.pdf",
)

In [None]:
# Poster panel: cells flagged `blood_post`, coloured by clone-size bin.
# In this section the object is freshly reloaded, so the flag columns still
# hold the strings "TRUE"/"FALSE" (the tumour-blood panel filters with
# blood == "TRUE"); comparing with `== True` therefore selected no cells.
# Accept both the string and the boolean form so the cell works either way.
sc.pl.umap(
    adata[adata.obs.blood_post.isin(["TRUE", True])],
    color=["cloneSize"],
    palette=colorblind_dict,
    frameon=False,
    legend_fontsize=8,
    title="Tumor-blood Clonotypes (T1/+ICI)",
    save="_Clonal_Expansion_blood-tumor_clones_T1_all_patients.pdf",
)

In [None]:
# Copy the per-cell metadata into a standalone DataFrame for manipulation.
df = adata.obs.copy()

# `cloneSize` holds bin labels such as 'Large (20 < X <= 50)', not numbers.
# The previous pd.to_numeric(..., errors='coerce') turned every label into
# NaN, and the NaN filter then emptied the table. Keep the labels instead.

# Drop cells without a clone-size bin (missing values or the 'NA' label).
# NOTE(review): assumes 'NA' marks cells without a clonotype -- confirm.
df = df[df['cloneSize'].notna() & (df['cloneSize'] != 'NA')]

# Sort by clone-size bin. The column was made categorical with the
# `legend_order` categories earlier in this section, so an ascending sort
# follows that order and puts the 'Hyperexpanded' bin first.
df = df.sort_values(by='cloneSize')

df.cloneSize

In [None]:
# Display the per-cell `cloneSize` column.
adata.obs["cloneSize"]

### Responder Patient Plots

In [None]:
# Patient 08, cells with timepoint != "SCR", coloured by clone-size bin
# (shown, not saved).
_p08_post_mask = (adata.obs.patient == "08") & (adata.obs.timepoint != "SCR")
sc.pl.umap(
    adata[_p08_post_mask],
    color="cloneSize",
    palette=colorblind_dict,
    title="Clonal Expansion",
    legend_loc="best",
    legend_fontsize=8,
    sort_order=True,
    use_raw=False,
    frameon=False,
    vmin=0,
    vmax="p99",
)