# Part 15: Analysis of TCR from HPAP

In [None]:
# Loading libraries
library("Matrix")
library(Seurat)
library(DT)
library(dplyr)
library(here)
library(ggplot2)
library(kableExtra)
library(cowplot)
library(tidyverse)
library(reshape)
library(SingleR)
library(furrr)

# Theme additions shared by the figures in this notebook: black 20 pt text for
# axes, titles and legend, 10 pt legend keys and no plot background.
# Returns a ggplot2 theme object that can be added to a plot with `+`.
ggtheme <- function() {
  black_20 <- element_text(size = 20, colour = "black")
  theme(
    axis.text = black_20,
    axis.title = black_20,
    text = black_20,
    legend.text = black_20,
    legend.key.size = unit(10, units = "points"),
    plot.background = element_blank()
  )
}


In this part, we will analyze TCR repertoire data, which were obtained from the HPAP database. Raw data can be downloaded upon registration from the HPAP database. Processed data can be downloaded from Zenodo: `data/published_data/HPAP_TCR`.

## Raw data

This part requires raw data, so please download it before running the code below. The raw data folders, such as `HPAP-001`, should be saved in the folder `./data/published_data/HPAP_TCR`.

In [None]:
# Top-level folders (one per donor, e.g. HPAP-001) of the HPAP TCR directory.
paths <- list.dirs("./data/published_data/HPAP_TCR", recursive = FALSE)

In [None]:
# Point each donor folder at the sub-folder holding the TCRb V-region output.
paths <- paste0(
  paths,
  "/T cell studies/T cell receptor repertoire TCRb Vregion seq/"
)

In [None]:
# Show the resulting paths.
paths

In [None]:
# Read the two replicate MiXCR TRB clone tables found in `path`.
#
# Early draft of get_mixcr_tcr_data(); nothing in the notebook calls it.
#
# path: directory containing one file whose name includes
#       "Replicate-1.clones_TRB.tsv" and one whose name includes
#       "Replicate-2.clones_TRB.tsv".
# Returns (invisibly) the table read from the Replicate-2 file, i.e. the value
# the original implementation evaluated to.
get_data <- function(path) {
  # full.names = TRUE so the files resolve from any working directory.
  file_path <- list.files(path, full.names = TRUE)

  # fixed = TRUE: the "." in the file name is a literal dot, not a wildcard.
  file_path1 <- file_path[
    grepl("Replicate-1.clones_TRB.tsv", file_path, fixed = TRUE)
  ]
  file_path2 <- file_path[
    grepl("Replicate-2.clones_TRB.tsv", file_path, fixed = TRUE)
  ]

  # data.table is attached only further down, so call fread() qualified.
  # Replicate 1 is still read so a missing/unreadable file fails here.
  dat1 <- data.table::fread(file_path1)
  dat2 <- data.table::fread(file_path2)
  invisible(dat2)
}

In [None]:
library(data.table)

In [None]:
# Load both replicate MiXCR TRB clone tables of one donor folder.
#
# path: directory holding the "...Replicate-1.clones_TRB.tsv" and
#       "...Replicate-2.clones_TRB.tsv" files.
# Returns the two tables stacked with rbind(), restricted to rows with
# minQualCDR3 == 58 and extended by `Patient_ID` (first 8 characters of the
# first file name in the folder) and `Replicate` ("Rep1" / "Rep2").
get_mixcr_tcr_data <- function(path) {
  files_full <- list.files(path, full.names = TRUE)
  files_short <- list.files(path)
  donor <- substr(files_short[1], 1, 8)

  # Read one replicate file, filter it and tag it with donor and replicate.
  load_replicate <- function(file_pattern, replicate_label) {
    hit <- files_full[grepl(files_full, pattern = file_pattern)]
    clones <- fread(hit) %>% filter(minQualCDR3 == 58)
    clones$Patient_ID <- donor
    clones$Replicate <- replicate_label
    clones
  }

  rep1 <- load_replicate("Replicate-1.clones_TRB.tsv", "Rep1")
  rep2 <- load_replicate("Replicate-2.clones_TRB.tsv", "Rep2")
  rbind(rep1, rep2)
}


Now load, process and merge all TCR data. 

In [None]:
# One clone table per donor folder.
all_tcrs <- purrr::map(paths, get_mixcr_tcr_data)

In [None]:
# Convert the `minQualCDR2` and `minQualFR3` columns of `df` to character and
# return the modified table.
# NOTE(review): presumably done so the per-donor tables share column types
# before they are bound together -- confirm.
fct_qual_as_char <- function(df) {
  for (qual_col in c("minQualCDR2", "minQualFR3")) {
    df[[qual_col]] <- as.character(df[[qual_col]])
  }
  return(df)
}

In [None]:
# Harmonise the quality columns of every per-donor table.
all_tcrs2 <- map(all_tcrs, fct_qual_as_char)

In [None]:
# Stack all donors into a single table.
all_tcrs3 <- bind_rows(all_tcrs2)

In [None]:
# Number of rows per donor, smallest first.
all_tcrs3 %>%
  group_by(Patient_ID) %>%
  tally() %>%
  arrange(n)

In [None]:
# Store the merged table for later reuse.
write.csv(all_tcrs3, "hpap_tcr.csv")

## Processed data

To recapitulate the analysis, please load the `hpap_tcr.csv` file saved in the folder `./data/published_data/HPAP_TCR` at Zenodo. 

In [None]:
# Load the processed TCR table.
# NOTE(review): the write.csv() call earlier in this notebook keeps row names,
# so a file produced by it presumably gains an unnamed leading column when it
# is read back -- confirm before relying on column positions.
all_tcrs3  <- read_csv("hpap_tcr.csv")

In [None]:
# Inspect the loaded table.
all_tcrs3

The metadata file is also saved in the same folder at Zenodo. 

In [None]:
# Donor-level metadata.
patient_metadata <- read_delim("hpap_medatata.csv")

In [None]:
patient_metadata

In [None]:
# Clinical diagnoses of the donors that have TCR data.
patient_metadata %>%
  filter(donor_ID %in% all_tcrs3$Patient_ID) %>%
  pull(clinical_diagnosis) %>%
  table

In [None]:
# Attach the metadata to every TCR row. No `by` is given, so left_join() uses
# all columns the two tables have in common (Patient_ID is added for that).
all_tcrs4 <- left_join(
  all_tcrs3,
  patient_metadata %>% mutate(Patient_ID = donor_ID)
)

In [None]:
all_tcrs4

In [None]:
colnames(all_tcrs4)

## CDR3 beta length

Below, we will focus on the length of CDR3 (similar to our dataset which is described in parts 04 and 05).

In [None]:
# Working copy used for the CDR3 length analysis.
metadata_6 <- all_tcrs4

In [None]:
# Number of characters of the CDR3 amino-acid sequence.
metadata_6[["cdr3_B_nchar"]] <- nchar(metadata_6[["aaSeqCDR3"]])

In [None]:
# One label per donor and replicate.
metadata_6[["Sample_ID"]] <- paste(
  metadata_6[["Patient_ID"]],
  metadata_6[["Replicate"]]
)

In [None]:
options(repr.plot.width = 17)

# Rows per sample (donor + replicate) for every CDR3 beta length below 23,
# controls vs. T1D, one facet per length.
metadata_6 %>%
  filter(as.numeric(cdr3_B_nchar) < 23) %>%
  filter(clinical_diagnosis %in% c("T1D control", "T1DM")) %>%
  mutate(Disease = ifelse(grepl(pattern = "control", x = clinical_diagnosis), "Ctrl", "Dia")) %>%
  group_by(Disease, Sample_ID, cdr3_B_nchar) %>%
  tally() %>%
  filter(!is.na(cdr3_B_nchar)) %>%
  ggplot(aes(x = Disease, y = n)) +
  geom_boxplot(outlier.shape = NA, aes(fill = Disease), alpha = 0.4) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(width = 0.2, height = 0.5, size = 0.5, alpha = 1, aes(color = Disease)) +
  facet_wrap(~factor(cdr3_B_nchar), scales = "fixed", ncol = 19, drop = TRUE) +
  # y is the raw row count from tally(); nothing here is log2-transformed.
  ylab("Number of clones") +
  # The facets (not the x axis, whose labels are hidden) carry the lengths.
  xlab("CDR3 beta length") +
  theme_classic() +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

In [None]:
options(repr.plot.width = 17)

# NOTE(review): this cell repeats the previous plot unchanged.
# Rows per sample (donor + replicate) for every CDR3 beta length below 23,
# controls vs. T1D, one facet per length.
metadata_6 %>%
  filter(as.numeric(cdr3_B_nchar) < 23) %>%
  filter(clinical_diagnosis %in% c("T1D control", "T1DM")) %>%
  mutate(Disease = ifelse(grepl(pattern = "control", x = clinical_diagnosis), "Ctrl", "Dia")) %>%
  group_by(Disease, Sample_ID, cdr3_B_nchar) %>%
  tally() %>%
  filter(!is.na(cdr3_B_nchar)) %>%
  ggplot(aes(x = Disease, y = n)) +
  geom_boxplot(outlier.shape = NA, aes(fill = Disease), alpha = 0.4) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(width = 0.2, height = 0.5, size = 0.5, alpha = 1, aes(color = Disease)) +
  facet_wrap(~factor(cdr3_B_nchar), scales = "fixed", ncol = 19, drop = TRUE) +
  # y is the raw row count from tally(); nothing here is log2-transformed.
  ylab("Number of clones") +
  xlab("CDR3 beta length") +
  theme_classic() +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

* Each sequence is counted for a patient as many times as it occurs.
* The number of sequences of a given length is counted for each patient.
* The frequency of that length is then plotted for each patient.

In [None]:
options(repr.plot.width = 17)

# Per donor: number of rows of each CDR3 beta length (< 23) and the share of
# that length among the donor's rows. summarise() drops the last grouping
# level, so sum(n) runs within Disease x Patient_ID.
test_length <- metadata_6 %>%
  filter(as.numeric(cdr3_B_nchar) < 23) %>%
  filter(clinical_diagnosis %in% c("T1D control", "T1DM")) %>%
  mutate(
    Disease = ifelse(grepl("control", clinical_diagnosis), "Ctrl", "Dia")
  ) %>%
  group_by(Disease, Patient_ID, cdr3_B_nchar) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n))

In [None]:
# Frequency of each CDR3 beta length per donor, controls vs. T1D, one facet
# per length.
test_length %>%
  filter(!is.na(cdr3_B_nchar)) %>%
  ggplot(aes(x = Disease, y = freq)) +
  geom_boxplot(aes(fill = Disease), outlier.shape = NA, alpha = 0.4) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(aes(color = Disease), width = 0.2, height = 0, size = 0.5, alpha = 1) +
  facet_wrap(~factor(cdr3_B_nchar), scales = "fixed", ncol = 19, drop = TRUE) +
  ylab("Frequency") +
  xlab("CDR3 beta length") +
  theme_classic() +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

* Count each sequence for one patient only once

* I count the number of sequences of a given length for one patient

* Then I plot the frequency of that length for the given patient

In [None]:
# Inspect the TCR table with the donor metadata attached.
all_tcrs4

## Plot for figure

In [None]:
options(repr.plot.width = 17)

# NOTE(review): same plot as in the "CDR3 beta length" section above.
# Rows per sample (donor + replicate) for every CDR3 beta length below 23,
# controls vs. T1D, one facet per length.
metadata_6 %>%
  filter(as.numeric(cdr3_B_nchar) < 23) %>%
  filter(clinical_diagnosis %in% c("T1D control", "T1DM")) %>%
  mutate(Disease = ifelse(grepl(pattern = "control", x = clinical_diagnosis), "Ctrl", "Dia")) %>%
  group_by(Disease, Sample_ID, cdr3_B_nchar) %>%
  tally() %>%
  filter(!is.na(cdr3_B_nchar)) %>%
  ggplot(aes(x = Disease, y = n)) +
  geom_boxplot(outlier.shape = NA, aes(fill = Disease), alpha = 0.4) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(width = 0.2, height = 0.5, size = 0.5, alpha = 1, aes(color = Disease)) +
  facet_wrap(~factor(cdr3_B_nchar), scales = "fixed", ncol = 19, drop = TRUE) +
  # y is the raw row count from tally(); nothing here is log2-transformed.
  ylab("Number of clones") +
  xlab("CDR3 beta length") +
  theme_classic() +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

In [None]:
# Column names of the working table.
colnames(metadata_6)

In [None]:
options(repr.plot.width = 17)

# Same length frequencies as before, but each distinct CDR3 sequence is
# counted only once per donor (duplicates removed with unique()).
test_length <- metadata_6 %>%
  dplyr::select(clinical_diagnosis, Patient_ID, cdr3_B_nchar, aaSeqCDR3) %>%
  unique() %>%
  filter(clinical_diagnosis %in% c("T1D control", "T1DM")) %>%
  mutate(
    Disease = ifelse(grepl("control", clinical_diagnosis), "Ctrl", "Dia")
  ) %>%
  group_by(Disease, Patient_ID, cdr3_B_nchar) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n))

# Sanity check: the frequencies of every donor are summed up.
test_length %>%
  group_by(Patient_ID) %>%
  summarise(sum = sum(freq))

In [None]:
# Frequency of each CDR3 beta length (< 26) per donor, controls vs. T1D.
test_length %>%
  filter(!is.na(cdr3_B_nchar)) %>%
  filter(as.numeric(cdr3_B_nchar) < 26) %>%
  ggplot(aes(x = Disease, y = freq)) +
  geom_boxplot(aes(fill = Disease), outlier.shape = NA, alpha = 0.4) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(aes(color = Disease), width = 0.2, height = 0, size = 0.5, alpha = 1) +
  facet_wrap(~factor(cdr3_B_nchar), scales = "fixed", ncol = 21, drop = TRUE) +
  ylab("Frequency") +
  xlab("CDR3 beta length") +
  theme_classic() +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

In [None]:
# Figure version of the length-frequency plot (lengths < 24).
test_length %>%
  filter(!is.na(cdr3_B_nchar)) %>%
  filter(as.numeric(cdr3_B_nchar) < 24) %>%
  ggplot(aes(x = Disease, y = freq)) +
  geom_boxplot(outlier.shape = NA, aes(fill = Disease), alpha = 0.4) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(width = 0.2, height = 0, size = 0.7, shape = 21, alpha = 0.7, aes(fill = Disease)) +
  facet_wrap(~factor(cdr3_B_nchar), scales = "fixed", ncol = 21, drop = TRUE) +
  scale_fill_manual(values = c("dodgerblue", "red2")) +
  ylab("Frequency") +
  xlab("CDR3 beta length") +
  theme_classic() +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format", size = 7) +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  ) +
  ggtheme()

# The output folder is not created anywhere else in this notebook; make sure
# it exists so ggsave() does not fail on a fresh checkout.
dir.create("../figures/tcr_plots/tcr_length", recursive = TRUE, showWarnings = FALSE)
ggsave(filename = "../figures/tcr_plots/tcr_length/hpap.png", width = 22, height = 10, units = "cm")
ggsave(filename = "../figures/tcr_plots/tcr_length/hpap.svg", width = 22, height = 10, units = "cm")

# Change metadata to our format

Next, we need to convert the metadata to the format that we use in our study.

In [None]:
# Add `cdr3_B` (copy of aaSeqCDR3) and `Sample_char` ("<Patient_ID>
# <clinical_diagnosis>"), then keep T1D patients and T1D controls only.
all_tcrs5 <- all_tcrs4 %>%
  mutate(
    cdr3_B = aaSeqCDR3,
    Sample_char = paste(Patient_ID, clinical_diagnosis)
  ) %>%
  filter(clinical_diagnosis %in% c("T1DM", "T1D control"))

In [None]:
# Wide count table: one row per CDR3 beta sequence, one column per sample,
# cells hold the number of rows with that sequence (0 when absent).
clone_table_individual <- all_tcrs5 %>%
  dplyr::group_by(cdr3_B, Sample_char) %>%
  dplyr::summarize(n = n()) %>%
  arrange(desc(n)) %>%
  pivot_wider(
    names_from = Sample_char,
    values_from = n,
    values_fill = 0
  )

In [None]:
# Presence indicator: 0 for NA or zero entries, 1 for everything else.
# Works element-wise on vectors.
is_positive <- function(number) {
  absent <- is.na(number) | number == 0
  return(ifelse(absent, 0, 1))
}

In [None]:
# NOTE(review): get_df_all4_for_tcr_analysis() is not defined in the visible
# part of this notebook; it must be defined or sourced before this cell runs.
# The cells below treat its result as a sample-by-sample table of overlap
# values with sample labels as row names -- confirm.
df_all4  <- get_df_all4_for_tcr_analysis(clone_table_individual)

# TCR overlap

Now let's analyze the TCR overlap in healthy donors and T1D patients, similar to analysis in our dataset. 

In [None]:
# Output folder for the overlap figures (parent folders are created too).
dir.create("../figures/tcr/", recursive = TRUE)

In [None]:
  # Heatmap input: copy of the overlap table in which every entry equal to 1
  # is set to 0, converted to a matrix.
  df24 <- df_all4
  df24[df24 == 1] <- 0
  matrix_4 <- as.matrix(df24)

In [None]:
  # Split the row labels at the space into donor ID and disease token.
  sample_annot <- data.frame(
    rn = rownames(matrix_4),
    row.names = rownames(matrix_4)
  ) %>%
    separate(rn, into = c("Patient_ID", "Disease"), sep = " ", remove = FALSE)

In [None]:
  # The token "T1D" is relabelled "Ctrl"; every other token becomes "T1DM".
  sample_annot <- sample_annot %>%
    mutate(Disease = ifelse(Disease == "T1D", "Ctrl", "T1DM"))

In [None]:
  # Clustered heatmap of the overlap matrix (sample_annot is not passed on).
  pheatmap::pheatmap(
    matrix_4,
    cluster_rows = TRUE,
    cluster_cols = TRUE,
    #filename = paste0("../figures/tcr/",sample_name,"_heatmap.png"),
    width = 17,
    height = 17
  )

In [None]:
# Long table of pairwise overlaps: one row per (var1, var2) sample pair with
# the overlap value and the type of comparison.
#
# Sample labels are "<Patient_ID> <clinical_diagnosis>". The diagnosis
# "T1D control" contains a space itself, so splitting at " " yields three
# tokens for controls; only the first two are kept (extra = "drop"), which is
# why controls are recognised by the token "T1D" below. The second separate()
# overwrites Patient_ID with the ID taken from var2, as before.
overlap_index <- df_all4 %>%
  rownames_to_column("var1") %>%
  pivot_longer(!var1, names_to = "var2", values_to = "overlap") %>%
  unique() %>%
  as.data.frame() %>%
  separate(var1, sep = " ", remove = FALSE, extra = "drop",
           into = c("Patient_ID", "Disease_1")) %>%
  separate(var2, sep = " ", remove = FALSE, extra = "drop",
           into = c("Patient_ID", "Disease_2")) %>%
  mutate(
    comparison_type = ifelse(
      var1 == var2, "SELF - SELF",
      ifelse(
        Disease_1 == "T1DM" & Disease_2 == "T1DM", "DIA - DIA",
        ifelse(
          Disease_1 == "T1D" & Disease_2 == "T1D", "CTRL - CTRL",
          "DIA - CTRL"
        )
      )
    )
  )

In [None]:
    # Overlap between samples by comparison type, self-comparisons excluded.
    overlap_index %>%
      filter(comparison_type != "SELF - SELF") %>%
      ggplot(aes(x = comparison_type, y = overlap)) +
      geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
      ggrastr::rasterize(geom_jitter(position = position_jitter(0.2), size = 1, color = "grey70", alpha = 0.1)) +
      geom_violin(aes(color = comparison_type), scale = "width", alpha = 0.7) +
      theme_classic() +
      NoLegend() +
      theme(axis.text.x = element_text(angle = 45, vjust = 0.8, hjust = 0.8)) +
      ggtitle("Overlap") +
      xlab("Compared diagnoses") +
      ylab("Percentage of shared")

In [None]:
    # `sample` was never assigned in this notebook, so paste0() received the
    # base function sample() and failed; use a fixed "hpap" file prefix.
    ggsave("../figures/tcr/hpap_overlap1.png", width = 15, height = 11, units = "cm")
    ggsave("../figures/tcr/hpap_overlap1.svg", width = 15, height = 11, units = "cm")

In [None]:
    # Same plot with pairwise and global group comparisons added.
    # The "SELF T1 - SELF T0" filter removes nothing here: that label is not
    # among the comparison types created above.
    overlap_index %>%
      filter(comparison_type != "SELF - SELF") %>%
      filter(comparison_type != "SELF T1 - SELF T0") %>%
      ggplot(aes(x = comparison_type, y = overlap)) +
      geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
      ggrastr::rasterize(geom_jitter(position = position_jitter(0.2), size = 1, color = "grey70", alpha = 0.1)) +
      geom_violin(aes(color = comparison_type), scale = "width", alpha = 0.7) +
      theme_classic() +
      NoLegend() +
      theme(axis.text.x = element_text(angle = 45, vjust = 0.8, hjust = 0.8)) +
      ggtitle("Overlap without self self") +
      ggpubr::stat_compare_means(comparisons = list(c(1, 3), c(2, 3), c(1, 2)), size = 7) +
      ggpubr::stat_compare_means(size = 7, label = "p.format") +
      xlab("Compared diagnoses") +
      ylab("Percentage of shared")

In [None]:
    # `sample` was never assigned in this notebook, so paste0() received the
    # base function sample() and failed; use a fixed "hpap" file prefix.
    ggsave("../figures/tcr/hpap_overlap2.png", width = 15, height = 11, units = "cm")
    ggsave("../figures/tcr/hpap_overlap2.svg", width = 15, height = 11, units = "cm")

In [None]:
    # Final overlap figure on a log scale (0.001 is added before taking the
    # log) with a median crossbar per comparison type.
    overlap_index %>%
      filter(comparison_type %in% c("CTRL - CTRL",
                                    "DIA - CTRL",
                                    "DIA - DIA",
                                    "SELF T1 - SELF T0")) %>%
      ggplot(aes(x = comparison_type, y = log(overlap + 0.001))) +
      geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
      ggrastr::rasterize(geom_jitter(position = position_jitter(0.2), size = 1, color = "grey70", alpha = 0.7)) +
      geom_violin(aes(color = comparison_type), scale = "width", alpha = 0.7) +
      theme_classic() +
      NoLegend() +
      theme(axis.text.x = element_text(angle = 45, vjust = 0.8, hjust = 0.8)) +
      scale_color_manual(values = c("blue", "purple", "red", "grey20")) +
      stat_summary(fun = "median",
                   geom = "crossbar",
                   width = 0.5,
                   colour = "grey44") +
      ggpubr::stat_compare_means(comparisons = list(c(1, 3), c(2, 3), c(1, 2)), size = 7) +
      ggpubr::stat_compare_means(size = 7, label = "p.format") +
      ggtheme() +
      theme(axis.ticks.x = element_blank()) +
      xlab("Compared diagnoses") +
      ylab("log(percentage of shared)")

In [None]:
    # `sample` was never assigned in this notebook, so paste0() received the
    # base function sample() and failed; use a fixed "hpap" file prefix.
    ggsave("../figures/tcr/hpap_overlap_final.png", width = 11, height = 18, units = "cm")
    ggsave("../figures/tcr/hpap_overlap_final.svg", width = 11, height = 18, units = "cm")

# TCR properties peptides

Now let's analyze the biochemical properties of the TCR repertoires, similar to the analysis of our dataset. 

In [None]:
library(Peptides)
library(Seurat)
library(dplyr)
    
# Append seven per-sequence property columns computed from `cdr3_B` with the
# Peptides functions pI(), boman(), charge(), hmoment(), hydrophobicity(),
# mw() and mz(). Rows with a missing `cdr3_B` get NA in every new column.
#
# df_tcr: table with a `cdr3_B` column.
# Returns `df_tcr` with the columns <property>_cdr3_B added.
add_TCR_properties <- function(df_tcr) {
  # Keep `values` where the sequence is present, NA where it is missing.
  mask_missing <- function(seqs, values) {
    ifelse(is.na(seqs), NA_integer_, values)
  }

  df_tcr1 <- df_tcr %>%
    mutate(
      pI_cdr3_B = mask_missing(cdr3_B, pI(cdr3_B)),
      boman_cdr3_B = mask_missing(cdr3_B, boman(cdr3_B)),
      charge_cdr3_B = mask_missing(cdr3_B, charge(cdr3_B)),
      hmoment_cdr3_B = mask_missing(cdr3_B, hmoment(cdr3_B)),
      hydrophobicity_cdr3_B = mask_missing(cdr3_B, hydrophobicity(cdr3_B)),
      mw_cdr3_B = mask_missing(cdr3_B, mw(cdr3_B)),
      mz_cdr3_B = mask_missing(cdr3_B, mz(cdr3_B))
    )
  return(df_tcr1)
}

In [None]:
library(ggpubr)

In [None]:
# T1D / control table extended by the seven CDR3 beta property columns.
all_tcrs4_tcr  <- add_TCR_properties(all_tcrs5)

## All clones

In [None]:
dir.create("figures_tcr_peptides_HPAP")

In [None]:
options(repr.plot.width = 4, repr.plot.height = 6.5)

# Property columns added by add_TCR_properties(). They are addressed by name
# so the loop does not depend on how many columns precede them.
tcr_property_cols <- c(
  "pI_cdr3_B", "boman_cdr3_B", "charge_cdr3_B", "hmoment_cdr3_B",
  "hydrophobicity_cdr3_B", "mw_cdr3_B", "mz_cdr3_B"
)

# One violin plot per property: all rows, split by clinical diagnosis.
for (prop in tcr_property_cols) {
  df <- data.frame(
    Score = all_tcrs4_tcr[[prop]],
    Annotation = all_tcrs4_tcr$clinical_diagnosis
  )

  # MD is the group median shown in the label; label_y only lifts the text
  # to 1.2 x median so it does not sit on the median crossbar.
  dataMedian <- summarise(group_by(df, Annotation), MD = median(Score, na.rm = TRUE))
  dataMedian$label_y <- 1.2 * dataMedian$MD

  p <- ggplot(df, aes(x = Annotation, y = Score)) +
    ggrastr::rasterise(geom_jitter(position = position_jitter(0.2), size = 1, color = "grey70", alpha = 0.7)) +
    geom_violin(aes(color = Annotation), scale = "width", alpha = 0.7) +
    theme_classic() +
    NoLegend() +
    theme(axis.text.x = element_text(angle = 45, vjust = 0.8, hjust = 0.8)) +
    scale_color_manual(values = c("blue", "#c41515ff")) +
    stat_summary(fun = "median",
                 geom = "crossbar",
                 width = 0.6,
                 colour = "grey20") +
    geom_text(data = dataMedian,
              aes(Annotation, label_y, label = round(MD, digits = 2)),
              size = 7) +
    xlab("") +
    scale_fill_continuous(guide = "none") +
    ggpubr::stat_compare_means(size = 7, label = "p.format", label.x = 1.3,
                               label.y.npc = 0.9) +
    ggtitle(prop) +
    theme(panel.background = element_blank(), axis.text.x = element_text(angle = 90),
          axis.ticks.x = element_blank()) +
    ggtheme()
  print(p)

  # Save the plot object explicitly rather than whatever was drawn last.
  ggsave(paste0("figures_tcr_peptides_HPAP/", prop, ".svg"), plot = p, width = 7, height = 14, units = "cm")
  ggsave(paste0("figures_tcr_peptides_HPAP/", prop, ".png"), plot = p, width = 7, height = 14, units = "cm")
}

In [None]:
library(ggbeeswarm)

## All clones by patient

In [None]:

# Property columns added by add_TCR_properties(). They are addressed by name
# so the loop does not depend on how many columns precede them.
tcr_property_cols <- c(
  "pI_cdr3_B", "boman_cdr3_B", "charge_cdr3_B", "hmoment_cdr3_B",
  "hydrophobicity_cdr3_B", "mw_cdr3_B", "mz_cdr3_B"
)

# One plot per property: the mean score of every donor, split by diagnosis.
for (prop in tcr_property_cols) {
  df <- data.frame(
    Score = all_tcrs4_tcr[[prop]],
    Disease = all_tcrs4_tcr$clinical_diagnosis,
    Patient_ID = all_tcrs4_tcr$Patient_ID
  )

  p <- df %>%
    group_by(Patient_ID, Disease) %>%
    summarise(avg_score = mean(Score, na.rm = TRUE)) %>%
    ggplot(aes(x = Disease, y = avg_score)) +
    geom_violin(alpha = 0.3, aes(fill = Disease), scale = "width") +
    stat_summary(fun = "median",
                 geom = "crossbar",
                 width = 0.75,
                 color = "grey30") +
    geom_beeswarm(size = 3, aes(fill = Disease), cex = 3,
                  shape = 21, color = "black", corral = "random") +
    scale_fill_manual(values = c("#1874cdff", "#c41515ff", "#eeb4b4ff")) +
    scale_color_manual(values = c("dodgerblue3", "#aa2a2aff", "#e88989ff")) +
    ggpubr::stat_compare_means(size = 7, label = "p.format") +
    ggtitle(prop) +
    theme(panel.background = element_blank()) +
    ggtheme()
  print(p)

  # Save the plot object explicitly rather than whatever was drawn last.
  ggsave(paste0("figures_tcr_peptides_HPAP/", prop, "_bypatient.svg"), plot = p, width = 7, height = 14, units = "cm")
  ggsave(paste0("figures_tcr_peptides_HPAP/", prop, "_bypatient.png"), plot = p, width = 7, height = 14, units = "cm")
}

## One clone counted just once

In [None]:
# Keep one randomly chosen row for every combination of CDR3 beta sequence
# and donor, so each sequence counts once per donor.
# The result stays grouped by cdr3_B and Patient_ID.
one_random_clone <- function(df_all_tcr) {
  by_clone_and_patient <- group_by(df_all_tcr, cdr3_B, Patient_ID)
  df_tcr_oneclone <- slice_sample(by_clone_and_patient, n = 1)
  return(df_tcr_oneclone)
}

In [None]:
all_tcrs4_tcr_one_random <- one_random_clone(all_tcrs4_tcr)

In [None]:

# Property columns added by add_TCR_properties(). They are addressed by name
# so the loop does not depend on how many columns precede them.
tcr_property_cols <- c(
  "pI_cdr3_B", "boman_cdr3_B", "charge_cdr3_B", "hmoment_cdr3_B",
  "hydrophobicity_cdr3_B", "mw_cdr3_B", "mz_cdr3_B"
)

# One violin plot per property: each sequence once per donor, split by
# clinical diagnosis.
for (prop in tcr_property_cols) {
  df <- data.frame(
    Score = all_tcrs4_tcr_one_random[[prop]],
    Annotation = all_tcrs4_tcr_one_random$clinical_diagnosis
  )

  # MD is the group median shown in the label; label_y only lifts the text
  # to 1.2 x median so it does not sit on the median crossbar.
  dataMedian <- summarise(group_by(df, Annotation), MD = median(Score, na.rm = TRUE))
  dataMedian$label_y <- 1.2 * dataMedian$MD

  p <- ggplot(df, aes(x = Annotation, y = Score)) +
    ggrastr::rasterise(geom_jitter(position = position_jitter(0.2), size = 1, color = "grey70", alpha = 0.7)) +
    geom_violin(aes(color = Annotation), scale = "width", alpha = 0.7) +
    theme_classic() +
    NoLegend() +
    theme(axis.text.x = element_text(angle = 45, vjust = 0.8, hjust = 0.8)) +
    scale_color_manual(values = c("blue", "#c41515ff")) +
    stat_summary(fun = "median",
                 geom = "crossbar",
                 width = 0.6,
                 colour = "grey20") +
    geom_text(data = dataMedian,
              aes(Annotation, label_y, label = round(MD, digits = 2)),
              size = 7) +
    xlab("") +
    scale_fill_continuous(guide = "none") +
    ggpubr::stat_compare_means(size = 7, label = "p.format", label.x = 1.3,
                               label.y.npc = 0.9) +
    ggtitle(prop) +
    theme(panel.background = element_blank(), axis.text.x = element_text(angle = 90),
          axis.ticks.x = element_blank()) +
    ggtheme()
  print(p)

  # Save the plot object explicitly rather than whatever was drawn last.
  ggsave(paste0("figures_tcr_peptides_HPAP/", prop, "_one_random.svg"), plot = p, width = 7, height = 14, units = "cm")
  ggsave(paste0("figures_tcr_peptides_HPAP/", prop, "_one_random.png"), plot = p, width = 7, height = 14, units = "cm")
}

## One clone counted just once by patient

In [None]:
# NOTE(review): the plots below are written to figures_tcr_peptides_HPAP/,
# not to this folder -- confirm whether it is still needed.
dir.create("../figures/tcr/peptides_cd4_one_random_bypatient/")

In [None]:

# Property columns added by add_TCR_properties(). They are addressed by name
# so the loop does not depend on how many columns precede them.
tcr_property_cols <- c(
  "pI_cdr3_B", "boman_cdr3_B", "charge_cdr3_B", "hmoment_cdr3_B",
  "hydrophobicity_cdr3_B", "mw_cdr3_B", "mz_cdr3_B"
)

# One plot per property: the mean score of every donor (each sequence counted
# once per donor), split by diagnosis.
for (prop in tcr_property_cols) {
  df <- data.frame(
    Score = all_tcrs4_tcr_one_random[[prop]],
    Disease = all_tcrs4_tcr_one_random$clinical_diagnosis,
    Patient_ID = all_tcrs4_tcr_one_random$Patient_ID
  )

  p <- df %>%
    group_by(Patient_ID, Disease) %>%
    summarise(avg_score = mean(Score, na.rm = TRUE)) %>%
    ggplot(aes(x = Disease, y = avg_score)) +
    geom_violin(alpha = 0.3, aes(fill = Disease), scale = "width") +
    stat_summary(fun = "median",
                 geom = "crossbar",
                 width = 0.75,
                 color = "grey30") +
    geom_beeswarm(size = 3, aes(fill = Disease), cex = 3,
                  shape = 21, color = "black", corral = "random") +
    scale_fill_manual(values = c("#1874cdff", "#c41515ff", "#eeb4b4ff")) +
    scale_color_manual(values = c("dodgerblue3", "#aa2a2aff", "#e88989ff")) +
    ggpubr::stat_compare_means(size = 7, label = "p.format") +
    ggtitle(prop) +
    theme(panel.background = element_blank()) +
    ggtheme()
  print(p)

  # Save the plot object explicitly rather than whatever was drawn last.
  ggsave(paste0("figures_tcr_peptides_HPAP/", prop, "_bypatient_onerandom.svg"), plot = p, width = 7, height = 14, units = "cm")
  ggsave(paste0("figures_tcr_peptides_HPAP/", prop, "_bypatient_onerandom.png"), plot = p, width = 7, height = 14, units = "cm")
}

## TCR properties table

In [None]:
# Helper: one result row for a two-group Wilcoxon comparison of `scores`
# between the clinical diagnoses in `groups` ("T1D control" vs. "T1DM").
# Missing scores are dropped before testing.
.tcr_prop_test_row <- function(scores, groups, prop_name, test_type) {
  keep <- !is.na(scores)
  scores <- scores[keep]
  groups <- groups[keep]

  wcx <- wilcox.test(scores ~ groups, conf.int = TRUE)
  group_means <- tapply(scores, groups, mean)

  data.frame(
    name = prop_name,
    cell_type = "HPAP",
    test_type = test_type,
    estimate = wcx$estimate,
    pval = wcx$p.value,
    # Looked up by label rather than by position in the sorted group table.
    mean_dia = group_means[["T1DM"]],
    mean_ctrl = group_means[["T1D control"]],
    upper = wcx$conf.int[2],
    lower = wcx$conf.int[1]
  )
}

# Helper: mean score of property `prop_name` per donor (missing scores
# dropped), with the donor's clinical diagnosis in `Disease`.
.tcr_prop_patient_means <- function(tbl, prop_name) {
  data.frame(
    Score = tbl[[prop_name]],
    Disease = tbl$clinical_diagnosis,
    Patient_ID = tbl$Patient_ID
  ) %>%
    dplyr::filter(!is.na(Score)) %>%
    group_by(Patient_ID, Disease) %>%
    summarise(Score = mean(Score), .groups = "drop")
}

# Summarise one TCR property with four Wilcoxon tests between diagnoses.
#
# i: position (as before) or name of the property column in `all_tcrs4_tcr`.
#
# Reads the global tables `all_tcrs4_tcr` (all rows) and
# `all_tcrs4_tcr_one_random` (one row per sequence and donor).
#
# Returns a data frame with four rows -- "All clones", "Random clones",
# "All clones by patient", "Random clones by patient" -- and the columns
# name, cell_type, test_type, estimate, pval, mean_dia, mean_ctrl, upper,
# lower (estimate and confidence bounds come from wilcox.test()).
get_tcr_prop_table <- function(i) {
  prop_name <- if (is.character(i)) i else colnames(all_tcrs4_tcr)[i]

  patient_all <- .tcr_prop_patient_means(all_tcrs4_tcr, prop_name)
  patient_random <- .tcr_prop_patient_means(all_tcrs4_tcr_one_random, prop_name)

  rows <- list(
    .tcr_prop_test_row(
      all_tcrs4_tcr[[prop_name]],
      all_tcrs4_tcr$clinical_diagnosis,
      prop_name, "All clones"
    ),
    .tcr_prop_test_row(
      all_tcrs4_tcr_one_random[[prop_name]],
      all_tcrs4_tcr_one_random$clinical_diagnosis,
      prop_name, "Random clones"
    ),
    .tcr_prop_test_row(
      patient_all$Score, patient_all$Disease,
      prop_name, "All clones by patient"
    ),
    .tcr_prop_test_row(
      patient_random$Score, patient_random$Disease,
      prop_name, "Random clones by patient"
    )
  )

  # Bind sequentially, as the original did with repeated rbind() calls.
  df_final <- Reduce(rbind, rows)
  return(df_final)
}

In [None]:
# Property table for all seven TCR properties. The loop body used to be a
# verbatim copy of get_tcr_prop_table(); call the function instead.
tcr_property_cols <- c(
  "pI_cdr3_B", "boman_cdr3_B", "charge_cdr3_B", "hmoment_cdr3_B",
  "hydrophobicity_cdr3_B", "mw_cdr3_B", "mz_cdr3_B"
)

# get_tcr_prop_table() is indexed by column position, so translate the names
# and stop early if a property column is missing.
tcr_property_idx <- match(tcr_property_cols, colnames(all_tcrs4_tcr))
stopifnot(!anyNA(tcr_property_idx))

# Reduce() binds the per-property tables one after another, like the
# original repeated rbind() calls.
df_final_final <- Reduce(rbind, lapply(tcr_property_idx, get_tcr_prop_table))

In [None]:
# Store the HPAP property table next to the CD4 / CD8 tables.
write.csv(df_final_final, "../tables/tcr/peptides/hpap_all_props.csv")

In [None]:
# Load the three property tables.
cd4_props <- read_csv("../tables/tcr/peptides/cd4_tcr_all_props_table.csv")
cd8_props <- read_csv("../tables/tcr/peptides/cd8_tcr_all_props_table.csv")
hpap_props <- read_csv("../tables/tcr/peptides/hpap_all_props.csv")

In [None]:
# Stack them: CD8 first, then CD4, then HPAP.
all_props <- rbind(cd8_props, cd4_props, hpap_props)

In [None]:
# Constant helper column.
all_props[["test"]] <- "test"

In [None]:
# Install ggh4x only when it is missing, so re-running the notebook does not
# reinstall the package every time.
if (!requireNamespace("ggh4x", quietly = TRUE)) {
  install.packages("ggh4x")
}

In [None]:
options(repr.plot.width = 9, repr.plot.height = 20)

# Estimate and interval (lower to upper) of every test for the CDR3 beta
# properties, one panel per property; red marks pval < 0.05.
all_props %>%
  mutate(cell_type_test_type = paste(cell_type, test_type)) %>%
  dplyr::filter(grepl(name, pattern = "cdr3_B")) %>%
  ggplot(aes(x = cell_type_test_type, y = estimate), color = 'grey') +
  geom_hline(yintercept = 0, color = "grey") +
  geom_point(aes(color = pval < 0.05), size = 2) +
  theme_classic() +
  scale_color_manual(values = c("grey", "indianred3")) +
  facet_wrap(~name, scales = "free", ncol = 1) +
  geom_linerange(
    aes(ymin = lower, ymax = upper, color = pval < 0.05),
    alpha = 0.5
  ) +
  coord_flip()
# Alternative facet layouts that were tried with ggh4x:
# ggh4x::facet_nested(test ~ name + test_type, scales = "free_y", independent = "y")
# ggh4x::facet_grid2(rows = vars(test_type), cols = vars(name), scales = "free_y", independent = "y")

In [None]:
# Plot `estimate` with its `lower`-`upper` interval for every feature whose
# name matches "cdr3_B": one facet per feature, one row per
# cell type / test type combination.
# NOTE(review): this cell repeats an identical "cdr3_B" plot cell found
# earlier in the notebook - confirm whether both are needed.
options(repr.plot.width = 9, repr.plot.height = 20)

all_props %>%
  mutate(cell_type_test_type = paste(cell_type, test_type)) %>%
  dplyr::filter(grepl(name, pattern = "cdr3_B")) %>%
  # `color` is not an argument of ggplot() and was silently ignored, so it
  # has been removed here.
  ggplot(aes(x = cell_type_test_type, y = estimate)) +
  # Zero reference line; it appears vertical after coord_flip().
  geom_hline(yintercept = 0, color = "grey") +
  geom_point(aes(color = pval < 0.05), size = 2) +
  theme_classic() +
  # Named values pin each colour to its logical level. With unnamed values
  # the colours are matched in order to the levels that are present, so a
  # table containing only significant rows would be drawn in grey.
  scale_color_manual(values = c("FALSE" = "grey", "TRUE" = "indianred3")) +
  facet_wrap(~name, scales = "free", ncol = 1) +
  geom_linerange(
    aes(ymin = lower, ymax = upper, color = pval < 0.05),
    alpha = 0.5
  ) +
  coord_flip()
# Alternative layouts (not used):
# ggh4x::facet_nested(test ~ name + test_type, scales = "free_y", independent = "y")
# ggh4x::facet_grid2(rows = vars(test_type), cols = vars(name), scales = "free_y", independent = "y")

In [None]:
# Count how many rows each feature name contributes to the combined table.
all_props[["name"]] %>% table()

In [None]:
# Plot `estimate` with its `lower`-`upper` interval for every feature whose
# name matches "cdr3_A1": one facet per feature, one row per
# cell type / test type combination.
options(repr.plot.width = 9, repr.plot.height = 20)

all_props %>%
  mutate(cell_type_test_type = paste(cell_type, test_type)) %>%
  dplyr::filter(grepl(name, pattern = "cdr3_A1")) %>%
  # `color` is not an argument of ggplot() and was silently ignored, so it
  # has been removed here.
  ggplot(aes(x = cell_type_test_type, y = estimate)) +
  # Zero reference line; it appears vertical after coord_flip().
  geom_hline(yintercept = 0, color = "grey") +
  geom_point(aes(color = pval < 0.05), size = 2) +
  theme_classic() +
  # Named values pin each colour to its logical level. With unnamed values
  # the colours are matched in order to the levels that are present, so a
  # table containing only significant rows would be drawn in grey.
  scale_color_manual(values = c("FALSE" = "grey", "TRUE" = "indianred3")) +
  facet_wrap(~name, scales = "free", ncol = 1) +
  geom_linerange(
    aes(ymin = lower, ymax = upper, color = pval < 0.05),
    alpha = 0.5
  ) +
  coord_flip()
# Alternative layouts (not used):
# ggh4x::facet_nested(test ~ name + test_type, scales = "free_y", independent = "y")
# ggh4x::facet_grid2(rows = vars(test_type), cols = vars(name), scales = "free_y", independent = "y")

In [None]:
# Plot `estimate` with its `lower`-`upper` interval for every feature whose
# name matches "cdr3_clone": one facet per feature, one row per
# cell type / test type combination.
options(repr.plot.width = 9, repr.plot.height = 20)

all_props %>%
  mutate(cell_type_test_type = paste(cell_type, test_type)) %>%
  dplyr::filter(grepl(name, pattern = "cdr3_clone")) %>%
  # `color` is not an argument of ggplot() and was silently ignored, so it
  # has been removed here.
  ggplot(aes(x = cell_type_test_type, y = estimate)) +
  # Zero reference line; it appears vertical after coord_flip().
  geom_hline(yintercept = 0, color = "grey") +
  geom_point(aes(color = pval < 0.05), size = 2) +
  theme_classic() +
  # Named values pin each colour to its logical level. With unnamed values
  # the colours are matched in order to the levels that are present, so a
  # table containing only significant rows would be drawn in grey.
  scale_color_manual(values = c("FALSE" = "grey", "TRUE" = "indianred3")) +
  facet_wrap(~name, scales = "free", ncol = 1) +
  geom_linerange(
    aes(ymin = lower, ymax = upper, color = pval < 0.05),
    alpha = 0.5
  ) +
  coord_flip()
# Alternative layouts (not used):
# ggh4x::facet_nested(test ~ name + test_type, scales = "free_y", independent = "y")
# ggh4x::facet_grid2(rows = vars(test_type), cols = vars(name), scales = "free_y", independent = "y")

In [None]:
# Count how many rows each feature name contributes to the combined table.
all_props[["name"]] %>% table()

In [None]:
# Preview the first rows for the three features used in the final figure.
head(
  filter(
    all_props,
    name %in% c("boman_cdr3_B","hmoment_cdr3_B","hydrophobicity_cdr3_B")
  )
)

In [None]:
# Shared plot theme: size-20 text for axis labels, axis titles, general text
# (in black) and legend labels, plus 10-point legend keys.
# Takes no arguments and returns a ggplot2 theme object to add with `+`.
ggtheme <- function() {
  large_label <- element_text(size = 20)
  theme(
    axis.text = large_label,
    axis.title = large_label,
    text = element_text(size = 20, colour = "black"),
    legend.text = large_label,
    legend.key.size = unit(10, units = "points")
  )
}


In [None]:
# Final figure: `estimate` with its `lower`-`upper` interval for three
# selected "cdr3_B" features, restricted to the "All clones" test type,
# one facet per feature. The figure is displayed and saved as SVG.
options(repr.plot.width = 9, repr.plot.height = 6)

peptides_plot <- all_props %>%
  filter(
    name %in% c("boman_cdr3_B", "hmoment_cdr3_B", "hydrophobicity_cdr3_B")
  ) %>%
  mutate(cell_type_test_type = paste(cell_type, test_type)) %>%
  dplyr::filter(test_type %in% c("All clones")) %>%
  # `color` is not an argument of ggplot() and was silently ignored, so it
  # has been removed here.
  ggplot(aes(x = cell_type_test_type, y = estimate)) +
  # Zero reference line; it appears vertical after coord_flip().
  geom_hline(yintercept = 0, color = "grey", linewidth = 1) +
  geom_point(aes(color = pval < 0.05), size = 4) +
  theme_classic() +
  # Named values pin each colour to its logical level. With unnamed values
  # the colours are matched in order to the levels that are present, so a
  # selection containing only significant rows would be drawn in grey.
  scale_color_manual(values = c("FALSE" = "grey", "TRUE" = "indianred3")) +
  facet_wrap(~name, scales = "fixed", ncol = 1) +
  geom_linerange(
    aes(ymin = lower, ymax = upper, color = pval < 0.05),
    alpha = 0.5, linewidth = 2
  ) +
  ggtheme() +
  coord_flip() +
  xlab("")

# Show the figure in the notebook.
peptides_plot

# Pass the plot explicitly so the saved file never depends on whichever plot
# happened to be built last (ggsave() defaults to last_plot()).
ggsave(
  "../figures/tcr_peptides/peptides.svg",
  plot = peptides_plot,
  create.dir = TRUE,
  width = 20, height = 15, units = "cm"
)

In [None]:
# Collapse `tcr_all_props_table` into a single data frame, row-wise.
tcr_all_props_table2 <- tcr_all_props_table %>%
  bind_rows()

In [None]:
# Show the combined table ordered by ascending p-value.
arrange(tcr_all_props_table2, pval)

In [None]:
# Make sure the output folder exists. `recursive = TRUE` also creates any
# missing parent folders, and `showWarnings = FALSE` keeps re-runs quiet when
# the folder is already there.
dir.create("../tables/tcr/peptides/", recursive = TRUE, showWarnings = FALSE)

In [None]:
# Save the combined table as the CD4 results file (row names included, the
# write.csv default).
write.csv(
  x = tcr_all_props_table2,
  file = "../tables/tcr/peptides/cd4_tcr_all_props_table.csv"
)