# Part 4: Analysis of TCR repertoires in CD8 T cells

In this notebook, we will focus on analysis of TCR repertoires from the CD8 T cells. 

In [None]:
.libPaths("~/R/x86_64-pc-linux-gnu-library/4.4/")
source("diabetes_analysis_v07.R")

The first part will be performed on the full dataset, i.e., including unconventional cells. 

# Full dataset

Load the full CD8 dataset. 

In [None]:
# Read the serialized full CD8 object from disk.
cd8_l1_full_filt <- readRDS(file = "../data/processed/L1/cd8_l1_full_filt.rds")

## TCR analysis

For repertoire analyses, we will now work only with the metadata. 

In [None]:
# Work on a plain copy of the per-cell metadata table.
metadata_6 <- slot(cd8_l1_full_filt, "meta.data")

In [None]:
# Build per-cell clonotype labels by pasting CDR3 columns together.
# paste() joins the pieces with a single space and renders NA as "NA".
metadata_6 <- mutate(
  metadata_6,
  clone_nt = paste(cdr3_B_nt, cdr3_A1_nt, cdr3_A2_nt),
  clone_aa = paste("CDR3b", cdr3_B, "CDR3a", cdr3_A1)
)

In [None]:
# Put the extended table back on the object and key its rows by the
# object's column names.
slot(cd8_l1_full_filt, "meta.data") <- metadata_6
rownames(slot(cd8_l1_full_filt, "meta.data")) <- colnames(cd8_l1_full_filt)

Let's create a table of clones with their counts in the dataset. 

In [None]:
# One row per amino-acid clonotype with its cell count `n`, largest first.
# `sum` is sum() of no arguments, i.e. always 0; the column is kept as-is.
clone_table <- metadata_6 %>%
  dplyr::group_by(clone_aa) %>%
  dplyr::summarize(
    n = n(),
    sum = sum()
  ) %>%
  arrange(desc(n))

clone_table

Now we will add the clone counts (clone abundance) to the metadata of the Seurat object. We will add log clone abundance and clone abundance group. 

In [None]:
# Count the cells sharing each amino-acid clonotype. The constant `test`
# column is part of the grouping key and remains in the table afterwards.
metadata_6$test <- 0
metadata_6 <- metadata_6 %>%
  group_by(test, clone_aa) %>%
  mutate(clone_abundance = as.numeric(n()))

# Blank out groups larger than 1000 cells, then take log2 of what is left.
# NOTE(review): presumably the > 1000 cut-off removes the pseudo-clone formed
# by cells without a detected TCR -- confirm.
metadata_6 <- metadata_6 %>%
  mutate(
    clone_abundance = as.numeric(
      ifelse(clone_abundance > 1000, NA_integer_, clone_abundance)
    )
  ) %>%
  mutate(log_clone_abundance = log(clone_abundance, base = 2)) %>%
  as.data.frame()
rownames(metadata_6) <- colnames(cd8_l1_full_filt)

# Write the table back to the object; row names are re-applied after every
# assignment to the meta.data slot.
slot(cd8_l1_full_filt, "meta.data") <- metadata_6
rownames(slot(cd8_l1_full_filt, "meta.data")) <- colnames(cd8_l1_full_filt)

cd8_l1_full_filt <- AddMetaData(
  object = cd8_l1_full_filt,
  metadata = as.numeric(metadata_6$clone_abundance),
  col.name = "clone_abundance"
)
rownames(slot(cd8_l1_full_filt, "meta.data")) <- colnames(cd8_l1_full_filt)

# Log2 clone abundance on the UMAP, all cells.
FeaturePlot(
  cd8_l1_full_filt,
  features = "log_clone_abundance",
  reduction = "umap",
  raster = FALSE
)

# Same feature, restricted to cells with 1 <= clone_abundance < 2000 and
# drawn with a light-blue to firebrick gradient.
FeaturePlot(
  subset(cd8_l1_full_filt, clone_abundance >= 1 & clone_abundance < 2000),
  features = "log_clone_abundance",
  reduction = "umap",
  cols = c("lightblue", "firebrick")
) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("Log2 clone abundance")

# Default DimPlot of the object for reference.
DimPlot(cd8_l1_full_filt)

# Clone abundance bar graph
# Bin every cell by the size of its clone. case_when() takes the first
# matching branch, so each condition only needs its lower bound.
metadata_6 <- metadata_6 %>%
  mutate(
    clone_abundance_group = case_when(
      clone_abundance > 30 ~ ">30",
      clone_abundance > 10 ~ "11-30",
      # Previously `> 6`, which left clones of exactly 6 cells unmatched so
      # they fell through to the "1" fallback below.
      clone_abundance > 5 ~ "6-10",
      clone_abundance > 2 ~ "3-5",
      clone_abundance == 2 ~ "2",
      # Fallback: singletons, and also rows whose clone_abundance is NA.
      # NOTE(review): confirm that NA abundances are meant to be binned as "1".
      TRUE ~ "1"
    )
  )



We will plot the top 100 most abundant clones in the dataset. 

In [None]:
# Highlight the cells of each abundant clone (ranks 2..100 of `clone_table`)
# on the default embedding, one plot per clone.
# NOTE(review): rank 1 is skipped as in the original loop -- presumably it is
# the group of cells without a detected TCR; confirm.
top_clones <- pull(clone_table, clone_aa)
# seq_len(...)[-1] yields 2..min(100, n) and is empty when fewer than two
# clones exist, so the loop never indexes past the end of the table.
for (i in seq_len(min(100, length(top_clones)))[-1]) {
  # Exact comparison instead of a regex search: the label is a literal
  # string, not a pattern.
  clone_cells <- colnames(cd8_l1_full_filt)[
    which(cd8_l1_full_filt$clone_aa == top_clones[i])
  ]
  # Plots built inside a for loop are not auto-printed; print explicitly.
  print(
    DimPlot(cd8_l1_full_filt, raster = FALSE, cells.highlight = clone_cells) +
      ggtitle(top_clones[i])
  )
  #ggsave(paste0("../figures/tcr/cd8_tcr/tcr_plots_cd8_full/tcr_clone_",i,".png"), create.dir = TRUE, width = 13, height = 10, units = "cm")
}

## iNKT and MAIT sequences

We will now plot the iNKT and MAIT sequences to show where these cells are located on our UMAP plot. 

In [None]:
# Highlight the cells whose first alpha-chain CDR3 contains this sequence.
DimPlot(
  cd8_l1_full_filt,
  cells.highlight = colnames(cd8_l1_full_filt)[
    grepl("CVVSDRGSTLGRLYF", cd8_l1_full_filt$cdr3_A1)
  ],
  raster = FALSE
)

The following lists of MAIT/iNKT sequences were obtained from the 10x Genomics webpage and from the literature. 

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
# CDR3 sequences to look up in the first alpha chain (cdr3_A1).
cdr3a_queries <- c('CAVRDSNYQLIW', 'CAVMDSNYQLIW', 'CAVLDSNYQLIW', 'CAVMDSSYKLIF', 'CAVTDSNYQLIW', 'CAVRDGDYKLSF', 'CAVKDSNYQLIW', 'CAAMDSNYQLIW', 'CAALDSNYQLIW', 'CALNDYKLSF', 'CAVVDSNYQLIW', 'CVVSDRGSTLGRLYF', 'CAVIDSNYQLIW', 'CAENTGGFKTIF', 'CAVSDSNYQLIW', 'CALSGGSNYKLTF', 'CAVEDQTGANNLFF', 'CALSDSGGGADGLTF', 'CAVRDRDYKLSF', 'CAGMDSNYQLIW', 'CAVNDYKLSF', 'CAPMDSNYQLIW', 'CASMDSNYQLIW', 'CAVNRDDKIIF', 'CAENSGGSNYKLTF', 'CAPLDSNYQLIW', 'CALNSGGSNYKLTF', 'CVVNDYKLSF', 'CALSSNDYKLSF', 'CAASNQAGTALIF', 'CVVNTGNQFYF', 'CVVNTNAGKSTF', 'CAVEDTGGFKTIF', 'CAVEDSNYQLIW', 'CAVDNYGQNFVF', 'CALSDSGGSNYKLTF', 'CAVMDSSYKLIF', 'CAVNTGGFKTIF', 'CAVRDGNYQLIW', 'CALNTGFQKLVF', 'CAENTGTASKLTF', 'CAATDSNYQLIW', 'CAVNQAGTALIF', 'CAENYGGSQGNLIF', 'CAVLNRDDKIIF', 'CAVEDNYGQNFVF', 'CAVNDYKLSF', 'CVVNNARLMF', 'CAVDNYGQNFVF', 'CAVDSSASKIIF', 'CALIYNQGGKLIF', 'CALNTGGFKTIF', 'CAENNAGNMLTF', 'CAVLDSSYKLIF', 'CAAMDSSYKLIF')
# The list contains repeated entries; unique() avoids drawing (and, once the
# ggsave line is enabled, re-writing) the same plot several times.
for (i in unique(cdr3a_queries)) {
  print(
    DimPlot(
      cd8_l1_full_filt,
      raster = FALSE,
      cells.highlight = colnames(cd8_l1_full_filt)[grep(cd8_l1_full_filt$cdr3_A1, pattern = i)]
    ) +
      NoLegend() +
      ggtitle(i)
  )
  #ggsave(create.dir = TRUE, paste0("../figures/tcr/cd8_tcr/invariant_sequences2_cd8_full/",i,".png"), width = 18, height = 16, units = "cm")
}
    

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
# CDR3 sequences to locate on the UMAP.
nkt_cdr3_queries <- c(
"CASARGVNEQYF",
"CASRGQGLGEQYF",
"CASRYYSVQGRTDTQYF",
"CASSAMDTEAFF",
"CASSAPLAGHYEQYF",
"CASSAWDGYEQYF",
"CASSDGFTDTQYF",
"CASSDLGLAGVIEQFF",
"CASSDLMGPDNYEQYF",
"CASSDLPETQYF",
"CASSDQNTEAFF",
"CASSDRANEQFF",
"CASSDRLAGDTQYF",
"CASSDRRQGAHQPQHF",
"CASSEAGSGEKLFF",
"CASSEALILFF",
"CASSEAPWRDSGNTIYF",
"CASSEEGALKESVGTQYF",
"CASSEEGALKESVGTQYF",
"CASSEFDGGQETQYF",
"CASSEFGGTERTQETQYF",
"CASSEFGGTERTQETQYF",
"CASSEFGQSADEQFF",
"CASSEGGQDYEQYF",
"CASSEGTAGTDTQYF",
"CASSEGTGPNSPLHF",
"CASSEGWEQYF",
"CASSELLRGQGRTGELFF",
"CASSELTDTQYF",
"CASSEMGQGVYTF",
"CASSENSGTGRIYEQYF",
"CASSEPSSGNTIYF",
"CASSEPTGLGTDTQYF",
"CASSESATGFSPLHF",
"CASSESGGSTEAFF",
"CASSESLAGGYNEQFF",
"CASSESVETQYF",
"CASSEWAGGQETQYF",
"CASSEWEDITDTQYF",
"CASSEWGRTQETQYF",
"CASSEWGTNEKLFF",
"CASSEYESTNEKLFF",
"CASSEYESTNEKLFF",
"CASSEYFAGFNEQYF",
"CASSEYGTLQETYF",
"CASSEYMEAGIPTDTQYF",
"CASSEYMEGGEKLFF",
"CASSEYRRRSGEKLFF",
"CASSFGGETQYF",
"CASSGDRRQGAHQPQHF",
"CASSGLLTGPDTQYF",
"CASSGLRDRGLYEQYF",
"CASSGTGGAFDEQFF",
"CASSGTVTEAFF",
"CASSGYQGGGETQYF",
"CASSPIGGHGYEQYF",
"CASSPRDRWHEQYF",
"CASSRGGFDEQYF",
"CASSRGGGTEAFF",
"CASSRGGYTEAFF",
"CASSTGGADEKLFF",
"CASSVPLRDYEQYF",
"CASTGASGTYEQYF",
"CASTPRKGTDVGNTIYF",
"CASTPSGGWSSDTQYF",
"CASTSLETSQYF"
)
# The list contains repeated entries; unique() keeps one plot per sequence.
for (i in unique(nkt_cdr3_queries)) {
  # NOTE(review): these sequences look like beta-chain CDR3s (CAS...F), so a
  # search restricted to cdr3_A1 would not be expected to highlight anything.
  # The beta chain is searched as well; every cell matched through cdr3_A1
  # before is still matched. Confirm the intended chain.
  seq_hit <- grepl(i, cd8_l1_full_filt$cdr3_A1, fixed = TRUE) |
    grepl(i, cd8_l1_full_filt$cdr3_B, fixed = TRUE)
  print(
    DimPlot(
      cd8_l1_full_filt,
      raster = FALSE,
      cells.highlight = colnames(cd8_l1_full_filt)[seq_hit]
    ) +
      NoLegend() +
      ggtitle(i)
  )
  #ggsave(create.dir = TRUE, paste0("../figures/tcr/cd8_tcr/nkt_sequences_cd8/",i,".png"), width = 18, height = 16, units = "cm")
}

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
# CDR3 sequences to locate on the UMAP.
mait_cdr3_queries <- c(
"CAALDSNYQLIW",
"CAAMDSNYQLIW",
"CARSDSNYQLIW",
"CASMDSNYQLIW",
"CASSDSGESGTEAFF",
"CASSPSGGDYNEQFF",
"CASSQIAGGQQETQYF",
"CAVLDSNYQLIW",
"CAVMDSNYQLIW",
"CAVNGDDYKLSF",
"CAVRDGDYKLSF",
"CAVRDSDYKLSF",
"CAVRDSNYQLIQW",
"CAVRDSNYQLIW",
"CAVSDSNYQLIW",
"CAVSLQDYKLSF",
"CSARQGAESREQYF"
)
for (i in mait_cdr3_queries) {
  # NOTE(review): the list appears to mix alpha-chain and beta-chain CDR3s
  # (e.g. the CASS... / CSAR... entries), so the beta chain is searched in
  # addition to cdr3_A1; cells matched through cdr3_A1 before are still
  # matched. Confirm the intended chains.
  seq_hit <- grepl(i, cd8_l1_full_filt$cdr3_A1, fixed = TRUE) |
    grepl(i, cd8_l1_full_filt$cdr3_B, fixed = TRUE)
  # Plots built inside a for loop are not auto-printed; print explicitly,
  # as the sibling loops in this notebook do.
  print(
    DimPlot(
      cd8_l1_full_filt,
      raster = FALSE,
      pt.size = 0.5,
      cells.highlight = colnames(cd8_l1_full_filt)[seq_hit]
    ) +
      ggtitle(i) +
      NoLegend() +
      theme(axis.text = element_text(size = 25), title = element_text(size = 25))
  )
  #ggsave(create.dir = TRUE, paste0("../figures/tcr/cd8_tcr/mait_sequences_cd8/",i,".png"), width = 18, height = 18, units = "cm")
}

### Sequencing sufficiency

Next, we will check the efficiency of TRA/TRB detection. 

In [None]:
# Flag per cell whether a chain was detected: TRA is "OK" when either alpha
# CDR3 is present, TRB is "OK" when the beta CDR3 is present; otherwise the
# literal string "NA" is stored.
cd8_l1_full_filt@meta.data <- cd8_l1_full_filt@meta.data %>%
  mutate(
    clone_status_tra = ifelse(is.na(cdr3_A1) & is.na(cdr3_A2), "NA", "OK"),
    clone_status_trb = ifelse(!is.na(cdr3_B), "OK", "NA")
  )
                 
# Detection status of the alpha chain on the embedding.
DimPlot(
  cd8_l1_full_filt,
  group.by = "clone_status_tra",
  cols = c("indianred2", "palegreen3"),
  raster = TRUE
) +
  ggtitle("Clone status TRA")
#ggsave(create.dir = TRUE, "../figures/tcr/cd8_tcr/seq_sufficiency/do_we_have_data_tra.png", width = 14, height = 10, units = "cm", dpi = 120)

# Detection status of the beta chain on the embedding.
DimPlot(
  cd8_l1_full_filt,
  group.by = "clone_status_trb",
  cols = c("indianred2", "palegreen3"),
  raster = TRUE
) +
  ggtitle("Clone status TRB")
#ggsave("../figures/tcr/cd8_tcr/seq_sufficiency/do_we_have_data_trb.png", width = 14, height = 10, units = "cm", dpi = 120)


# Number and fraction of cells per TRA detection status.
cd8_l1_full_filt@meta.data %>%
  group_by(clone_status_tra) %>%
  summarize(n = n()) %>%
  mutate(freq = prop.table(n))

# Number and fraction of cells per TRB detection status.
cd8_l1_full_filt@meta.data %>%
  group_by(clone_status_trb) %>%
  summarize(n = n()) %>%
  mutate(freq = prop.table(n))

## CDR3 length

Below, we will focus on the length of CDR3, as it has previously been proposed that TCR β-chains have a shorter CDR3 in T1D ([Gomez-Tourino et al.](https://www.nature.com/articles/s41467-017-01925-2)). 

In [None]:
# Length (in characters) of the beta-chain CDR3 amino-acid sequence.
metadata_6[["cdr3_B_nchar"]] <- nchar(metadata_6[["cdr3_B"]])

In [None]:
# Cell counts per Condition.
metadata_6 %>%
  pull(Condition) %>%
  table()

In [None]:
options(repr.plot.width = 17)
# Number of cells per sample for every CDR3-beta length (one facet per
# length), compared between controls and patients.
metadata_6 %>%
  filter(Condition %in% c("Ctrl T0", "Dia T0", "Dia T1")) %>%
  # `=` rather than `==`: the comparison only created a throw-away logical
  # column and left the pre-existing Disease column untouched.
  mutate(Disease = ifelse(grepl(pattern = "Ctrl", x = Condition), "Ctrl", "Dia")) %>%
  group_by(Disease, Sample_ID, cdr3_B_nchar) %>%
  tally() %>%
  filter(!is.na(cdr3_B_nchar)) %>%
  ggplot(aes(x = Disease, y = n)) +
  geom_boxplot(outlier.shape = NA, aes(fill = Disease), alpha = 0.4) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(width = 0.2, height = 0.5, size = 0.5, alpha = 1, aes(color = Disease)) +
  facet_wrap(~ factor(cdr3_B_nchar), scales = "fixed", ncol = 19, drop = TRUE) +
  # y is the tally `n` (cells per sample and length), not a clone abundance.
  ylab("Number of cells") +
  xlab("CDR3 beta length") +
  theme_classic() +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

## CDR3 beta length

Let's now focus on CDR3 of the TRB chain. 

In [None]:
# Length (in characters) of the beta-chain CDR3 amino-acid sequence.
metadata_6[["cdr3_B_nchar"]] <- nchar(metadata_6[["cdr3_B"]])

In [None]:
options(repr.plot.width = 17)
# Number of cells per sample for every CDR3-beta length (one facet per
# length), compared between controls and patients.
metadata_6 %>%
  filter(Condition %in% c("Ctrl T0", "Dia T0", "Dia T1")) %>%
  # `=` rather than `==`: the comparison only created a throw-away logical
  # column and left the pre-existing Disease column untouched.
  mutate(Disease = ifelse(grepl(pattern = "Ctrl", x = Condition), "Ctrl", "Dia")) %>%
  group_by(Disease, Sample_ID, cdr3_B_nchar) %>%
  tally() %>%
  filter(!is.na(cdr3_B_nchar)) %>%
  ggplot(aes(x = Disease, y = n)) +
  geom_boxplot(outlier.shape = NA, aes(fill = Disease), alpha = 0.4) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(width = 0.2, height = 0.5, size = 0.5, alpha = 1, aes(color = Disease)) +
  facet_wrap(~ factor(cdr3_B_nchar), scales = "fixed", ncol = 19, drop = TRUE) +
  # y is the tally `n` (cells per sample and length), not a clone abundance.
  ylab("Number of cells") +
  xlab("CDR3 beta length") +
  theme_classic() +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

* Count each sequence for one patient as many times as it occurs
* I count the number of sequences of a given length for one patient
* Then I plot the frequency of that length for the given patient

In [None]:
options(repr.plot.width = 17)
# Per patient: share of cells carrying each CDR3-beta length (every cell
# counts). Rows with an NA length stay in the denominator.
test_length <- metadata_6 %>%
  filter(Condition %in% c("Ctrl T0", "Dia T0", "Ctrl T1", "Dia T1")) %>%
  # `=` rather than `==`: the comparison only created a throw-away logical
  # column and left the pre-existing Disease column untouched.
  mutate(Disease = ifelse(grepl(pattern = "Ctrl", x = Condition), "Ctrl", "Dia")) %>%
  group_by(Disease, Patient_ID, cdr3_B_nchar) %>%
  # Keep the (Disease, Patient_ID) grouping so `freq` is computed per patient.
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(freq = n / sum(n))

# Sanity check: the frequencies of each patient should add up to 1.
test_length %>%
  group_by(Patient_ID) %>%
  summarise(sum = sum(freq))

In [None]:
# Display the per-patient CDR3 length frequency table.
test_length

In [None]:
# Frequency of each CDR3-beta length per patient, one facet per length,
# with a between-group p-value from ggpubr::stat_compare_means().
test_length %>%
  filter(!is.na(cdr3_B_nchar)) %>%
  ggplot(mapping = aes(x = Disease, y = freq)) +
  geom_boxplot(mapping = aes(fill = Disease), alpha = 0.4, outlier.shape = NA) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  # Horizontal jitter only (height = 0), so plotted frequencies are exact.
  geom_jitter(
    mapping = aes(color = Disease),
    width = 0.2, height = 0, size = 0.5, alpha = 1
  ) +
  facet_wrap(~ factor(cdr3_B_nchar), scales = "fixed", ncol = 19, drop = TRUE) +
  labs(x = "CDR3 beta length", y = "Frequency") +
  theme_classic() +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

* Count each sequence for one patient only once

* I count the number of sequences of a given length for one patient

* Then I plot the frequency of that length for the given patient

In [None]:
options(repr.plot.width = 17)
# Per patient: share of distinct CDR3-beta sequences having each length
# (each distinct row of the selected columns counts once).
test_length <- metadata_6 %>%
  dplyr::select(Disease, Condition, Patient_ID, cdr3_B_nchar, cdr3_B) %>%
  unique() %>%
  filter(Condition %in% c("Ctrl T0", "Dia T0", "Ctrl T1", "Dia T1")) %>%
  # `=` rather than `==`: the comparison only created a throw-away logical
  # column and left the pre-existing Disease column untouched.
  mutate(Disease = ifelse(grepl(pattern = "Ctrl", x = Condition), "Ctrl", "Dia")) %>%
  group_by(Disease, Patient_ID, cdr3_B_nchar) %>%
  # Keep the (Disease, Patient_ID) grouping so `freq` is computed per patient.
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(freq = n / sum(n))

# Sanity check: the frequencies of each patient should add up to 1.
test_length %>%
  group_by(Patient_ID) %>%
  summarise(sum = sum(freq))

In [None]:
# Frequency of each CDR3-beta length per patient, one facet per length,
# with a between-group p-value from ggpubr::stat_compare_means().
test_length %>%
  filter(!is.na(cdr3_B_nchar)) %>%
  ggplot(mapping = aes(x = Disease, y = freq)) +
  geom_boxplot(mapping = aes(fill = Disease), alpha = 0.4, outlier.shape = NA) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  # Horizontal jitter only (height = 0), so plotted frequencies are exact.
  geom_jitter(
    mapping = aes(color = Disease),
    width = 0.2, height = 0, size = 0.5, alpha = 1
  ) +
  facet_wrap(~ factor(cdr3_B_nchar), scales = "fixed", ncol = 19, drop = TRUE) +
  labs(x = "CDR3 beta length", y = "Frequency") +
  theme_classic() +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

In [None]:
# Length (in characters) of the beta-chain CDR3 nucleotide sequence.
metadata_6[["cdr3_B_nchar_nucl"]] <- nchar(metadata_6[["cdr3_B_nt"]])

* Count each sequence for one patient only once

* I count the number of sequences of a given length *in nucleotides* for one patient

* Then I plot the frequency of that length for the given patient

In [None]:
options(repr.plot.width = 17)
# Per patient: share of distinct CDR3-beta nucleotide sequences having each
# length (each distinct row of the selected columns counts once).
test_length <- metadata_6 %>%
  dplyr::select(Disease, Condition, Patient_ID, cdr3_B_nchar_nucl, cdr3_B_nt) %>%
  unique() %>%
  filter(Condition %in% c("Ctrl T0", "Dia T0", "Ctrl T1", "Dia T1")) %>%
  # `=` rather than `==`: the comparison only created a throw-away logical
  # column and left the pre-existing Disease column untouched.
  mutate(Disease = ifelse(grepl(pattern = "Ctrl", x = Condition), "Ctrl", "Dia")) %>%
  group_by(Disease, Patient_ID, cdr3_B_nchar_nucl) %>%
  # Keep the (Disease, Patient_ID) grouping so `freq` is computed per patient.
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(freq = n / sum(n))

# One facet per nucleotide length; points are patients.
test_length %>%
  filter(!is.na(cdr3_B_nchar_nucl)) %>%
  ggplot(aes(x = Disease, y = freq)) +
  geom_boxplot(outlier.shape = NA, aes(fill = Disease), alpha = 0.4) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(width = 0.2, height = 0, size = 0.5, alpha = 1, aes(color = Disease)) +
  facet_wrap(~ factor(cdr3_B_nchar_nucl), scales = "fixed", ncol = 19, drop = TRUE) +
  ylab("Frequency") +
  xlab("CDR3 beta length") +
  theme_classic() +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

In [None]:
options(repr.plot.width = 17)
# Per patient: share of distinct CDR3-beta sequences having each length
# (each distinct row of the selected columns counts once).
test_length <- metadata_6 %>%
  dplyr::select(Disease, Condition, Patient_ID, cdr3_B_nchar, cdr3_B) %>%
  unique() %>%
  filter(Condition %in% c("Ctrl T0", "Dia T0", "Ctrl T1", "Dia T1")) %>%
  # `=` rather than `==`: the comparison only created a throw-away logical
  # column and left the pre-existing Disease column untouched.
  mutate(Disease = ifelse(grepl(pattern = "Ctrl", x = Condition), "Ctrl", "Dia")) %>%
  group_by(Disease, Patient_ID, cdr3_B_nchar) %>%
  # Keep the (Disease, Patient_ID) grouping so `freq` is computed per patient.
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(freq = n / sum(n))

# Sanity check: the frequencies of each patient should add up to 1.
test_length %>%
  group_by(Patient_ID) %>%
  summarise(sum = sum(freq))

In [None]:
# Figure version of the CDR3-beta length frequency plot: filled points
# (shape 21), manual two-colour fill scale and the project theme ggtheme().
test_length %>%
  filter(!is.na(cdr3_B_nchar)) %>%
  ggplot(mapping = aes(x = Disease, y = freq)) +
  geom_boxplot(mapping = aes(fill = Disease), alpha = 0.4, outlier.shape = NA) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(
    mapping = aes(fill = Disease),
    width = 0.2, height = 0, size = 0.7, shape = 21, alpha = 0.7
  ) +
  facet_wrap(~ factor(cdr3_B_nchar), scales = "fixed", ncol = 19, drop = TRUE) +
  scale_fill_manual(values = c("dodgerblue", "red2")) +
  labs(x = "CDR3 beta length", y = "Frequency") +
  theme_classic() +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format", size = 7) +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  ) +
  ggtheme()
#ggsave(filename = "../figures/tcr/cd8_tcr/tcr_length/cd8_with_unconv.png", width = 22, height = 10, units = "cm")
#ggsave(filename = "../figures/tcr/cd8_tcr/tcr_length/cd8_with_unconv.svg", width = 22, height = 10, units = "cm")

# CD8 subcluster

Now let's repeat the analyses in the dataset that contains only conventional CD8+ T cells. 

In [None]:
# Read the serialized CD8 subcluster object from disk.
cd8_l2_subcluster <- readRDS(file = "../data/processed/L2/cd8_l2_subcluster.rds")

In [None]:
# Work on a plain copy of the per-cell metadata table (replaces the
# `metadata_6` of the full dataset).
metadata_6 <- slot(cd8_l2_subcluster, "meta.data")

In [None]:
# Build per-cell clonotype labels by pasting CDR3 columns together.
# paste() joins the pieces with a single space and renders NA as "NA".
metadata_6 <- mutate(
  metadata_6,
  clone_nt = paste(cdr3_B_nt, cdr3_A1_nt, cdr3_A2_nt),
  clone_aa = paste("CDR3b", cdr3_B, "CDR3a", cdr3_A1)
)

In [None]:
# Put the extended table back on the object and key its rows by the
# object's column names.
slot(cd8_l2_subcluster, "meta.data") <- metadata_6
rownames(slot(cd8_l2_subcluster, "meta.data")) <- colnames(cd8_l2_subcluster)

In [None]:
# One row per amino-acid clonotype with its cell count `n`, largest first.
# `sum` is sum() of no arguments, i.e. always 0; the column is kept as-is.
clone_table <- metadata_6 %>%
  dplyr::group_by(clone_aa) %>%
  dplyr::summarize(
    n = n(),
    sum = sum()
  ) %>%
  arrange(desc(n))

clone_table

In [None]:
# Recompute the clonotype labels directly on the object's metadata
# (same paste() recipe as used for `metadata_6`).
cd8_l2_subcluster@meta.data <- mutate(
  cd8_l2_subcluster@meta.data,
  clone_nt = paste(cdr3_B_nt, cdr3_A1_nt, cdr3_A2_nt),
  clone_aa = paste("CDR3b", cdr3_B, "CDR3a", cdr3_A1)
)

In [None]:
# Count the cells sharing each amino-acid clonotype. The constant `test`
# column is part of the grouping key and remains in the table afterwards.
metadata_6$test <- 0
metadata_6 <- metadata_6 %>%
  group_by(test, clone_aa) %>%
  mutate(clone_abundance = as.numeric(n()))

# Blank out groups larger than 1000 cells, then take log2 of what is left.
# NOTE(review): presumably the > 1000 cut-off removes the pseudo-clone formed
# by cells without a detected TCR -- confirm it still applies to this subset.
metadata_6 <- metadata_6 %>%
  mutate(
    clone_abundance = as.numeric(
      ifelse(clone_abundance > 1000, NA_integer_, clone_abundance)
    )
  ) %>%
  mutate(log_clone_abundance = log(clone_abundance, base = 2)) %>%
  as.data.frame()
rownames(metadata_6) <- colnames(cd8_l2_subcluster)

# Write the table back to the object; row names are re-applied after every
# assignment to the meta.data slot.
slot(cd8_l2_subcluster, "meta.data") <- metadata_6
rownames(slot(cd8_l2_subcluster, "meta.data")) <- colnames(cd8_l2_subcluster)

cd8_l2_subcluster <- AddMetaData(
  object = cd8_l2_subcluster,
  metadata = as.numeric(metadata_6$clone_abundance),
  col.name = "clone_abundance"
)
rownames(slot(cd8_l2_subcluster, "meta.data")) <- colnames(cd8_l2_subcluster)

# Log2 clone abundance on the UMAP, all cells.
FeaturePlot(
  cd8_l2_subcluster,
  features = "log_clone_abundance",
  reduction = "umap",
  raster = FALSE
)

# Same feature, restricted to cells with 1 <= clone_abundance < 2000 and
# drawn with a light-blue to firebrick gradient.
FeaturePlot(
  subset(cd8_l2_subcluster, clone_abundance >= 1 & clone_abundance < 2000),
  features = "log_clone_abundance",
  reduction = "umap",
  cols = c("lightblue", "firebrick")
) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("Log2 clone abundance")

# Default DimPlot of the object for reference.
DimPlot(cd8_l2_subcluster)

# Clone abundance bar graph
# Bin every cell by the size of its clone. case_when() takes the first
# matching branch, so each condition only needs its lower bound.
metadata_6 <- metadata_6 %>%
  mutate(
    clone_abundance_group = case_when(
      clone_abundance > 30 ~ ">30",
      clone_abundance > 10 ~ "11-30",
      # Previously `> 6`, which left clones of exactly 6 cells unmatched so
      # they fell through to the "1" fallback below.
      clone_abundance > 5 ~ "6-10",
      clone_abundance > 2 ~ "3-5",
      clone_abundance == 2 ~ "2",
      # Fallback: singletons, and also rows whose clone_abundance is NA.
      # NOTE(review): confirm that NA abundances are meant to be binned as "1".
      TRUE ~ "1"
    )
  )



In [None]:
# Plot size for the notebook output. The height option was misspelled
# (`repr.pot.height`) and therefore silently ignored.
options(repr.plot.width = 5, repr.plot.height = 4)
# Log2 clone abundance for cells with 1 <= clone_abundance < 2000.
FeaturePlot(
  subset(cd8_l2_subcluster, clone_abundance >= 1 & clone_abundance < 2000),
  reduction = "umap",
  features = "log_clone_abundance",
  cols = c("lightblue", "firebrick")
) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("Log2 clone abundance")


### Clone dimplots

In [None]:
# Highlight the cells of each abundant clone (ranks 2..300 of `clone_table`)
# on the default embedding, one plot per clone.
# NOTE(review): rank 1 is skipped as in the original loop -- presumably it is
# the group of cells without a detected TCR; confirm.
top_clones <- pull(clone_table, clone_aa)
# seq_len(...)[-1] yields 2..min(300, n) and is empty when fewer than two
# clones exist, so the loop never indexes past the end of the table.
for (i in seq_len(min(300, length(top_clones)))[-1]) {
  # Exact comparison instead of a regex search: the label is a literal
  # string, not a pattern.
  clone_cells <- colnames(cd8_l2_subcluster)[
    which(cd8_l2_subcluster$clone_aa == top_clones[i])
  ]
  # Plots built inside a for loop are not auto-printed; print explicitly.
  print(
    DimPlot(cd8_l2_subcluster, raster = FALSE, cells.highlight = clone_cells) +
      ggtitle(top_clones[i])
  )
  #ggsave(paste0("../figures/tcr/cd8_tcr/tcr_plots_cd8_l2_subcluster/tcr_clone_",i,".png"), width = 13, height = 10, units = "cm")
}

### Clone abundance bar graph

In [None]:
# Cell counts per manual annotation.
cd8_l2_subcluster$annotations_manual %>%
  table()

In [None]:
options(repr.plot.width = 3.5, repr.plot.height = 3)

# Bin cells by clone size for the stacked bar chart. case_when() takes the
# first matching branch; rows with NA abundance match nothing and stay NA
# (they are filtered out before plotting).
metadata_6 <- metadata_6 %>%
  mutate(
    clone_abundance_group = case_when(
      # `>=` so that clones of exactly 50 cells get the "50+" label instead
      # of "11-49".
      clone_abundance >= 50 ~ "50+",
      clone_abundance > 10 ~ "11-49",
      clone_abundance > 5 ~ "6-10",
      clone_abundance > 1 ~ "2-5",
      !is.na(clone_abundance) ~ "1"
    )
  )

# Proportion of cells in each clone-size bin, per annotated cluster.
metadata_6 %>%
  filter(!is.na(clone_abundance_group)) %>%
  ggplot(aes(x = factor(annotations_manual, levels = rev(c("Naive", "Tem", "Tcm", "Proliferating", "Temra"))))) +
  geom_bar(
    aes(fill = factor(clone_abundance_group, levels = c("1", "2-5", "6-10", "11-49", "50+"))),
    position = "fill"
  ) +
  coord_flip() +
  scale_fill_brewer(palette = "Blues") +
  # Labels follow the aesthetics, not the screen axes: after coord_flip() the
  # y aesthetic (the proportion) is the horizontal axis, so it carries the
  # "Frequency" label and the cluster axis is left unlabelled.
  xlab("") +
  ylab("Frequency") +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "right",
    panel.border = element_blank(),
    legend.title = element_blank()
  ) +
  ggtitle("Clone abundance in clusters")

#ggsave("../figures/tcr/Log2CloneAbundance_bar_clusters_cd8.png", width = 14, height = 6, units = "cm", dpi = 120)
#ggsave("../figures/tcr/Log2CloneAbundance_bar_clusters_cd8.svg", width = 14, height = 6, units = "cm", dpi = 120)

### Size of clones - cluster

In [None]:
options(repr.plot.width = 8, repr.plot.height = 5)

# Per-cell log2 clone abundance by cluster; clusters are ordered on the x
# axis by their mean log2 abundance.
metadata_6 %>%
  group_by(clone_aa, annotations_manual) %>%
  ggplot(
    mapping = aes(
      x = reorder(annotations_manual, log_clone_abundance, mean, na.rm = TRUE),
      y = log_clone_abundance
    )
  ) +
  geom_boxplot(outlier.shape = NA) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  #scale_color_manual(values = c("indianred2","dodgerblue2","green4")) +
  geom_jitter(width = 0.2, height = 0.5, size = 0.5, alpha = 0.2) +
  #facet_wrap(~Annotations, scales = "free", ncol = 4) +
  labs(x = "Cluster", y = "Log2 clonal abundance") +
  theme_classic() +
  ylim(c(0, 8)) +
  ggpubr::stat_compare_means(label.x.npc = 0.3)

#ggsave("../figures/tcr/size_of_clones1.png", width = 11, height = 8, units = "cm", dpi = 120)

In [None]:
options(repr.plot.width = 14, repr.plot.height = 4)

# Per-cell log2 clone abundance, "Ctrl T0" vs "Dia T0", one facet per
# cluster (facets ordered by mean log2 abundance).
metadata_6 %>%
  filter(Condition %in% c("Ctrl T0", "Dia T0")) %>%
  group_by(clone_aa, annotations_manual, Condition) %>%
  ggplot(mapping = aes(x = Condition, y = log_clone_abundance)) +
  geom_boxplot(mapping = aes(fill = Condition), alpha = 0.4, outlier.shape = NA) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(
    mapping = aes(color = Condition),
    width = 0.2, height = 0.5, size = 0.5, alpha = 0.2
  ) +
  facet_wrap(
    ~ factor(annotations_manual, levels = levels(factor(reorder(annotations_manual, log_clone_abundance, mean, na.rm = TRUE)))),
    scales = "fixed", ncol = 11, drop = TRUE
  ) +
  scale_color_manual(values = c("green4", "indianred2")) +
  scale_fill_manual(values = c("green4", "indianred2")) +
  labs(x = "Cluster", y = "Log2 clonal abundance") +
  theme_classic() +
  ylim(c(0, 8)) +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

#ggsave("../figures/tcr/size_of_clones2.png", width = 11, height = 8, units = "cm", dpi = 120)

In [None]:
options(repr.plot.width = 14, repr.plot.height = 4)

# Mean log2 clone abundance per (cluster, Condition, Patient_Time),
# "Ctrl T0" vs "Dia T0", one facet per cluster.
# NOTE(review): geom_jitter uses height = 0.5 here, so the plotted points are
# vertically displaced from the actual means -- confirm this is intended.
metadata_6 %>%
  filter(Condition %in% c("Ctrl T0", "Dia T0")) %>%
  group_by(annotations_manual, Condition, Patient_Time) %>%
  summarize(mean_log_clone_abundance = mean(log_clone_abundance, na.rm = TRUE)) %>%
  ggplot(mapping = aes(x = Condition, y = mean_log_clone_abundance)) +
  geom_boxplot(mapping = aes(fill = Condition), alpha = 0.4, outlier.shape = NA) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(
    mapping = aes(color = Condition),
    width = 0.2, height = 0.5, size = 0.5, alpha = 1
  ) +
  facet_wrap(
    ~ factor(annotations_manual, levels = levels(factor(reorder(annotations_manual, mean_log_clone_abundance, mean, na.rm = TRUE)))),
    scales = "fixed", ncol = 11, drop = TRUE
  ) +
  scale_color_manual(values = c("green4", "indianred2")) +
  scale_fill_manual(values = c("green4", "indianred2")) +
  labs(x = "Cluster", y = "Log2 clonal abundance") +
  theme_classic() +
  ylim(c(0, 5)) +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

#ggsave("../figures/tcr/size_of_clones3.png", width = 11, height = 8, units = "cm", dpi = 120)

## CDR3 length

In [None]:
# Length (in characters) of the beta-chain CDR3 amino-acid sequence.
metadata_6[["cdr3_B_nchar"]] <- nchar(metadata_6[["cdr3_B"]])

In [None]:
# Cell counts per Condition.
metadata_6 %>%
  pull(Condition) %>%
  table()

In [None]:
options(repr.plot.width = 17)
# Number of cells per sample for every CDR3-beta length (one facet per
# length), compared between controls and patients.
metadata_6 %>%
  filter(Condition %in% c("Ctrl T0", "Dia T0", "Dia T1")) %>%
  # `=` rather than `==`: the comparison only created a throw-away logical
  # column and left the pre-existing Disease column untouched.
  mutate(Disease = ifelse(grepl(pattern = "Ctrl", x = Condition), "Ctrl", "Dia")) %>%
  group_by(Disease, Sample_ID, cdr3_B_nchar) %>%
  tally() %>%
  filter(!is.na(cdr3_B_nchar)) %>%
  ggplot(aes(x = Disease, y = n)) +
  geom_boxplot(outlier.shape = NA, aes(fill = Disease), alpha = 0.4) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(width = 0.2, height = 0.5, size = 0.5, alpha = 1, aes(color = Disease)) +
  facet_wrap(~ factor(cdr3_B_nchar), scales = "fixed", ncol = 19, drop = TRUE) +
  # y is the tally `n` (cells per sample and length), not a clone abundance.
  ylab("Number of cells") +
  xlab("CDR3 beta length") +
  theme_classic() +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

## CDR3 beta length

In [None]:
# Length (in characters) of the beta-chain CDR3 amino-acid sequence.
metadata_6[["cdr3_B_nchar"]] <- nchar(metadata_6[["cdr3_B"]])

In [None]:
options(repr.plot.width = 17)
# Number of cells per sample for every CDR3-beta length (one facet per
# length), compared between controls and patients.
metadata_6 %>%
  filter(Condition %in% c("Ctrl T0", "Dia T0", "Dia T1")) %>%
  # `=` rather than `==`: the comparison only created a throw-away logical
  # column and left the pre-existing Disease column untouched.
  mutate(Disease = ifelse(grepl(pattern = "Ctrl", x = Condition), "Ctrl", "Dia")) %>%
  group_by(Disease, Sample_ID, cdr3_B_nchar) %>%
  tally() %>%
  filter(!is.na(cdr3_B_nchar)) %>%
  ggplot(aes(x = Disease, y = n)) +
  geom_boxplot(outlier.shape = NA, aes(fill = Disease), alpha = 0.4) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(width = 0.2, height = 0.5, size = 0.5, alpha = 1, aes(color = Disease)) +
  facet_wrap(~ factor(cdr3_B_nchar), scales = "fixed", ncol = 19, drop = TRUE) +
  # y is the tally `n` (cells per sample and length), not a clone abundance.
  ylab("Number of cells") +
  xlab("CDR3 beta length") +
  theme_classic() +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

* Count each sequence for one patient as many times as it occurs
* I count the number of sequences of a given length for one patient
* Then I plot the frequency of that length for the given patient

In [None]:
options(repr.plot.width = 17)
# Per patient: share of cells carrying each CDR3-beta length (every cell
# counts). Rows with an NA length stay in the denominator.
test_length <- metadata_6 %>%
  filter(Condition %in% c("Ctrl T0", "Dia T0", "Ctrl T1", "Dia T1")) %>%
  # `=` rather than `==`: the comparison only created a throw-away logical
  # column and left the pre-existing Disease column untouched.
  mutate(Disease = ifelse(grepl(pattern = "Ctrl", x = Condition), "Ctrl", "Dia")) %>%
  group_by(Disease, Patient_ID, cdr3_B_nchar) %>%
  # Keep the (Disease, Patient_ID) grouping so `freq` is computed per patient.
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(freq = n / sum(n))

# Sanity check: the frequencies of each patient should add up to 1.
test_length %>%
  group_by(Patient_ID) %>%
  summarise(sum = sum(freq))

In [None]:
# Display the per-patient CDR3 length frequency table.
test_length

In [None]:
# Frequency of each CDR3-beta length per patient, one facet per length,
# with a between-group p-value from ggpubr::stat_compare_means().
test_length %>%
  filter(!is.na(cdr3_B_nchar)) %>%
  ggplot(mapping = aes(x = Disease, y = freq)) +
  geom_boxplot(mapping = aes(fill = Disease), alpha = 0.4, outlier.shape = NA) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  # Horizontal jitter only (height = 0), so plotted frequencies are exact.
  geom_jitter(
    mapping = aes(color = Disease),
    width = 0.2, height = 0, size = 0.5, alpha = 1
  ) +
  facet_wrap(~ factor(cdr3_B_nchar), scales = "fixed", ncol = 19, drop = TRUE) +
  labs(x = "CDR3 beta length", y = "Frequency") +
  theme_classic() +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

* Count each sequence for one patient only once

* I count the number of sequences of a given length for one patient

* Then I plot the frequency of that length for the given patient

In [None]:
options(repr.plot.width = 17)
# Per patient: share of distinct CDR3-beta sequences having each length
# (each distinct row of the selected columns counts once).
test_length <- metadata_6 %>%
  dplyr::select(Disease, Condition, Patient_ID, cdr3_B_nchar, cdr3_B) %>%
  unique() %>%
  filter(Condition %in% c("Ctrl T0", "Dia T0", "Ctrl T1", "Dia T1")) %>%
  # `=` rather than `==`: the comparison only created a throw-away logical
  # column and left the pre-existing Disease column untouched.
  mutate(Disease = ifelse(grepl(pattern = "Ctrl", x = Condition), "Ctrl", "Dia")) %>%
  group_by(Disease, Patient_ID, cdr3_B_nchar) %>%
  # Keep the (Disease, Patient_ID) grouping so `freq` is computed per patient.
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(freq = n / sum(n))

# Sanity check: the frequencies of each patient should add up to 1.
test_length %>%
  group_by(Patient_ID) %>%
  summarise(sum = sum(freq))

In [None]:
# Frequency of each CDR3-beta length per patient, one facet per length,
# with a between-group p-value from ggpubr::stat_compare_means().
test_length %>%
  filter(!is.na(cdr3_B_nchar)) %>%
  ggplot(mapping = aes(x = Disease, y = freq)) +
  geom_boxplot(mapping = aes(fill = Disease), alpha = 0.4, outlier.shape = NA) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  # Horizontal jitter only (height = 0), so plotted frequencies are exact.
  geom_jitter(
    mapping = aes(color = Disease),
    width = 0.2, height = 0, size = 0.5, alpha = 1
  ) +
  facet_wrap(~ factor(cdr3_B_nchar), scales = "fixed", ncol = 19, drop = TRUE) +
  labs(x = "CDR3 beta length", y = "Frequency") +
  theme_classic() +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

In [None]:
# Length (in characters) of the beta-chain CDR3 nucleotide sequence.
metadata_6[["cdr3_B_nchar_nucl"]] <- nchar(metadata_6[["cdr3_B_nt"]])

* Count each sequence for one patient only once

* I count the number of sequences of a given length *in nucleotides* for one patient

* Then I plot the frequency of that length for the given patient

In [None]:
options(repr.plot.width = 17)
# Per patient: share of distinct CDR3-beta nucleotide sequences having each
# length (each distinct row of the selected columns counts once).
test_length <- metadata_6 %>%
  dplyr::select(Disease, Condition, Patient_ID, cdr3_B_nchar_nucl, cdr3_B_nt) %>%
  unique() %>%
  filter(Condition %in% c("Ctrl T0", "Dia T0", "Ctrl T1", "Dia T1")) %>%
  # `=` rather than `==`: the comparison only created a throw-away logical
  # column and left the pre-existing Disease column untouched.
  mutate(Disease = ifelse(grepl(pattern = "Ctrl", x = Condition), "Ctrl", "Dia")) %>%
  group_by(Disease, Patient_ID, cdr3_B_nchar_nucl) %>%
  # Keep the (Disease, Patient_ID) grouping so `freq` is computed per patient.
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(freq = n / sum(n))

# One facet per nucleotide length; points are patients.
test_length %>%
  filter(!is.na(cdr3_B_nchar_nucl)) %>%
  ggplot(aes(x = Disease, y = freq)) +
  geom_boxplot(outlier.shape = NA, aes(fill = Disease), alpha = 0.4) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(width = 0.2, height = 0, size = 0.5, alpha = 1, aes(color = Disease)) +
  facet_wrap(~ factor(cdr3_B_nchar_nucl), scales = "fixed", ncol = 19, drop = TRUE) +
  ylab("Frequency") +
  xlab("CDR3 beta length") +
  theme_classic() +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

In [None]:
options(repr.plot.width = 17)
# Per patient: share of distinct CDR3-beta sequences having each length
# (each distinct row of the selected columns counts once).
test_length <- metadata_6 %>%
  dplyr::select(Disease, Condition, Patient_ID, cdr3_B_nchar, cdr3_B) %>%
  unique() %>%
  filter(Condition %in% c("Ctrl T0", "Dia T0", "Ctrl T1", "Dia T1")) %>%
  # `=` rather than `==`: the comparison only created a throw-away logical
  # column and left the pre-existing Disease column untouched.
  mutate(Disease = ifelse(grepl(pattern = "Ctrl", x = Condition), "Ctrl", "Dia")) %>%
  group_by(Disease, Patient_ID, cdr3_B_nchar) %>%
  # Keep the (Disease, Patient_ID) grouping so `freq` is computed per patient.
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(freq = n / sum(n))

# Sanity check: the frequencies of each patient should add up to 1.
test_length %>%
  group_by(Patient_ID) %>%
  summarise(sum = sum(freq))

In [None]:
# Figure version of the CDR3-beta length frequency plot: filled points
# (shape 21), manual two-colour fill scale and the project theme ggtheme().
test_length %>%
  filter(!is.na(cdr3_B_nchar)) %>%
  ggplot(mapping = aes(x = Disease, y = freq)) +
  geom_boxplot(mapping = aes(fill = Disease), alpha = 0.4, outlier.shape = NA) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(
    mapping = aes(fill = Disease),
    width = 0.2, height = 0, size = 0.7, shape = 21, alpha = 0.7
  ) +
  facet_wrap(~ factor(cdr3_B_nchar), scales = "fixed", ncol = 19, drop = TRUE) +
  scale_fill_manual(values = c("dodgerblue", "red2")) +
  labs(x = "CDR3 beta length", y = "Frequency") +
  theme_classic() +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format", size = 7) +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  ) +
  ggtheme()
#ggsave(filename = "../figures/tcr/cd8_tcr/tcr_length/cd8.png", width = 22, height = 10, units = "cm")
#ggsave(filename = "../figures/tcr/cd8_tcr/tcr_length/cd8.svg", width = 22, height = 10, units = "cm")

# TRAV and TRBV usage

In the next section, we will focus on the TRAV/TRBV repertoire. 

### TRAV

In [None]:
# Per-donor TRAV usage: fraction of a donor's cells (with a known alpha-chain
# V gene) that carry each gene.
trav_usage <- metadata_6 %>%
  filter(!is.na(v_gene_A1)) %>%
  dplyr::group_by(Patient_ID, v_gene_A1) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n)) %>%
  arrange(desc(freq)) %>%
  dplyr::select(-n) %>%
  # Wide-then-long round trip: genes never seen in a donor get an explicit 0
  pivot_wider(
    names_from = "v_gene_A1",
    values_from = "freq",
    values_fill = 0
  ) %>%
  pivot_longer(
    !Patient_ID,
    names_to = "v_gene_A1",
    values_to = "freq"
  )

In [None]:
trav_usage  <- trav_usage  %>% mutate(Disease = ifelse((substr(Patient_ID, 1,1)=="1"), "Dia","Ctrl"))

In [None]:
# Rebuild the per-donor TRAV usage table (same computation as the cell above),
# display it, then label each donor's Disease group and display it again.
trav_usage <- metadata_6 %>%
  filter(!is.na(v_gene_A1)) %>%
  dplyr::group_by(Patient_ID, v_gene_A1) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n)) %>%
  arrange(desc(freq)) %>%
  dplyr::select(-n) %>%
  # Wide-then-long round trip: genes never seen in a donor get an explicit 0
  pivot_wider(
    names_from = "v_gene_A1",
    values_from = "freq",
    values_fill = 0
  ) %>%
  pivot_longer(
    !Patient_ID,
    names_to = "v_gene_A1",
    values_to = "freq"
  )

trav_usage

# Donors whose ID starts with "1" are labelled "Dia", all others "Ctrl"
trav_usage <- trav_usage %>%
  mutate(
    Disease = ifelse((substr(Patient_ID, 1, 1) == "1"), "Dia", "Ctrl")
  )

trav_usage

In [None]:
options(repr.plot.height = 25, repr.plot.width = 10)

# TRAV usage per Disease group: point = mean of the per-donor frequencies,
# line = their min-max range; one facet row per gene, ordered by decreasing
# frequency. Genes whose name contains "TRD" are excluded.
# NOTE(review): after summarise() there is a single value per gene and Disease,
# so stat_compare_means() below is not testing across donors - confirm whether
# the test should be run on the per-donor `freq` values instead.
trav_usage %>%
  filter(!grepl("TRD", v_gene_A1)) %>%
  group_by(v_gene_A1, Disease) %>%
  summarise(
    freq2 = mean(freq, na.rm = TRUE),
    min = min(freq, na.rm = TRUE),
    max = max(freq, na.rm = TRUE),
    .groups = "drop_last"
  ) %>%
  #filter(freq2>0.005)  %>%
  mutate(v_gene_A1 = fct_reorder(v_gene_A1, desc(freq2))) %>%
  ggplot(aes(x = Disease, y = freq2, color = Disease)) +
  facet_grid(rows = vars(fct_reorder(v_gene_A1, desc(freq2)))) +
  geom_point(size = 2) +
  coord_flip() +
  geom_linerange(
    aes(ymin = min, ymax = max, color = Disease),
    alpha = 0.5, linewidth = 1
  ) +
  ggtheme() +
  theme(
    strip.text.y = element_text(angle = 0),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    strip.background = element_blank(),
    panel.background = element_blank(),
    # `linewidth` replaces the deprecated `size` argument of element_line()
    panel.grid.major = element_line(
      linewidth = 0.3, linetype = "solid", colour = "grey92"
    ),
    panel.grid.minor = element_line(
      linewidth = 0.3, linetype = "solid", colour = "grey92"
    )
  ) +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  scale_color_manual(values = c("dodgerblue", "red3"))

In [None]:
#ggsave("../figures/tcr/vdj_usage/cd8_trav.png", width = 20, height = 50, units = "cm")
#ggsave("../figures/tcr/vdj_usage/cd8_trav.svg", width = 20, height = 50, units = "cm")

### TRAJ

In [None]:
# Per-donor TRAJ usage: fraction of a donor's cells (with a known alpha-chain
# J gene) that carry each gene.
traj_usage <- metadata_6 %>%
  filter(!is.na(j_gene_A1)) %>%
  dplyr::group_by(Patient_ID, j_gene_A1) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n)) %>%
  arrange(desc(freq)) %>%
  dplyr::select(-n) %>%
  # Wide-then-long round trip: genes never seen in a donor get an explicit 0
  pivot_wider(
    names_from = "j_gene_A1",
    values_from = "freq",
    values_fill = 0
  ) %>%
  pivot_longer(
    !Patient_ID,
    names_to = "j_gene_A1",
    values_to = "freq"
  )

In [None]:
traj_usage  <- traj_usage %>% mutate(Disease = ifelse((substr(Patient_ID, 1,1)=="1"), "Dia","Ctrl"))

In [None]:
options(repr.plot.height = 15, repr.plot.width = 10)

# TRAJ usage per Disease group: point = mean of the per-donor frequencies,
# line = their min-max range; one facet row per gene, ordered by decreasing
# frequency. Genes whose name contains "TRD" are excluded.
# NOTE(review): after summarise() there is a single value per gene and Disease,
# so stat_compare_means() below is not testing across donors - confirm whether
# the test should be run on the per-donor `freq` values instead.
traj_usage %>%
  filter(!grepl("TRD", j_gene_A1)) %>%
  group_by(j_gene_A1, Disease) %>%
  summarise(
    freq2 = mean(freq, na.rm = TRUE),
    min = min(freq, na.rm = TRUE),
    max = max(freq, na.rm = TRUE),
    .groups = "drop_last"
  ) %>%
  #filter(freq2>0.005)  %>%
  mutate(j_gene_A1 = fct_reorder(j_gene_A1, desc(freq2))) %>%
  ggplot(aes(x = Disease, y = freq2, color = Disease)) +
  facet_grid(rows = vars(fct_reorder(j_gene_A1, desc(freq2)))) +
  geom_point(size = 2) +
  coord_flip() +
  geom_linerange(
    aes(ymin = min, ymax = max, color = Disease),
    alpha = 0.5, linewidth = 1
  ) +
  ggtheme() +
  theme(
    strip.text.y = element_text(angle = 0),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    strip.background = element_blank(),
    panel.background = element_blank(),
    # `linewidth` replaces the deprecated `size` argument of element_line()
    panel.grid.major = element_line(
      linewidth = 0.3, linetype = "solid", colour = "grey92"
    ),
    panel.grid.minor = element_line(
      linewidth = 0.3, linetype = "solid", colour = "grey92"
    )
  ) +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  scale_color_manual(values = c("dodgerblue", "red3"))

In [None]:
#ggsave("../figures/tcr/vdj_usage/cd8_traj.png", width = 15, height = 35, units = "cm")
#ggsave("../figures/tcr/vdj_usage/cd8_traj.svg", width = 15, height = 35, units = "cm")

### TRBV

In [None]:
metadata_6$v_gene_B  %>% table

In [None]:
# Per-donor TRBV usage: fraction of a donor's cells (with a known beta-chain
# V gene) that carry each gene.
trbv_usage <- metadata_6 %>%
  filter(!is.na(v_gene_B)) %>%
  dplyr::group_by(Patient_ID, v_gene_B) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n)) %>%
  arrange(desc(freq)) %>%
  dplyr::select(-n) %>%
  # Wide-then-long round trip: genes never seen in a donor get an explicit 0
  pivot_wider(
    names_from = "v_gene_B",
    values_from = "freq",
    values_fill = 0
  ) %>%
  pivot_longer(
    !Patient_ID,
    names_to = "v_gene_B",
    values_to = "freq"
  )

In [None]:
trbv_usage  <- trbv_usage %>% mutate(Disease = ifelse((substr(Patient_ID, 1,1)=="1"), "Dia","Ctrl"))

In [None]:
options(repr.plot.height = 15, repr.plot.width = 10)

# TRBV usage per Disease group: point = mean of the per-donor frequencies,
# line = their min-max range; one facet row per gene, ordered by decreasing
# frequency. Genes whose name contains "TRD" are excluded.
# NOTE(review): after summarise() there is a single value per gene and Disease,
# so stat_compare_means() below is not testing across donors - confirm whether
# the test should be run on the per-donor `freq` values instead.
trbv_usage %>%
  filter(!grepl("TRD", v_gene_B)) %>%
  group_by(v_gene_B, Disease) %>%
  summarise(
    freq2 = mean(freq, na.rm = TRUE),
    min = min(freq, na.rm = TRUE),
    max = max(freq, na.rm = TRUE),
    .groups = "drop_last"
  ) %>%
  #filter(freq2>0.005)  %>%
  mutate(v_gene_B = fct_reorder(v_gene_B, desc(freq2))) %>%
  ggplot(aes(x = Disease, y = freq2, color = Disease)) +
  facet_grid(rows = vars(fct_reorder(v_gene_B, desc(freq2)))) +
  geom_point(size = 2) +
  coord_flip() +
  geom_linerange(
    aes(ymin = min, ymax = max, color = Disease),
    alpha = 0.5, linewidth = 1
  ) +
  ggtheme() +
  theme(
    strip.text.y = element_text(angle = 0),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    strip.background = element_blank(),
    panel.background = element_blank(),
    # `linewidth` replaces the deprecated `size` argument of element_line()
    panel.grid.major = element_line(
      linewidth = 0.3, linetype = "solid", colour = "grey92"
    ),
    panel.grid.minor = element_line(
      linewidth = 0.3, linetype = "solid", colour = "grey92"
    )
  ) +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  scale_color_manual(values = c("dodgerblue", "red3"))

In [None]:
#ggsave("../figures/tcr/vdj_usage/cd8_trbv.png", width = 15, height = 35, units = "cm")
#ggsave("../figures/tcr/vdj_usage/cd8_trbv.svg", width = 15, height = 35, units = "cm")

### TRBJ

In [None]:
metadata_6$j_gene_B  %>% table

In [None]:
# Per-donor TRBJ usage: fraction of a donor's cells (with a known beta-chain
# J gene) that carry each gene.
trbj_usage <- metadata_6 %>%
  filter(!is.na(j_gene_B)) %>%
  dplyr::group_by(Patient_ID, j_gene_B) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n)) %>%
  arrange(desc(freq)) %>%
  dplyr::select(-n) %>%
  # Wide-then-long round trip: genes never seen in a donor get an explicit 0
  pivot_wider(
    names_from = "j_gene_B",
    values_from = "freq",
    values_fill = 0
  ) %>%
  pivot_longer(
    !Patient_ID,
    names_to = "j_gene_B",
    values_to = "freq"
  )

In [None]:
trbj_usage  <- trbj_usage %>% mutate(Disease = ifelse((substr(Patient_ID, 1,1)=="1"), "Dia","Ctrl"))

In [None]:
options(repr.plot.height = 15, repr.plot.width = 10)

# TRBJ usage per Disease group: point = mean of the per-donor frequencies,
# line = their min-max range; one facet row per gene, ordered by decreasing
# frequency. Genes whose name contains "TRD" are excluded.
# NOTE(review): after summarise() there is a single value per gene and Disease,
# so stat_compare_means() below is not testing across donors - confirm whether
# the test should be run on the per-donor `freq` values instead.
trbj_usage %>%
  filter(!grepl("TRD", j_gene_B)) %>%
  group_by(j_gene_B, Disease) %>%
  summarise(
    freq2 = mean(freq, na.rm = TRUE),
    min = min(freq, na.rm = TRUE),
    max = max(freq, na.rm = TRUE),
    .groups = "drop_last"
  ) %>%
  #filter(freq2>0.005)  %>%
  mutate(j_gene_B = fct_reorder(j_gene_B, desc(freq2))) %>%
  ggplot(aes(x = Disease, y = freq2, color = Disease)) +
  facet_grid(rows = vars(fct_reorder(j_gene_B, desc(freq2)))) +
  geom_point(size = 2) +
  coord_flip() +
  geom_linerange(
    aes(ymin = min, ymax = max, color = Disease),
    alpha = 0.5, linewidth = 1
  ) +
  ggtheme() +
  theme(
    strip.text.y = element_text(angle = 0),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    strip.background = element_blank(),
    panel.background = element_blank(),
    # `linewidth` replaces the deprecated `size` argument of element_line()
    panel.grid.major = element_line(
      linewidth = 0.3, linetype = "solid", colour = "grey92"
    ),
    panel.grid.minor = element_line(
      linewidth = 0.3, linetype = "solid", colour = "grey92"
    )
  ) +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  scale_color_manual(values = c("dodgerblue", "red3"))

In [None]:
#ggsave("../figures/tcr/vdj_usage/cd8_trbj.png", width = 15, height = 10, units = "cm")
#ggsave("../figures/tcr/vdj_usage/cd8_trbj.svg", width = 15, height = 10, units = "cm")

# Clones shared between samples

In the next section, we will check if there are any clones that overlap between samples and between patients. 

In [None]:
# Build a per-cell sample label by pasting together donor, disease, time point,
# age group, sex and experiment ID (space-separated).
# NOTE(review): the label is stored on cd8_l2_subcluster, but that object is
# only read from disk further down (readRDS in the TCR properties section) and
# the following cells read Sample_char from metadata_6 - confirm that the
# object is already in memory here and that metadata_6 carries Sample_char.
cd8_l2_subcluster$Sample_char  <- paste(cd8_l2_subcluster$Patient_ID, 
                                  cd8_l2_subcluster$Disease,
                                  cd8_l2_subcluster$Time,
                                  cd8_l2_subcluster$Age_group,
                                  cd8_l2_subcluster$Sex,
                                  cd8_l2_subcluster$Experiment_ID)

In [None]:
clone_table <- metadata_6 %>%
filter(clone_aa != "CDR3b NA CDR3a NA")  %>% 
  dplyr::group_by(Sample_char, clone_aa) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n))

In [None]:
clone_table_individual <- metadata_6 %>%
  dplyr::group_by(clone_aa, Sample_char) %>%
          dplyr::summarize(n = n()) %>%
    arrange(desc(n)) %>% pivot_wider(names_from = Sample_char, values_from = n) 

In [None]:
# Turn counts into presence flags.
#
# number: vector of counts; NA means the clone was not observed.
# Returns a vector of the same length holding 1 where the count is present and
# non-zero, and 0 where it is zero or missing.
is_positive <- function(number) {
  ifelse(!is.na(number) & number != 0, 1, 0)
}

In [None]:
dim(clone_table_individual)

In [None]:
clone_table_individual_binary <- clone_table_individual %>% mutate_at(vars(2:88), is_positive)
clone_table_individual$sum <- rowSums(clone_table_individual_binary[,2:88])

clone_table_individual %>% arrange(desc(sum))

In [None]:
clone_table_individual  <- (clone_table_individual %>% arrange(desc(sum)))[2:nrow(clone_table_individual),]

In [None]:
clone_table_individual_binary  <- (clone_table_individual_binary)[c(1,3:nrow(clone_table_individual_binary)),]

In [None]:
write.csv(clone_table_individual %>% arrange(desc(sum)), "../tables/tcr/overlapping_clones_cd8.csv")

In [None]:
clone_table_individual_small  <- clone_table_individual  %>% filter(sum > 1)

In [None]:
write.csv(clone_table_individual_small %>% arrange(desc(sum)), "../tables/tcr/overlapping_clones_cd8_small.csv")

### Repertoire overlap table

In [None]:
dim(clone_table_individual_binary)

In [None]:
order_cols  <- order((colnames(clone_table_individual_binary)[2:88]))+1

In [None]:
clone_table_individual_binary  <- clone_table_individual_binary[,
                                                                c(1,order_cols)]

In [None]:
# Pairwise repertoire overlap between samples.
# Entry [i, j] is the fraction of the clones present in sample j that are also
# present in sample i, so the table is not symmetric; a sample without any
# clone yields NaN (0 / 0).
# Every column after the first (the clone identifier) is treated as a sample.
sample_idx <- seq_len(ncol(clone_table_individual_binary))[-1]
sample_cols <- colnames(clone_table_individual_binary)[sample_idx]
clone_ids <- clone_table_individual_binary[[1]]

# Clone identifiers detected in each sample, computed once per sample instead
# of once per sample pair
clones_by_sample <- lapply(sample_idx, function(k) {
  clone_ids[which(clone_table_individual_binary[[k]] > 0)]
})

overlap <- matrix(
  NA_real_,
  nrow = length(sample_cols), ncol = length(sample_cols),
  dimnames = list(NULL, sample_cols)
)
for (j in seq_along(clones_by_sample)) {
  own_clones <- clones_by_sample[[j]]
  shared <- vapply(
    clones_by_sample,
    function(other_clones) as.numeric(sum(own_clones %in% other_clones)),
    numeric(1)
  )
  overlap[, j] <- shared / length(own_clones)
}

# The leading placeholder column is kept because the next cell drops column 1
df_all4 <- cbind(data.frame(""), as.data.frame(overlap))



In [None]:
df_all4 <- df_all4[,2:88]
rownames(df_all4) <- colnames(df_all4)



In [None]:
# Blank the self-overlap on the diagonal (always 1 for a non-empty sample),
# presumably so it does not dominate the heatmap colour scale.
# Only the diagonal is zeroed: matching on `== 1` would also erase genuine
# complete overlaps between two different samples.
df24 <- as.matrix(df_all4)
diag(df24) <- 0
df24 <- as.data.frame(df24)


In [None]:
matrix_4  <- as.matrix(df24)

In [None]:
options(repr.plot.height = 17, repr.plot.width = 17)
pheatmap::pheatmap(matrix_4, cluster_rows = F, cluster_cols = F)

In [None]:
matrix_5  <- log(matrix_4+0.0001)

options(repr.plot.height = 17, repr.plot.width = 17)
pheatmap::pheatmap(matrix_5, cluster_rows = F, cluster_cols = F)

## TRB shared by patients

We will repeat the analysis using just TCRb. 

In [None]:
clone_table <- metadata_6 %>%
filter(clone_aa != "CDR3b NA CDR3a NA")  %>% 
  dplyr::group_by(Sample_char, cdr3_B) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n))

In [None]:
clone_table_individual <- metadata_6 %>%
  dplyr::group_by(cdr3_B, Sample_char) %>%
          dplyr::summarize(n = n()) %>%
    arrange(desc(n)) %>% pivot_wider(names_from = Sample_char, values_from = n, values_fill = 0) 

In [None]:
clone_table_individual_binary <- clone_table_individual %>% mutate_at(vars(2:88), is_positive)
clone_table_individual$sum <- rowSums(clone_table_individual_binary[,2:88])

clone_table_individual %>% arrange(desc(sum))

In [None]:
clone_table_individual  <- (clone_table_individual %>% arrange(desc(sum)))[2:nrow(clone_table_individual),]

In [None]:
clone_table_individual_binary  <- (clone_table_individual_binary)[c(2:nrow(clone_table_individual_binary)),]

In [None]:
write.csv(clone_table_individual %>% arrange(desc(sum)), "../tables/tcr/overlapping_trb_cd8.csv")

In [None]:
clone_table_individual_small  <- clone_table_individual  %>% filter(sum > 1)

In [None]:
write.csv(clone_table_individual_small %>% arrange(desc(sum)), "../tables/tcr/overlapping_trb_cd8_small.csv")

### Repertoire overlap table

In [None]:
clone_table_individual_binary

In [None]:
order_cols  <- order((colnames(clone_table_individual_binary)[2:88]))+1

In [None]:
clone_table_individual_binary  <- clone_table_individual_binary[,
                                                                c(1,order_cols)]

In [None]:
# Pairwise CDR3 beta overlap between samples.
# Entry [i, j] is the fraction of the sequences present in sample j that are
# also present in sample i, so the table is not symmetric; a sample without
# any sequence yields NaN (0 / 0).
# Every column after the first (the sequence identifier) is treated as a sample.
sample_idx <- seq_len(ncol(clone_table_individual_binary))[-1]
sample_cols <- colnames(clone_table_individual_binary)[sample_idx]
clone_ids <- clone_table_individual_binary[[1]]

# Sequences detected in each sample, computed once per sample instead of once
# per sample pair
clones_by_sample <- lapply(sample_idx, function(k) {
  clone_ids[which(clone_table_individual_binary[[k]] > 0)]
})

overlap <- matrix(
  NA_real_,
  nrow = length(sample_cols), ncol = length(sample_cols),
  dimnames = list(NULL, sample_cols)
)
for (j in seq_along(clones_by_sample)) {
  own_clones <- clones_by_sample[[j]]
  shared <- vapply(
    clones_by_sample,
    function(other_clones) as.numeric(sum(own_clones %in% other_clones)),
    numeric(1)
  )
  overlap[, j] <- shared / length(own_clones)
}

# The leading placeholder column is kept because the next cell drops column 1
df_all4 <- cbind(data.frame(""), as.data.frame(overlap))


In [None]:
df_all4 <- df_all4[,2:88]
rownames(df_all4) <- colnames(df_all4)


In [None]:
# Blank the self-overlap on the diagonal (always 1 for a non-empty sample),
# presumably so it does not dominate the heatmap colour scale.
# Only the diagonal is zeroed: matching on `== 1` would also erase genuine
# complete overlaps between two different samples.
df24 <- as.matrix(df_all4)
diag(df24) <- 0
df24 <- as.data.frame(df24)


In [None]:
matrix_4  <- as.matrix(df24)

In [None]:
options(repr.plot.height = 17, repr.plot.width = 17)
pheatmap::pheatmap(matrix_4, cluster_rows = F, cluster_cols = F)

In [None]:
matrix_5  <- log(matrix_4+0.0001)

options(repr.plot.height = 17, repr.plot.width = 17)
pheatmap::pheatmap(matrix_5, cluster_rows = F, cluster_cols = F)

## Overlap by patient

Now we will repeat the analysis, but focusing on individual donors instead of individual samples. 

### Clone_aa

In [None]:
clone_table_individual <- metadata_6 %>%
mutate(Condition = paste(Patient_ID, Disease))  %>% 
  dplyr::group_by(clone_aa, Condition) %>%
          dplyr::summarize(n = n()) %>%
    arrange(desc(n)) %>% pivot_wider(names_from = Condition, values_from = n) 

In [None]:
# Turn counts into presence flags.
#
# number: vector of counts; NA means the clone was not observed.
# Returns a vector of the same length holding 1 where the count is present and
# non-zero, and 0 where it is zero or missing.
is_positive <- function(number) {
  ifelse(!is.na(number) & number != 0, 1, 0)
}

In [None]:
clone_table_individual_binary <- clone_table_individual %>% mutate_at(vars(2:44), is_positive)
clone_table_individual$sum <- rowSums(clone_table_individual_binary[,2:44])

clone_table_individual %>% arrange(desc(sum))

In [None]:
clone_table_individual  <- (clone_table_individual %>% arrange(desc(sum)))[2:nrow(clone_table_individual),]

In [None]:
clone_table_individual_binary  <- (clone_table_individual_binary)[c(2:nrow(clone_table_individual_binary)),]

In [None]:
write.csv(clone_table_individual %>% arrange(desc(sum)), "../tables/tcr/overlapping_clones_cd8_by_patient.csv")

### Repertoire overlap table

In [None]:
clone_table_individual_binary  <- clone_table_individual_binary[,
                                                                c(1,order((colnames(clone_table_individual_binary)[2:44]))+1)]

In [None]:
clone_table_individual_binary

In [None]:
# Pairwise repertoire overlap between donors.
# Entry [i, j] is the fraction of the clones present in donor j that are also
# present in donor i, so the table is not symmetric; a donor without any clone
# yields NaN (0 / 0).
# Every column after the first (the clone identifier) is treated as a donor.
donor_idx <- seq_len(ncol(clone_table_individual_binary))[-1]
donor_cols <- colnames(clone_table_individual_binary)[donor_idx]
clone_ids <- clone_table_individual_binary[[1]]

# Clone identifiers detected in each donor, computed once per donor instead of
# once per donor pair
clones_by_donor <- lapply(donor_idx, function(k) {
  clone_ids[which(clone_table_individual_binary[[k]] > 0)]
})

overlap <- matrix(
  NA_real_,
  nrow = length(donor_cols), ncol = length(donor_cols),
  dimnames = list(NULL, donor_cols)
)
for (j in seq_along(clones_by_donor)) {
  own_clones <- clones_by_donor[[j]]
  shared <- vapply(
    clones_by_donor,
    function(other_clones) as.numeric(sum(own_clones %in% other_clones)),
    numeric(1)
  )
  overlap[, j] <- shared / length(own_clones)
}

# The leading placeholder column is kept because the next cell drops column 1
df_all5 <- cbind(data.frame(""), as.data.frame(overlap))

In [None]:
df_all5 <- df_all5[,2:44]
rownames(df_all5) <- colnames(df_all5)

In [None]:
# Blank the self-overlap on the diagonal (always 1 for a non-empty donor),
# presumably so it does not dominate the heatmap colour scale.
# Only the diagonal is zeroed: matching on `== 1` would also erase genuine
# complete overlaps between two different donors.
df25 <- as.matrix(df_all5)
diag(df25) <- 0
df25 <- as.data.frame(df25)


In [None]:
matrix_6  <- as.matrix(df25)

In [None]:
options(repr.plot.height = 12, repr.plot.width = 12)
pheatmap::pheatmap(matrix_6, cluster_rows = F, cluster_cols = F)

In [None]:
pheatmap::pheatmap(matrix_6, cluster_rows = T, cluster_cols = T)

## TCRb

In [None]:
clone_table_individual <- metadata_6 %>%
mutate(Condition = paste(Patient_ID, Disease))  %>% 
  dplyr::group_by(cdr3_B, Condition) %>%
          dplyr::summarize(n = n()) %>%
    arrange(desc(n)) %>% pivot_wider(names_from = Condition, values_from = n) 

In [None]:
clone_table_individual

In [None]:
clone_table_individual_binary <- clone_table_individual %>% mutate_at(vars(2:44), is_positive)
clone_table_individual$sum <- rowSums(clone_table_individual_binary[,2:44])

clone_table_individual %>% arrange(desc(sum))

In [None]:
clone_table_individual  <- (clone_table_individual %>% arrange(desc(sum)))[2:nrow(clone_table_individual),]

In [None]:
clone_table_individual_binary  <- (clone_table_individual_binary)[c(2:nrow(clone_table_individual_binary)),]

In [None]:
write.csv(clone_table_individual %>% arrange(desc(sum)), "../tables/tcr/overlapping_trb_cd8_by_patient.csv")

In [None]:
clone_table_individual_small  <- clone_table_individual  %>% filter(sum > 1)

In [None]:
clone_table_individual_small

In [None]:
write.csv(clone_table_individual_small %>% arrange(desc(sum)), "../tables/tcr/overlapping_trb_cd8_small_by_patient.csv")

### Repertoire overlap table

In [None]:
clone_table_individual_binary  <- clone_table_individual_binary[,
                                                                c(1,order((colnames(clone_table_individual_binary)[2:44]))+1)]

In [None]:
# Pairwise CDR3 beta overlap between donors.
# Entry [i, j] is the fraction of the sequences present in donor j that are
# also present in donor i, so the table is not symmetric; a donor without any
# sequence yields NaN (0 / 0).
# Every column after the first (the sequence identifier) is treated as a donor.
donor_idx <- seq_len(ncol(clone_table_individual_binary))[-1]
donor_cols <- colnames(clone_table_individual_binary)[donor_idx]
clone_ids <- clone_table_individual_binary[[1]]

# Sequences detected in each donor, computed once per donor instead of once
# per donor pair
clones_by_donor <- lapply(donor_idx, function(k) {
  clone_ids[which(clone_table_individual_binary[[k]] > 0)]
})

overlap <- matrix(
  NA_real_,
  nrow = length(donor_cols), ncol = length(donor_cols),
  dimnames = list(NULL, donor_cols)
)
for (j in seq_along(clones_by_donor)) {
  own_clones <- clones_by_donor[[j]]
  shared <- vapply(
    clones_by_donor,
    function(other_clones) as.numeric(sum(own_clones %in% other_clones)),
    numeric(1)
  )
  overlap[, j] <- shared / length(own_clones)
}

# The leading placeholder column is kept because the next cell drops column 1
df_all5 <- cbind(data.frame(""), as.data.frame(overlap))



In [None]:
df_all5 <- df_all5[,2:44]
rownames(df_all5) <- colnames(df_all5)

In [None]:
# Blank the self-overlap on the diagonal (always 1 for a non-empty donor),
# presumably so it does not dominate the heatmap colour scale.
# Only the diagonal is zeroed: matching on `== 1` would also erase genuine
# complete overlaps between two different donors.
df25 <- as.matrix(df_all5)
diag(df25) <- 0
df25 <- as.data.frame(df25)

In [None]:
matrix_6  <- as.matrix(df25)

In [None]:
options(repr.plot.height = 12, repr.plot.width = 12)
pheatmap::pheatmap(matrix_6, cluster_rows = F, cluster_cols = F)

In [None]:
pheatmap::pheatmap(matrix_6, cluster_rows = T, cluster_cols = T)

## TCRa

In [None]:
clone_table_individual <- metadata_6 %>%
mutate(Condition = paste(Patient_ID, Disease))  %>% 
  dplyr::group_by(cdr3_A1, Condition) %>%
          dplyr::summarize(n = n()) %>%
    arrange(desc(n)) %>% pivot_wider(names_from = Condition, values_from = n) 

In [None]:
clone_table_individual_binary <- clone_table_individual %>% mutate_at(vars(2:44), is_positive)
clone_table_individual$sum <- rowSums(clone_table_individual_binary[,2:44])

clone_table_individual %>% arrange(desc(sum))

In [None]:
clone_table_individual  <- (clone_table_individual %>% arrange(desc(sum)))[2:nrow(clone_table_individual),]

In [None]:
clone_table_individual_binary  <- (clone_table_individual_binary)[c(2:nrow(clone_table_individual_binary)),]

In [None]:
write.csv(clone_table_individual %>% arrange(desc(sum)), "../tables/tcr/overlapping_tra_cd8_by_patient.csv")

In [None]:
clone_table_individual_small  <- clone_table_individual  %>% filter(sum > 1)

In [None]:
write.csv(clone_table_individual_small %>% arrange(desc(sum)), "../tables/tcr/overlapping_tra_cd8_small_by_patient.csv")

### Repertoire overlap table

In [None]:
clone_table_individual_binary  <- clone_table_individual_binary[,
                                                                c(1,order((colnames(clone_table_individual_binary)[2:44]))+1)]

In [None]:
# Pairwise CDR3 alpha overlap between donors.
# Entry [i, j] is the fraction of the sequences present in donor j that are
# also present in donor i, so the table is not symmetric; a donor without any
# sequence yields NaN (0 / 0).
# Every column after the first (the sequence identifier) is treated as a donor.
donor_idx <- seq_len(ncol(clone_table_individual_binary))[-1]
donor_cols <- colnames(clone_table_individual_binary)[donor_idx]
clone_ids <- clone_table_individual_binary[[1]]

# Sequences detected in each donor, computed once per donor instead of once
# per donor pair
clones_by_donor <- lapply(donor_idx, function(k) {
  clone_ids[which(clone_table_individual_binary[[k]] > 0)]
})

overlap <- matrix(
  NA_real_,
  nrow = length(donor_cols), ncol = length(donor_cols),
  dimnames = list(NULL, donor_cols)
)
for (j in seq_along(clones_by_donor)) {
  own_clones <- clones_by_donor[[j]]
  shared <- vapply(
    clones_by_donor,
    function(other_clones) as.numeric(sum(own_clones %in% other_clones)),
    numeric(1)
  )
  overlap[, j] <- shared / length(own_clones)
}

# The leading placeholder column is kept because the next cell drops column 1
df_all5 <- cbind(data.frame(""), as.data.frame(overlap))

In [None]:
df_all5 <- df_all5[,2:44]
rownames(df_all5) <- colnames(df_all5)

In [None]:
# Blank the self-overlap on the diagonal (always 1 for a non-empty donor),
# presumably so it does not dominate the heatmap colour scale.
# Only the diagonal is zeroed: matching on `== 1` would also erase genuine
# complete overlaps between two different donors.
df25 <- as.matrix(df_all5)
diag(df25) <- 0
df25 <- as.data.frame(df25)

In [None]:
matrix_6  <- as.matrix(df25)

In [None]:
options(repr.plot.height = 12, repr.plot.width = 12)
pheatmap::pheatmap(matrix_6, cluster_rows = F, cluster_cols = F)

In [None]:
pheatmap::pheatmap(matrix_6, cluster_rows = T, cluster_cols = T)

# TCR properties peptides

Finally, we will evaluate the biochemical properties of the TCR CDR3 sequences using the Peptides package. 

In [None]:
cd8_l2_subcluster  <- readRDS("../data/processed/L2/cd8_l2_subcluster.rds")

In [None]:
library(Peptides)
library(Seurat)
library(dplyr)
library(ggpubr)

In [None]:
# Add physico-chemical properties of the CDR3 sequences to the metadata of a
# Seurat object.
#
# seurat: Seurat object whose meta.data holds the columns cdr3_B and cdr3_A1.
# Returns the same object with 21 extra (or overwritten) meta.data columns named
# <property>_<chain>, where property is one of pI, boman, charge, hmoment,
# hydrophobicity, mw, mz and chain is cdr3_B, cdr3_A1 or cdr3_clone (beta and
# alpha sequences pasted together). Cells lacking the required sequence(s)
# get NA.
add_TCR_properties  <- function(seurat){
    # Property functions, listed in the order the columns are created
    property_funs <- list(
        pI = pI,
        boman = boman,
        charge = charge,
        hmoment = hmoment,
        hydrophobicity = hydrophobicity,
        mw = mw,
        mz = mz
    )

    meta <- seurat@meta.data
    beta <- meta[["cdr3_B"]]
    alpha <- meta[["cdr3_A1"]]

    # For each chain: the sequences to score and which cells must become NA
    chains <- list(
        cdr3_B = list(sequence = beta, missing = is.na(beta)),
        cdr3_A1 = list(sequence = alpha, missing = is.na(alpha)),
        cdr3_clone = list(sequence = paste0(beta, alpha),
                          missing = is.na(beta) | is.na(alpha))
    )

    for (chain in names(chains)) {
        chain_seq <- chains[[chain]]$sequence
        chain_missing <- chains[[chain]]$missing
        for (property in names(property_funs)) {
            meta[[paste0(property, "_", chain)]] <- ifelse(
                chain_missing,
                NA_integer_,
                property_funs[[property]](chain_seq)
            )
        }
    }

    seurat@meta.data <- meta
    return(seurat)
    }

In [None]:
cd8_l2_subcluster  <- add_TCR_properties(cd8_l2_subcluster)

In [None]:
colnames(cd8_l2_subcluster@meta.data[,180:200])

In [None]:
cd8_l2_subcluster@meta.data[,180:200]

In [None]:
dir.create("../figures/tcr/peptides_cd8_allclones/")

## All clones

In [None]:
options(repr.plot.width = 4, repr.plot.height = 6.5)

# Violin plots of every TCR property (all cells), Ctrl vs Dia, saved as svg/png.
# The property columns are picked by name (the names add_TCR_properties()
# creates) rather than by position, so the loop does not depend on how many
# metadata columns precede them.
tcr_property_cols <- grep(
  "^(pI|boman|charge|hmoment|hydrophobicity|mw|mz)_cdr3_(B|A1|clone)$",
  colnames(cd8_l2_subcluster@meta.data),
  value = TRUE
)

for (prop in tcr_property_cols) {
  df <- data.frame(
    Score = cd8_l2_subcluster@meta.data[[prop]],
    Annotation = cd8_l2_subcluster$Disease
  )
  # Group median (MD) and the y position of its text label (1.2 x median);
  # the label itself shows the median, not the shifted position.
  dataMedian <- df %>%
    group_by(Annotation) %>%
    summarise(MD = median(Score, na.rm = TRUE)) %>%
    mutate(label_y = 1.2 * MD)

  p <- ggplot(df, aes(x = Annotation, y = Score)) +
    ggrastr::rasterise(
      geom_jitter(position = position_jitter(0.2), size = 1,
                  color = "grey70", alpha = 0.7)
    ) +
    geom_violin(aes(color = Annotation), scale = "width", alpha = 0.7) +
    theme_classic() +
    NoLegend() +
    theme(axis.text.x = element_text(angle = 45, vjust = 0.8, hjust = 0.8)) +
    scale_color_manual(values = c("blue", "#c41515ff")) +
    stat_summary(fun = "median",
                 geom = "crossbar",
                 width = 0.6,
                 colour = "grey20") +
    geom_text(data = dataMedian,
              aes(Annotation, label_y, label = round(MD, digits = 2)),
              size = 7) +
    xlab("") +
    # guide = "none" replaces the deprecated guide = FALSE
    scale_fill_continuous(guide = "none") +
    #scale_y_continuous(limits = c(NA, max())) +
    ggpubr::stat_compare_means(size = 7, label = "p.format", label.x = 1.3,
                               label.y.npc = 0.9) +
    ggtitle(prop) +
    theme(panel.background = element_blank(),
          axis.text.x = element_text(angle = 90),
          axis.ticks.x = element_blank()) +
    ggtheme()
  print(p)

  # Pass the plot explicitly instead of relying on the last plot displayed
  ggsave(paste0("../figures/tcr/peptides_cd8_allclones/", prop, ".svg"),
         plot = p, width = 7, height = 14, units = "cm")
  ggsave(paste0("../figures/tcr/peptides_cd8_allclones/", prop, ".png"),
         plot = p, width = 7, height = 14, units = "cm")
}

## All clones by patient

In [None]:
dir.create("../figures/tcr/peptides_cd8_bypatient/")

In [None]:
# Per-donor mean of every TCR property (all cells), Ctrl vs Dia, saved as
# svg/png. The property columns are picked by name (the names
# add_TCR_properties() creates) rather than by position, so the loop does not
# depend on how many metadata columns precede them.
tcr_property_cols <- grep(
  "^(pI|boman|charge|hmoment|hydrophobicity|mw|mz)_cdr3_(B|A1|clone)$",
  colnames(cd8_l2_subcluster@meta.data),
  value = TRUE
)

for (prop in tcr_property_cols) {
  df <- data.frame(
    Score = cd8_l2_subcluster@meta.data[[prop]],
    Condition = cd8_l2_subcluster$Condition,
    Sample_ID = cd8_l2_subcluster$Sample_ID,
    Patient_ID = cd8_l2_subcluster$Patient_ID,
    Disease = cd8_l2_subcluster$Disease
  )

  p <- df %>%
    group_by(Patient_ID, Disease) %>%
    # One value per donor: the mean score over that donor's cells
    summarise(avg_score = mean(Score, na.rm = TRUE), .groups = "drop") %>%
    ggplot(aes(x = Disease, y = avg_score)) + # you can change the x to whatever variable you're interested in
    geom_violin(alpha = 0.3, aes(fill = Disease), scale = "width") +
    stat_summary(fun = "median",
                 geom = "crossbar",
                 width = 0.75,
                 color = "grey30") +
    geom_beeswarm(size = 3, aes(fill = Disease), cex = 3,
                  shape = 21, color = "black", corral = "random") +
    scale_fill_manual(values = c("#1874cdff", "#c41515ff", "#eeb4b4ff")) +
    scale_color_manual(values = c("dodgerblue3", "#aa2a2aff", "#e88989ff")) +
    #ggpubr::stat_compare_means(comparisons = list( c(1,3), c(2,3), c(1,2)), size = 7)+
    ggpubr::stat_compare_means(size = 7, label = "p.format") +
    ggtitle(prop) +
    theme_classic() +
    xlab("") + ylab("") +
    theme(panel.background = element_blank(),
          axis.ticks.x = element_blank()) +
    ggtheme()
  print(p)

  # Pass the plot explicitly instead of relying on the last plot displayed
  ggsave(paste0("../figures/tcr/peptides_cd8_bypatient/", prop, ".svg"),
         plot = p, width = 9, height = 9, units = "cm")
  ggsave(paste0("../figures/tcr/peptides_cd8_bypatient/", prop, ".png"),
         plot = p, width = 9, height = 9, units = "cm")
}

## One clone counted just once

In [None]:
# Keep a single, randomly chosen cell per clone and Patient_Time.
#
# seurat: Seurat object whose meta.data holds cdr3_B, cdr3_A1 and Patient_Time.
# seed:   optional integer passed to set.seed() before sampling so the random
#         pick is reproducible; NULL (default) leaves the RNG untouched, as
#         before. Note that set.seed() changes the global RNG state.
# Returns the object subset to the sampled cells, with an extra `barcode`
# metadata column holding the cell names.
# A clone is the pair of CDR3 beta and CDR3 alpha (A1) amino-acid sequences;
# cells with missing sequences are grouped under the literal "NA" label and
# are therefore also reduced to one cell per Patient_Time.
one_random_clone  <- function(seurat, seed = NULL){
    if (!is.null(seed)) {
        set.seed(seed)
    }

    seurat$barcode  <- colnames(seurat)

    metadata_one_clone  <- seurat@meta.data %>%
        mutate(clone_aa = paste("CDR3b",cdr3_B,"CDR3a",cdr3_A1)) %>%
        group_by(clone_aa, Patient_Time) %>%
        slice_sample(n = 1) %>%
        ungroup()

    seurat  <- subset(seurat, barcode %in% metadata_one_clone$barcode)
    return(seurat)
    }

In [None]:
cd8_l2_subcluster_one_random_clone  <- one_random_clone(cd8_l2_subcluster)

In [None]:
dir.create("../figures/tcr/peptides_cd8_one_random_clone/")

In [None]:
options(repr.plot.width = 4, repr.plot.height = 6.5)

# Violin plots of every TCR property with each clone counted once, Ctrl vs Dia,
# saved as svg/png. The property columns are picked by name (the names
# add_TCR_properties() creates) rather than by position, so the loop does not
# depend on how many metadata columns precede them.
tcr_property_cols <- grep(
  "^(pI|boman|charge|hmoment|hydrophobicity|mw|mz)_cdr3_(B|A1|clone)$",
  colnames(cd8_l2_subcluster_one_random_clone@meta.data),
  value = TRUE
)

for (prop in tcr_property_cols) {
  df <- data.frame(
    Score = cd8_l2_subcluster_one_random_clone@meta.data[[prop]],
    Annotation = cd8_l2_subcluster_one_random_clone$Disease
  )
  # Group median (MD) and the y position of its text label (1.2 x median);
  # the label itself shows the median, not the shifted position.
  dataMedian <- df %>%
    group_by(Annotation) %>%
    summarise(MD = median(Score, na.rm = TRUE)) %>%
    mutate(label_y = 1.2 * MD)

  p <- ggplot(df, aes(x = Annotation, y = Score)) +
    ggrastr::rasterise(
      geom_jitter(position = position_jitter(0.2), size = 1,
                  color = "grey70", alpha = 0.7)
    ) +
    geom_violin(aes(color = Annotation), scale = "width", alpha = 0.7) +
    theme_classic() +
    NoLegend() +
    theme(axis.text.x = element_text(angle = 45, vjust = 0.8, hjust = 0.8)) +
    scale_color_manual(values = c("blue", "#c41515ff", "#d87f7fff")) +
    stat_summary(fun = "median",
                 geom = "crossbar",
                 width = 0.6,
                 colour = "grey20") +
    geom_text(data = dataMedian,
              aes(Annotation, label_y, label = round(MD, digits = 2)),
              size = 7) +
    xlab("") +
    #scale_y_continuous(limits = c(NA, max())) +
    ggpubr::stat_compare_means(size = 7, label = "p.format", label.x = 1.3,
                               label.y.npc = 0.9) +
    ggtitle(prop) +
    theme(panel.background = element_blank(),
          axis.text.x = element_text(angle = 90),
          axis.ticks.x = element_blank()) +
    ggtheme()
  # Displayed once (the plot used to be printed twice per iteration)
  print(p)

  # Pass the plot explicitly instead of relying on the last plot displayed
  ggsave(paste0("../figures/tcr/peptides_cd8_one_random_clone/", prop, ".svg"),
         plot = p, width = 7, height = 14, units = "cm")
  ggsave(paste0("../figures/tcr/peptides_cd8_one_random_clone/", prop, ".png"),
         plot = p, width = 7, height = 14, units = "cm")
}

## One clone counted just once by patient

In [None]:
dir.create("../figures/tcr/peptides_cd8_one_random_bypatient/")

In [None]:
# Per-donor mean of every TCR property with each clone counted once, Ctrl vs
# Dia, saved as svg/png. The property columns are picked by name (the names
# add_TCR_properties() creates) rather than by position, so the loop does not
# depend on how many metadata columns precede them.
tcr_property_cols <- grep(
  "^(pI|boman|charge|hmoment|hydrophobicity|mw|mz)_cdr3_(B|A1|clone)$",
  colnames(cd8_l2_subcluster_one_random_clone@meta.data),
  value = TRUE
)

for (prop in tcr_property_cols) {
  df <- data.frame(
    Score = cd8_l2_subcluster_one_random_clone@meta.data[[prop]],
    Condition = cd8_l2_subcluster_one_random_clone$Condition,
    Sample_ID = cd8_l2_subcluster_one_random_clone$Sample_ID,
    Patient_ID = cd8_l2_subcluster_one_random_clone$Patient_ID,
    Disease = cd8_l2_subcluster_one_random_clone$Disease
  )

  p <- df %>%
    group_by(Patient_ID, Disease) %>%
    # One value per donor: the mean score over that donor's sampled cells
    summarise(avg_score = mean(Score, na.rm = TRUE), .groups = "drop") %>%
    ggplot(aes(x = Disease, y = avg_score)) + # you can change the x to whatever variable you're interested in
    geom_violin(alpha = 0.3, aes(fill = Disease), scale = "width") +
    stat_summary(fun = "median",
                 geom = "crossbar",
                 width = 0.75,
                 color = "grey30") +
    geom_beeswarm(size = 3, aes(fill = Disease), cex = 3,
                  shape = 21, color = "black", corral = "random") +
    scale_fill_manual(values = c("#1874cdff", "#c41515ff", "#eeb4b4ff")) +
    scale_color_manual(values = c("dodgerblue3", "#aa2a2aff", "#e88989ff")) +
    #ggpubr::stat_compare_means(comparisons = list( c(1,3), c(2,3), c(1,2)), size = 7)+
    ggpubr::stat_compare_means(size = 7, label = "p.format") +
    ggtitle(prop) +
    theme(panel.background = element_blank()) +
    ggtheme()
  print(p)

  # Pass the plot explicitly instead of relying on the last plot displayed
  ggsave(paste0("../figures/tcr/peptides_cd8_one_random_bypatient/", prop, ".svg"),
         plot = p, width = 12, height = 14, units = "cm")
  ggsave(paste0("../figures/tcr/peptides_cd8_one_random_bypatient/", prop, ".png"),
         plot = p, width = 12, height = 14, units = "cm")
}

## TCR properties table

In [None]:
# Example column index, handy for stepping through the body of
# get_tcr_prop_table() interactively; the function itself takes `i` as an
# argument and does not depend on this global.
i <- 185

In [None]:
# Compare one metadata column between Disease groups in four ways.
#
# Reads the global Seurat objects `cd8_l2_subcluster` and
# `cd8_l2_subcluster_one_random_clone`, takes their metadata column at
# position `i` as the score, and runs a two-group Wilcoxon test
# (`wilcox.test(..., conf.int = TRUE)`) on:
#   1. "All clones"               - every row of `cd8_l2_subcluster`
#   2. "Random clones"            - every row of `cd8_l2_subcluster_one_random_clone`
#   3. "All clones by patient"    - per-patient mean scores of (1)
#   4. "Random clones by patient" - per-patient mean scores of (2)
# Rows with a missing score are dropped before testing.
#
# @param i Position of the score column in `@meta.data`.
# @return A data.frame with one row per comparison (in the order above) and
#   the columns name, cell_type, test_type, estimate, pval, mean_dia,
#   mean_ctrl, upper, lower.
get_tcr_prop_table  <- function(i) {

  # Run one comparison on `seu` and return it as a one-row data.frame.
  # With `by_patient = TRUE` the scores are first averaged per
  # Patient_ID/Disease so that each patient contributes a single value.
  compare_disease_groups <- function(seu, test_type, by_patient) {
    scores <- data.frame(
      Score = seu@meta.data[, i],
      Disease = seu$Disease
    )
    if (by_patient) {
      scores$Patient_ID <- seu$Patient_ID
    }
    scores <- dplyr::filter(scores, !is.na(Score))

    if (by_patient) {
      scores <- scores %>%
        dplyr::group_by(Patient_ID, Disease) %>%
        dplyr::summarise(Score = mean(Score), .groups = "drop")
    }

    # wilcox.test() itself errors unless Disease has exactly two groups.
    wcx <- wilcox.test(Score ~ Disease, data = scores, conf.int = TRUE)

    group_means <- scores %>%
      dplyr::group_by(Disease) %>%
      dplyr::summarise(mean = mean(Score), .groups = "drop")

    # NOTE(review): mean_ctrl / mean_dia are picked by position, i.e. this
    # assumes the control group sorts first and the diabetes group second in
    # Disease - confirm against the factor levels of the objects.
    data.frame(
      name = colnames(seu@meta.data)[i],
      cell_type = "CD8",
      test_type = test_type,
      estimate = wcx$estimate,
      pval = wcx$p.value,
      mean_dia = group_means$mean[2],
      mean_ctrl = group_means$mean[1],
      upper = wcx$conf.int[2],
      lower = wcx$conf.int[1]
    )
  }

  rbind(
    compare_disease_groups(cd8_l2_subcluster,
      test_type = "All clones", by_patient = FALSE
    ),
    compare_disease_groups(cd8_l2_subcluster_one_random_clone,
      test_type = "Random clones", by_patient = FALSE
    ),
    compare_disease_groups(cd8_l2_subcluster,
      test_type = "All clones by patient", by_patient = TRUE
    ),
    compare_disease_groups(cd8_l2_subcluster_one_random_clone,
      test_type = "Random clones by patient", by_patient = TRUE
    )
  )
}

In [None]:
# Build the comparison table for each metadata column at positions 180-200;
# the result is a list with one element per column.
tcr_all_props_table <- map(180:200, get_tcr_prop_table)

In [None]:
# Stack the per-column tables into a single data frame.
tcr_all_props_table2 <- tcr_all_props_table %>% bind_rows()

In [None]:
# Display the comparisons ordered by ascending p-value.
arrange(tcr_all_props_table2, pval)

In [None]:
# Create the output directory for the tables. Guarded so that re-running the
# cell does not warn when the directory already exists, and
# `recursive = TRUE` so that missing parent directories are created as well.
if (!dir.exists("../tables/tcr/peptides/")) {
  dir.create("../tables/tcr/peptides/", recursive = TRUE)
}

In [None]:
# Export the combined comparison table.
write.csv(
  x = tcr_all_props_table2,
  file = "../tables/tcr/peptides/cd8_tcr_all_props_table.csv"
)