In [None]:
source("diabetes_analysis_v07.R")

# Full dataset

In [None]:
# Load the filtered full CD4 dataset. Below it is used as a Seurat-style object
# (meta.data slot, DimPlot / FeaturePlot calls).
new_dia_cd4  <- readRDS("../data/processed/L1/cd4_l1_full_filt.rds")

## TCR analysis

In [None]:
# Work on a plain copy of the per-cell metadata table of the full dataset.
metadata_6 <- new_dia_cd4@meta.data 

In [None]:
# Build per-cell clonotype labels from the CDR3 columns.
# paste() turns missing values into the literal text "NA", so a cell without
# any TCR gets the label "CDR3b NA CDR3a NA".
metadata_6 <- dplyr::mutate(
  metadata_6,
  clone_nt = paste(cdr3_B_nt, cdr3_A1_nt, cdr3_A2_nt, sep = " "),
  clone_aa = paste("CDR3b", cdr3_B, "CDR3a", cdr3_A1, sep = " ")
)

In [None]:
# Number of cells per amino-acid clonotype label, largest first.
# The former `sum = sum()` column was dropped: sum() without arguments is
# always 0, so the column carried no information.
clone_table <- metadata_6 %>%
  dplyr::group_by(clone_aa) %>%
  dplyr::summarize(n = n()) %>%
  arrange(desc(n))

clone_table

In [None]:
# Constant helper column used only as an extra grouping key below.
metadata_6$test <- 0

# Clone abundance = number of cells that share the same clone_aa label.
metadata_6 <- metadata_6 %>%
  group_by(test, clone_aa) %>%
  mutate(clone_abundance = as.numeric(n()))

# Groups larger than 1000 cells are blanked out before taking log2.
# NOTE(review): presumably this removes the "CDR3b NA CDR3a NA" pseudo-clone
# formed by cells without TCR data - confirm the cutoff suits this dataset.
metadata_6 <- metadata_6 %>%
  mutate(
    clone_abundance = as.numeric(
      ifelse(clone_abundance > 1000, NA_integer_, clone_abundance)
    ),
    log_clone_abundance = log(clone_abundance, base = 2)
  ) %>%
  as.data.frame()
# The grouped pipeline drops row names; restore the cell barcodes.
rownames(metadata_6) <- colnames(new_dia_cd4)

# Write the extended table back into the object.
new_dia_cd4@meta.data <- metadata_6
rownames(new_dia_cd4@meta.data) <- colnames(new_dia_cd4)

new_dia_cd4 <- AddMetaData(
  new_dia_cd4,
  metadata = as.numeric(metadata_6$clone_abundance),
  col.name = "clone_abundance"
)
rownames(new_dia_cd4@meta.data) <- colnames(new_dia_cd4)

# Log2 clone abundance on the UMAP, all cells (rasterised).
FeaturePlot(
  new_dia_cd4,
  reduction = "umap",
  features = "log_clone_abundance",
  raster = TRUE
)

# Same feature, restricted to cells that have a clone abundance value.
FeaturePlot(
  subset(new_dia_cd4, clone_abundance >= 1 & clone_abundance < 2000),
  reduction = "umap",
  features = "log_clone_abundance",
  cols = c("lightblue", "firebrick")
) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("Log2 clone abundance")

# Reference embedding coloured by the active identity.
DimPlot(new_dia_cd4, raster = TRUE)

# Clone abundance bar graph
# Bin clone sizes into labelled classes. case_when() takes the first matching
# branch, so each branch only needs its lower bound.
# Fix: the "6-10" bin previously started at > 6, so a clone of exactly 6 cells
# matched no bin and fell through to the "1" fallback.
# NOTE(review): the fallback still labels cells with missing abundance as "1";
# confirm whether they should be NA instead.
metadata_6 <- metadata_6 %>%
  mutate(clone_abundance_group = case_when(
    clone_abundance > 30 ~ ">30",
    clone_abundance > 10 ~ "11-30",
    clone_abundance > 5 ~ "6-10",
    clone_abundance > 2 ~ "3-5",
    clone_abundance == 2 ~ "2",
    clone_abundance == 1 ~ "1",
    TRUE ~ "1"
  ))

In [None]:
# Save one UMAP per clone for ranks 2..100 of clone_table, highlighting the
# cells that carry that clone.
# Rank 1 is skipped as in the original loop.
# NOTE(review): presumably rank 1 is the "no TCR" pseudo-clone - confirm.
# Changes from the previous version:
#  * cells are selected by exact label equality instead of regex substring
#    matching, so longer labels or regex metacharacters cannot pull in cells
#    from other clones;
#  * the range is capped at the table length;
#  * the plot is passed to ggsave() explicitly instead of relying on the
#    implicit "last plot".
clone_ids <- pull(clone_table, clone_aa)
for (i in seq_len(min(100, length(clone_ids)))[-1]) {
  clone_id <- clone_ids[i]
  clone_cells <- colnames(new_dia_cd4)[which(new_dia_cd4$clone_aa == clone_id)]
  clone_plot <- DimPlot(new_dia_cd4, cells.highlight = clone_cells, raster = FALSE) +
    ggtitle(clone_id)
  ggsave(
    filename = paste0("../figures/tcr/cd4_tcr/tcr_plots_cd4_full/tcr_clone_", i, ".png"),
    plot = clone_plot,
    create.dir = TRUE,
    width = 13, height = 10, units = "cm"
  )
}

## iNKT and MAIT sequences

In [None]:
options(repr.plot.width = 6, repr.plot.height = 4.5)

# Highlight every cell whose first alpha-chain CDR3 contains this sequence.
DimPlot(
  new_dia_cd4,
  cells.highlight = colnames(new_dia_cd4)[
    grepl("CVVSDRGSTLGRLYF", new_dia_cd4$cdr3_A1)
  ]
)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

# For each listed CDR3 sequence, show and save a UMAP highlighting the cells
# whose cdr3_A1 contains it.
# Changes from the previous version:
#  * the list is de-duplicated (several entries were repeated, so the same
#    plot was rendered and written more than once);
#  * matching uses fixed = TRUE (plain substring, same hits for these
#    letter-only patterns, no regex interpretation);
#  * the plot object is passed to ggsave() explicitly.
# NOTE(review): many entries start with "CASS", which looks like beta-chain
# CDR3 sequences, yet only cdr3_A1 is searched - confirm whether cdr3_B
# should be searched for those.
for(i in unique(c(
"CASARGVNEQYF","CASRGQGLGEQYF","CASRYYSVQGRTDTQYF","CASSAMDTEAFF",
"CASSAPLAGHYEQYF","CASSAWDGYEQYF","CASSDGFTDTQYF","CASSDLGLAGVIEQFF",
"CASSDLMGPDNYEQYF","CASSDLPETQYF","CASSDQNTEAFF","CASSDRANEQFF",
"CASSDRLAGDTQYF","CASSDRRQGAHQPQHF","CASTSLETSQYF",
"CASSEAGSGEKLFF","CASSEALILFF","CASSEAPWRDSGNTIYF",
"CASSEEGALKESVGTQYF","CASSEEGALKESVGTQYF","CASSEFDGGQETQYF","CASSEFGGTERTQETQYF",
"CASSEFGGTERTQETQYF","CASSEFGQSADEQFF","CASSEGGQDYEQYF","CASSEGTAGTDTQYF",
"CASSEGTGPNSPLHF","CASSEGWEQYF","CASSELLRGQGRTGELFF","CASSELTDTQYF",
"CASSEMGQGVYTF","CASSENSGTGRIYEQYF","CASSEPSSGNTIYF","CASSEPTGLGTDTQYF",
"CASSESATGFSPLHF","CASSESGGSTEAFF","CASSESLAGGYNEQFF","CASSESVETQYF",
"CASSEWAGGQETQYF","CASSEWEDITDTQYF","CASSEWGRTQETQYF","CASSEWGTNEKLFF",
"CASSEYESTNEKLFF","CASSEYESTNEKLFF","CASSEYFAGFNEQYF","CASSEYGTLQETYF",
"CASSEYMEAGIPTDTQYF","CASSEYMEGGEKLFF","CASSEYRRRSGEKLFF","CASSFGGETQYF",
"CASSGDRRQGAHQPQHF","CASSGLLTGPDTQYF","CASSGLRDRGLYEQYF","CASSGTGGAFDEQFF",
"CASSGTVTEAFF","CASSGYQGGGETQYF","CASSPIGGHGYEQYF","CASSPRDRWHEQYF",
"CASSRGGFDEQYF","CASSRGGGTEAFF","CASSRGGYTEAFF","CASSTGGADEKLFF",
"CASSVPLRDYEQYF","CASTGASGTYEQYF","CASTPRKGTDVGNTIYF","CASTPSGGWSSDTQYF",
'CAVRDSNYQLIW', 'CAVMDSNYQLIW', 'CAVLDSNYQLIW', 'CAVMDSSYKLIF', 'CAVTDSNYQLIW', 
    'CAVRDGDYKLSF', 'CAVKDSNYQLIW', 'CAAMDSNYQLIW', 'CAALDSNYQLIW', 'CALNDYKLSF', 
    'CAVVDSNYQLIW', 'CVVSDRGSTLGRLYF', 'CAVIDSNYQLIW', 'CAENTGGFKTIF', 'CAVSDSNYQLIW', 
    'CALSGGSNYKLTF', 'CAVEDQTGANNLFF', 'CALSDSGGGADGLTF', 'CAVRDRDYKLSF', 'CAGMDSNYQLIW', 
    'CAVNDYKLSF', 'CAPMDSNYQLIW', 'CASMDSNYQLIW', 'CAVNRDDKIIF', 'CAENSGGSNYKLTF', 
    'CAPLDSNYQLIW', 'CALNSGGSNYKLTF', 'CVVNDYKLSF', 'CALSSNDYKLSF', 'CAASNQAGTALIF', 
    'CVVNTGNQFYF', 'CVVNTNAGKSTF', 'CAVEDTGGFKTIF', 'CAVEDSNYQLIW', 'CAVDNYGQNFVF', 
    'CALSDSGGSNYKLTF', 'CAVMDSSYKLIF', 'CAVNTGGFKTIF', 'CAVRDGNYQLIW', 
    'CALNTGFQKLVF', 'CAENTGTASKLTF', 'CAATDSNYQLIW', 'CAVNQAGTALIF', 
    'CAENYGGSQGNLIF', 'CAVLNRDDKIIF', 'CAVEDNYGQNFVF', 'CAVNDYKLSF', 
    'CVVNNARLMF', 'CAVDNYGQNFVF', 'CAVDSSASKIIF', 'CALIYNQGGKLIF', 
    'CALNTGGFKTIF', 'CAENNAGNMLTF', 'CAVLDSSYKLIF', 'CAAMDSSYKLIF')))
{
   matching_cells <- colnames(new_dia_cd4)[grepl(i, new_dia_cd4$cdr3_A1, fixed = TRUE)]
   sequence_plot <- DimPlot(new_dia_cd4, cells.highlight = matching_cells) + ggtitle(i)
   print(sequence_plot)
   ggsave(
     filename = paste0("../figures/tcr/cd4_tcr/invariant_sequences2_cd4_full/",i,".png"),
     plot = sequence_plot,
     create.dir = TRUE,
     width = 20, height = 16, units = "cm"
   )
}

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

# Save one UMAP per listed CDR3 sequence, highlighting the cells whose cdr3_A1
# contains it.
# Changes from the previous version:
#  * the output path started with "./figures", unlike every other figure path
#    here ("../figures"); it now follows the common location;
#  * the plot object is passed to ggsave() explicitly;
#  * matching uses fixed = TRUE (plain substring, same hits for these
#    letter-only patterns).
for(i in c(
"CAALDSNYQLIW",
"CAAMDSNYQLIW",
"CARSDSNYQLIW",
"CASMDSNYQLIW",
"CASSDSGESGTEAFF",
"CASSPSGGDYNEQFF",
"CASSQIAGGQQETQYF",
"CAVLDSNYQLIW",
"CAVMDSNYQLIW",
"CAVNGDDYKLSF",
"CAVRDGDYKLSF",
"CAVRDSDYKLSF",
"CAVRDSNYQLIQW",
"CAVRDSNYQLIW",
"CAVSDSNYQLIW",
"CAVSLQDYKLSF",
"CSARQGAESREQYF"

)){
   matching_cells <- colnames(new_dia_cd4)[grepl(i, new_dia_cd4$cdr3_A1, fixed = TRUE)]
   sequence_plot <- DimPlot(new_dia_cd4, pt.size = 0.5, cells.highlight = matching_cells) +
     ggtitle(i) +
     NoLegend() +
     theme(axis.text = element_text(size = 25), title = element_text(size = 25))
   ggsave(
     filename = paste0("../figures/tcr/cd4_tcr/mait_sequences_cd4/",i,".png"),
     plot = sequence_plot,
     create.dir = TRUE,
     width = 18, height = 18, units = "cm"
   )
}

### Sequencing sufficiency

In [None]:
# Flag, per cell, whether any alpha-chain CDR3 (A1 or A2) and whether a
# beta-chain CDR3 is present. "NA" here is a text label, not a missing value.
new_dia_cd4@meta.data <- new_dia_cd4@meta.data %>%
  mutate(
    clone_status_tra = ifelse(is.na(cdr3_A1) & is.na(cdr3_A2), "NA", "OK"),
    clone_status_trb = ifelse(is.na(cdr3_B), "NA", "OK")
  )

DimPlot(
  new_dia_cd4,
  group.by = "clone_status_tra",
  cols = c("indianred2", "palegreen3")
) +
  ggtitle("Clone status TRA")
ggsave(
  "../figures/tcr/cd4_tcr/seq_sufficiency/do_we_have_data_tra.png",
  create.dir = TRUE,
  width = 14, height = 10, units = "cm", dpi = 120
)

DimPlot(
  new_dia_cd4,
  group.by = "clone_status_trb",
  cols = c("indianred2", "palegreen3")
) +
  ggtitle("Clone status TRB")
ggsave(
  "../figures/tcr/cd4_tcr/seq_sufficiency/do_we_have_data_trb.png",
  width = 14, height = 10, units = "cm", dpi = 120
)


# Share of cells with / without alpha-chain data.
new_dia_cd4@meta.data %>%
  group_by(clone_status_tra) %>%
  summarize(n = n()) %>%
  mutate(freq = n / sum(n))

# Share of cells with / without beta-chain data.
new_dia_cd4@meta.data %>%
  group_by(clone_status_trb) %>%
  summarize(n = n()) %>%
  mutate(freq = n / sum(n))

# CD4 subcluster

In [None]:
# Load the CD4 subcluster object; the rest of the notebook works on it.
cd4_subcluster  <- readRDS("../data/processed/L2/cd4_subcluster.rds")

In [None]:
# Replace the working metadata table with the subcluster's per-cell metadata.
metadata_6 <- cd4_subcluster@meta.data 

In [None]:
# Build per-cell clonotype labels from the CDR3 columns.
# paste() turns missing values into the literal text "NA".
metadata_6 <- dplyr::mutate(
  metadata_6,
  clone_nt = paste(cdr3_B_nt, cdr3_A1_nt, cdr3_A2_nt, sep = " "),
  clone_aa = paste("CDR3b", cdr3_B, "CDR3a", cdr3_A1, sep = " ")
)

In [None]:
# Number of cells per amino-acid clonotype label, largest first.
# The former `sum = sum()` column was dropped: sum() without arguments is
# always 0, so the column carried no information.
clone_table <- metadata_6 %>%
  dplyr::group_by(clone_aa) %>%
  dplyr::summarize(n = n()) %>%
  arrange(desc(n))

clone_table

In [None]:
# Store the metadata (now including clone_nt / clone_aa) back in the object
# and make sure its row names are the cell barcodes.
cd4_subcluster@meta.data  <- metadata_6
rownames(cd4_subcluster@meta.data)  <- colnames(cd4_subcluster)

In [None]:
# Constant helper column used only as an extra grouping key below.
metadata_6$test <- 0

# Clone abundance = number of cells that share the same clone_aa label.
metadata_6 <- metadata_6 %>%
  group_by(test, clone_aa) %>%
  mutate(clone_abundance = as.numeric(n()))

# Groups larger than 1000 cells are blanked out before taking log2.
# NOTE(review): presumably this removes the "CDR3b NA CDR3a NA" pseudo-clone
# formed by cells without TCR data - confirm the cutoff suits this dataset.
metadata_6 <- metadata_6 %>%
  mutate(
    clone_abundance = as.numeric(
      ifelse(clone_abundance > 1000, NA_integer_, clone_abundance)
    ),
    log_clone_abundance = log(clone_abundance, base = 2)
  ) %>%
  as.data.frame()
# The grouped pipeline drops row names; restore the cell barcodes.
rownames(metadata_6) <- colnames(cd4_subcluster)

# Write the extended table back into the object.
cd4_subcluster@meta.data <- metadata_6
rownames(cd4_subcluster@meta.data) <- colnames(cd4_subcluster)

cd4_subcluster <- AddMetaData(
  cd4_subcluster,
  metadata = as.numeric(metadata_6$clone_abundance),
  col.name = "clone_abundance"
)
rownames(cd4_subcluster@meta.data) <- colnames(cd4_subcluster)

# Log2 clone abundance on the UMAP, all cells (rasterised).
FeaturePlot(
  cd4_subcluster,
  reduction = "umap",
  features = "log_clone_abundance",
  raster = TRUE
)

# Same feature, restricted to cells that have a clone abundance value.
FeaturePlot(
  subset(cd4_subcluster, clone_abundance >= 1 & clone_abundance < 2000),
  reduction = "umap",
  features = "log_clone_abundance",
  cols = c("lightblue", "firebrick")
) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("Log2 clone abundance")

# Reference embedding coloured by the active identity.
DimPlot(cd4_subcluster, raster = TRUE)

# Clone abundance bar graph
# Bin clone sizes into labelled classes. case_when() takes the first matching
# branch, so each branch only needs its lower bound.
# Fix: the "6-10" bin previously started at > 6, so a clone of exactly 6 cells
# matched no bin and fell through to the "1" fallback.
# NOTE(review): the fallback still labels cells with missing abundance as "1";
# confirm whether they should be NA instead.
metadata_6 <- metadata_6 %>%
  mutate(clone_abundance_group = case_when(
    clone_abundance > 30 ~ ">30",
    clone_abundance > 10 ~ "11-30",
    clone_abundance > 5 ~ "6-10",
    clone_abundance > 2 ~ "3-5",
    clone_abundance == 2 ~ "2",
    clone_abundance == 1 ~ "1",
    TRUE ~ "1"
  ))

In [None]:
options(repr.plot.width = 4, repr.plot.height = 3)

# Compact version of the log2 clone abundance UMAP for cells that have a
# clone abundance value.
FeaturePlot(
  subset(cd4_subcluster, clone_abundance >= 1 & clone_abundance < 1000),
  reduction = "umap",
  features = "log_clone_abundance",
  cols = c("aliceblue", "firebrick")
) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("Log2 clone abundance")


### Clones dotplots dimplots

In [None]:
# Recompute the clonotype labels directly on the object's metadata.
# paste() turns missing values into the literal text "NA".
cd4_subcluster@meta.data <- dplyr::mutate(
  cd4_subcluster@meta.data,
  clone_nt = paste(cdr3_B_nt, cdr3_A1_nt, cdr3_A2_nt, sep = " "),
  clone_aa = paste("CDR3b", cdr3_B, "CDR3a", cdr3_A1, sep = " ")
)

In [None]:
# Save one UMAP per clone for ranks 2..300 of clone_table, highlighting the
# cells that carry that clone.
# Rank 1 is skipped as in the original loop.
# NOTE(review): presumably rank 1 is the "no TCR" pseudo-clone - confirm.
# Changes from the previous version:
#  * create.dir = TRUE, because nothing earlier creates this output folder;
#  * cells are selected by exact label equality instead of regex substring
#    matching;
#  * the range is capped at the table length;
#  * the plot is passed to ggsave() explicitly.
clone_ids <- pull(clone_table, clone_aa)
for (i in seq_len(min(300, length(clone_ids)))[-1]) {
  clone_id <- clone_ids[i]
  clone_cells <- colnames(cd4_subcluster)[which(cd4_subcluster$clone_aa == clone_id)]
  clone_plot <- DimPlot(cd4_subcluster, raster = FALSE, cells.highlight = clone_cells) +
    ggtitle(clone_id)
  ggsave(
    filename = paste0("../figures/tcr_plots/tcr_plots_cd4_subcluster/tcr_clone_", i, ".png"),
    plot = clone_plot,
    create.dir = TRUE,
    width = 13, height = 10, units = "cm"
  )
}

### Clone abundance bar graph

In [None]:
options(repr.plot.width = 3.5, repr.plot.height = 3)

# Bin clone sizes for the stacked bar chart. Cells with missing abundance
# match no branch and stay NA, so the filter below removes them.
# Fix: the top bin used > 10, so a clone of exactly 10 cells was labelled
# "6-9"; the threshold now matches the "10+" / "6-9" labels.
metadata_6 <- metadata_6 %>%
  mutate(clone_abundance_group = case_when(
    clone_abundance >= 10 ~ "10+",
    clone_abundance > 5 ~ "6-9",
    clone_abundance > 3 ~ "4-5",
    clone_abundance > 1 ~ "2-3",
    clone_abundance <= 1 ~ "1"
  ))

# Proportion of each clone-size class per cluster.
# Fix: with coord_flip() the x aesthetic (cluster) is drawn on the vertical
# axis, so "Frequency" belongs to the y title; the titles were swapped.
# NOTE(review): the cluster order is a hard-coded permutation of 9 levels.
abundance_bar <- metadata_6 %>%
  filter(!is.na(clone_abundance_group)) %>%
  ggplot(aes(x = factor(annotations_manual,
                        levels = levels(factor(annotations_manual))[c(9,3,5,8,6,4,1,2,7)]))) +
  geom_bar(
    aes(fill = factor(clone_abundance_group, levels = c("1","2-3","4-5","6-9","10+"))),
    position = "fill"
  ) +
  coord_flip() +
  scale_fill_brewer(palette = "Blues") +
  xlab("") +
  ylab("Frequency") +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "right",
        panel.border = element_blank(),
        legend.title = element_blank()) +
  ggtitle("Clone abundance in clusters")

abundance_bar

ggsave(filename = "../figures/tcr_plots/Log2CloneAbundance_bar_clusters_cd4.png", plot = abundance_bar,
       create.dir = TRUE, width = 14, height = 6, units = "cm", dpi = 120)
ggsave(filename = "../figures/tcr_plots/Log2CloneAbundance_bar_clusters_cd4.svg", plot = abundance_bar,
       width = 14, height = 6, units = "cm", dpi = 120)

## CDR3 beta length

In [None]:
# Length (number of characters) of the beta-chain CDR3 sequence per cell.
metadata_6[["cdr3_B_nchar"]] <- nchar(metadata_6[["cdr3_B"]])

In [None]:
options(repr.plot.width = 17)

# Number of cells per CDR3-beta length and sample, compared between control
# and diabetes (T0 and T1 together), one facet per length.
# Fixes:
#  * `mutate(Disease == ...)` was a comparison, not an assignment; it added a
#    logical column and left Disease untouched. It is now `Disease = ...`.
#  * the y axis shows cell counts, not log2 clonal abundance.
metadata_6 %>%
  filter(Condition %in% c("Ctrl T0", "Dia T0", "Ctrl T1", "Dia T1")) %>%
  mutate(Disease = ifelse(grepl(pattern = "Ctrl", x = Condition), "Ctrl", "Dia")) %>%
  group_by(Disease, Sample_ID, cdr3_B_nchar) %>%
  tally() %>%
  filter(!is.na(cdr3_B_nchar)) %>%
  ggplot(aes(x = Disease, y = n)) +
  geom_boxplot(outlier.shape = NA, aes(fill = Disease), alpha = 0.4) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(width = 0.2, height = 0.5, size = 0.5, alpha = 1, aes(color = Disease)) +
  facet_wrap(~factor(cdr3_B_nchar),
             scales = "fixed", ncol = 19, drop = TRUE) +
  ylab("Number of cells") +
  xlab("CDR3 beta length") +
  theme_classic() +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  theme(strip.background = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

- count each sequence in a patient as many times as it occurs
- count the number of cells with a given CDR3 length for each patient
- then plot the frequency of each length within that patient
- ctrl and dia both from T0 and T1

In [None]:
options(repr.plot.width = 17)

# Per-patient frequency of each CDR3-beta length (every cell counted),
# compared between control and diabetes.
# Fixes:
#  * `mutate(Disease == ...)` was a comparison, not an assignment.
#  * cells without a beta-chain CDR3 are removed BEFORE the frequencies are
#    computed; previously they stayed in the denominator and were dropped
#    only before plotting, so plotted frequencies depended on each patient's
#    TCR recovery rate.
test_length <- metadata_6 %>%
  filter(Condition %in% c("Ctrl T0", "Dia T0", "Ctrl T1", "Dia T1"),
         !is.na(cdr3_B_nchar)) %>%
  mutate(Disease = ifelse(grepl(pattern = "Ctrl", x = Condition), "Ctrl", "Dia")) %>%
  group_by(Disease, Patient_ID, cdr3_B_nchar) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(freq = n / sum(n))

#test_length  %>% group_by(Patient_ID)  %>% summarise(sum = sum(freq))

test_length %>%
  filter(!is.na(cdr3_B_nchar)) %>%
  ggplot(aes(x = Disease, y = freq)) +
  geom_boxplot(outlier.shape = NA, aes(fill = Disease), alpha = 0.4) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(width = 0.2, height = 0, size = 0.5, alpha = 1, aes(color = Disease)) +
  facet_wrap(~factor(cdr3_B_nchar),
             scales = "fixed", ncol = 19, drop = TRUE) +
  ylab("Frequency") +
  xlab("CDR3 beta length") +
  theme_classic() +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  theme(strip.background = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

- count each sequence in a patient only once
- count the number of sequences with a given CDR3 length for each patient
- then plot the frequency of each length within that patient

In [None]:
options(repr.plot.width = 17)

# Per-patient frequency of each CDR3-beta length, counting every distinct
# sequence once.
# Fixes:
#  * `mutate(Disease == ...)` was a comparison, not an assignment.
#  * rows without a beta-chain CDR3 are removed before the frequencies are
#    computed, so the missing value no longer counts as a "sequence".
# NOTE(review): Condition is part of the de-duplication key, so a sequence
# present at both T0 and T1 of one patient is counted twice - confirm that
# this is intended.
test_length <- metadata_6 %>%
  dplyr::select(Disease, Condition, Patient_ID, cdr3_B_nchar, cdr3_B) %>%
  unique() %>%
  filter(Condition %in% c("Ctrl T0", "Dia T0", "Ctrl T1", "Dia T1"),
         !is.na(cdr3_B_nchar)) %>%
  mutate(Disease = ifelse(grepl(pattern = "Ctrl", x = Condition), "Ctrl", "Dia")) %>%
  group_by(Disease, Patient_ID, cdr3_B_nchar) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(freq = n / sum(n))

# Sanity check: frequencies add up to 1 within each patient.
test_length %>% group_by(Patient_ID) %>% summarise(sum = sum(freq))

In [None]:
options(repr.plot.width = 27, repr.plot.height = 10)

# Plot the per-patient length frequencies held in test_length, one facet per
# CDR3-beta length, and save as PNG and SVG.
# NOTE(review): ggtheme() is added after theme(); if it is a complete theme
# it may override the strip / axis settings - confirm.
test_length %>%
  filter(!is.na(cdr3_B_nchar)) %>%
  ggplot(aes(x = Disease, y = freq)) +
  geom_boxplot(outlier.shape = NA, aes(fill = Disease), alpha = 0.4) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(
    width = 0.2, height = 0, size = 0.7, shape = 21, alpha = 0.7,
    aes(fill = Disease)
  ) +
  facet_wrap(
    ~factor(cdr3_B_nchar),
    scales = "fixed", ncol = 19, drop = TRUE
  ) +
  scale_fill_manual(values = c("dodgerblue", "red2")) +
  ylab("Frequency") +
  xlab("CDR3 beta length") +
  theme_classic() +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format", size = 7) +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  ) +
  ggtheme()
ggsave(
  filename = "../figures/tcr_plots/tcr_length/cd4.png",
  create.dir = TRUE, width = 20, height = 10, units = "cm"
)
ggsave(
  filename = "../figures/tcr_plots/tcr_length/cd4.svg",
  width = 20, height = 10, units = "cm"
)

### Size of clones - cluster

In [None]:
options(repr.plot.width = 8, repr.plot.height = 5)

# Log2 clone abundance of every cell, by cluster; clusters are ordered by
# their mean log2 abundance.
# NOTE(review): ylim() is a scale limit, so values outside 0-8 (including
# points jittered below 0) are removed rather than clipped - confirm intended.
metadata_6 %>%
  group_by(clone_aa, annotations_manual) %>%
  ggplot(aes(
    x = reorder(annotations_manual, log_clone_abundance, mean, na.rm = TRUE),
    y = log_clone_abundance
  )) +
  geom_boxplot(outlier.shape = NA) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(width = 0.2, height = 0.5, size = 0.5, alpha = 0.2) +
  ylab("Log2 clonal abundance") +
  xlab("Cluster") +
  theme_classic() +
  ylim(c(0, 8)) +
  ggpubr::stat_compare_means(label.x.npc = 0.3)

#ggsave("v01_clones/plots_cd4/size_of_clones1.png", width = 11, height = 8, units = "cm", dpi = 120)

In [None]:
options(repr.plot.width = 14, repr.plot.height = 4)

# Log2 clone abundance per cell at T0, control vs diabetes, one facet per
# cluster (facets ordered by mean log2 abundance).
metadata_6 %>%
  filter(Condition %in% c("Ctrl T0", "Dia T0")) %>%
  group_by(clone_aa, annotations_manual, Condition) %>%
  ggplot(aes(x = Condition, y = log_clone_abundance)) +
  geom_boxplot(outlier.shape = NA, aes(fill = Condition), alpha = 0.4) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(
    width = 0.2, height = 0.5, size = 0.5, alpha = 0.2,
    aes(color = Condition)
  ) +
  facet_wrap(
    ~factor(
      annotations_manual,
      levels = levels(factor(reorder(annotations_manual, log_clone_abundance, mean, na.rm = TRUE)))
    ),
    scales = "fixed", ncol = 11, drop = TRUE
  ) +
  scale_color_manual(values = c("green4", "indianred2")) +
  scale_fill_manual(values = c("green4", "indianred2")) +
  ylab("Log2 clonal abundance") +
  xlab("Cluster") +
  theme_classic() +
  ylim(c(0, 8)) +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

#ggsave("v01_clones/plots_cd4/size_of_clones2.png", width = 11, height = 8, units = "cm", dpi = 120)

In [None]:
options(repr.plot.width = 14, repr.plot.height = 4)

# Mean log2 clone abundance per cluster, condition and Patient_Time at T0,
# control vs diabetes, one facet per cluster.
metadata_6 %>%
  filter(Condition %in% c("Ctrl T0", "Dia T0")) %>%
  group_by(annotations_manual, Condition, Patient_Time) %>%
  summarize(mean_log_clone_abundance = mean(log_clone_abundance, na.rm = TRUE)) %>%
  ggplot(aes(x = Condition, y = mean_log_clone_abundance)) +
  geom_boxplot(outlier.shape = NA, aes(fill = Condition), alpha = 0.4) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(
    width = 0.2, height = 0.5, size = 0.5, alpha = 1,
    aes(color = Condition)
  ) +
  facet_wrap(
    ~factor(
      annotations_manual,
      levels = levels(factor(reorder(annotations_manual, mean_log_clone_abundance, mean, na.rm = TRUE)))
    ),
    scales = "fixed", ncol = 11, drop = TRUE
  ) +
  scale_color_manual(values = c("green4", "indianred2")) +
  scale_fill_manual(values = c("green4", "indianred2")) +
  ylab("Log2 clonal abundance") +
  xlab("Cluster") +
  theme_classic() +
  ylim(c(0, 5)) +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  theme(
    strip.background = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

#ggsave("v01_clones/plots_cd4/size_of_clones3.png", width = 11, height = 8, units = "cm", dpi = 120)

# TRAV and TRBV usage

### TRAV

In [None]:
# Per-patient usage frequency of each alpha-chain V gene (v_gene_A1).
# The wide/long round trip adds explicit zero rows for genes a patient does
# not use.
trav_usage <- metadata_6 %>%
  filter(!is.na(v_gene_A1)) %>%
  dplyr::group_by(Patient_ID, v_gene_A1) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n)) %>%
  arrange(desc(freq)) %>%
  dplyr::select(-n) %>%
  pivot_wider(
    names_from = "v_gene_A1",
    values_from = "freq",
    values_fill = 0
  ) %>%
  pivot_longer(
    !Patient_ID,
    names_to = "v_gene_A1",
    values_to = "freq"
  )

In [None]:
trav_usage

In [None]:
# Patient IDs whose first character is "1" are labelled "Dia", all others "Ctrl".
trav_usage <- trav_usage %>%
  mutate(Disease = ifelse(startsWith(as.character(Patient_ID), "1"), "Dia", "Ctrl"))

In [None]:
trav_usage

In [None]:
# Same computation as the preceding cells, in one step: per-patient usage
# frequency of each alpha-chain V gene, zero-filled, plus the disease label.
trav_usage <- metadata_6 %>%
  filter(!is.na(v_gene_A1)) %>%
  dplyr::group_by(Patient_ID, v_gene_A1) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n)) %>%
  arrange(desc(freq)) %>%
  dplyr::select(-n) %>%
  pivot_wider(
    names_from = "v_gene_A1",
    values_from = "freq",
    values_fill = 0
  ) %>%
  pivot_longer(
    !Patient_ID,
    names_to = "v_gene_A1",
    values_to = "freq"
  )

trav_usage

# Patient IDs whose first character is "1" are labelled "Dia", all others "Ctrl".
trav_usage <- trav_usage %>%
  mutate(Disease = ifelse(startsWith(as.character(Patient_ID), "1"), "Dia", "Ctrl"))

trav_usage

In [None]:
options(repr.plot.height = 25, repr.plot.width = 10)

# TRAV usage per gene: group mean (point) and min-max range (line) of the
# per-patient frequencies, control vs diabetes, with a per-gene test.
# Fix: the data used to be reduced to one mean per gene and group BEFORE
# plotting, so stat_compare_means() compared one value against one value and
# its p-values were meaningless. Per-patient frequencies are now kept and
# mean / min / max are drawn with stat_summary().
# Genes are ordered by overall mean frequency, highest first (previously by
# the median of the two group means).
# element_line(size = ) was replaced by linewidth, matching the linewidth
# argument already used in this plot.
# NOTE(review): one test per gene, no multiple-testing correction.
trav_usage %>%
  ungroup() %>%
  filter(!grepl("TRD", v_gene_A1)) %>%
  mutate(v_gene_A1 = fct_reorder(v_gene_A1, freq, .fun = mean, .desc = TRUE)) %>%
  ggplot(aes(x = Disease, y = freq, color = Disease)) +
  facet_grid(rows = vars(v_gene_A1)) +
  stat_summary(fun = mean, geom = "point", size = 2) +
  stat_summary(fun = mean, fun.min = min, fun.max = max, geom = "linerange",
               alpha = 0.5, linewidth = 1) +
  coord_flip() +
  ggtheme() +
  theme(strip.text.y = element_text(angle = 0),
        axis.ticks.y = element_blank(),
        axis.text.y = element_blank(),
        strip.background = element_blank(),
        panel.background = element_blank(),
        panel.grid.major = element_line(linewidth = 0.3, linetype = 'solid',
                                        colour = "grey92"),
        panel.grid.minor = element_line(linewidth = 0.3, linetype = 'solid',
                                        colour = "grey92")) +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  scale_color_manual(values = c("dodgerblue", "red3"))

In [None]:
#ggsave("../figures/tcr/vdj_usage/cd4_trav.png", width = 20, height = 50, units = "cm")
#ggsave("../figures/tcr/vdj_usage/cd4_trav.svg", width = 20, height = 50, units = "cm")

### TRAJ

In [None]:
# Per-patient usage frequency of each alpha-chain J gene (j_gene_A1).
# The wide/long round trip adds explicit zero rows for genes a patient does
# not use.
traj_usage <- metadata_6 %>%
  filter(!is.na(j_gene_A1)) %>%
  dplyr::group_by(Patient_ID, j_gene_A1) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n)) %>%
  arrange(desc(freq)) %>%
  dplyr::select(-n) %>%
  pivot_wider(
    names_from = "j_gene_A1",
    values_from = "freq",
    values_fill = 0
  ) %>%
  pivot_longer(
    !Patient_ID,
    names_to = "j_gene_A1",
    values_to = "freq"
  )

In [None]:
traj_usage

In [None]:
# Patient IDs whose first character is "1" are labelled "Dia", all others "Ctrl".
traj_usage <- traj_usage %>%
  mutate(Disease = ifelse(startsWith(as.character(Patient_ID), "1"), "Dia", "Ctrl"))

In [None]:
traj_usage

In [None]:
options(repr.plot.height = 15, repr.plot.width = 10)

# TRAJ usage per gene: group mean (point) and min-max range (line) of the
# per-patient frequencies, control vs diabetes, with a per-gene test.
# Fix: the data used to be reduced to one mean per gene and group BEFORE
# plotting, so stat_compare_means() compared one value against one value and
# its p-values were meaningless. Per-patient frequencies are now kept and
# mean / min / max are drawn with stat_summary().
# Genes are ordered by overall mean frequency, highest first (previously by
# the median of the two group means).
# element_line(size = ) was replaced by linewidth, matching the linewidth
# argument already used in this plot.
# NOTE(review): one test per gene, no multiple-testing correction.
traj_usage %>%
  ungroup() %>%
  filter(!grepl("TRD", j_gene_A1)) %>%
  mutate(j_gene_A1 = fct_reorder(j_gene_A1, freq, .fun = mean, .desc = TRUE)) %>%
  ggplot(aes(x = Disease, y = freq, color = Disease)) +
  facet_grid(rows = vars(j_gene_A1)) +
  stat_summary(fun = mean, geom = "point", size = 2) +
  stat_summary(fun = mean, fun.min = min, fun.max = max, geom = "linerange",
               alpha = 0.5, linewidth = 1) +
  coord_flip() +
  ggtheme() +
  theme(strip.text.y = element_text(angle = 0),
        axis.ticks.y = element_blank(),
        axis.text.y = element_blank(),
        strip.background = element_blank(),
        panel.background = element_blank(),
        panel.grid.major = element_line(linewidth = 0.3, linetype = 'solid',
                                        colour = "grey92"),
        panel.grid.minor = element_line(linewidth = 0.3, linetype = 'solid',
                                        colour = "grey92")) +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  scale_color_manual(values = c("dodgerblue", "red3"))

In [None]:
#ggsave("../figures/tcr/vdj_usage/cd4_traj.png", width = 15, height = 35, units = "cm")
#ggsave("../figures/tcr/vdj_usage/cd4_traj.svg", width = 15, height = 35, units = "cm")

### TRBV

In [None]:
metadata_6$v_gene_B  %>% table

In [None]:
# Per-patient usage frequency of each beta-chain V gene (v_gene_B).
# The wide/long round trip adds explicit zero rows for genes a patient does
# not use.
trbv_usage <- metadata_6 %>%
  filter(!is.na(v_gene_B)) %>%
  dplyr::group_by(Patient_ID, v_gene_B) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n)) %>%
  arrange(desc(freq)) %>%
  dplyr::select(-n) %>%
  pivot_wider(
    names_from = "v_gene_B",
    values_from = "freq",
    values_fill = 0
  ) %>%
  pivot_longer(
    !Patient_ID,
    names_to = "v_gene_B",
    values_to = "freq"
  )

In [None]:
trbv_usage

In [None]:
# Patient IDs whose first character is "1" are labelled "Dia", all others "Ctrl".
trbv_usage <- trbv_usage %>%
  mutate(Disease = ifelse(startsWith(as.character(Patient_ID), "1"), "Dia", "Ctrl"))

In [None]:
trbv_usage

In [None]:
options(repr.plot.height = 15, repr.plot.width = 10)

# TRBV usage per gene: group mean (point) and min-max range (line) of the
# per-patient frequencies, control vs diabetes, with a per-gene test.
# Fix: the data used to be reduced to one mean per gene and group BEFORE
# plotting, so stat_compare_means() compared one value against one value and
# its p-values were meaningless. Per-patient frequencies are now kept and
# mean / min / max are drawn with stat_summary().
# Genes are ordered by overall mean frequency, highest first (previously by
# the median of the two group means).
# element_line(size = ) was replaced by linewidth, matching the linewidth
# argument already used in this plot.
# NOTE(review): one test per gene, no multiple-testing correction.
trbv_usage %>%
  ungroup() %>%
  filter(!grepl("TRD", v_gene_B)) %>%
  mutate(v_gene_B = fct_reorder(v_gene_B, freq, .fun = mean, .desc = TRUE)) %>%
  ggplot(aes(x = Disease, y = freq, color = Disease)) +
  facet_grid(rows = vars(v_gene_B)) +
  stat_summary(fun = mean, geom = "point", size = 2) +
  stat_summary(fun = mean, fun.min = min, fun.max = max, geom = "linerange",
               alpha = 0.5, linewidth = 1) +
  coord_flip() +
  ggtheme() +
  theme(strip.text.y = element_text(angle = 0),
        axis.ticks.y = element_blank(),
        axis.text.y = element_blank(),
        strip.background = element_blank(),
        panel.background = element_blank(),
        panel.grid.major = element_line(linewidth = 0.3, linetype = 'solid',
                                        colour = "grey92"),
        panel.grid.minor = element_line(linewidth = 0.3, linetype = 'solid',
                                        colour = "grey92")) +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  scale_color_manual(values = c("dodgerblue", "red3"))

In [None]:
#ggsave("../figures/tcr/vdj_usage/cd4_trbv.png", width = 15, height = 35, units = "cm")
#ggsave("../figures/tcr/vdj_usage/cd4_trbv.svg", width = 15, height = 35, units = "cm")

### TRBJ

In [None]:
metadata_6$j_gene_B  %>% table

In [None]:
# Per-patient usage frequency of each beta-chain J gene (j_gene_B).
# The wide/long round trip adds explicit zero rows for genes a patient does
# not use.
trbj_usage <- metadata_6 %>%
  filter(!is.na(j_gene_B)) %>%
  dplyr::group_by(Patient_ID, j_gene_B) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n)) %>%
  arrange(desc(freq)) %>%
  dplyr::select(-n) %>%
  pivot_wider(
    names_from = "j_gene_B",
    values_from = "freq",
    values_fill = 0
  ) %>%
  pivot_longer(
    !Patient_ID,
    names_to = "j_gene_B",
    values_to = "freq"
  )

In [None]:
trbj_usage

In [None]:
# Patient IDs whose first character is "1" are labelled "Dia", all others "Ctrl".
trbj_usage <- trbj_usage %>%
  mutate(Disease = ifelse(startsWith(as.character(Patient_ID), "1"), "Dia", "Ctrl"))

In [None]:
trbj_usage

In [None]:
options(repr.plot.height = 15, repr.plot.width = 10)

# TRBJ usage per gene: group mean (point) and min-max range (line) of the
# per-patient frequencies, control vs diabetes, with a per-gene test.
# Fix: the data used to be reduced to one mean per gene and group BEFORE
# plotting, so stat_compare_means() compared one value against one value and
# its p-values were meaningless. Per-patient frequencies are now kept and
# mean / min / max are drawn with stat_summary().
# Genes are ordered by overall mean frequency, highest first (previously by
# the median of the two group means).
# element_line(size = ) was replaced by linewidth, matching the linewidth
# argument already used in this plot.
# NOTE(review): one test per gene, no multiple-testing correction.
trbj_usage %>%
  ungroup() %>%
  filter(!grepl("TRD", j_gene_B)) %>%
  mutate(j_gene_B = fct_reorder(j_gene_B, freq, .fun = mean, .desc = TRUE)) %>%
  ggplot(aes(x = Disease, y = freq, color = Disease)) +
  facet_grid(rows = vars(j_gene_B)) +
  stat_summary(fun = mean, geom = "point", size = 2) +
  stat_summary(fun = mean, fun.min = min, fun.max = max, geom = "linerange",
               alpha = 0.5, linewidth = 1) +
  coord_flip() +
  ggtheme() +
  theme(strip.text.y = element_text(angle = 0),
        axis.ticks.y = element_blank(),
        axis.text.y = element_blank(),
        strip.background = element_blank(),
        panel.background = element_blank(),
        panel.grid.major = element_line(linewidth = 0.3, linetype = 'solid',
                                        colour = "grey92"),
        panel.grid.minor = element_line(linewidth = 0.3, linetype = 'solid',
                                        colour = "grey92")) +
  ggpubr::stat_compare_means(label.x.npc = 0.3, label = "p.format") +
  scale_color_manual(values = c("dodgerblue", "red3"))

In [None]:
#ggsave("../figures/tcr/vdj_usage/cd4_trbj.png", width = 15, height = 10, units = "cm")
#ggsave("../figures/tcr/vdj_usage/cd4_trbj.svg", width = 15, height = 10, units = "cm")

### Clones shared between samples

In [None]:
# Composite per-sample label: six metadata fields joined by single spaces.
cd4_subcluster$Sample_char <- paste(
  cd4_subcluster$Patient_ID,
  cd4_subcluster$Disease,
  cd4_subcluster$Time,
  cd4_subcluster$Age_group,
  cd4_subcluster$Sex,
  cd4_subcluster$Experiment_ID,
  sep = " "
)

In [None]:
# Clone sizes per sample (cells without any CDR3 excluded) and each clone's
# share of its sample.
clone_table <- metadata_6 %>%
  filter(clone_aa != "CDR3b NA CDR3a NA") %>%
  dplyr::group_by(Sample_char, clone_aa) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n))

In [None]:
# Wide clone x sample table of cell counts (0 where a clone is absent).
clone_table_individual <- metadata_6 %>%
  dplyr::group_by(clone_aa, Sample_char) %>%
  dplyr::summarize(n = n()) %>%
  arrange(desc(n)) %>%
  pivot_wider(
    names_from = Sample_char,
    values_from = n,
    values_fill = 0
  )

In [None]:
dim(clone_table_individual)

In [None]:
# Presence/absence per sample and the number of samples each clone occurs in.
# Sample columns are located by name (everything except the clone key and an
# already existing `sum` column) instead of the hard-coded range 2:88, so the
# cell keeps working when the number of samples changes or when it is re-run.
sample_cols <- which(!names(clone_table_individual) %in% c("clone_aa", "sum"))
clone_table_individual_binary <- clone_table_individual %>%
  mutate_at(sample_cols, is_positive)
clone_table_individual$sum <- rowSums(clone_table_individual_binary[, sample_cols])

clone_table_individual %>% arrange(desc(sum))

In [None]:
# Most widely shared clones first.
clone_table_individual <- clone_table_individual %>%
  arrange(desc(sum))

In [None]:
# Discard the top row.
# NOTE(review): presumably the "CDR3b NA CDR3a NA" pseudo-clone - this relies
# on it being sorted first; confirm.
clone_table_individual <- clone_table_individual[2:nrow(clone_table_individual), ]

In [None]:
# Clones detected in more than one sample.
clone_table_individual_small <- clone_table_individual %>%
  filter(sum > 1)

In [None]:
clone_table_individual_small

In [None]:
# Create the output directory including missing parents; stay silent when it
# already exists so the cell can be re-run.
dir.create("../tables/tcr/", recursive = TRUE, showWarnings = FALSE)

In [None]:
write.csv(
  clone_table_individual_small %>% arrange(desc(sum)),
  "../tables/tcr/overlapping_clones_cd4_small.csv"
)

In [None]:
write.csv(
  clone_table_individual %>% arrange(desc(sum)),
  "../tables/tcr/overlapping_clones_cd4.csv"
)

### Repertoire overlap table

In [None]:
clone_table_individual_binary

In [None]:
# Column positions of the samples in alphabetical order (column 1 is the
# clone key); derived from the table instead of the hard-coded range 2:88.
order(colnames(clone_table_individual_binary)[-1]) + 1

In [None]:
# Drop the first row and sort the sample columns alphabetically.
# NOTE(review): the dropped row is presumably the missing-TCR pseudo-clone;
# confirm it is always first.
clone_table_individual_binary <- clone_table_individual_binary[
  2:nrow(clone_table_individual_binary),
  c(1, order(colnames(clone_table_individual_binary)[-1]) + 1)
]

In [None]:
## TRB shared by patients

# CDR3b clone sizes per sample and their share of the sample.
clone_table <- metadata_6 %>%
  filter(clone_aa != "CDR3b NA CDR3a NA") %>%
  dplyr::group_by(Sample_char, cdr3_B) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n))

# Wide CDR3b x sample count table.
clone_table_individual <- metadata_6 %>%
  dplyr::group_by(cdr3_B, Sample_char) %>%
  dplyr::summarize(n = n()) %>%
  arrange(desc(n)) %>%
  pivot_wider(names_from = Sample_char, values_from = n, values_fill = 0)

clone_table_individual

# Sample columns are located by name rather than by a fixed range (2:88).
sample_cols <- which(!names(clone_table_individual) %in% c("cdr3_B", "sum"))
clone_table_individual_binary <- clone_table_individual %>%
  mutate_at(sample_cols, is_positive)
clone_table_individual$sum <- rowSums(clone_table_individual_binary[, sample_cols])

clone_table_individual %>% arrange(desc(sum))

clone_table_individual_binary

# Drop the first row of both tables.
# NOTE(review): presumably the missing-CDR3b row; confirm it is always first.
clone_table_individual <-
  (clone_table_individual %>% arrange(desc(sum)))[2:nrow(clone_table_individual), ]

clone_table_individual_binary <-
  clone_table_individual_binary[2:nrow(clone_table_individual_binary), ]

write.csv(
  clone_table_individual %>% arrange(desc(sum)),
  "../tables/tcr/overlapping_trb_cd4.csv"
)

clone_table_individual_small <- clone_table_individual %>% filter(sum > 1)

clone_table_individual_small

# Written next to the other CD4 tables; the previous target
# "overlapping_trb_cd8_small.csv" was a leftover from the CD8 notebook and
# landed in the working directory.
write.csv(
  clone_table_individual_small %>% arrange(desc(sum)),
  "../tables/tcr/overlapping_trb_cd4_small.csv"
)

### Repertoire overlap table

clone_table_individual_binary

# Alphabetical order of the sample columns (column 1 is the CDR3b key). The
# previous range 2:98 did not match the number of columns in this table.
order_cols <- order(colnames(clone_table_individual_binary)[-1]) + 1

order_cols

clone_table_individual_binary <- clone_table_individual_binary[, c(1, order_cols)]

clone_table_individual_binary

# Pairwise repertoire overlap: entry [i, j] is the fraction of the clonotypes
# detected in sample j that are also detected in sample i. All sample columns
# are handled at once through the presence/absence matrix, replacing the
# nested loop over the hard-coded column range 2:88.
binary_df <- as.data.frame(clone_table_individual_binary)
presence <- as.matrix(binary_df[, -1, drop = FALSE]) > 0
presence[is.na(presence)] <- FALSE
shared <- crossprod(presence * 1) # shared[i, j]: clonotypes present in both
df_all4 <- as.data.frame(sweep(shared, 2, diag(shared), "/"))
rownames(df_all4) <- colnames(df_all4)

df_all4

# Blank out complete overlap (this covers the self-comparisons on the
# diagonal) so it does not dominate the colour scale.
df24 <- df_all4
df24[df24 == 1] <- 0

matrix_4 <- as.matrix(df24)

options(repr.plot.height = 17, repr.plot.width = 17)
pheatmap::pheatmap(matrix_4, cluster_rows = FALSE, cluster_cols = FALSE)

# Log scale with a small pseudocount so zero overlap stays finite.
matrix_5 <- log(matrix_4 + 0.0001)

options(repr.plot.height = 17, repr.plot.width = 17)
pheatmap::pheatmap(matrix_5, cluster_rows = FALSE, cluster_cols = FALSE)



# Pairwise repertoire overlap: column j holds, for every sample i (rows, in
# column order), the fraction of sample j's clonotypes also found in sample i.
# Computed from the presence/absence matrix instead of a nested loop over the
# hard-coded column range 2:88.
binary_df <- as.data.frame(clone_table_individual_binary)
presence <- as.matrix(binary_df[, -1, drop = FALSE]) > 0
presence[is.na(presence)] <- FALSE
shared <- crossprod(presence * 1) # shared[i, j]: clonotypes present in both
overlap <- sweep(shared, 2, diag(shared), "/")
rownames(overlap) <- NULL

# The leading placeholder column is kept because downstream code removes the
# first column before naming the rows.
df_all4 <- cbind(data.frame(""), as.data.frame(overlap))

df_all4

In [None]:
df_all4

In [None]:
# Remove the placeholder first column. The previous range 2:98 addressed
# columns that do not exist in a table built from columns 2:88.
df_all4 <- df_all4[, -1]
rownames(df_all4) <- colnames(df_all4)

df_all4

In [None]:
# Blank out complete overlap (this covers the self-comparisons on the
# diagonal) so it does not dominate the colour scale.
df24 <- df_all4
df24[df24 == 1] <- 0


In [None]:
matrix_4 <- as.matrix(df24)

In [None]:
options(repr.plot.height = 17, repr.plot.width = 17)
pheatmap::pheatmap(matrix_4, cluster_rows = FALSE, cluster_cols = FALSE)

In [None]:
# Log scale with a small pseudocount so zero overlap stays finite.
matrix_5 <- log(matrix_4 + 0.0001)

options(repr.plot.height = 17, repr.plot.width = 17)
pheatmap::pheatmap(matrix_5, cluster_rows = FALSE, cluster_cols = FALSE)

## TRB shared by patients

In [None]:
# CDR3b clone sizes per sample and their share of the sample.
clone_table <- metadata_6 %>%
  filter(clone_aa != "CDR3b NA CDR3a NA") %>%
  dplyr::group_by(Sample_char, cdr3_B) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n))

In [None]:
# Wide CDR3b x sample count table (0 where a sequence is absent).
clone_table_individual <- metadata_6 %>%
  dplyr::group_by(cdr3_B, Sample_char) %>%
  dplyr::summarize(n = n()) %>%
  arrange(desc(n)) %>%
  pivot_wider(
    names_from = Sample_char,
    values_from = n,
    values_fill = 0
  )

In [None]:
clone_table_individual

In [None]:
# Presence/absence per sample and the number of samples each CDR3b occurs in.
# Sample columns are located by name (everything except the key and an
# already existing `sum` column) instead of the hard-coded range 2:88.
sample_cols <- which(!names(clone_table_individual) %in% c("cdr3_B", "sum"))
clone_table_individual_binary <- clone_table_individual %>%
  mutate_at(sample_cols, is_positive)
clone_table_individual$sum <- rowSums(clone_table_individual_binary[, sample_cols])

clone_table_individual %>% arrange(desc(sum))

In [None]:
clone_table_individual_binary

In [None]:
# Sort by number of samples and discard the top row.
# NOTE(review): presumably the missing-CDR3b row; confirm it is always first.
clone_table_individual <-
  (clone_table_individual %>% arrange(desc(sum)))[2:nrow(clone_table_individual), ]

In [None]:
# Discard the first row of the (unsorted) presence/absence table as well.
clone_table_individual_binary <-
  clone_table_individual_binary[2:nrow(clone_table_individual_binary), ]

In [None]:
write.csv(
  clone_table_individual %>% arrange(desc(sum)),
  "../tables/tcr/overlapping_trb_cd4.csv"
)

In [None]:
# CDR3b sequences detected in more than one sample.
clone_table_individual_small <- clone_table_individual %>%
  filter(sum > 1)

In [None]:
clone_table_individual_small

In [None]:
write.csv(
  clone_table_individual_small %>% arrange(desc(sum)),
  "../tables/tcr/overlapping_trb_cd4_small.csv"
)

### Repertoire overlap table

In [None]:
clone_table_individual_binary

In [None]:
# Alphabetical order of the sample columns (column 1 is the CDR3b key),
# derived from the table instead of the hard-coded range 2:88.
order_cols <- order(colnames(clone_table_individual_binary)[-1]) + 1

In [None]:
order_cols

In [None]:
clone_table_individual_binary <- clone_table_individual_binary[, c(1, order_cols)]

In [None]:
clone_table_individual_binary

In [None]:
dim(clone_table_individual_binary)

In [None]:
# Pairwise repertoire overlap: column j holds, for every sample i (rows, in
# column order), the fraction of sample j's CDR3b sequences also found in
# sample i. Computed from the presence/absence matrix instead of a nested
# loop over the hard-coded column range 2:88.
binary_df <- as.data.frame(clone_table_individual_binary)
presence <- as.matrix(binary_df[, -1, drop = FALSE]) > 0
presence[is.na(presence)] <- FALSE
shared <- crossprod(presence * 1) # shared[i, j]: sequences present in both
overlap <- sweep(shared, 2, diag(shared), "/")
rownames(overlap) <- NULL

# The leading placeholder column is kept because downstream code removes the
# first column before naming the rows.
df_all4 <- cbind(data.frame(""), as.data.frame(overlap))

df_all4

In [None]:
df_all4

In [None]:
# Remove the placeholder first column (independent of the number of samples).
df_all4 <- df_all4[, -1]
rownames(df_all4) <- colnames(df_all4)

df_all4

In [None]:
# Blank out complete overlap (this covers the self-comparisons on the
# diagonal) so it does not dominate the colour scale.
df24 <- df_all4
df24[df24 == 1] <- 0


In [None]:
matrix_4 <- as.matrix(df24)

In [None]:
options(repr.plot.height = 17, repr.plot.width = 17)
pheatmap::pheatmap(matrix_4, cluster_rows = FALSE, cluster_cols = FALSE)

In [None]:
# Log scale with a small pseudocount so zero overlap stays finite.
matrix_5 <- log(matrix_4 + 0.0001)

options(repr.plot.height = 17, repr.plot.width = 17)
pheatmap::pheatmap(matrix_5, cluster_rows = FALSE, cluster_cols = FALSE)

## Overlap by patient

In [None]:
# Wide clone x patient count table; a column is "<Patient_ID> <Disease>".
clone_table_individual <- metadata_6 %>%
  mutate(Condition = paste(Patient_ID, Disease)) %>%
  dplyr::group_by(clone_aa, Condition) %>%
  dplyr::summarize(n = n()) %>%
  arrange(desc(n)) %>%
  pivot_wider(
    names_from = Condition,
    values_from = n,
    values_fill = 0
  )

In [None]:
clone_table_individual

In [None]:
# Discard the first row.
# NOTE(review): presumably the missing-TCR pseudo-clone; confirm it is first.
clone_table_individual <- clone_table_individual[2:nrow(clone_table_individual), ]

In [None]:
# Presence/absence per patient and the number of patients sharing each clone.
# Patient columns are located by name (everything except the clone key and an
# already existing `sum` column) instead of the hard-coded range 2:44.
sample_cols <- which(!names(clone_table_individual) %in% c("clone_aa", "sum"))
clone_table_individual_binary <- clone_table_individual %>%
  mutate_at(sample_cols, is_positive)
clone_table_individual$sum <- rowSums(clone_table_individual_binary[, sample_cols])

clone_table_individual %>% arrange(desc(sum))

In [None]:
# Clones detected in more than one patient.
clone_table_individual_small <- clone_table_individual %>% filter(sum > 1)

In [None]:
write.csv(
  clone_table_individual_small %>% arrange(desc(sum)),
  "../tables/tcr/overlapping_clones_cd4_by_patient_small.csv"
)

In [None]:
write.csv(
  clone_table_individual %>% arrange(desc(sum)),
  "../tables/tcr/overlapping_clones_cd4_by_patient.csv"
)

### Repertoire overlap table

In [None]:
# Number of patients per clone on the presence/absence table; patient columns
# are located by name instead of the hard-coded range 2:44.
binary_sample_cols <- which(
  !names(clone_table_individual_binary) %in% c("clone_aa", "sum")
)
clone_table_individual_binary$sum <-
  rowSums(clone_table_individual_binary[, binary_sample_cols])

In [None]:
clone_table_individual_binary_small <- clone_table_individual_binary %>%
  filter(sum > 1)

In [None]:
clone_table_individual_binary_small

In [None]:
# Column order: clone key, patients alphabetically, `sum` last.
small_names <- colnames(clone_table_individual_binary_small)
small_sample_cols <- which(!small_names %in% c("clone_aa", "sum"))
clone_table_individual_binary_small <- clone_table_individual_binary_small[
  ,
  c(
    1,
    small_sample_cols[order(small_names[small_sample_cols])],
    which(small_names == "sum")
  )
]

In [None]:
clone_table_individual_binary_small %>% arrange(desc(sum))

In [None]:
# Pairwise repertoire overlap between patients: column j holds, for every
# patient i (rows, in column order), the fraction of patient j's clones also
# found in patient i. Computed from the presence/absence matrix instead of a
# nested loop over the hard-coded column range 2:44; the `sum` column is
# excluded by name.
# NOTE(review): this uses the full table in its original column order, not
# the alphabetically ordered `clone_table_individual_binary_small` prepared
# above - confirm that is intended.
binary_df <- as.data.frame(clone_table_individual_binary)
patient_cols <- setdiff(names(binary_df)[-1], "sum")
presence <- as.matrix(binary_df[, patient_cols, drop = FALSE]) > 0
presence[is.na(presence)] <- FALSE
shared <- crossprod(presence * 1) # shared[i, j]: clones present in both
overlap <- sweep(shared, 2, diag(shared), "/")
rownames(overlap) <- NULL

# The leading placeholder column is kept because downstream code removes the
# first column before naming the rows.
df_all5 <- cbind(data.frame(""), as.data.frame(overlap))

df_all5

In [None]:
# Remove the placeholder first column (independent of the number of patients).
df_all5 <- df_all5[, -1]
rownames(df_all5) <- colnames(df_all5)

df_all5

In [None]:
# Blank out complete overlap (this covers the self-comparisons on the
# diagonal) so it does not dominate the colour scale.
df25 <- df_all5
df25[df25 == 1] <- 0


In [None]:
matrix_6 <- as.matrix(df25)

In [None]:
options(repr.plot.height = 12, repr.plot.width = 12)
pheatmap::pheatmap(matrix_6, cluster_rows = FALSE, cluster_cols = FALSE)

In [None]:
pheatmap::pheatmap(matrix_6, cluster_rows = TRUE, cluster_cols = TRUE)

## TCRb

In [None]:
# Wide CDR3b x patient count table; a column is "<Patient_ID> <Disease>".
# `values_fill = 0` (as in every other overlap table of this notebook) keeps
# absent sequences at 0 instead of NA, so the per-row patient count computed
# from this table is not NA.
clone_table_individual <- metadata_6 %>%
  mutate(Condition = paste(Patient_ID, Disease)) %>%
  dplyr::group_by(cdr3_B, Condition) %>%
  dplyr::summarize(n = n()) %>%
  arrange(desc(n)) %>%
  pivot_wider(names_from = Condition, values_from = n, values_fill = 0)

In [None]:
clone_table_individual

In [None]:
# Presence/absence per patient and the number of patients sharing each CDR3b.
# Patient columns are located by name (everything except the key and an
# already existing `sum` column) instead of the hard-coded range 2:44.
sample_cols <- which(!names(clone_table_individual) %in% c("cdr3_B", "sum"))
clone_table_individual_binary <- clone_table_individual %>%
  mutate_at(sample_cols, is_positive)
clone_table_individual$sum <- rowSums(clone_table_individual_binary[, sample_cols])

clone_table_individual %>% arrange(desc(sum))

In [None]:
# Sort by number of patients and discard the top row.
# NOTE(review): presumably the missing-CDR3b row; confirm it is always first.
clone_table_individual <-
  (clone_table_individual %>% arrange(desc(sum)))[2:nrow(clone_table_individual), ]

In [None]:
# Discard the first row of the (unsorted) presence/absence table as well.
clone_table_individual_binary <-
  clone_table_individual_binary[2:nrow(clone_table_individual_binary), ]

In [None]:
write.csv(
  clone_table_individual %>% arrange(desc(sum)),
  "../tables/tcr/overlapping_trb_cd4_by_patient.csv"
)

In [None]:
# CDR3b sequences detected in more than one patient.
clone_table_individual_small <- clone_table_individual %>%
  filter(sum > 1)

In [None]:
clone_table_individual_small

In [None]:
write.csv(
  clone_table_individual_small %>% arrange(desc(sum)),
  "../tables/tcr/overlapping_trb_cd4_small_by_patient.csv"
)

### Repertoire overlap table

In [None]:
clone_table_individual_binary

In [None]:
# Sort the patient columns alphabetically (column 1 is the CDR3b key); the
# order is derived from the table instead of the hard-coded range 2:44.
clone_table_individual_binary <- clone_table_individual_binary[
  ,
  c(1, order(colnames(clone_table_individual_binary)[-1]) + 1)
]

In [None]:
# Pairwise repertoire overlap between patients: column j holds, for every
# patient i (rows, in column order), the fraction of patient j's CDR3b
# sequences also found in patient i. Computed from the presence/absence
# matrix instead of a nested loop over the hard-coded column range 2:44.
binary_df <- as.data.frame(clone_table_individual_binary)
patient_cols <- setdiff(names(binary_df)[-1], "sum")
presence <- as.matrix(binary_df[, patient_cols, drop = FALSE]) > 0
presence[is.na(presence)] <- FALSE
shared <- crossprod(presence * 1) # shared[i, j]: sequences present in both
overlap <- sweep(shared, 2, diag(shared), "/")
rownames(overlap) <- NULL

# The leading placeholder column is kept because downstream code removes the
# first column before naming the rows.
df_all5 <- cbind(data.frame(""), as.data.frame(overlap))

df_all5

In [None]:
# Remove the placeholder first column (independent of the number of patients).
df_all5 <- df_all5[, -1]
rownames(df_all5) <- colnames(df_all5)

df_all5

In [None]:
# Blank out complete overlap (this covers the self-comparisons on the
# diagonal) so it does not dominate the colour scale.
df25 <- df_all5
df25[df25 == 1] <- 0


In [None]:
matrix_6 <- as.matrix(df25)

In [None]:
options(repr.plot.height = 12, repr.plot.width = 12)
pheatmap::pheatmap(matrix_6, cluster_rows = FALSE, cluster_cols = FALSE)

In [None]:
pheatmap::pheatmap(matrix_6, cluster_rows = TRUE, cluster_cols = TRUE)

# Published clones

In [None]:
# Reference table of published clones.
published_clones <- read_excel("../data/published_clones.xlsx")

In [None]:
# Published CDR3 alpha sequences (missing entries removed).
published_tra <- published_clones$CDR3a[!is.na(published_clones$CDR3a)]

In [None]:
# Published CDR3 beta sequences (missing entries removed).
published_trb <- published_clones$CDR3b[!is.na(published_clones$CDR3b)]

### TRA

In [None]:
# How many cells carry a published CDR3 alpha?
table(cd4_subcluster$cdr3_A1 %in% published_tra)

In [None]:
# Published clones whose CDR3 alpha occurs in the data set and whose
# "Source of T cells" entry mentions CD4.
clones_detected_cd4 <- published_clones %>%
  filter(CDR3a %in% cd4_subcluster$cdr3_A1) %>%
  filter(!is.na(CDR3a) & grepl(`Source of T cells`, pattern = "CD4"))

In [None]:
clones_detected_cd4

In [None]:
# Cells carrying one of the detected published alpha chains.
cd4_subcluster@meta.data %>%
  dplyr::select(Patient_ID, Condition, Experiment_ID, cdr3_A1, cdr3_B) %>%
  filter(cdr3_A1 %in% clones_detected_cd4$CDR3a)

In [None]:
options(repr.plot.width = 3, repr.plot.height = 3)
# One embedding plot per detected published CDR3 alpha, highlighting the cells
# that carry it. Cells are selected by exact equality, consistent with the
# `%in%` matching used for the tables above; the previous `grep()` also
# highlighted cells whose CDR3 merely contains the sequence as a substring.
for (cdr3a in pull(clones_detected_cd4, CDR3a)) {
  matching_cells <- colnames(cd4_subcluster)[which(cd4_subcluster$cdr3_A1 == cdr3a)]
  print(
    DimPlot(
      cd4_subcluster,
      raster = FALSE,
      cells.highlight = matching_cells,
      sizes.highlight = c(2, 1)
    ) +
      NoLegend() +
      ggtitle(cdr3a)
  )
}

### TRB

In [None]:
# How many cells carry a published CDR3 beta?
table(cd4_subcluster$cdr3_B %in% published_trb)

In [None]:
# Published clones whose CDR3 beta occurs in the data set.
published_clones %>%
  filter(CDR3b %in% cd4_subcluster$cdr3_B) %>%
  filter(!is.na(CDR3b))

In [None]:
# Cells carrying a published beta chain.
cd4_subcluster@meta.data %>%
  dplyr::select(Patient_ID, Condition, Experiment_ID, cdr3_A1, cdr3_B) %>%
  filter(cdr3_B %in% published_trb)

HLA of our patients:

### Kmers

In [None]:
library(kebabs)

In [None]:
# All published CDR3 beta sequences (missing entries removed); overwritten by
# the next cell.
published_trb2  <- published_clones$CDR3b[which(!is.na(published_clones$CDR3b))]

In [None]:
# Published CDR3 beta sequences whose "Source of T cells" entry mentions CD4.
published_trb2  <- published_clones  %>% filter(!is.na(CDR3b) & grepl(`Source of T cells`, pattern = "CD4"))  %>% pull(CDR3b)

In [None]:
published_trb2

In [None]:
# NOTE(review): the sequence set built here uses `published_trb` (all
# published beta chains), not the CD4-filtered `published_trb2` prepared
# above - confirm which one is intended.
kebabs::AAVector(published_trb)

In [None]:
# Amino-acid sequence set used as the reference for the k-mer features.
s1 <- kebabs::AAVector(published_trb)

In [None]:
# Spectrum kernel over 4-mers.
sk14 <- spectrumKernel(k = 4, normalized = TRUE)

In [None]:
# Column sums of the explicit k-mer representation of the reference set.
kmers_trb <- as.data.frame(
  colSums(
    as.data.frame(drop(getExRep(s1, sk14)))
  )
)

In [None]:
kmers_trb

In [None]:
# Move the k-mer names from the row names into a column.
kmers_trb$kmer <- rownames(kmers_trb)

In [None]:
colnames(kmers_trb) <- c("ref", "kmer")

In [None]:
rownames(kmers_trb) <- NULL

In [None]:
kmers_trb

In [None]:
metadata_y <- metadata_6 %>% filter(Time == "T0")

In [None]:
# Per-patient k-mer profiles for the reference k-mers. For each patient, every
# CDR3b is repeated once per cell carrying it, the k-mer representation is
# summed over sequences, and the result is joined onto the reference table.
#
# Changes to the previous version:
#  * cells without a CDR3b are removed explicitly; before, the most abundant
#    row was dropped on the assumption that it is the missing value;
#  * the loop works on its own objects and no longer overwrites the reference
#    `kmers_trb` / `s1`, so the cell can be re-run;
#  * the join key is stated explicitly and the kernel is built once.
df_all <- kmers_trb
patient_levels <- levels(factor(metadata_y$Patient_ID))
sk14 <- spectrumKernel(k = 4, normalized = TRUE)

for (current_patient in patient_levels) {
  pt1_betas <- metadata_y %>%
    filter(Patient_ID == current_patient, !is.na(cdr3_B)) %>%
    dplyr::group_by(cdr3_B) %>%
    dplyr::summarize(n = n()) %>%
    arrange(desc(n))

  if (nrow(pt1_betas) == 0) {
    message("No CDR3b sequences for patient ", current_patient, "; skipped")
    next
  }

  rep_pt <- rep(pt1_betas$cdr3_B, pt1_betas$n)
  patient_seqs <- kebabs::AAVector(rep_pt)

  patient_kmers <- as.data.frame(
    colSums(as.data.frame(drop(getExRep(patient_seqs, sk14))))
  )
  patient_kmers$kmer <- rownames(patient_kmers)
  colnames(patient_kmers) <- c(current_patient, "kmer")
  rownames(patient_kmers) <- NULL

  df_all <- left_join(df_all, patient_kmers, by = "kmer")
}

In [None]:
df_all

In [None]:
# K-mers a patient does not have count as 0.
df_all[is.na(df_all)] <- 0

In [None]:
# The reference column is not needed for the patient comparison.
df_all$ref <- NULL

In [None]:
# Long format: one row per (k-mer, patient); `group` is the first character
# of the patient ID.
df_all2 <- df_all %>%
  pivot_longer(!kmer, names_to = "Patient_ID", values_to = "freq") %>%
  mutate(group = substr(Patient_ID, 1, 1))

In [None]:
df_all2

In [None]:
# Result table of the per-k-mer tests. `pvalue` is numeric; the previous
# character header row ("kmer", "pvalue") turned the whole column into
# strings, so sorting and `pvalue < 0.05` were string comparisons.
m_w <- data.frame(kmer = character(), pvalue = numeric())

In [None]:
# Wilcoxon rank-sum test of k-mer frequency between group 1 and group 2, for
# every k-mer. The previous loop started at index 2 and stopped at a
# hard-coded 726, skipping the first k-mer and depending on the table size.
kmer_levels <- levels(factor(df_all2$kmer))
m_w <- do.call(rbind, lapply(kmer_levels, function(current_kmer) {
  df3 <- df_all2 %>% filter(kmer == current_kmer & group %in% c(1, 2))
  df3$group <- factor(df3$group, levels = c(1, 2))
  data.frame(
    kmer = current_kmer,
    pvalue = wilcox.test(freq ~ group, data = df3)$p.value
  )
}))

In [None]:
# Kept for downstream code; there is no header row to drop any more.
m_w2 <- m_w

In [None]:
m_w %>% arrange(pvalue)

In [None]:
options(repr.plot.width = 20, repr.plot.height = 16)
# Box plots of the 24 k-mers with the smallest p-values. The p-values are
# converted to numbers before sorting so the ranking is numeric even when
# `m_w$pvalue` is stored as text; non-numeric rows are dropped.
top_kmers <- m_w %>%
  mutate(pvalue = suppressWarnings(as.numeric(pvalue))) %>%
  filter(!is.na(pvalue)) %>%
  arrange(pvalue) %>%
  pull(kmer) %>%
  head(24)

df_all2 %>%
  filter(kmer %in% top_kmers) %>%
  ggplot(aes(x = group, y = freq)) +
  geom_boxplot(
    outlier.shape = NA,
    alpha = 0.7, width = 0.9, aes(fill = group)
  ) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  # `binaxis` / `stackdir` are not geom_jitter() parameters and were only
  # triggering "unknown parameters" warnings.
  geom_jitter(
    position = position_jitter(width = 0.1, height = 0),
    size = 3, shape = 21, aes(fill = group), color = "black"
  ) +
  facet_wrap(~kmer, scales = "free", ncol = 6) +
  ylab("Frequency") +
  xlab("Condition") +
  theme_classic() +
  ylim(0, NA) +
  theme(strip.background = element_blank(), panel.grid = element_blank())

### HPAP Kmers

In [None]:
# Significant k-mers from the HPAP analysis.
hpap_kmers <- read_csv("../../231003_VN_DiabetesV02/signif_kmers_HPAP.csv")

In [None]:
hpap_kmers <- hpap_kmers$x

In [None]:
# K-mers with p < 0.05 in this data set. The comparison is done on numeric
# values so it is correct even when `m_w$pvalue` is stored as text (a string
# comparison misclassifies values such as "1e-05"); non-numeric rows are
# dropped.
our_kmers <- m_w %>%
  dplyr::filter(suppressWarnings(as.numeric(pvalue)) < 0.05) %>%
  pull(kmer)

In [None]:
our_kmers

In [None]:
hpap_kmers

In [None]:
our_kmers %in% hpap_kmers

In [None]:
options(repr.plot.width = 7, repr.plot.height = 4)
# Frequency of the k-mers significant in both this data set and HPAP,
# Ctrl (group 2) vs T1DM (group 1).
df_all2 %>%
  filter(kmer %in% our_kmers[our_kmers %in% hpap_kmers]) %>%
  ggplot(
    aes(
      x = factor(group, levels = c(2, 1), labels = c("Ctrl", "T1DM")),
      y = freq
    )
  ) +
  geom_boxplot(
    outlier.shape = NA,
    alpha = 0.7, width = 0.9, aes(fill = group)
  ) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(
    binaxis = "y", position = position_jitter(width = 0.1, height = 0),
    size = 3, shape = 21, stackdir = "center", aes(fill = group), color = "black"
  ) +
  facet_wrap(~kmer, scales = "free", ncol = 6) +
  ylab("Frequency") +
  xlab("Condition") +
  theme_classic() +
  scale_fill_manual(values = c("indianred", "dodgerblue")) +
  ggpubr::stat_compare_means(
    size = 7, label = "p.format", label.x = 1.3,
    label.y.npc = 0.9
  ) +
  ggtheme() +
  ylim(0, NA) +
  theme(strip.background = element_blank(), panel.grid = element_blank())

## Are Panc Kmers enriched in Dia in general?

In [None]:
options(repr.plot.width = 15, repr.plot.height = 4)
# Frequency of all reference k-mers pooled, Ctrl (group 2) vs T1DM (group 1).
df_all2 %>%
  ggplot(
    aes(
      x = factor(group, levels = c(2, 1), labels = c("Ctrl", "T1DM")),
      y = freq
    )
  ) +
  geom_boxplot(
    outlier.shape = NA,
    alpha = 0.7, width = 0.9, aes(fill = group)
  ) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(
    binaxis = "y", position = position_jitter(width = 0.1, height = 0),
    size = 3, shape = 21, stackdir = "center", aes(fill = group), color = "black"
  ) +
  ylab("Frequency") +
  xlab("Condition") +
  theme_classic() +
  scale_fill_manual(values = c("indianred", "dodgerblue")) +
  ggpubr::stat_compare_means(
    size = 7, label = "p.format", label.x = 1.3,
    label.y.npc = 0.9
  ) +
  ggtheme() +
  ylim(0, NA) +
  theme(strip.background = element_blank(), panel.grid = element_blank())

In [None]:
options(repr.plot.width = 15, repr.plot.height = 4)
# Log-scale version of the pooled k-mer comparison, Ctrl (group 2) vs T1DM
# (group 1). The lower axis limit of 0 was removed: log(freq + 1e-04) is
# negative for every frequency below 1, so `ylim(0, NA)` discarded exactly
# the low and zero frequencies the pseudocount is meant to keep. The axis
# label now states the transformation.
df_all2 %>%
  ggplot(
    aes(
      x = factor(group, levels = c(2, 1), labels = c("Ctrl", "T1DM")),
      y = log(freq + 0.0001)
    )
  ) +
  geom_boxplot(
    outlier.shape = NA,
    alpha = 0.7, width = 0.9, aes(fill = group)
  ) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  # `binaxis` / `stackdir` are not geom_jitter() parameters and were only
  # triggering "unknown parameters" warnings.
  geom_jitter(
    position = position_jitter(width = 0.1, height = 0),
    size = 3, shape = 21, aes(fill = group), color = "black"
  ) +
  ylab("log(frequency + 1e-04)") +
  xlab("Condition") +
  theme_classic() +
  scale_fill_manual(values = c("indianred", "dodgerblue")) +
  ggpubr::stat_compare_means(
    size = 7, label = "p.format", label.x = 1.3,
    label.y.npc = 0.9
  ) +
  ggtheme() +
  theme(strip.background = element_blank(), panel.grid = element_blank())

### Percentage occupied by 10 largest clones

In [None]:
# Clone sizes per sample (cells without any CDR3 excluded) and each clone's
# share of its sample.
clone_table <- metadata_6 %>%
  filter(clone_aa != "CDR3b NA CDR3a NA") %>%
  dplyr::group_by(Sample_char, clone_aa) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n))

In [None]:
clone_table  %>% arrange(desc(freq))

In [None]:
options(repr.plot.width = 12, repr.plot.height = 4)

# Share of each sample taken up by its ten largest clones, compared between
# "<Disease> <Time>" conditions within each age group. The sample label is
# split back into its six space-separated fields.
clone_table %>%
  dplyr::group_by(Sample_char) %>%
  slice_max(order_by = freq, n = 10, with_ties = FALSE) %>%
  summarize(freq_top10 = sum(freq)) %>%
  separate(
    Sample_char,
    into = c("Patient_ID", "Disease", "Time", "Age_group", "Sex", "Exp"),
    remove = FALSE, sep = " "
  ) %>%
  mutate(Condition = paste(Disease, Time)) %>%
  ggplot(aes(x = Condition, y = freq_top10)) +
  geom_boxplot(
    outlier.shape = NA,
    alpha = 0.7, width = 0.9, aes(fill = Condition)
  ) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(
    binaxis = "y", position = position_jitter(width = 0.1, height = 0),
    size = 3, shape = 21, stackdir = "center", aes(fill = Condition), color = "black"
  ) +
  ylab("Frequency") +
  xlab("Condition") +
  facet_wrap(~Age_group, scales = "free") +
  theme_classic() +
  ggpubr::stat_compare_means() +
  ylim(0, NA) +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("Frequency of top10 clones")

In [None]:
    # Same top-10 clone share as above, restricted to the "Ctrl T0" and
    # "Dia T0" conditions.
    clone_table %>%
      dplyr::group_by(Sample_char) %>%
      slice_max(order_by = freq, n = 10, with_ties = FALSE) %>%
      summarize(freq_top10 = sum(freq)) %>%
      separate(
        Sample_char,
        into = c("Patient_ID", "Disease", "Time", "Age_group", "Sex", "Exp"),
        remove = FALSE, sep = " "
      ) %>%
      mutate(Condition = paste(Disease, Time)) %>%
      filter(Condition %in% c("Ctrl T0", "Dia T0")) %>%
      ggplot(aes(x = Condition, y = freq_top10)) +
      geom_boxplot(
        outlier.shape = NA,
        alpha = 0.7, width = 0.9, aes(fill = Condition)
      ) +
      geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
      geom_jitter(
        binaxis = "y", position = position_jitter(width = 0.1, height = 0),
        size = 3, shape = 21, stackdir = "center", aes(fill = Condition), color = "black"
      ) +
      ylab("Frequency") +
      xlab("Condition") +
      facet_wrap(~Age_group, scales = "free") +
      theme_classic() +
      ggpubr::stat_compare_means() +
      ylim(0, NA) +
      theme(strip.background = element_blank(), panel.grid = element_blank()) +
      theme(axis.text.x = element_text(angle = 90)) +
      ggtitle("Frequency of top10 clones")

## Function to generate overlap heatmap

In [None]:
# Level-2 CD4 subcluster object from the V06 analysis.
cd4_l2_subcluster <- readRDS("../../240617_VN_Diabetes_V06/data/processed/L2/cd4_subcluster.rds")

In [None]:
source("diabetes_analysis_v07.R")

In [None]:
# From here on the TCR tables are built from the L2 object's metadata.
metadata_6 <- cd4_l2_subcluster@meta.data

In [None]:
# Create the figure directory including missing parents; stay silent when it
# already exists so the cell can be re-run.
dir.create("../figures/tcr/", recursive = TRUE, showWarnings = FALSE)

In [None]:
## CDR3_B

In [None]:
# Wide CDR3b x sample count table (0 where a sequence is absent).
clone_table_individual <- metadata_6 %>%
  dplyr::group_by(cdr3_B, Sample_char) %>%
  dplyr::summarize(n = n()) %>%
  arrange(desc(n)) %>%
  pivot_wider(
    names_from = Sample_char,
    values_from = n,
    values_fill = 0
  )

In [None]:
# Overlap table from the helper defined in the sourced analysis script.
df_all4 <- get_df_all4_for_tcr_analysis(clone_table_individual)

In [None]:
plot_tcr_overlap_matrix(
  df_all4 = df_all4,
  sample_name = "cdr3b_CD4"
)

In [None]:
# Overlap index for the CDR3b tables built above.
#
# This cell previously also held a block pasted from the CD8 notebook: it
# rebuilt `clone_aa`, `clone_table_individual` and `df_all4` for the combined
# alpha/beta clonotype and wrote figures labelled "clone_aa_CD8". Its last
# statement was fused with the call below on a single line, so the cell did
# not parse. The pasted block was removed because it replaced the CDR3b
# tables that the "cdr3b_CD4" calls here and in the next cell rely on and
# produced CD8-named figures from CD4 data; the combined alpha/beta analysis
# is run with CD4 labels in the dedicated "CDR3_both" cells further down.
plot_overlap_index(df_all4, "cdr3b_CD4")

In [None]:
tcr_overlap_heatmap(clone_table_individual = clone_table_individual, sample = "cdr3b_CD4")

In [None]:
## CDR3_A1

In [None]:
# Wide CDR3a1 x sample count table (0 where a sequence is absent).
clone_table_individual <- metadata_6 %>%
  dplyr::group_by(cdr3_A1, Sample_char) %>%
  dplyr::summarize(n = n()) %>%
  arrange(desc(n)) %>%
  pivot_wider(
    names_from = Sample_char,
    values_from = n,
    values_fill = 0
  )

In [None]:
df_all4 <- get_df_all4_for_tcr_analysis(clone_table_individual)

In [None]:
plot_tcr_overlap_matrix(
  df_all4 = df_all4,
  sample_name = "cdr3a1_CD4"
)

In [None]:
plot_overlap_index(df_all4, "cdr3a1_CD4")

In [None]:
tcr_overlap_heatmap(
  clone_table_individual = clone_table_individual,
  sample = "cdr3a1_CD4"
)

In [None]:
## CDR3_both

In [None]:
# Clonotype keys: nucleotide key from beta + both alpha chains, amino-acid
# key from beta + first alpha chain (space separated).
metadata_6 <- metadata_6 %>%
  mutate(
    clone_nt = paste(cdr3_B_nt, cdr3_A1_nt, cdr3_A2_nt),
    clone_aa = paste(cdr3_B, cdr3_A1)
  )

In [None]:
# Wide clonotype x sample count table (0 where a clonotype is absent).
clone_table_individual <- metadata_6 %>%
  dplyr::group_by(clone_aa, Sample_char) %>%
  dplyr::summarize(n = n()) %>%
  arrange(desc(n)) %>%
  pivot_wider(
    names_from = Sample_char,
    values_from = n,
    values_fill = 0
  )

In [None]:
source("diabetes_analysis_v07.R")

In [None]:
df_all4 <- get_df_all4_for_tcr_analysis(clone_table_individual)

In [None]:
plot_tcr_overlap_matrix(
  df_all4 = df_all4,
  sample_name = "clone_aa_CD4"
)

In [None]:
plot_overlap_index(df_all4, "clone_aa_CD4")

In [None]:
tcr_overlap_heatmap(
  clone_table_individual = clone_table_individual,
  sample = "clone_aa_CD4"
)

In [None]:
#' Plot pairwise TCR repertoire overlap between patients
#'
#' Converts a clonotype x patient count table to presence/absence, computes
#' for every ordered pair of patients the fraction of one patient's
#' clonotypes that is also found in the other, writes four heatmaps to
#' `../figures/tcr/` (linear and log scale, each unclustered and clustered)
#' and returns a violin plot of the overlap grouped by diagnosis pair.
#'
#' The diagnosis of a patient is read from the first character of its column
#' name: "1" = DIA, "2" = CTRL, "3" = Pre-Dia; any other combination falls
#' into "DIA - CTRL".
#'
#' The former defaults `clone_table_individual = clone_table_individual` and
#' `md = md` were self-referential and raised "promise already under
#' evaluation" whenever an argument was omitted, so both are now required.
#'
#' @param clone_table_individual Wide table whose first column is the
#'   clonotype key and whose remaining columns hold per-patient cell counts.
#'   The first row is discarded (presumably the missing-TCR row - confirm
#'   against the caller).
#' @param sample Label used as prefix of the heatmap file names.
#' @param md Annotation table that is left-joined (on the columns it shares
#'   with a one-column table `rn` of patient names) onto the patients; it
#'   must provide `Disease`. Presumably it also carries `rn` and `Sex` -
#'   confirm against the caller.
#' @return A ggplot object: overlap per diagnosis pair, self-comparisons
#'   excluded.
tcr_overlap_heatmap_patient <- function(clone_table_individual, sample, md) {
  # Presence/absence per patient column (column 1 is the clonotype key).
  clone_table_individual_binary <- clone_table_individual %>%
    mutate_at(vars(2:ncol(clone_table_individual)), is_positive)

  # Drop the first row and sort the patient columns alphabetically.
  clone_table_individual_binary <-
    clone_table_individual_binary[2:nrow(clone_table_individual_binary), ]
  order_cols <- order(colnames(clone_table_individual_binary)[-1]) + 1
  clone_table_individual_binary <- clone_table_individual_binary[, c(1, order_cols)]

  # Overlap matrix: entry [i, j] is the fraction of patient j's clonotypes
  # that also occur in patient i (computed for all pairs at once).
  presence <- as.matrix(
    as.data.frame(clone_table_individual_binary)[, -1, drop = FALSE]
  ) > 0
  presence[is.na(presence)] <- FALSE
  shared <- crossprod(presence * 1) # shared[i, j]: clonotypes present in both
  df_all4 <- as.data.frame(sweep(shared, 2, diag(shared), "/"))
  rownames(df_all4) <- colnames(df_all4)

  # Blank out complete overlap (this covers the self-comparisons on the
  # diagonal) so it does not dominate the colour scale.
  df24 <- df_all4
  df24[df24 == 1] <- 0
  matrix_4 <- as.matrix(df24)

  # Column annotation for the clustered heatmaps.
  sample_annot <- data.frame(
    row.names = rownames(matrix_4),
    rn = rownames(matrix_4)
  ) %>%
    left_join(md) %>%
    mutate(Disease = ifelse(Disease == "Pre-Dia", "PreDia", Disease)) %>%
    dplyr::select(-rn)
  rownames(sample_annot) <- rownames(matrix_4)

  ann_colors <- list(
    Sex = c(F = "indianred2", M = "dodgerblue"),
    Disease = c(Dia = "indianred2", Ctrl = "dodgerblue1", PreDia = "rosybrown1")
  )

  # Linear scale: unclustered, then clustered with annotation.
  pheatmap::pheatmap(
    matrix_4,
    cluster_rows = FALSE,
    cluster_cols = FALSE,
    filename = paste0("../figures/tcr/", sample, "_heatmap_by_patient.png"),
    width = 12,
    height = 12
  )

  pheatmap::pheatmap(
    matrix_4,
    cluster_rows = TRUE,
    cluster_cols = TRUE,
    filename = paste0("../figures/tcr/", sample, "_heatmap_cluster_by_patient.png"),
    width = 12,
    height = 13,
    annotation_col = sample_annot,
    annotation_colors = ann_colors
  )

  # Log scale with a small pseudocount so zero overlap stays finite.
  matrix_5 <- log(matrix_4 + 0.0001)

  pheatmap::pheatmap(
    matrix_5,
    cluster_rows = FALSE,
    cluster_cols = FALSE,
    filename = paste0("../figures/tcr/", sample, "_heatmap_log_by_patient.png"),
    width = 12,
    height = 12
  )

  pheatmap::pheatmap(
    matrix_5,
    cluster_rows = TRUE,
    cluster_cols = TRUE,
    filename = paste0("../figures/tcr/", sample, "_heatmap_log_cluster_by_patient.png"),
    width = 12,
    height = 13,
    annotation_col = sample_annot,
    annotation_colors = ann_colors
  )

  # Long table of all ordered pairs, labelled by the diagnoses compared.
  overlap_index <- df_all4 %>%
    rownames_to_column("var1") %>%
    pivot_longer(!var1, names_to = "var2", values_to = "overlap") %>%
    unique() %>%
    mutate(Disease_1 = substr(var1, 1, 1)) %>%
    mutate(Disease_2 = substr(var2, 1, 1)) %>%
    mutate(comparison_type = case_when(
      var1 == var2 ~ "SELF - SELF",
      Disease_1 == "1" & Disease_2 == "1" ~ "DIA - DIA",
      Disease_1 == "2" & Disease_2 == "2" ~ "CTRL - CTRL",
      Disease_1 == "3" & Disease_2 == "3" ~ "Pre-Dia - Pre-Dia",
      (Disease_1 == "3" & Disease_2 == "1") |
        (Disease_2 == "3" & Disease_1 == "1") ~ "Dia - Pre-Dia",
      (Disease_1 == "3" & Disease_2 == "2") |
        (Disease_2 == "3" & Disease_1 == "2") ~ "Ctrl - Pre-Dia",
      TRUE ~ "DIA - CTRL"
    ))

  # The plot is the last expression and therefore the return value.
  # ggsave(paste0("../figures/tcr/", sample, "_overlap1_by_patient.png"), width = 15, height = 11, units = "cm")
  # ggsave(paste0("../figures/tcr/", sample, "_overlap1_by_patient.svg"), width = 15, height = 11, units = "cm")
  overlap_index %>%
    filter(comparison_type != "SELF - SELF") %>%
    ggplot(aes(x = comparison_type, y = overlap)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_jitter(position = position_jitter(0.2), size = 1, color = "grey70", alpha = 0.1) +
    geom_violin(aes(color = comparison_type), scale = "width", alpha = 0.7) +
    theme_classic() +
    NoLegend() +
    theme(axis.text.x = element_text(angle = 45, vjust = 0.8, hjust = 0.8)) +
    ggtitle("Overlap between diagnoses") +
    xlab("Compared diagnoses") +
    ylab("Percentage of shared")
}

# TCR properties peptides

In [None]:
library(Peptides)
library(Seurat)
library(dplyr)
    
#' Add physicochemical CDR3 descriptors to the cell metadata.
#'
#' Seven descriptors (pI, boman, charge, hmoment, hydrophobicity, mw, mz) are
#' computed for the beta chain (`cdr3_B`), the first alpha chain (`cdr3_A1`)
#' and the beta sequence concatenated with the alpha sequence (suffix
#' `cdr3_clone`). Columns are named `<descriptor>_<suffix>` and written in
#' chain-major order: all beta columns, then alpha, then clone. Cells with a
#' missing sequence get NA; the clone columns are NA if either chain is NA.
#'
#' @param seurat Object whose `meta.data` slot contains `cdr3_B` and `cdr3_A1`.
#' @return `seurat` with the 21 descriptor columns set in `meta.data`.
add_TCR_properties <- function(seurat) {
  # NOTE(review): the descriptor functions are presumably those of the
  # Peptides package attached just before this definition - confirm.
  descriptor_names <- c(
    "pI", "boman", "charge", "hmoment", "hydrophobicity", "mw", "mz"
  )

  cell_meta <- seurat@meta.data
  beta_seq <- cell_meta[["cdr3_B"]]
  alpha_seq <- cell_meta[["cdr3_A1"]]

  # Each target pairs the sequences to score with the mask of cells to blank.
  targets <- list(
    cdr3_B = list(sequence = beta_seq, absent = is.na(beta_seq)),
    cdr3_A1 = list(sequence = alpha_seq, absent = is.na(alpha_seq)),
    cdr3_clone = list(
      sequence = paste0(beta_seq, alpha_seq),
      absent = is.na(beta_seq) | is.na(alpha_seq)
    )
  )

  for (suffix in names(targets)) {
    target <- targets[[suffix]]
    for (descriptor in descriptor_names) {
      compute <- get(descriptor, mode = "function")
      # The descriptor is evaluated on every sequence; the mask only replaces
      # the result for cells without a sequence.
      cell_meta[[paste0(descriptor, "_", suffix)]] <- ifelse(
        target$absent, NA_integer_, compute(target$sequence)
      )
    }
  }

  seurat@meta.data <- cell_meta
  seurat
}

In [None]:
# Load the CD4 sub-cluster object from the processed L2 data folder.
cd4_l2_subcluster  <- readRDS("../data/processed/L2/cd4_subcluster.rds")

In [None]:
# Append the CDR3 descriptor columns to the object's metadata.
cd4_l2_subcluster  <- add_TCR_properties(cd4_l2_subcluster)

In [None]:
# Show the metadata dimensions; the column count matters because the cells
# below address the descriptor columns by position.
dim(cd4_l2_subcluster@meta.data)

In [None]:
# Inspect metadata columns 177-197.
# NOTE(review): presumably these are the 21 columns written by
# add_TCR_properties() - confirm against the dim() output above.
cd4_l2_subcluster@meta.data[,177:197]

In [None]:
library(ggpubr)

In [None]:
# Create the output folder together with any missing parent folders, and stay
# silent when it already exists so the cell can be re-run.
dir.create(
  "../figures/tcr/peptides_cd4_allclones/",
  recursive = TRUE,
  showWarnings = FALSE
)

## All clones

In [None]:
# Notebook display size for the per-property violin plots.
options(repr.plot.width = 4, repr.plot.height = 6.5)

# One violin plot per metadata column 177-197, comparing per-cell scores
# between Disease groups; every plot is shown and saved as SVG and PNG.
# NOTE(review): the columns are addressed by position and are presumably the
# 21 descriptors written by add_TCR_properties() - confirm if the metadata
# layout changes.
for (i in 177:197) {
  prop_name <- colnames(cd4_l2_subcluster@meta.data)[i]

  df <- data.frame(Score = cd4_l2_subcluster@meta.data[, i],
                   Annotation = cd4_l2_subcluster$Disease)

  # MD is the group median that is printed; label_y only shifts the text to
  # 1.2 x median so that it does not sit on the median crossbar. (The label
  # previously printed the shifted value instead of the median.)
  dataMedian <- summarise(group_by(df, Annotation),
                          MD = median(Score, na.rm = TRUE),
                          label_y = 1.2 * MD)

  p <- ggplot(df, aes(x = Annotation, y = Score)) +
    ggrastr::rasterise(geom_jitter(position = position_jitter(0.2), size = 1,
                                   color = "grey70", alpha = 0.7)) +
    geom_violin(aes(color = Annotation), scale = "width", alpha = 0.7) +
    theme_classic() +
    NoLegend() +
    theme(axis.text.x = element_text(angle = 45, vjust = 0.8, hjust = 0.8)) +
    # Three colours, as in the one-random-clone plots, so that a third
    # Disease level does not break the manual scale.
    scale_color_manual(values = c("blue", "#c41515ff", "#d87f7fff")) +
    stat_summary(fun = "median",
                 geom = "crossbar",
                 width = 0.6,
                 colour = "grey20") +
    geom_text(data = dataMedian,
              aes(Annotation, label_y, label = round(MD, digits = 2)),
              size = 7) +
    xlab("") +
    ggpubr::stat_compare_means(size = 7, label = "p.format", label.x = 1.3,
                               label.y.npc = 0.9) +
    ggtitle(prop_name) +
    theme(panel.background = element_blank(),
          axis.text.x = element_text(angle = 90),
          axis.ticks.x = element_blank()) +
    ggtheme()

  print(p)

  # Pass the plot explicitly instead of relying on the last displayed plot.
  ggsave(paste0("../figures/tcr/peptides_cd4_allclones/", prop_name, ".svg"),
         plot = p, width = 7, height = 14, units = "cm")
  ggsave(paste0("../figures/tcr/peptides_cd4_allclones/", prop_name, ".png"),
         plot = p, width = 7, height = 14, units = "cm")
}

In [None]:
library(ggbeeswarm)

## All clones by patient

In [None]:
# Create the output folder together with any missing parent folders, and stay
# silent when it already exists so the cell can be re-run.
dir.create(
  "../figures/tcr/peptides_cd4_bypatient/",
  recursive = TRUE,
  showWarnings = FALSE
)

In [None]:
# Per-patient view of metadata columns 177-197: scores are averaged within
# each Patient_ID x Disease group, and the patient means are drawn as a violin
# with a median crossbar and one beeswarm point per patient. Every plot is
# shown and saved as SVG and PNG.
for (i in 177:197) {
  df <- data.frame(
    Score = cd4_l2_subcluster@meta.data[, i],
    Condition = cd4_l2_subcluster$Condition,
    Sample_ID = cd4_l2_subcluster$Sample_ID,
    Patient_ID = cd4_l2_subcluster$Patient_ID,
    Disease = cd4_l2_subcluster$Disease
  )

  # The x variable can be swapped for any other grouping column of interest.
  p <- ggplot(
    summarise(
      group_by(df, Patient_ID, Disease),
      avg_score = mean(Score, na.rm = TRUE)
    ),
    aes(x = Disease, y = avg_score)
  ) +
    geom_violin(alpha = 0.3, aes(fill = Disease), scale = "width") +
    stat_summary(fun = "median", geom = "crossbar", width = 0.75,
                 color = "grey30") +
    geom_beeswarm(size = 3, aes(fill = Disease), cex = 3, shape = 21,
                  color = "black", corral = "random") +
    scale_fill_manual(values = c("#1874cdff", "#c41515ff", "#eeb4b4ff")) +
    scale_color_manual(values = c("dodgerblue3", "#aa2a2aff", "#e88989ff")) +
    # Pairwise alternative kept for reference:
    # ggpubr::stat_compare_means(comparisons = list(c(1,3), c(2,3), c(1,2)), size = 7)
    ggpubr::stat_compare_means(size = 7, label = "p.format") +
    ggtitle(colnames(cd4_l2_subcluster@meta.data)[i]) +
    theme_classic() +
    xlab("") +
    ylab("") +
    theme(panel.background = element_blank(),
          axis.ticks.x = element_blank()) +
    ggtheme()
  print(p)

  # ggsave() writes the most recent plot, which is p.
  ggsave(
    paste0("../figures/tcr/peptides_cd4_bypatient/",
           colnames(cd4_l2_subcluster@meta.data)[i], ".svg"),
    width = 9, height = 9, units = "cm"
  )
  ggsave(
    paste0("../figures/tcr/peptides_cd4_bypatient/",
           colnames(cd4_l2_subcluster@meta.data)[i], ".png"),
    width = 9, height = 9, units = "cm"
  )
}

## One clone counted just once

In [None]:
#' Keep one randomly chosen cell per clonotype and patient time point.
#'
#' A clonotype label is pasted together from the beta (`cdr3_B`) and first
#' alpha (`cdr3_A1`) CDR3 amino-acid sequences. Within every
#' `clone_aa` x `Patient_Time` group a single cell is drawn with
#' `slice_sample()`, so the selection differs between runs unless the random
#' number generator is seeded. Missing sequences are pasted as the text "NA",
#' so cells without a sequence are grouped together like any other clonotype.
#'
#' @param seurat Object whose metadata contains `cdr3_B`, `cdr3_A1` and
#'   `Patient_Time`.
#' @param seed Optional value passed to `set.seed()` before sampling. The
#'   default `NULL` leaves the random number generator untouched.
#' @return The subsetted object; its metadata gains a `barcode` column holding
#'   the cell names.
one_random_clone <- function(seurat, seed = NULL) {
  if (!is.null(seed)) {
    set.seed(seed)
  }

  # Cell names are stored as a column so that they survive the dplyr verbs.
  seurat$barcode <- colnames(seurat)

  # Only the amino-acid clonotype is needed for grouping; the nucleotide
  # clonotype that used to be computed here was never used.
  metadata_one_clone <- seurat@meta.data %>%
    mutate(clone_aa = paste("CDR3b", cdr3_B, "CDR3a", cdr3_A1)) %>%
    group_by(clone_aa, Patient_Time) %>%
    slice_sample(n = 1) %>%
    ungroup()

  subset(seurat, barcode %in% metadata_one_clone$barcode)
}

In [None]:
# Reduce the object to one randomly sampled cell per clonotype and
# Patient_Time group; the sampling is random, so re-running this cell can
# select different cells.
cd4_l2_subcluster_one_random_clone  <- one_random_clone(cd4_l2_subcluster)

In [None]:
# Create the output folder together with any missing parent folders, and stay
# silent when it already exists so the cell can be re-run.
dir.create(
  "../figures/tcr/peptides_cd4_one_random_clone/",
  recursive = TRUE,
  showWarnings = FALSE
)

In [None]:
# Notebook display size for the per-property violin plots.
options(repr.plot.width = 4, repr.plot.height = 6.5)

# Same violin plots as for all clones, but on the object reduced to one cell
# per clonotype: one plot per metadata column 177-197, shown once and saved as
# SVG and PNG.
# NOTE(review): the columns are addressed by position and are presumably the
# 21 descriptors written by add_TCR_properties() - confirm if the metadata
# layout changes.
for (i in 177:197) {
  prop_name <- colnames(cd4_l2_subcluster_one_random_clone@meta.data)[i]

  df <- data.frame(Score = cd4_l2_subcluster_one_random_clone@meta.data[, i],
                   Annotation = cd4_l2_subcluster_one_random_clone$Disease)

  # MD is the group median that is printed; label_y only shifts the text to
  # 1.2 x median so that it does not sit on the median crossbar. (The label
  # previously printed the shifted value instead of the median.)
  dataMedian <- summarise(group_by(df, Annotation),
                          MD = median(Score, na.rm = TRUE),
                          label_y = 1.2 * MD)

  p <- ggplot(df, aes(x = Annotation, y = Score)) +
    ggrastr::rasterise(geom_jitter(position = position_jitter(0.2), size = 1,
                                   color = "grey70", alpha = 0.7)) +
    geom_violin(aes(color = Annotation), scale = "width", alpha = 0.7) +
    theme_classic() +
    NoLegend() +
    theme(axis.text.x = element_text(angle = 45, vjust = 0.8, hjust = 0.8)) +
    scale_color_manual(values = c("blue", "#c41515ff", "#d87f7fff")) +
    stat_summary(fun = "median",
                 geom = "crossbar",
                 width = 0.6,
                 colour = "grey20") +
    geom_text(data = dataMedian,
              aes(Annotation, label_y, label = round(MD, digits = 2)),
              size = 7) +
    xlab("") +
    ggpubr::stat_compare_means(size = 7, label = "p.format", label.x = 1.3,
                               label.y.npc = 0.9) +
    ggtitle(prop_name) +
    theme(panel.background = element_blank(),
          axis.text.x = element_text(angle = 90),
          axis.ticks.x = element_blank()) +
    ggtheme()

  # Displayed once; the plot used to be printed twice per iteration.
  print(p)

  # Pass the plot explicitly instead of relying on the last displayed plot.
  ggsave(paste0("../figures/tcr/peptides_cd4_one_random_clone/", prop_name, ".svg"),
         plot = p, width = 7, height = 14, units = "cm")
  ggsave(paste0("../figures/tcr/peptides_cd4_one_random_clone/", prop_name, ".png"),
         plot = p, width = 7, height = 14, units = "cm")
}

## One clone counted just once by patient

In [None]:
# Create the output folder together with any missing parent folders, and stay
# silent when it already exists so the cell can be re-run.
dir.create(
  "../figures/tcr/peptides_cd4_one_random_bypatient/",
  recursive = TRUE,
  showWarnings = FALSE
)

In [None]:
# Per-patient view on the one-cell-per-clonotype object: scores of metadata
# columns 177-197 are averaged within each Patient_ID x Disease group, and the
# patient means are drawn as a violin with a median crossbar and one beeswarm
# point per patient. Every plot is shown and saved as SVG and PNG.
for (i in 177:197) {
  df <- data.frame(
    Score = cd4_l2_subcluster_one_random_clone@meta.data[, i],
    Condition = cd4_l2_subcluster_one_random_clone$Condition,
    Sample_ID = cd4_l2_subcluster_one_random_clone$Sample_ID,
    Patient_ID = cd4_l2_subcluster_one_random_clone$Patient_ID,
    Disease = cd4_l2_subcluster_one_random_clone$Disease
  )

  # The x variable can be swapped for any other grouping column of interest.
  p <- ggplot(
    summarise(
      group_by(df, Patient_ID, Disease),
      avg_score = mean(Score, na.rm = TRUE)
    ),
    aes(x = Disease, y = avg_score)
  ) +
    geom_violin(alpha = 0.3, aes(fill = Disease), scale = "width") +
    stat_summary(fun = "median", geom = "crossbar", width = 0.75,
                 color = "grey30") +
    geom_beeswarm(size = 3, aes(fill = Disease), cex = 3, shape = 21,
                  color = "black", corral = "random") +
    scale_fill_manual(values = c("#1874cdff", "#c41515ff", "#eeb4b4ff")) +
    scale_color_manual(values = c("dodgerblue3", "#aa2a2aff", "#e88989ff")) +
    # Pairwise alternative kept for reference:
    # ggpubr::stat_compare_means(comparisons = list(c(1,3), c(2,3), c(1,2)), size = 7)
    ggpubr::stat_compare_means(size = 7, label = "p.format") +
    ggtitle(colnames(cd4_l2_subcluster_one_random_clone@meta.data)[i]) +
    theme(panel.background = element_blank()) +
    ggtheme()
  print(p)

  # ggsave() writes the most recent plot, which is p.
  ggsave(
    paste0("../figures/tcr/peptides_cd4_one_random_bypatient/",
           colnames(cd4_l2_subcluster_one_random_clone@meta.data)[i], ".svg"),
    width = 12, height = 14, units = "cm"
  )
  ggsave(
    paste0("../figures/tcr/peptides_cd4_one_random_bypatient/",
           colnames(cd4_l2_subcluster_one_random_clone@meta.data)[i], ".png"),
    width = 12, height = 14, units = "cm"
  )
}

## TCR properties table

In [None]:
#' Compare one metadata column between Disease groups in four ways.
#'
#' Reads the global objects `cd4_l2_subcluster` and
#' `cd4_l2_subcluster_one_random_clone`. For each object the column is tested
#' with `wilcox.test()` once on per-cell values and once on per-patient means
#' (scores averaged within Patient_ID x Disease). Missing scores are removed
#' before testing.
#'
#' @param i Position of the metadata column to analyse.
#' @return A data.frame with four rows ("All clones", "Random clones",
#'   "All clones by patient", "Random clones by patient") and the columns
#'   name, cell_type, test_type, estimate, pval, mean_dia, mean_ctrl, upper
#'   and lower (the last two are the bounds of the confidence interval
#'   reported by the test).
get_tcr_prop_table <- function(i) {
  # Builds the single result row for one object and one aggregation level.
  compare_disease_groups <- function(seurat, test_type, by_patient) {
    df <- data.frame(Score = seurat@meta.data[, i],
                     Disease = seurat$Disease)
    if (by_patient) {
      df$Patient_ID <- seurat$Patient_ID
    }
    df <- dplyr::filter(df, !is.na(Score))

    if (by_patient) {
      # Collapse cells to one mean score per patient before testing.
      df <- df %>%
        group_by(Patient_ID, Disease) %>%
        summarise(Score = mean(Score), .groups = "drop")
    }

    # wilcox.test() with a formula requires exactly two Disease groups.
    wcx <- wilcox.test(df$Score ~ df$Disease, conf.int = TRUE)

    group_means <- df %>%
      group_by(Disease) %>%
      summarise(mean = mean(Score))

    # NOTE(review): mean_ctrl / mean_dia are taken by position (first / second
    # Disease group in group_by() order) - confirm that the control group
    # sorts first.
    data.frame(name = colnames(seurat@meta.data)[i],
               cell_type = "CD4",
               test_type = test_type,
               estimate = wcx$estimate,
               pval = wcx$p.value,
               mean_dia = group_means$mean[2],
               mean_ctrl = group_means$mean[1],
               upper = wcx$conf.int[2],
               lower = wcx$conf.int[1])
  }

  rbind(
    compare_disease_groups(cd4_l2_subcluster,
                           "All clones", by_patient = FALSE),
    compare_disease_groups(cd4_l2_subcluster_one_random_clone,
                           "Random clones", by_patient = FALSE),
    compare_disease_groups(cd4_l2_subcluster,
                           "All clones by patient", by_patient = TRUE),
    compare_disease_groups(cd4_l2_subcluster_one_random_clone,
                           "Random clones by patient", by_patient = TRUE)
  )
}

In [None]:
# Run the four-way comparison for each of metadata columns 177-197; the result
# is a list with one table per column.
tcr_all_props_table  <- map(.x = 177:197, .f = get_tcr_prop_table)

In [None]:
# Stack the per-column tables into a single data frame.
tcr_all_props_table2  <- bind_rows(tcr_all_props_table)

In [None]:
# Display the combined table ordered by ascending p-value (the sorted copy is
# not stored).
tcr_all_props_table2  %>% arrange(pval)

In [None]:
# Create the output folder together with any missing parent folders, and stay
# silent when it already exists so the cell can be re-run.
dir.create(
  "../tables/tcr/peptides/",
  recursive = TRUE,
  showWarnings = FALSE
)

In [None]:
# Export the combined comparison table; write.csv() also writes the row names
# as the first column because row.names is left at its default.
write.csv(tcr_all_props_table2, "../tables/tcr/peptides/cd4_tcr_all_props_table.csv")

In [None]:
tcr_all_props_table2  <- 