In [None]:
library(ggplot2)

In [None]:
source("diabetes_analysis_v06.R")

# Honardoost

In [None]:
honardoost  <- readRDS("../data/published_data/Honardoost_2024/honardoost_tcells.rds")

In [None]:
DimPlot(honardoost)

In [None]:
md  <- honardoost@meta.data  %>% dplyr::select(Sample_ID, HLA_Haplotypes, COND)

In [None]:
# Per-sample average expression of BTN3A2/BTN3A1 and a set of MHC-region genes
# in the Honardoost object, reshaped to one row per sample x gene and annotated
# with the HLA haplotype string and condition taken from md.
# NOTE(review): "TABP", "LMP7" and "LMP2" may not match the gene symbols stored
# in the object (the current symbols would be "TAPBP", "PSMB8", "PSMB9") -
# confirm against rownames(honardoost).
avgexp <- AverageExpression(
  honardoost,
  features = c(
    "NOTCH4", "TABP", "TAP2", "LMP7", "TAP1", "LMP2", "LST1",
    "LTB", "TNF", "LTA", "NFKBIL1", "BTN3A2", "BTN3A1"
  ),
  return.seurat = FALSE,
  group.by = "Sample_ID",
  assay = "RNA"
)

# md is selected straight from the per-cell metadata, so it is collapsed to
# its distinct rows before the join; otherwise every sample x gene row is
# first multiplied by the number of cells of that sample and only then
# de-duplicated.
btn_etc <- t(avgexp$RNA) %>%
  as.data.frame() %>%
  rownames_to_column("Sample_ID") %>%
  pivot_longer(!Sample_ID, names_to = "gene", values_to = "expression") %>%
  left_join(dplyr::distinct(md), by = "Sample_ID") %>%
  unique()

In [None]:
btn_etc

In [None]:
# BTN3A2 average expression per sample, grouped by the full HLA haplotype
# string; points are coloured by condition and ggpubr::stat_compare_means()
# adds a p-value label.
options(repr.plot.width = 7, repr.plot.height = 7)

btn_etc %>%
  filter(gene == "BTN3A2") %>%
  ggplot(aes(x = HLA_Haplotypes, y = expression)) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_boxplot(outlier.shape = NA) +
  # binaxis/stackdir are geom_dotplot() arguments; geom_jitter() only ignores
  # them with a warning, so they are not passed here.
  geom_jitter(
    position = position_jitter(width = 0.1, height = 0.01),
    size = 2,
    aes(color = COND)
  ) +
  facet_wrap(~gene, scales = "free") +
  theme_classic() +
  xlab("") +
  ylab("Value") +
  # A single y scale carries both the zero lower limit and the expansion:
  # adding ylim(0, NA) after scale_y_continuous() replaces that scale and
  # silently drops its expand setting.
  scale_y_continuous(
    limits = c(0, NA),
    expand = expansion(mult = c(0, 0.1))
  ) +
  ggpubr::stat_compare_means(
    label.x = 1.2, label.y.npc = "top", size = 5, vjust = 0.3,
    label = "p.format"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 18),
    axis.line = element_line(colour = "black"),
    axis.text = element_text(angle = 90),
    axis.ticks = element_line(colour = "black")
  ) +
  ggtitle("Avg Expression by HLA CD4 T cells")

In [None]:
# Same BTN3A2-by-haplotype plot as the previous cell, titled for the
# Honardoost data set: per-sample average expression on y, full HLA haplotype
# string on x, points coloured by condition.
options(repr.plot.width = 7, repr.plot.height = 7)

btn_etc %>%
  filter(gene == "BTN3A2") %>%
  ggplot(aes(x = HLA_Haplotypes, y = expression)) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_boxplot(outlier.shape = NA) +
  # geom_jitter() has no binaxis/stackdir arguments (they were ignored with a
  # warning), so only the jitter settings are passed.
  geom_jitter(
    position = position_jitter(width = 0.1, height = 0.01),
    size = 2,
    aes(color = COND)
  ) +
  facet_wrap(~gene, scales = "free") +
  theme_classic() +
  xlab("") +
  ylab("Value") +
  # One y scale with both the zero lower limit and the expansion; a trailing
  # ylim(0, NA) would replace scale_y_continuous() and discard expand.
  scale_y_continuous(
    limits = c(0, NA),
    expand = expansion(mult = c(0, 0.1))
  ) +
  ggpubr::stat_compare_means(
    label.x = 1.2, label.y.npc = "top", size = 5, vjust = 0.3,
    label = "p.format"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 18),
    axis.line = element_line(colour = "black"),
    axis.text = element_text(angle = 90),
    axis.ticks = element_line(colour = "black")
  ) +
  ggtitle("Avg Expression by HLA honardoost")

In [None]:
VlnPlot(honardoost, features = "nCount_RNA", pt.size = 0)

In [None]:
VlnPlot(honardoost, features = "nFeature_RNA", pt.size = 0)

In [None]:
btn_etc  <- btn_etc  %>% separate(HLA_Haplotypes, into = c("DQ1","DQ2","DR1","DR2"), 
                                  sep = "_", remove = F)

In [None]:
btn_etc <- btn_etc  %>% mutate(DR1 = gsub(DR1, pattern = "RR", replacement = "DR"))

In [None]:
btn_etc

In [None]:
possible_dq  <- unique(c(btn_etc$DQ1, btn_etc$DQ2))

In [None]:
possible_dq

In [None]:
possible_dr  <- unique(c(btn_etc$DR1, btn_etc$DR2))

In [None]:
possible_dr

In [None]:
plot_list  <- list()

In [None]:
# One BTN3A2 panel per DQ allele (Honardoost). Each sample is labelled
# "Hom <allele>", "Het <allele>" or "Other" from DQ1/DQ2, plotted per
# condition, printed, and stored in plot_list for the combined grid.
options(repr.plot.width = 5, repr.plot.height = 4)

# seq_along() makes the loop a no-op when possible_dq is empty; 1:length(x)
# would run with i = 1 and i = 0 instead.
for (i in seq_along(possible_dq)) {
  allele_i <- possible_dq[i]

  df2 <- btn_etc %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      DQ1 == allele_i & DQ2 == allele_i,
      paste("Hom", allele_i),
      ifelse(DQ1 == allele_i | DQ2 == allele_i, paste("Het", allele_i), "Other")
    ))

  p <- ggplot(df2, aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir belong to geom_dotplot(); geom_jitter() ignored them
    # with a warning, so they are not passed.
    geom_jitter(
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2,
      aes(color = COND)
    ) +
    facet_wrap(~COND) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # Single y scale: a trailing ylim(0, NA) would replace this scale and
    # discard its expand setting.
    scale_y_continuous(limits = c(0, NA), expand = expansion(mult = c(0, 0.1))) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(allele_i) +
    NoLegend()

  print(p)

  plot_list[[i]] <- p
}

In [None]:
options(repr.plot.width = 16, repr.plot.height = 4)
cowplot::plot_grid(plotlist = plot_list, ncol = 7)

In [None]:
# One BTN3A2 panel per DR allele (Honardoost). Each sample is labelled
# "Hom <allele>", "Het <allele>" or "Other" from DR1/DR2, plotted per
# condition, printed, and stored in plot_list for the combined grid.
options(repr.plot.width = 5, repr.plot.height = 4)

# Start from an empty list so that panels left over from the DQ loop cannot
# remain in the DR grid when there are fewer DR alleles than DQ alleles.
plot_list <- list()

# seq_along() makes the loop a no-op when possible_dr is empty; 1:length(x)
# would run with i = 1 and i = 0 instead.
for (i in seq_along(possible_dr)) {
  allele_i <- possible_dr[i]

  df2 <- btn_etc %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      DR1 == allele_i & DR2 == allele_i,
      paste("Hom", allele_i),
      ifelse(DR1 == allele_i | DR2 == allele_i, paste("Het", allele_i), "Other")
    ))

  p <- ggplot(df2, aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir belong to geom_dotplot(); geom_jitter() ignored them
    # with a warning, so they are not passed.
    geom_jitter(
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2,
      aes(color = COND)
    ) +
    facet_wrap(~COND) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # Single y scale: a trailing ylim(0, NA) would replace this scale and
    # discard its expand setting.
    scale_y_continuous(limits = c(0, NA), expand = expansion(mult = c(0, 0.1))) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(allele_i) +
    NoLegend()

  print(p)

  plot_list[[i]] <- p
}

In [None]:
options(repr.plot.width = 18, repr.plot.height = 4)
cowplot::plot_grid(plotlist = plot_list, ncol = 10)

### DF for cross-study comparison

In [None]:
# Rebuild the per-sample BTN3A2 table for the cross-study comparison:
# per-cell metadata columns, per-sample average expression, then one row per
# sample annotated with haplotype and condition.
md <- honardoost@meta.data %>%
  dplyr::select(Sample_ID, HLA_Haplotypes, COND)

avgexp <- AverageExpression(
  honardoost,
  features = "BTN3A2",
  return.seurat = FALSE,
  group.by = "Sample_ID",
  assay = "RNA"
)

# md has one row per cell; joining its distinct rows avoids expanding each
# sample to one row per cell before unique() collapses them again.
btn_etc <- t(avgexp$RNA) %>%
  as.data.frame() %>%
  rownames_to_column("Sample_ID") %>%
  pivot_longer(!Sample_ID, names_to = "gene", values_to = "expression") %>%
  left_join(dplyr::distinct(md), by = "Sample_ID") %>%
  unique()

In [None]:
btn_etc

In [None]:
btn_etc$gene  <- "BTN3A2"

In [None]:
# Linear rescaling of expression to [-1, 1]: the maximum maps to 1 and the
# minimum to -1.
btn_etc$expression_scale <- with(
  btn_etc,
  2 / (max(expression) - min(expression)) * (expression - max(expression)) + 1
)

In [None]:
max(btn_etc$expression_scale)

In [None]:
min(btn_etc$expression_scale)

In [None]:
scale_exp_honardoost  <- btn_etc

In [None]:
scale_exp_honardoost$study  <- "Honardoost"

In [None]:
scale_exp_honardoost

In [None]:
# Keep only the first "_"-separated field of HLA_Haplotypes as column DQ.
# sep = "_" matches the earlier split of the same column into
# DQ1/DQ2/DR1/DR2 (the default separator would also split on any other
# non-alphanumeric character), and extra = "drop" discards the remaining
# fields explicitly instead of through a warning.
scale_exp_honardoost <- scale_exp_honardoost %>%
  separate(HLA_Haplotypes, into = "DQ", sep = "_", extra = "drop")

# Kallionpaa

## Bulk

In [None]:
kallionpaa_bulk <- read_csv("../data/published_data/Kallionpaa_2019/bulk_seq_counts_with_md.csv")

In [None]:
kallionpaa_bulk$`...1`  <- NULL

In [None]:
kallionpaa_bulk

In [None]:
# Genotype table of the Kallionpaa study.
hla <- read_delim("../data/published_data/Kallionpaa_2019/genotypes.tsv")

hla

# Derive Patient_ID from the subject string. The "10_1_" / "10_2_" prefixes
# are rewritten with a dot before splitting on "_" (presumably because these
# IDs contain the field separator - confirm), and both "Ctrll" and "Ctrl"
# are normalised to "Control". Only Patient_ID is kept from the split fields.
hla2 <- hla %>%
  mutate(
    sample = gsub("10_1_", "10.1_", subject),
    sample = gsub("10_2_", "10.2_", sample),
    sample = gsub("Ctrll", "Control", sample),
    sample = gsub("Ctrl", "Control", sample)
  ) %>%
  separate(
    sample,
    into = c("Sample_ID", "Patient_ID", "TimePoint", "CellType", "Index"),
    sep = "_",
    remove = FALSE
  ) %>%
  dplyr::select(-c(sample, Sample_ID, TimePoint, CellType, Index, subject))



In [None]:
colnames(hla2)

In [None]:
# One genotype row per patient: count how often each full genotype occurs for
# a patient and keep the most frequent one.
# with_ties = FALSE guarantees a single row per Patient_ID; with ties kept, a
# patient whose genotypes are equally frequent would appear twice and would
# duplicate that patient's rows in any later join on Patient_ID.
hla3 <- hla2 %>%
  count(
    Patient_ID, A1, A2, B1, B2, C1, C2, DQA11, DQA12,
    DQB11, DQB12, DRA1, DRA2, DRB11, DRB12,
    name = "n"
  ) %>%
  group_by(Patient_ID) %>%
  slice_max(order_by = n, n = 1, with_ties = FALSE) %>%
  ungroup()

In [None]:
count_all4  <- left_join(kallionpaa_bulk, hla3)

In [None]:
count_all4   %>% colnames

In [None]:
possible_a  <- unique(c(count_all4$A1, count_all4$A2))
possible_b <- unique(c(count_all4$B1, count_all4$B2))
possible_c <- unique(c(count_all4$C1, count_all4$C2))
possible_dqa <- unique(c(count_all4$DQA11, count_all4$DQA12))
possible_dqb <- unique(c(count_all4$DQB11, count_all4$DQB12))
possible_dra <- unique(c(count_all4$DRA1, count_all4$DRA2))
possible_drb <- unique(c(count_all4$DRB11, count_all4$DRB12))

In [None]:
possible_a  <- possible_a[!is.na(possible_a)]
possible_b <- possible_b[!is.na(possible_b)]
possible_c <- possible_c[!is.na(possible_c)]
possible_dqa <- possible_dqa[!is.na(possible_dqa)]
possible_dqb <- possible_dqb[!is.na(possible_dqb)]
possible_dra <- possible_dra[!is.na(possible_dra)]
possible_drb <- possible_drb[!is.na(possible_drb)]

In [None]:
plot_list  <- list()

### A

In [None]:
# BTN3A2 expression by HLA-A genotype (Kallionpaa bulk). For every allele in
# possible_a each row is labelled "Hom", "Het" or "Other" from A1/A2, values
# are averaged per patient x disease x haplotype x cell type, and the panel
# is stored in plot_list.
options(repr.plot.width = 5, repr.plot.height = 4)

# seq_along() skips the loop when possible_a is empty; 1:length(x) would run
# with i = 1 and i = 0 instead.
for (i in seq_along(possible_a)) {
  allele_i <- possible_a[i]

  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      A1 == allele_i & A2 == allele_i,
      "Hom",
      ifelse(A1 == allele_i | A2 == allele_i, "Het", "Other")
    ))

  p <- df2 %>%
    group_by(Patient_ID, Disease, haplotype, CellType) %>%
    summarise(expression = mean(value, na.rm = TRUE), .groups = "drop") %>%
    ggplot(aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir belong to geom_dotplot(); geom_jitter() ignored them
    # with a warning, so they are not passed.
    geom_jitter(
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2,
      aes(color = Disease, shape = CellType)
    ) +
    facet_wrap(~Disease) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # Single y scale: a trailing ylim(0, NA) would replace this scale and
    # discard its expand setting.
    scale_y_continuous(limits = c(0, NA), expand = expansion(mult = c(0, 0.1))) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(allele_i) +
    NoLegend()

  plot_list[[i]] <- p
}

In [None]:
options(repr.plot.width = 16, repr.plot.height = 3)
cowplot::plot_grid(plotlist = plot_list, ncol = length(possible_a))

### B

In [None]:
# BTN3A2 expression by HLA-B genotype (Kallionpaa bulk). For every allele in
# possible_b each row is labelled "Hom", "Het" or "Other" from B1/B2, values
# are averaged per patient x disease x haplotype x cell type, and the panel
# is stored in a fresh plot_list.
plot_list <- list()
options(repr.plot.width = 5, repr.plot.height = 4)

# seq_along() skips the loop when possible_b is empty; 1:length(x) would run
# with i = 1 and i = 0 instead.
for (i in seq_along(possible_b)) {
  allele_i <- possible_b[i]

  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      B1 == allele_i & B2 == allele_i,
      "Hom",
      ifelse(B1 == allele_i | B2 == allele_i, "Het", "Other")
    ))

  p <- df2 %>%
    group_by(Patient_ID, Disease, haplotype, CellType) %>%
    summarise(expression = mean(value, na.rm = TRUE), .groups = "drop") %>%
    ggplot(aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir belong to geom_dotplot(); geom_jitter() ignored them
    # with a warning, so they are not passed.
    geom_jitter(
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2,
      aes(color = Disease, shape = CellType)
    ) +
    facet_wrap(~Disease) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # Single y scale: a trailing ylim(0, NA) would replace this scale and
    # discard its expand setting.
    scale_y_continuous(limits = c(0, NA), expand = expansion(mult = c(0, 0.1))) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(allele_i) +
    NoLegend()

  plot_list[[i]] <- p
}

In [None]:
options(repr.plot.width = 16, repr.plot.height = 6)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

### C


In [None]:
# BTN3A2 expression by HLA-C genotype (Kallionpaa bulk). For every allele in
# possible_c each row is labelled "Hom", "Het" or "Other" from C1/C2, values
# are averaged per patient x disease x haplotype x cell type, and the panel
# is stored in a fresh plot_list.
plot_list <- list()
options(repr.plot.width = 5, repr.plot.height = 4)

# seq_along() skips the loop when possible_c is empty; 1:length(x) would run
# with i = 1 and i = 0 instead.
for (i in seq_along(possible_c)) {
  allele_i <- possible_c[i]

  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      C1 == allele_i & C2 == allele_i,
      "Hom",
      ifelse(C1 == allele_i | C2 == allele_i, "Het", "Other")
    ))

  p <- df2 %>%
    group_by(Patient_ID, Disease, haplotype, CellType) %>%
    summarise(expression = mean(value, na.rm = TRUE), .groups = "drop") %>%
    ggplot(aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir belong to geom_dotplot(); geom_jitter() ignored them
    # with a warning, so they are not passed.
    geom_jitter(
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2,
      aes(color = Disease, shape = CellType)
    ) +
    facet_wrap(~Disease) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # Single y scale: a trailing ylim(0, NA) would replace this scale and
    # discard its expand setting.
    scale_y_continuous(limits = c(0, NA), expand = expansion(mult = c(0, 0.1))) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(allele_i) +
    NoLegend()

  plot_list[[i]] <- p
}

In [None]:
options(repr.plot.width = 16, repr.plot.height = 6)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

### DQA

In [None]:
count_all4  %>% colnames

In [None]:
# BTN3A2 expression by HLA-DQA1 genotype (Kallionpaa bulk). For every allele
# in possible_dqa each row is labelled "Hom", "Het" or "Other" from
# DQA11/DQA12, values are averaged per patient x disease x haplotype x cell
# type, and the panel is stored in a fresh plot_list.
plot_list <- list()
options(repr.plot.width = 5, repr.plot.height = 4)

# seq_along() skips the loop when possible_dqa is empty; 1:length(x) would
# run with i = 1 and i = 0 instead.
for (i in seq_along(possible_dqa)) {
  allele_i <- possible_dqa[i]

  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      DQA11 == allele_i & DQA12 == allele_i,
      "Hom",
      ifelse(DQA11 == allele_i | DQA12 == allele_i, "Het", "Other")
    ))

  p <- df2 %>%
    group_by(Patient_ID, Disease, haplotype, CellType) %>%
    summarise(expression = mean(value, na.rm = TRUE), .groups = "drop") %>%
    ggplot(aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir belong to geom_dotplot(); geom_jitter() ignored them
    # with a warning, so they are not passed.
    geom_jitter(
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2,
      aes(color = Disease, shape = CellType)
    ) +
    facet_wrap(~Disease) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # Single y scale: a trailing ylim(0, NA) would replace this scale and
    # discard its expand setting.
    scale_y_continuous(limits = c(0, NA), expand = expansion(mult = c(0, 0.1))) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(allele_i) +
    NoLegend()

  plot_list[[i]] <- p
}

In [None]:
options(repr.plot.width = 16, repr.plot.height = 3)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

In [None]:
# BTN3A2 expression by HLA-DQB1 genotype (Kallionpaa bulk). For every allele
# in possible_dqb each row is labelled "Hom", "Het" or "Other" from
# DQB11/DQB12, values are averaged per patient x disease x haplotype x cell
# type, and the panel is stored in a fresh plot_list.
plot_list <- list()
options(repr.plot.width = 5, repr.plot.height = 4)

# seq_along() skips the loop when possible_dqb is empty; 1:length(x) would
# run with i = 1 and i = 0 instead.
for (i in seq_along(possible_dqb)) {
  allele_i <- possible_dqb[i]

  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      DQB11 == allele_i & DQB12 == allele_i,
      "Hom",
      ifelse(DQB11 == allele_i | DQB12 == allele_i, "Het", "Other")
    ))

  p <- df2 %>%
    group_by(Patient_ID, Disease, haplotype, CellType) %>%
    summarise(expression = mean(value, na.rm = TRUE), .groups = "drop") %>%
    ggplot(aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir belong to geom_dotplot(); geom_jitter() ignored them
    # with a warning, so they are not passed.
    geom_jitter(
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2,
      aes(color = Disease, shape = CellType)
    ) +
    facet_wrap(~Disease) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # Single y scale: a trailing ylim(0, NA) would replace this scale and
    # discard its expand setting.
    scale_y_continuous(limits = c(0, NA), expand = expansion(mult = c(0, 0.1))) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(allele_i) +
    NoLegend()

  plot_list[[i]] <- p
}

In [None]:
options(repr.plot.width = 16, repr.plot.height = 3)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

In [None]:
# BTN3A2 expression by HLA-DRB1 genotype (Kallionpaa bulk). For every allele
# in possible_drb each row is labelled "Hom", "Het" or "Other" from
# DRB11/DRB12, values are averaged per patient x disease x haplotype x cell
# type, and the panel is stored in a fresh plot_list.
plot_list <- list()
options(repr.plot.width = 5, repr.plot.height = 4)

# seq_along() skips the loop when possible_drb is empty; 1:length(x) would
# run with i = 1 and i = 0 instead.
for (i in seq_along(possible_drb)) {
  allele_i <- possible_drb[i]

  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      DRB11 == allele_i & DRB12 == allele_i,
      "Hom",
      ifelse(DRB11 == allele_i | DRB12 == allele_i, "Het", "Other")
    ))

  p <- df2 %>%
    group_by(Patient_ID, Disease, haplotype, CellType) %>%
    summarise(expression = mean(value, na.rm = TRUE), .groups = "drop") %>%
    ggplot(aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir belong to geom_dotplot(); geom_jitter() ignored them
    # with a warning, so they are not passed.
    geom_jitter(
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2,
      aes(color = Disease, shape = CellType)
    ) +
    facet_wrap(~Disease) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # Single y scale: a trailing ylim(0, NA) would replace this scale and
    # discard its expand setting.
    scale_y_continuous(limits = c(0, NA), expand = expansion(mult = c(0, 0.1))) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(allele_i) +
    NoLegend()

  plot_list[[i]] <- p
}

In [None]:
options(repr.plot.width = 18, repr.plot.height = 3)
cowplot::plot_grid(plotlist = plot_list, ncol = 9)

In [None]:
#write.csv(counts_all3, "table_shiny_41BB_2.csv")


# Boxplot with jittered points of one gene's values per
# "<CellType> <Disease>" group in kallionpaa_bulk.
#   gene2: gene name matched against the `gene` column of kallionpaa_bulk.
# Returns the ggplot object. Also sets the notebook plot size to 5 x 4.
plot_bulk <- function(gene2) {
  options(repr.plot.width = 5, repr.plot.height = 4)

  kallionpaa_bulk %>%
    dplyr::filter(gene == gene2) %>%
    mutate(CellType_Disease = paste(CellType, Disease)) %>%
    ggplot(aes(x = CellType_Disease, y = value)) +
    geom_boxplot(outlier.shape = NA, aes(color = Disease)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_jitter(
      shape = 16,
      position = position_jitter(0.05),
      aes(colour = Disease)
    )
}


# Boxplot with jittered points of one gene's values against the `dq2_8`
# column of kallionpaa_bulk (presumably DQ2/DQ8 status - confirm), one facet
# per cell type.
#   gene2: gene name matched against the `gene` column of kallionpaa_bulk.
# Returns the ggplot object. Also sets the notebook plot size to 7 x 4.
plot_bulk2 <- function(gene2) {
  options(repr.plot.width = 7, repr.plot.height = 4)

  kallionpaa_bulk %>%
    dplyr::filter(gene == gene2) %>%
    mutate(CellType_Disease = paste(CellType, Disease)) %>%
    ggplot(aes(x = dq2_8, y = value)) +
    geom_boxplot(outlier.shape = NA) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_jitter(
      shape = 16,
      position = position_jitter(0.05),
      aes(colour = Disease)
    ) +
    facet_wrap(~CellType)
}

# Boxplot with jittered points of one gene's values against the `c7_b8`
# column of kallionpaa_bulk (presumably an HLA-C/HLA-B carrier grouping -
# confirm), one facet per cell type.
#   gene2: gene name matched against the `gene` column of kallionpaa_bulk.
# Returns the ggplot object. Also sets the notebook plot size to 7 x 4.
plot_bulk3 <- function(gene2) {
  options(repr.plot.width = 7, repr.plot.height = 4)

  kallionpaa_bulk %>%
    dplyr::filter(gene == gene2) %>%
    mutate(CellType_Disease = paste(CellType, Disease)) %>%
    ggplot(aes(x = c7_b8, y = value)) +
    geom_boxplot(outlier.shape = NA) +
    ggnewscale::new_scale_colour() +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_jitter(
      shape = 16,
      position = position_jitter(0.1),
      size = 2,
      aes(colour = Disease)
    ) +
    facet_wrap(~CellType)
}

In [None]:
plot_bulk2("BTN3A2")

In [None]:
plot_bulk("BTN3A2")

In [None]:
count_all4

In [None]:
count_all4  %>% colnames

### DF for cross-study comparison

In [None]:
# Mean BTN3A2 value per combination of the selected annotation columns.
# NOTE(review): columns are picked by position (5, 7, 12:25), so this depends
# on the exact column order of count_all4 - confirm against
# colnames(count_all4) before reuse.
count_all5 <- count_all4 %>%
  dplyr::filter(gene == "BTN3A2") %>%
  dplyr::select(5, 7, 12:25, value) %>%
  group_by(across(-value)) %>%
  summarise(expression = mean(value))

In [None]:
count_all5

In [None]:
count_all5$gene  <- "BTN3A2"

In [None]:
# Linear rescaling of expression to [-1, 1]: the maximum maps to 1 and the
# minimum to -1.
count_all5$expression_scale <- with(
  count_all5,
  2 / (max(expression) - min(expression)) * (expression - max(expression)) + 1
)

In [None]:
max(count_all5$expression_scale)

In [None]:
min(count_all5$expression_scale)

In [None]:
scale_exp_kallionpaa  <- count_all5

In [None]:
scale_exp_kallionpaa$study  <- "Kallionpaa"

## scRNAseq

# Lab48

In [None]:
cd4_full_filt  <- readRDS("../data/processed/L1/cd4_l1_full_filt.rds")
cd8_full_filt  <- readRDS("../data/processed/L1/cd8_l1_full_filt.rds")

In [None]:
colnames(cd4_full_filt@meta.data)

In [None]:
# Patient-level lookup of HLA genotype and disease status.
# The expression tables built from the CD4/CD8 objects are grouped on a
# Patient_ID that carries a "g" prefix, so the lookup must carry the same
# prefix or left_join(md) finds no matching patient and every HLA column is
# NA. The prefix is added only when missing, so the lookup is correct whether
# or not the object's metadata has been prefixed already.
md <- cd4_full_filt@meta.data %>%
  dplyr::select(Patient_ID, starts_with("HLA"), Disease) %>%
  mutate(
    Patient_ID = as.character(Patient_ID),
    Patient_ID = ifelse(
      startsWith(Patient_ID, "g"),
      Patient_ID,
      paste0("g", Patient_ID)
    )
  ) %>%
  unique()

In [None]:
cd4_full_filt$Patient_ID <- paste0("g", cd4_full_filt$Patient_ID)

In [None]:
# Average BTN3A2 expression per patient in the CD4 object, reshaped to one
# row per patient x gene and annotated with the columns of md.
avgexp <- AverageExpression(
  cd4_full_filt,
  features = "BTN3A2",
  return.seurat = FALSE,
  group.by = "Patient_ID",
  assay = "RNA"
)

count_all4 <- avgexp$RNA %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("Patient_ID") %>%
  pivot_longer(!Patient_ID, names_to = "gene", values_to = "expression") %>%
  left_join(md) %>%
  unique()

count_all4

In [None]:
count_all4$gene = "BTN3A2"

In [None]:
possible_a  <- unique(c(count_all4$HLA_A1, count_all4$HLA_A2))
possible_b <- unique(c(count_all4$HLA_B1, count_all4$HLA_B2))
possible_c <- unique(c(count_all4$HLA_C1, count_all4$HLA_C2))
possible_dqa <- unique(c(count_all4$HLA_DQA11, count_all4$HLA_DQA12))
possible_dqb <- unique(c(count_all4$HLA_DQB11, count_all4$HLA_DQB12))
possible_dra <- unique(c(count_all4$HLA_DRA1, count_all4$HLA_DRA2))
possible_drb <- unique(c(count_all4$HLA_DRB11, count_all4$HLA_DRB12))

In [None]:
possible_a  <- possible_a[!is.na(possible_a)]
possible_b <- possible_b[!is.na(possible_b)]
possible_c <- possible_c[!is.na(possible_c)]
possible_dqa <- possible_dqa[!is.na(possible_dqa)]
possible_dqb <- possible_dqb[!is.na(possible_dqb)]
possible_dra <- possible_dra[!is.na(possible_dra)]
possible_drb <- possible_drb[!is.na(possible_drb)]

In [None]:
plot_list  <- list()

In [None]:
count_all4$CellType = "CD4"

In [None]:
count_cd4  <- count_all4

In [None]:
cd8_full_filt$Patient_ID <- paste0("g", cd8_full_filt$Patient_ID)

In [None]:
# Average BTN3A2 expression per patient in the CD8 object, reshaped to one
# row per patient x gene and annotated with the columns of md.
avgexp <- AverageExpression(
  cd8_full_filt,
  features = "BTN3A2",
  return.seurat = FALSE,
  group.by = "Patient_ID",
  assay = "RNA"
)

count_all4 <- avgexp$RNA %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("Patient_ID") %>%
  pivot_longer(!Patient_ID, names_to = "gene", values_to = "expression") %>%
  left_join(md) %>%
  unique()

count_all4

In [None]:
count_all4$gene = "BTN3A2"

In [None]:
# Allele lists for the per-locus plots of the combined CD4 + CD8 table.
# They are built from both cell-type tables (the CD8 table in count_all4 and
# count_cd4) so no allele is missed, and NA is removed so that no panel is
# generated for a missing genotype.
possible_a <- unique(c(count_all4$HLA_A1, count_all4$HLA_A2,
                       count_cd4$HLA_A1, count_cd4$HLA_A2))
possible_b <- unique(c(count_all4$HLA_B1, count_all4$HLA_B2,
                       count_cd4$HLA_B1, count_cd4$HLA_B2))
possible_c <- unique(c(count_all4$HLA_C1, count_all4$HLA_C2,
                       count_cd4$HLA_C1, count_cd4$HLA_C2))
possible_dqa <- unique(c(count_all4$HLA_DQA11, count_all4$HLA_DQA12,
                         count_cd4$HLA_DQA11, count_cd4$HLA_DQA12))
possible_dqb <- unique(c(count_all4$HLA_DQB11, count_all4$HLA_DQB12,
                         count_cd4$HLA_DQB11, count_cd4$HLA_DQB12))
possible_dra <- unique(c(count_all4$HLA_DRA1, count_all4$HLA_DRA2,
                         count_cd4$HLA_DRA1, count_cd4$HLA_DRA2))
possible_drb <- unique(c(count_all4$HLA_DRB11, count_all4$HLA_DRB12,
                         count_cd4$HLA_DRB11, count_cd4$HLA_DRB12))

possible_a <- possible_a[!is.na(possible_a)]
possible_b <- possible_b[!is.na(possible_b)]
possible_c <- possible_c[!is.na(possible_c)]
possible_dqa <- possible_dqa[!is.na(possible_dqa)]
possible_dqb <- possible_dqb[!is.na(possible_dqb)]
possible_dra <- possible_dra[!is.na(possible_dra)]
possible_drb <- possible_drb[!is.na(possible_drb)]

In [None]:
count_all4$CellType  <- "CD8"

In [None]:
count_all4  <- rbind(count_all4, count_cd4)

In [None]:
count_all4

In [None]:
colnames(count_all4)  <- gsub(colnames(count_all4), pattern = "HLA_", replacement = "")

### A

In [None]:
plot_list  <- list()

In [None]:
# BTN3A2 expression by HLA-A genotype (Lab48, CD4 + CD8 per-patient
# averages). For every allele in possible_a each row is labelled "Hom",
# "Het" or "Other" from A1/A2 and the panel is stored in plot_list.
options(repr.plot.width = 5, repr.plot.height = 4)

# seq_along() skips the loop when possible_a is empty; 1:length(x) would run
# with i = 1 and i = 0 instead.
for (i in seq_along(possible_a)) {
  allele_i <- possible_a[i]

  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      A1 == allele_i & A2 == allele_i,
      "Hom",
      ifelse(A1 == allele_i | A2 == allele_i, "Het", "Other")
    ))

  p <- ggplot(df2, aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir belong to geom_dotplot(); geom_jitter() ignored them
    # with a warning, so they are not passed.
    geom_jitter(
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2,
      aes(color = Disease, shape = CellType)
    ) +
    facet_wrap(~Disease) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # Single y scale: a trailing ylim(0, NA) would replace this scale and
    # discard its expand setting.
    scale_y_continuous(limits = c(0, NA), expand = expansion(mult = c(0, 0.1))) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(allele_i) +
    NoLegend()

  plot_list[[i]] <- p
}

In [None]:
options(repr.plot.width = 16, repr.plot.height = 6)
# Lay the panels out in two rows. ncol must be a whole number of at least 1,
# so the half-count is rounded up when the number of alleles is odd.
cowplot::plot_grid(
  plotlist = plot_list,
  ncol = max(1, ceiling(length(possible_a) / 2))
)

### B

In [None]:
# BTN3A2 expression by HLA-B genotype (Lab48, CD4 + CD8 per-patient
# averages). For every allele in possible_b each row is labelled "Hom",
# "Het" or "Other" from B1/B2 and the panel is stored in a fresh plot_list.
plot_list <- list()
options(repr.plot.width = 5, repr.plot.height = 4)

# seq_along() skips the loop when possible_b is empty; 1:length(x) would run
# with i = 1 and i = 0 instead.
for (i in seq_along(possible_b)) {
  allele_i <- possible_b[i]

  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      B1 == allele_i & B2 == allele_i,
      "Hom",
      ifelse(B1 == allele_i | B2 == allele_i, "Het", "Other")
    ))

  p <- ggplot(df2, aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir belong to geom_dotplot(); geom_jitter() ignored them
    # with a warning, so they are not passed.
    geom_jitter(
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2,
      aes(color = Disease, shape = CellType)
    ) +
    facet_wrap(~Disease) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # Single y scale: a trailing ylim(0, NA) would replace this scale and
    # discard its expand setting.
    scale_y_continuous(limits = c(0, NA), expand = expansion(mult = c(0, 0.1))) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(allele_i) +
    NoLegend()

  plot_list[[i]] <- p
}

In [None]:
options(repr.plot.width = 16, repr.plot.height = 12)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

### C


In [None]:
# BTN3A2 expression by HLA-C genotype (Lab48, CD4 + CD8 per-patient
# averages). For every allele in possible_c each row is labelled "Hom",
# "Het" or "Other" from C1/C2 and the panel is stored in a fresh plot_list.
plot_list <- list()
options(repr.plot.width = 5, repr.plot.height = 4)

# seq_along() skips the loop when possible_c is empty; 1:length(x) would run
# with i = 1 and i = 0 instead.
for (i in seq_along(possible_c)) {
  allele_i <- possible_c[i]

  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      C1 == allele_i & C2 == allele_i,
      "Hom",
      ifelse(C1 == allele_i | C2 == allele_i, "Het", "Other")
    ))

  p <- ggplot(df2, aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir belong to geom_dotplot(); geom_jitter() ignored them
    # with a warning, so they are not passed.
    geom_jitter(
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2,
      aes(color = Disease, shape = CellType)
    ) +
    facet_wrap(~Disease) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # Single y scale: a trailing ylim(0, NA) would replace this scale and
    # discard its expand setting.
    scale_y_continuous(limits = c(0, NA), expand = expansion(mult = c(0, 0.1))) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(allele_i) +
    NoLegend()

  plot_list[[i]] <- p
}

In [None]:
options(repr.plot.width = 16, repr.plot.height = 9)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

### DQA

In [None]:
count_all4  %>% colnames

In [None]:
# BTN3A2 expression by HLA-DQA1 genotype (Lab48, CD4 + CD8 per-patient
# averages). For every allele in possible_dqa each row is labelled "Hom",
# "Het" or "Other" from DQA11/DQA12 and the panel is stored in a fresh
# plot_list.
plot_list <- list()
options(repr.plot.width = 5, repr.plot.height = 4)

# seq_along() skips the loop when possible_dqa is empty; 1:length(x) would
# run with i = 1 and i = 0 instead.
for (i in seq_along(possible_dqa)) {
  allele_i <- possible_dqa[i]

  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      DQA11 == allele_i & DQA12 == allele_i,
      "Hom",
      ifelse(DQA11 == allele_i | DQA12 == allele_i, "Het", "Other")
    ))

  p <- ggplot(df2, aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir belong to geom_dotplot(); geom_jitter() ignored them
    # with a warning, so they are not passed.
    geom_jitter(
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2,
      aes(color = Disease, shape = CellType)
    ) +
    facet_wrap(~Disease) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # Single y scale: a trailing ylim(0, NA) would replace this scale and
    # discard its expand setting.
    scale_y_continuous(limits = c(0, NA), expand = expansion(mult = c(0, 0.1))) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(allele_i) +
    NoLegend()

  plot_list[[i]] <- p
}

In [None]:
options(repr.plot.width = 16, repr.plot.height = 6)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

In [None]:
# BTN3A2 expression by HLA-DQB1 genotype (Lab48, CD4 + CD8 per-patient
# averages). For every allele in possible_dqb each row is labelled "Hom",
# "Het" or "Other" from DQB11/DQB12 and the panel is stored in a fresh
# plot_list.
plot_list <- list()
options(repr.plot.width = 5, repr.plot.height = 4)

# seq_along() skips the loop when possible_dqb is empty; 1:length(x) would
# run with i = 1 and i = 0 instead.
for (i in seq_along(possible_dqb)) {
  allele_i <- possible_dqb[i]

  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      DQB11 == allele_i & DQB12 == allele_i,
      "Hom",
      ifelse(DQB11 == allele_i | DQB12 == allele_i, "Het", "Other")
    ))

  # The table already holds one averaged `expression` value per patient and
  # cell type and has no `value` column, so it is plotted directly (as in
  # the A/B/C/DQA loops of this section) instead of being re-summarised with
  # mean(value), which fails on this table.
  p <- ggplot(df2, aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir belong to geom_dotplot(); geom_jitter() ignored them
    # with a warning, so they are not passed.
    geom_jitter(
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2,
      aes(color = Disease, shape = CellType)
    ) +
    facet_wrap(~Disease) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # Single y scale: a trailing ylim(0, NA) would replace this scale and
    # discard its expand setting.
    scale_y_continuous(limits = c(0, NA), expand = expansion(mult = c(0, 0.1))) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(allele_i) +
    NoLegend()

  plot_list[[i]] <- p
}

In [None]:
# Show the panels stored in plot_list as an 8-column grid (16 x 3 plot area).
options(repr.plot.width = 16, repr.plot.height = 3)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

In [None]:
# One BTN3A2 expression panel per allele in `possible_drb`.
# Rows are labelled "Hom" when both DRB11 and DRB12 equal the allele, "Het"
# when only one does, and "Other" otherwise. `value` is then averaged per
# Patient_ID / Disease / haplotype / CellType before plotting.
options(repr.plot.width = 5, repr.plot.height = 4)

plot_list <- list()
# seq_along() skips the loop for an empty allele vector; 1:length() would
# iterate over c(1, 0) instead.
for (i in seq_along(possible_drb)) {
  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      DRB11 == possible_drb[i] & DRB12 == possible_drb[i],
      "Hom",
      ifelse(
        DRB11 == possible_drb[i] | DRB12 == possible_drb[i],
        "Het",
        "Other"
      )
    ))

  p <- df2 %>%
    group_by(Patient_ID, Disease, haplotype, CellType) %>%
    # "drop_last" is summarise()'s default here; spelling it out only
    # silences the regrouping message
    summarise(
      expression = mean(value, na.rm = TRUE),
      .groups = "drop_last"
    ) %>%
    ggplot(aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir are geom_dotplot arguments; geom_jitter ignored them
    geom_jitter(
      aes(color = Disease, shape = CellType),
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2
    ) +
    facet_wrap(~Disease) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # single y scale: a separate ylim() would replace it and discard `expand`
    scale_y_continuous(
      limits = c(0, NA),
      expand = expansion(mult = c(0, 0.1))
    ) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(possible_drb[i]) +
    NoLegend()
  plot_list[[i]] <- p
}

In [None]:
# Show the panels stored in plot_list as a 9-column grid (18 x 3 plot area).
options(repr.plot.width = 18, repr.plot.height = 3)
cowplot::plot_grid(plotlist = plot_list, ncol = 9)

### DF for cross study comparison

In [None]:
count_all4

In [None]:
count_all5  <- count_all4

In [None]:
# Linearly rescale expression so the largest value maps to 1 and the smallest
# to -1. na.rm = TRUE keeps a single missing expression from turning the whole
# scaled column into NA; rows with missing expression stay NA.
count_all5$expression_scale <- local({
  expr <- count_all5$expression
  lo <- min(expr, na.rm = TRUE)
  hi <- max(expr, na.rm = TRUE)
  2 / (hi - lo) * (expr - hi) + 1
})

In [None]:
max(count_all5$expression_scale)

In [None]:
min(count_all5$expression_scale)

In [None]:
scale_exp_lab48  <- count_all5

In [None]:
scale_exp_lab48$study  <- "Lab48"

In [None]:
scale_exp_lab48

# HPAP

In [None]:
hpap  <- readRDS("../data/published_data/HPAP/hpap_5p_t_nk_filt.rds")

In [None]:
# Rasterised embedding plot of the HPAP object.
DimPlot(object = hpap, raster = TRUE)

In [None]:
hpap

In [None]:
colnames(hpap@meta.data)

In [None]:
hpap$source  %>% table

In [None]:
options(repr.plot.width = 18, repr.plot.height = 9)

# Rasterised feature plots for FOXP3, CD4, CD8A and SELL on the HPAP object.
FeaturePlot(
  hpap,
  features = c("FOXP3", "CD4", "CD8A", "SELL"),
  raster = TRUE
)

In [None]:
# Split the underscore-delimited `source` label into named fields. The last
# three pieces are discarded (NA names) and the original `source` column is
# kept alongside the new ones.
md <- separate(
  hpap@meta.data,
  source,
  into = c(
    "Patient_ID", "Tissue", "Method", "Assay", "Run", "Well",
    NA, NA, NA
  ),
  sep = "_",
  remove = FALSE
)



In [None]:
md  %>% tail

In [None]:
hpap$Patient_ID  <- md$Patient_ID

In [None]:
# Average BTN3A2 expression per Patient_ID group from the RNA assay.
avgexp <- AverageExpression(
  hpap,
  features = c("BTN3A2"),
  return.seurat = FALSE,
  group.by = "Patient_ID",
  assay = "RNA"
)

# Transpose so groups become rows, then reshape to long format with
# Patient_ID, gene and expression columns and drop duplicate rows.
btn_etc <- unique(
  pivot_longer(
    rownames_to_column(as.data.frame(t(avgexp$RNA)), "Patient_ID"),
    !Patient_ID,
    names_to = "gene",
    values_to = "expression"
  )
)

In [None]:
btn_etc

In [None]:
hla  <- read_delim("../data/published_data/HPAP/genotypes.tsv")

hla

In [None]:
# Keep only the part of `subject` before the first underscore as Patient_ID;
# the original `subject` column is removed.
hla2 <- separate(
  hla,
  subject,
  into = "Patient_ID",
  sep = "_",
  remove = TRUE
)

In [None]:
hla2

In [None]:
patient_metadata  <- read_delim("../../../DATA_scRNAseq/Analysis of previously published data/069_HPANCDB_Immune_TCRseq/hpap_medatata.csv")

In [None]:
# Keep the donor covariates used later and derive Patient_ID by removing the
# hyphens from donor_ID.
patient_metadata2 <- mutate(
  dplyr::select(
    patient_metadata,
    donor_ID, gender, age_years, clinical_diagnosis
  ),
  Patient_ID = gsub("-", "", donor_ID)
)

In [None]:
hla3  <- hla2  %>% left_join(patient_metadata2)

In [None]:
hla3

In [None]:
count_all4  <- left_join(btn_etc, hla3)

In [None]:
count_all4$gene  <- "BTN3A2"

In [None]:
# Collapse clinical_diagnosis into a two-level Disease label: "Ctrl" when the
# diagnosis contains "control", "Dia" otherwise.
# grepl() returns FALSE for NA, so donors with no diagnosis (e.g. no match in
# the metadata join) used to be labelled "Dia"; they now stay NA.
count_all4 <- count_all4 %>%
  mutate(Disease = ifelse(
    is.na(clinical_diagnosis),
    NA_character_,
    ifelse(grepl("control", clinical_diagnosis), "Ctrl", "Dia")
  ))

In [None]:
count_all4

In [None]:
# Collect the distinct allele calls per HLA locus by pooling the locus's two
# allele columns (e.g. A1 and A2) from count_all4.
possible_a  <- unique(c(count_all4$A1, count_all4$A2))
possible_b <- unique(c(count_all4$B1, count_all4$B2))
possible_c <- unique(c(count_all4$C1, count_all4$C2))
possible_dqa <- unique(c(count_all4$DQA11, count_all4$DQA12))
possible_dqb <- unique(c(count_all4$DQB11, count_all4$DQB12))
possible_dra <- unique(c(count_all4$DRA1, count_all4$DRA2))
possible_drb <- unique(c(count_all4$DRB11, count_all4$DRB12))

In [None]:
# Drop missing allele calls so NA is not iterated over as if it were an
# allele in the per-allele plotting loops.
possible_a  <- possible_a[!is.na(possible_a)]
possible_b <- possible_b[!is.na(possible_b)]
possible_c <- possible_c[!is.na(possible_c)]
possible_dqa <- possible_dqa[!is.na(possible_dqa)]
possible_dqb <- possible_dqb[!is.na(possible_dqb)]
possible_dra <- possible_dra[!is.na(possible_dra)]
possible_drb <- possible_drb[!is.na(possible_drb)]

In [None]:
plot_list  <- list()

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

# Overall BTN3A2 expression by Disease for the HPAP table in count_all4.
# The cell used to plot `df2` and title it possible_a[i]; both are leftovers
# from whichever per-allele loop ran last (on first run, the previous
# dataset's). It now plots count_all4 with a fixed title, like the matching
# Transimmunome cell.
count_all4 %>%
  ggplot(aes(x = Disease, y = expression)) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_boxplot(outlier.shape = NA) +
  # binaxis/stackdir are geom_dotplot arguments; geom_jitter ignored them
  geom_jitter(
    aes(color = Disease),
    position = position_jitter(width = 0.1, height = 0.01),
    size = 2
  ) +
  theme_classic() +
  xlab("") +
  ylab("Value") +
  # single y scale: a separate ylim() would replace it and discard `expand`
  scale_y_continuous(
    limits = c(0, NA),
    expand = expansion(mult = c(0, 0.1))
  ) +
  scale_color_manual(values = c("dodgerblue", "indianred3")) +
  ggpubr::stat_compare_means(
    label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
    label = "p.format"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 18),
    axis.line = element_line(colour = "black"),
    axis.text.x = element_text(angle = 90),
    axis.ticks = element_line(colour = "black")
  ) +
  ggtitle("BTN3A2 in HPAP")

### A

In [None]:
# One BTN3A2 expression panel per allele in `possible_a`.
# Rows are labelled "Hom" when both A1 and A2 equal the allele, "Het" when
# only one does, and "Other" otherwise; panels are faceted by Disease.
options(repr.plot.width = 5, repr.plot.height = 4)

# Reset in this cell, as the sibling loops do, so re-running it alone cannot
# keep panels from another locus.
plot_list <- list()
# seq_along() skips the loop for an empty allele vector; 1:length() would
# iterate over c(1, 0) instead.
for (i in seq_along(possible_a)) {
  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      A1 == possible_a[i] & A2 == possible_a[i],
      "Hom",
      ifelse(A1 == possible_a[i] | A2 == possible_a[i], "Het", "Other")
    ))

  p <- df2 %>%
    # grouping kept from the original; no summarise follows it
    group_by(Patient_ID, Disease, haplotype) %>%
    ggplot(aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir are geom_dotplot arguments; geom_jitter ignored them
    geom_jitter(
      aes(color = Disease),
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2
    ) +
    facet_wrap(~Disease) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # single y scale: a separate ylim() would replace it and discard `expand`
    scale_y_continuous(
      limits = c(0, NA),
      expand = expansion(mult = c(0, 0.1))
    ) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(possible_a[i]) +
    NoLegend()
  plot_list[[i]] <- p
}

In [None]:
# Show the per-allele panels with half as many columns as there are alleles.
# ceiling() keeps `ncol` a whole number when the allele count is odd, and
# max() keeps it at least 1.
options(repr.plot.width = 16, repr.plot.height = 6)
cowplot::plot_grid(
  plotlist = plot_list,
  ncol = max(1, ceiling(length(possible_a) / 2))
)

### B

In [None]:
# One BTN3A2 expression panel per allele in `possible_b`.
# Rows are labelled "Hom" when both B1 and B2 equal the allele, "Het" when
# only one does, and "Other" otherwise; panels are faceted by Disease.
options(repr.plot.width = 5, repr.plot.height = 4)

plot_list <- list()
# seq_along() skips the loop for an empty allele vector; 1:length() would
# iterate over c(1, 0) instead.
for (i in seq_along(possible_b)) {
  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      B1 == possible_b[i] & B2 == possible_b[i],
      "Hom",
      ifelse(B1 == possible_b[i] | B2 == possible_b[i], "Het", "Other")
    ))

  p <- df2 %>%
    # grouping kept from the original; no summarise follows it
    group_by(Patient_ID, Disease, haplotype) %>%
    ggplot(aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir are geom_dotplot arguments; geom_jitter ignored them
    geom_jitter(
      aes(color = Disease),
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2
    ) +
    facet_wrap(~Disease) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # single y scale: a separate ylim() would replace it and discard `expand`
    scale_y_continuous(
      limits = c(0, NA),
      expand = expansion(mult = c(0, 0.1))
    ) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(possible_b[i]) +
    NoLegend()
  plot_list[[i]] <- p
}

In [None]:
# Show the panels stored in plot_list as an 8-column grid (16 x 9 plot area).
options(repr.plot.width = 16, repr.plot.height = 9)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

### C


In [None]:
# One BTN3A2 expression panel per allele in `possible_c`.
# Rows are labelled "Hom" when both C1 and C2 equal the allele, "Het" when
# only one does, and "Other" otherwise; panels are faceted by Disease.
options(repr.plot.width = 5, repr.plot.height = 4)

plot_list <- list()
# seq_along() skips the loop for an empty allele vector; 1:length() would
# iterate over c(1, 0) instead.
for (i in seq_along(possible_c)) {
  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      C1 == possible_c[i] & C2 == possible_c[i],
      "Hom",
      ifelse(C1 == possible_c[i] | C2 == possible_c[i], "Het", "Other")
    ))

  p <- df2 %>%
    # grouping kept from the original; no summarise follows it
    group_by(Patient_ID, Disease, haplotype) %>%
    ggplot(aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir are geom_dotplot arguments; geom_jitter ignored them
    geom_jitter(
      aes(color = Disease),
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2
    ) +
    facet_wrap(~Disease) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # single y scale: a separate ylim() would replace it and discard `expand`
    scale_y_continuous(
      limits = c(0, NA),
      expand = expansion(mult = c(0, 0.1))
    ) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(possible_c[i]) +
    NoLegend()
  plot_list[[i]] <- p
}

In [None]:
# Show the panels stored in plot_list as an 8-column grid (16 x 6 plot area).
options(repr.plot.width = 16, repr.plot.height = 6)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

### DQA

In [None]:
count_all4  %>% colnames

In [None]:
# One BTN3A2 expression panel per allele in `possible_dqa`.
# Rows are labelled "Hom" when both DQA11 and DQA12 equal the allele, "Het"
# when only one does, and "Other" otherwise; panels are faceted by Disease.
options(repr.plot.width = 5, repr.plot.height = 4)

plot_list <- list()
# seq_along() skips the loop for an empty allele vector; 1:length() would
# iterate over c(1, 0) instead.
for (i in seq_along(possible_dqa)) {
  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      DQA11 == possible_dqa[i] & DQA12 == possible_dqa[i],
      "Hom",
      ifelse(
        DQA11 == possible_dqa[i] | DQA12 == possible_dqa[i],
        "Het",
        "Other"
      )
    ))

  p <- df2 %>%
    # grouping kept from the original; no summarise follows it
    group_by(Patient_ID, Disease, haplotype) %>%
    ggplot(aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir are geom_dotplot arguments; geom_jitter ignored them
    geom_jitter(
      aes(color = Disease),
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2
    ) +
    facet_wrap(~Disease) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # single y scale: a separate ylim() would replace it and discard `expand`
    scale_y_continuous(
      limits = c(0, NA),
      expand = expansion(mult = c(0, 0.1))
    ) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(possible_dqa[i]) +
    NoLegend()
  plot_list[[i]] <- p
}

In [None]:
# Show the panels stored in plot_list as an 8-column grid (16 x 3 plot area).
options(repr.plot.width = 16, repr.plot.height = 3)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

In [None]:
# One BTN3A2 expression panel per allele in `possible_dqb`.
# Rows are labelled "Hom" when both DQB11 and DQB12 equal the allele, "Het"
# when only one does, and "Other" otherwise; panels are faceted by Disease.
options(repr.plot.width = 5, repr.plot.height = 4)

plot_list <- list()
# seq_along() skips the loop for an empty allele vector; 1:length() would
# iterate over c(1, 0) instead.
for (i in seq_along(possible_dqb)) {
  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      DQB11 == possible_dqb[i] & DQB12 == possible_dqb[i],
      "Hom",
      ifelse(
        DQB11 == possible_dqb[i] | DQB12 == possible_dqb[i],
        "Het",
        "Other"
      )
    ))

  p <- df2 %>%
    # grouping kept from the original; no summarise follows it
    group_by(Patient_ID, Disease, haplotype) %>%
    ggplot(aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir are geom_dotplot arguments; geom_jitter ignored them
    geom_jitter(
      aes(color = Disease),
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2
    ) +
    facet_wrap(~Disease) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # single y scale: a separate ylim() would replace it and discard `expand`
    scale_y_continuous(
      limits = c(0, NA),
      expand = expansion(mult = c(0, 0.1))
    ) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(possible_dqb[i]) +
    NoLegend()
  plot_list[[i]] <- p
}

In [None]:
# Show the panels stored in plot_list as an 8-column grid (16 x 3 plot area).
options(repr.plot.width = 16, repr.plot.height = 3)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

In [None]:
# One BTN3A2 expression panel per allele in `possible_drb`.
# Rows are labelled "Hom" when both DRB11 and DRB12 equal the allele, "Het"
# when only one does, and "Other" otherwise; panels are faceted by Disease.
options(repr.plot.width = 5, repr.plot.height = 4)

plot_list <- list()
# seq_along() skips the loop for an empty allele vector; 1:length() would
# iterate over c(1, 0) instead.
for (i in seq_along(possible_drb)) {
  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      DRB11 == possible_drb[i] & DRB12 == possible_drb[i],
      "Hom",
      ifelse(
        DRB11 == possible_drb[i] | DRB12 == possible_drb[i],
        "Het",
        "Other"
      )
    ))

  p <- df2 %>%
    # grouping kept from the original; no summarise follows it
    group_by(Patient_ID, Disease, haplotype) %>%
    ggplot(aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir are geom_dotplot arguments; geom_jitter ignored them
    geom_jitter(
      aes(color = Disease),
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2
    ) +
    facet_wrap(~Disease) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # single y scale: a separate ylim() would replace it and discard `expand`
    scale_y_continuous(
      limits = c(0, NA),
      expand = expansion(mult = c(0, 0.1))
    ) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(possible_drb[i]) +
    NoLegend()
  plot_list[[i]] <- p
}

In [None]:
# Show the panels stored in plot_list as a 9-column grid (18 x 6 plot area).
options(repr.plot.width = 18, repr.plot.height = 6)
cowplot::plot_grid(plotlist = plot_list, ncol = 9)

### DF for cross study comparison

In [None]:
count_all5  <- count_all4  

In [None]:
count_all5

In [None]:
# Linearly rescale expression so the largest value maps to 1 and the smallest
# to -1. na.rm = TRUE keeps a single missing expression from turning the whole
# scaled column into NA; rows with missing expression stay NA.
count_all5$expression_scale <- local({
  expr <- count_all5$expression
  lo <- min(expr, na.rm = TRUE)
  hi <- max(expr, na.rm = TRUE)
  2 / (hi - lo) * (expr - hi) + 1
})

In [None]:
max(count_all5$expression_scale)

In [None]:
min(count_all5$expression_scale)

In [None]:
scale_exp_hpap  <- count_all5

In [None]:
scale_exp_hpap$study  <- "HPAP"

# GSE123658 Transimmunome

In [None]:
count_all4  <- read_delim("../data/published_data/Transimmunome_2018/BTN3A2_expression_by_HLA_transimmunome.csv")

In [None]:
count_all4$gene  <- "BTN3A2"

In [None]:
count_all4$expression  <- count_all4$BTN3A2

In [None]:
# Remove every "HLA_" occurrence from the column names.
colnames(count_all4) <- gsub("HLA_", "", colnames(count_all4))

In [None]:
count_all4

In [None]:
# Table of total counts, used below as the denominator when normalising
# BTN3A2.
transimmunome_all_counts <- read_csv(
  "../data/published_data/Transimmunome_2018/transimmunome_allcounts_for_normalization.csv"
)

In [None]:
colnames(transimmunome_all_counts)  <- c("Patient_ID", "All_counts")

In [None]:
count_all4  <- left_join(count_all4, transimmunome_all_counts)

In [None]:
# Express BTN3A2 as a fraction of All_counts, scaled by one million. This
# overwrites the raw-count `expression` column assigned earlier.
count_all4$expression  <- count_all4$BTN3A2/count_all4$All_counts*1000000

In [None]:
count_all4

In [None]:
# Collect the distinct allele calls per HLA locus by pooling the locus's two
# allele columns (e.g. A1 and A2) from count_all4.
possible_a  <- unique(c(count_all4$A1, count_all4$A2))
possible_b <- unique(c(count_all4$B1, count_all4$B2))
possible_c <- unique(c(count_all4$C1, count_all4$C2))
possible_dqa <- unique(c(count_all4$DQA11, count_all4$DQA12))
possible_dqb <- unique(c(count_all4$DQB11, count_all4$DQB12))
possible_dra <- unique(c(count_all4$DRA1, count_all4$DRA2))
possible_drb <- unique(c(count_all4$DRB11, count_all4$DRB12))

In [None]:
# Drop missing allele calls so NA is not iterated over as if it were an
# allele in the per-allele plotting loops.
possible_a  <- possible_a[!is.na(possible_a)]
possible_b <- possible_b[!is.na(possible_b)]
possible_c <- possible_c[!is.na(possible_c)]
possible_dqa <- possible_dqa[!is.na(possible_dqa)]
possible_dqb <- possible_dqb[!is.na(possible_dqb)]
possible_dra <- possible_dra[!is.na(possible_dra)]
possible_drb <- possible_drb[!is.na(possible_drb)]

In [None]:
plot_list  <- list()

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

# Overall BTN3A2 expression by Disease for the Transimmunome table.
count_all4 %>%
  ggplot(aes(x = Disease, y = expression)) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_boxplot(outlier.shape = NA) +
  # binaxis/stackdir are geom_dotplot arguments; geom_jitter ignored them
  geom_jitter(
    aes(color = Disease),
    position = position_jitter(width = 0.1, height = 0.01),
    size = 2
  ) +
  theme_classic() +
  xlab("") +
  ylab("Value") +
  # single y scale: a separate ylim() would replace it and discard `expand`
  scale_y_continuous(
    limits = c(0, NA),
    expand = expansion(mult = c(0, 0.1))
  ) +
  scale_color_manual(values = c("dodgerblue", "indianred3")) +
  ggpubr::stat_compare_means(
    label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
    label = "p.format"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 18),
    axis.line = element_line(colour = "black"),
    axis.text.x = element_text(angle = 90),
    axis.ticks = element_line(colour = "black")
  ) +
  ggtitle("BTN3A2 in Transimmunome")

### A

In [None]:
# One BTN3A2 expression panel per allele in `possible_a`.
# Rows are labelled "Hom" when both A1 and A2 equal the allele, "Het" when
# only one does, and "Other" otherwise. Panels are not faceted by Disease.
options(repr.plot.width = 5, repr.plot.height = 4)

# Reset in this cell, as the sibling loops do, so re-running it alone cannot
# keep panels from another locus.
plot_list <- list()
# seq_along() skips the loop for an empty allele vector; 1:length() would
# iterate over c(1, 0) instead.
for (i in seq_along(possible_a)) {
  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      A1 == possible_a[i] & A2 == possible_a[i],
      "Hom",
      ifelse(A1 == possible_a[i] | A2 == possible_a[i], "Het", "Other")
    ))

  p <- df2 %>%
    # grouping kept from the original; no summarise follows it
    group_by(Patient_ID, Disease, haplotype) %>%
    ggplot(aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir are geom_dotplot arguments; geom_jitter ignored them
    geom_jitter(
      aes(color = Disease),
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2
    ) +
    # facet_wrap(~Disease) left disabled, as in the original
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # single y scale: a separate ylim() would replace it and discard `expand`
    scale_y_continuous(
      limits = c(0, NA),
      expand = expansion(mult = c(0, 0.1))
    ) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(possible_a[i]) +
    NoLegend()
  plot_list[[i]] <- p
}

In [None]:
# Show the panels stored in plot_list as a 9-column grid (16 x 9 plot area).
options(repr.plot.width = 16, repr.plot.height = 9)
cowplot::plot_grid(plotlist = plot_list, ncol = 9)

### B

In [None]:
# One BTN3A2 expression panel per allele in `possible_b`.
# Rows are labelled "Hom" when both B1 and B2 equal the allele, "Het" when
# only one does, and "Other" otherwise. Panels are not faceted by Disease.
options(repr.plot.width = 5, repr.plot.height = 4)

plot_list <- list()
# seq_along() skips the loop for an empty allele vector; 1:length() would
# iterate over c(1, 0) instead.
for (i in seq_along(possible_b)) {
  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      B1 == possible_b[i] & B2 == possible_b[i],
      "Hom",
      ifelse(B1 == possible_b[i] | B2 == possible_b[i], "Het", "Other")
    ))

  p <- df2 %>%
    # grouping kept from the original; no summarise follows it
    group_by(Patient_ID, Disease, haplotype) %>%
    ggplot(aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir are geom_dotplot arguments; geom_jitter ignored them
    geom_jitter(
      aes(color = Disease),
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2
    ) +
    # facet_wrap(~Disease) left disabled, as in the original
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # single y scale: a separate ylim() would replace it and discard `expand`
    scale_y_continuous(
      limits = c(0, NA),
      expand = expansion(mult = c(0, 0.1))
    ) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(possible_b[i]) +
    NoLegend()
  plot_list[[i]] <- p
}

In [None]:
# Show the panels stored in plot_list as an 8-column grid (16 x 18 plot area).
options(repr.plot.width = 16, repr.plot.height = 18)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

### C


In [None]:
# One BTN3A2 expression panel per allele in `possible_c`.
# Rows are labelled "Hom" when both C1 and C2 equal the allele, "Het" when
# only one does, and "Other" otherwise. Panels are not faceted by Disease.
options(repr.plot.width = 5, repr.plot.height = 4)

plot_list <- list()
# seq_along() skips the loop for an empty allele vector; 1:length() would
# iterate over c(1, 0) instead.
for (i in seq_along(possible_c)) {
  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      C1 == possible_c[i] & C2 == possible_c[i],
      "Hom",
      ifelse(C1 == possible_c[i] | C2 == possible_c[i], "Het", "Other")
    ))

  p <- df2 %>%
    # grouping kept from the original; no summarise follows it
    group_by(Patient_ID, Disease, haplotype) %>%
    ggplot(aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir are geom_dotplot arguments; geom_jitter ignored them
    geom_jitter(
      aes(color = Disease),
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2
    ) +
    # facet_wrap(~Disease) left disabled, as in the original
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # single y scale: a separate ylim() would replace it and discard `expand`
    scale_y_continuous(
      limits = c(0, NA),
      expand = expansion(mult = c(0, 0.1))
    ) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(possible_c[i]) +
    NoLegend()
  plot_list[[i]] <- p
}

In [None]:
# Show the panels stored in plot_list as an 8-column grid (16 x 12 plot area).
options(repr.plot.width = 16, repr.plot.height = 12)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

### DQA

In [None]:
count_all4  %>% colnames

In [None]:
# One BTN3A2 expression panel per allele in `possible_dqa`.
# Rows are labelled "Hom" when both DQA11 and DQA12 equal the allele, "Het"
# when only one does, and "Other" otherwise. Panels are not faceted by
# Disease.
options(repr.plot.width = 5, repr.plot.height = 4)

plot_list <- list()
# seq_along() skips the loop for an empty allele vector; 1:length() would
# iterate over c(1, 0) instead.
for (i in seq_along(possible_dqa)) {
  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      DQA11 == possible_dqa[i] & DQA12 == possible_dqa[i],
      "Hom",
      ifelse(
        DQA11 == possible_dqa[i] | DQA12 == possible_dqa[i],
        "Het",
        "Other"
      )
    ))

  p <- df2 %>%
    # grouping kept from the original; no summarise follows it
    group_by(Patient_ID, Disease, haplotype) %>%
    ggplot(aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir are geom_dotplot arguments; geom_jitter ignored them
    geom_jitter(
      aes(color = Disease),
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2
    ) +
    # facet_wrap(~Disease) left disabled, as in the original
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # single y scale: a separate ylim() would replace it and discard `expand`
    scale_y_continuous(
      limits = c(0, NA),
      expand = expansion(mult = c(0, 0.1))
    ) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(possible_dqa[i]) +
    NoLegend()
  plot_list[[i]] <- p
}

In [None]:
# Show the panels stored in plot_list as an 8-column grid (16 x 6 plot area).
options(repr.plot.width = 16, repr.plot.height = 6)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

In [None]:
# One BTN3A2 expression panel per allele in `possible_dqb`.
# Rows are labelled "Hom" when both DQB11 and DQB12 equal the allele, "Het"
# when only one does, and "Other" otherwise. Panels are not faceted by
# Disease.
options(repr.plot.width = 5, repr.plot.height = 4)

plot_list <- list()
# seq_along() skips the loop for an empty allele vector; 1:length() would
# iterate over c(1, 0) instead.
for (i in seq_along(possible_dqb)) {
  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      DQB11 == possible_dqb[i] & DQB12 == possible_dqb[i],
      "Hom",
      ifelse(
        DQB11 == possible_dqb[i] | DQB12 == possible_dqb[i],
        "Het",
        "Other"
      )
    ))

  p <- df2 %>%
    # grouping kept from the original; no summarise follows it
    group_by(Patient_ID, Disease, haplotype) %>%
    ggplot(aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir are geom_dotplot arguments; geom_jitter ignored them
    geom_jitter(
      aes(color = Disease),
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2
    ) +
    # facet_wrap(~Disease) left disabled, as in the original
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # single y scale: a separate ylim() would replace it and discard `expand`
    scale_y_continuous(
      limits = c(0, NA),
      expand = expansion(mult = c(0, 0.1))
    ) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(possible_dqb[i]) +
    NoLegend()
  plot_list[[i]] <- p
}

In [None]:
# Show the panels stored in plot_list as an 8-column grid (16 x 6 plot area).
options(repr.plot.width = 16, repr.plot.height = 6)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

In [None]:
# One BTN3A2 expression panel per allele in `possible_drb`.
# Rows are labelled "Hom" when both DRB11 and DRB12 equal the allele, "Het"
# when only one does, and "Other" otherwise; panels are faceted by Disease.
options(repr.plot.width = 5, repr.plot.height = 4)

plot_list <- list()
# seq_along() skips the loop for an empty allele vector; 1:length() would
# iterate over c(1, 0) instead.
for (i in seq_along(possible_drb)) {
  df2 <- count_all4 %>%
    filter(gene == "BTN3A2") %>%
    mutate(haplotype = ifelse(
      DRB11 == possible_drb[i] & DRB12 == possible_drb[i],
      "Hom",
      ifelse(
        DRB11 == possible_drb[i] | DRB12 == possible_drb[i],
        "Het",
        "Other"
      )
    ))

  p <- df2 %>%
    # grouping kept from the original; no summarise follows it
    group_by(Patient_ID, Disease, haplotype) %>%
    ggplot(aes(x = haplotype, y = expression)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir are geom_dotplot arguments; geom_jitter ignored them
    geom_jitter(
      aes(color = Disease),
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2
    ) +
    # NOTE(review): the other Transimmunome per-allele loops have this facet
    # commented out - confirm whether it should be active only for DRB.
    facet_wrap(~Disease) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # single y scale: a separate ylim() would replace it and discard `expand`
    scale_y_continuous(
      limits = c(0, NA),
      expand = expansion(mult = c(0, 0.1))
    ) +
    scale_color_manual(values = c("dodgerblue", "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(possible_drb[i]) +
    NoLegend()
  plot_list[[i]] <- p
}

In [None]:
# Show the panels stored in plot_list as a 9-column grid (18 x 6 plot area).
options(repr.plot.width = 18, repr.plot.height = 6)
cowplot::plot_grid(plotlist = plot_list, ncol = 9)

### DF for cross study comparison

In [None]:
count_all5  <- count_all4  

In [None]:
count_all5

In [None]:
# Linearly rescale expression so the largest value maps to 1 and the smallest
# to -1. Expression is BTN3A2 / All_counts, and All_counts comes from a left
# join, so unmatched rows can be NA. na.rm = TRUE keeps those from turning
# the whole scaled column into NA; rows with missing expression stay NA.
count_all5$expression_scale <- local({
  expr <- count_all5$expression
  lo <- min(expr, na.rm = TRUE)
  hi <- max(expr, na.rm = TRUE)
  2 / (hi - lo) * (expr - hi) + 1
})

In [None]:
max(count_all5$expression_scale)

In [None]:
min(count_all5$expression_scale)

In [None]:
scale_exp_transimmunome  <- count_all5

In [None]:
scale_exp_transimmunome$study  <- "Transimmunome"

# Newman 2024

In [None]:
library(GEOquery)

In [None]:
gset <- getGEO("GSE237218", GSEMatrix =TRUE, getGPL=TRUE, AnnotGPL=TRUE)

In [None]:
varLabels(gset$GSE237218_series_matrix.txt.gz)

In [None]:
gset$GSE237218_series_matrix.txt.gz$`cell type:ch1`

In [None]:
gset$GSE237218_series_matrix.txt.gz$`disease status:ch1`

In [None]:
metadata_gse  <- data.frame(gse = gset$GSE237218_series_matrix.txt.gz$geo_accession,
                            Sample_ID = gset$GSE237218_series_matrix.txt.gz$description                           
                           )

In [None]:
metadata_gse

In [None]:
metadata_sra  <- read_delim("../data/published_data/Newman_2023/newman_sra_metadata.txt")

In [None]:
metadata_sra$gse = metadata_sra$`Sample Name`

In [None]:
metadata_sra_and_gse = left_join(metadata_sra, metadata_gse)

In [None]:
    mtx  <- read_delim("../data/published_data/Newman_2023/data/GSE237218_processedCounts_log_TPM.tsv.gz")

In [None]:
colnames(mtx)  %>% length

In [None]:
colnames(mtx) 

In [None]:
grep(mtx$gene_id, pattern = "CD4", value = T)

In [None]:
# Set every negative entry of the table to zero.
# NOTE(review): the comparison runs over all columns, including the ID
# columns (transcript_id / gene_id), which are presumably character - confirm
# that no ID can compare as "less than 0".
mtx[mtx<0] = 0

In [None]:
# Collapse transcript rows to one row per gene_id by summing every remaining
# column within each gene.
mtx2 <- mtx %>%
  dplyr::select(-transcript_id) %>%
  group_by(gene_id) %>%
  summarise(across(everything(), sum))

In [None]:
ls()

In [None]:
dim(mtx2)

In [None]:
metadata_sra

In [None]:
# BTN3A2 values for all samples as a one-column data frame whose row names
# are the sample column names of mtx2.
# gene_id is dropped before unlist(), so the values stay numeric instead of
# being coerced to character, and no hard-coded column range (2:443) is
# needed.
newman_exp <- as.data.frame(
  unlist(
    dplyr::select(mtx2[which(mtx2$gene_id == "BTN3A2"), ], -gene_id)
  )
)

In [None]:
newman_exp$Sample_ID  <- rownames(newman_exp)

In [None]:
colnames(newman_exp)[1]  <- "expression"

In [None]:
metadata_sra$Experiment  %>% head

In [None]:
newman_exp2  <- left_join(newman_exp, metadata_sra_and_gse)

In [None]:
table(newman_exp$Sample_ID %in% metadata_sra_and_gse$Sample_ID)

In [None]:
newman_exp2

In [None]:
hla  <- read_delim("../data/published_data/Newman_2023/data/genotypes.tsv")
hla

In [None]:
# Keep only the part of `subject` before the first underscore as Run; the
# original `subject` column is removed.
hla2 <- separate(
  hla,
  subject,
  into = "Run",
  sep = "_",
  remove = TRUE
)

In [None]:
btn_etc  <- left_join(hla2, newman_exp2)

In [None]:
btn_etc$subject_id  %>% duplicated  %>% table

In [None]:
btn_etc$disease_status  %>% table

In [None]:
count_all4  <- btn_etc

In [None]:
count_all4$gene  <- "BTN3A2"

In [None]:
colnames(count_all4)

In [None]:
count_all4  <- count_all4  %>% mutate(Disease = ifelse(grepl(disease_status, pattern = "CTL"),"Ctrl","Dia"))

In [None]:
count_all4

In [None]:
# Distinct alleles observed at each locus, pooling both allele columns.
possible_a <- unique(c(count_all4[["A1"]], count_all4[["A2"]]))
possible_b <- unique(c(count_all4[["B1"]], count_all4[["B2"]]))
possible_c <- unique(c(count_all4[["C1"]], count_all4[["C2"]]))
possible_dqa <- unique(c(count_all4[["DQA11"]], count_all4[["DQA12"]]))
possible_dqb <- unique(c(count_all4[["DQB11"]], count_all4[["DQB12"]]))
possible_dra <- unique(c(count_all4[["DRA1"]], count_all4[["DRA2"]]))
possible_drb <- unique(c(count_all4[["DRB11"]], count_all4[["DRB12"]]))

In [None]:
# Remove the missing-call entry from each allele list.
possible_a <- possible_a[which(!is.na(possible_a))]
possible_b <- possible_b[which(!is.na(possible_b))]
possible_c <- possible_c[which(!is.na(possible_c))]
possible_dqa <- possible_dqa[which(!is.na(possible_dqa))]
possible_dqb <- possible_dqb[which(!is.na(possible_dqb))]
possible_dra <- possible_dra[which(!is.na(possible_dra))]
possible_drb <- possible_drb[which(!is.na(possible_drb))]

In [None]:
plot_list <- list()

In [None]:
count_all4$expression

In [None]:
# Keep only samples with strictly positive expression; rows whose expression
# is missing are dropped as well.
count_all4 <- dplyr::filter(count_all4, as.numeric(expression) > 0)

In [None]:
count_all4$expression

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

# BTN3A2 expression in the Newman data, split by Disease.
# Changes relative to the earlier version of this cell:
# - geom_jitter() no longer receives binaxis/stackdir, which it ignores;
# - the scale_y_continuous(expand = ...) call was removed because the later
#   ylim(0, NA) replaces that scale, so it never took effect;
# - the manual colours are named, so each Disease level keeps its colour
#   even if one level is absent.
ggplot(count_all4, aes(x = Disease, y = as.numeric(expression))) +
  # Invisible layer (dotsize = 0).
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(
    aes(color = Disease),
    position = position_jitter(width = 0.1, height = 0.01),
    size = 2
  ) +
  theme_classic() +
  xlab("") +
  ylab("Value") +
  scale_color_manual(values = c(Ctrl = "dodgerblue", Dia = "indianred3")) +
  ggpubr::stat_compare_means(
    label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
    label = "p.format"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 18),
    axis.line = element_line(colour = "black"),
    axis.text.x = element_text(angle = 90),
    axis.ticks = element_line(colour = "black")
  ) +
  ylim(0, NA) +
  ggtitle("BTN3A2 in Newman")

In [None]:
count_all4$Patient_ID <- count_all4$subject_id

In [None]:
count_all4$expression <- as.numeric(count_all4$expression)

### A

In [None]:
# Build one expression boxplot for a single allele of one HLA locus.
#
# data:             data frame with the two allele columns, `expression`
#                   and `Disease`.
# col1, col2:       names of the columns holding the two allele calls.
# allele:           allele to classify samples against; also the title.
# facet_by_disease: if TRUE, the panel is split by `Disease`.
#
# Each sample is labelled "Hom" (both calls equal `allele`), "Het" (exactly
# one does) or "Other". A missing call propagates through the comparisons
# exactly as in a plain nested ifelse(), so such samples can end up NA.
# Returns the ggplot object.
#
# This helper replaces six copy-pasted loops. Those loops iterated over
# 1:length(x), which runs with i = 1 and i = 0 when x is empty; lapply()
# simply returns an empty list in that case. The removed group_by() before
# ggplot() and the per-iteration options() call had no effect on the panels
# (the options are reset before every grid is drawn), and every row of
# count_all4 has gene == "BTN3A2", so that filter was dropped too.
plot_newman_allele_panel <- function(data, col1, col2, allele,
                                     facet_by_disease = FALSE) {
  first_call <- data[[col1]]
  second_call <- data[[col2]]
  data$haplotype <- ifelse(
    first_call == allele & second_call == allele,
    "Hom",
    ifelse(first_call == allele | second_call == allele, "Het", "Other")
  )

  panel <- ggplot(data, aes(x = haplotype, y = expression)) +
    # Invisible layer (dotsize = 0).
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir are not geom_jitter() parameters and were dropped.
    geom_jitter(
      aes(color = Disease),
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2
    ) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    # Named values keep the colour of each Disease level fixed.
    scale_color_manual(values = c(Ctrl = "dodgerblue", Dia = "indianred3")) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    # ylim() is the only y scale: a scale_y_continuous(expand = ...) placed
    # before it would be replaced and never take effect.
    ylim(0, NA) +
    ggtitle(allele) +
    NoLegend()

  if (facet_by_disease) {
    panel <- panel + facet_wrap(~Disease)
  }
  panel
}

plot_list <- lapply(possible_a, function(allele) {
  plot_newman_allele_panel(count_all4, "A1", "A2", allele)
})

In [None]:
options(repr.plot.width = 16, repr.plot.height = 9)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

### B

In [None]:
plot_list <- lapply(possible_b, function(allele) {
  plot_newman_allele_panel(count_all4, "B1", "B2", allele)
})

In [None]:
options(repr.plot.width = 16, repr.plot.height = 9)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

### C


In [None]:
plot_list <- lapply(possible_c, function(allele) {
  plot_newman_allele_panel(count_all4, "C1", "C2", allele)
})

In [None]:
options(repr.plot.width = 16, repr.plot.height = 6)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

### DQA

In [None]:
count_all4 %>% colnames

In [None]:
# From here on the panels are additionally split by Disease.
plot_list <- lapply(possible_dqa, function(allele) {
  plot_newman_allele_panel(
    count_all4, "DQA11", "DQA12", allele,
    facet_by_disease = TRUE
  )
})

In [None]:
options(repr.plot.width = 16, repr.plot.height = 3)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

In [None]:
plot_list <- lapply(possible_dqb, function(allele) {
  plot_newman_allele_panel(
    count_all4, "DQB11", "DQB12", allele,
    facet_by_disease = TRUE
  )
})

In [None]:
options(repr.plot.width = 16, repr.plot.height = 3)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

In [None]:
plot_list <- lapply(possible_drb, function(allele) {
  plot_newman_allele_panel(
    count_all4, "DRB11", "DRB12", allele,
    facet_by_disease = TRUE
  )
})

In [None]:
options(repr.plot.width = 18, repr.plot.height = 6)
cowplot::plot_grid(plotlist = plot_list, ncol = 9)

### Data frame for cross-study comparison

In [None]:
count_all5 <- count_all4

In [None]:
count_all5

In [None]:
count_all5$expression

In [None]:
# Linear rescaling of expression: the maximum maps to 1 and the minimum
# to -1.
count_all5$expression_scale <- local({
  x <- count_all5$expression
  2 / (max(x) - min(x)) * (x - max(x)) + 1
})

In [None]:
max(count_all5$expression_scale)

In [None]:
min(count_all5$expression_scale)

In [None]:
scale_exp_newman <- count_all5

In [None]:
scale_exp_newman[["study"]] <- "Newman"

# HLA comparison across studies

In [None]:
# "Dia" when Patient_ID contains "Case", otherwise "Ctrl".
scale_exp_kallionpaa$Disease <- ifelse(
  grepl("Case", scale_exp_kallionpaa$Patient_ID),
  "Dia",
  "Ctrl"
)

In [None]:
# Strip the "HLA_" prefix so the allele columns are named like in the other
# studies.
colnames(scale_exp_lab48) <- gsub("HLA_", "", colnames(scale_exp_lab48))

In [None]:
# Columns shared by every study in the stacked table. Keeping the list in
# one place guarantees all studies are reduced to the same columns.
cross_study_cols <- c(
  "A1", "A2", "B1", "B2", "C1", "C2",
  "DQA11", "DQA12", "DQB11", "DQB12",
  "DRA1", "DRA2", "DRB11", "DRB12",
  "expression_scale", "Disease", "study"
)

scale_exp_hpap2 <- scale_exp_hpap %>%
  ungroup() %>%
  dplyr::select(all_of(cross_study_cols))
scale_exp_kallionpaa2 <- scale_exp_kallionpaa %>%
  ungroup() %>%
  dplyr::select(all_of(cross_study_cols))
scale_exp_lab482 <- scale_exp_lab48 %>%
  ungroup() %>%
  dplyr::select(all_of(cross_study_cols))
scale_exp_newman2 <- scale_exp_newman %>%
  ungroup() %>%
  dplyr::select(all_of(cross_study_cols))


In [None]:
scale_exp_transimmunome2 <- scale_exp_transimmunome %>%
  ungroup() %>%
  dplyr::select(all_of(cross_study_cols))

In [None]:
# Stack the five studies into one table.
all_exp <- rbind(
  scale_exp_hpap2,
  scale_exp_kallionpaa2,
  scale_exp_lab482,
  scale_exp_transimmunome2,
  scale_exp_newman2
)

In [None]:
all_exp

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

# Scaled expression by Disease over all stacked studies, points coloured by
# study. geom_jitter() no longer receives binaxis/stackdir, which it
# ignores, and the duplicated xlab("") was removed.
btn3a2_by_disease_plot <- ggplot(
  all_exp,
  aes(x = Disease, y = expression_scale)
) +
  # Invisible layer (dotsize = 0).
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(
    aes(color = study),
    position = position_jitter(width = 0.1, height = 0.01),
    size = 2
  ) +
  theme_classic() +
  xlab("") +
  ylab("Value") +
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  # scale_color_manual(values = c("dodgerblue", "indianred3")) +
  ggpubr::stat_compare_means(
    label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
    label = "p.format"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 18),
    axis.line = element_line(colour = "black"),
    axis.text.x = element_text(angle = 0),
    axis.ticks = element_line(colour = "black")
  ) +
  ggtitle("BTN3A2 by disease")

btn3a2_by_disease_plot

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

# This cell used to repeat the plotting code of the previous cell verbatim;
# it now shows the same plot object again.
btn3a2_by_disease_plot

In [None]:
# Distinct alleles per locus across all stacked studies, pooling both
# allele columns.
possible_a <- with(all_exp, unique(c(A1, A2)))
possible_b <- with(all_exp, unique(c(B1, B2)))
possible_c <- with(all_exp, unique(c(C1, C2)))
possible_dqa <- with(all_exp, unique(c(DQA11, DQA12)))
possible_dqb <- with(all_exp, unique(c(DQB11, DQB12)))
possible_dra <- with(all_exp, unique(c(DRA1, DRA2)))
possible_drb <- with(all_exp, unique(c(DRB11, DRB12)))

# Remove the missing-call entry from each allele list.
possible_a <- subset(possible_a, !is.na(possible_a))
possible_b <- subset(possible_b, !is.na(possible_b))
possible_c <- subset(possible_c, !is.na(possible_c))
possible_dqa <- subset(possible_dqa, !is.na(possible_dqa))
possible_dqb <- subset(possible_dqb, !is.na(possible_dqb))
possible_dra <- subset(possible_dra, !is.na(possible_dra))
possible_drb <- subset(possible_drb, !is.na(possible_drb))

### A

In [None]:
possible_a

In [None]:
plot_list <- list()

In [None]:
# Build one scaled-expression boxplot for a single allele of one HLA locus
# using the stacked cross-study table.
#
# data:         data frame with the two allele columns, `expression_scale`
#               and `Disease`.
# col1, col2:   names of the columns holding the two allele calls.
# allele:       allele to classify samples against; also the title.
# drop_untyped: if TRUE, samples with a missing call in either column are
#               removed before plotting.
#
# Each sample is labelled "Hom" (both calls equal `allele`), "Het" (exactly
# one does) or "Other", shown in that order on the x axis. Without
# drop_untyped, a missing call propagates through the comparisons exactly as
# in a plain nested ifelse(), so such samples can end up NA.
# Returns the ggplot object.
#
# This helper replaces six copy-pasted loops. Those loops iterated over
# 1:length(x), which runs with i = 1 and i = 0 when x is empty; lapply()
# simply returns an empty list in that case. The removed group_by() before
# ggplot() and the per-iteration options() call had no effect on the panels
# (the options are reset before every grid is drawn).
plot_cross_study_allele_panel <- function(data, col1, col2, allele,
                                          drop_untyped = FALSE) {
  if (drop_untyped) {
    typed <- !is.na(data[[col1]]) & !is.na(data[[col2]])
    data <- data[typed, , drop = FALSE]
  }

  first_call <- data[[col1]]
  second_call <- data[[col2]]
  data$haplotype <- ifelse(
    first_call == allele & second_call == allele,
    "Hom",
    ifelse(first_call == allele | second_call == allele, "Het", "Other")
  )

  ggplot(
    data,
    aes(
      x = factor(haplotype, levels = c("Hom", "Het", "Other")),
      y = expression_scale
    )
  ) +
    # Invisible layer (dotsize = 0).
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    # binaxis/stackdir are not geom_jitter() parameters and were dropped.
    geom_jitter(
      aes(color = Disease),
      position = position_jitter(width = 0.1, height = 0.01),
      size = 2
    ) +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
    ggpubr::stat_compare_means(
      label.x = 1.2, label.y.npc = "top", size = 3.5, vjust = 0.3,
      label = "p.format"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18),
      axis.line = element_line(colour = "black"),
      axis.text.x = element_text(angle = 90),
      axis.ticks = element_line(colour = "black")
    ) +
    ggtitle(allele) +
    NoLegend()
}

plot_list <- lapply(possible_a, function(allele) {
  plot_cross_study_allele_panel(all_exp, "A1", "A2", allele)
})

In [None]:
options(repr.plot.width = 14, repr.plot.height = 16)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

### B

In [None]:
plot_list <- lapply(possible_b, function(allele) {
  plot_cross_study_allele_panel(all_exp, "B1", "B2", allele)
})

In [None]:
options(repr.plot.width = 14, repr.plot.height = 16)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

### C


In [None]:
plot_list <- lapply(possible_c, function(allele) {
  plot_cross_study_allele_panel(all_exp, "C1", "C2", allele)
})

In [None]:
options(repr.plot.width = 16, repr.plot.height = 16)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

### DQA

In [None]:
all_exp %>% colnames

In [None]:
# From here on, samples without a call at the locus are removed first.
plot_list <- lapply(possible_dqa, function(allele) {
  plot_cross_study_allele_panel(
    all_exp, "DQA11", "DQA12", allele,
    drop_untyped = TRUE
  )
})

In [None]:
options(repr.plot.width = 14, repr.plot.height = 16)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

### DQB

In [None]:
plot_list <- lapply(possible_dqb, function(allele) {
  plot_cross_study_allele_panel(
    all_exp, "DQB11", "DQB12", allele,
    drop_untyped = TRUE
  )
})

In [None]:
options(repr.plot.width = 14, repr.plot.height = 4.5)
cowplot::plot_grid(plotlist = plot_list, ncol = 8)

### DRB

In [None]:
plot_list <- lapply(possible_drb, function(allele) {
  plot_cross_study_allele_panel(
    all_exp, "DRB11", "DRB12", allele,
    drop_untyped = TRUE
  )
})

In [None]:
options(repr.plot.width = 18, repr.plot.height = 24)
cowplot::plot_grid(plotlist = plot_list, ncol = 9)

## Heatmap Het+ Homoz. 

In [None]:
# For every allele of one HLA locus, compare scaled expression between
# carriers ("Allele+", one or two copies) and non-carriers ("Allele-") with
# a two-sample Wilcoxon test.
#
# data:       data frame with the two allele columns and `expression_scale`.
# col1, col2: names of the columns holding the two allele calls.
# alleles:    alleles to test.
#
# Returns a data frame with one row per tested allele and the columns
# allele, pval, ci_lower, ci_upper and estimate, or NULL if no allele could
# be tested.
#
# Notes:
# - wilcox.test() reports its confidence interval as c(lower, upper). The
#   previous loops stored element 1 as `ci_upper` and element 2 as
#   `ci_lower`; the names now match the values.
# - Samples with a missing call in either column are excluded. In the
#   previous nested ifelse() such samples always evaluated to NA and were
#   dropped by the test, so the tested samples are the same.
# - An allele for which carriers and non-carriers are not both present
#   cannot be tested; it is skipped with a message instead of aborting the
#   whole loop with an error.
# - NOTE(review): the sign of `estimate` follows the level order R derives
#   for "Allele-"/"Allele+", which depends on the collation locale — confirm
#   the direction before interpreting positive vs. negative estimates.
test_allele_carriage <- function(data, col1, col2, alleles) {
  typed <- data[!is.na(data[[col1]]) & !is.na(data[[col2]]), , drop = FALSE]

  per_allele <- lapply(alleles, function(allele) {
    carrier <- typed[[col1]] == allele | typed[[col2]] == allele
    haplotype <- ifelse(carrier, "Allele+", "Allele-")

    if (length(unique(haplotype)) < 2L) {
      message(
        "Skipping ", allele,
        ": carriers and non-carriers are not both present"
      )
      return(NULL)
    }

    wcx <- wilcox.test(typed$expression_scale ~ haplotype, conf.int = TRUE)

    data.frame(
      allele = allele,
      pval = wcx$p.value,
      ci_lower = wcx$conf.int[1],
      ci_upper = wcx$conf.int[2],
      estimate = unname(wcx$estimate),
      row.names = NULL
    )
  })

  # NULL entries (skipped alleles) are ignored by rbind().
  do.call(rbind, per_allele)
}

# DRB starts the result table; the following cells append the other loci.
df_all <- test_allele_carriage(all_exp, "DRB11", "DRB12", possible_drb)

In [None]:
df_all %>% arrange(pval)

In [None]:
df_all <- rbind(
  df_all,
  test_allele_carriage(all_exp, "A1", "A2", possible_a)
)

In [None]:
df_all <- rbind(
  df_all,
  test_allele_carriage(all_exp, "B1", "B2", possible_b)
)

In [None]:
df_all <- rbind(
  df_all,
  test_allele_carriage(all_exp, "C1", "C2", possible_c)
)

In [None]:
df_all <- rbind(
  df_all,
  test_allele_carriage(all_exp, "DQA11", "DQA12", possible_dqa)
)

In [None]:
df_all <- rbind(
  df_all,
  test_allele_carriage(all_exp, "DQB11", "DQB12", possible_dqb)
)

In [None]:
df_all <- rbind(
  df_all,
  test_allele_carriage(all_exp, "DRA1", "DRA2", possible_dra)
)

In [None]:
nrow(df_all)

In [None]:
df_all %>% arrange(pval)

In [None]:
options(repr.plot.width = 18, repr.plot.height = 50)

# Forest plot of the per-allele Wilcoxon results in df_all: point = estimate,
# line = confidence interval, alleles ordered by estimate.
# Colour classes: "1" = p < 0.05 and ci_lower > 0, "2" = p < 0.05 and
# ci_upper < 0, "3" = everything else.
# - The colour values are named. Unnamed values are assigned to whichever
#   classes are present, in order, so a missing class would shift the
#   colours of the others.
# - Line thickness uses `linewidth`; `size` is deprecated for lines.
# - The x label names what wilcox.test() estimates (a difference in
#   location), not a ratio.
# NOTE(review): the title "Change in populations" looks carried over from
# another analysis — confirm or replace.
plot <- df_all %>%
  arrange(pval) %>%
  # filter(pval < 0.05) %>%
  ggplot(aes(
    estimate,
    fct_reorder(allele, estimate),
    color = ifelse(
      pval < 0.05 & ci_lower > 0, "1",
      ifelse(pval < 0.05 & ci_upper < 0, "2", "3")
    )
  )) +
  geom_vline(xintercept = 0, color = "gray75") +
  geom_linerange(
    aes(xmin = ci_lower, xmax = ci_upper),
    linewidth = 1.5, alpha = 0.5
  ) +
  geom_point(size = 4) +
  theme_minimal(base_size = 16) +
  scale_color_manual(
    values = c("1" = "green4", "2" = "red3", "3" = "grey"),
    guide = "none"
  ) +
  labs(
    title = "Change in populations", y = NULL,
    x = "Difference in location \n (95% CI)"
  ) +
  ggtheme() +
  theme(
    axis.text.x = element_text(hjust = 0, size = 18),
    panel.grid = element_blank()
  )

plot

In [None]:
all_exp

In [None]:
# Write the most recently displayed plot (no `plot` argument is given) as SVG,
# creating the target directory if needed.
ggsave(
  filename = "../../240617_VN_Diabetes_V06/figures/hla/btn3a2.svg",
  units = "cm",
  width = 60,
  height = 30,
  create.dir = TRUE
)

In [None]:
all_exp

In [None]:
# Forest plot restricted to alleles with p < 0.05. Colour classes: "1" = whole
# CI above zero, "2" = whole CI below zero, "3" = CI overlapping zero.
options(repr.plot.width = 18, repr.plot.height = 7)

plot <- df_all %>%
  arrange(pval) %>%
  filter(pval < 0.05) %>%
  mutate(
    # Order the two stored bounds explicitly so the colour rule does not
    # depend on which of ci_lower / ci_upper actually holds the smaller value.
    ci_lo = pmin(ci_lower, ci_upper),
    ci_hi = pmax(ci_lower, ci_upper),
    direction = ifelse(pval < 0.05 & ci_lo > 0, "1",
                       ifelse(pval < 0.05 & ci_hi < 0, "2", "3"))
  ) %>%
  ggplot(aes(estimate, fct_reorder(allele, estimate), color = direction)) +
  geom_vline(xintercept = 0, color = "gray75") +
  # `linewidth` replaces the deprecated `size` for line geoms.
  geom_linerange(aes(xmin = ci_lo, xmax = ci_hi), linewidth = 1.5, alpha = 0.5) +
  geom_point(size = 4) +
  theme_minimal(base_size = 16) +
  # Named values keep each class on its colour even when a class is absent
  # (e.g. no positively associated allele survives the filter).
  scale_color_manual(
    values = c("1" = "green4", "2" = "red3", "3" = "grey"),
    guide = "none"
  ) +
  # NOTE(review): title and x label look copied from another analysis; the
  # plotted value is the wilcox.test() estimate - confirm the wording.
  labs(title = "Change in populations", y = NULL,
       x = "Ratio estimate \n (95% CI)") +
  theme(axis.text.y = element_text(hjust = 0, size = 18), panel.grid = element_blank())

plot

In [None]:
# Names of the alleles with p < 0.05, most significant first.
signif_alleles <- df_all %>%
  arrange(pval) %>%
  filter(pval < 0.05) %>%
  pull(allele)

In [None]:
signif_alleles

In [None]:
colnames(all_exp)

In [None]:
# Truncate colon-separated names to their first two fields, e.g.
# "A*01:01:01" -> "A*01:01". Strings with fewer than two fields and NA values
# are returned unchanged. Vectorised over `x`.
remove_third_level <- function(x) {
  sub("^([^:]+:[^:]+).*", "\\1", x)
}


In [None]:
# Long format: one row per sample and allele slot. Allele names are cut to two
# fields and the part before "*" is discarded.
hla_dia3 <- all_exp %>%
  pivot_longer(
    cols = c(
      "A1", "A2", "B1", "B2", "C1", "C2",
      "DQA11", "DQA12", "DQB11", "DQB12",
      "DRA1", "DRA2", "DRB11", "DRB12"
    ),
    names_to = "loci",
    values_to = "allele"
  ) %>%
  mutate(allele = remove_third_level(allele)) %>%
  separate(allele, into = c(NA, "allele"), sep = "\\*")

In [None]:
hla_dia3

In [None]:
# Count allele occurrences per Disease / locus / allele / study. The locus is
# the leading non-digit part of the slot name (e.g. "DQA11" -> "DQA").
hla_dia4 <- hla_dia3 %>%
  mutate(loci = str_extract(loci, "^\\D+")) %>%
  group_by(Disease, loci, allele, study) %>%
  summarise(n = n())

In [None]:
hla_dia4

In [None]:
# Number of allele slots per Disease / locus / study; used as the denominator
# for allele frequencies.
sum_alleles_per_study <- hla_dia3 %>%
  mutate(loci = str_extract(loci, "^\\D+")) %>%
  group_by(Disease, loci, study) %>%
  summarise(n = n())

In [None]:
# Copy the slot count into `total_n` so it does not clash with the per-allele
# `n` column when the two tables are joined.
sum_alleles_per_study$total_n  <- sum_alleles_per_study$n

In [None]:
# Drop the original `n` column; its values now live in `total_n`.
sum_alleles_per_study$n  <- NULL

In [None]:
sum_alleles_per_study

In [None]:
# Attach the per Disease / locus / study slot total to every allele count.
# The join keys are spelled out so the result does not depend on which other
# columns the two tables happen to share.
hla_dia5 <- left_join(
  hla_dia4,
  sum_alleles_per_study,
  by = c("Disease", "loci", "study")
)

In [None]:
hla_dia5

In [None]:
# Allele frequency = allele count / number of allele slots in that group.
hla_dia5 <- mutate(hla_dia5, freq = n / total_n)

In [None]:
hla_dia5

In [None]:
hla_dia5  %>% group_by(Disease, loci, study)  %>% summarise(sum = sum(freq))

In [None]:
hla_dia5

In [None]:
# Plot table: one row per allele with its frequency and a combined
# "Disease study" population label.
hla_dia6 <- hla_dia5 %>%
  transmute(
    allele,
    loci,
    population = paste(Disease, study),
    Disease,
    study,
    allele_freq = freq
  ) %>%
  ungroup() %>%
  dplyr::select(allele, loci, population, allele_freq, Disease, study)

In [None]:
hla_dia6

In [None]:
hla_dia6  %>% group_by(loci,study, Disease)  %>% summarise(sum = sum(allele_freq))

In [None]:
# Allele frequency per locus: alleles on the (flipped) axis in reverse level
# order, one facet row per locus, colour = Disease, shape = study.
options(repr.plot.height = 30)
hla_dia6 %>%
  ggplot(aes(x = fct_rev(factor(allele)), y = allele_freq)) +
  # facet_wrap(~allele, scales = "free") +
  facet_grid(rows = vars(loci), scales = "free", space = "free") +
  geom_point(aes(color = Disease, shape = study), size = 3) +
  coord_flip() +
  # NOTE(review): unnamed colours are assigned in level order of Disease.
  scale_color_manual(values = c("blue", "red")) +
  xlab("") +
  theme_bw() +
  ggtheme()

In [None]:
hla_dia5

In [None]:
# Same table as for plotting, plus the group's slot total as `sample_size`.
hla_dia7 <- hla_dia5 %>%
  transmute(
    allele,
    loci,
    population = paste(Disease, study),
    allele_freq = freq,
    sample_size = total_n,
    Disease,
    study
  ) %>%
  ungroup() %>%
  dplyr::select(allele, loci, population, allele_freq, sample_size, Disease, study)

In [None]:
hla_dia7

In [None]:
# Load the Czech allele-frequency table.
hla_czech <- read_csv(file = "../tables/hla_czech.csv")

In [None]:
# Remove the column named `...1` (presumably the unnamed row-index column of
# the CSV, auto-named on import - confirm).
hla_czech$`...1`  <- NULL

In [None]:
# Keep only the part of the allele name after "*"; the prefix is discarded.
hla_czech <- separate(
  hla_czech,
  allele,
  into = c(NA, "allele"),
  sep = "\\*"
)

In [None]:
# Drop the "Ctrl" and "Dia" populations. Negating with `!` avoids comparing
# against `F`, which is an ordinary variable that can be reassigned.
hla_czech <- hla_czech %>%
  dplyr::filter(!population %in% c("Ctrl", "Dia"))

In [None]:
hla_czech$population  %>% table

In [None]:
# Tag every reference row with a constant `study` label so the table has the
# same columns as `hla_dia7`.
hla_czech$study  <- "Czech_Ref"

In [None]:
# Remove the capitalised `Study` column; the lower-case `study` column is the
# one kept.
hla_czech$Study  <- NULL

In [None]:
# Reference rows carry no disease status; use the same constant label so they
# form their own colour group in the plots.
hla_czech$Disease  <- "Czech_Ref"

In [None]:
# Remove every "1" from the locus names (e.g. "DQA1" -> "DQA").
hla_czech$loci <- gsub("1", "", hla_czech$loci)

In [None]:
hla_czech$loci  %>% table

In [None]:
# Stack the study-derived frequencies on top of the Czech reference rows.
# rbind() requires both tables to have the same column names.
hla_both  <- rbind(hla_dia7, hla_czech)

In [None]:
hla_both

In [None]:
# Export the study-derived frequency table (`hla_dia7`, without the Czech
# reference rows).
write.csv(
  x = hla_dia7,
  file = "../tables/hla_btn3a2_other_studies.csv"
)

## Plot two places (alleles truncated to two fields)


In [None]:
# Distinct "locus allele" keys present in the study-derived table.
alleles_to_plot <- unique(paste(hla_dia7$loci, hla_dia7$allele))

In [None]:
alleles_to_plot

In [None]:
hla_both$loci  %>% table

In [None]:
# Allele frequencies of study and reference populations, restricted to the
# locus/allele pairs seen in the study data. Colour = Disease, shape = study.
options(repr.plot.height = 40)
hla_both %>%
  mutate(allele_to_plot = paste(loci, allele)) %>%
  dplyr::filter(allele_to_plot %in% alleles_to_plot & !is.na(allele)) %>%
  ggplot(aes(x = fct_rev(factor(allele)), y = allele_freq)) +
  # facet_wrap(~allele, scales = "free") +
  facet_grid(rows = vars(loci), scales = "free", space = "free") +
  geom_point(aes(color = Disease, shape = study), size = 4) +
  coord_flip() +
  # NOTE(review): unnamed colours are assigned in level order of Disease.
  scale_color_manual(values = c("blue",  "grey", "red")) +
  xlab("") +
  theme_bw() +
  ggtheme()

In [None]:
hla_both$population  %>% table

In [None]:
# Same comparison limited to populations whose label contains "Czech" or
# "Lab48" but not "Romani"; colour = population.
options(repr.plot.height = 30)
hla_both %>%
  dplyr::filter(grepl("Czech|Lab48", population)) %>%
  dplyr::filter(!grepl("Romani", population)) %>%
  mutate(allele_to_plot = paste(loci, allele)) %>%
  dplyr::filter(allele_to_plot %in% alleles_to_plot & !is.na(allele)) %>%
  ggplot(aes(x = fct_rev(factor(allele)), y = allele_freq)) +
  # facet_wrap(~allele, scales = "free") +
  facet_grid(rows = vars(loci), scales = "free", space = "free") +
  geom_point(aes(color = population), size = 3) +
  coord_flip() +
  # NOTE(review): unnamed colours are assigned in level order of population.
  scale_color_manual(values = c("blue", "grey50","grey60","grey70","grey80","red")) +
  xlab("") +
  theme_bw() +
  ggtheme()

In [None]:
# Save the most recently displayed plot as both PNG and SVG.
ggsave(filename = "../figures/hla/hla.png", units = "cm", width = 30, height = 80)
ggsave(filename = "../figures/hla/hla.svg", units = "cm", width = 30, height = 80)

## Model

In [None]:
all_exp

In [None]:
# Long format: one row per sample and allele slot. Allele names are cut to two
# fields and the part before "*" is discarded.
hla_dia3 <- all_exp %>%
  pivot_longer(
    cols = c(
      "A1", "A2", "B1", "B2", "C1", "C2",
      "DQA11", "DQA12", "DQB11", "DQB12",
      "DRA1", "DRA2", "DRB11", "DRB12"
    ),
    names_to = "loci",
    values_to = "allele"
  ) %>%
  mutate(allele = remove_third_level(allele)) %>%
  separate(allele, into = c(NA, "allele"), sep = "\\*")

In [None]:
# Long format for modelling at full allele resolution. The first 14 columns of
# `all_exp` are taken by position (presumably the allele slots - confirm the
# column order). Each allele string is split at "*" into the locus prefix
# (`loci`) and the allele name (`allele`).
all_exp2 <- all_exp %>%
  pivot_longer(
    cols = 1:14,
    names_to = "loci",
    values_to = "allele"
  ) %>%
  separate(allele, into = c("loci", "allele"), sep = "\\*")

In [None]:
all_exp2

In [None]:
# Gaussian GLM of scaled expression on all remaining columns, rows of locus
# "A" only. Note the result is stored under the name `glm`.
glm <- glm(
  formula = expression_scale ~ .,
  family = "gaussian",
  data = all_exp2 %>%
    dplyr::filter(loci == "A") %>%
    dplyr::select(-loci)
)
summary(glm)

In [None]:
# Start the results table with the coefficient table of the locus "A" model.
df_alleles <- as.data.frame(summary(glm)$coefficients) %>%
  rownames_to_column("variable")
df_alleles$loci <- "A"

In [None]:
# Fit the same Gaussian GLM for each locus in `loci` and append its
# coefficient table to `df_alleles`.
# Loci already present in `df_alleles` (it is seeded with "A") are skipped so
# their coefficients are not stored twice.
for (i in setdiff(loci, df_alleles$loci)) {
  # Inside filter(), `loci` is the data column; `i` is the loop value.
  glm <- glm(
    expression_scale ~ .,
    data = all_exp2 %>%
      dplyr::filter(loci == i) %>%
      dplyr::select(-loci),
    family = "gaussian"
  )
  df_a <- summary(glm)$coefficients %>%
    as.data.frame() %>%
    rownames_to_column("variable")
  # Record the locus: without this column rbind() fails, because `df_alleles`
  # has a `loci` column and `df_a` would not.
  df_a$loci <- i
  df_alleles <- rbind(df_alleles, df_a)
}

In [None]:
df_alleles

In [None]:
# Short names for the 5th (p-value) and 3rd (standard error) columns of the
# coefficient table.
names(df_alleles)[c(5, 3)] <- c("pval", "SE")

In [None]:
library(tidytext)

In [None]:

# Coefficient plot of the allele terms: estimate +/- 2 SE per allele, ordered
# within each locus facet, red where p < 0.05.
options(repr.plot.width = 40, repr.plot.height = 5)
df_alleles %>%
  dplyr::filter(grepl(variable, pattern = "allele")) %>%
  mutate(variable = gsub(variable, pattern = "allele", replacement = "")) %>%
  ggplot(aes(
    x = reorder_within(variable, Estimate, loci),
    y = Estimate,
    ymin = Estimate - 2 * SE,
    ymax = Estimate + 2 * SE
  )) +
  geom_pointrange(aes(color = pval < 0.05), size = 1, linewidth = 1) +
  scale_x_reordered() +
  facet_grid(cols = vars(loci), scales = "free", space = "free") +
  # Named values: with unnamed colours, a plot in which every term is
  # significant would draw all of them in the "not significant" grey.
  scale_color_manual(values = c("FALSE" = "grey80", "TRUE" = "red3")) +
  xlab("") + ylab("") + theme_bw() +
  geom_hline(yintercept = 0) +
  ggtheme() +
  theme(axis.text.x = element_text(angle = 90))

In [None]:
# Save the most recently displayed plot as SVG.
ggsave(
  filename = "../../240617_VN_Diabetes_V06/figures/hla/btn3a2_v02_model.svg",
  units = "cm",
  width = 90,
  height = 13
)

## Model two places (alleles truncated to two fields)

In [None]:
all_exp

In [None]:
# Long format for modelling with allele names cut to two fields. The first 14
# columns of `all_exp` are taken by position (presumably the allele slots -
# confirm the column order). Each allele string is split at "*" into the locus
# prefix (`loci`) and the allele name, which is then truncated.
all_exp2 <- all_exp %>%
  pivot_longer(
    cols = 1:14,
    names_to = "loci",
    values_to = "allele"
  ) %>%
  separate(allele, into = c("loci", "allele"), sep = "\\*") %>%
  mutate(allele = remove_third_level(allele))


In [None]:
hla_dia7

In [None]:
all_exp2

In [None]:
# Gaussian GLM of scaled expression on all remaining columns, rows of locus
# "A" only. Note the result is stored under the name `glm`.
glm <- glm(
  formula = expression_scale ~ .,
  family = "gaussian",
  data = all_exp2 %>%
    dplyr::filter(loci == "A") %>%
    dplyr::select(-loci)
)
summary(glm)

In [None]:
# Start the results table with the coefficient table of the locus "A" model.
df_alleles <- as.data.frame(summary(glm)$coefficients) %>%
  rownames_to_column("variable")
df_alleles$loci <- "A"

In [None]:
# Distinct locus labels present in the long table.
loci <- unique(all_exp2$loci)

In [None]:
all_exp2

In [None]:
# Keep the first seven entries (presumably the seven typed loci, dropping a
# trailing NA - confirm).
loci <- loci[seq_len(7)]

In [None]:
loci

In [None]:
i

In [None]:
  # Scratch fit for whatever locus `i` currently holds (left over from a loop).
  glm <- glm(
    formula = expression_scale ~ .,
    family = "gaussian",
    data = all_exp2 %>%
      dplyr::filter(loci == i) %>%
      dplyr::select(-loci)
  )

In [None]:
glm

In [None]:
    # Coefficient table of the current `glm` fit, term names in `variable`.
    df_a <- as.data.frame(summary(glm)$coefficients) %>%
      rownames_to_column("variable")

In [None]:
df_alleles

In [None]:
    # Scratch step: append `df_a` to the results table.
    # NOTE(review): `df_alleles` carries a `loci` column; if `df_a` was built
    # without one (as in the scratch cell that creates it from the coefficient
    # table only), rbind() stops on the column mismatch.
    df_alleles  <- rbind(df_alleles, df_a)

In [None]:
# Fit the same Gaussian GLM for each locus in `loci` and append its
# coefficient table, tagged with the locus, to `df_alleles`.
# Loci already present in `df_alleles` (it is seeded with "A") are skipped so
# their coefficients are not stored - and later plotted - twice.
for (i in setdiff(loci, df_alleles$loci)) {
  # Inside filter(), `loci` is the data column; `i` is the loop value.
  glm <- glm(
    expression_scale ~ .,
    data = all_exp2 %>%
      dplyr::filter(loci == i) %>%
      dplyr::select(-loci),
    family = "gaussian"
  )
  df_a <- summary(glm)$coefficients %>%
    as.data.frame() %>%
    rownames_to_column("variable")
  df_a$loci <- i
  df_alleles <- rbind(df_alleles, df_a)
}

In [None]:
df_alleles

In [None]:
# Short names for the 5th (p-value) and 3rd (standard error) columns of the
# coefficient table.
names(df_alleles)[c(5, 3)] <- c("pval", "SE")

In [None]:
library(tidytext)

In [None]:

# Coefficient plot of the allele terms: estimate +/- 2 SE per allele, ordered
# within each locus facet, red where p < 0.05.
options(repr.plot.width = 40, repr.plot.height = 5)
df_alleles %>%
  dplyr::filter(grepl(variable, pattern = "allele")) %>%
  mutate(variable = gsub(variable, pattern = "allele", replacement = "")) %>%
  ggplot(aes(
    x = reorder_within(variable, Estimate, loci),
    y = Estimate,
    ymin = Estimate - 2 * SE,
    ymax = Estimate + 2 * SE
  )) +
  geom_pointrange(aes(color = pval < 0.05), size = 1, linewidth = 1) +
  scale_x_reordered() +
  facet_grid(cols = vars(loci), scales = "free", space = "free") +
  # Named values: with unnamed colours, a plot in which every term is
  # significant would draw all of them in the "not significant" grey.
  scale_color_manual(values = c("FALSE" = "grey80", "TRUE" = "red3")) +
  xlab("") + ylab("") + theme_bw() +
  geom_hline(yintercept = 0) +
  ggtheme() +
  theme(axis.text.x = element_text(angle = 90))

In [None]:
# Save the most recently displayed plot as PNG.
ggsave(
  filename = "../../240617_VN_Diabetes_V06/figures/hla/btn3a2_v02_model_2places.png",
  units = "cm",
  width = 90,
  height = 10
)

In [None]:
# Save the most recently displayed plot as SVG.
ggsave(
  filename = "../../240617_VN_Diabetes_V06/figures/hla/btn3a2_v02_model_2places.svg",
  units = "cm",
  width = 90,
  height = 10
)

In [None]:

# Coefficient plot of the non-allele model terms (everything except allele
# terms and the intercept): estimate +/- 2 SE per term and locus model, red
# where p < 0.05.
options(repr.plot.width = 12, repr.plot.height = 6)
df_alleles %>%
  dplyr::filter(!grepl(variable, pattern = "allele")) %>%
  dplyr::filter(!grepl(variable, pattern = "tercep")) %>%
  ggplot(aes(
    x = reorder_within(variable, Estimate, loci),
    y = Estimate,
    ymin = Estimate - 2 * SE,
    ymax = Estimate + 2 * SE
  )) +
  geom_pointrange(aes(color = pval < 0.05), size = 1, linewidth = 1) +
  scale_x_reordered() +
  facet_grid(cols = vars(loci), scales = "free", space = "free") +
  # Named values: with unnamed colours, a plot in which every term is
  # significant would draw all of them in the "not significant" grey.
  scale_color_manual(values = c("FALSE" = "grey80", "TRUE" = "red3")) +
  xlab("") + ylab("") + theme_bw() +
  geom_hline(yintercept = 0) +
  ggtheme() +
  theme(axis.text.x = element_text(angle = 90))

In [None]:
# Save the most recently displayed plot as SVG.
ggsave(
  filename = "../../240617_VN_Diabetes_V06/figures/hla/btn3a2_v02_model_study.svg",
  units = "cm",
  width = 28,
  height = 16
)