In [None]:
source("../code/diabetes_analysis_v06.R")

## Czech reference HLA

In [None]:
# Load the raw Czech Republic reference allele-frequency table.
# NOTE(review): read_csv() is presumably readr's, attached by the sourced
# analysis script -- confirm.
czech_hla  <- read_csv("../../240218_VN_Diabetes_V05/data/Czech Republic_raw.csv")

In [None]:
# Print the reference table for inspection.
czech_hla

In [None]:
# Same inspection cell repeated.
czech_hla

In [None]:
# Raise the notebook plot height so the long allele axis stays readable.
options(repr.plot.height = 40)

# Reference allele frequencies per population (the "Czech Republic Romani"
# rows are left out), one facet row per locus, alleles on the vertical axis
# after the coordinate flip.
czech_hla %>%
  filter(population != "Czech Republic Romani") %>%
  ggplot(aes(x = allele, y = allele_freq)) +
  geom_point(aes(color = population)) +
  # facet_wrap(~allele, scales = "free") +  (disabled alternative faceting)
  facet_grid(rows = vars(loci), scales = "free", space = "free") +
  scale_color_manual(values = c("grey50", "grey60", "grey70", "grey80")) +
  coord_flip()

In [None]:
# Collapse the Czech reference populations into one row per allele/locus:
# mean frequency plus the min-max range across populations (NA frequencies
# are ignored).
# NOTE(review): unlike the plot of czech_hla, the "Czech Republic Romani"
# rows are not filtered out here -- confirm that is intended.
czech_hla2 <- czech_hla %>%
  group_by(allele, loci) %>%
  summarise(
    allele_freq2 = mean(allele_freq, na.rm = TRUE),
    allele_min = min(allele_freq, na.rm = TRUE),
    allele_max = max(allele_freq, na.rm = TRUE)
  ) %>%
  # summarise() only peels off the last grouping level; drop the remaining
  # `allele` grouping so later verbs do not silently run per group.
  ungroup()

In [None]:
# Print the per-allele summary (mean, min, max frequency) for inspection.
czech_hla2

In [None]:
# Mean reference frequency per allele (points) with the min-max range across
# populations (line ranges), one facet row per locus.
czech_hla2 %>%
  ggplot(aes(x = allele, y = allele_freq2)) +
  geom_point(size = 3) +
  geom_linerange(aes(ymin = allele_min, ymax = allele_max), alpha = 0.5) +
  # facet_wrap(~allele, scales = "free") +  (disabled alternative faceting)
  facet_grid(rows = vars(loci), scales = "free", space = "free") +
  coord_flip()

## Our HLA


In [None]:
# Load the study metadata workbook; its columns starting with "HLA",
# `patient` and `time_taken` are used in the cells below.
hla_dia  <- read_xlsx('../../240218_VN_Diabetes_V05/data/metadata_v05.xlsx')

In [None]:
# Truncate colon-separated allele names to their first two fields,
# e.g. "A*01:01:01" -> "A*01:01". Strings without a colon do not match the
# pattern and are returned unchanged; NA stays NA. Vectorised over `x`.
remove_third_level <- function(x) {
  sub(pattern = '^([^:]+:[^:]+).*', replacement = '\\1', x = x)
}

In [None]:
# Baseline ("T0") HLA typing per patient. Every column whose name starts with
# "HLA" is trimmed with remove_third_level(), and a Disease label is derived
# from the first character of the patient ID ("1" -> "Dia", otherwise "Ctrl").
hla_dia2 <- hla_dia %>%
  filter(time_taken == "T0") %>%
  dplyr::select(starts_with("HLA"), patient) %>%
  mutate(
    Disease = ifelse(substr(patient, 1, 1) == "1", 'Dia', "Ctrl")
  ) %>%
  mutate_at(vars(starts_with("HLA")), remove_third_level)

In [None]:
# Print the baseline HLA table for inspection.
hla_dia2

In [None]:
# Long format: one row per patient and HLA column. The source column name
# goes to `loci`, the typed allele to `allele`.
hla_dia3 <- pivot_longer(
  hla_dia2,
  cols = starts_with("HLA"),
  names_to = "loci",
  values_to = "allele"
)

In [None]:
# Keep the original column name as `loci2`, then re-derive `loci` from the
# allele string itself (everything before the first "*").
hla_dia3 <- hla_dia3 %>%
  mutate(
    loci2 = loci,
    loci = stringr::str_extract(allele, "^[^*]+")
  )

In [None]:
# Print the reference table again to compare its layout with hla_dia3.
czech_hla

In [None]:
# Number of distinct patients in each Disease group.
hla_dia3 %>%
  distinct(Disease, patient) %>%
  count(Disease)

In [None]:
# Allele frequencies of the study cohort in the column layout used for the
# reference table (allele, loci, population, allele_freq, sample_size).
#
# Patients "206" and "207" are excluded. Within each Disease x locus group the
# frequency of an allele is its row count divided by the total row count of
# that group.
# NOTE(review): the sample sizes 30 (Dia) and 13 (Ctrl) are hard-coded --
# confirm they match the cohort after the exclusion above.
hla_dia4 <- hla_dia3 %>%
  filter(!patient %in% c("206", "207")) %>%
  group_by(Disease, loci, allele) %>%
  summarise(n = n()) %>%
  # summarise() leaves the data grouped by Disease and loci, so sum(n) is the
  # per-group total.
  mutate(freq = n / sum(n)) %>%
  ungroup() %>%
  transmute(
    allele,
    loci,
    population = Disease,
    allele_freq = freq,
    sample_size = ifelse(Disease == "Dia", 30, 13)
  )

In [None]:
# Wide table: one row per population, one column per allele. Alleles that do
# not occur in a population are filled with frequency 0.
hla_dia5 <- hla_dia4 %>%
  dplyr::select(-loci, -sample_size) %>%
  pivot_wider(
    names_from = "allele",
    values_from = "allele_freq",
    values_fill = 0
  )

In [None]:
# Print the wide population x allele frequency table for inspection.
hla_dia5

In [None]:
 # Back to long format: one row per population/allele pair, now including the
 # zero-frequency entries introduced by the wide pivot.
 hla_dia6 <- pivot_longer(
   hla_dia5,
   cols = !population,
   names_to = "allele",
   values_to = "allele_freq"
 )

In [None]:
# Restore the `loci` and `sample_size` columns dropped before pivoting and put
# the columns in the order allele, loci, population, allele_freq, sample_size.
# NOTE(review): sample sizes 30 (Dia) / 13 (otherwise) are hard-coded.
hla_dia7 <- hla_dia6 %>%
  mutate(
    loci = stringr::str_extract(allele, "^[^*]+"),
    sample_size = ifelse(population == "Dia", 30, 13)
  ) %>%
  dplyr::select(allele, loci, population, allele_freq, sample_size)

In [None]:
# Print the cohort table ...
hla_dia7

In [None]:
# ... and the reference table, to compare their columns before stacking them.
czech_hla

In [None]:
# Stack the cohort frequencies on top of the Czech reference frequencies.
# NOTE(review): rbind() needs both tables to have the same columns -- the
# cohort table was shaped to allele/loci/population/allele_freq/sample_size
# for this; confirm czech_hla has exactly these columns.
all_hlas  <- rbind(hla_dia7, czech_hla)

In [None]:
# Print the combined table for inspection.
all_hlas

In [None]:
# Raise the notebook plot height so the long allele axis stays readable.
options(repr.plot.height = 40)

# Cohort vs. reference allele frequencies for the seven listed loci,
# restricted to alleles that occur in hla_dia4 and without the
# "Czech Republic Romani" rows. Allele levels are reversed so they read
# top-to-bottom after the coordinate flip.
# NOTE(review): the six manual colours are matched to the population levels
# in order -- presumably blue = "Ctrl" and red = "Dia"; confirm.
all_hlas %>%
  filter(
    loci %in% c("A", "B", "C", "DPB1", "DQA1", "DQB1", "DRB1"),
    allele %in% hla_dia4$allele,
    population != "Czech Republic Romani"
  ) %>%
  ggplot(aes(
    x = factor(allele, levels = rev(levels(factor(allele)))),
    y = allele_freq
  )) +
  geom_point(aes(color = population), size = 3) +
  # facet_wrap(~allele, scales = "free") +  (disabled alternative faceting)
  facet_grid(rows = vars(loci), scales = "free", space = "free") +
  scale_color_manual(
    values = c("blue", "grey50", "grey60", "grey70", "grey80", "red")
  ) +
  coord_flip() +
  xlab("") +
  theme_bw() +
  ggtheme()

In [None]:
# Save the combined cohort + reference frequency table.
write.csv(all_hlas, "../tables/hla_czech.csv")

In [None]:
# Raise the notebook plot height so the long allele axis stays readable.
options(repr.plot.height = 40)

# Same figure as the earlier combined plot, drawn again so that it is the
# most recent plot when ggsave() is called in the next cell.
all_hlas %>%
  filter(
    loci %in% c("A", "B", "C", "DPB1", "DQA1", "DQB1", "DRB1"),
    allele %in% hla_dia4$allele,
    population != "Czech Republic Romani"
  ) %>%
  ggplot(aes(
    x = factor(allele, levels = rev(levels(factor(allele)))),
    y = allele_freq
  )) +
  geom_point(aes(color = population), size = 3) +
  # facet_wrap(~allele, scales = "free") +  (disabled alternative faceting)
  facet_grid(rows = vars(loci), scales = "free", space = "free") +
  scale_color_manual(
    values = c("blue", "grey50", "grey60", "grey70", "grey80", "red")
  ) +
  coord_flip() +
  xlab("") +
  theme_bw() +
  ggtheme()

In [None]:
# Export the figure as PNG and SVG (30 x 70 cm). No plot object is passed, so
# ggsave() writes the most recently displayed plot.
# NOTE(review): these paths start with "../240218_VN_Diabetes_V05/" while the
# input data is read from "../../240218_VN_Diabetes_V05/" -- confirm the
# target directory is the intended one.
ggsave("../240218_VN_Diabetes_V05/figures/hla/hla.png", width = 30, height = 70, units = "cm")
ggsave("../240218_VN_Diabetes_V05/figures/hla/hla.svg", width = 30, height = 70, units = "cm")

In [None]:
# Per allele, compare the "Dia" cohort with everything else pooled as
# "Global pop.": mean allele frequency and summed sample size per group.
# NOTE(review): every population other than "Dia" -- including this study's
# own "Ctrl" rows -- is folded into "Global pop."; confirm that is intended.
# The result remains grouped by allele and loci (summarise() drops only the
# last grouping level).
all_hlas2 <- all_hlas %>%
  filter(
    loci %in% c("A", "B", "C", "DPB1", "DQA1", "DQB1", "DRB1"),
    allele %in% hla_dia4$allele,
    population != "Czech Republic Romani"
  ) %>%
  mutate(
    population2 = ifelse(population == "Dia", "Dia", "Global pop.")
  ) %>%
  dplyr::select(-population) %>%
  group_by(allele, loci, population2) %>%
  summarise(
    mean_allele_freq = mean(allele_freq),
    sum_sample_size = sum(sample_size)
  )

In [None]:
# Convert frequencies back to allele counts: sample size x frequency x 2
# (two alleles per individual).
all_hlas2$allele_occurrences <- with(
  all_hlas2,
  sum_sample_size * mean_allele_freq * 2
)

In [None]:
# Row count of the comparison table (the test loop below walks it in pairs
# of rows) ...
all_hlas2  %>% nrow

In [None]:
# ... and the table itself.
all_hlas2

In [None]:
# Exact binomial test per allele: the observed allele count in the "Dia"
# cohort is tested against the mean frequency of the pooled "Global pop."
# rows, followed by a Bonferroni correction over all tested alleles.
#
# all_hlas2 is expected to hold two consecutive rows per allele -- "Dia"
# first, "Global pop." second. The number of alleles is derived from the
# table instead of being hard-coded, and the row layout is verified before
# any test is run so a mis-paired table fails loudly instead of producing
# wrong p-values.
n_alleles <- nrow(all_hlas2) %/% 2
dia_rows <- 2 * seq_len(n_alleles) - 1
ref_rows <- 2 * seq_len(n_alleles)

stopifnot(
  nrow(all_hlas2) == 2 * n_alleles,
  all(all_hlas2$population2[dia_rows] == "Dia"),
  all(all_hlas2$population2[ref_rows] == "Global pop."),
  all(all_hlas2$allele[dia_rows] == all_hlas2$allele[ref_rows])
)

pvals <- vapply(
  seq_len(n_alleles),
  function(k) {
    binom.test(
      x = all_hlas2$allele_occurrences[dia_rows[k]],
      # Number of trials = 2 alleles per individual, matching how
      # allele_occurrences was computed (60 for a cohort of 30).
      n = 2 * all_hlas2$sum_sample_size[dia_rows[k]],
      p = all_hlas2$mean_allele_freq[ref_rows[k]]
    )$p.value
  },
  numeric(1)
)

# Bonferroni: p * number of tests, capped at 1.
df_all <- data.frame(
  allele = all_hlas2$allele[ref_rows],
  pval = pvals,
  padj = p.adjust(pvals, method = "bonferroni")
)

In [None]:
# Save the per-allele binomial test results (allele, pval, padj).
write.csv(df_all, "../240218_VN_Diabetes_V05/tables/hla.csv")