In [None]:
# HIBAG is distributed through Bioconductor, so it has to be installed with
# BiocManager; a plain install.packages("HIBAG") does not find it on CRAN.
# The install is skipped when the package is already available, which keeps
# re-runs of this cell fast.
if (!requireNamespace("HIBAG", quietly = TRUE)) {
  if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
  }
  BiocManager::install("HIBAG")
}
library(HIBAG)
library(tidyverse)

In [None]:
# Get the recommended pre-fit HLARES model from the HIBAG AWS bucket.
# download.file() is used instead of shelling out to `wget` so the cell also
# works on machines without wget; mode = "wb" keeps the binary .RData file
# intact on Windows.
hlares_url <- "https://hibag.s3.amazonaws.com/download/HLARES/Illumina1M-Broad-HLA4-hg19.RData"
hlares_file <- basename(hlares_url)

# Only download when the file is not already in the working directory.
if (!file.exists(hlares_file)) {
  # The default 60 s timeout can be too short for a large model file; raise it
  # for this download only and restore the previous value afterwards.
  old_opts <- options(timeout = max(600, getOption("timeout")))
  tryCatch(
    download.file(hlares_url, destfile = hlares_file, mode = "wb"),
    error = function(e) {
      # Remove a partial download so the next run does not mistake it for a
      # complete file.
      unlink(hlares_file)
      stop("Could not download ", hlares_url, ": ", conditionMessage(e),
           call. = FALSE)
    },
    finally = options(old_opts)
  )
}

In [None]:
# Choose the CPU kernel target used by HIBAG; "max" presumably requests the
# most capable instruction set available on this machine -- see
# ?hlaSetKernelTarget to confirm.
hlaSetKernelTarget("max")

In [None]:
# HLARES object: the pre-fit reference that the later cells iterate over, one
# element per loop pass.
# The file name must match the file fetched in the download cell
# ("...HLA4-hg19.RData"); the earlier "...HLA-hg19.RData" spelling is never
# written to disk, so load() failed on it.
hlares_path <- "Illumina1M-Broad-HLA4-hg19.RData"

# load() returns the names of the restored objects; get() only accepts a single
# name, so fail with a clear message if the file holds anything else.
loaded_names <- load(hlares_path)
stopifnot(length(loaded_names) == 1L)
mobj <- get(loaded_names)

# Print the object to check how many reference models it contains.
mobj

In [None]:
# Export the hg19 SNP positions of every model in `mobj`, one file per locus,
# so they can be lifted over to hg38.
for (i in mobj) {
  # Build the HIBAG model from the stored object.
  model <- hlaModelFromObj(i)

  hla.idx <- model$hla.locus

  # The previous call passed only the file name string to write.table(), which
  # printed that string to the console and never wrote the positions.
  # Written as one position per line without header, row names or quotes --
  # NOTE(review): confirm this is the layout notebook 7.1.1 expects.
  write.table(
    model$snp.position,
    file = paste0("hg19_", hla.idx, "_pos.txt"),
    quote = FALSE,
    row.names = FALSE,
    col.names = FALSE
  )

  # Release the resources held by the model before building the next one.
  hlaClose(model)
}
# Switch to notebook 7.1.1 here to get the lifted-over coordinates, then run
# the rest of this notebook.

In [None]:
# Start the HLA result table with one row per sample: the second column of the
# PLINK .fam file becomes `sample.id`.
# NOTE(review): the IDs come from "chr6_total.fam" while the genotypes are read
# from "chr6_new_samples.*" -- confirm both hold the same samples in the same
# order.
ids <- read.table(file = "chr6_total.fam")

hla_df <- data.frame(sample.id = ids[["V2"]])


In [None]:
# Read the chromosome 6 genotypes (PLINK bed/fam/bim triplet) into a HIBAG
# genotype object, declaring the coordinates as hg38.
bed.fn <- "chr6_new_samples.bed"
fam.fn <- "chr6_new_samples.fam"
bim.fn <- "chr6_new_samples.bim"

CeD_samples <- hlaBED2Geno(
  bed.fn,
  fam.fn,
  bim.fn,
  assembly = "hg38"
)


In [None]:
# HIBAG: impute both alleles of every locus in `mobj` and store them in
# `hla_df` as columns "<locus>.1" and "<locus>.2".
for (i in mobj) {

  # Build the HIBAG model from the stored object.
  model <- hlaModelFromObj(i)

  hla.idx <- model$hla.locus

  # Update the SNP positions to hg38, using the lifted-over coordinates (first
  # column of the per-locus file).
  hg38 <- read.table(paste0("hg38_", hla.idx, "_pos.csv"))
  # The new positions replace the old ones element by element, so a different
  # count would silently shift every SNP -- stop instead.
  if (nrow(hg38) != length(model$snp.position)) {
    hlaClose(model)
    stop("Locus ", hla.idx, ": ", nrow(hg38), " lifted-over positions but ",
         length(model$snp.position), " SNPs in the model", call. = FALSE)
  }
  model$snp.position <- hg38$V1
  model$assembly <- "hg38"

  # Predict
  pred <- hlaPredict(model, CeD_samples, type = "response")
  # Values are not auto-printed inside a for loop, so print explicitly.
  print(head(pred$value))

  # Align predictions to hla_df by sample ID rather than by row position:
  # hla_df was built from a different .fam file than the genotypes, so equal
  # order and length are not guaranteed. Unmatched samples get NA.
  row_idx <- match(hla_df$sample.id, pred$value$sample.id)
  if (all(is.na(row_idx))) {
    hlaClose(model)
    stop("Locus ", hla.idx,
         ": no sample.id in hla_df matches the predicted samples",
         call. = FALSE)
  }
  if (anyNA(row_idx)) {
    warning("Locus ", hla.idx, ": ", sum(is.na(row_idx)),
            " sample(s) in hla_df have no prediction and are set to NA",
            call. = FALSE)
  }

  hla_df[, paste0(hla.idx, ".1")] <- pred$value$allele1[row_idx]
  hla_df[, paste0(hla.idx, ".2")] <- pred$value$allele2[row_idx]

  print(head(hla_df))

  # Release the model's resources, then drop the per-locus temporaries.
  hlaClose(model)
  rm(pred, model, hg38, hla.idx, row_idx)
}

In [None]:
# Save the imputed alleles without a row-name column.
# `False` is Python spelling and is not defined in R; the logical constant is
# FALSE.
write.csv(hla_df, "hibag_hla_typing.csv", row.names = FALSE)

In [None]:
# Append condition: read the PLINK phenotype table, keep the sample ID (V1) and
# phenotype code (V3), and translate the code into a label ("1" -> control,
# "2" -> celiac; any other code is kept as its character value).
# NOTE(review): sample.id is taken from V1 here but from V2 of the .fam file
# elsewhere -- confirm both columns carry the same identifiers.
pheno <- read.table("pheno_plink.tsv")
pheno <- pheno %>%
  mutate(sample.id = V1) %>%
  select(sample.id, V3) %>%
  mutate(label = recode(as.character(V3), "1" = "control", "2" = "celiac"))

# Count the samples per label.
table(pheno$label)

In [None]:
# Attach the phenotype to every typed sample, copy the label into `condition`,
# and drop rows that are exact duplicates.
hla5 <- left_join(hla_df, pheno, by = "sample.id")
hla5 <- mutate(hla5, condition = label)
hla5 <- unique(hla5)

In [None]:
# Store the first DPB1 allele as character.
hla5[["DPB1.1"]] <- as.character(hla5[["DPB1.1"]])

In [None]:
# DQ typing: count the DQ haplotypes carried by each sample
# (0 = absent, 1 = present, 2 = homozygous at both DQA1 and DQB1).
#
# `hladq` and its zygosity columns are not created anywhere in the visible
# notebook, so they are derived here when missing: the input defaults to
# `hla5`, and a locus is "hom" when both predicted alleles are identical.
# NOTE(review): confirm this matches the definition used when `hladq` was
# originally built.
if (!exists("hladq")) {
  hladq <- hla5
}
if (!"dqa1_zygo" %in% names(hladq)) {
  hladq <- hladq %>% mutate(dqa1_zygo = ifelse(DQA1.1 == DQA1.2, "hom", "het"))
}
if (!"dqb1_zygo" %in% names(hladq)) {
  hladq <- hladq %>% mutate(dqb1_zygo = ifelse(DQB1.1 == DQB1.2, "hom", "het"))
}

dqhap <- hladq %>%
  # Step 1: flag each haplotype as present (1) or absent (0). Except for
  # Dq2_5_2, a flag is only set when at least one of the two loci is
  # heterozygous; the double-homozygous case is handled in step 2.
  mutate(Dq2_5_1 = ifelse((DQA1.1=='05:01'|DQA1.2=='05:01') & (DQB1.1=='02:01'|DQB1.2=='02:01') & (dqa1_zygo=='het'|dqb1_zygo=='het'),1,0),
         # Dq2_5_2: DQA1*05:05 together with DQB1*02:02, with no zygosity
         # requirement (labelled 'DQ2.5 trans' in the genotype cell).
         Dq2_5_2 = ifelse((DQA1.1=='05:05' | DQA1.2=='05:05') & (DQB1.1=='02:02' | DQB1.2=='02:02'),1,0),
         Dq2_2 = ifelse((DQA1.1=='02:01'|DQA1.2=='02:01') & (DQB1.1=='02:02'|DQB1.2=='02:02') & (dqa1_zygo=='het'|dqb1_zygo=='het'),1,0),
         Dq7_5 = ifelse((DQA1.1=='05:05'|DQA1.2=='05:05') & (DQB1.1=='03:01'|DQB1.2=='03:01') & (dqa1_zygo=='het'|dqb1_zygo=='het'),1,0),
         Dq8 = ifelse((DQA1.1=='03:01'|DQA1.2=='03:01'|DQA1.1=='03:02'|DQA1.2=='03:02'|DQA1.1=='03:03'|DQA1.2=='03:03') & (DQB1.1=='03:02'|DQB1.2=='03:02') & (dqa1_zygo=='het'|dqb1_zygo=='het'),1,0)) %>%
  # Step 2: samples homozygous at both loci for the defining alleles carry two
  # copies; every other sample keeps its step 1 value.
  mutate(Dq2_5_1 = ifelse((DQA1.1=='05:01') & (dqa1_zygo=='hom') & (DQB1.1=='02:01') & (dqb1_zygo=='hom'),2,Dq2_5_1),
         Dq2_2 = ifelse((DQA1.1=='02:01') & (dqa1_zygo=='hom') & (DQB1.1=='02:02') & (dqb1_zygo=='hom'),2,Dq2_2),
         Dq7_5 = ifelse((DQA1.1=='05:05') & (dqa1_zygo=='hom') & (DQB1.1=='03:01') & (dqb1_zygo=='hom'),2,Dq7_5),
         Dq8 = ifelse((DQA1.1=='03:01'|DQA1.1=='03:02'|DQA1.1=='03:03') & (dqa1_zygo=='hom') & (DQB1.1=='03:02') & (dqb1_zygo=='hom'),2,Dq8))

In [None]:
# Molecules: turn the per-haplotype counts into one DQ genotype label per
# sample. case_when() returns the label of the first matching rule, so the
# order below is significant: homozygotes first, then the trans DQ2.5 flag,
# then heterozygous combinations, each followed by its single-copy fallback.
dqmol <- mutate(
  dqhap,
  genotype = case_when(
    # Two copies of the same haplotype.
    Dq2_5_1 == 2 ~ "DQ2.5/DQ2.5",
    Dq2_2 == 2 ~ "DQ2.2/DQ2.2",
    Dq7_5 == 2 ~ "DQ7.5/DQ7.5",
    Dq8 == 2 ~ "DQ8/DQ8",
    # DQ2.5 flagged through the Dq2_5_2 column.
    Dq2_5_2 == 1 ~ "DQ2.5 trans",
    # DQ2.5 with a second haplotype, or alone.
    Dq2_5_1 == 1 & Dq2_2 == 1 ~ "DQ2.5/DQ2.2",
    Dq2_5_1 == 1 & Dq7_5 == 1 ~ "DQ2.5/DQ7.5",
    Dq2_5_1 == 1 & Dq8 == 1 ~ "DQ2.5/DQ8",
    Dq2_5_1 == 1 ~ "DQ2.5/X",
    # DQ2.2 with a second haplotype, or alone.
    Dq2_2 == 1 & Dq7_5 == 1 ~ "DQ2.2/DQ7.5",
    Dq2_2 == 1 & Dq8 == 1 ~ "DQ2.2/DQ8",
    Dq2_2 == 1 ~ "DQ2.2/X",
    # DQ7.5 with DQ8, or alone.
    Dq7_5 == 1 & Dq8 == 1 ~ "DQ7.5/DQ8",
    Dq7_5 == 1 ~ "DQ7.5/X",
    # DQ8 alone.
    Dq8 == 1 ~ "DQ8/X",
    # None of the haplotypes above.
    TRUE ~ "X/X"
  )
)

In [None]:
# Add the DQ genotype label to the typed samples, matching rows on sample.id.
dq_labels <- select(dqmol, c("sample.id", "genotype"))
hla6 <- merge(hla5, dq_labels, by = "sample.id")

In [None]:
# Save the final table (HLA alleles, phenotype and DQ genotype) as CSV.
write.csv(hla6, "hla_types_with_DQ.csv")