## Prevalent CH GWAS loci

In [None]:
library(readxl)
library(data.table)
library(liftOver)
library(rtracklayer)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
library(GenomicRanges)

In [None]:
# Switch to the project directory. Every file read or written further down
# (workbook, chain file, CSVs, .rda) is given as a bare relative name, so
# those paths resolve against this directory.
setwd("/ch_progression/aric/gwas/rg2022/")

In [None]:
## Kessler et al 2022
# Three sheets of the same supplementary workbook, selected by position:
# sheet 3 -> ch_rg22, sheet 12 -> dnmt3a_rg22, sheet 15 -> tet2_rg22.
ch_rg22 <- read_excel(
  path = "41586_2022_5448_MOESM5_ESM.xlsx",
  sheet = 3
)
dnmt3a_rg22 <- read_excel(
  path = "41586_2022_5448_MOESM5_ESM.xlsx",
  sheet = 12
)
tet2_rg22 <- read_excel(
  path = "41586_2022_5448_MOESM5_ESM.xlsx",
  sheet = 15
)


In [None]:
# Load Chain file
# Parsed once and reused by every liftOver() call below.
hg38_to_19_chain <- import.chain("hg38ToHg19.over.chain")

## CH (overall)
# Build a one-base hg38 interval per variant: "chr"-prefixed chromosome,
# column 4 as both start and end, and column 2 carried along as `varID`
# (it is matched against `Name` in the merge below).
ch_rg22$CHROM <- paste0("chr", ch_rg22$Chr)
# Locate CHROM by name rather than by a hard-coded position, so the selection
# does not depend on how many columns the sheet has.
hg38.chip_df <- ch_rg22[, c(match("CHROM", names(ch_rg22)), 4, 4, 2)]
names(hg38.chip_df) <- c("chrom", "start", "end", "varID")
gr.chip.hg38 <- as(hg38.chip_df, "GenomicRanges")

chip_hg38tohg19 <- liftOver(gr.chip.hg38, hg38_to_19_chain)
# liftOver() returns one list element per input range. An element that does
# not hold exactly one range means the variant was lost or split, and the
# merge below would silently drop or duplicate it.
if (any(lengths(chip_hg38tohg19) != 1L)) {
  warning(
    sum(lengths(chip_hg38tohg19) != 1L),
    " CH variant(s) did not lift to exactly one hg19 position",
    call. = FALSE
  )
}
chip_hg38tohg19 <- as.data.frame(chip_hg38tohg19)
chip_hg38tohg19$chr_hg19 <- gsub(
  pattern = "chr", replacement = "",
  x = as.character(chip_hg38tohg19$seqnames)
)
chip_hg38tohg19$POS_hg19 <- chip_hg38tohg19$start

# Keep only the lifted coordinates and re-attach the full GWAS row by name.
chip_hg38tohg19 <- merge(
  chip_hg38tohg19[, c("varID", "chr_hg19", "POS_hg19")],
  ch_rg22,
  by.x = "varID", by.y = "Name"
)
# hg19 variant identifier in chr:pos:ref:alt form.
chip_hg38tohg19$varID_hg19 <- paste(
  chip_hg38tohg19$chr_hg19,
  chip_hg38tohg19$POS_hg19,
  chip_hg38tohg19$Ref,
  chip_hg38tohg19$Alt,
  sep = ":"
)
## DNMT3A
# Build a one-base hg38 interval per variant: "chr"-prefixed chromosome,
# column 4 as both start and end, and column 2 carried along as `varID`
# (it is matched against `name` in the merge below).
dnmt3a_rg22$CHROM <- paste0("chr", dnmt3a_rg22$chr)
# Locate CHROM by name rather than by a hard-coded position, so the selection
# does not depend on how many columns the sheet has.
hg38.dnmt3a_df <- dnmt3a_rg22[, c(match("CHROM", names(dnmt3a_rg22)), 4, 4, 2)]
names(hg38.dnmt3a_df) <- c("chrom", "start", "end", "varID")
gr.dnmt3a.hg38 <- as(hg38.dnmt3a_df, "GenomicRanges")

dnmt3a_hg38tohg19 <- liftOver(gr.dnmt3a.hg38, hg38_to_19_chain)
# liftOver() returns one list element per input range. An element that does
# not hold exactly one range means the variant was lost or split, and the
# merge below would silently drop or duplicate it.
if (any(lengths(dnmt3a_hg38tohg19) != 1L)) {
  warning(
    sum(lengths(dnmt3a_hg38tohg19) != 1L),
    " DNMT3A variant(s) did not lift to exactly one hg19 position",
    call. = FALSE
  )
}
dnmt3a_hg38tohg19 <- as.data.frame(dnmt3a_hg38tohg19)
dnmt3a_hg38tohg19$chr_hg19 <- gsub(
  pattern = "chr", replacement = "",
  x = as.character(dnmt3a_hg38tohg19$seqnames)
)
dnmt3a_hg38tohg19$POS_hg19 <- dnmt3a_hg38tohg19$start

# Keep only the lifted coordinates and re-attach the full GWAS row by name.
dnmt3a_hg38tohg19 <- merge(
  dnmt3a_hg38tohg19[, c("varID", "chr_hg19", "POS_hg19")],
  dnmt3a_rg22,
  by.x = "varID", by.y = "name"
)
# hg19 variant identifier in chr:pos:ref:alt form.
dnmt3a_hg38tohg19$varID_hg19 <- paste(
  dnmt3a_hg38tohg19$chr_hg19,
  dnmt3a_hg38tohg19$POS_hg19,
  dnmt3a_hg38tohg19$ref,
  dnmt3a_hg38tohg19$alt,
  sep = ":"
)

## TET2
# Build a one-base hg38 interval per variant: "chr"-prefixed chromosome,
# column 3 as both start and end, and column 1 carried along as `varID`
# (it is matched against `name` in the merge below).
tet2_rg22$CHROM <- paste0("chr", tet2_rg22$chr)
# Locate CHROM by name rather than by a hard-coded position, so the selection
# does not depend on how many columns the sheet has.
hg38.tet2_df <- tet2_rg22[, c(match("CHROM", names(tet2_rg22)), 3, 3, 1)]
names(hg38.tet2_df) <- c("chrom", "start", "end", "varID")
gr.tet2.hg38 <- as(hg38.tet2_df, "GenomicRanges")

tet2_hg38tohg19 <- liftOver(gr.tet2.hg38, hg38_to_19_chain)
# liftOver() returns one list element per input range. An element that does
# not hold exactly one range means the variant was lost or split, and the
# merge below would silently drop or duplicate it.
if (any(lengths(tet2_hg38tohg19) != 1L)) {
  warning(
    sum(lengths(tet2_hg38tohg19) != 1L),
    " TET2 variant(s) did not lift to exactly one hg19 position",
    call. = FALSE
  )
}
tet2_hg38tohg19 <- as.data.frame(tet2_hg38tohg19)
tet2_hg38tohg19$chr_hg19 <- gsub(
  pattern = "chr", replacement = "",
  x = as.character(tet2_hg38tohg19$seqnames)
)
tet2_hg38tohg19$POS_hg19 <- tet2_hg38tohg19$start

# Keep only the lifted coordinates and re-attach the full GWAS row by name.
tet2_hg38tohg19 <- merge(
  tet2_hg38tohg19[, c("varID", "chr_hg19", "POS_hg19")],
  tet2_rg22,
  by.x = "varID", by.y = "name"
)
# hg19 variant identifier in chr:pos:ref:alt form.
tet2_hg38tohg19$varID_hg19 <- paste(
  tet2_hg38tohg19$chr_hg19,
  tet2_hg38tohg19$POS_hg19,
  tet2_hg38tohg19$ref,
  tet2_hg38tohg19$alt,
  sep = ":"
)

# Flank each hg19 position by 500,000 bp on either side. The bounds are
# plain arithmetic on POS_hg19; nothing clamps them at chromosome ends.
chip_hg38tohg19[["Start_500kb"]] <- chip_hg38tohg19[["POS_hg19"]] - 5e5
chip_hg38tohg19[["End_500kb"]] <- chip_hg38tohg19[["POS_hg19"]] + 5e5

dnmt3a_hg38tohg19[["Start_500kb"]] <- dnmt3a_hg38tohg19[["POS_hg19"]] - 5e5
dnmt3a_hg38tohg19[["End_500kb"]] <- dnmt3a_hg38tohg19[["POS_hg19"]] + 5e5

tet2_hg38tohg19[["Start_500kb"]] <- tet2_hg38tohg19[["POS_hg19"]] - 5e5
tet2_hg38tohg19[["End_500kb"]] <- tet2_hg38tohg19[["POS_hg19"]] + 5e5

# Write one comma-separated file per lifted table, with a header row and no
# row names. TRUE/FALSE are spelled out because T and F are ordinary
# variables that can be reassigned.
fwrite(chip_hg38tohg19, "ch_rg22.hg19.csv",
       row.names = FALSE, col.names = TRUE, sep = ",")
fwrite(dnmt3a_hg38tohg19, "dnmt3a_rg22.hg19.csv",
       row.names = FALSE, col.names = TRUE, sep = ",")
fwrite(tet2_hg38tohg19, "tet2_rg22.hg19.csv",
       row.names = FALSE, col.names = TRUE, sep = ",")

# Snapshot the whole workspace, not just the three tables written above.
save.image("rg22_gwas_loci.hg19.rda")
