In [1]:
library(dplyr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [2]:
# print working directory
getwd()

In [5]:
pheno <- "hypertension"

# Loading GWAS

In [6]:
# copy the SNP list from cedars server
# create empty .txt file, paste in the .txt file (save and delete the extension ".txt")
# read GWAS in 
GWAS_dir <- paste0("/home/jupyter/workspaces/hypertensionukbb/AB_PRS_AoU/external_GWAS/",pheno,"_external_GWAS")
GWAS <- read.table(GWAS_dir, header=T)
print(head(GWAS))

  chr  position         SNP effect_allele alt_allele        beta
1   1  10796866    rs880315             T          C -0.03027034
2   1  11880731 rs149764880             G          T  0.03648623
3   1 113046879   rs3790604             C          A -0.04481796
4   1  11901694  rs41275502             G          A  0.05016358
5   1  11906609  rs12744433             C          T  0.04266361
6   1  11854476   rs1801131             T          G  0.02216711


# PRS calculation

In [10]:
func_SNP_swap <- function(vec){
    # swap 0 and 2 in .raw
    vec[vec==0] <- 9
    vec[vec==2] <- 0
    vec[vec==9] <- 2
}

func_process_raw <- function(raw, GWAS, SNP_complete){
    ## replace NA with 0
    raw <- as.matrix(raw)
    raw[is.na(raw)] <- 0
    colnames(raw) <- SNP_complete$SNP

    ## check mis-matched effective allele
    SNP_select <- inner_join(GWAS, SNP_complete, by="SNP")
    SNP_select$match <- (SNP_select$effect_allele == SNP_select$effect_AoU)

    ## select SNPs from .raw
    raw_select <- raw[, SNP_select$SNP]
  
    if (ncol(raw_select) > 0){
      for (i in ncol(raw_select)){
        # if effective allele mis-matched, swap 0 and 2
        if (!SNP_select$match[i]) {
          raw_select[,i] <- func_SNP_swap(raw_select[,i])
        }
      }
      return(list(raw_select, SNP_select))
    } else {
      return(list(raw_select, SNP_select))
    }
}

func_prs <- function(rslt){
    raw <- rslt[[1]]
    gwas <- rslt[[2]]
    prs <- raw %*% gwas$beta
    return(prs)
}

In [11]:
print(paste("chr",1,"start..."))
# read chr_1.raw
raw_dir <- paste0("/home/jupyter/workspaces/hypertensionukbb/Version2/mainFiles/chr",1,"_small.raw")
raw <- read.table(raw_dir, header=T)
IID <- raw[,2]
raw <- raw[,-c(1,2,3,4,5,6)]
## get the SNP list of .raw of the AoU
SNP_complete <- data.frame(SNP = gsub("_.*","",colnames(raw)),
                           effect_AoU = gsub(".*_","",colnames(raw)))
rslt <- func_process_raw(raw, GWAS, SNP_complete)
prs <- func_prs(rslt)

for (i in 2:22){
    print(paste("chr",i,"start..."))
    # read chr_i.raw
    raw_dir <- paste0("/home/jupyter/workspaces/hypertensionukbb/Version2/mainFiles/chr",i,"_small.raw")
    raw <- read.table(raw_dir, header=T)
    raw <- raw[,-c(1,2,3,4,5,6)]
    ## get the SNP list of .raw of the AoU
    SNP_complete <- data.frame(SNP = gsub("_.*","",colnames(raw)),
                               effect_AoU = gsub(".*_","",colnames(raw)))
    rslt <- func_process_raw(raw, GWAS, SNP_complete)
    
    ## if there are selected SNPs in chr_i
    if (nrow(rslt[[2]]) > 0){
        prs_temp <- func_prs(rslt)
        prs <- prs + prs_temp
    } 
}

[1] "chr 1 start..."
[1] "chr 2 start..."
[1] "chr 3 start..."
[1] "chr 4 start..."
[1] "chr 5 start..."
[1] "chr 6 start..."
[1] "chr 7 start..."
[1] "chr 8 start..."
[1] "chr 9 start..."
[1] "chr 10 start..."
[1] "chr 11 start..."
[1] "chr 12 start..."
[1] "chr 13 start..."
[1] "chr 14 start..."
[1] "chr 15 start..."
[1] "chr 16 start..."
[1] "chr 17 start..."
[1] "chr 18 start..."
[1] "chr 19 start..."
[1] "chr 20 start..."
[1] "chr 21 start..."
[1] "chr 22 start..."


In [13]:
## save prs dataframe
df_rslt <- data.frame(IID=IID, prs=prs)
print(dim(df_rslt))
print(head(df_rslt))

prs_dir <- paste0("/home/jupyter/workspaces/hypertensionukbb/AB_PRS_AoU/external_PRS/",pheno,"_external_PRS.csv")
write.csv(df_rslt, prs_dir, row.names = FALSE)

[1] 245394      2
      IID        prs
1 1000004  0.2972681
2 1000033 -0.5614180
3 1000039 -0.1817284
4 1000042 -0.7456061
5 1000045 -0.5583750
6 1000059 -0.6431872
