In [3]:
library(dplyr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [4]:
# Phenotype name; it is pasted into the input (SNP list, betas) and output
# (theta) file names built further down.
pheno <- "hypertension"

# Functions

## Functions for loading the selected SNPs

In [14]:
# Swap the genotype codes 0 and 2 in a vector taken from a .raw file.
#
# vec: vector of genotype codes. Values other than 0 and 2 (including NA)
#      are left unchanged.
# Returns the vector with every 0 replaced by 2 and every 2 replaced by 0.
func_SNP_swap <- function(vec) {
  # Locate both groups before modifying anything so that no sentinel value
  # is needed; which() also skips NA entries.
  zeros <- which(vec == 0)
  twos <- which(vec == 2)
  vec[zeros] <- 2
  vec[twos] <- 0
  # The vector must be the last expression: ending on an assignment would
  # make the function return the assigned scalar instead of the vector.
  vec
}

# Select the SNPs listed in SNP_list from a .raw genotype table and align
# their coding with the effect allele given in SNP_list.
#
# raw:          genotype columns of the .raw file (one column per SNP, in the
#               same order as the rows of SNP_complete).
# SNP_list:     data frame with columns SNP and effect_allele.
# SNP_complete: data frame with columns SNP and effect_AoU, one row per
#               column of raw.
# Returns a matrix with one column per selected SNP (column names are the SNP
# ids), or NULL when none of the SNPs in SNP_list is present in raw.
func_process_raw <- function(raw, SNP_list, SNP_complete) {
  ## replace NA with 0
  raw <- as.matrix(raw)
  raw[is.na(raw)] <- 0
  colnames(raw) <- SNP_complete$SNP

  ## check mis-matched effective allele
  SNP_select <- inner_join(SNP_list, SNP_complete, by = "SNP")
  if (nrow(SNP_select) == 0) {
    return(NULL)
  }
  SNP_select$match <- (SNP_select$effect_allele == SNP_select$effect_AoU)
  if (anyNA(SNP_select$match)) {
    stop("effect allele comparison produced NA for at least one SNP",
         call. = FALSE)
  }

  ## select SNPs from .raw
  ## drop = FALSE keeps a one-column matrix when a single SNP is selected
  raw_select <- raw[, SNP_select$SNP, drop = FALSE]

  ## if effective allele mis-matched, swap 0 and 2 (every such column)
  for (i in which(!SNP_select$match)) {
    raw_select[, i] <- func_SNP_swap(raw_select[, i])
  }

  raw_select
}

## Functions for converting genotypes to theta

In [6]:
# Replace genotype codes by their per-SNP effect values.
#
# geno:      genotype matrix, one column per SNP.
# betas_mat: data frame with columns beta1 and beta2 and one row per column
#            of geno, in the same order as those columns.
# Returns a numeric matrix shaped like geno in which a code of 1 becomes the
# SNP's beta1, a code of 2 becomes its beta2 and any other code becomes 0.
convert_theta <- function(geno, betas_mat) {
  stopifnot(
    all(c("beta1", "beta2") %in% names(betas_mat)),
    # Without this check a shorter or longer beta table would be recycled
    # silently and the wrong weights applied.
    nrow(betas_mat) == ncol(geno)
  )

  # Transposing puts SNPs in rows, so multiplying by a vector with one
  # entry per SNP scales each SNP by its own beta.
  het_part <- t(geno == 1) * betas_mat$beta1
  hom_part <- t(geno == 2) * betas_mat$beta2

  t(het_part + hom_part)
}

# Load raw and convert theta (for each chr)

In [16]:
# Per-chromosome pipeline: read the selected-SNP list and the beta table for
# `pheno`, then for chromosomes 1-22 read the .raw genotype file, keep the
# selected SNPs, convert genotype codes to theta values and write one theta
# file per chromosome that contains at least one selected SNP.
selected_dir <- "/home/jupyter/workspaces/hypertensionukbb/AB_PRS_AoU/selected_SNP"
raw_base_dir <- "/home/jupyter/workspaces/hypertensionukbb/Version2/mainFiles"
theta_base_dir <- "/home/jupyter/workspaces/hypertensionukbb/AB_PRS_AoU/theta_SNP"

SNP_dir <- file.path(selected_dir, paste0(pheno, "_SNP_list"))
SNP_list <- read.table(SNP_dir, header = TRUE)
print(head(SNP_list))

betas_dir <- file.path(selected_dir, paste0(pheno, "_betas_mat"))
betas_mat <- read.table(betas_dir, header = TRUE)
print(head(betas_mat))

for (i in 1:22) {
  print(paste("chr",i,"start..."))
  # read chr_i.raw
  raw_dir <- file.path(raw_base_dir, paste0("chr", i, "_small.raw"))
  raw <- read.table(raw_dir, header = TRUE)
  IID <- raw[, 2]
  # the first six columns are not genotype columns
  raw <- raw[, -c(1, 2, 3, 4, 5, 6)]

  ## get the SNP list of .raw of the AoU
  ## column names look like <SNP>_<allele>; split at the LAST underscore so
  ## that the SNP id and the allele always come from the same split point
  raw_names <- colnames(raw)
  SNP_complete <- data.frame(SNP = sub("_[^_]*$", "", raw_names),
                             effect_AoU = sub(".*_", "", raw_names))

  ## process .raw
  raw <- func_process_raw(raw, SNP_list, SNP_complete)

  ## if there are selected SNPs in chr_i, convert and save theta
  if (!is.null(raw)) {
    ## convert theta chr_i
    df_selected <- data.frame(SNP = colnames(raw))
    betas_mat_temp <- inner_join(df_selected, betas_mat, by = "SNP")

    # convert_theta pairs beta rows with genotype columns by position, so a
    # missing or duplicated SNP in betas_mat would silently misalign them.
    if (!identical(as.character(betas_mat_temp$SNP), colnames(raw))) {
      stop("betas_mat does not match the selected SNPs of chr ", i,
           call. = FALSE)
    }
    theta <- convert_theta(raw, betas_mat_temp)

    ## save theta chr_i
    theta_dir <- file.path(theta_base_dir, paste0(pheno, "_chr", i, "_theta"))
    write.table(theta, theta_dir, quote = FALSE, row.names = FALSE,
                col.names = TRUE)
  }
}

  chr  position         SNP effect_allele alt_allele
1   1  10799577  rs12046278             C          T
2   1  27042706 rs566260414             G          A
3   1 112344367    rs631037             T          C
4   1 113044328  rs10776752             T          G
5   1 152932031   rs6587713             G          A
6   1 201783682   rs2820290             G          A
          SNP       beta1       beta2
1  rs12046278  0.03316409  0.10024643
2 rs566260414  0.05209848  0.16483387
3    rs631037  0.03820311  0.04471653
4  rs10776752  0.04634664  0.16209420
5   rs6587713  0.04068713 -0.03985691
6   rs2820290 -0.01516441 -0.06268501
[1] "chr 1 start..."
[1] "chr 2 start..."
[1] "chr 3 start..."
[1] "chr 4 start..."
[1] "chr 5 start..."
[1] "chr 6 start..."
[1] "chr 7 start..."
[1] "chr 8 start..."
[1] "chr 9 start..."
[1] "chr 10 start..."
[1] "chr 11 start..."
[1] "chr 12 start..."
[1] "chr 13 start..."
[1] "chr 14 start..."
[1] "chr 15 start..."
[1] "chr 16 start..."
[1] "chr 17 start...