In [1]:
# All relative paths used below ("pipeline/...", "script/...") are resolved
# against this project directory.
setwd(dir = "/frazer01/projects/CARDIPS/analysis/cardiac_gwas_coloc")

# Load the project's shared definitions from script/functions.R.
source(file = "script/functions.R")


# Compute LD (r2 and D') around each lead SNP for the locus figures

In [25]:
# Folder that receives the per-SNP LD files; no warning if it already exists.
dir.create(path = "pipeline/ld", showWarnings = FALSE)

In [2]:
# Fine-mapping results: a list that is later indexed by the colocalization key
# "<transcript_id>:<type>:<gwas>:<locus>".
fm_list = readRDS(file = "pipeline/eqtl.4.finemap_single/fm_list.rds")

In [8]:
# Current colocalization results, enrichment table and manifest, plus the
# colocalization results of the earlier run (eqtl.2.gwas_coloc).
coloc        =              fread("pipeline/eqtls.0.1.coloc_gwas/coloc.txt"           , sep = "\t", header = TRUE, data.table = FALSE)
enr1         =              fread("pipeline/eqtls.0.1.coloc_gwas/enrichment_by_pp.txt", sep = "\t", header = TRUE, data.table = FALSE)
manifest     = add_rownames(fread("pipeline/eqtls.0.1.coloc_gwas/manifest.txt"        , sep = "\t", header = TRUE, data.table = FALSE))
# Keep colocalizations with PP.H4 >= 0.8. which() drops rows whose PP.H4.abf is
# NA; plain logical indexing would turn each of them into an all-NA row.
pph4         = coloc[ which(coloc$PP.H4.abf >= 0.8),]
coloc_old    =              fread("pipeline/eqtl.2.gwas_coloc/coloc.txt"           , sep = "\t", header = TRUE, data.table = FALSE)
pph4_old     = coloc_old[ which(coloc_old$PP.H4.abf >= 0.8),]


In [15]:
# Columns that identify one colocalization in both the current and the old table.
coloc_keys = c("transcript_id", "gene_id", "type", "gwas")

# Current colocalizations flagged interaction.ipsc_cvpc == 1 for the
# "continuous-PP-both_sexes-combined_medadj_irnt" GWAS, annotated with the locus
# and gene name from the old table. which() drops rows where the filter
# evaluates to NA instead of turning them into all-NA rows.
toplot1 = merge(pph4    [ which(pph4$interaction.ipsc_cvpc == 1 & pph4$gwas == "continuous-PP-both_sexes-combined_medadj_irnt"), coloc_keys],
                pph4_old[ , c(coloc_keys, "locus", "gene_name")],
                by = coloc_keys
               )
# Same for rows flagged interaction.adult == 1 in the
# "continuous-102-both_sexes-irnt" GWAS.
toplot2 = merge(pph4    [ which(pph4$interaction.adult == 1 & pph4$gwas == "continuous-102-both_sexes-irnt"), coloc_keys],
                pph4_old[ , c(coloc_keys, "locus", "gene_name")],
                by = coloc_keys
               )

toplot       = rbind(toplot1, toplot2)
# Key used to look each colocalization up in fm_list.
toplot$coloc = paste(toplot$transcript_id, toplot$type, toplot$gwas, toplot$locus, sep = ":")
fm           = fm_list[toplot$coloc]

#toplot = toplot[order(toplot$gene_name, toplot$transcript_id),]


In [52]:
# Hand-picked SNPs (rsid -> chromosome) that always get an LD table.
rsid2chrom = data.frame(rsid = c("rs28473516", "rs17584", "rs61876335", "rs7589901", "rs2549009", "rs2738413", "rs3493", "rs74181299", "rs11000060", "rs1708618", "rs12724121"), chrom = c(5, 11, 11, 2, 5, 14, 13, 2, 10, 17, 1))
# Add one SNP per colocalization selected for plotting, then de-duplicate and
# sort by chromosome and rsid.
rsid2chrom = unique(rbind(rsid2chrom, as.data.frame(rbindlist(lapply(toplot$coloc, function(x)
{
    indata = fm_list[[x]]
    # Variant in the first row of the fine-mapping table
    # (presumably the lead variant -- confirm that finemap is sorted).
    id     = indata$finemap[1, "id"]

    # Looked up by row name: assumes the row names of qtl are variant ids.
    indata$qtl[id, c("rsid", "chrom")]
})), stringsAsFactors = FALSE))) %>% arrange(chrom, rsid)

In [53]:
# Compute LD between one SNP and the variants around it, and cache the result.
#
# Arguments:
#   rsid  : SNP identifier handed to plink --ld-snp; also names every output file.
#   chrom : chromosome label without the "chr" prefix; selects the 1000 Genomes
#           VCF and the UKBB pfile for that chromosome.
#
# Side effects:
#   Writes <working dir>/pipeline/ld/<rsid>.* (subject list, plink2 bed/bim/fam,
#   plink .ld) and the final tab-separated table <rsid>.ld.txt with the columns
#   snp_a, snp_b, r2, dp and bg (a colour for each r2 bin).
#   Does nothing if <rsid>.ld.txt already exists.
#
# Requires the globals `plink` and `plink2` (paths to the executables).
# Stops with an error if the plink LD run fails or produces no .ld file.
# Returns NULL invisibly.
calculate_ld_by_snp = function(rsid, chrom)
{
    prefix  = file.path(getwd(), "pipeline/ld", rsid)
    vcf     = paste("/frazer01/reference/public/1000Genomes/ALL", paste0("chr", chrom), "phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz", sep = ".")
    #bfile   = paste("/sdsc/cast/ukbb/array_imputed"   , paste("ukb_imp", paste0("chr", chrom), "v3", sep = "_"), sep = "/")
    #bgen    = paste("/sdsc/cast/ukbb/array_imputed"   , paste("ukb_imp", paste0("chr", chrom), "v3.bgen", sep = "_"), sep = "/")
    pgen    = file.path("/sdsc/cast/ukbb/array_imputed/pfile_converted", paste0("chr", chrom))
    outfile = paste(prefix, "ld.txt", sep = ".")
    
    # The finished table doubles as the cache marker: nothing to do if it is there.
    if(file.exists(outfile)){return(invisible(NULL))}
    
    # Subject list for plink2 --keep: the first 5000 lines of the .psam file.
    subjects_file = paste(prefix, "subjects.txt", sep = ".")
    command       = paste("head", "-n", 5000, paste(pgen, "psam", sep = "."), ">", subjects_file)
    status        = system(command)
    if(status != 0){warning("could not write ", subjects_file, " (exit status ", status, ")", call. = FALSE)}
    
    command = paste(plink2,
                    "--make-bed",
                    "--out"     , prefix,
                    "--memory"  , 96000,
                    "--threads" , 12,
                    "--pgen"    , paste(pgen, "pgen", sep = "."),
                    "--pvar"    , paste(pgen, "pvar", sep = "."),
                    "--psam"    , paste(pgen, "psam", sep = "."),
                    "--keep"    , subjects_file,
                    "--maf"     , 0.01,
                    "--hwe"     , 0.000001,
                    ""
                   )

    message(command)
    # NOTE(review): the bed/bim/fam written here is not read by the LD step
    # below, which takes the 1000 Genomes VCF (its --bed/--bim/--fam options are
    # commented out). A failure is therefore reported but does not abort.
    if(!file.exists(paste(prefix, "bed", sep = ".")))
    {
        status = system(command)
        if(status != 0){warning("plink2 --make-bed exited with status ", status, " for ", rsid, call. = FALSE)}
    }
    
    
    command = paste(plink,
                    "--out"           , prefix,
                    "--memory"        , 96000,
                    "--threads"       , 12,
                    "--vcf"           , vcf,
                    #"--bed"           , paste(prefix, "bed", sep = "."),
                    #"--bim"           , paste(prefix, "bim", sep = "."),
                    #"--fam"           , paste(prefix, "fam", sep = "."),
                    #"--bfile"         , bfile,
                    #"--bgen"          , bgen,
                    #"--sample"        , "/sdsc/cast/ukbb/array_imputed/ukb46122_imp_chr1_v3_s487283.sample",
                    "--ld-snp"        , rsid,
                    #"--indep-pairwise", 1000, 5000, 0.2,
                    "--ld-window"     , 1000,
                    "--ld-window-kb"  , 5000,
                    "--ld-window-r2"  ,    0,
                    "--r2"            , "dprime",
                    #"--maf"           , 0.01,
                    #"--hwe"           , 0.000001,
                    ""
                   )

    message(command)
    ld_file = paste(prefix, "ld", sep = ".")
    if(!file.exists(ld_file))
    {
        status = system(command)
        if(status != 0)
        {
            # Do not leave a partial .ld file behind: a later call would reuse it.
            unlink(ld_file)
            stop("plink --r2 exited with status ", status, " for ", rsid, call. = FALSE)
        }
    }
    if(!file.exists(ld_file)){stop("plink did not write ", ld_file, call. = FALSE)}

    ld           = read.table(ld_file, sep = "", header = TRUE)[,c("SNP_A", "SNP_B", "R2", "DP")]
    colnames(ld) = tolower(colnames(ld))

    # One colour per r2 bin of width 0.2; anything outside [.., 1] (or NA) gets NA.
    ld = mutate(ld,
           bg = case_when(r2 <= 0.2 ~ "#00007e",
                          r2 <= 0.4 ~ "#86cdf9",
                          r2 <= 0.6 ~ "#01fe02",
                          r2 <= 0.8 ~ "#fea500",
                          r2 <= 1   ~ "#fc0000",
                          TRUE ~ NA_character_
                         )
          )   

    # Write to a temporary name and rename afterwards, so that an interrupted
    # write can never be mistaken for a finished, cached result.
    tmpfile = paste(outfile, "tmp", sep = ".")
    fwrite(ld, tmpfile, sep = "\t", col.names = TRUE, row.names = FALSE)
    if(!file.rename(tmpfile, outfile)){stop("could not move ", tmpfile, " to ", outfile, call. = FALSE)}
    
    invisible(NULL)
}

# Executables used by calculate_ld_by_snp (read there as globals).
plink    = "/software/plink-1.90b3x/plink"
plink2   = "/software/plink-2.3/plink2_64"
# Leftover for stepping through the loop body by hand; the function below has
# its own ii argument.
ii = 1

# One LD table per row of rsid2chrom. seq_len() yields an empty sequence for a
# table with no rows (1:0 would run the body for the indices 1 and 0).
invisible(lapply(seq_len(nrow(rsid2chrom)), function(ii)
{
    message(ii, appendLF = FALSE)
    
    # A failing SNP must not stop the remaining ones: report which one failed
    # and carry on.
    tryCatch(calculate_ld_by_snp(rsid2chrom[ii, "rsid"], rsid2chrom[ii, "chrom"]),
             error = function(e)
             {
                 message("\nLD calculation failed for ", rsid2chrom[ii, "rsid"], ": ", conditionMessage(e))
             })
}))


1
/software/plink-2.3/plink2_64 --make-bed --out /frazer01/projects/CARDIPS/analysis/cardiac_gwas_coloc/pipeline/ld/rs12724121 --memory 96000 --threads 12 --pgen /sdsc/cast/ukbb/array_imputed/pfile_converted/chr1.pgen --pvar /sdsc/cast/ukbb/array_imputed/pfile_converted/chr1.pvar --psam /sdsc/cast/ukbb/array_imputed/pfile_converted/chr1.psam --keep /frazer01/projects/CARDIPS/analysis/cardiac_gwas_coloc/pipeline/ld/rs12724121.subjects.txt --maf 0.01 --hwe 1e-06 

/software/plink-1.90b3x/plink --out /frazer01/projects/CARDIPS/analysis/cardiac_gwas_coloc/pipeline/ld/rs12724121 --memory 96000 --threads 12 --vcf /frazer01/reference/public/1000Genomes/ALL.chr1.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz --ld-snp rs12724121 --ld-window 1000 --ld-window-kb 5000 --ld-window-r2 0 --r2 dprime 

2
3
4
/software/plink-2.3/plink2_64 --make-bed --out /frazer01/projects/CARDIPS/analysis/cardiac_gwas_coloc/pipeline/ld/rs74181299 --memory 96000 --threads 12 --pgen /sdsc/cast/ukbb/ar

In [35]:
# Show the structure of the first fine-mapping entry.
str(fm_list[[1L]])

List of 11
 $ transcript_id : chr "ENSG00000000971.16_4"
 $ phenotype     : chr "rna"
 $ type          : int 0
 $ locus         : chr "1_196248779_197362184"
 $ trait         : chr "biomarkers-30600-both_sexes-irnt"
 $ coord         : chr "1:196248779-197362184"
 $ gwas          :'data.frame':	2529 obs. of  5 variables:
  ..$ pos : int [1:2529] 196248826 196249082 196249394 196250413 196250589 196250988 196251522 196251694 196252118 196252159 ...
  ..$ id  : chr [1:2529] "VAR_1_196248826_G_A" "VAR_1_196249082_T_A" "VAR_1_196249394_G_C" "VAR_1_196250413_C_T" ...
  ..$ beta: num [1:2529] 0.01319 -0.00768 -0.03075 0.00587 -0.01241 ...
  ..$ se  : num [1:2529] 0.00276 0.0025 0.06342 0.0033 0.00498 ...
  ..$ pval: num [1:2529] 1.69e-06 2.14e-03 6.28e-01 7.58e-02 1.27e-02 ...
 $ qtl           :'data.frame':	1293 obs. of  11 variables:
  ..$ chrom     : int [1:1293] 1 1 1 1 1 1 1 1 1 1 ...
  ..$ pos       : int [1:1293] 196121869 196121998 196122305 196122733 196122848 196123070 196123091 196

In [47]:
# Display the SNP -> chromosome table that drives the LD calculation.
rsid2chrom

rsid,chrom
<chr>,<dbl>
rs1190441,2
rs1656398,2
rs7589901,2
rs7687399,4
rs2549009,5
rs28473516,5
rs10247315,7
rs12705096,7
rs478882,9
rs11000060,10
