# Perform colocalization at each locus, between each pair of traits, for all ethnicities

In [1]:
setwd("/frazer01/projects/CARDIPS/analysis/cardiac_gwas_coloc")

source("script/functions.R"  )


In [2]:
dir.create("pipeline/4.1.coloc_qtls"               , showWarnings = FALSE)
dir.create("pipeline/4.1.coloc_qtls/coloc_by_locus", showWarnings = FALSE)


In [3]:
manifest   = add_rownames(fread("pipeline/1.1.sumstats/manifest.txt"                     , sep = "\t", header = TRUE, data.table = FALSE))
loci       = add_rownames(fread("pipeline/1.2.genomewide_significant_loci/loci.txt"      , sep = "\t", header = TRUE, data.table = FALSE))
loci2study = add_rownames(fread("pipeline/1.2.genomewide_significant_loci/loci2study.txt", sep = "\t", header = TRUE, data.table = FALSE))


In [4]:
populations = c('meta','AFR','AMR','CSA','EAS','EUR','MID')

# Get QTL data
- 4 phenotypes (missing footprints for now: do separately): rna, isoform, atac, chip
- QTL data is in `/frazer01/projects/CARDIPS/analysis/cardiac_qtls_combined/input/qtl/qtls.RDS`
- Single QTL files are in `/frazer01/projects/CARDIPS/analysis/cardiac_qtls_combined/input/qtl/processing`


In [9]:
qtl_list = readRDS("/frazer01/projects/CARDIPS/analysis/cardiac_qtls_combined/input/qtl/qtls.RDS"   )
exp_list = readRDS("/frazer01/projects/CARDIPS/analysis/cardiac_qtls_combined/input/expdata_qtl.rds")

In [8]:
phenotypes = qtl_list$phenotypes
qtls       = qtl_list$qtl


In [91]:
table(exp_list$metadata$phenotype)


atac chip  rna 
 259  131  966 

In [128]:
find_qtls_to_test = function(phenotype, qtls)
{
    x = qtls[[phenotype]]
    x = unique(x[x$egene == TRUE, c("gene_id", "gene_name", "transcript_id", "type", "chrom", "start", "end")])
    x$phenotype = phenotype
    
    return(x)
}

qtls2test     = as.data.frame(rbindlist(lapply(phenotypes$phenotype, function(phenotype){find_qtls_to_test(phenotype, qtls)})), stringsAsFactors = FALSE)
qtls2test_bed = "pipeline/4.1.coloc_qtls/qtls2test.bed"

fwrite(qtls2test, "pipeline/4.1.coloc_qtls/qtls2test.txt"                                                                                  , sep = "\t", col.names = TRUE, row.names = FALSE)
fwrite(unique(qtls2test[order(qtls2test$chrom, qtls2test$start, qtls2test$end),c("chrom", "start", "end", "transcript_id")]), qtls2test_bed, sep = "\t", col.names = FALSE, row.names = FALSE)


In [129]:
loci_bed = "pipeline/4.1.coloc_qtls/loci.bed"

fwrite(loci[order(loci$chrom, loci$from, loci$to),c("chrom", "from", "to", "locus")], loci_bed, sep = "\t", col.names = FALSE, row.names = FALSE)


In [138]:
command               = paste("bedtools", "intersect", "-loj", "-a", qtls2test_bed, "-b", loci_bed)
intersected           = fread(cmd = command, sep = "\t", header = FALSE, data.table = FALSE)[,c(4,8)]
colnames(intersected) = c("transcript_id", "locus")
intersected           = unique(merge(intersected[intersected$locus != ".",], qtls2test[,c("transcript_id", "gene_id", "phenotype")]))
intersected$qtl_file  = paste("/frazer01/projects/CARDIPS/analysis/cardiac_qtls_combined/input/qtl/processing", intersected$phenotype, paste("qtl", intersected$transcript_id, "txt", sep = "."), sep = "/")

fwrite(intersected, "pipeline/4.1.coloc_qtls/intersected_qtls_loci.txt", sep = "\t", col.names = TRUE, row.names = FALSE)

# Create qsub for each locus

In [125]:
run_qsub = function(locus)
{
    qsub = paste("qsub", paste(getwd(), "script", "4.1.coloc_with_qtls.sh", sep = "/"), locus)
    
    system(qsub)
    
    #message(qsub)
}

invisible(lapply(loci$locus, run_qsub))


# After colocalization is done, combine all data

In [143]:
loci$n = unlist(lapply(loci$locus, function(locus){nrow(intersected[ intersected$locus == locus, ])}))

In [144]:
coloc = as.data.frame(rbindlist(lapply(loci[ loci$n > 0, "locus"], function(x)
{
    infile = paste0("pipeline/4.1.coloc_qtls/coloc_by_locus/", x, ".txt")
    indata = fread(infile, sep = "\t", header = TRUE, data.table = FALSE)
    
    return(indata)
})), stringsAsFactors = FALSE)

fwrite(coloc, "pipeline/4.1.coloc_qtls/coloc.txt", sep = "\t", col.names = TRUE, row.names = FALSE)

# SCRATCH:
- Create functions to run coloc

In [31]:
suppressPackageStartupMessages(library(coloc))

In [118]:
get_qtl_data = function(qtl_file)
{
    indata = fread(qtl_file, sep = "\t", header = TRUE, data.table = FALSE)
    return(indata)
}


locus                 = "1_965800_2544414"
intersected           = intersected[intersected$locus == locus,]
rownames(intersected) = intersected$transcript_id
studies               = unlist(strsplit(loci[locus, "ids"], ","))
manifest              = manifest[studies,]
totest_qtl            = lapply(intersected$qtl_file, get_qtl_data)
names(totest_qtl)     = intersected$transcript_id


In [107]:
get_gwas_data = function(coord, gwas_file)
{
    my_head              = colnames(fread(cmd = paste("zcat", gwas_file, "|", "head -n 2"), sep = "\t", header = TRUE, data.table = FALSE))
    gwas_data            = suppressWarnings(tabix.read.table(gwas_file, coord, col.names = TRUE, stringsAsFactors = FALSE))
    colnames(gwas_data)  = my_head
    gwas_data$id         = paste("VAR", gwas_data$chr, gwas_data$pos, gwas_data$ref, gwas_data$alt, sep = "_")
    rownames(gwas_data)  = gwas_data$id
    
    return(gwas_data)
}

create_dataset = function(study, trait_type, totest, variants, pop, populations, manifest)
{
    if(trait_type %in% c("categorical", "icd10", "phecode"))
    {
        if(pop == "meta"){n = sum(manifest[study, paste("n_cases", populations, sep = "_")]) + sum(manifest[study, paste("n_controls", populations, sep = "_")])}
        if(pop != "meta"){n =     manifest[study, paste("n_cases", pop        , sep = "_")]  +     manifest[study, paste("n_controls", pop        , sep = "_")] }
        
        if(pop == "meta"){s = sum(manifest[study, paste("n_cases", populations, sep = "_")]) / n}
        if(pop != "meta"){s =     manifest[study, paste("n_cases", pop        , sep = "_")]  / n}
        
        totest  = totest[is.na(totest[,paste("pval", pop, sep = "_")]) == FALSE & is.na(totest[,paste("af_controls", pop, sep = "_")]) == FALSE, ]
        dataset = list(snp = variants, pvalues = totest[variants, paste("pval", pop, sep = "_")], N = n, s = s, MAF = totest[variants, paste("af_controls", pop, sep = "_")], type = "cc")
        
    }
    if(trait_type %in% c("biomarkers", "continuous"))
    {
        if(pop == "meta"){n = sum(manifest[study, paste("n_cases", populations, sep = "_")])}
        if(pop != "meta"){n =     manifest[study, paste("n_cases", pop        , sep = "_")] }
        
        totest  = totest[is.na(totest[,paste("pval", pop, sep = "_")]) == FALSE & is.na(totest[,paste("af", pop, sep = "_")]) == FALSE, ]
        dataset = list(snp = variants, pvalues = totest[variants, paste("pval", pop, sep = "_")], N = n, MAF = totest[variants, paste("af", pop, sep = "_")], type = "quant")
    }
    if(trait_type == "qtl")
    {
        rownames(totest) = totest$id
        phenotype2n      = data.frame(phenotype = c("atac", "chip", "rna", "isoform"), n = c(259,131,966,966))
        dataset = list(snp     = variants, 
                       pvalues = totest     [variants, "pval"], 
                       N       = phenotype2n[phenotype2n$phenotype == pop, "n"], 
                       MAF     = totest     [variants, "af"], 
                       type    = "quant")
    }
    
    
    return(dataset)
}

run_coloc_by_pop = function(locus, study1, study2, pop, populations, totest1, totest2, qtls2test, manifest)
{
    trait_type1     = manifest[study1, "trait_type"]
    trait_type2     = "qtl"
    
    if( trait_type1 %in% c("categorical", "icd10", "phecode")){variants1  = totest1[is.na(totest1[,paste("pval", pop, sep = "_")]) == FALSE & is.na(totest1[,paste("af_controls", pop, sep = "_")]) == FALSE, "id"]}
    if( trait_type1 %in% c("biomarkers", "continuous"       )){variants1  = totest1[is.na(totest1[,paste("pval", pop, sep = "_")]) == FALSE & is.na(totest1[,paste("af"         , pop, sep = "_")]) == FALSE, "id"]}
    
    types     = sort  (qtls2test[qtls2test$transcript_id == study2, "type"     ])
    phenotype = unique(qtls2test[qtls2test$transcript_id == study2, "phenotype"])
    
    out = as.data.frame(rbindlist(lapply(types, function(type)
    {
        totest_type = totest2[totest2$type == type,]
        variants2   = totest_type$id
        variants    = intersect(variants1, variants2)

        if(length(variants) > 100)
        {
            dataset1        = create_dataset(study1, trait_type1, totest1    , variants, pop      , populations[populations != "meta"], manifest)
            dataset2        = create_dataset(study2, trait_type2, totest_type, variants, phenotype, c()                               , manifest)
            coloc_mapped    = coloc.abf(dataset1 = dataset1, dataset2 = dataset2) 
            probs           = as.data.frame(t(coloc_mapped$summary))
            myres           = coloc_mapped$results
            myres           = myres[, c(which(colnames(myres) == "snp"), ncol(myres))]
            colnames(myres) = c("id", "pp_snp")
            myres           = cbind(data.frame(locus         = locus,
                                               gwas          = study1,
                                               pop           = pop,
                                               transcript_id = study2,
                                               type          = type
                                              ) , myres)
            myres           = myres[order(myres$pp_snp, decreasing = TRUE), ]
            out             = cbind(probs, myres[1, ])
        }else
        {
            out = data.frame(nsnps = 0, PP.H0.abf = 1, PP.H1.abf = 0, PP.H2.abf = 0, PP.H3.abf = 0, PP.H4.abf = 0, 
                             locus = locus, gwas = study1, pop = pop, transcript_id = study2, type = type, id = "", pp_snp = 0
                            )
        }

        return(out)
    })), stringsAsFactors = FALSE)
    return(out)
}

run_coloc = function(locus, gwas, loci, manifest, totest_qtl, qtls2test, intersected)
{
    chrom             = loci    [locus , "chrom" ]
    from              = loci    [locus , "from"  ]
    to                = loci    [locus , "to"    ]
    coord             = paste0(chrom, ":", from, "-", to)
    totest_gwas       = get_gwas_data(coord, manifest[gwas, "sumstat_file"])
    populations       = c("meta", unlist(strsplit(manifest[gwas, "pops"], ",")))
    
    out = as.data.frame(rbindlist(lapply(names(totest_qtl), function(transcript_id)
    {
        as.data.frame(rbindlist(lapply(populations, function(pop){run_coloc_by_pop(locus, gwas, transcript_id, pop, populations, totest_gwas, totest_qtl[[transcript_id]], qtls2test, manifest)})), stringsAsFactors = FALSE)
    })), stringsAsFactors = FALSE)
    
    return(out)
}

#ii    = 1
#jj    = 2

gwas                  = "biomarkers-30600-both_sexes-irnt"


x = run_coloc(locus, gwas, loci, manifest, totest_qtl, qtls2test, intersected)

str(x)

#out = as.data.frame(rbindlist(lapply(1:(nrow(manifest) - 1), function(ii)
#{
#    id1 = manifest[ii, "id"]
#    return(as.data.frame(rbindlist(lapply((ii + 1):nrow(manifest), function(jj)
#    {
#        id2   = manifest[jj, "id"]
#        
#        return(run_coloc_gwas(locus, id1, id2, loci, manifest))
#    })), stringsAsFactors = FALSE))
#})), stringsAsFactors = FALSE)
#
#fwrite(out, paste0("pipeline/2.1.coloc/coloc_by_locus/", locus, ".txt"), sep = "\t", col.names = TRUE, row.names = FALSE)




PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
  0.91900   0.01990   0.05790   0.00125   0.00159 
[1] "PP abf for shared variant: 0.159%"
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
  0.94700   0.02050   0.03010   0.00065   0.00139 
[1] "PP abf for shared variant: 0.139%"
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 0.930000  0.009330  0.058600  0.000587  0.001380 
[1] "PP abf for shared variant: 0.138%"
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 0.959000  0.009620  0.030500  0.000305  0.000790 
[1] "PP abf for shared variant: 0.079%"
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
  0.91400   0.02240   0.05760   0.00141   0.00422 
[1] "PP abf for shared variant: 0.422%"
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 0.944000  0.023100  0.030000  0.000734  0.001980 
[1] "PP abf for shared variant: 0.198%"
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
  0.92300   0.01480   0.05820   0.00093   0.00297 
[1] "PP abf for shared variant: 0.297%"
PP.H0.

In [124]:
get_qtl_data = function(qtl_file)
{
    indata = fread(qtl_file, sep = "\t", header = TRUE, data.table = FALSE)
    
    return(indata)
}

get_gwas_data = function(coord, gwas_file)
{
    my_head              = colnames(fread(cmd = paste("zcat", gwas_file, "|", "head -n 2"), sep = "\t", header = TRUE, data.table = FALSE))
    gwas_data            = suppressWarnings(tabix.read.table(gwas_file, coord, col.names = TRUE, stringsAsFactors = FALSE))
    colnames(gwas_data)  = my_head
    gwas_data$id         = paste("VAR", gwas_data$chr, gwas_data$pos, gwas_data$ref, gwas_data$alt, sep = "_")
    rownames(gwas_data)  = gwas_data$id
    
    return(gwas_data)
}

create_dataset = function(study, trait_type, totest, variants, pop, populations, manifest)
{
    if(trait_type %in% c("categorical", "icd10", "phecode"))
    {
        if(pop == "meta"){n = sum(manifest[study, paste("n_cases", populations, sep = "_")]) + sum(manifest[study, paste("n_controls", populations, sep = "_")])}
        if(pop != "meta"){n =     manifest[study, paste("n_cases", pop        , sep = "_")]  +     manifest[study, paste("n_controls", pop        , sep = "_")] }
        
        if(pop == "meta"){s = sum(manifest[study, paste("n_cases", populations, sep = "_")]) / n}
        if(pop != "meta"){s =     manifest[study, paste("n_cases", pop        , sep = "_")]  / n}
        
        totest  = totest[is.na(totest[,paste("pval", pop, sep = "_")]) == FALSE & is.na(totest[,paste("af_controls", pop, sep = "_")]) == FALSE, ]
        dataset = list(snp = variants, pvalues = totest[variants, paste("pval", pop, sep = "_")], N = n, s = s, MAF = totest[variants, paste("af_controls", pop, sep = "_")], type = "cc")
    }
    if(trait_type %in% c("biomarkers", "continuous"))
    {
        if(pop == "meta"){n = sum(manifest[study, paste("n_cases", populations, sep = "_")])}
        if(pop != "meta"){n =     manifest[study, paste("n_cases", pop        , sep = "_")] }
        
        totest  = totest[is.na(totest[,paste("pval", pop, sep = "_")]) == FALSE & is.na(totest[,paste("af", pop, sep = "_")]) == FALSE, ]
        dataset = list(snp = variants, pvalues = totest[variants, paste("pval", pop, sep = "_")], N = n, MAF = totest[variants, paste("af", pop, sep = "_")], type = "quant")
    }
    if(trait_type == "qtl")
    {
        rownames(totest) = totest$id
        phenotype2n      = data.frame(phenotype = c("atac", "chip", "rna", "isoform"), n = c(259,131,966,966))
        dataset = list(snp     = variants, 
                       pvalues = totest     [variants, "pval"], 
                       N       = phenotype2n[phenotype2n$phenotype == pop, "n"], 
                       MAF     = totest     [variants, "af"], 
                       type    = "quant")
    }
    return(dataset)
}

run_coloc_by_pop = function(locus, study1, study2, pop, populations, totest1, totest2, qtls2test, manifest)
{
    trait_type1     = manifest[study1, "trait_type"]
    trait_type2     = "qtl"
    
    if( trait_type1 %in% c("categorical", "icd10", "phecode")){variants1  = totest1[is.na(totest1[,paste("pval", pop, sep = "_")]) == FALSE & is.na(totest1[,paste("af_controls", pop, sep = "_")]) == FALSE, "id"]}
    if( trait_type1 %in% c("biomarkers", "continuous"       )){variants1  = totest1[is.na(totest1[,paste("pval", pop, sep = "_")]) == FALSE & is.na(totest1[,paste("af"         , pop, sep = "_")]) == FALSE, "id"]}
    
    types     = sort  (qtls2test[qtls2test$transcript_id == study2, "type"     ])
    phenotype = unique(qtls2test[qtls2test$transcript_id == study2, "phenotype"])
    
    out = as.data.frame(rbindlist(lapply(types, function(type)
    {
        message("AAAAAAAAAAAA")
        totest_type = totest2[totest2$type == type,]
        variants2   = totest_type$id
        variants    = intersect(variants1, variants2)
        
        message(paste(length(variants1), length(variants2), length(variants)))
        message(paste(variants1[1:10], collapse = "; "))
        message(paste(variants2[1:10], collapse = "; "))

        if(length(variants) > 100)
        {
            message("BBBBBBBBBBBBB")
            dataset1        = create_dataset(study1, trait_type1, totest1    , variants, pop      , populations[populations != "meta"], manifest)
            dataset2        = create_dataset(study2, trait_type2, totest_type, variants, phenotype, c()                               , manifest)
            coloc_mapped    = coloc.abf(dataset1 = dataset1, dataset2 = dataset2) 
            probs           = as.data.frame(t(coloc_mapped$summary))
            myres           = coloc_mapped$results
            myres           = myres[, c(which(colnames(myres) == "snp"), ncol(myres))]
            colnames(myres) = c("id", "pp_snp")
            myres           = cbind(data.frame(locus         = locus,
                                               gwas          = study1,
                                               pop           = pop,
                                               transcript_id = study2,
                                               type          = type
                                              ) , myres)
            myres           = myres[order(myres$pp_snp, decreasing = TRUE), ]
            out             = cbind(probs, myres[1, ])
        }else
        {
            out = data.frame(nsnps = 0, PP.H0.abf = 1, PP.H1.abf = 0, PP.H2.abf = 0, PP.H3.abf = 0, PP.H4.abf = 0, 
                             locus = locus, gwas = study1, pop = pop, transcript_id = study2, type = type, id = "", pp_snp = 0
                            )
        }

        return(out)
    })), stringsAsFactors = FALSE)
    return(out)
}

run_coloc = function(locus, gwas, loci, manifest, totest_qtl, qtls2test, intersected)
{
    chrom       = loci    [locus , "chrom" ]
    from        = loci    [locus , "from"  ]
    to          = loci    [locus , "to"    ]
    coord       = paste0(chrom, ":", from, "-", to)
    totest_gwas = get_gwas_data(coord, manifest[gwas, "sumstat_file"])
    populations = c("meta", unlist(strsplit(manifest[gwas, "pops"], ",")))
    
    out = as.data.frame(rbindlist(lapply(names(totest_qtl)[1:2], function(transcript_id)
    {
        as.data.frame(rbindlist(lapply(populations, function(pop){run_coloc_by_pop(locus, gwas, transcript_id, pop, populations, totest_gwas, totest_qtl[[transcript_id]], qtls2test, manifest)})), stringsAsFactors = FALSE)
    })), stringsAsFactors = FALSE)
    
    return(out)
}

locus                 = "1_965800_2544414"

manifest              = add_rownames(fread("pipeline/1.1.sumstats/manifest.txt"               , sep = "\t", header = TRUE, data.table = FALSE))
loci                  = add_rownames(fread("pipeline/1.2.genomewide_significant_loci/loci.txt", sep = "\t", header = TRUE, data.table = FALSE))
qtls2test             = fread("pipeline/4.1.coloc_qtls/qtls2test.txt"                         , sep = "\t", header = TRUE, data.table = FALSE)
intersected           = fread("pipeline/4.1.coloc_qtls/intersected_qtls_loci.txt"             , sep = "\t", header = TRUE, data.table = FALSE)
intersected           = intersected[intersected$locus == locus,]
rownames(intersected) = intersected$transcript_id
studies               = unlist(strsplit(loci[locus, "ids"], ","))
manifest              = manifest[studies,]
totest_qtl            = lapply(intersected$qtl_file, get_qtl_data)
names(totest_qtl)     = intersected$transcript_id

#out = as.data.frame(rbindlist(lapply(studies[1:2], function(gwas)
out = lapply(studies[1:2], function(gwas)
{
	message(gwas)
	return(run_coloc(locus, gwas, loci, manifest, totest_qtl, qtls2test, intersected))
})#), stringsAsFactors = FALSE)

#fwrite(out, paste0("pipeline/4.1.coloc_qtls/coloc_by_locus/", locus, ".txt"), sep = "\t", col.names = TRUE, row.names = FALSE)

str(out)

biomarkers-30600-both_sexes-irnt

AAAAAAAAAAAA

5107 553 0

VAR_1_965939_A_T; VAR_1_966435_T_A; VAR_1_967000_C_T; VAR_1_967658_C_T; VAR_1_968226_C_A; VAR_1_969659_G_A; VAR_1_969773_C_T; VAR_1_970215_G_C; VAR_1_971224_A_G; VAR_1_971367_T_C

1_1143451_A_G; 1_1143657_C_G; 1_1144499_GGA_G; 1_1144653_G_A; 1_1144696_T_C; 1_1145411_G_A; 1_1145441_A_G; 1_1145467_C_G; 1_1145481_CA_C; 1_1145481_CAA_C

AAAAAAAAAAAA

5107 553 0

VAR_1_965939_A_T; VAR_1_966435_T_A; VAR_1_967000_C_T; VAR_1_967658_C_T; VAR_1_968226_C_A; VAR_1_969659_G_A; VAR_1_969773_C_T; VAR_1_970215_G_C; VAR_1_971224_A_G; VAR_1_971367_T_C

1_1143451_A_G; 1_1143657_C_G; 1_1144499_GGA_G; 1_1144653_G_A; 1_1144696_T_C; 1_1145411_G_A; 1_1145441_A_G; 1_1145467_C_G; 1_1145481_CA_C; 1_1145481_CAA_C

AAAAAAAAAAAA

5092 553 0

VAR_1_965939_A_T; VAR_1_967000_C_T; VAR_1_967658_C_T; VAR_1_968226_C_A; VAR_1_969659_G_A; VAR_1_969773_C_T; VAR_1_970215_G_C; VAR_1_971224_A_G; VAR_1_971367_T_C; VAR_1_972134_C_T

1_1143451_A_G; 1_1143657_C_G; 1_114449

List of 2
 $ :'data.frame':	21 obs. of  13 variables:
  ..$ nsnps        : num [1:21] 0 0 0 0 0 0 0 0 0 0 ...
  ..$ PP.H0.abf    : num [1:21] 1 1 1 1 1 1 1 1 1 1 ...
  ..$ PP.H1.abf    : num [1:21] 0 0 0 0 0 0 0 0 0 0 ...
  ..$ PP.H2.abf    : num [1:21] 0 0 0 0 0 0 0 0 0 0 ...
  ..$ PP.H3.abf    : num [1:21] 0 0 0 0 0 0 0 0 0 0 ...
  ..$ PP.H4.abf    : num [1:21] 0 0 0 0 0 0 0 0 0 0 ...
  ..$ locus        : chr [1:21] "1_965800_2544414" "1_965800_2544414" "1_965800_2544414" "1_965800_2544414" ...
  ..$ gwas         : chr [1:21] "biomarkers-30600-both_sexes-irnt" "biomarkers-30600-both_sexes-irnt" "biomarkers-30600-both_sexes-irnt" "biomarkers-30600-both_sexes-irnt" ...
  ..$ pop          : chr [1:21] "meta" "meta" "AFR" "AFR" ...
  ..$ transcript_id: chr [1:21] "atac_chr1_1242748_1244743" "atac_chr1_1242748_1244743" "atac_chr1_1242748_1244743" "atac_chr1_1242748_1244743" ...
  ..$ type         : int [1:21] 0 1 0 1 0 1 0 1 0 1 ...
  ..$ id           : chr [1:21] "" "" "" "" ...
  ..$ pp

In [82]:
str(intersected)

'data.frame':	120 obs. of  5 variables:
 $ transcript_id: chr  "atac_chr1_1242748_1244743" "atac_chr1_1259650_1261092" "atac_chr1_1293173_1296384" "atac_chr1_1440087_1441150" ...
 $ locus        : chr  "1_965800_2544414" "1_965800_2544414" "1_965800_2544414" "1_965800_2544414" ...
 $ gene_id      : chr  "atac_chr1_1242748_1244743" "atac_chr1_1259650_1261092" "atac_chr1_1293173_1296384" "atac_chr1_1440087_1441150" ...
 $ phenotype    : chr  "atac" "atac" "atac" "atac" ...
 $ qtl_file     : chr  "/frazer01/projects/CARDIPS/analysis/cardiac_qtls_combined/input/qtl/processing/atac/qtl.atac_chr1_1242748_1244743.txt" "/frazer01/projects/CARDIPS/analysis/cardiac_qtls_combined/input/qtl/processing/atac/qtl.atac_chr1_1259650_1261092.txt" "/frazer01/projects/CARDIPS/analysis/cardiac_qtls_combined/input/qtl/processing/atac/qtl.atac_chr1_1293173_1296384.txt" "/frazer01/projects/CARDIPS/analysis/cardiac_qtls_combined/input/qtl/processing/atac/qtl.atac_chr1_1440087_1441150.txt" ...


In [87]:
str(qtls2test)

'data.frame':	40176 obs. of  8 variables:
 $ gene_id      : chr  "ENSG00000099260.11_5" "ENSG00000099260.11_5" "ENSG00000137996.12_3" "ENSG00000156876.10_6" ...
 $ gene_name    : chr  "PALMD" "PALMD" "RTCA" "SASS6" ...
 $ transcript_id: chr  "ENSG00000099260.11_5" "ENSG00000099260.11_5" "ENSG00000137996.12_3" "ENSG00000156876.10_6" ...
 $ type         : int  0 1 2 1 0 0 0 0 0 1 ...
 $ chrom        : int  1 1 1 1 1 1 1 1 1 1 ...
 $ start        : int  100111669 100111669 100731763 100549119 100614005 100434907 100549119 100731763 100315917 9732484 ...
 $ end          : int  100160097 100160097 100758325 100598511 100643829 100501190 100598511 100758325 100389579 9747613 ...
 $ phenotype    : chr  "rna" "rna" "rna" "rna" ...


In [79]:
a1[grepl("1329332", a1) == TRUE]