In [144]:
setwd("/frazer01/projects/CARDIPS/analysis/cardiac_eqtls")

source("script/packages.R"  )
source("script/input_data.R")
source("script/functions.R" )
source("script/colors.R"    )

library(shape)

In [2]:
# Read every input as a tab-separated table with a header row, returned as a
# plain data.frame (data.table = FALSE). Paths are relative to the working
# directory set at the top of the notebook.
geneinfo_gene       = fread("pipeline/1.2.expression/gene_info.txt"                          , sep = "\t", header = TRUE , data.table = FALSE)
geneinfo_isoform    = fread("pipeline/1.2.expression/isoform_info.txt"                       , sep = "\t", header = TRUE , data.table = FALSE)
diffexp             = fread("pipeline/4.1.differential_expression/diffexp.txt"               , sep = "\t", header = TRUE , data.table = FALSE)
diffexp_cell        = fread("pipeline/4.1.differential_expression/diffexp_cell.txt"          , sep = "\t", header = TRUE , data.table = FALSE)
diffexp_cell_tissue = fread("pipeline/4.1.differential_expression/diffexp_cell_tissue.txt"   , sep = "\t", header = TRUE , data.table = FALSE)
coloc_gene          = fread("pipeline/6.1.coloc_gwas/coloc.eqtls.gene.txt"                   , sep = "\t", header = TRUE , data.table = FALSE)
coloc_isoform       = fread("pipeline/6.1.coloc_gwas/coloc.eqtls.isoform.txt"                , sep = "\t", header = TRUE , data.table = FALSE)
eqtl_genes          = fread("pipeline/3.2.eqtls/eqtls/cardiac_eqtls.gene.egenes.txt"         , sep = "\t", header = TRUE , data.table = FALSE)
int_genes           = fread("pipeline/3.2.eqtls/eqtls/cardiac_eqtls.gene.interactions.txt"   , sep = "\t", header = TRUE , data.table = FALSE)
eqtl_isoforms       = fread("pipeline/3.2.eqtls/eqtls/cardiac_eqtls.isoform.egenes.txt"      , sep = "\t", header = TRUE , data.table = FALSE)
int_isoforms        = fread("pipeline/3.2.eqtls/eqtls/cardiac_eqtls.isoform.interactions.txt", sep = "\t", header = TRUE , data.table = FALSE)

# External input: unlike the project-relative paths above, this is an absolute
# path inside another user's home directory.
# NOTE(review): consider copying it under pipeline/ so the notebook is reproducible.
diffexp_quadrant = fread("/frazer01/home/jennifer/projects/cardiac_eqtls/diffexp/results/summary.cell_stage_associations.txt", sep = "\t", header = TRUE , data.table = FALSE)



In [3]:
# Derived columns used by build_tables().

# Label of each pairwise comparison: "<tissue1>_vs_<tissue2>".
diffexp[["tissue"]] = with(diffexp, paste(tissue1, tissue2, sep = "_vs_"))

# Strip the "cibersort.regular." prefix from the cell-type names.
diffexp_cell       [["cell_type"]] = gsub("cibersort.regular\\.", "", diffexp_cell       [["cell_type"]])
diffexp_cell_tissue[["cell_type"]] = gsub("cibersort.regular\\.", "", diffexp_cell_tissue[["cell_type"]])

# Combined key: "<cell_type>.<tissue1>_vs_<tissue2>".
diffexp_cell_tissue[["tissue2cell"]] = with(diffexp_cell_tissue, paste(cell_type, paste(tissue1, tissue2, sep = "_vs_"), sep = "."))
diffexp_quadrant   [["tissue2cell"]] = with(diffexp_quadrant   , paste(cell_type, paste(tissue1, tissue2, sep = "_vs_"), sep = "."))

# A feature is flagged for a tissue when it is stage-associated, its q-value in
# that tissue is below 0.05 and its effect size in that tissue is positive.
diffexp_quadrant[["is_tissue1"]] = FALSE
diffexp_quadrant[["is_tissue2"]] = FALSE

diffexp_quadrant[with(diffexp_quadrant, is_stage_associated == TRUE & stage.tissue1_qval < 0.05 & stage.tissue1_beta > 0), "is_tissue1"] = TRUE
diffexp_quadrant[with(diffexp_quadrant, is_stage_associated == TRUE & stage.tissue2_qval < 0.05 & stage.tissue2_beta > 0), "is_tissue2"] = TRUE


In [5]:
build_tables = function(name, geneinfo, diffexp, diffexp_cell, diffexp_quadrant, eqtls, ints, coloc)
{
    # Build a wide table of logical flags with one row per row of `geneinfo`
    # and write it to pipeline/X.1.combine/combined.<name>.txt.
    #
    # Arguments:
    #   name             value of the `type` column to keep in the differential
    #                    expression tables (called with "gene_tpm" / "isoform_use")
    #   geneinfo         must provide gene_id, gene_name, transcript_id
    #   diffexp          pairwise tests: tissue, tissue1, tissue2, diffexp, beta, type, transcript_id
    #   diffexp_cell     cell-type tests: cell_type, qval, beta, type, transcript_id
    #   diffexp_quadrant tissue2cell, tissue1, tissue2, is_tissue1, is_tissue2, type, transcript_id
    #   eqtls            egene, transcript_id
    #   ints             interaction, cell, transcript_id
    #   coloc            PP.H4.abf, transcript_id
    #
    # Returns the flag table (also written to disk as a side effect).
    out = geneinfo[,c("gene_id", "gene_name", "transcript_id")]
    
    # Pairwise differential expression: one flag for "any difference" and one
    # per direction (beta > 0 -> tissue1 column, beta < 0 -> tissue2 column).
    for(tissue in sort(unique(diffexp$tissue)))
    {
        pair    = diffexp[diffexp$tissue == tissue,]
        this    = pair[pair$diffexp == TRUE & pair$type == name,]
        
        # Take the tissue labels from the unfiltered comparison: with no
        # significant row for `name`, labels read from `this` would be empty and
        # both direction columns would collapse into one malformed
        # "diffexp.<pair>." column.
        tissue1 = pair[1, "tissue1"]
        tissue2 = pair[1, "tissue2"]
        
        mycol        = paste("diffexp", tissue, sep = ".")
        out[, mycol] = FALSE
        out[out$transcript_id %in% this[, "transcript_id"], mycol] = TRUE
        
        mycol        = paste("diffexp", tissue, tissue1, sep = ".")
        out[, mycol] = FALSE
        out[out$transcript_id %in% this[this$beta > 0, "transcript_id"], mycol] = TRUE
        
        mycol        = paste("diffexp", tissue, tissue2, sep = ".")
        out[, mycol] = FALSE
        out[out$transcript_id %in% this[this$beta < 0, "transcript_id"], mycol] = TRUE
    }
    
    # Cell-type association: q-value <= 0.05 with a positive effect size.
    for(cell in sort(unique(diffexp_cell$cell_type)))
    {
        mycol        = paste("diffexp_cell", cell, sep = ".")
        out[, mycol] = FALSE
        out[out$transcript_id %in% diffexp_cell[diffexp_cell$cell_type == cell & diffexp_cell$qval <= 0.05 & diffexp_cell$beta > 0 & diffexp_cell$type == name, "transcript_id"], mycol] = TRUE
    }
    
    # Quadrant flags (is_tissue1 / is_tissue2) for each cell-type x tissue-pair key.
    for(cell in sort(unique(diffexp_quadrant$tissue2cell)))
    {
        comparison = diffexp_quadrant[diffexp_quadrant$tissue2cell == cell,]
        this       = comparison[comparison$type == name,]
        
        # Same reasoning as above: the labels must not depend on `this` being non-empty.
        tissue1    = comparison[1, "tissue1"]
        tissue2    = comparison[1, "tissue2"]
        
        mycol        = paste("diffexp_quadrant", cell, tissue1, sep = ".")
        out[, mycol] = FALSE
        out[out$transcript_id %in% this[this$is_tissue1 == TRUE, "transcript_id"], mycol] = TRUE
        mycol        = paste("diffexp_quadrant", cell, tissue2, sep = ".")
        out[, mycol] = FALSE
        out[out$transcript_id %in% this[this$is_tissue2 == TRUE, "transcript_id"], mycol] = TRUE
    }
    
    # Has at least one eQTL.
    out$egene = FALSE
    out[out$transcript_id %in% eqtls[eqtls$egene == TRUE, "transcript_id"], "egene"] = TRUE
    
    # eQTL interaction flags. The interaction name decides the column prefix:
    # "stage" for ipsc_cvpc/adult, "tissue" for the listed tissues, "cell" otherwise.
    tissues = c("heart", "arteria", "heart_atrium", "heart_ventricle", "arteria_aorta", "arteria_coronary")
    for(cell in sort(unique(ints$interaction)))
    {
        tissue       = "cell"
        if(cell %in% c("ipsc_cvpc", "adult")){tissue = "stage" }
        if(cell %in% tissues                ){tissue = "tissue"}
        
        mycol        = paste("eqtl_interaction", tissue, gsub("cibersort.regular\\.", "", cell), sep = ".")
        out[, mycol] = FALSE
        # `ints$cell` is the logical column marking a significant interaction
        # (it is unrelated to the loop variable `cell`).
        out[out$transcript_id %in% ints[ints$interaction == cell & ints$cell == TRUE, "transcript_id"], mycol] = TRUE
    }
    
    # Colocalizes with at least one GWAS trait (PP.H4 > 0.5).
    out$gwas = FALSE
    out[out$transcript_id %in% coloc[coloc$PP.H4.abf > 0.5, "transcript_id"], "gwas"] = TRUE
    
    fwrite(out, paste("pipeline/X.1.combine", paste("combined", name, "txt", sep = "."), sep = "/"), sep = "\t", col.names = TRUE, row.names = FALSE)
    
    return(out)
}

# Build the gene-level and the isoform-level flag tables.
tab_gene    = build_tables(name = "gene_tpm"   , geneinfo = geneinfo_gene   , diffexp = diffexp, diffexp_cell = diffexp_cell, diffexp_quadrant = diffexp_quadrant, eqtls = eqtl_genes   , ints = int_genes   , coloc = coloc_gene   )
tab_isoform = build_tables(name = "isoform_use", geneinfo = geneinfo_isoform, diffexp = diffexp, diffexp_cell = diffexp_cell, diffexp_quadrant = diffexp_quadrant, eqtls = eqtl_isoforms, ints = int_isoforms, coloc = coloc_isoform)

# Number of TRUE entries per flag column (the first three columns are identifiers).
colSums(tab_gene   [, -(1:3)])
colSums(tab_isoform[, -(1:3)])



# Find interesting candidates (genes)

In [7]:
# Short alias for the isoform table; the following cells refer to it as tab_isof.
tab_isof = tab_isoform

In [8]:
# Print "; "-separated gene names for four candidate sets: eGenes/eIsoforms with
# a stage-specific eQTL interaction that are overexpressed at the same stage and
# colocalize with GWAS. Genes use the cardiac-muscle quadrant flags; isoforms use
# the plain ipsc_cvpc_vs_heart differential-expression flags.
# gene_name is not de-duplicated here, so a gene can be listed more than once in
# the isoform messages (e.g. XRCC3 appears twice in the output below).
message(paste("iPSC-CVPC-specific eGenes that are also overexpressed in iPSC-CVPCs and colocalize with GWAS"   , paste(tab_gene[tab_gene$diffexp_quadrant.cardiac_muscle.ipsc_cvpc_vs_heart.ipsc_cvpc == TRUE & tab_gene$egene == TRUE & tab_gene$eqtl_interaction.stage.ipsc_cvpc == TRUE & tab_gene$gwas == TRUE, "gene_name"], collapse = "; "), sep = ": "))
message(paste("Adult-specific eGenes that are also overexpressed in adult heart and colocalize with GWAS"      , paste(tab_gene[tab_gene$diffexp_quadrant.cardiac_muscle.ipsc_cvpc_vs_heart.heart     == TRUE & tab_gene$egene == TRUE & tab_gene$eqtl_interaction.stage.adult     == TRUE & tab_gene$gwas == TRUE, "gene_name"], collapse = "; "), sep = ": "))
message(paste("iPSC-CVPC-specific eIsoforms that are also overexpressed in iPSC-CVPCs and colocalize with GWAS", paste(tab_isof[tab_isof$diffexp.ipsc_cvpc_vs_heart.ipsc_cvpc                         == TRUE & tab_isof$egene == TRUE & tab_isof$eqtl_interaction.stage.ipsc_cvpc == TRUE & tab_isof$gwas == TRUE, "gene_name"], collapse = "; "), sep = ": "))
message(paste("Adult-specific eIsoforms that are also overexpressed in adult heart and colocalize with GWAS"   , paste(tab_isof[tab_isof$diffexp.ipsc_cvpc_vs_heart.heart                             == TRUE & tab_isof$egene == TRUE & tab_isof$eqtl_interaction.stage.adult     == TRUE & tab_isof$gwas == TRUE, "gene_name"], collapse = "; "), sep = ": "))

# Gene names that have a GWAS-colocalizing, stage-specific eIsoform but no
# GWAS-colocalizing, stage-specific eGene (setdiff on gene_name, so the result is unique).
message(paste("iPSC-CVPC-specific eIsoforms that colocalize with GWAS, but their associated eGene does not", paste(setdiff(tab_isof[tab_isof$gwas == TRUE & tab_isof$eqtl_interaction.stage.ipsc_cvpc == TRUE, "gene_name"], tab_gene[tab_gene$gwas == TRUE & tab_gene$eqtl_interaction.stage.ipsc_cvpc == TRUE, "gene_name"]), collapse = "; "), sep = ": "))
message(paste("Adult-specific eIsoforms that colocalize with GWAS, but their associated eGene does not"    , paste(setdiff(tab_isof[tab_isof$gwas == TRUE & tab_isof$eqtl_interaction.stage.adult     == TRUE, "gene_name"], tab_gene[tab_gene$gwas == TRUE & tab_gene$eqtl_interaction.stage.adult     == TRUE, "gene_name"]), collapse = "; "), sep = ": "))


iPSC-CVPC-specific eGenes that are also overexpressed in iPSC-CVPCs and colocalize with GWAS: LINC01405; ENPP4; TMEM71

Adult-specific eGenes that are also overexpressed in adult heart and colocalize with GWAS: PFKFB2; SLC22A18; CARNS1; AC116903.1; PGPEP1L; CIAO3; GNAO1; LINC01535; PIGF; LAMA5-AS1; KREMEN1; MKRN2; PDZRN3; AC117489.1; PFN2; PLD1; FBN2; HDDC2; DMTN; PEBP4

iPSC-CVPC-specific eIsoforms that are also overexpressed in iPSC-CVPCs and colocalize with GWAS: HES4; ADAM15; LGALS8; GPR137; CIB1; NOMO3; AC005670.3; AP3D1; HLA-C; NOD1

Adult-specific eIsoforms that are also overexpressed in adult heart and colocalize with GWAS: PALMD; FMO2; MS4A7; XRRA1; TMEM255B; MLH3; ATXN3; XRCC3; XRCC3; HAGHL; EMP2; RPL13; NUP88; FAM106A; FAM106A; AC005670.3; PCAT19; ZNF814; ZSCAN18; USP34; AC073254.1; SERHL; FAM118A; PIGG; NPR3; ITGA1; ATG10; MRNIP; NOTCH4; HLA-DQA1; HLA-DPB1; HLA-DPB1; TMEM63B; PRKRIP1

iPSC-CVPC-specific eIsoforms that colocalize with GWAS, but their associated eGene does no

In [25]:
# GWAS trait manifest. Later cells read its id, description, category and
# filename columns (filename is passed to tabix/zcat).
manifest = fread("pipeline/6.1.coloc_gwas/traits.manifest.txt", sep = "\t", header = TRUE, data.table = FALSE)


In [79]:
# Colocalization between pairs of eQTL signals; coloc0 keeps the raw table.
coloc0 = fread("pipeline/3.2.eqtls/eqtl_overlap/cardiac_eqtls.coloc_by_gene.txt", sep = "\t", header = TRUE, data.table = FALSE)

# Keep the pairs whose second member is a gene (identifier contains "ENSG").
coloc  = coloc0[grepl("ENSG", coloc0$transcript_id2), c("transcript_id1", "transcript_id2", "type1", "type2", "PP.H3.abf", "PP.H4.abf")]

# For every (first member, its signal type) keep the highest PP.H4 against any
# gene signal; the result has columns transcript_id, type and ppa.
coloc  = aggregate(list(ppa = coloc$PP.H4.abf), by = list(transcript_id = coloc$transcript_id1, type = coloc$type1), FUN = max)

In [257]:
# Ad-hoc lookup for one hard-coded transcript: all GWAS traits it colocalizes
# with (PP.H4 > 0.5), annotated with the trait description/category from the
# manifest and shown from the strongest colocalization down.
a = merge(coloc_isoform[ coloc_isoform$transcript_id == "ENST00000617246.4_3" & coloc_isoform$PP.H4.abf > 0.5, c("id", "chrom", "pos", "ref", "alt", "PP.H4.abf", "pp_snp", "trait")], manifest[,c("id", "description", "category")], by.x = "trait", by.y = "id")
a[order(a$PP.H4.abf, decreasing = TRUE),]

Unnamed: 0_level_0,trait,id,chrom,pos,ref,alt,PP.H4.abf,pp_snp,description,category
Unnamed: 0_level_1,<chr>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<chr>
7,continuous-DBP-both_sexes-combined_medadj_raw,VAR_3_188066953_T_G,3,188066953,T,G,0.995601,0.9677503,"Diastolic blood pressure, combined automated + manual reading, adjusted by medication",
15,continuous-MAP-both_sexes-combined_medadj_raw,VAR_3_188066953_T_G,3,188066953,T,G,0.9954974,0.9704706,"Mean arterial pressure, combined automated + manual reading, adjusted by medication",
6,continuous-DBP-both_sexes-combined_medadj_irnt,VAR_3_188066953_T_G,3,188066953,T,G,0.9954903,0.9668499,"Diastolic blood pressure, combined automated + manual reading, adjusted by medication",
14,continuous-MAP-both_sexes-combined_medadj_irnt,VAR_3_188066953_T_G,3,188066953,T,G,0.9953985,0.9695669,"Mean arterial pressure, combined automated + manual reading, adjusted by medication",
4,continuous-DBP-both_sexes-auto_medadj_raw,VAR_3_188066953_T_G,3,188066953,T,G,0.9945712,0.9689628,"Diastolic blood pressure, automated reading, adjusted by medication",
3,continuous-DBP-both_sexes-auto_medadj_irnt,VAR_3_188066953_T_G,3,188066953,T,G,0.9944503,0.9681868,"Diastolic blood pressure, automated reading, adjusted by medication",
11,continuous-MAP-both_sexes-auto_medadj_raw,VAR_3_188066953_T_G,3,188066953,T,G,0.9914709,0.9721203,"Mean arterial pressure, automated reading, adjusted by medication",
10,continuous-MAP-both_sexes-auto_medadj_irnt,VAR_3_188066953_T_G,3,188066953,T,G,0.9904437,0.9713708,"Mean arterial pressure, automated reading, adjusted by medication",
5,continuous-DBP-both_sexes-combined_irnt,VAR_3_188066953_T_G,3,188066953,T,G,0.9869303,0.9647412,"Diastolic blood pressure, combined automated + manual reading",
8,continuous-DBP-both_sexes-combined_raw,VAR_3_188066953_T_G,3,188066953,T,G,0.9859885,0.9650935,"Diastolic blood pressure, combined automated + manual reading",


In [281]:
# iPSC-CVPC-specific eIsoforms that colocalize with GWAS but not with a gene-level
# eQTL signal, listed with EVERY colocalizing trait (one output row per trait).
#
# Selection steps:
#   1. isoforms flagged egene + ipsc_cvpc stage interaction + gwas;
#   2. their eQTL signals whose lead variant lies within [start, end] of the eqtl_isoforms row;
#   3. signals (transcript + type) that themselves carry the ipsc_cvpc interaction;
#   4. signals whose best PP.H4 against any gene signal (coloc$ppa) is below 0.5.
tab1                 = unique(tab_isof[tab_isof$egene == TRUE & tab_isof$eqtl_interaction.stage.ipsc_cvpc == TRUE & tab_isof$gwas == TRUE, c("gene_id", "transcript_id", "gene_name")])
tab1                 = merge(tab1, eqtl_isoforms[eqtl_isoforms$egene == TRUE & eqtl_isoforms$pos >= eqtl_isoforms$start & eqtl_isoforms$pos <= eqtl_isoforms$end, c("transcript_id", "type", "id", "chrom", "pos", "ref", "alt", "rsid", "af", "beta", "se", "pval", "fdr", "distance")])
tab1        $tr2type = paste(tab1        $transcript_id, tab1        $type)
int_isoforms$tr2type = paste(int_isoforms$transcript_id, int_isoforms$type)
tab1                 = tab1[tab1$tr2type %in% int_isoforms[ int_isoforms$cell == TRUE & int_isoforms$interaction == "ipsc_cvpc", "tr2type"],]
tab1                 = merge(tab1, coloc, by = c("transcript_id", "type"))
tab1                 = tab1[tab1$ppa < 0.5,]

# seq_len() instead of 1:nrow(): with no candidate left, 1:0 would iterate over c(1, 0).
tab1                 = as.data.frame(rbindlist(lapply(seq_len(nrow(tab1)), function(ii)
{
    this                   = tab1[ii,]
    tr2type                = this[1, "tr2type"]
    # NOTE(review): coloc_isoform$tr2type is not created in any visible cell;
    # presumably it comes from the input file or from an earlier session -- confirm.
    coloc_tab              = coloc_isoform[ coloc_isoform$tr2type == tr2type,]
    coloc_tab              = coloc_tab[coloc_tab$PP.H4.abf > 0.5, ]
    coloc_tab              = coloc_tab[order(coloc_tab$PP.H4.abf, decreasing = TRUE),]
    
    # No colocalizing trait for this exact signal: skip it (rbindlist drops NULL)
    # instead of failing on the column assignment below.
    if(nrow(coloc_tab) == 0){return(NULL)}
    
    coloc_tab$pval_gwas    = 1
    
    # Look up the GWAS p-value of the colocalized variant in each trait's summary statistics.
    for(jj in seq_len(nrow(coloc_tab)))
    {
        chrom           = coloc_tab[jj, "chrom"]
        pos             = coloc_tab[jj, "pos"  ]
        trait           = coloc_tab[jj, "trait"]
        coord           = paste(chrom, ":", pos - 1, "-", pos + 1, sep = "")
        infile          = manifest[manifest$id == trait, "filename"]
        command         = paste("tabix", "-h", infile, coord)
        gwas            = fread(cmd = command, sep = "\t", header = FALSE, data.table = FALSE)
        # Column names come from the first line of the (compressed) summary statistics file.
        colnames(gwas)  = unlist(strsplit(system(paste("zcat", infile, "|", "head", "-n", 1), intern = TRUE), "\t"))
        gwas            = gwas[gwas$pos == pos,]
        gwas$id         = paste("VAR", gwas$chr, gwas$pos, gwas$ref, gwas$alt, sep = "_")
        gwas            = gwas[,c("id", "beta_meta", "se_meta", "pval_meta")]
        
        # First record at that position; NA when the variant is absent from the file.
        coloc_tab[jj, "pval_gwas"] = gwas[1, "pval_meta"]
    }
    
    out = merge(this, coloc_tab[, c("tr2type", "trait", "id", "pval_gwas", "PP.H4.abf")], by = "tr2type", suffixes = c("_eqtl", "_coloc"))
    
    return(out)
})), stringsAsFactors = FALSE)

tab1_all = tab1
fwrite(tab1_all, "pipeline/X.1.combine/eisoforms_no_coloc_with_egene_all.txt", sep = "\t", col.names = TRUE, row.names = FALSE)

In [282]:
# iPSC-CVPC-specific eIsoforms that colocalize with GWAS but not with a gene-level
# eQTL signal, annotated with ONE trait per signal: the colocalizing trait
# (PP.H4 > 0.5) with the smallest GWAS p-value at the colocalized variant.
#
# Selection steps:
#   1. isoforms flagged egene + ipsc_cvpc stage interaction + gwas;
#   2. their eQTL signals whose lead variant lies within [start, end] of the eqtl_isoforms row;
#   3. signals (transcript + type) that themselves carry the ipsc_cvpc interaction;
#   4. signals whose best PP.H4 against any gene signal (coloc$ppa) is below 0.5.
tab1                 = unique(tab_isof[tab_isof$egene == TRUE & tab_isof$eqtl_interaction.stage.ipsc_cvpc == TRUE & tab_isof$gwas == TRUE, c("gene_id", "transcript_id", "gene_name")])
tab1                 = merge(tab1, eqtl_isoforms[eqtl_isoforms$egene == TRUE & eqtl_isoforms$pos >= eqtl_isoforms$start & eqtl_isoforms$pos <= eqtl_isoforms$end, c("transcript_id", "type", "id", "chrom", "pos", "ref", "alt", "rsid", "af", "beta", "se", "pval", "fdr", "distance")])
tab1        $tr2type = paste(tab1        $transcript_id, tab1        $type)
int_isoforms$tr2type = paste(int_isoforms$transcript_id, int_isoforms$type)
tab1                 = tab1[tab1$tr2type %in% int_isoforms[ int_isoforms$cell == TRUE & int_isoforms$interaction == "ipsc_cvpc", "tr2type"],]
tab1                 = merge(tab1, coloc, by = c("transcript_id", "type"))
tab1                 = tab1[tab1$ppa < 0.5,]

# seq_len() instead of 1:nrow(): with no candidate left, 1:0 would iterate over c(1, 0).
tab1                 = as.data.frame(rbindlist(lapply(seq_len(nrow(tab1)), function(ii)
{
    this                   = tab1[ii,]
    tr2type                = this[1, "tr2type"]
    # NOTE(review): coloc_isoform$tr2type is not created in any visible cell;
    # presumably it comes from the input file or from an earlier session -- confirm.
    coloc_tab              = coloc_isoform[ coloc_isoform$tr2type == tr2type,]
    coloc_tab              = coloc_tab[coloc_tab$PP.H4.abf > 0.5, ]
    coloc_tab              = coloc_tab[order(coloc_tab$PP.H4.abf, decreasing = TRUE),]
    
    # No colocalizing trait for this exact signal: skip it (rbindlist drops NULL)
    # instead of failing on the column assignment below.
    if(nrow(coloc_tab) == 0){return(NULL)}
    
    coloc_tab$pval_gwas    = 1
    
    # Look up the GWAS p-value of the colocalized variant in each trait's summary statistics.
    for(jj in seq_len(nrow(coloc_tab)))
    {
        chrom           = coloc_tab[jj, "chrom"]
        pos             = coloc_tab[jj, "pos"  ]
        trait           = coloc_tab[jj, "trait"]
        coord           = paste(chrom, ":", pos - 1, "-", pos + 1, sep = "")
        infile          = manifest[manifest$id == trait, "filename"]
        command         = paste("tabix", "-h", infile, coord)
        gwas            = fread(cmd = command, sep = "\t", header = FALSE, data.table = FALSE)
        # Column names come from the first line of the (compressed) summary statistics file.
        colnames(gwas)  = unlist(strsplit(system(paste("zcat", infile, "|", "head", "-n", 1), intern = TRUE), "\t"))
        gwas            = gwas[gwas$pos == pos,]
        gwas$id         = paste("VAR", gwas$chr, gwas$pos, gwas$ref, gwas$alt, sep = "_")
        gwas            = gwas[,c("id", "beta_meta", "se_meta", "pval_meta")]
        
        # First record at that position; NA when the variant is absent from the file.
        coloc_tab[jj, "pval_gwas"] = gwas[1, "pval_meta"]
    }
    
    # Report the trait with the smallest GWAS p-value. The previous if/else on a
    # 1e-5 threshold selected this same row in both branches, so it is collapsed
    # into a single selection. which.min() ignores missing p-values; when none is
    # available, fall back to the first row (highest PP.H4).
    best = which.min(coloc_tab$pval_gwas)
    if(length(best) == 0){best = 1}
    
    trait = coloc_tab[best, "trait"    ]
    pval  = coloc_tab[best, "pval_gwas"]
    ppa   = coloc_tab[best, "PP.H4.abf"]
    
    this$trait             = trait
    this$trait_description = manifest[manifest$id == trait, "description"]
    this$trait_category    = manifest[manifest$id == trait, "category"   ]
    this$pval_gwas         = pval
    this$ppa_gwas          = ppa
    
    return(this)
})), stringsAsFactors = FALSE)

tab1 = tab1[order(tab1$ppa_gwas, decreasing = TRUE),]

fwrite(tab1, "pipeline/X.1.combine/eisoforms_no_coloc_with_egene.txt", sep = "\t", col.names = TRUE, row.names = FALSE)

In [236]:
# Exon and CDS tables; plot_transcript() reads their transcript_id, start and
# end columns (plus chrom and strand from the exon table).
# NOTE(review): absolute paths under /frazer01/reference/private -- not portable.
exons    = fread("/frazer01/reference/private/Gencode.v34lift37/exon_info.txt"      , sep = "\t", header = TRUE, data.table = FALSE)
cds      = fread("/frazer01/reference/private/Gencode.v34lift37/cds_info.txt"       , sep = "\t", header = TRUE, data.table = FALSE)


In [249]:
plot_sumstats = function(indata, myvar, myrange, ylim = 0, y_axis = "", rsid = FALSE, is_x_axis = TRUE, is_log = FALSE, is_legend = FALSE, ...)
{
    # Draw one panel of per-variant statistics against genomic position.
    #
    #   indata     data.frame with pos, pch, cex, ld_color, pp_snp_isoform (and rsid
    #              when rsid = TRUE) plus the column named by `myvar`
    #   myvar      name of the column to plot on the y axis
    #   myrange    c(from, to) limits of the x axis
    #   ylim       upper y limit; 0 means "use the maximum plotted value"
    #   y_axis     y-axis label
    #   rsid       label the variant with the highest pp_snp_isoform
    #   is_x_axis  draw the x axis
    #   is_log     plot -log10 of the values
    #   is_legend  draw the linkage-disequilibrium colour key
    #   ...        forwarded to par()
    indata$myvar = if(is_log == TRUE) -log10(indata[,myvar]) else indata[,myvar]
    y_top        = if(ylim == 0) max(indata$myvar) else ylim
    
    par(...)
    plot(1,1, type = "n", xlim = myrange, ylim = c(min(indata$myvar), y_top), xlab = "", ylab = "", axes = FALSE)
    axis(2)
    if(is_x_axis == TRUE){axis(1)}
    
    mtext(text = y_axis, side = 2, line = 2.5)
    
    points(indata$pos, indata$myvar, pch = indata$pch, col = "#000000", bg = indata$ld_color, cex = indata$cex)
    
    # Zero line, and a dashed marker at every variant drawn with pch 23.
    abline(h = 0)
    abline(v = indata[indata$pch == 23, "pos"], lty = "dashed", col = "#ff0000")
    
    lead   = indata[which.max(indata$pp_snp_isoform), ]
    x_from = myrange[[1]]
    x_span = myrange[[2]] - myrange[[1]]
    
    # Put the legend on the half of the panel that does not hold the lead variant.
    legend_frac = if(lead[1, "pos"] >= x_from + x_span / 2) 0.01 else 0.6
    
    if(rsid == TRUE)
    {
        # Label above the point, or below it when the point is near the top (> 0.9).
        label_side = if(lead[1, "myvar"] > 0.9) 1 else 3
        
        text(x = lead[1, "pos"], y = lead[1, "myvar"], labels = lead[1, "rsid"], pos = label_side, cex = 2)
    }
    
    if(is_legend == TRUE)
    {
        # Five r2 bins of width 0.2, stacked from 0.8 down to 0.4 of the y range,
        # with the title at 0.9.
        key      = data.frame(r2 = (0:4) / 5, name = paste("(", (0:4) / 5, "-", (1:5) / 5, ")", sep = ""), y = (5:1 + 3)/10 * y_top, color = c("#000088", "#aaffff", "#00ff00", "#ffa500", "#ff0000"))
        legend_x = x_from + x_span * legend_frac
        
        points(x = rep(legend_x, nrow(key)    ), y =   key$y               , pch    = 22      , bg  = key$color, cex = 3)
        text  (x = rep(legend_x, nrow(key) + 1), y = c(key$y, y_top * 9/10), labels = c(paste("", key$name, ""), " Linkage disequilibrium "), pos = 4, cex = 2)
    }
}

plot_scatter = function(indata, x, y, is_log = FALSE)
{
    # Scatter plot of two columns of `indata`, labelled with the column names.
    #
    #   indata  data.frame holding both columns
    #   x, y    column names for the x and y axes
    #   is_log  plot -log10 of both columns (e.g. for p-values)
    #
    # Side effect: sets par(mar = c(4,4,1,1)) on the active device.
    par(mar = c(4,4,1,1))
    
    if(is_log == TRUE)
    {
        indata$x = -log10(indata[,x])
        indata$y = -log10(indata[,y])
    }else
    {
        indata$x = indata[,x]
        indata$y = indata[,y]
    }
    
    # Axis limits are left to plot(); the shared-maximum computation that only
    # served a commented-out variant of this call has been removed.
    plot(indata$x, indata$y, pch = 16, col = "#000000", cex = 0.8, xlab = "", ylab = "")
    
    mtext(x, side = 1, line = 2.5)
    mtext(y, side = 2, line = 2.5)
}

plot_transcript = function(transcript_id, indata, xlims, exons, cds, ...)
{
    # Draw the exon/CDS structure of one transcript along the genomic axis.
    #
    #   transcript_id  transcript to draw
    #   indata         data.frame with pos and pch; variants with pch 23 get a dashed marker
    #   xlims          c(from, to) limits of the x axis (labelled in Mb)
    #   exons          exon table: transcript_id, chrom, strand, start, end
    #   cds            CDS table: transcript_id, start, end
    #   ...            forwarded to par()
    #
    # Exons are drawn as thin boxes, CDS as thick boxes, and direction arrows
    # follow the strand. A transcript without exon records gets an empty panel.
    exons  = exons[exons$transcript_id == transcript_id,]
    cds    = cds  [cds  $transcript_id == transcript_id,]
    chrom  = unique(exons$chrom)
    strand = unique(exons$strand)
    
    par(...)
    plot(1,1, type = "n", xlim = xlims, ylim = c(0, 1), xlab = "", ylab = "", axes = FALSE)
    axis(1, at = pretty(xlims, n = 5), labels = pretty(xlims, n = 5) * 1e-6)
    
    if(nrow(exons) > 0)
    {
        segments(x0 = min(exons$start), x1 = max(exons$end), y0 = 0.5, col = "#0000ff")
        rect    (xleft = exons$start, xright = exons$end, ybottom = 0.3, col = "#0000ff", ytop = 0.7, border = "#0000ff")
    }
    if(nrow(cds  ) > 0){rect    (xleft =     cds  $start , xright =     cds  $end , ybottom = 0.1, col = "#0000ff", ytop = 0.9, border = "#0000ff")}
    
    mtext(text = paste(chrom, "(Mb)"), side = 1, line = 2.5)
    
    abline(v = indata[indata$pch == 23, "pos"], lty = "dashed", col = "#ff0000")
    
    # Direction arrows: 39 evenly spaced anchors across the panel, restricted to
    # the transcript span, joined pairwise in the direction of transcription.
    if(nrow(exons) > 0 & length(strand) == 1)
    {
        toarrow = xlims[[1]] + (1:39) / 40 * (xlims[[2]] - xlims[[1]])
        toarrow = toarrow[toarrow >= min(c(exons$start, cds$start)) & toarrow <= max(c(exons$end, cds$end))]
        
        # At least two anchors are needed for one arrow. With fewer, the former
        # toarrow[1:(length(toarrow) - 1)] indexing ran backwards: it errored
        # for zero anchors and produced an NA endpoint for one.
        if(length(toarrow) >= 2)
        {
            upstream   = toarrow[-length(toarrow)]
            downstream = toarrow[-1]
            
            if(strand == "+"){arrows(x0 = upstream  , y0 = 0.5, x1 = downstream, length = 0.1, angle = 45, col = "#0000ff")}
            if(strand == "-"){arrows(x0 = downstream, y0 = 0.5, x1 = upstream  , length = 0.1, angle = 45, col = "#0000ff")}
        }
    }
    
    invisible(NULL)
}


plot_ex = function(transcript_id, type, trait, geneinfo, manifest, exons, cds)
{
    # Write a five-panel locus figure for one eIsoform signal and one GWAS trait
    # to pipeline/X.1.combine/example.<gene_name>.png.
    #
    # Panels, top to bottom: eIsoform -log10(p), eGene -log10(p), GWAS -log10(p),
    # per-variant colocalization posterior (with LD legend and lead rsid), and
    # the transcript model.
    #
    #   transcript_id  isoform to plot
    #   type           eQTL signal type of the isoform (matched against the `type`
    #                  column of the isoform eQTL and coloc files)
    #   trait          GWAS trait id (matched against manifest$id)
    #   geneinfo       isoform annotation: transcript_id, gene_id, gene_name, chrom, start, end
    #   manifest       trait manifest: id, description, filename
    #   exons, cds     passed to plot_transcript()
    #
    # Reads per-gene/per-isoform files under pipeline/ and queries the trait's
    # summary statistics with tabix. Prints one summary line with message().
    gene            = geneinfo[geneinfo$transcript_id == transcript_id,]
    gene_id         = gene[1, "gene_id"]
    coloc_gene_file = paste("pipeline/6.1.coloc_gwas/coloc.gene", paste("snp", gene_id      , "txt", sep = "."), sep = "/")
    egene           =              fread(paste("pipeline/3.2.eqtls/eqtls_by_gene/cardiac_eqtls.gene"   , paste("qtl"    , gene_id      , "txt", sep = "."), sep = "/"), sep = "\t", header = TRUE, data.table = FALSE)
    eisof           =              fread(paste("pipeline/3.2.eqtls/eqtls_by_gene/cardiac_eqtls.isoform", paste("qtl"    , transcript_id, "txt", sep = "."), sep = "/"), sep = "\t", header = TRUE, data.table = FALSE)
    pp2snp_isof     =              fread(paste("pipeline/6.1.coloc_gwas/coloc.isoform"                 , paste("snp"    , transcript_id, "txt", sep = "."), sep = "/"), sep = "\t", header = TRUE, data.table = FALSE)
    # add_rownames() is defined outside this cell; presumably it moves the first
    # column into the row names so rows can be addressed by variant id -- confirm.
    gtdata          = add_rownames(fread(paste("pipeline/1.3.genotype/use_isoform"                     , paste("gt_data", transcript_id, "txt", sep = "."), sep = "/"), sep = "\t", header = TRUE, data.table = FALSE))
    
    # The gene-level coloc file is optional.
    has_gene_coloc  = file.exists(coloc_gene_file) == TRUE
    
    if(has_gene_coloc)
    {
        pp2snp_gene     = fread(coloc_gene_file, sep = "\t", header = TRUE, data.table = FALSE)
        pp2snp_gene     = pp2snp_gene[pp2snp_gene$trait == trait & pp2snp_gene$type == 0,]
    }
    
    pp2snp_isof     = pp2snp_isof[pp2snp_isof$trait == trait & pp2snp_isof$type == type,]
    egene           = egene[egene$type == 0   ,]
    eisof           = eisof[eisof$type == type,]
    # From here on `egene` holds the isoform statistics plus the gene statistics
    # in *_gene columns, for variants tested in both.
    egene           = merge(eisof, egene[,c("id", "beta", "se", "pval")], by = "id", suffixes = c("", "_gene"))
    infile          = manifest[manifest$id == trait, "filename"]
    myrange         = range(egene$pos)
    coord           = paste(sub("chr", "", gene[1, "chrom"]), ":", myrange[[1]], "-", myrange[[2]], sep = "")
    command         = paste("tabix", "-h", infile, coord)
    # NOTE(review): the other tabix reads in this notebook use header = FALSE with
    # the same "tabix -h" command. If tabix prints no header line for these files,
    # header = TRUE consumes the first variant of the window -- confirm.
    gwas            = fread(cmd = command, sep = "\t", header = TRUE, data.table = FALSE)
    colnames(gwas)  = unlist(strsplit(system(paste("zcat", infile, "|", "head", "-n", 1), intern = TRUE), "\t"))
    gwas$id         = paste("VAR", gwas$chr, gwas$pos, gwas$ref, gwas$alt, sep = "_")
    gwas            = gwas[,c("id", "beta_meta", "se_meta", "pval_meta")]
    colnames(gwas)  = gsub("meta", "gwas", colnames(gwas))
    toplot          = merge(egene, gwas)
    toplot          = merge(toplot, pp2snp_isof[,c("id", "pp_snp")])
    
    if(has_gene_coloc)
    {
        toplot          = merge(toplot, pp2snp_gene[,c("id", "pp_snp")], by = "id", suffixes = c("_isoform", "_gene"))
    }else
    {
        toplot$pp_snp_isoform = toplot$pp_snp
    }
    
    # Lead variant = highest isoform colocalization posterior; r2 of every
    # genotyped variant is computed against it on pairwise-complete samples.
    mylead    = toplot[which.max(toplot$pp_snp_isoform), "id"]
    lddata    = data.frame(id = rownames(gtdata), r2 = 0, ld_color = "#000088", stringsAsFactors = FALSE)
    mylead_gt = as.numeric(gtdata[mylead,])
    lddata$r2 = unlist(lapply(1:nrow(gtdata), function(jj)
    {
        tocor = data.frame(x = mylead_gt, y = as.numeric(gtdata[jj, ]))
        tocor = tocor[is.na(tocor$x) == FALSE & is.na(tocor$y) == FALSE, ]
        out   = (cor(tocor$x, tocor$y))^2
        
        return(out)
    }))
    
    # Colour by r2 bin. which() drops missing r2 (cor() returns NA for a variant
    # with no genotype variation), so those keep the default colour instead of
    # aborting the assignment.
    lddata$ld_color[which(lddata$r2 >= 0.2)] = "#aaffff"
    lddata$ld_color[which(lddata$r2 >= 0.4)] = "#00ff00"
    lddata$ld_color[which(lddata$r2 >= 0.6)] = "#ffa500"
    lddata$ld_color[which(lddata$r2 >= 0.8)] = "#ff0000"
    
    toplot            = merge(toplot, lddata)
    # Shared y scale for the two eQTL panels.
    ylim              = -log10(min(c(toplot$pval, toplot$pval_gene)))
    # Plotted window: the gene body plus 10 kb either side of the lead variant.
    myrange           = range(c(gene$start, gene$end, toplot[toplot$id == mylead, "pos"] - 10000, toplot[toplot$id == mylead, "pos"] + 10000))
    
    # GWAS panel label: first comma-separated part of the trait description, or
    # a cleaned-up trait id when the description is empty.
    if(manifest[manifest$id == trait, "description"] == "")
    {
        trait_description = sub("prescriptions-", "", sub("-both_sexes", "", manifest[manifest$id == trait, "id"]))
    }else
    {
        trait_description = unlist(strsplit(manifest[manifest$id == trait, "description"], ","))[[1]]
    }
    
    # Draw high-r2 variants last; the lead variant is a larger diamond (pch 23),
    # which the panel functions also use to place the dashed vertical marker.
    toplot$cex =  2
    toplot$pch = 21
    toplot     = toplot[order(toplot$r2),]
    
    toplot[toplot$id == mylead, "cex"] =  3
    toplot[toplot$id == mylead, "pch"] = 23
    
    png(filename = paste("pipeline/X.1.combine", paste("example", gene[1, "gene_name"], "png", sep = "."), sep = "/"), width = 6.5, height = 11, units = "in", res = 300)
    # Close the device even if a panel fails, so that an error inside a batch run
    # does not leave PNG devices open.
    on.exit(dev.off(), add = TRUE)
    options(repr.plot.width = 6.5, repr.plot.height = 11)
    # One column, nine rows: panels 1-4 take two rows each, panel 5 takes one.
    layout(cbind(sort(c(rep(1:4, 2), 5))))
    
    plot_sumstats(toplot, "pval"          , myrange, ylim * 1.1, "-log(p) eIsoform"  , FALSE, FALSE, TRUE , FALSE, mar = c(0,4,0.5,1))
    plot_sumstats(toplot, "pval_gene"     , myrange, ylim * 1.1, "-log(p) eGene"     , FALSE, FALSE, TRUE , FALSE, mar = c(0,4,0.5,1))
    plot_sumstats(toplot, "pval_gwas"     , myrange, 0         , trait_description   , FALSE, FALSE, TRUE , FALSE, mar = c(0,4,0.5,1))
    plot_sumstats(toplot, "pp_snp_isoform", myrange, 1         , "PPA colocalization", TRUE , FALSE, FALSE, TRUE , mar = c(0,4,0.5,1))
    
    plot_transcript(transcript_id, toplot, myrange, exons, cds, mar = c(4,4,0.5,1))
    
    message(paste(gene[1, "gene_name"], gene_id, transcript_id, type, toplot[which.max(toplot$pp_snp_isoform), "rsid"]))
    
}

# Single-row run, kept for debugging one example at a time.
ii = 9
#plot_ex(tab1[ii, "transcript_id"], tab1[ii, "type"], tab1[ii, "trait"], geneinfo_isoform, manifest, exons, cds)

# One figure per candidate row. seq_len() keeps the loop empty when tab1 has no
# rows (1:nrow(tab1) would iterate over c(1, 0)).
invisible(lapply(seq_len(nrow(tab1)), function(ii){plot_ex(tab1[ii, "transcript_id"], tab1[ii, "type"], tab1[ii, "trait"], geneinfo_isoform, manifest, exons, cds)}))



LPP ENSG00000145012.13_7 ENST00000617246.4_3 0 rs6787621

KANK2 ENSG00000197256.10_9 ENST00000588787.5_5 1 rs11668313

PRKG1 ENSG00000185532.20_9 ENST00000643582.1_1 0 rs11000060

NOD1 ENSG00000106100.11_7 ENST00000222823.9_3 0 rs1558067

GPR137 ENSG00000173264.14_7 ENST00000536017.1_1 0 rs11231745

MXI1 ENSG00000119950.21_7 ENST00000651516.1_1 0 rs2855469

TOR1AIP1 ENSG00000143337.18_6 ENST00000435319.8_2 0 rs2245425

LGALS8 ENSG00000116977.18_6 ENST00000528782.5_1 0 rs1969746

UCHL3 ENSG00000118939.17_8 ENST00000377595.7_1 0 rs9530456

DKK3 ENSG00000050165.18_5 ENST00000527132.1_2 0 rs142843448

MLST8 ENSG00000167965.18_7 ENST00000397124.5_2 0 rs27425

CIB1 ENSG00000185043.12_6 ENST00000328649.11_3 0 rs8041338



In [195]:
# Print one render_html("<gene_name>") line for every row of geneinfo_isoform whose
# transcript_id appears in tab1; the lines are joined with newlines into a single message.
message(paste0(
    'render_html("',
    geneinfo_isoform[geneinfo_isoform$transcript_id %in% tab1$transcript_id, "gene_name"],
    '")',
    collapse = "\n"
))

render_html("TOR1AIP1")
render_html("LGALS8")
render_html("PRKG1")
render_html("MXI1")
render_html("DKK3")
render_html("GPR137")
render_html("UCHL3")
render_html("CIB1")
render_html("MLST8")
render_html("KANK2")
render_html("LPP")
render_html("NOD1")



In [238]:
# Display tab1, the table whose rows (transcript_id, type, trait) drive the plot_ex() calls above.
tab1

Unnamed: 0_level_0,transcript_id,type,gene_id,gene_name,id,chrom,pos,ref,alt,rsid,⋯,pval,fdr,distance,tr2type,ppa,trait,trait_description,trait_category,pval_gwas,ppa_gwas
Unnamed: 0_level_1,<chr>,<int>,<chr>,<chr>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,⋯,<dbl>,<dbl>,<int>,<chr>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>
10,ENST00000617246.4_3,0,ENSG00000145012.13_7,LPP,VAR_3_188066953_T_G,3,188066953,T,G,rs6787621,⋯,1.033506e-47,9.435914e-45,123760,ENST00000617246.4_3 0,0.0511866064,continuous-MAP-both_sexes-combined_medadj_raw,"Mean arterial pressure, combined automated + manual reading, adjusted by medication",,1.36e-07,0.9954974
9,ENST00000588787.5_5,1,ENSG00000197256.10_9,KANK2,VAR_19_11275811_CTG_C,19,11275811,CTG,C,rs16423,⋯,5.991958e-10,2.81622e-07,10812,ENST00000588787.5_5 1,0.0003362076,continuous-102-both_sexes-irnt,"Pulse rate, automated reading",UK Biobank Assessment Centre > Physical measures > Blood pressure,6.846e-14,0.9933029
11,ENST00000643582.1_1,0,ENSG00000185532.20_9,PRKG1,VAR_10_53668890_T_C,10,53668890,T,C,rs11000060,⋯,5.553895e-14,4.037681e-11,834620,ENST00000643582.1_1 0,0.0244994442,continuous-PP-both_sexes-combined_medadj_irnt,"Pulse pressure, combined automated + manual reading, adjusted by medication",,4.72e-09,0.9682591
1,ENST00000222823.9_3,0,ENSG00000106100.11_7,NOD1,VAR_7_30503280_T_C,7,30503280,T,C,rs1558067,⋯,9.393609999999999e-26,3.8607740000000004e-23,15120,ENST00000222823.9_3 0,0.0261553526,continuous-LDLC-both_sexes-medadj_irnt,"LDL direct, adjusted by medication",,6.431e-06,0.9546053
8,ENST00000536017.1_1,0,ENSG00000173264.14_7,GPR137,VAR_11_64053848_GA_G,11,64053848,GA,G,rs3831423,⋯,2.6323780000000002e-28,7.897134e-26,266,ENST00000536017.1_1 0,0.032516485,categorical-6150-both_sexes-100,Vascular/heart problems diagnosed by doctor,UK Biobank Assessment Centre > Touchscreen > Health and medical history > Medical conditions,3.508e-05,0.9151178
12,ENST00000651516.1_1,0,ENSG00000119950.21_7,MXI1,VAR_10_112041579_T_C,10,112041579,T,C,rs2855469,⋯,6.310338e-33,2.8712039999999998e-30,54966,ENST00000651516.1_1 0,0.0262055309,categorical-6150-both_sexes-100,Vascular/heart problems diagnosed by doctor,UK Biobank Assessment Centre > Touchscreen > Health and medical history > Medical conditions,2.222e-05,0.8427219
5,ENST00000435319.8_2,0,ENSG00000143337.18_6,TOR1AIP1,VAR_1_179858444_G_A,1,179858444,G,A,rs2245425,⋯,2.203381e-55,9.915216e-53,6613,ENST00000435319.8_2 0,0.0146587812,prescriptions-ramipril-both_sexes,,"ACE inhibitor,anti-hypertensive",6.66e-05,0.8262158
7,ENST00000528782.5_1,0,ENSG00000116977.18_6,LGALS8,VAR_1_236695882_C_T,1,236695882,C,T,rs2794787,⋯,9.024229e-06,0.00620867,8597,ENST00000528782.5_1 0,0.1176807258,continuous-PP-both_sexes-combined_irnt,"Pulse pressure, combined automated + manual reading",,1.687e-06,0.7636977
3,ENST00000377595.7_1,0,ENSG00000118939.17_8,UCHL3,VAR_13_76124151_C_T,13,76124151,C,T,rs8192738,⋯,1.268516e-07,6.012765e-05,224,ENST00000377595.7_1 0,0.0223341706,continuous-DBP-both_sexes-auto_medadj_raw,"Diastolic blood pressure, automated reading, adjusted by medication",,3.578e-05,0.7393361
6,ENST00000527132.1_2,0,ENSG00000050165.18_5,DKK3,VAR_11_11985133_GTTTACT_G,11,11985133,GTTTACT,G,rs142843448,⋯,8.239718000000001e-22,7.844210999999999e-19,45044,ENST00000527132.1_2 0,0.010809715,continuous-6033-both_sexes-irnt,Maximum heart rate during fitness test,UK Biobank Assessment Centre > Physical measures > ECG during exercise,0.0005205,0.5672378


# Comments

## iPSC-CVPC-specific eGenes that are also overexpressed in iPSC-CVPCs and colocalize with GWAS: LINC01405; TBX2-AS1; ENPP4; TMEM71
- ***TBX2***: TF involved in cardiac development. The TBX2/TBX2-AS1 locus is known to be associated with blood pressure. Maybe TBX2 and TBX2-AS1 are associated with the same eQTLs? 
- ***ENPP4***: involved in arterial septal defects (GeneCards)
- *TMEM71* and *LINC01405* don't have a very well-defined function 

## Adult-specific eGenes that are also overexpressed in adult heart and colocalize with GWAS: PFKFB2; SLC22A18; CARNS1; AC116903.1; PGPEP1L; CIAO3; GNAO1; ROCK1P1; LINC01535; PIGF; GPR17; LAMA5-AS1; KREMEN1; MKRN2; PDZRN3; AC117489.1; PFN2; PLD1; FBN2; HDDC2; DMTN; PEBP4; AC084024.3
- ***PFKFB2***: synthesis and degradation of fructose-2,6-bisphosphate, a regulatory molecule that controls glycolysis. important regulator of glycolytic flux in cardiac cells ([27802586](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5193105/)). Involved in blood pressure
- ***CARNS1***: Carnosine synthase. Associated with LDL levels and pulse pressure. In animal studies, carnosine has been shown to suppress many biochemical processes that accompany aging and age-related chronic diseases such as obesity, type 2 diabetes and diabetes complications, cardiovascular diseases, cancer and dementia (Introduction in [26439389](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4595442/))
- ***ROCK1P1*** (LITTLE ROCK): pseudogene for ***ROCK1*** (involved in blood pressure by promoting smooth muscle contraction: [20398283](https://bmcgenet.biomedcentral.com/articles/10.1186/1471-2156-11-22)): has 5 exons that share 99% sequence identity with ROCK1, is expressed (especially in smooth muscle) but seems to be an NMD target. This seems to be a case similar to the *PTEN*/*PTENP1* regulation by paralogous pseudogenes that are expressed only to regulate the expression of their associated protein coding gene. Maybe there is something more interesting here? Our eQTLs are associated with hypertension.
- ***GPR17***: regulation of heart-resident mesenchymal cells and blood-borne cellular species recruitment following myocardial infarction, orchestrated by GPR17 ([24909956](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4196654/)). GPR17 is a novel negative regulator of Shh signalling in a wide range of cellular contexts ([31444216](https://dev.biologists.org/content/146/17/dev176784)). Its eQTL is associated with cholesterol
- ***LAMA5-AS1***: AS of *LAMA5* (Laminins, a family of extracellular matrix glycoproteins, are the major noncollagenous constituent of basement membranes: GeneCards). A *LAMA5* variant (rs659822) is associated with low cholesterol in elderly individuals ([20951195](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2998567/)). *LAMA5-AS1* eQTLs are associated with pulse pressure, but *LAMA5* is not associated with GWAS.
- ***KREMEN1***: Receptor for Dickkopf proteins. Cooperates with DKK1/2 to inhibit Wnt/beta-catenin signaling by promoting the endocytosis of Wnt receptors LRP5 and LRP6. In the absence of DKK1, potentiates Wnt-beta-catenin signaling by maintaining LRP5 or LRP6 at the cell membrane. Can trigger apoptosis in a Wnt-independent manner and this apoptotic activity is inhibited upon binding of the ligand DKK1 (GeneCards). *KREMEN1* eQTLs are associated with heart failure. Something interesting with the return to fetal?
- ***PDZRN3***: cardiac Pdzrn3 deficiency protected against heart failure while over expression of Pdzrn3 in mouse cardiomyocytes during the first weeks of life, impaired postnatal cardiomyocyte maturation leading to premature death ([https://doi.org/10.1101/2020.07.29.226597](https://www.biorxiv.org/content/10.1101/2020.07.29.226597v1.full)). Is also required for cardiac differentiation ([17118964](https://jcs.biologists.org/content/119/24/5106)). *PDZRN3* eQTLs are associated with QRS duration.
- ***PFN2***: regulates actin polymerization (GeneCards). *PFN2* eQTLs are associated with pulse pressure.
- ***PLD1***: associated with the cardiac sarcolemmal (SL) membrane hydrolyses phosphatidylcholine to produce phosphatidic acid, an important phospholipid signaling molecule known to influence cardiac function ([15601581](https://onlinelibrary.wiley.com/doi/epdf/10.1111/j.1582-4934.2004.tb00477.x)). *PLD1* eQTLs are associated with pulse pressure.
- ***FBN2***: Involved in Congenital contractural arachnodactyly (CCA), which is often associated with cardiac defects ([16740166](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1524931/)). *FBN2* eQTLs are associated with heart failure.
- ***DMTN***: actin binding protein. Involved in erythrocyte membrane stability: its absence results in anemia ([27073223](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4937361/)). Regulates Calcium Mobilization and Signaling in Platelets ([23060452](https://www.jbc.org/content/287/49/41218.full)). *DMTN* eQTLs are associated with MI and atherosclerosis.
- ***PEBP4***: silencing inhibits TGF-β1-induced epithelial-mesenchymal transition of osteosarcoma cells by suppressing the PI3K/Akt pathway ([X](http://www.ijcep.com/files/ijcep0020912.pdf)). eQTLs are associated with pulse rate.

## iPSC-CVPC-specific eIsoforms that are also overexpressed in iPSC-CVPCs and colocalize with GWAS: HES4; ADAM15; LGALS8; GPR137; CIB1; NOMO3; AC005670.3; AP3D1; HLA-C; NOD1
- ***ADAM15***: ADAM (a Disintegrin and Metalloproteinase) 15 Deficiency Exacerbates Ang II (Angiotensin II)–Induced Aortic Remodeling Leading to Abdominal Aortic Aneurysm ([32522006](https://www.ahajournals.org/doi/10.1161/ATVBAHA.120.314600)). A Disintegrin and Metalloproteinase 15 Contributes to Atherosclerosis by Mediating Endothelial Barrier Dysfunction via Src Family Kinase Activity ([22904271](https://www.ahajournals.org/doi/full/10.1161/atvbaha.112.252205)). Is also associated with MI ([19563617](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2697557/)). eQTLs are associated with blood pressure and varicose veins.
- ***LGALS8*** (galectin-8): ([review of functions](https://jcs.biologists.org/content/joces/131/9/jcs208884.full.pdf)). ieQTLs are associated with pulse pressure but not gene eQTLs.
- ***GPR137***: G protein coupled receptor. Has different isoforms with known functions during development ([25514843](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4337595/)). Its alternative splicing is associated with ESRP1 ([28975893](https://elifesciences.org/articles/28366)). ieQTLs only are associated with heart failure.
- ***AP3D1***: associated with albinism and a lot of developmental issues. ieQTLs only are associated with blood pressure.
- ***NOD1***: NOD1 Activation Induces Cardiac Dysfunction and Modulates Cardiac Fibrosis and Cardiomyocyte Apoptosis ([23028889](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3445482/)). Deficiency of NOD1 Improves the β-Adrenergic Modulation of Ca2+ Handling in a Mouse Model of Heart Failure ([29962957](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6010671/)). ieQTLs only are associated with LDL levels, while gene eQTLs are associated with triglycerides (low PP)

## Adult-specific eIsoforms that are also overexpressed in adult heart and colocalize with GWAS: PALMD; FMO2; MS4A7; XRRA1; TMEM255B; MLH3; ATXN3; XRCC3; HAGHL; EMP2; RPL13; NUP88; FAM106A; AC005670.3; PCAT19; ZNF814; ZSCAN18; USP34; AC073254.1; SERHL; FAM118A; PIGG; NPR3; ITGA1; ATG10; MRNIP; NOTCH4; HLA-DQA1; HLA-DPB1; HLA-DPB1; TMEM63B; PRKRIP1
- ***PALMD*** (palmdelphin): susceptibility gene for calcific aortic valve stenosis ([29511167](https://www.nature.com/articles/s41467-018-03260-6)). ieQTL is associated with pulse rate, gene eQTL is associated with blood pressure
- ***FMO2***: NADPH-dependent enzyme that catalyzes the N-oxidation of some primary alkylamines through an N-hydroxylamine intermediate. However, some human populations contain an allele (FMO2\*2A) with a premature stop codon, resulting in a protein that is C-terminally-truncated, has no catalytic activity, and is likely degraded rapidly (GeneCards and [15864117](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1351039/)). eQTLs are associated with ventricular rate
- ***XRCC3***: DNA repair. Associated with increased carotid artery thickness upon exposure to ionizing radiation ([X](https://academic.oup.com/eurheartj/article/34/suppl_1/P3922/2862076)). XRCC3 polymorphism is associated with hypertension-induced left ventricular hypertrophy ([29626209](https://www.nature.com/articles/s41440-018-0038-0?platform=hootsuite)). Xrcc3 Induces Cisplatin Resistance by Stimulation of Rad51-Related Recombinational Repair, S-Phase Checkpoint Activation, and Reduced Apoptosis ([15843498](https://jpet.aspetjournals.org/content/314/2/495.abstract)). ieQTLs are associated with pulse rate, BMI, atrial fibrillation, heart failure and hypertension (gene eQTLs only with LDL levels).
- ***ATXN3***: Splice Isoforms of the Polyglutamine Disease Protein Ataxin-3 Exhibit Similar Enzymatic yet Different Aggregation Properties ([21060878](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2965175/)). A knockin mouse model of spinocerebellar ataxia type 3 exhibits prominent aggregate pathology and aberrant splicing of the disease gene transcript ([25320121](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4321438/)). ieQTLs are associated with HDL levels and pulse rate (no associations for gene-level).
- ***RPL13***: ribosomal protein. candidate for congenital heart disease ([31625562](https://academic.oup.com/hmg/article/28/23/3954/5589181?casa_token=qwbNCOmtIvQAAAAA:oIHuTEm5rI_a5uKnV2mm_cyOrh-O3z316JvNWNi3uuLMd4Xum37s-RMDq8HmKW4wzLlkLvMJgVcZ)). ieQTLs and gene eQTLs are associated with blood pressure and QRS duration.
- ***NUP88***: nucleoporin complex. Other NUPs are associated with heart disease ([PMC5027676](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5027676/)). ieQTLs only are associated with LDL levels.
- ***PCAT19***: interesting because this is a lncRNA with oncogenic properties. eQTLs are associated with cardiac conduction disorders.
- ***USP34***: Acts as an activator of the Wnt signaling pathway downstream of the beta-catenin destruction complex by deubiquitinating and stabilizing AXIN1 and AXIN2, leading to promote nuclear accumulation of AXIN1 and AXIN2 and positively regulate beta-catenin (CTNBB1)-mediated transcription (GeneCards). Associated with CHD ([29555671](https://www.ahajournals.org/doi/full/10.1161/circgen.117.001978)). eQTLs are associated with blood pressure.
- ***NPR3***: protects cardiomyocytes from apoptosis ([PMC5026813](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5026813/)). ieQTLs are associated with triglycerides levels; gene eQTLs with blood pressure.
- ***ITGA1***: Cardiac Fibroblasts Regulate Myocardial Proliferation through β1 Integrin Signaling ([19217425](https://www.sciencedirect.com/science/article/pii/S1534580708005170)). ieQTLs only are associated with unstable angina.
- ***ATG10***: involved in autophagy ([PMC2838272](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2838272/)). ieQTLs only are associated with HDL, triglycerides levels and palpitations.
- ***NOTCH4***: a lot of associations with heart functions. eQTLs are associated with blood pressure
- ***TMEM63B***: OSCA/TMEM63 are an evolutionarily conserved family of mechanically activated ion channels ([30382938](https://elifesciences.org/articles/41844)). ieQTLs only are associated with Acute pulmonary heart disease; Pulmonary embolism and infarction, acute

## iPSC-CVPC-specific eIsoforms that colocalize with GWAS, but their associated eGene does not: HES4; ADAM15; TOR1AIP1; LGALS8; PRKG1; MXI1; DKK3; GPR137; UCHL3; CIB1; MLST8; NOMO3; ATXN2L; CTDNEP1; AC005670.3; AP3D1; KANK2; DMPK; DIDO1; THOC5; STAB1; LPP; RPL9; CAST; HLA-F-AS1; HLA-C; AC147651.1; NOD1; PTP4A3
- ***ADAM15***: ADAM (a Disintegrin and Metalloproteinase) 15 Deficiency Exacerbates Ang II (Angiotensin II)–Induced Aortic Remodeling Leading to Abdominal Aortic Aneurysm ([32522006](https://www.ahajournals.org/doi/10.1161/ATVBAHA.120.314600)). A Disintegrin and Metalloproteinase 15 Contributes to Atherosclerosis by Mediating Endothelial Barrier Dysfunction via Src Family Kinase Activity ([22904271](https://www.ahajournals.org/doi/full/10.1161/atvbaha.112.252205)). Is also associated with MI ([19563617](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2697557/)).
- ***LGALS8*** (galectin-8): ([review of functions](https://jcs.biologists.org/content/joces/131/9/jcs208884.full.pdf)). ieQTLs are associated with pulse pressure but not gene eQTLs.
- ***PRKG1***: key mediator of the nitric oxide (NO)/cGMP signaling pathway (GeneCards). Recurrent Gain-of-Function Mutation in PRKG1 Causes Thoracic Aortic Aneurysms and Acute Aortic Dissections ([PMC3738837](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3738837/)). Genetic variability in PRKG1 might affect left ventricular (LV) function and structure ([24060898](https://www.ahajournals.org/doi/pdf/10.1161/HYPERTENSIONAHA.113.01630)). ieQTLs are associated with blood pressure.
- ***MXI1***: transcriptional repressor thought to negatively regulate MYC function (GeneCards). MXI1-0, an Alternatively Transcribed Mxi1 Isoform, Is Overexpressed in Glioblastomas ([PMC1531670](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1531670/)). ieQTLs are associated with ischaemic heart disease.
- ***DKK3***: involved in embryonic development through its interactions with the Wnt signaling pathway (GeneCards). Its function is associated with KREMEN, which is adult-specific (see above). Dickkopf-3 protects against cardiac dysfunction and ventricular remodelling following myocardial infarction ([25840773](https://pubmed.ncbi.nlm.nih.gov/25840773/)). ieQTLs are associated with heart rate
- ***GPR137***: G protein coupled receptor. Has different isoforms with known functions during development ([25514843](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4337595/)). Its alternative splicing is associated with ESRP1 ([28975893](https://elifesciences.org/articles/28366)). ieQTLs only are associated with heart failure.
- ***ATXN2L***: Mouse embryonal fibroblast cells revealed more multinucleated giant cells upon ATXN2L deficiency. In addition, in human neural cells, transcript levels of ATXN2L were induced upon starvation and glucose and amino acids exposure, but this induction was partially prevented by serum or low cholesterol administration ([32698485](https://www.mdpi.com/1422-0067/21/14/5124/pdf)). ieQTLs are associated with blood pressure, HDL levels, BMI.
- ***CTDNEP1***: controls fatty acid metabolism (GeneCards, [PMC3283218](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3283218/)). ieQTLs are associated with blood pressure medication.
- ***AP3D1***: associated with albinism and a lot of developmental issues. ieQTLs only are associated with blood pressure.
- ***KANK2***: Involved in actin stress fiber formation (GeneCards). ieQTLs are associated with HDL cholesterol; Pulse rate, automated reading
- ***DMPK***: critical to the modulation of cardiac contractility and to the maintenance of proper cardiac conduction activity probably through the regulation of cellular calcium homeostasis. Phosphorylates PLN, a regulator of calcium pumps and may regulate sarcoplasmic reticulum calcium uptake in myocytes (GeneCards). Dmpk gene deletion or antisense knockdown does not compromise cardiac or skeletal muscle function in mice ([27522499](https://pubmed.ncbi.nlm.nih.gov/27522499/)). ieQTLs are associated with calcium levels, atherosclerosis and blood pressure (also eQTLs)
- ***LPP***: mechanosensitive protein regulated by nitric oxide in the heart ([PMC3642136](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3642136/)). ieQTLs are associated with blood pressure
- ***CAST*** (calpastatin): involved in cardiac hypertrophy ([PMC3151485](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3151485/)). ieQTLs are associated with Medication for cholesterol, blood pressure or diabetes; high blood pressure
- ***NOD1***: NOD1 Activation Induces Cardiac Dysfunction and Modulates Cardiac Fibrosis and Cardiomyocyte Apoptosis ([23028889](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3445482/)). Deficiency of NOD1 Improves the β-Adrenergic Modulation of Ca2+ Handling in a Mouse Model of Heart Failure ([29962957](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6010671/)). ieQTLs only are associated with LDL levels, while gene eQTLs are associated with triglycerides (low PP)


## Adult-specific eIsoforms that colocalize with GWAS, but their associated eGene does not: PALMD; FMO2; B3GAT3; XRRA1; TMEM255B; MLH3; ATXN3; XRCC3; ICE2; HAGHL; EMP2; HP; RPL13; NUP88; GRAMD1A; PCAT19; AC073254.1; SERHL; FAM118A; TDGF1; PIGG; CRMP1; AC093752.1; NPR3; ITGA1; ATG10; MRNIP; HLA-C; NOTCH4; HLA-DQA1; HLA-DPB1; TMEM63B; AC147651.1; PRKRIP1; NMRK1
- ***PALMD*** (palmdelphin): susceptibility gene for calcific aortic valve stenosis ([29511167](https://www.nature.com/articles/s41467-018-03260-6)). ieQTL is associated with pulse rate, gene eQTL is associated with blood pressure
- ***FMO2***: NADPH-dependent enzyme that catalyzes the N-oxidation of some primary alkylamines through an N-hydroxylamine intermediate. However, some human populations contain an allele (FMO2\*2A) with a premature stop codon, resulting in a protein that is C-terminally-truncated, has no catalytic activity, and is likely degraded rapidly (GeneCards and [15864117](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1351039/)). eQTLs are associated with ventricular rate
- ***XRCC3***: DNA repair. Associated with increased carotid artery thickness upon exposure to ionizing radiation ([X](https://academic.oup.com/eurheartj/article/34/suppl_1/P3922/2862076)). XRCC3 polymorphism is associated with hypertension-induced left ventricular hypertrophy ([29626209](https://www.nature.com/articles/s41440-018-0038-0?platform=hootsuite)). Xrcc3 Induces Cisplatin Resistance by Stimulation of Rad51-Related Recombinational Repair, S-Phase Checkpoint Activation, and Reduced Apoptosis ([15843498](https://jpet.aspetjournals.org/content/314/2/495.abstract)). ieQTLs are associated with pulse rate, BMI, atrial fibrillation, heart failure and hypertension (gene eQTLs only with LDL levels).
- ***ATXN3***: Splice Isoforms of the Polyglutamine Disease Protein Ataxin-3 Exhibit Similar Enzymatic yet Different Aggregation Properties ([21060878](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2965175/)). A knockin mouse model of spinocerebellar ataxia type 3 exhibits prominent aggregate pathology and aberrant splicing of the disease gene transcript ([25320121](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4321438/)). ieQTLs are associated with HDL levels and pulse rate (no associations for gene-level).
- ***RPL13***: ribosomal protein. candidate for congenital heart disease ([31625562](https://academic.oup.com/hmg/article/28/23/3954/5589181?casa_token=qwbNCOmtIvQAAAAA:oIHuTEm5rI_a5uKnV2mm_cyOrh-O3z316JvNWNi3uuLMd4Xum37s-RMDq8HmKW4wzLlkLvMJgVcZ)). ieQTLs and gene eQTLs are associated with blood pressure and QRS duration.
- ***NUP88***: nucleoporin complex. Other NUPs are associated with heart disease ([PMC5027676](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5027676/)). ieQTLs only are associated with LDL levels.
- ***PCAT19***: interesting because this is a lncRNA with oncogenic properties. eQTLs are associated with cardiac conduction disorders.
- ***USP34***: Acts as an activator of the Wnt signaling pathway downstream of the beta-catenin destruction complex by deubiquitinating and stabilizing AXIN1 and AXIN2, leading to promote nuclear accumulation of AXIN1 and AXIN2 and positively regulate beta-catenin (CTNBB1)-mediated transcription (GeneCards). Associated with CHD ([29555671](https://www.ahajournals.org/doi/full/10.1161/circgen.117.001978)). eQTLs are associated with blood pressure.
- ***NPR3***: protects cardiomyocytes from apoptosis ([PMC5026813](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5026813/)). ieQTLs are associated with triglycerides levels; gene eQTLs with blood pressure.
- ***ITGA1***: Cardiac Fibroblasts Regulate Myocardial Proliferation through β1 Integrin Signaling ([19217425](https://www.sciencedirect.com/science/article/pii/S1534580708005170)). ieQTLs only are associated with unstable angina.
- ***ATG10***: involved in autophagy ([PMC2838272](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2838272/)). ieQTLs only are associated with HDL, triglycerides levels and palpitations.
- ***NOTCH4***: a lot of associations with heart functions. eQTLs are associated with blood pressure
- ***TMEM63B***: OSCA/TMEM63 are an evolutionarily conserved family of mechanically activated ion channels ([30382938](https://elifesciences.org/articles/41844)). ieQTLs only are associated with Acute pulmonary heart disease; Pulmonary embolism and infarction, acute
- ***B3GAT3***: catalyzes the formation of the glycosaminoglycan-protein linkage. Novel Splicing Mutation in B3GAT3 Associated with Short Stature, GH Deficiency, Hypoglycaemia, Developmental Delay, and Multiple Congenital Anomalies ([29318063](https://www.hindawi.com/journals/crig/2017/3941483/)). ieQTLs are associated with blood pressure
- ***HP*** (Haptoglobin): associated with MI risk ([19657769](https://pubmed.ncbi.nlm.nih.gov/19657769/)) and cardiac disease in diabetes ([PMC5019011](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5019011/)). ieQTLs are associated with blood pressure, atherosclerosis, hypertension, pulse rate
- ***GRAMD1A***: cholesterol transporter (GeneCards). ieQTLs are associated with HDL levels
- ***TDGF1***: epidermal growth factor associated with NKX2-5 ([PMC3779917](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3779917/)) and MEF2C ([26811383](https://dev.biologists.org/content/143/5/774)) and involved in early cardiac development. ieQTLs are associated with atenolol prescription (beta blocker used to treat high blood pressure).
- ***NMRK1***: Nicotinamide adenine dinucleotide (NAD+) synthesis (GeneCards). ieQTLs are associated with MI




In [184]:
gene = "NMRK1"

# Build the string "<label> = <desc1>; <desc2>; ..." listing the sorted, unique manifest
# descriptions of the traits that colocalize with `gene_name`.
#   label     : prefix printed before " = " (e.g. "GENE" or "ISOFORM")
#   coloc     : colocalization table with columns PP.H4.abf, transcript_id and trait
#   info      : annotation table with columns gene_name and transcript_id
#   gene_name : gene symbol to look up in `info`
#   min_pp_h4 : rows are kept only when PP.H4.abf is strictly greater than this value
# Reads the global `manifest` table to translate trait IDs into descriptions.
summarize_coloc_traits = function(label, coloc, info, gene_name, min_pp_h4 = 0.5)
{
    ids  = info[info$gene_name == gene_name, "transcript_id"]
    
    # %in% (not ==) so that a gene symbol mapping to several IDs is matched correctly
    # instead of being silently recycled against the rows of `coloc`.
    hits = coloc[coloc$PP.H4.abf > min_pp_h4 & coloc$transcript_id %in% ids, ]
    hits = merge(hits, manifest[, c("id", "description", "coding_description")], by.x = "trait", by.y = "id")
    
    paste(label, paste(sort(unique(hits[, "description"])), collapse = "; "), sep = " = ")
}

message(summarize_coloc_traits("GENE"   , coloc_gene   , geneinfo_gene   , gene))
message(summarize_coloc_traits("ISOFORM", coloc_isoform, geneinfo_isoform, gene))


GENE = 

ISOFORM = I21 Acute myocardial infarction



In [176]:
# Show every manifest row whose id contains "categorical-6150-both_sexes".
# grepl() already returns a logical vector, so it is used directly as the row selector.
manifest[grepl("categorical-6150-both_sexes", manifest$id), ]

Unnamed: 0_level_0,id,trait_type,phenocode,description,description_more,coding_description,category,n_cases_full_cohort_both_sexes,n_controls_total,saige_heritability_EUR,filename
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<int>,<dbl>,<chr>
17,categorical-6150-both_sexes-3,categorical,6150,Vascular/heart problems diagnosed by doctor,"ACE touchscreen question """"Has a doctor ever told you that you have had any of the following conditions? (You can select more than one answer)"""" The following checks were performed: If code -7 was selected, then no additional choices were allowed. If code -3 was selected, then no additional choices were allowed. If the participant activated the Help button they were shown the message: If you do not know if you have had any of the listed conditions, enter None of the above. You can check this with an interviewer later in the visit.",Stroke,UK Biobank Assessment Centre > Touchscreen > Health and medical history > Medical conditions,7332,428034,0.051107,/frazer01/projects/CARDIPS/analysis/cardiac_eqtls/input/gwas/pan_ukbb/summary_statistics/categorical-6150-both_sexes-3.txt.gz
18,categorical-6150-both_sexes-100,categorical,6150,Vascular/heart problems diagnosed by doctor,"ACE touchscreen question """"Has a doctor ever told you that you have had any of the following conditions? (You can select more than one answer)"""" The following checks were performed: If code -7 was selected, then no additional choices were allowed. If code -3 was selected, then no additional choices were allowed. If the participant activated the Help button they were shown the message: If you do not know if you have had any of the listed conditions, enter None of the above. You can check this with an interviewer later in the visit.",None of the above,UK Biobank Assessment Centre > Touchscreen > Health and medical history > Medical conditions,338784,131602,0.076389,/frazer01/projects/CARDIPS/analysis/cardiac_eqtls/input/gwas/pan_ukbb/summary_statistics/categorical-6150-both_sexes-100.txt.gz
19,categorical-6150-both_sexes-4,categorical,6150,Vascular/heart problems diagnosed by doctor,"ACE touchscreen question """"Has a doctor ever told you that you have had any of the following conditions? (You can select more than one answer)"""" The following checks were performed: If code -7 was selected, then no additional choices were allowed. If code -3 was selected, then no additional choices were allowed. If the participant activated the Help button they were shown the message: If you do not know if you have had any of the listed conditions, enter None of the above. You can check this with an interviewer later in the visit.",High blood pressure,UK Biobank Assessment Centre > Touchscreen > Health and medical history > Medical conditions,130711,320160,0.079343,/frazer01/projects/CARDIPS/analysis/cardiac_eqtls/input/gwas/pan_ukbb/summary_statistics/categorical-6150-both_sexes-4.txt.gz
50,categorical-6150-both_sexes-2,categorical,6150,Vascular/heart problems diagnosed by doctor,"ACE touchscreen question """"Has a doctor ever told you that you have had any of the following conditions? (You can select more than one answer)"""" The following checks were performed: If code -7 was selected, then no additional choices were allowed. If code -3 was selected, then no additional choices were allowed. If the participant activated the Help button they were shown the message: If you do not know if you have had any of the listed conditions, enter None of the above. You can check this with an interviewer later in the visit.",Angina,UK Biobank Assessment Centre > Touchscreen > Health and medical history > Medical conditions,15585,422159,0.098137,/frazer01/projects/CARDIPS/analysis/cardiac_eqtls/input/gwas/pan_ukbb/summary_statistics/categorical-6150-both_sexes-2.txt.gz


In [148]:
# Inspect the structure (column names, types and first values) of the coloc_gene table.
str(coloc_gene)

'data.frame':	2011740 obs. of  17 variables:
 $ nsnps        : int  2817 2817 2817 2817 2815 2816 2563 2814 2817 2813 ...
 $ PP.H0.abf    : num  0.321 0.302 0.247 0.329 0.301 ...
 $ PP.H1.abf    : num  0.622 0.585 0.478 0.638 0.584 ...
 $ PP.H2.abf    : num  0.0167 0.0346 0.0834 0.0092 0.0336 ...
 $ PP.H3.abf    : num  0.0324 0.067 0.1616 0.0178 0.0651 ...
 $ PP.H4.abf    : num  0.00717 0.01221 0.03008 0.00592 0.0161 ...
 $ transcript_id: chr  "ENSG00000000457.14_7" "ENSG00000000457.14_7" "ENSG00000000457.14_7" "ENSG00000000457.14_7" ...
 $ type         : int  0 0 0 0 0 0 0 0 0 0 ...
 $ id           : chr  "VAR_1_169860528_C_A" "VAR_1_169860528_C_A" "VAR_1_169690313_ACT_A" "VAR_1_169852613_A_G" ...
 $ chrom        : int  1 1 1 1 1 1 1 1 1 1 ...
 $ pos          : int  169860528 169860528 169690313 169852613 169860528 169860528 169860528 169860528 169747630 169860528 ...
 $ ref          : chr  "C" "C" "ACT" "A" ...
 $ alt          : chr  "A" "A" "A" "G" ...
 $ af           : num  0.13 0.