# eQTLs with associations with the microbiome
- 2022/04/22

In [1]:
# Run everything from the project root so that the relative paths used below
# ("script/...", "pipeline/...") resolve.
setwd("/frazer01/projects/CARDIPS/analysis/cardiac_gwas_coloc")

# Project helper functions.
# NOTE(review): `fread`, `rbindlist` and `add_rownames` are called later without
# a visible library() call -- presumably they are made available here; confirm.
source("script/functions.R"  )


In [2]:
# Output folder for the per-eQTL PNG files written by plot_eqtl(); warnings are
# switched off so that re-running the cell when the folder already exists is quiet.
dir.create("pipeline/microbiome_eqtls", showWarnings = FALSE)


In [3]:
suppressPackageStartupMessages(library(stringr   ))
suppressPackageStartupMessages(library(colorspace))
suppressPackageStartupMessages(library(kohonen   ))
suppressPackageStartupMessages(library(tempR     ))
suppressPackageStartupMessages(library(latticeExtra))
suppressPackageStartupMessages(library(tis))


In [4]:
# Pre-computed inputs stored under the cardiac_qtls_combined analysis folder.
#   exp_data : list; the elements read in this notebook are "coordinates",
#              "color", "expression", "covariates" and "metadata".
#   qtl_data : list; the elements read in this notebook are "qtl" and
#              "interactions", each indexed by "rna" and "isoform".
exp_data = readRDS("/frazer01/projects/CARDIPS/analysis/cardiac_qtls_combined/input/expdata_qtl.rds")
qtl_data = readRDS("/frazer01/projects/CARDIPS/analysis/cardiac_qtls_combined/input/qtl/qtls.RDS"   )


In [5]:
# Feature annotation table shipped with the expression data; `gene_info` keeps
# only the rows whose phenotype is "rna".
coordinates = exp_data[["coordinates"]]
gene_info   = coordinates[coordinates[["phenotype"]] == "rna", ]


In [6]:
# Column layout shared by every colour table assembled in this cell.
mycols = c("body_site", "name", "color", "order")

# The stage and organ tables are both row subsets (selected by row name) of the
# "by_organ" table, so the stage subset has to be taken before the organ table
# itself is narrowed down.
colordata_organ = exp_data[["color"]][["by_organ"]]
colordata_stage = colordata_organ[c("ipsc_cvpc", "adult"), ]
colordata_organ = colordata_organ[c("arteria", "heart"), ]

colordata_stage$order     = 1:2
colnames(colordata_stage) = mycols
colordata_stage$type      = "stage"

colordata_organ$order     = 1:2
colnames(colordata_organ) = mycols
colordata_organ$type      = "organ"

# Tissue table: every body site except "ipsc_cvpc", numbered in its existing
# row order.
colordata_tissue           = exp_data[["color"]][["by_tissue"]]
colordata_tissue           = colordata_tissue[colordata_tissue$body_site != "ipsc_cvpc", ]
colordata_tissue$order     = 1:nrow(colordata_tissue)
colnames(colordata_tissue) = mycols
colordata_tissue$type      = "tissue"

# Cell-type table: indexed by cell type, reduced to the four columns that map
# onto the shared layout.
colordata_cell           = exp_data[["color"]][["by_cell"]]
rownames(colordata_cell) = colordata_cell$cell_type
colordata_cell           = colordata_cell[, c("cell_type", "name", "color", "order")]
colnames(colordata_cell) = mycols
colordata_cell$type      = "cell"

colordata = list(by_stage  = colordata_stage,
                 by_organ  = colordata_organ,
                 by_tissue = colordata_tissue,
                 by_cell   = colordata_cell)

In [7]:
# Stack the four colour tables (stage, organ, tissue, cell) into one plain data
# frame; the `type` column added above records which table each row came from.
# NOTE(review): rbindlist() is a data.table function and data.table is not
# attached in the library() cell -- presumably script/functions.R does it; confirm.
colordata_df = as.data.frame(rbindlist(colordata))

In [81]:
# Gene symbols of interest. Several symbols are listed more than once (e.g.
# "CPS1", "NOS3", "TGFB1", "ARG1"); this is harmless because the vector is only
# used for `%in%` membership tests when filtering the QTL tables.
gene_names = c("EGR1", "GPBAR1", "NR1I2", "FXR1", "SLCO1B1", "SLCO1B3", "SLCO1B7","UGT1A1", "UGT1A3", "UGT1A4", "UGT1A5", "UGT1A6", "UGT1A7", "UGT1A8", "UGT1A9", "UGT1A10", "TMPRSS11E",
               "TGFB1", "SMAD3", "ADRB2","CPS1","NOS3","ARG1","ALPL","FXR","ZNF268","CPS1","CPS1","CPS1","CPS1","CPS1","SYNE2","TGFB1","ARG1","NOS3","NOS3","PYROXD2")


In [82]:
# eQTLs: pool the gene-level ("rna") and isoform-level tables, keep the rows for
# the genes of interest that are flagged as eGenes, then sort by gene name and
# by the `type` column.
eqtls = rbind(qtl_data[["qtl"]][["rna"]],
              qtl_data[["qtl"]][["isoform"]])
eqtls = eqtls[eqtls$egene == TRUE & eqtls$gene_name %in% gene_names, ]
eqtls = eqtls[order(eqtls$gene_name, eqtls$type), ]

# Interaction results: same pooling, restricted to the genes of interest with
# the `cell` flag set.
ints = rbind(qtl_data[["interactions"]][["rna"]],
             qtl_data[["interactions"]][["isoform"]])
ints = ints[ints$cell == TRUE & ints$gene_name %in% gene_names, ]

In [84]:
# Draw a genotype-vs-expression box plot for one eQTL and save it as a PNG.
#
# Arguments:
#   ii    : row index into `eqtls` selecting the eQTL to plot.
#   eqtls : data frame of eQTLs; the columns gene_name, transcript_id, id, type
#           and qval are read.
#
# Globals read: exp_data (elements "expression", "covariates", "metadata").
# fread() and add_rownames() must be available in the calling environment.
#
# Side effect: writes
#   pipeline/microbiome_eqtls/<gene_name>.<type>.<transcript_id>.png
# relative to the current working directory.
#
# Returns: the value of dev.off() for the PNG device (unchanged from before).
plot_eqtl = function(ii, eqtls)
{
    gene_name     = eqtls[ ii, "gene_name"    ]
    transcript_id = eqtls[ ii, "transcript_id"]
    id            = eqtls[ ii, "id"           ]
    type          = eqtls[ ii, "type"         ]
    qval          = signif(eqtls[ ii, "qval"], digits = 3)

    # Isoform-level eQTLs are recognised by "ENST" in the transcript ID;
    # everything else is treated as gene-level. A plain if/else is used instead
    # of a scalar ifelse(), and the local is no longer called `source`, which
    # masked base::source().
    if(grepl("ENST", transcript_id))
    {
        gt_source = "use_isoform"
        phenotype = "isoform"
    }else
    {
        gt_source = "tpm_gene"
        phenotype = "rna"
    }

    # Per-transcript genotype matrix (variants x samples) and variant annotation.
    gt_dir      = file.path("/frazer01/projects/CARDIPS/analysis/cardiac_eqtls/pipeline/1.3.genotype", gt_source)
    gtdata      = add_rownames(fread(file.path(gt_dir, paste("gt_data", transcript_id, "txt", sep = ".")), sep = "\t", header = TRUE , data.table = FALSE))
    gtinfo      =              fread(file.path(gt_dir, paste("gt_info", transcript_id, "txt", sep = ".")), sep = "\t", header = TRUE , data.table = FALSE)
    gtdata      = data.frame(wgs_id = colnames(gtdata), gt = as.numeric(gtdata[id,]))
    gtinfo      = gtinfo[gtinfo$id == id,]

    # Normalized expression of the tested transcript, one value per run.
    expdata     = exp_data[["expression"]][[phenotype]][["normalized"]][transcript_id,]
    expdata     = data.frame(run = colnames(expdata), norm = as.numeric(expdata[1,]))

    # NOTE(review): the metadata filter is hard-coded to "rna" even for
    # isoform-level eQTLs -- presumably the same runs are used for both; confirm.
    toplot      = merge(exp_data$covariates, exp_data$metadata[,c("run", "wgs_id", "phenotype")], by = "run")
    toplot      = toplot[toplot$phenotype == "rna",]
    toplot      = merge(toplot, expdata, by = "run")
    toplot      = merge(toplot, gtdata , by = "wgs_id"   )

    # Map the genotype onto x positions 1, 2, 3 (assumes gt is coded 0 / 0.5 / 1,
    # which is what the fixed xlim and `at = 1:3` below imply -- TODO confirm).
    # The factor keeps all three genotype classes as box-plot groups even when a
    # class has no samples; otherwise boxplot() stops because `at` would not
    # match the number of groups.
    toplot$x       = toplot$gt * 2 + 1
    toplot$x_class = factor(toplot$x, levels = 1:3)

    png(filename = file.path("pipeline/microbiome_eqtls", paste(gene_name, type, transcript_id, "png", sep = ".")), width = 6, height = 6, units = "in", res = 300)

    # Make sure the PNG device is closed even if one of the plotting calls
    # below fails; after the regular dev.off() at the end this is a no-op.
    plot_dev = dev.cur()
    on.exit(if(plot_dev %in% dev.list()){dev.off(plot_dev)}, add = TRUE)

    par(mar = c(2,4,5,1))
    # Fixed y range: values outside [-3, 3] fall outside the plotting region.
    plot(1,1, type = "n", xlim = c(0.5,3.5), ylim = c(-3,3), axes = FALSE, xlab = "", ylab = "")
    axis(2)

    boxplot(norm ~ x_class, data = toplot, outline = FALSE, col = "#cccccc", add = TRUE, at = (1:3), axes = FALSE)
    points (x = jitter(toplot$x, amount = 0.25), y = toplot$norm, pch = 16, cex = 0.75)
    abline (lm(norm ~ x, data = toplot), lty = "dashed", col = "#0000ff", lwd = 3)

    # Axis labels: ref/ref, ref/alt, alt/alt for the tested variant.
    gt1  = gtinfo[1, "ref" ]
    gt2  = gtinfo[1, "alt" ]
    rsid = gtinfo[1, "rsid"]
    gene = gene_name
    gts  = c(paste(gt1, gt1, sep = "/"), paste(gt1, gt2, sep = "/"), paste(gt2, gt2, sep = "/"))

    if(gt_source == "use_isoform"){gene = paste0(transcript_id, " (", gene, ")")}

    mtext(gts                          , side = 1, cex = 1.25, line =  0  , at = 1:3)
    mtext(paste0(gene, " (", type, ")"), side = 3, cex = 1.25, line =  3.5, font = 3)
    mtext(transcript_id                , side = 3, cex = 1.25, line =  2  , font = 3)
    mtext(paste0(rsid, " q = ", qval)  , side = 3, cex = 1.25, line =  0.5)
    mtext("Normalized expression"      , side = 2, cex = 1.25, line =  2.5)

    dev.off(plot_dev)
}


# Presumably kept so the body of plot_eqtl() can be stepped through by hand for
# the first eQTL; the loop below does not depend on it.
ii = 1

#pdf(file = "pipeline/microbiome_eqtls/plots.pdf", width = 6, height = 6)

# One PNG per row of `eqtls`. seq_len() yields an empty sequence when the table
# has no rows, whereas 1:nrow(eqtls) would iterate over c(1, 0) and call
# plot_eqtl() on rows that do not exist.
invisible(lapply(seq_len(nrow(eqtls)), function(ii){plot_eqtl (ii, eqtls)}))

#dev.off()

In [63]:
# NOTE(review): `x` is not assigned at top level anywhere in the visible
# notebook (it only exists as a column of `toplot` inside plot_eqtl()); this
# cell looks like leftover interactive debugging -- confirm before running the
# notebook top to bottom.
x

In [65]:
# Print the structure (column names, types and first values) of the current
# `eqtls` table.
str(eqtls)

'data.frame':	19 obs. of  30 variables:
 $ id             : chr  "VAR_14_64651663_C_A" "VAR_19_41790086_GTTATGGTA_G" "VAR_2_211457261_C_T" "VAR_2_211461880_T_A" ...
 $ transcript_id  : chr  "ENSG00000054654.18_8" "ENSG00000105329.10_7" "ENSG00000021826.17_7" "ENSG00000021826.17_7" ...
 $ gene_id        : chr  "ENSG00000054654.18_8" "ENSG00000105329.10_7" "ENSG00000021826.17_7" "ENSG00000021826.17_7" ...
 $ gene_name      : chr  "SYNE2" "TGFB1" "CPS1" "CPS1" ...
 $ gene_type      : chr  "protein_coding" "protein_coding" "protein_coding" "protein_coding" ...
 $ start          : int  64228617 41807492 211342406 211342406 211342406 211342406 211342406 219124219 137801168 131791972 ...
 $ end            : int  64693151 41859827 211543831 211543831 211543831 211543831 211543831 219128584 137804992 131905472 ...
 $ strand         : chr  "+" "-" "+" "+" ...
 $ chrom          : int  14 19 2 2 2 2 2 2 5 6 ...
 $ pos            : int  64651663 41790086 211457261 211461880 211675576 211781209 2120

In [80]:
# Re-pool the complete gene-level and isoform-level eQTL tables (this replaces
# whatever `eqtls` held before), then display the summary statistics of every
# eGene row that is either for PYROXD2 or for variant rs942814.
eqtls = rbind(qtl_data[["qtl"]][["rna"]],
              qtl_data[["qtl"]][["isoform"]])
eqtls[eqtls$egene == TRUE & (eqtls$rsid == "rs942814" | eqtls$gene_name == "PYROXD2"),
      c("gene_name", "transcript_id", "type", "id", "rsid", "beta", "se", "pval", "qval")]

Unnamed: 0_level_0,gene_name,transcript_id,type,id,rsid,beta,se,pval,qval
Unnamed: 0_level_1,<chr>,<chr>,<int>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
3831,PYROXD2,ENSG00000119943.13_6,0,VAR_10_100152055_C_G,rs6584191,0.741582,0.04963883,4.735138e-59,1.840302e-54
42005,PYROXD2,ENST00000462874.1_2,0,VAR_10_100102771_G_A,rs11189559,0.4997102,0.09838592,1.006478e-07,0.0003616606
42007,PYROXD2,ENST00000370575.5_3,0,VAR_10_100144009_T_TGAC,rs5787280,1.1496729,0.08619978,2.38487e-45,1.052761e-40
42010,PYROXD2,ENST00000464808.1_2,0,VAR_10_100154545_G_A,rs11189587,-0.4512822,0.08178558,2.129927e-08,8.408225e-05
42011,PYROXD2,ENST00000483923.5_2,0,VAR_10_100156853_T_C,rs7072216,-0.6555906,0.07387968,1.274535e-16,1.106006e-12
42013,PYROXD2,ENST00000370575.5_3,1,VAR_10_100167322_C_T,rs11189595,0.4021789,0.0913339,9.837673e-06,0.03241165
42015,PYROXD2,ENST00000494941.1_2,0,VAR_10_100170762_C_T,rs3814140,-0.8363076,0.09096532,2.795769e-21,3.409871e-17
