In [1]:
# Work from the project root so that the relative "script/..." and
# "pipeline/..." paths used in this notebook resolve.
setwd("/frazer01/projects/CARDIPS/analysis/cardiac_eqtls")

# Source the three shared project scripts, in their original order.
# source() evaluates in the global environment by default, so everything
# they define remains visible to the cells below.
invisible(lapply(
    c("script/packages.R", "script/input_data.R", "script/functions.R"),
    source
))


In [2]:
# Annotation tables: tab-separated files with a header row, returned as plain
# data frames (data.table = FALSE) rather than data.tables.
gene_info <- fread(
    "pipeline/1.2.expression/gene_info.txt",
    sep = "\t", header = TRUE, data.table = FALSE
)
isof_info <- fread(
    "pipeline/1.2.expression//isoform_info.txt",
    sep = "\t", header = TRUE, data.table = FALSE
)

# Give the gene table a transcript_id column that is a copy of gene_id.
gene_info$transcript_id <- gene_info$gene_id

In [3]:
# Make sure the directory for this notebook exists; showWarnings = FALSE
# keeps the call quiet when it is already there.
dir.create(path = "pipeline/9.1.eqtls_vs_diffexp", showWarnings = FALSE)

# Input data

In [39]:
# Differential-expression tables (tab-separated, with header, as data frames).
diffexp <- fread(
    "pipeline/4.15.differential_expression_ridge/diffexp.txt",
    sep = "\t", header = TRUE, data.table = FALSE
)
diffcell <- fread(
    "pipeline/4.1.differential_expression/diffexp_cell.txt",
    sep = "\t", header = TRUE, data.table = FALSE
)

# eQTL tables for genes and isoforms: the "egenes" and "interactions" files.
eqtl_genes <- fread(
    "pipeline/3.2.eqtls/eqtls/cardiac_eqtls.gene.egenes.txt",
    sep = "\t", header = TRUE, data.table = FALSE
)
int_genes <- fread(
    "pipeline/3.2.eqtls/eqtls/cardiac_eqtls.gene.interactions.txt",
    sep = "\t", header = TRUE, data.table = FALSE
)
eqtl_isoforms <- fread(
    "pipeline/3.2.eqtls/eqtls/cardiac_eqtls.isoform.egenes.txt",
    sep = "\t", header = TRUE, data.table = FALSE
)
int_isoforms <- fread(
    "pipeline/3.2.eqtls/eqtls/cardiac_eqtls.isoform.interactions.txt",
    sep = "\t", header = TRUE, data.table = FALSE
)

# gene_id of every gene whose gene_name is listed in pipeline/rbps.txt
# (one name per line).
rbps <- gene_info[gene_info$gene_name %in% readLines("pipeline/rbps.txt"), "gene_id"]

In [29]:
# Add a logical rbp column to each eQTL table: every row starts at FALSE and
# the rows whose gene_id is in rbps are then switched to TRUE.
eqtl_genes$rbp <- FALSE
eqtl_genes[eqtl_genes$gene_id %in% rbps, "rbp"] <- TRUE

int_genes$rbp <- FALSE
int_genes[int_genes$gene_id %in% rbps, "rbp"] <- TRUE

eqtl_isoforms$rbp <- FALSE
eqtl_isoforms[eqtl_isoforms$gene_id %in% rbps, "rbp"] <- TRUE

int_isoforms$rbp <- FALSE
int_isoforms[int_isoforms$gene_id %in% rbps, "rbp"] <- TRUE


# Analysis
## Are differentially expressed genes/isoforms more likely to have stage-specific eQTLs?

In [27]:
# Fisher's exact test of the association between (a) transcripts with an eQTL
# flagged for `interaction` and (b) transcripts flagged as differentially
# expressed in the direction of `interaction`.
#
# Arguments:
#   type         value to match in diffexp$type
#   interaction  tissue of interest; matched against eqtls$interaction and
#                compared with tissue1 / tissue2 to choose the sign of beta
#   tissue1      value to match in diffexp$tissue1
#   tissue2      value to match in diffexp$tissue2
#   eqtls        data frame with columns transcript_id, interaction and cell
#   diffexp      data frame with columns type, tissue1, tissue2, covariate,
#                diffexp, beta and transcript_id
#
# Returns an unnamed list:
#   [[1]] 2 x 2 integer matrix of transcript counts; rows = eQTL flagged for
#         `interaction` / all other eQTL transcripts, columns = "specific"
#         / all other transcripts of the differential-expression subset
#   [[2]] the fisher.test() result for that matrix
test_eqtl_vs_diffexp = function(type, interaction, tissue1, tissue2, eqtls, diffexp)
{
    # which() discards comparisons that evaluate to NA, so missing values in
    # the filter columns cannot turn into all-NA rows.
    keep    = which(diffexp$type == type & diffexp$tissue1 == tissue1 & diffexp$tissue2 == tissue2 & diffexp$covariate == "tissue")
    diffexp = diffexp[keep, , drop = FALSE]

    # A transcript is "specific" when diffexp is TRUE and beta > 0 (interaction
    # is tissue1) or beta < 0 (interaction is tissue2). The flags are built in
    # a plain vector: assigning into the data frame through a logical index
    # that contains NA is an error, and so is assigning a scalar column to a
    # zero-row data frame.
    specific = rep(FALSE, nrow(diffexp))
    if(tissue1 == interaction){specific[which(diffexp$diffexp == TRUE & diffexp$beta > 0)] = TRUE}
    if(tissue2 == interaction){specific[which(diffexp$diffexp == TRUE & diffexp$beta < 0)] = TRUE}
    diffexp$specific = specific

    # Split each universe into "flagged" and "everything else".
    eqtls_cell = unique(eqtls[which(eqtls$interaction == interaction & eqtls$cell == TRUE), "transcript_id"])
    eqtls_all  = setdiff(unique(eqtls$transcript_id), eqtls_cell)
    genes_cell = unique(diffexp[diffexp$specific, "transcript_id"])
    genes_all  = setdiff(unique(diffexp$transcript_id), genes_cell)

    # Only transcripts present in both tables are counted (intersections).
    totest     = matrix(c(length(intersect(eqtls_cell, genes_cell)),
                          length(intersect(eqtls_cell, genes_all )),
                          length(intersect(eqtls_all , genes_cell)),
                          length(intersect(eqtls_all , genes_all ))
                         ),
                        nrow = 2, byrow = TRUE)

    # Keep the argument named `totest`: fisher.test() prints it as "data:".
    test = fisher.test(totest)

    return(list(totest, test))

}

# Settings for the tissue comparison (kept as globals, as before).
type        <- "gene_tpm"
interaction <- "heart"
tissue1     <- "ipsc_cvpc"
tissue2     <- "heart"

# Same comparison tested twice: first with the interaction set to tissue2
# ("heart"), then with it set to tissue1 ("ipsc_cvpc").
test_eqtl_vs_diffexp(type = type, interaction = interaction, tissue1 = tissue1, tissue2 = tissue2,
                     eqtls = int_genes, diffexp = diffexp)
test_eqtl_vs_diffexp(type = type, interaction = tissue1, tissue1 = tissue1, tissue2 = tissue2,
                     eqtls = int_genes, diffexp = diffexp)


[[1]]
     [,1] [,2]
[1,]  137  368
[2,] 2871 8316

[[2]]

	Fisher's Exact Test for Count Data

data:  totest
p-value = 0.4664
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
 0.8756108 1.3219233
sample estimates:
odds ratio 
  1.078336 



[[1]]
     [,1] [,2]
[1,]   58  123
[2,] 4161 7350

[[2]]

	Fisher's Exact Test for Count Data

data:  totest
p-value = 0.2752
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
 0.5973283 1.1500562
sample estimates:
odds ratio 
 0.8329235 



## Are cell type-specific genes/isoforms more likely to have cell type-specific eQTLs?

In [54]:
# Fisher's exact test of the association between (a) transcripts with an eQTL
# flagged for `cell` and (b) transcripts with qval <= 0.05 for `cell` in the
# differential-expression table.
#
# Arguments:
#   type     value to match in diffexp$type
#   cell     value to match in diffexp$cell_type and in eqtls$interaction
#   eqtls    data frame with columns transcript_id, interaction and cell
#   diffexp  data frame with columns type, cell_type, qval and transcript_id
#
# Returns a one-row data frame: type, cell, the four cells of the 2 x 2 table
# (eqtl1/eqtl0 = flagged / other eQTL transcripts, cell1/cell0 = qval <= 0.05
# / other tested transcripts), the odds ratio (or), its confidence interval
# (ci1, ci2) and the p-value (pval) from fisher.test().
test_eqtl_vs_diffexp_cell = function(type, cell, eqtls, diffexp)
{
    # which() drops comparisons that evaluate to NA. Without it a missing
    # qval / interaction / cell value selects an all-NA row, NA ends up as a
    # "transcript" in both sets, and intersect() counts it as shared.
    diffexp = diffexp[which(diffexp$type == type & diffexp$cell_type == cell), , drop = FALSE]

    eqtls_cell = unique(eqtls[which(eqtls$interaction == cell & eqtls$cell == TRUE), "transcript_id"])
    eqtls_all  = setdiff(unique(eqtls$transcript_id), eqtls_cell)
    genes_cell = unique(diffexp[which(diffexp$qval <= 0.05), "transcript_id"])
    genes_all  = setdiff(unique(diffexp$transcript_id), genes_cell)

    # Counts are computed once and reused for both the test and the output.
    # Only transcripts present in both tables are counted (intersections).
    counts = c(eqtl1_cell1 = length(intersect(eqtls_cell, genes_cell)),
               eqtl1_cell0 = length(intersect(eqtls_cell, genes_all )),
               eqtl0_cell1 = length(intersect(eqtls_all , genes_cell)),
               eqtl0_cell0 = length(intersect(eqtls_all , genes_all )))
    totest = matrix(counts, nrow = 2, byrow = TRUE)

    test = fisher.test(totest)

    out = data.frame(type        = type, cell = cell,
                     eqtl1_cell1 = counts[["eqtl1_cell1"]],
                     eqtl1_cell0 = counts[["eqtl1_cell0"]],
                     eqtl0_cell1 = counts[["eqtl0_cell1"]],
                     eqtl0_cell0 = counts[["eqtl0_cell0"]],
                     or          = test$estimate,
                     ci1         = test$conf.int[[1]],
                     ci2         = test$conf.int[[2]],
                     pval        = test$p.value,
                     stringsAsFactors = FALSE  # keep type / cell as character on any R version
                    )

    return(out)

}

# For every cell type found in diffcell (in sorted order), run the gene-level
# and the isoform-level test and stack all result rows into one data frame.
tests <- as.data.frame(
    rbindlist(
        lapply(sort(unique(diffcell$cell_type)), function(cell_type)
        {
            rbind(
                test_eqtl_vs_diffexp_cell("gene_tpm"   , cell_type, int_genes   , diffcell),
                test_eqtl_vs_diffexp_cell("isoform_use", cell_type, int_isoforms, diffcell)
            )
        })
    ),
    stringsAsFactors = FALSE
)

# Order by test type, then cell type.
tests <- tests[order(tests$type, tests$cell), ]

# qval holds Holm-adjusted p-values: "holm" is p.adjust()'s default method,
# written out here so the correction in use is visible.
tests$qval <- p.adjust(tests$pval, method = "holm")


Unnamed: 0_level_0,type,cell,eqtl1_cell1,eqtl1_cell0,eqtl0_cell1,eqtl0_cell0,or,ci1,ci2,pval,qval
Unnamed: 0_level_1,<chr>,<chr>,<int>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,gene_tpm,cibersort.regular.cardiac_muscle,269,158,5756,5509,1.629397,1.3289776,2.0030467,1.239477e-06,1.859216e-05
3,gene_tpm,cibersort.regular.cardiac_neuron,42,76,3792,7782,1.1340988,0.7572788,1.6782099,0.5543799,1.0
5,gene_tpm,cibersort.regular.endocardial,34,45,4381,7232,1.2472037,0.7734297,1.9943343,0.3523202,1.0
7,gene_tpm,cibersort.regular.endothelial,4,6,4501,7181,1.0636093,0.2206371,4.4879649,1.0,1.0
9,gene_tpm,cibersort.regular.fibroblast,93,126,5711,5762,0.7447078,0.5619634,0.9839192,0.03435873,0.4466634
11,gene_tpm,cibersort.regular.immune,111,192,3766,7623,1.1701904,0.9147752,1.4910569,0.1946124,1.0
13,gene_tpm,cibersort.regular.myofibroblast,2,1,2078,9611,9.2459638,0.4811982,543.781673,0.08366004,0.9202604
15,gene_tpm,cibersort.regular.smooth_muscle,120,348,2704,8520,1.086515,0.8714346,1.3472281,0.4406049,1.0
2,isoform_use,cibersort.regular.cardiac_muscle,41,45,1318,5761,3.981427,2.5313447,6.2468613,1.1845e-09,1.8952e-08
4,isoform_use,cibersort.regular.cardiac_neuron,4,28,510,6623,1.854893,0.47098,5.3316927,0.2857271,1.0


## Describe RBPs and their isoforms: are they more likely to have eQTLs?

In [34]:
# Cross-tabulate the egene column against the rbp column, first for genes and
# then for isoforms, using only rows with type == 0.
# NOTE(review): the meaning of type == 0 is not visible in this notebook --
# confirm against the eQTL pipeline before reusing this filter.
table(eqtl_genes   [ eqtl_genes   $type == 0,c("egene", "rbp")])
table(eqtl_isoforms[ eqtl_isoforms$type == 0,c("egene", "rbp")])

# Fisher's exact test on the same two 2 x 2 tables. The table() call is passed
# inline on purpose: fisher.test() prints the deparsed argument as its "data:"
# label, so storing the table in a variable would change the printed output.
fisher.test(table(eqtl_genes   [ eqtl_genes   $type == 0,c("egene", "rbp")]))
fisher.test(table(eqtl_isoforms[ eqtl_isoforms$type == 0,c("egene", "rbp")]))


       rbp
egene   FALSE  TRUE
  FALSE  7835    59
  TRUE  11635    57

       rbp
egene   FALSE  TRUE
  FALSE 29518   349
  TRUE   7107    58


	Fisher's Exact Test for Count Data

data:  table(eqtl_genes[eqtl_genes$type == 0, c("egene", "rbp")])
p-value = 0.02254
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
 0.4435180 0.9537343
sample estimates:
odds ratio 
  0.650604 



	Fisher's Exact Test for Count Data

data:  table(eqtl_isoforms[eqtl_isoforms$type == 0, c("egene", "rbp")])
p-value = 0.007973
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
 0.5128156 0.9147867
sample estimates:
odds ratio 
 0.6902554 
