In [3]:
# Work from the project directory so that the relative paths used below
# ("script/...", "pipeline/...") resolve against it.
setwd("/frazer01/projects/CARDIPS/analysis/cardiac_gwas_coloc")

# Load the project's shared helpers.
# NOTE(review): rbindlist() is used further down but never loaded here, so this
# script presumably attaches data.table -- confirm.
source("script/functions.R"  )


In [4]:
# Create the output directory for the differential expression results.
# recursive = TRUE also creates the parent "pipeline" directory when it is
# missing; without it the call fails and, because warnings are suppressed, the
# failure would only surface later when saveRDS() cannot write its file.
dir.create("pipeline/diffexp"     , showWarnings = FALSE, recursive = TRUE)


In [5]:
# Load the two serialized inputs:
#   qtl_list: list read below through its $phenotypes, $qtl and $interactions elements
#   exp_list: list read below through its $color, $metadata, $covariates and $expression elements
qtl_list = readRDS("/frazer01/projects/CARDIPS/analysis/cardiac_qtls_combined/input/qtl/qtls.RDS"   )
exp_list = readRDS("/frazer01/projects/CARDIPS/analysis/cardiac_qtls_combined/input/expdata_qtl.rds")

In [6]:
# Keep the "rna" and "isoform" rows of the phenotype table.
phenotypes     = qtl_list$phenotypes[c("rna", "isoform"),]
# Take the "rna" QTL table and keep only the rows flagged as eGenes.
# NOTE(review): rows where egene is NA are returned as all-NA rows by this
# kind of logical subsetting -- confirm egene has no missing values.
qtls           = qtl_list$qtl$rna
qtls           = qtls[ qtls$egene == TRUE, ]
# Composite keys, joined with a single space (paste() default separator):
#   trid    = transcript_id + id
#   tr2type = transcript_id + type
qtls$trid      = paste(qtls$transcript_id, qtls$id  )
qtls$tr2type   = paste(qtls$transcript_id, qtls$type)

In [54]:
# Interaction table for the "rna" analysis; test_ts_diffexp() below reads its
# transcript_id, cell and interaction columns.
ints           = qtl_list$interactions$rna


In [18]:
# Tissue annotation table; its body_site column supplies the tissue identifiers
# that are iterated over below.
tissues              = exp_list$color$by_tissue
metadata             = exp_list$metadata
# Keep the run identifier, the covariates used in the model and one column per
# tissue (the columns named after tissues$body_site).
covariates           = exp_list$covariates[, c("run", "sex", "total_reads_norm", "uniquely_mapped_reads_to_canonical_chromsomes", "mitochondrial_reads", tissues$body_site)]
# Expression table; it is indexed below as expdata[gene_id, sample], i.e. genes
# in rows and samples in columns.
expdata              = exp_list$expression$rna$normalized
# Index covariates by run so that rows can be selected with colnames(expdata).
rownames(covariates) = covariates$run

# Differential expression: each tissue vs. all others

In [59]:
# Fit a linear model for one gene and return the coefficient row of the
# `tissue` term.
#
# Arguments:
#   gene_id: row name of the gene in `expdata`.
#   expdata: expression table indexed as expdata[gene_id, sample].
#   totest:  data frame of model terms, one row per sample (row names are the
#            sample identifiers); must contain a `tissue` column.
#
# Returns a one-row data frame taken from the lm() coefficient summary
# (estimate, standard error, t value, p-value) for `tissue`.
calculate_diffexp_gene = function(gene_id, expdata, totest)
{
    # Pull the expression values in the same sample order as the design rows
    samples    = rownames(totest)
    totest$exp = as.numeric(expdata[ gene_id, samples])

    # Regress expression on every other column of the design table
    fit        = lm(exp ~ ., data = totest)
    coef_table = as.data.frame(coefficients(summary(fit)))

    return(coef_table["tissue", ])
}

# Run the per-gene linear model for one tissue across every gene in `expdata`.
#
# Arguments:
#   tissue:     name of the covariates column used as the `tissue` model term.
#   expdata:    expression table, genes in rows and samples in columns.
#   metadata:   accepted for interface compatibility; not used in the body.
#   covariates: data frame whose row names are the sample identifiers.
#
# Returns a data frame with one row per gene and the columns beta, se, tval,
# pval, transcript_id and qval (Benjamini-Hochberg adjusted pval).
calculate_diffexp = function(tissue, expdata, metadata, covariates)
{
    # Progress output: print the tissue being processed
    message(tissue, appendLF = FALSE)

    samples       = colnames(expdata)
    genes         = rownames(expdata)
    model_terms   = c("sex", "total_reads_norm", "uniquely_mapped_reads_to_canonical_chromsomes", "mitochondrial_reads")

    # Design table: model covariates plus the column of the tissue under test
    design        = covariates[ samples, model_terms]
    design$tissue = covariates[ samples, tissue]

    # One coefficient row per gene, stacked in the row order of expdata
    per_gene      = lapply(genes, function(gene_id){calculate_diffexp_gene(gene_id, expdata, design)})
    result        = as.data.frame(rbindlist(per_gene), stringsAsFactors = FALSE)

    colnames(result)     = c("beta", "se", "tval", "pval")
    result$transcript_id = genes
    result$qval          = p.adjust(result$pval, method = "BH")

    return(result)
}

# Top-level `tissue` value; the calls below do not read it.
# NOTE(review): presumably kept for stepping through the functions by hand -- confirm.
tissue = "ipsc_cvpc"

# One differential expression table per tissue, stored as a list named by body_site
diffexp = setNames(lapply(tissues$body_site, function(body_site){calculate_diffexp(body_site, expdata, metadata, covariates)}),
                   tissues$body_site)

saveRDS(diffexp, "pipeline/diffexp/diffexp.rds")

ipsc_cvpc
heart_atrium
heart_ventricle
arteria_aorta
arteria_coronary


# Test whether genes flagged as tissue-specific are more often differentially expressed in the same tissue

In [68]:
# Test whether genes flagged as specific to a tissue are enriched among the
# genes that are differentially expressed, using Fisher's exact test.
#
# Arguments:
#   tissue: tissue identifier; compared against ints$interaction and copied
#           into the output.
#   de:     data frame with the columns transcript_id and qval.
#   ints:   data frame with the columns transcript_id, cell and interaction.
#
# Returns a one-row data frame with the columns tissue, or (odds ratio),
# ci1 / ci2 (confidence interval bounds) and pval.
test_ts_diffexp = function(tissue, de, ints)
{
    # Only genes present in the interaction table take part in the test
    de     = de[ de$transcript_id %in% ints$transcript_id, ]

    # Genes with cell == TRUE and an interaction for this tissue;
    # which() drops comparisons that evaluate to NA
    ts_ids = ints[ which(ints$cell == TRUE & ints$interaction == tissue), "transcript_id"]

    # A missing q-value counts as "not differentially expressed". Built as a
    # plain logical vector because assigning into a data frame with a logical
    # row index that contains NA stops with an error.
    de$ts  = de$transcript_id %in% ts_ids
    de$de  = !is.na(de$qval) & de$qval <= 0.05

    # Fixed factor levels keep the contingency table 2 x 2 even when one
    # category is empty; fisher.test() rejects tables with a single row or column.
    totest = table(ts = factor(de$ts, levels = c(FALSE, TRUE)),
                   de = factor(de$de, levels = c(FALSE, TRUE)))
    test   = fisher.test(totest)
    out    = data.frame(tissue = tissue, or = test$estimate, ci1 = test$conf.int[[1]], ci2 = test$conf.int[[2]], pval = test$p.value)

    return(out)
}

# Top-level `tissue` value; the calls below do not read it.
# NOTE(review): presumably kept for stepping through the functions by hand -- confirm.
tissue = "ipsc_cvpc"

# One enrichment test per tissue, stacked into a single data frame
tests      = lapply(tissues$body_site, function(body_site){test_ts_diffexp(body_site, diffexp[[body_site]], ints)})
tests      = as.data.frame(rbindlist(tests), stringsAsFactors = FALSE)

# Bonferroni correction across the tested tissues (stored under the name qval)
tests$qval = p.adjust(tests$pval, method = "bonferroni")

In [69]:
# Display the per-tissue enrichment results
tests

tissue,or,ci1,ci2,pval,qval
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
ipsc_cvpc,0.8828144,0.5446318,1.510643,0.60679657,1.0
heart_atrium,1.7517371,1.1925172,2.642562,0.00268343,0.01341715
heart_ventricle,1.3193624,1.0002053,1.75846,0.04959098,0.24795492
arteria_aorta,1.2724191,0.9466962,1.739674,0.11265988,0.5632994
arteria_coronary,1.2532989,0.73044,2.255053,0.44812741,1.0


In [15]:
# Inspect the structure of the tissue annotation table
str(tissues)

'data.frame':	5 obs. of  4 variables:
 $ body_site: chr  "ipsc_cvpc" "heart_atrium" "heart_ventricle" "arteria_aorta" ...
 $ tissue   : chr  "iPSC-CVPC" "Atrium" "Ventricle" "Aorta" ...
 $ color    : chr  "#0066CC" "#FF34B3" "#8B1C62" "#8B636C" ...
 $ order    : int  1 2 3 4 5


In [55]:
# Inspect the structure of the interaction table
str(ints)

'data.frame':	288480 obs. of  39 variables:
 $ transcript_id: chr  "ENSG00000000457.14_7" "ENSG00000000460.17_7" "ENSG00000000971.16_4" "ENSG00000001036.14_5" ...
 $ gene_id      : chr  "ENSG00000000457.14_7" "ENSG00000000460.17_7" "ENSG00000000971.16_4" "ENSG00000001036.14_5" ...
 $ gene_name    : chr  "SCYL3" "C1orf112" "CFH" "FUCA2" ...
 $ gene_type    : chr  "protein_coding" "protein_coding" "protein_coding" "protein_coding" ...
 $ start        : int  169818772 169631245 196621173 143815949 53362139 41040684 41040684 24683489 24742292 46097726 ...
 $ end          : int  169863408 169823221 196716634 143832857 53481768 41067715 41067715 24743424 24799466 46114425 ...
 $ strand       : chr  "-" "+" "+" "-" ...
 $ chrom        : int  1 1 1 6 6 6 6 1 1 6 ...
 $ pos          : int  169860528 169653795 196357746 143927360 53377619 41098510 41222926 24731659 24759685 46134213 ...
 $ ref          : chr  "C" "A" "A" "G" ...
 $ alt          : chr  "A" "G" "G" "T" ...
 $ rsid         : chr  "