In [1]:
# Work from the project root: every "script/..." and "pipeline/..." path used
# further down in this notebook is relative to this directory.
setwd("/frazer01/projects/CARDIPS/analysis/cardiac_eqtls")

# Project-level scripts. Judging by the file names they load packages, shared
# input data and helper functions (e.g. add_rownames, fread are used below
# without being defined in this notebook) -- TODO confirm against the scripts.
source("script/packages.R"  )
source("script/input_data.R")
source("script/functions.R" )


In [2]:
# Covariate table used for the eQTL models: read the tab-separated file as a
# plain data.frame, then pass it through the project helper add_rownames().
covs2qtl = fread("pipeline/3.1.covariates/covariates.txt",
                 sep        = "\t",
                 header     = TRUE,
                 data.table = FALSE)
covs2qtl = add_rownames(covs2qtl)


# Prepare eQTLs:
- define covariates and save to text
- run eQTL analysis

In [3]:
# Covariates that are included in every eQTL model, before any PEER factors
# are appended: four sample-level covariates followed by pc1 ... pc20.
# NOTE(review): "chromsomes" is spelled this way on purpose here; it is a
# runtime string and presumably has to match a column name in the covariate
# table -- do not correct the spelling without checking that file.
covs_standard = c(
    "sex",
    "total_reads_norm",
    "uniquely_mapped_reads_to_canonical_chromsomes",
    "mitochondrial_reads",
    paste0("pc", seq_len(20))
)

In [4]:
# Gene IDs to analyse, one per line in the input file. `n` is the 1-based line
# number of each ID; nrow(gene_ids) is later used as the size of the qsub task
# array, so `n` presumably corresponds to the SGE task id -- TODO confirm in
# script/3.3.run_eQTLs.R.
gene_ids = readLines("pipeline/1.2.expression/tpm_gene.gene_ids.txt")

# Fail with a clear message on an empty file. (Previously 1:length() produced
# c(1, 0) in that case and data.frame() stopped with a row-count mismatch.)
if (length(gene_ids) == 0)
{
    stop("pipeline/1.2.expression/tpm_gene.gene_ids.txt contains no gene IDs", call. = FALSE)
}

gene_ids = data.frame(gene_id = gene_ids, n = seq_along(gene_ids))


In [5]:
# IDs used for the isoform analysis, one per line in the input file (the
# column is still called gene_id). `n` is the 1-based line number of each ID;
# nrow(iso_ids) is later used as the size of the qsub task array, so `n`
# presumably corresponds to the SGE task id -- TODO confirm in
# script/3.3.run_eQTLs.R.
iso_ids = readLines("pipeline/1.2.expression/use_isoform.gene_ids.txt")

# Fail with a clear message on an empty file. (Previously 1:length() produced
# c(1, 0) in that case and data.frame() stopped with a row-count mismatch.)
if (length(iso_ids) == 0)
{
    stop("pipeline/1.2.expression/use_isoform.gene_ids.txt contains no IDs", call. = FALSE)
}

iso_ids = data.frame(gene_id = iso_ids, n = seq_along(iso_ids))


## Prepare variables for interaction test
- for test: only use a small subset
- for the real analysis: use all

In [6]:
# Names of all available covariates: the column names of the covariate table
# after it has been passed through the project helper add_rownames().
covariates = fread("pipeline/3.1.covariates/covariates.txt",
                   sep        = "\t",
                   header     = TRUE,
                   data.table = FALSE)
covariates = colnames(add_rownames(covariates))


In [7]:
# Covariates to test for interaction.
#   toint_test: small fixed subset used for the trial runs.
#   toint_real: every covariate whose name matches at least one of the four
#               patterns below (regular-expression match, so the "." in
#               "cibersort.regular" matches any single character).
toint_test = c('ipsc_cvpc', 'heart', 'arteria')
toint_real = covariates[Reduce(`|`, lapply(c("cibersort.regular", "heart", "arteria", "ipsc_cvpc"),
                                           grepl,
                                           x = covariates))]

In [8]:
# Absolute paths of the two interaction-covariate lists (absolute because they
# are later passed on the command line of the submitted jobs).
toint_test_file = file.path(getwd(), "pipeline/3.2.eqtls/vars/covariates_to_interaction_test.txt")
toint_real_file = file.path(getwd(), "pipeline/3.2.eqtls/vars/covariates_to_interaction.txt")

# One covariate name per line.
writeLines(toint_test, toint_test_file)
writeLines(toint_real, toint_real_file)


## Test different combinations of PEER factors
- keep standard covariates
- try increasing numbers of PEER factors, in steps of 20
- select 200 random genes (20 for each expression decile)

### Select random genes

In [9]:
# Expression matrix of expressed genes and the gene annotation table.
tpm      = fread("pipeline/1.2.expression/tpm_gene.expressed.txt", sep = "\t", header = TRUE, data.table = FALSE)
tpm      = add_rownames(tpm)
geneinfo = fread("pipeline/1.2.expression/gene_info.txt", sep = "\t", header = TRUE, data.table = FALSE)

# Keep only genes annotated on chr1 ... chr22. Rows are selected by name, so
# this assumes add_rownames() left the gene IDs as row names -- TODO confirm.
tpm      = tpm[geneinfo[is.element(geneinfo$chrom, paste0("chr", 1:22)), "gene_id"], ]

In [None]:
# Mean TPM per gene (row of tpm), with the gene ID kept both as row name and
# as an explicit column.
gene2exp              = as.data.frame(rowMeans(tpm))
names(gene2exp)       = "tpm"
gene2exp[["gene_id"]] = rownames(gene2exp)

# Decile boundaries of mean expression: 11 rows (0%, 10%, ..., 100%), with the
# percentage stored as a number in `decile`.
deciles               = as.data.frame(quantile(gene2exp[["tpm"]], probs = (0:10) / 10))
names(deciles)        = "tpm"
deciles[["decile"]]   = as.numeric(gsub("%", "", rownames(deciles)))

# Draw 20 genes without replacement from each of the 10 bins between
# consecutive boundaries.
#   - Bins are closed on both sides, so a gene lying exactly on a boundary
#     belongs to two neighbouring bins.
#   - The seed is reset to 1 before every draw, which makes each bin's sample
#     reproducible independently of the others.
genes2test            = unlist(lapply(seq_len(nrow(deciles) - 1) + 1, function(upper)
{
    lower_tpm = deciles[upper - 1, "tpm"]
    upper_tpm = deciles[upper    , "tpm"]
    in_bin    = gene2exp$tpm >= lower_tpm & gene2exp$tpm <= upper_tpm
    
    set.seed(1)
    
    sample(gene2exp[in_bin, "gene_id"], size = 20, replace = FALSE)
}))

# Replace the sampled IDs by the matching rows of gene_ids (ID plus index n);
# an ID that was drawn in two bins appears only once.
genes2test            = gene_ids[is.element(gene_ids$gene_id, genes2test), ]

### Prepare covariate files and run eQTLs for each number of PEER factors

In [None]:
# Save the IDs of the sampled test genes, one per line; the absolute path is
# later passed to the eQTL script as --gene_file.
file_genes_to_test = file.path(getwd(), "pipeline/3.2.eqtls/genes_to_test.txt")
writeLines(genes2test$gene_id, file_genes_to_test)

In [10]:
# Submit an SGE array job that runs the eQTL script on the sampled test genes,
# using the standard covariates plus the first `x` PEER factors.
#
# Args:
#   x: number of PEER factors to add (a single non-negative whole number;
#      0 means standard covariates only).
#
# Reads from the calling environment: covs_standard, file_genes_to_test,
# toint_test_file and genes2test (one array task per row).
#
# Side effects:
#   - writes pipeline/3.2.eqtls/vars/test_peer.<x>.txt, one covariate per line
#     (presumably located by 3.3.run_eQTLs.R through --name; TODO confirm)
#   - writes pipeline/3.2.eqtls/script/run.3.3.test_peer.<x>.gene.sh
#   - submits that script with qsub; job stdout/stderr go to
#     logs/eqtls.test_peer.<x>.gene.out / .err
#
# Returns: the qsub exit status, invisibly. A warning is raised when it is
# non-zero so that a failed submission does not go unnoticed.
#
# NOTE: a function with the same name but six arguments is defined further
# down in this notebook; once that cell has been run, this one-argument
# version is no longer available.
run_eqtls_qsub = function(x)
{
    stopifnot(is.numeric(x), length(x) == 1, !is.na(x), x >= 0, x == round(x))
    
    # seq_len() gives no PEER names at all for x = 0, whereas 1:x would have
    # produced "peer1" and "peer0".
    peer_covs = sprintf("peer%d", seq_len(x))
    
    writeLines(c(covs_standard, peer_covs), con = paste("pipeline/3.2.eqtls/vars/test_peer", x, "txt", sep = "."), sep = "\n")
    
    name    = paste("test_peer", x, sep = ".")
    sh_file = file.path(getwd(), "pipeline/3.2.eqtls/script", paste("run.3.3", name, "gene", "sh", sep = "."))
    
    # Command executed by every array task; $SGE_TASK_ID is expanded by the
    # shell at run time.
    rscript_call = paste("Rscript", file.path(getwd(), "script", "3.3.run_eQTLs.R"),
                         "--taskid"          , "$SGE_TASK_ID",
                         "--gene_file"       , file_genes_to_test,
                         "--interaction_file", toint_test_file,
                         "--name"            , name,
                         "--analysis"        , "gene")
    
    # sep = "\n\n" leaves a blank line between the script lines.
    writeLines(text = c("#!/usr/bin/sh",
                        "source /frazer01/home/matteo/.bashrc",
                        rscript_call),
               con  = sh_file, 
               sep  = "\n\n")
    
    qsub_command = paste("qsub",
                         "-t", paste(1, "-", nrow(genes2test), ":1", sep = ""),
                         "-tc", 200, 
                         "-o" , file.path(getwd(), paste("logs/eqtls", name, "gene", "out", sep = ".")),
                         "-e" , file.path(getwd(), paste("logs/eqtls", name, "gene", "err", sep = ".")),
                         sh_file
                        )
    
    #message(qsub_command)
    status = system(qsub_command)
    
    if (status != 0)
    {
        warning("qsub exited with status ", status, " for job ", name, call. = FALSE)
    }
    
    invisible(status)
}


#### First run: 5, 10, and then every 20 PEER factors (20 to 300)

In [None]:
# Coarse sweep over the number of PEER factors: 5, 10, then 20, 40, ..., 300.
# Uses the one-argument run_eqtls_qsub(x) defined above; each call submits one
# array job.
for (n_peer in c(5, 10, (1:15) * 20))
{
    run_eqtls_qsub(n_peer)
}


#### Second run: refine around the best-performing number of PEER factors (270 to 290)

In [None]:
# Finer sweep around the best value from the first run. Uses the one-argument
# run_eqtls_qsub(x) defined above; each call submits one array job.
for (n_peer in c(270, 275, 285, 290))
{
    run_eqtls_qsub(n_peer)
}


# Run eQTLs (gene and isoform)
- genes: 285 PEER factors
- isoforms: 80 PEER factors

In [11]:
# Absolute paths of the ID lists for the gene-level and isoform-level runs;
# they are passed to the eQTL script as --gene_file.
file_genes    = file.path(getwd(), "pipeline/1.2.expression/tpm_gene.gene_ids.txt")
file_isoforms = file.path(getwd(), "pipeline/1.2.expression/use_isoform.gene_ids.txt")


In [13]:
# Submit an SGE array job that runs the eQTL script on every ID of an analysis
# (gene or isoform), using the standard covariates plus the first `x` PEER
# factors.
#
# Args:
#   name:          run name; used in all output file names and passed to the
#                  eQTL script as --name.
#   analysis_type: analysis label (the notebook uses "gene" and "isoform");
#                  passed as --analysis.
#   x:             number of PEER factors to add (a single non-negative whole
#                  number; 0 means standard covariates only).
#   file_genes:    path of the ID list, passed as --gene_file.
#   gene_ids:      data.frame with one row per ID; only nrow() is used, as the
#                  number of array tasks.
#   toint_file:    path of the interaction-covariate list, passed as
#                  --interaction_file.
#
# Reads from the calling environment: covs_standard.
#
# Side effects:
#   - creates pipeline/3.2.eqtls/eqtls_by_gene/<name>.<analysis_type>/
#     (presumably where 3.3.run_eQTLs.R writes its results; TODO confirm)
#   - writes pipeline/3.2.eqtls/vars/<name>.<analysis_type>.txt, one covariate
#     per line
#   - writes pipeline/3.2.eqtls/script/run.3.3.<name>.<analysis_type>.sh
#   - prints the qsub command and submits the script; job stdout/stderr go to
#     logs/eqtls.<name>.<analysis_type>.out / .err
#
# Returns: the qsub exit status, invisibly. A warning is raised when it is
# non-zero so that a failed submission does not go unnoticed.
#
# NOTE: this definition replaces the one-argument run_eqtls_qsub(x) defined
# earlier in this notebook.
run_eqtls_qsub = function(name, analysis_type, x, file_genes, gene_ids, toint_file)
{
    stopifnot(is.numeric(x), length(x) == 1, !is.na(x), x >= 0, x == round(x))
    
    job_tag = paste(name, analysis_type, sep = ".")
    
    # recursive = TRUE: without it, a missing parent directory made this call
    # fail, and showWarnings = FALSE hid that failure.
    dir.create(file.path("pipeline/3.2.eqtls/eqtls_by_gene", job_tag), showWarnings = FALSE, recursive = TRUE)
    
    # seq_len() gives no PEER names at all for x = 0, whereas 1:x would have
    # produced "peer1" and "peer0".
    peer_covs = sprintf("peer%d", seq_len(x))
    
    writeLines(c(covs_standard, peer_covs), con = file.path("pipeline/3.2.eqtls/vars", paste(job_tag, "txt", sep = ".")), sep = "\n")
    
    sh_file = file.path(getwd(), "pipeline/3.2.eqtls/script", paste("run.3.3", job_tag, "sh", sep = "."))
    
    # Command executed by every array task; $SGE_TASK_ID is expanded by the
    # shell at run time.
    rscript_call = paste("Rscript", file.path(getwd(), "script", "3.3.run_eQTLs.R"),
                         "--taskid"          , "$SGE_TASK_ID",
                         "--gene_file"       , file_genes,
                         "--interaction_file", toint_file,
                         "--name"            , name,
                         "--analysis"        , analysis_type)
    
    # sep = "\n\n" leaves a blank line between the script lines.
    writeLines(text = c("#!/usr/bin/sh",
                        "source /frazer01/home/matteo/.bashrc",
                        rscript_call),
               con  = sh_file, 
               sep  = "\n\n")
    
    qsub_command = paste("qsub",
                         "-l", "short",
                         "-t", paste(1, "-", nrow(gene_ids), ":1", sep = ""),
                         "-tc", 500, 
                         "-o" , file.path(getwd(), paste("logs/eqtls", job_tag, "out", sep = ".")),
                         "-e" , file.path(getwd(), paste("logs/eqtls", job_tag, "err", sep = ".")),
                         sh_file
                        )
    
    message(qsub_command)
    status = system(qsub_command)
    
    if (status != 0)
    {
        warning("qsub exited with status ", status, " for job ", job_tag, call. = FALSE)
    }
    
    invisible(status)
}

# Production runs: all genes with 285 PEER factors, all isoforms with 80, both
# using the full interaction-covariate list.
run_eqtls_qsub(name          = "cardiac_eqtls",
               analysis_type = "gene",
               x             = 285,
               file_genes    = file_genes,
               gene_ids      = gene_ids,
               toint_file    = toint_real_file)

run_eqtls_qsub(name          = "cardiac_eqtls",
               analysis_type = "isoform",
               x             = 80,
               file_genes    = file_isoforms,
               gene_ids      = iso_ids,
               toint_file    = toint_real_file)


qsub -l short -t 1-20647:1 -tc 500 -o /frazer01/projects/CARDIPS/analysis/cardiac_eqtls/logs/eqtls.cardiac_eqtls.gene.out -e /frazer01/projects/CARDIPS/analysis/cardiac_eqtls/logs/eqtls.cardiac_eqtls.gene.err /frazer01/projects/CARDIPS/analysis/cardiac_eqtls/pipeline/3.2.eqtls/script/run.3.3.cardiac_eqtls.gene.sh

qsub -l short -t 1-44961:1 -tc 500 -o /frazer01/projects/CARDIPS/analysis/cardiac_eqtls/logs/eqtls.cardiac_eqtls.isoform.out -e /frazer01/projects/CARDIPS/analysis/cardiac_eqtls/logs/eqtls.cardiac_eqtls.isoform.err /frazer01/projects/CARDIPS/analysis/cardiac_eqtls/pipeline/3.2.eqtls/script/run.3.3.cardiac_eqtls.isoform.sh



In [11]:
# Display the covariates selected for the full interaction analysis.
toint_real