In [1]:
# All relative paths used in this notebook are resolved against the project root.
setwd("/frazer01/projects/CARDIPS/analysis/cardiac_eqtls")

# Load the shared project scripts, in order. source() evaluates each file in the
# global environment by default, so calling it through lapply() defines the same
# objects as calling it directly.
# Currently disabled:
#source("script/input_data.R")
#source("script/functions.R" )
invisible(lapply(c("script/packages.R", "script/colors.R"), source))


In [2]:
# Create the output folders for this step (parent first, then its two children).
# Warnings are suppressed so re-running the cell on existing folders stays quiet.
invisible(lapply(
	c("pipeline/8.1.tool",
	  "pipeline/8.1.tool/input_data",
	  "pipeline/8.1.tool/tool"),
	dir.create,
	showWarnings = FALSE
))


# Functions

In [5]:
# Promote the first column of a data.frame (e.g. one returned by fread with
# data.table = FALSE) to row names and remove that column from the table.
#   x: data.frame whose first column holds the row identifiers
# Returns x with row names set to the former first column and that column dropped.
add_rownames = function(x)
{
	row_ids     = x[, 1]
	x[, 1]      = NULL
	rownames(x) = row_ids
	x
}


# Input data

In [8]:
# Load the result tables produced by earlier pipeline steps. Every file is read as a
# tab-separated table with a header row and returned as a plain data.frame
# (data.table = FALSE), so base data.frame indexing applies to all of them below.
geneinfo_gene       = fread("pipeline/1.2.expression/gene_info.txt"                          , sep = "\t", header = TRUE , data.table = FALSE)
geneinfo_isoform    = fread("pipeline/1.2.expression/isoform_info.txt"                       , sep = "\t", header = TRUE , data.table = FALSE)
diffexp             = fread("pipeline/4.1.differential_expression/diffexp.txt"               , sep = "\t", header = TRUE , data.table = FALSE)
diffexp_cell        = fread("pipeline/4.1.differential_expression/diffexp_cell.txt"          , sep = "\t", header = TRUE , data.table = FALSE)
diffexp_cell_tissue = fread("pipeline/4.1.differential_expression/diffexp_cell_tissue.txt"   , sep = "\t", header = TRUE , data.table = FALSE)
eqtl_genes          = fread("pipeline/3.2.eqtls/eqtls/cardiac_eqtls.gene.egenes.txt"         , sep = "\t", header = TRUE , data.table = FALSE)
eqtl_isoforms       = fread("pipeline/3.2.eqtls/eqtls/cardiac_eqtls.isoform.egenes.txt"      , sep = "\t", header = TRUE , data.table = FALSE)
int_genes           = fread("pipeline/3.2.eqtls/eqtls/cardiac_eqtls.gene.interactions.txt"   , sep = "\t", header = TRUE , data.table = FALSE)
int_isoforms        = fread("pipeline/3.2.eqtls/eqtls/cardiac_eqtls.isoform.interactions.txt", sep = "\t", header = TRUE , data.table = FALSE)
coloc               = fread("pipeline/3.2.eqtls/eqtl_overlap/cardiac_eqtls.coloc_by_gene.txt", sep = "\t", header = TRUE, data.table = FALSE)
# NOTE(review): this table is read from an absolute path in a user home directory,
# outside the project tree that setwd() points to.
diffexp_quadrant    = fread("/frazer01/home/jennifer/projects/cardiac_eqtls/diffexp/results/summary.cell_stage_associations.txt", sep = "\t", header = TRUE , data.table = FALSE)
manifest            = fread("pipeline/6.1.coloc_gwas/traits.manifest.txt"                    , sep = "\t", header = TRUE, data.table = FALSE)
# Attach gene_id / gene_name / gene_type to each coloc row by matching coloc$transcript_id1
# against the isoform table. merge() defaults to an inner join, so coloc rows whose
# transcript_id1 has no match in geneinfo_isoform are dropped here.
coloc               = merge(coloc, geneinfo_isoform[,c("transcript_id", "gene_id", "gene_name", "gene_type")], by.x = "transcript_id1", by.y = "transcript_id")
# Key the manifest by its id column so rows can be looked up by name.
rownames(manifest)  = manifest$id

# Sample-level inputs: metadata, covariates, the two variable lists used for the eQTL /
# interaction analyses, and the expression matrices. Tables wrapped in add_rownames()
# use their first column as row names.
metadata   =              fread("pipeline/3.1.covariates/metadata.txt"     , sep = "\t", header = TRUE , data.table = FALSE)
covariates = add_rownames(fread("pipeline/3.1.covariates/covariates.txt"   , sep = "\t", header = TRUE , data.table = FALSE))
int_list   = readLines         ("pipeline/3.2.eqtls/vars/cardiac_eqtls.gene.txt")
int2_list  = readLines         ("pipeline/3.2.eqtls/vars/covariates_to_interaction.txt")
# Keep only covariates whose names do not start with "peer" or "pc"
# (presumably PEER factors and principal components -- TODO confirm).
# drop = FALSE keeps the result a data.frame, with its row names, even if a single
# column survives the filter; a bare vector would break the column assignments below.
covariates = covariates[, !grepl("^peer", colnames(covariates)) & !grepl("^pc", colnames(covariates)), drop = FALSE]
gene_tpm   = add_rownames(fread("pipeline/1.2.expression/tpm_gene.expressed.txt"   , sep = "\t", header = TRUE , data.table = FALSE))
isof_use   = add_rownames(fread("pipeline/1.2.expression/use_isoform.expressed.txt", sep = "\t", header = TRUE , data.table = FALSE))

# Map every cell type present in diffexp_cell (sorted alphabetically) to a fixed color.
# The palette is hard-coded, so the number of cell types must match it exactly:
# data.frame() silently recycles the shorter vector whenever one length is a multiple
# of the other, which would produce a wrong mapping instead of an error.
# local() keeps the temporaries out of the workspace that is saved later on.
cell2color = local({
	cell_types   = sort(unique(diffexp_cell$cell_type))
	cell_palette = c('#bb0000', '#3D3D3D', '#ff8888', '#0000bb', '#cdcd00', '#FF34B3', '#8B814C', '#00bb00')

	if (length(cell_types) != length(cell_palette))
	{
		stop("cell2color: expected ", length(cell_palette), " cell types in diffexp_cell$cell_type, found ", length(cell_types), call. = FALSE)
	}

	data.frame(cell_type = cell_types, color = cell_palette)
})
# Display label: underscores become spaces, then the "cibersort.regular." prefix is removed.
# NOTE(review): the "." between "cibersort" and "regular" is unescaped and matches any character.
cell2color$cell_name = gsub("cibersort.regular\\.", "", gsub("_", " ", cell2color$cell_type))

# adult = |ipsc_cvpc - 1|, i.e. the complement of ipsc_cvpc
# (presumably a 0/1 indicator, making adult its opposite flag -- TODO confirm).
covariates$adult = abs(covariates$ipsc_cvpc - 1)


In [9]:
# Reduce every table to a small set of example genes. geneinfo_gene is filtered first
# (by gene name); all other tables are then filtered on the gene_id values that remain
# in it, so the order of these statements matters.
geneinfo_gene       = geneinfo_gene      [geneinfo_gene      $gene_name %in% c("SCN5A", "TNNT2", "RBM5", "NME7", "MIIP"),]
geneinfo_isoform    = geneinfo_isoform   [geneinfo_isoform   $gene_id   %in% geneinfo_gene$gene_id,]
diffexp             = diffexp            [diffexp            $gene_id   %in% geneinfo_gene$gene_id,]
diffexp_cell        = diffexp_cell       [diffexp_cell       $gene_id   %in% geneinfo_gene$gene_id,]
diffexp_cell_tissue = diffexp_cell_tissue[diffexp_cell_tissue$gene_id   %in% geneinfo_gene$gene_id,]
eqtl_genes          = eqtl_genes         [eqtl_genes         $gene_id   %in% geneinfo_gene$gene_id,]
eqtl_isoforms       = eqtl_isoforms      [eqtl_isoforms      $gene_id   %in% geneinfo_gene$gene_id,]
int_genes           = int_genes          [int_genes          $gene_id   %in% geneinfo_gene$gene_id,]
int_isoforms        = int_isoforms       [int_isoforms       $gene_id   %in% geneinfo_gene$gene_id,]
coloc               = coloc              [coloc              $gene_id   %in% geneinfo_gene$gene_id,]
diffexp_quadrant    = diffexp_quadrant   [diffexp_quadrant   $gene_id   %in% geneinfo_gene$gene_id,]

# Expression matrices are subset by row name, using the transcript_id column of the
# (already filtered) gene and isoform tables.
# NOTE(review): indexing a data.frame by a row name that does not exist yields an
# all-NA row rather than an error -- confirm every transcript_id is present in the
# "expressed" matrices.
gene_tpm = gene_tpm[geneinfo_gene   $transcript_id,]
isof_use = isof_use[geneinfo_isoform$transcript_id,]


In [10]:
# Snapshot the whole global workspace (every object currently defined, not only the
# tables loaded above) into a single RData file.
save.image("pipeline/8.1.tool/input_data/input_data_all.V.0.1.RData")


In [11]:
# Quick structural check of the filtered differential-expression table.
utils::str(diffexp)

'data.frame':	141 obs. of  18 variables:
 $ transcript_id: chr  "ENSG00000003756.17_8" "ENSG00000116691.11_4" "ENSG00000118194.20_8" "ENSG00000143156.14_7" ...
 $ gene_id      : chr  "ENSG00000003756.17_8" "ENSG00000116691.11_4" "ENSG00000118194.20_8" "ENSG00000143156.14_7" ...
 $ gene_name    : chr  "RBM5" "MIIP" "TNNT2" "NME7" ...
 $ gene_type    : chr  "protein_coding" "protein_coding" "protein_coding" "protein_coding" ...
 $ beta         : num  -0.639 -1.066 -0.161 -0.942 -0.71 ...
 $ se           : num  0.1317 0.1328 0.0995 0.0854 0.0928 ...
 $ tval         : num  -4.86 -8.03 -1.62 -11.03 -7.65 ...
 $ pval         : num  1.52e-06 5.07e-15 1.06e-01 6.62e-26 7.87e-14 ...
 $ qval         : num  3.11e-02 1.03e-10 1.00 1.35e-21 1.60e-09 ...
 $ tissue1      : chr  "ipsc_cvpc" "ipsc_cvpc" "ipsc_cvpc" "ipsc_cvpc" ...
 $ tissue2      : chr  "heart" "heart" "heart" "heart" ...
 $ type         : chr  "gene_tpm" "gene_tpm" "gene_tpm" "gene_tpm" ...
 $ diffexp      : logi  TRUE TRUE FALSE TRUE

In [12]:
# Distinct values of the tissue1 column, in sorted order.
sort(unique(diffexp[["tissue1"]]))