## TREM2 study for Cory

In [1]:
library(RPostgreSQL)
library(TReNA)
library(gplots)
library (RColorBrewer)

Loading required package: DBI
Loading required package: glmnet
Loading required package: Matrix
Loading required package: foreach
Loaded glmnet 2.0-5

Loading required package: randomForest
randomForest 4.6-12
Type rfNews() to see new features/changes/bug fixes.
Loading required package: vbsr
Loading required package: GenomicRanges
Loading required package: stats4
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: ‘BiocGenerics’

The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following object is masked from ‘package:randomForest’:

    combine

The following object is masked from ‘package:Matrix’:

    which

The following objects are masked from ‘package:stats’:

    IQR, mad, xtabs

The following objects are masked from ‘package:base’:

    anyDuplicated, a

In [2]:
source("../trenadb-utils.R")  # symlink to ~/github/BDDS/trenadb/src/utils.R

In [3]:
# All four databases live on the same PostgreSQL host and share one account,
# so a small local helper opens a connection given only the database name.
open.trena.db <- function(dbname){
   dbConnect(PostgreSQL(), user="trena", password="trena", dbname=dbname, host="whovian")
   }

db.trena      <- open.trena.db("trena")
db.gtf        <- open.trena.db("gtf")
db.hint       <- open.trena.db("hint")
db.wellington <- open.trena.db("wellington")

# Gene-level annotation row(s) for TREM2 from the hg38human table of the gtf database.
tbl.trem2 <- dbGetQuery(db.gtf, "select * from hg38human where gene_name='TREM2' and moleculetype='gene'")

# Complete contents of the tfmotifs table; later cells read its `motif` and `gene` columns.
tbl.genesmotifs <- dbGetQuery(db.trena, "select * from tfmotifs")

In [4]:
# Display the TREM2 gene record returned by the gtf query, to check its columns and coordinates.
tbl.trem2

chr,start,endpos,score,strand,frame,moleculetype,gene_id,gene_version,gene_name,⋯,havana_transcript_version,tag,transcript_support_level,exon_number,exon_id,exon_version,ccds_id,protein_id,protein_version,annotation
chr6,41158506,41163186,.,-,.,gene,ENSG00000095970,16,TREM2,⋯,,,,,,,,,,ensembl_havana


In [5]:
# Define the region of interest: a window of `shoulder` bases on either side
# of the TREM2 transcription start site (TSS).
target.gene <- "TREM2"
shoulder <- 1000

# The strand test and the arithmetic below need exactly one gene record.
stopifnot(nrow(tbl.trem2) == 1)

# Use the table's real column names (chr, start, endpos, strand).
# `tbl.trem2$chrom` is NULL because no column is called or begins with
# "chrom", and `$end` only worked through partial matching of `endpos`.
chrom <- tbl.trem2$chr
strand <- tbl.trem2$strand

# On the minus strand the gene is read from its higher coordinate, so the TSS
# is the end position; otherwise it is the start position.
tss <- tbl.trem2$start
if(strand == '-')
   tss <- tbl.trem2$endpos

start <- tss - shoulder
end   <- tss + shoulder

In [6]:
# Display the lower and upper bounds of the region computed around the TSS.
start; end;

In [None]:
# Build the table of hits for the region. createHintTable is defined outside
# this notebook (presumably in the sourced trenadb-utils.R -- confirm); the
# cells below rely on it returning a `motif.h` column.
tbl.h <- createHintTable(chrom, start, end)

# Distinct motifs found in the region.  The column is referenced by its full
# name, motif.h, rather than through `$motif` partial matching, which silently
# yields NULL as soon as a second column starting with "motif" appears.
motifs <- unique(tbl.h$motif.h)

# Candidate regulators: every gene that tfmotifs associates with one of those motifs.
candidate.tfs <- sort(unique(tbl.genesmotifs$gene[tbl.genesmotifs$motif %in% motifs]))
tbl.h

In [None]:
# Annotate each row of tbl.h with a comma-separated list of the genes that
# tfmotifs associates with that row's motif.
#
# The lookup table is built over the DISTINCT motifs.  Building it once per
# row of tbl.h (as before) put a motif that occurs k times into tbl.mg k times,
# and the merge below then returned k*k rows for that motif.
motifs.in.region <- unique(as.character(tbl.h$motif.h))

gene.list <- lapply(motifs.in.region, function(mot) subset(tbl.genesmotifs, motif == mot)$gene)
names(gene.list) <- motifs.in.region
gene.list.collapsed <- lapply(gene.list, function(x) paste(x, collapse=","))

# as.character() keeps this a zero-length character vector (not NULL) when no
# motifs were found, so data.frame() still produces both columns.
genes.by.motif <- as.character(unlist(gene.list.collapsed, use.names=FALSE))
tbl.mg <- data.frame(motif=motifs.in.region, genes=genes.by.motif, stringsAsFactors=FALSE)

# One tbl.mg row per motif, so the merge keeps one output row per tbl.h row
# whose motif is present in tbl.mg.
tbl <- merge(tbl.h, tbl.mg, by.x="motif.h", by.y="motif")

In [None]:
# Convert the location strings in tbl$loc into a table with locStringToBedTable.
# The helper is defined outside this notebook (presumably in the sourced
# trenadb-utils.R -- confirm).  The next cell selects chrom/start/end columns
# after binding this table on, so it presumably supplies them -- TODO confirm.
tbl.bed <- locStringToBedTable(tbl$loc)

In [None]:
# Attach the columns of tbl.bed to the annotated hit table, show the combined
# column names, then keep only the columns of interest in a fixed order.
tbl <- cbind(tbl, tbl.bed)
colnames(tbl)
preferred.colnames <- c(
    "chrom", "start", "end",
    "motif.h", "samplecount.h", "length.h",
    "score1.h.median", "score1.h.best",
    "score2.h.median", "score2.h.best",
    "score3.h.median", "score3.h.best",
    "genes"
    )
tbl <- tbl[preferred.colnames]
tbl

In [None]:
# Save the annotated table as a tab-separated file with a header row and no row names.
write.table(tbl,
            file="trem2.hint.withMotifs.1kb.tsv",
            sep="\t",
            col.names=TRUE,
            row.names=FALSE)

In [None]:
# Restore the objects saved in the RData file into the workspace; load()
# returns the names of the objects it created and print() echoes them.
# The following cell uses mtx.tcx, mtx.tcx.ad and mtx.tcx.ctl, which are
# presumably provided by this file -- confirm against the printed names.
print(load("~/github/Private_Cory_Data/inst/extdata/prepped.tcx.matrices.RData"))

In [None]:
# Fit one lasso model of the target gene per expression matrix (all samples,
# and the `ad` and `ctl` subsets), using the candidate TFs as predictors.
# `results` is a list with one solver result per matrix, named all/ad/ctl.
matrices <- list(all=mtx.tcx, ad=mtx.tcx.ad, ctl=mtx.tcx.ctl)

results <- lapply(matrices, function(mtx){
   stopifnot(target.gene %in% rownames(mtx))

   # Only TFs present in this matrix can be used.  The target is removed so it
   # can never be offered as a predictor of itself.
   candidate.regulators <- setdiff(intersect(candidate.tfs, rownames(mtx)), target.gene)
   stopifnot(length(candidate.regulators) > 0)

   genes.of.interest <- c(target.gene, candidate.regulators)

   # drop=FALSE keeps the subset a matrix whatever its dimensions.
   mtx.sub <- mtx[genes.of.interest, , drop=FALSE]
   mtx.adjusted <- asinh(mtx.sub)

   trena <- TReNA(mtx.assay=mtx.adjusted, solver="lasso", quiet=FALSE)

   # Return the solver's result directly as the value of the function.
   solve(trena, target.gene, candidate.regulators, extraArgs=list(alpha=0.1, lambda=NULL))
   })


In [None]:
# Prefix every column of each result table with the name of the matrix it came
# from (e.g. "beta" in the `ad` result becomes "ad.beta"), so the tables can be
# combined without column-name clashes.
results2 <- lapply(names(results), function(name){
    tbl.result <- results[[name]]
    colnames(tbl.result) <- paste(name, colnames(tbl.result), sep=".")
    tbl.result
    })

names(results2) <- names(results)

# Union of row names across all results, and the concatenation of all prefixed
# column names.  Computed over however many results exist rather than over
# hard-coded positions 1..3; the earlier verbatim repeat of these statements
# has been removed.
all.tfs <- sort(unique(unlist(lapply(results2, rownames), use.names=FALSE)))
all.conditions <- unlist(lapply(results2, colnames), use.names=FALSE)


In [None]:
options(width=120)

# Combine all results into one matrix with a row per TF and a column per
# prefixed result column.  Cells for TFs absent from a given result stay 0.
m <- matrix(0, nrow=length(all.tfs), ncol=length(all.conditions), dimnames=list(all.tfs, all.conditions))

# seq_along() yields an empty loop for an empty list, unlike 1:length().
for(i in seq_along(results2)){
    mtx <- as.matrix(results2[[i]])
    m[rownames(mtx), colnames(mtx)] <- mtx
    }

# Keep only the columns whose name contains "beta" (presumably the lasso
# coefficients -- confirm against the solver's output columns).
# Logical indexing with drop=FALSE keeps a matrix for any number of matches.
m.beta <- m[, grepl("beta", colnames(m)), drop=FALSE]

# Same matrix without the columns from the `all` result.  A negated logical
# mask is used because negative indexing with an empty grep() result would
# drop every column instead of none.
m.beta.noAll <- m.beta[, !grepl("^all\\.", colnames(m.beta)), drop=FALSE]

heatmap.2(m.beta, margins=c(10,10), trace='none')
heatmap.2(m.beta.noAll, margins=c(10,10), trace='none', cexCol=2)
