In [6]:
# Packages used by the cells below: database access, genomic ranges,
# the TReNA and FIMO client packages, and RUnit for the check* assertions.
library(RPostgreSQL)
library(GenomicRanges)
library(TReNA)
library(FimoClient)
library(RUnit)

# Reference genome package; `hg38` is a short alias for the BSgenome object.
library(BSgenome.Hsapiens.UCSC.hg38)
hg38 <- BSgenome.Hsapiens.UCSC.hg38

# SNP locations package; `snps` is a short alias for the SNPlocs object.
library(SNPlocs.Hsapiens.dbSNP144.GRCh38)
snps <- SNPlocs.Hsapiens.dbSNP144.GRCh38

### create and validate all the dynamic resources

In [29]:
## Did the FIMO service start properly?  Does it respond?
## Create the client only if this session does not already have one.
## (The guard previously tested "fimo.services", a name that is never
## assigned, so the client was re-created on every run.)
if (!exists("fimo.service")) {
  fimo.service <- FimoClient("whovian", 5558, quiet = TRUE)
}

## Smoke test: a request with a bogus sequence is expected to come back as
## an empty data.frame.
result <- requestMatch(fimo.service, list(bogus = "xxxxx"))
checkEquals(result, data.frame())

In [23]:
# Print `x` for inspection. `x` is not assigned in the visible cells; the
# captured output below shows it printing as a ZeroMQ socket connected to
# tcp://whovian:5558 -- presumably the socket used by the FIMO client
# (TODO confirm where it is created).
x

A ZeroMQ <Socket> R6 class
Type: ZMQ_REQ 
Connected Address: tcp://whovian:5558 

In [25]:
# Exploratory call: attempt to query a socket option on `x`.
# NOTE(review): this call fails as written -- the recorded error below reports
# that INTEGER() cannot be applied to a 'character', so a character argument is
# being passed where an integer is required; presumably option.name should be
# an integer option constant (TODO confirm against the ZeroMQ binding's docs).
zmq.getsockopt(x, option.name="help", option.value='help')

ERROR: Error in zmq.getsockopt(x, option.name = "help", option.value = "help"): INTEGER() can only be applied to a 'integer', not a 'character'


In [3]:
# Restore the saved objects from the GWAS RData file into the workspace
# (presumably this provides `tbl.gwas`, which later cells use -- TODO confirm).
load(file = "tbl.gwas.level_1.RData")

# Pull in the helper script from src/ (presumably the source of helpers such
# as doComparativeFimo and printf used further down -- TODO confirm).
source(file = "src/createIgapFimoTrack.R")

Loading required package: GenomicFeatures
Loading required package: AnnotationDbi
Loading required package: Biobase
Welcome to Bioconductor

    Vignettes contain introductory material; view with
    'browseVignettes()'. To cite Bioconductor, see
    'citation("Biobase")', and for packages 'citation("pkgname")'.



In [4]:
# Genes of interest, used below to filter the gene annotation table.
# Fixes two syntax errors in the original literal: the missing comma after
# "TNFRSF10B" and the missing closing parenthesis of c().
goi <- c(
  # from emory, sinai, ufl
  "SNRNP70", "SNRPA", "SNRPC", "SMN1", "SNRPB", "PLCD1",
  "PTRHD1", "SFRP1", "PPP1R7", "DNM3", "RTN4", "EPB41L3", "TUBB3",
  "PLEC", "ANXA5", "MSN", "CD44", "LMNA", "DOCK2", "GABBR2", "GABRB2",
  "GIGYF1", "ITGB2", "JPH3", "LAPTM5", "NCKAP1L", "OPCML", "RBM3",
  "SCAMP1", "SCN2A", "SELT", "SNAP25", "SNAP91", "STXBP1", "SUB1",
  "SYT1", "TARBP1", "YWHAG", "TGFBR1", "BMPR1A", "BMPR1B", "VGF", "CRH",
  "TREM2", "TYROBP", "S100A8", "S100A9", "P2RY2", "P2RX7", "P2RY12",
  "P2RY13", "OSMR", "TLR4", "CR1", "CSF1R", "CX3CR1", "SPI1",
  "TNFRSF10A", "TNFRSF10B",
  # from igap paper
  "HS3ST1", "SQSTM1", "TREML2", "NDUFAF6", "ECHDC3", "AP2A2", "ADAMTS20",
  "SPPL2A", "TRIP4", "SCIMP", "ACE"
)

In [5]:
# Open a connection to the "gtf" PostgreSQL database on whovian.
db.gtf <- dbConnect(
  PostgreSQL(),
  host = "whovian",
  dbname = "gtf",
  user = "trena",
  password = "trena"
)

# Fetch every protein-coding gene row, keeping only the columns used below.
query <- "select * from hg38human where moleculetype='gene' and gene_biotype='protein_coding'"
tbl <- dbGetQuery(db.gtf, query)[, c("chr", "gene_name", "start", "endpos", "strand")]

# Restrict to the genes of interest.
tbl.genes <- tbl[tbl$gene_name %in% goi, ]

# TSS is `start` by default; for minus-strand genes it is `endpos` instead.
minus.strand.genes <- which(tbl.genes$strand == "-")
tbl.genes$TSS <- replace(
  tbl.genes$start,
  minus.strand.genes,
  tbl.genes$endpos[minus.strand.genes]
)

In [None]:
# Half-width, in bases, of the window placed around each TSS.
shoulder <- 1000

# One range per gene: [TSS - shoulder, TSS + shoulder].
gr.genes <- GRanges(
  seqnames = tbl.genes$chr,
  ranges = IRanges(
    start = tbl.genes$TSS - shoulder,
    end = tbl.genes$TSS + shoulder
  )
)

# One single-base range per GWAS SNP.
gr.snps <- with(
  tbl.gwas,
  GRanges(seqnames = CHR, ranges = IRanges(start = BP, end = BP))
)

# Pair up gene windows (query) with the SNPs (subject) that fall inside them;
# warnings raised by the overlap computation are silenced.
tbl.overlaps <- suppressWarnings(
  as.data.frame(findOverlaps(gr.genes, gr.snps, type = "any"))
)

# Side-by-side table: the gene row and the GWAS row for every overlap hit.
tbl.combined <- cbind(
  tbl.genes[tbl.overlaps$queryHits, ],
  tbl.gwas[tbl.overlaps$subjectHits, ]
)
head(tbl.combined)

In [None]:
# Genome database: has gtf and motifsgenes tables.
genome.db.uri <- "postgres://whovian/hg38"

# Footprint database: has hits and regions tables.
footprint.db.uri <- "postgres://whovian/brain_hint"

# Footprint finder bound to both databases, with quiet mode switched off.
fpf <- FootprintFinder(
  genome.db.uri,
  footprint.db.uri,
  quiet = FALSE
)

In [None]:
# Build one table of footprints around the TSS of each gene in tbl.combined.
#
# Footprints are fetched per gene (1000 bases upstream and downstream) and
# bound together in a single rbind() call, rather than growing the data frame
# inside a loop, which re-copies the accumulated table on every iteration.
# The list is seeded with an empty data.frame so the result is still an empty
# data.frame when there are no genes, as with the original loop.
tbl.fp <- do.call(
  rbind,
  c(
    list(data.frame()),
    lapply(
      # as.character() mirrors how a `for` loop iterates over a factor
      as.character(unique(tbl.combined$gene_name)),
      function(gene) {
        getFootprintsForGene(fpf, gene, size.upstream = 1000, size.downstream = 1000)
      }
    )
  )
)
dim(tbl.fp)

In [None]:
# Single-base ranges for the SNPs already found inside the gene windows.
gr.snpsInPromoters <- with(
  tbl.combined,
  GRanges(seqnames = chr, ranges = IRanges(start = BP, end = BP))
)

# Footprint ranges, each widened by 10 bases on both sides.
gr.fpInPromoters <- with(
  tbl.fp,
  GRanges(seqnames = chrom, ranges = IRanges(start = start - 10, end = endpos + 10))
)

# SNPs (query) overlapping widened footprints (subject); warnings are silenced.
tbl.ov2 <- suppressWarnings(
  as.data.frame(
    findOverlaps(gr.snpsInPromoters, gr.fpInPromoters, type = "any")
  )
)

# One row per SNP/footprint hit: the combined gene+SNP row next to the
# footprint row, followed by a per-gene count of hits.
tbl.snpsInPromotersInFootprints <- cbind(
  tbl.combined[tbl.ov2$queryHits, ],
  tbl.fp[tbl.ov2$subjectHits, ]
)
print(table(tbl.snpsInPromotersInFootprints$gene_name))

In [None]:
# Genes that have at least one SNP inside a (widened) footprint.
gene.prospects <- unique(tbl.snpsInPromotersInFootprints$gene_name)

# For each such gene, keep the distinct (gene, SNP, chr, BP, strand) rows.
# The per-gene pieces are bound in one rbind() call instead of growing the
# table inside a loop; the empty data.frame seed keeps the result an empty
# data.frame when there are no prospects.
tbl.prospects <- do.call(
  rbind,
  c(
    list(data.frame()),
    lapply(
      # as.character() mirrors how a `for` loop iterates over a factor
      as.character(gene.prospects),
      function(gene) {
        unique(
          subset(tbl.snpsInPromotersInFootprints, gene_name == gene)[
            , c("gene_name", "SNP", "chr", "BP", "strand")
          ]
        )
      }
    )
  )
)

In [None]:
# For every prospect SNP, work out the reference base and the alternate
# allele(s), then run doComparativeFimo on them and print its status per gene.
#
# seq_len() is used instead of 1:nrow(): with an empty table 1:0 would iterate
# over c(1, 0) and fail on the NA comparison below.
for (r in seq_len(nrow(tbl.prospects))) {
  # as.character() guards against these columns being factors
  rsid <- as.character(tbl.prospects$SNP[r])
  # Hard-coded substitution: rs13278062 is looked up in place of rs79037040
  # (presumably the former is the id known to the SNP package -- TODO confirm).
  if (rsid == "rs79037040") rsid <- "rs13278062"
  chrom <- as.character(tbl.prospects$chr[r])
  loc <- tbl.prospects$BP[r]
  gene <- as.character(tbl.prospects$gene_name[r])

  # Expand the SNP's IUPAC ambiguity code into its individual bases.
  ambiguity.code <- snpsById(snps, rsid)$alleles_as_ambig
  elements.string <- IUPAC_CODE_MAP[[ambiguity.code]]
  elements <- strsplit(elements.string, "")[[1]]

  # The reference-genome base at the SNP position is taken as wild type;
  # every other base allowed by the ambiguity code is treated as mutant.
  wt <- as.character(getSeq(hg38, chrom, loc, loc))
  mut <- setdiff(elements, wt)

  # NOTE(review): the positional argument 10 is presumably a flanking width
  # in bases -- confirm against doComparativeFimo's definition.
  status <- doComparativeFimo(chrom, loc, wt, mut, 10, quiet = TRUE)
  printf("---- %s: %s", gene, status)
}