Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Search page #39

Closed
wants to merge 12 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
106 changes: 54 additions & 52 deletions util.py → Util.py
Expand Up @@ -4,54 +4,17 @@
import webapp2
from jinja2 import Environment, FileSystemLoader
import os
import json
import simplejson
import logging
from google.appengine.ext import db
from google.appengine.api import memcache, users
from google.appengine.api import memcache, users, search
from datetime import datetime
from models.users import UserData
from models.study import Study, GWAS, Gene, Snp, Disease

from Bio import Entrez
import StringIO

"""
Imports the GWAS catalog file and creates the Study, GWAS, Gene, Snp and Disease entities from the following columns:

Date Added to Catalog
PUBMEDID
First Author
Date
Journal
Link
Study
Disease_Trait
Initial Sample Size
Replication Sample Size
Region
Chr_id
Chr_pos
Reported Gene(s)
Mapped_gene
Upstream_gene_id
Downstream_gene_id
Snp_gene_ids
Upstream_gene_distance
Downstream_gene_distance
Strongest SNP-Risk Allele
SNPs
Merged
Snp_id_current
Context
Intergenic
Risk Allele Frequency
p-Value
Pvalue_mlog
p-Value (text)
OR or beta
95\% CI (text)
Platform [SNPs passing QC]
CNV"""
from models.study import Study, GWAS, Gene, Snp, Disease

import csv
from google.appengine.api import memcache
Expand All @@ -61,6 +24,31 @@ def reset():
"""
memcache.flush_all()

def AddStudyDocument(study):
    """Add a study to the full-text index named by ``study._index``.

    The document is keyed by the study's PubMed id and carries the study's
    name, disease/trait, abstract and PubMed id as text fields.
    """
    pubmed_id = study.pubmed_id
    text_fields = [
        search.TextField(name=field_name, value=field_value)
        for field_name, field_value in (
            ('name', study.name),
            ('disease_trait', study.disease_trait),
            ('abstract', study.abstract),
            ('id', pubmed_id),
        )
    ]
    # The PubMed id doubles as the document id.
    document = search.Document(doc_id=pubmed_id, fields=text_fields)
    search.Index(name=study._index).add(document)

def AddSNPDocument(snp):
    """Add a SNP to the full-text index named by ``snp._index``.

    The document carries a single text field, ``snpid``. Unlike the study
    and gene helpers, no ``doc_id`` is passed to ``search.Document`` here.
    """
    doc = search.Document(
        fields=[
            search.TextField(name='snpid', value=snp.snpid),
        ])
    search.Index(name=snp._index).add(doc)


# populate() calls this helper as ``AddSnpDocument``. Without this alias that
# call raises NameError, which populate()'s bare ``except`` swallows: the SNP
# is never indexed and its id is reset to "N/A".
AddSnpDocument = AddSNPDocument

def AddGeneDocument(gene):
    """Add a gene to the full-text index named by ``gene._index``.

    The document is keyed by the gene id and carries the gene's id, name and
    the string form of its alias value as text fields.
    """
    gene_id = gene.geneid
    text_fields = [
        search.TextField(name='id', value=gene_id),
        search.TextField(name='name', value=gene.name),
        # alias is stringified before it is handed to the text field
        search.TextField(name='alias', value=str(gene.alias)),
    ]
    document = search.Document(doc_id=gene_id, fields=text_fields)
    search.Index(name=gene._index).add(document)

def populate(path="gwascatalog.txt", limit=200):
"""Populate the database with data from gwascatalog.txt"""
Expand Down Expand Up @@ -98,7 +86,8 @@ def populate(path="gwascatalog.txt", limit=200):
study.repl_sample= rel["Replication Sample Size"].strip()
study.platform = rel["Platform [SNPs passing QC]"].strip()
study.put()

AddStudyDocument(study)

for rel in line:
# init gene relation
gene = None
Expand All @@ -122,17 +111,18 @@ def populate(path="gwascatalog.txt", limit=200):
if not disease.key() in gene.diseases:
gene.diseases.append(disease.key())
gene.put()
AddGeneDocument(gene)
else:
# up and downstream genes must be set
down_id = rel["Downstream_gene_id"].strip()
up_id = rel["Upstream_gene_id"].strip()
up_id = rel["Upstream_gene_id"].strip()

if up_id != "" and down_id != "":
up_down_names = rel["Mapped_gene"].split(" - ")
if len(up_down_names) < 2:
# gene = NR / NS or whatever..
up_down_names = ["N/A", "N/A"]

# create upstream gene
up_name = up_down_names[0]
down_name = up_down_names[1]
Expand Down Expand Up @@ -170,10 +160,11 @@ def populate(path="gwascatalog.txt", limit=200):
if not disease.key() in snp.diseases:
snp.diseases.append(disease.key())
snp.put()
AddSnpDocument(snp)
except:
# haplotype?
snpid = "N/A"

# if no gene or snp relation is mentioned - ignore and just insert study
if (gene is None or up_gene is None) and snp is None:
print "skipping gwas"
Expand All @@ -192,12 +183,12 @@ def populate(path="gwascatalog.txt", limit=200):
gwas.p_string = rel["p-Value"].strip()
# could be none
gwas.snps = snpid

# parse out the exponent: 6E-8 => -8
try:
# test that p-Value is actually a float before parsing out
float(rel["p-Value"])
gwas.p_val = int(rel["p-Value"].split("E")[1])
gwas.p_val = int(rel["p-Value"].split("E")[1])
except Exception, e:
# print e
# forces the filter to downgrade this gwas wrt. p-value
Expand All @@ -222,11 +213,22 @@ def purge():
q = db.GqlQuery("SELECT __key__ FROM %s" % model)
assert q.count()
db.delete(q.fetch(200))
# time.sleep(0.5)
except Exception, e:
print e
pass
print "done"
print "Datastore cleared"

for model in [Snp, Gene,GWAS, Study, Disease]:
index = search.Index(name=model._index)
while True:
# Get a list of documents populating only the doc_id field and extract the ids.
document_ids = [document.doc_id
for document in index.list_documents(ids_only=True)]
if not document_ids:
break
# Remove the documents for the given ids from the Index.
index.remove(document_ids)
print "Fulltext docs cleared"

def snp_omim(snpids=None):
"""given list of snpids - returns the list of related OMIM IDs"""
Expand Down Expand Up @@ -270,10 +272,8 @@ def render(template, variables, printer):
t = jTemplate._e.get_template(template)
printer(t.render(variables))

# import StringIO
env = Environment(loader=FileSystemLoader(os.path.join(
os.path.dirname(__file__), 'templates')))

os.path.dirname(__file__), 'templates')))
class AppRequestHandler(webapp2.RequestHandler):
"""Base class for controllers"""
_template = None
Expand Down Expand Up @@ -306,7 +306,8 @@ def out(self, **dictionary):
def toJson(self, dictionary, prettify = False):
"""Display JSON data template.
Prettify flag tells whether to use the google code prettify markup"""
data = {"json": json.dumps(dictionary)}
enc = simplejson.JSONEncoder()
data = {"json": enc.encode(dictionary)}
if prettify:
jTemplate.render("data/prettify/json.html", data , self.response.out.write);
else:
Expand All @@ -320,3 +321,4 @@ def toXML(self, xml, prettify = False):
jTemplate.render("data/prettify/xml.html", data, self.response.out.write);
else:
jTemplate.render("data/xml.html", data,self.response.out.write );

7 changes: 7 additions & 0 deletions bootstrap/css/style.css
Expand Up @@ -20,3 +20,10 @@
.box.journal {
background-image:url('/img/journal.png');
}

/* Footer block: spaced from the content above it and set off by a light grey top border. */
.footer {
margin-top: 45px;
padding: 35px 0 36px;
border-top: 1px solid
#E5E5E5;
}
2 changes: 1 addition & 1 deletion controllers/adminController.py
Expand Up @@ -2,7 +2,7 @@
# Controller for administrator presentation
#
###
from util import AppRequestHandler
from Util import AppRequestHandler

class adminDashboard(AppRequestHandler):
def get(self):
Expand Down
2 changes: 1 addition & 1 deletion controllers/databaseController.py
Expand Up @@ -2,7 +2,7 @@
Controller containing logic for requests to different SNP databases
"""

from util import AppRequestHandler
from Util import AppRequestHandler
from Bio import Entrez
from xml.dom.minidom import parseString

Expand Down
9 changes: 5 additions & 4 deletions controllers/gwasController.py
@@ -1,7 +1,8 @@
"""
Controller for main presentation of GWAS data
"""
from util import AppRequestHandler
###
# Controller for main presentation
#
###
from Util import AppRequestHandler
from models.users import UserData
from models.study import Study, Disease, Gene
from models.annotation import Comment
Expand Down
9 changes: 5 additions & 4 deletions controllers/mainController.py
@@ -1,7 +1,8 @@
"""
Controller for main presentation
"""
from util import AppRequestHandler
###
# Controller for main presentation
#
###
from Util import AppRequestHandler
from models.snp import snp
from models.users import UserData

Expand Down
39 changes: 39 additions & 0 deletions controllers/searchController.py
@@ -0,0 +1,39 @@
""" Controller for managing the different kinds of search """
from Util import AppRequestHandler
from models.study import Study, Snp, Gene, Disease

class SearchHandler(AppRequestHandler):
    """Base handler for full-text search requests.

    Subclasses set ``_model`` to the model class whose ``search`` method
    answers the query.
    """
    _model = None

    def get(self, query):
        """Write the hits for ``query`` to the response as JSON.

        ``query`` is the text captured from the request path; it is searched
        as a single quoted phrase. Each hit becomes a dict mapping field
        name to field value. An empty query is answered with a 404.
        """
        # The phrase is wrapped in double quotes below, so quotes embedded in
        # the request text would unbalance the query string; drop them.
        cleaned = query.replace('"', '') if query else ''
        if not cleaned:
            self.error(404)
            return

        hits = [
            dict((field.name, field.value) for field in scored_document.fields)
            for scored_document in self._model.search('"' + cleaned + '"')
        ]
        self.toJson(hits)


class SNPSearch(SearchHandler):
    """Search handler backed by the Snp model."""
    _model = Snp


class GeneSearch(SearchHandler):
    """Search handler backed by the Gene model."""
    _model = Gene


class DiseaseSearch(SearchHandler):
    """Search handler backed by the Disease model."""
    _model = Disease


class StudySearch(SearchHandler):
    """Search handler backed by the Study model."""
    _model = Study


# URL pattern -> handler; the captured group is passed to get() as the query.
__routes__ = [
    ('/search/snp/(.*)', SNPSearch),
    ('/search/gene/(.*)', GeneSearch),
    ('/search/disease/(.*)', DiseaseSearch),
    ('/search/study/(.*)', StudySearch),
]
2 changes: 1 addition & 1 deletion controllers/taggingController.py
@@ -1,7 +1,7 @@
"""Tagging controller is deprecated.
Allows basic tagging of SNPs only"""

from util import AppRequestHandler
from Util import AppRequestHandler
from models.snp import snp
from models.snp_url import snp_url
from models.domain_tag import domain_tag
Expand Down
1 change: 0 additions & 1 deletion main.py
Expand Up @@ -18,7 +18,6 @@
# from google.appengine.ext.webapp import util
# from google.appengine.api import users

# import logging
import controllers
from webapp2 import WSGIApplication

Expand Down
18 changes: 18 additions & 0 deletions models/Application.py
@@ -0,0 +1,18 @@
from google.appengine.ext import db
from google.appengine.api import search

class AppModel(db.Model):
    """General methods shared by the application's models."""

    # Name of the full-text search index for this model. search() raises
    # AttributeError while this is left unset.
    _index = None

    def to_dict(self):
        """Return a dict mapping each property name to its unicode value."""
        return dict((name, unicode(getattr(self, name)))
                    for name in self.properties())

    # NOTE(review): a method named ``__dict__`` hides the attribute dictionary
    # normally reachable as ``obj.__dict__``. It is kept so that existing
    # ``obj.__dict__()`` callers keep working; new code should call to_dict().
    def __dict__(self):
        """Allow serialization of model objects (same result as to_dict())."""
        return self.to_dict()

    @classmethod
    def search(cls, query):
        """Run a full-text ``query`` against this model's search index.

        Returns whatever ``search.Index(...).search(query)`` returns.
        Raises AttributeError when the model has no ``_index`` set.
        """
        if not cls._index:
            raise AttributeError('Index is not set for search on ' + str(cls))
        return search.Index(name=cls._index).search(query)