Skip to content

Commit

Permalink
Merge pull request #528 from NBISweden/feature/browser-postgres
Browse files Browse the repository at this point in the history
Migrate variant browser to postgres
  • Loading branch information
viklund committed Apr 2, 2019
2 parents a850f39 + bee90bd commit 8874f2b
Show file tree
Hide file tree
Showing 33 changed files with 8,067 additions and 989 deletions.
4 changes: 2 additions & 2 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[run]
omit = /usr/local/*,/home/travis/virtualenv/*
omit = /usr/local/*,/home/travis/virtualenv/*,*venv*

[report]
omit = /usr/local/*,/home/travis/virtualenv/*
omit = */__init__.py, */test.py, */test_*.py
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ before_install:
- test/travis_before_install.sh
install:
- pip install -r backend/requirements.txt
- pip install -r test/requirements.txt
- pip install coverage coveralls
script:
- test/travis_script.sh
Expand Down
23 changes: 6 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,26 +58,15 @@ The application has only been tested with python 3.5.2. It will most likely work
Quick development mode
----------------------

1. Install docker (and docker-compose in case it's not included in the installation)
2. Create test database

1. Initiate a mysql data volume by running `./scripts/create_docker_db_volume_tarball.sh` or
`./scripts/download_and_create_docker_db_volume.sh`

2. Load mysql dummy data by running `./scripts/load_mysql_dummy_data.sh`

3. Copy `settings_sample.json` into `settings.json` and
- Update the credentials for elixir and google oauth.
- Elixir/redirectUri: http://localhost:4000/elixir/login
- redirectUri: http://localhost:4000/login
- Set `mysqlHost` to `db`
- Set `mysqlSchema` to `swefreq_test`
- Clear `mysqlPasswd` (set it to empty `""`)
4. Make a symbolic link from `backend/static` to `static`.
5. Run the server:
1. Install docker (and docker-compose in case it's not included in the installation)
2. Run the server:
```bash
$ docker-compose up
```
3. Add test data to db:
```bash
$ psql -h localhost -U postgres swefreq -f test/data/browser_test_data.sql
```

[travis-badge]: https://travis-ci.org/NBISweden/swefreq.svg?branch=develop
[travis-link]: https://travis-ci.org/NBISweden/swefreq
Expand Down
30 changes: 19 additions & 11 deletions backend/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import db
import handlers
import settings
from modules.browser import utils


def build_dataset_structure(dataset_version, user=None, dataset=None):
Expand Down Expand Up @@ -60,7 +61,6 @@ class GetSchema(handlers.UnsafeHandler):
figure out what information to return.
"""
def get(self):

dataset = None
version = None
try:
Expand Down Expand Up @@ -137,16 +137,12 @@ def get(self):

ret = []
if user:
futures = db.DatasetVersion.select(
).join(
db.Dataset
).join(
db.DatasetAccess
).where(
db.DatasetVersion.available_from > datetime.now(),
db.DatasetAccess.user == user,
db.DatasetAccess.is_admin
)
futures = (db.DatasetVersion.select()
.join(db.Dataset)
.join(db.DatasetAccess)
.where(db.DatasetVersion.available_from > datetime.now(),
db.DatasetAccess.user == user,
db.DatasetAccess.is_admin))
for f in futures:
dataset = build_dataset_structure(f, user)
dataset['future'] = True
Expand All @@ -162,6 +158,7 @@ def get(self):

class GetDataset(handlers.UnsafeHandler):
def get(self, dataset, version=None):
dataset, version = utils.parse_dataset(dataset, version)
user = self.current_user

future_version = False
Expand All @@ -182,6 +179,7 @@ def get(self, dataset, version=None):

class ListDatasetVersions(handlers.UnsafeHandler):
def get(self, dataset):
dataset, _ = utils.parse_dataset(dataset)
user = self.current_user
dataset = db.get_dataset(dataset)

Expand Down Expand Up @@ -221,6 +219,7 @@ def get(self, dataset):

class GenerateTemporaryLink(handlers.AuthorizedHandler):
def post(self, dataset, version=None):
dataset, version = utils.parse_dataset(dataset, version)
user = self.current_user
dataset_version = db.get_dataset_version(dataset, version)
if dataset_version is None:
Expand Down Expand Up @@ -250,6 +249,7 @@ def post(self, dataset, version=None):

class DatasetFiles(handlers.AuthorizedHandler):
def get(self, dataset, version=None):
dataset, version = utils.parse_dataset(dataset, version)
dataset_version = db.get_dataset_version(dataset, version)
if dataset_version is None:
self.send_error(status_code=404)
Expand All @@ -272,6 +272,7 @@ def format_bytes(nbytes):

class Collection(handlers.UnsafeHandler):
def get(self, dataset):
dataset, _ = utils.parse_dataset(dataset)
dataset = db.get_dataset(dataset)

collections = {}
Expand Down Expand Up @@ -376,6 +377,7 @@ def country_list(self):

class RequestAccess(handlers.SafeHandler):
def post(self, dataset):
dataset, _ = utils.parse_dataset(dataset)
user = self.current_user
dataset = db.get_dataset(dataset)

Expand Down Expand Up @@ -408,6 +410,7 @@ def post(self, dataset):

class LogEvent(handlers.SafeHandler):
def post(self, dataset, event, target):
dataset, _ = utils.parse_dataset(dataset)
user = self.current_user

if event == 'consent':
Expand All @@ -428,6 +431,7 @@ def post(self, dataset, event, target):

class ApproveUser(handlers.AdminHandler):
def post(self, dataset, email):
dataset, _ = utils.parse_dataset(dataset)
with db.database.atomic():
dataset = db.get_dataset(dataset)

Expand Down Expand Up @@ -470,6 +474,7 @@ def post(self, dataset, email):

class RevokeUser(handlers.AdminHandler):
def post(self, dataset, email):
dataset, _ = utils.parse_dataset(dataset)
with db.database.atomic():
dataset = db.get_dataset(dataset)
user = db.User.select().where(db.User.email == email).get()
Expand Down Expand Up @@ -506,6 +511,7 @@ def _build_json_response(query, access_for):

class DatasetUsersPending(handlers.AdminHandler):
def get(self, dataset):
dataset, _ = utils.parse_dataset(dataset)
dataset = db.get_dataset(dataset)
users = db.User.select()
access = (db.DatasetAccessPending
Expand All @@ -520,6 +526,7 @@ def get(self, dataset):

class DatasetUsersCurrent(handlers.AdminHandler):
def get(self, dataset):
dataset, _ = utils.parse_dataset(dataset)
dataset = db.get_dataset(dataset)
users = db.User.select()
access = (db.DatasetAccessCurrent
Expand Down Expand Up @@ -563,6 +570,7 @@ def get(self):

class ServeLogo(handlers.UnsafeHandler):
def get(self, dataset):
dataset, _ = utils.parse_dataset(dataset)
try:
logo_entry = db.DatasetLogo.select(
db.DatasetLogo
Expand Down
48 changes: 24 additions & 24 deletions backend/beacon.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import db
import handlers
import pymongo
import settings

import tornado.web

class Query(handlers.UnsafeHandler):
Expand Down Expand Up @@ -65,6 +65,7 @@ def get(self):
'beacon': 'swefreq-beacon'
})


class Info(handlers.UnsafeHandler):
def get(self):
query_uri = "%s://%s/query?" % ('https', self.request.host)
Expand Down Expand Up @@ -97,39 +98,38 @@ def get(self):
] #
})

def connect_mongo(dataset):
client = pymongo.MongoClient(host=settings.mongo_host, port=settings.mongo_port)

auth_db = client['exac-user']
auth_db.authenticate(settings.mongo_user, settings.mongo_password)

return client[dataset]


def lookupAllele(chrom, pos, referenceAllele, allele, reference, dataset): #pylint: disable=too-many-arguments, unused-argument
"""Check if an allele is present in the database
"""
Check if an allele is present in the database
Args:
chrom: The chromosome, format matches [1-22XY]
pos: Coordinate within a chromosome. Position is a number and is 0-based
referenceAllele: Any string of nucleotides A,C,T,G
allele: Any string of nucleotides A,C,T,G
reference: The human reference build that was used
dataset: Short name of the dataset to look in
Returns:
True if the allele was found, False otherwise, or None if the dataset version cannot be resolved
"""
if reference == 'hg19':
reference = 'GRChg37'

dataset = "exac-{}-{}".format(dataset.lower(), reference)

mdb = connect_mongo(dataset)

# must add support for reference build
# Beacon is 0-based, our database is 1-based in coords.

pos += 1
res = mdb.variants.find({'chrom': chrom, 'pos': pos})
for r in res:
if r['alt'] == allele and r['ref'] == referenceAllele:
return True

return False
dataset_version = db.get_dataset_version(dataset)
if not dataset_version:
return None
try:
variant = (db.Variant
.select()
.where((db.Variant.pos == pos) &
(db.Variant.ref == referenceAllele) &
(db.Variant.alt == allele) &
(db.Variant.chrom == chrom) &
(db.Variant.dataset_version == dataset_version))
.get())
return True
except db.Variant.DoesNotExist:
return False
71 changes: 58 additions & 13 deletions backend/db.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env python3

import logging
import settings
from peewee import (BigIntegerField,
BlobField,
Expand Down Expand Up @@ -450,8 +451,11 @@ class Meta:

def get_next_free_uid():
"""
Returns the next free uid >= 10000, and higher than the current uid's
from the sftp_user table in the database.
Get the next free uid that is >= 10000 and greater than the current uids
from the sftp_user table in the db.
Returns:
int: the next free uid
"""
default = 10000
next_uid = default
Expand All @@ -464,27 +468,68 @@ def get_next_free_uid():

return next_uid


def get_admin_datasets(user):
"""
Get a list of datasets where user is admin
Args:
user (User): Peewee User object for the user of interest
Returns:
DatasetAccess: query of dataset access entries where the user is admin
"""
return DatasetAccess.select().where( DatasetAccess.user == user, DatasetAccess.is_admin)

def get_dataset(dataset):

def get_dataset(dataset:str):
"""
Given dataset name get Dataset
Args:
dataset (str): short name of the dataset
Returns:
Dataset: the corresponding Dataset entry
"""
dataset = Dataset.select().where( Dataset.short_name == dataset).get()
return dataset

def get_dataset_version(dataset, version=None):

def get_dataset_version(dataset:str, version:str=None):
"""
Given dataset get DatasetVersion
Args:
dataset (str): short name of the dataset
version (str): version of the dataset, or None for the current version
Returns:
DatasetVersion: the corresponding DatasetVersion entry
"""
if version:
dataset_version = (DatasetVersion
.select(DatasetVersion, Dataset)
.join(Dataset)
.where(DatasetVersion.version == version,
Dataset.short_name == dataset)).get()
try:
dataset_version = (DatasetVersion
.select(DatasetVersion, Dataset)
.join(Dataset)
.where(DatasetVersion.version == version,
Dataset.short_name == dataset)).get()
except DatasetVersion.DoesNotExist:
logging.error("get_dataset_version({}, {}): ".format(dataset, version) +
"cannot retrieve dataset version")
return
else:
dataset_version = (DatasetVersionCurrent
.select(DatasetVersionCurrent, Dataset)
.join(Dataset)
.where(Dataset.short_name == dataset)).get()
try:
dataset_version = (DatasetVersionCurrent
.select(DatasetVersionCurrent, Dataset)
.join(Dataset)
.where(Dataset.short_name == dataset)).get()
except DatasetVersionCurrent.DoesNotExist:
logging.error("get_dataset_version({}, version=None): ".format(dataset) +
"cannot retrieve dataset version")
return
return dataset_version


def build_dict_from_row(row):
d = {}

Expand Down
Loading

0 comments on commit 8874f2b

Please sign in to comment.