Skip to content

Commit

Permalink
Merge 0b1e668 into a69a287
Browse files Browse the repository at this point in the history
  • Loading branch information
dnil committed Mar 7, 2019
2 parents a69a287 + 0b1e668 commit 1ca6056
Show file tree
Hide file tree
Showing 29 changed files with 875 additions and 132 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ Add stuff here
- Display cases with number of variants from CLI
- Only display cases with number of variants above certain threshold. (Also CLI)
- Export of verified variants by CLI or from the dashboard
- Extend case level queries with default panels, cohorts and phenotype groups.
- Slice dashboard statistics display using case level queries
- Add a view where all variants for an institute can be searched across cases, filtering on gene and rank score. Allows searching research variants for cases that have research open.

### Fixed
- Fixed code to extract variant conservation (gerp, phyloP, phastCons)
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ WeasyPrint>=0.42.3
xlsxwriter
click<7.0
requests-oauthlib<1.2.0
oauthlib<3.0.0

# unclear
Flask-Login
Expand Down
45 changes: 35 additions & 10 deletions scout/adapter/mongo/case.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,16 @@
class CaseHandler(object):
"""Part of the pymongo adapter that handles cases and institutes"""

def cases(self, collaborator=None, query=None, skip_assigned=False,
def cases(self, owner=None, collaborator=None, query=None, skip_assigned=False,
has_causatives=False, reruns=False, finished=False,
research_requested=False, is_research=False, status=None,
phenotype_terms=False, pinned=False, cohort=False, name_query=None):
phenotype_terms=False, pinned=False, cohort=False, name_query=None,
yield_query=False):
"""Fetches all cases from the backend.
Args:
collaborator(str): If collaborator should be considered
owner(str): Query cases for specified case owner only
query(dict): If a specific query is used
skip_assigned(bool)
has_causatives(bool)
Expand All @@ -37,11 +39,16 @@ def cases(self, collaborator=None, query=None, skip_assigned=False,
status(str)
phenotype_terms(bool): Fetch all cases with phenotype terms
pinned(bool): Fetch all cases with pinned variants
name_query(str): Could be hpo term, user, part of display name,
name_query(str): Could be hpo term, HPO-group, user, part of display name,
part of inds or part of synopsis
yield_query(bool): If true, only return mongo query dict for use in
compound querying.
Yields:
Cases ordered by date
Returns:
Cases ordered by date.
If yield_query is True, does not pose query to db;
instead returns corresponding query dict
that can be reused in compound queries or for testing.
"""
LOG.debug("Fetch all cases")
query = query or {}
Expand All @@ -50,6 +57,10 @@ def cases(self, collaborator=None, query=None, skip_assigned=False,
LOG.debug("Use collaborator {0}".format(collaborator))
query['collaborators'] = collaborator

if owner:
LOG.debug("Use owner {0}".format(owner))
query['owner'] = owner

if skip_assigned:
query['assignees'] = {'$exists': False}

Expand Down Expand Up @@ -87,39 +98,53 @@ def cases(self, collaborator=None, query=None, skip_assigned=False,
elif name_query.startswith('HP:'):
LOG.debug("HPO case query")
query['phenotype_terms.phenotype_id'] = name_query
elif name_query.startswith('PG:'):
LOG.debug("PG case query")
phenotype_group_query = name_query.replace('PG:', 'HP:')
query['phenotype_groups.phenotype_id'] = phenotype_group_query
elif name_query.startswith('synopsis:'):
synopsis_query=name_query.replace('synopsis:','')
query['$text']={'$search':synopsis_query}
elif name_query.startswith('cohort:'):
cohort_query = name_query.replace('cohort:','')
query['cohorts'] = cohort_query
elif name_query.startswith('panel:'):
panel_name_query = name_query.replace('panel:','')
query['panels'] = {'$elemMatch': {'panel_name': panel_name_query,
'is_default': True }}
else:
query['$or'] = [
{'display_name': {'$regex': name_query}},
{'individuals.display_name': {'$regex': name_query}},
]

if yield_query:
return query

LOG.info("Get cases with query {0}".format(query))
return self.case_collection.find(query).sort('updated_at', -1)

def nr_cases(self, institute_id=None):
    """Count the cases stored in the case collection.

    This function will change when we migrate to 3.7.1

    Args:
        institute_id(str): When given, only cases that have this id
            among their collaborators are counted.

    Returns:
        nr_cases(int): Number of cases matching the filter.
    """
    # An empty filter matches every case in the collection.
    case_filter = {'collaborators': institute_id} if institute_id else {}

    LOG.debug("Fetch all cases with query {0}".format(case_filter))
    return self.case_collection.find(case_filter).count()


def update_dynamic_gene_list(self, case, hgnc_symbols=None, hgnc_ids=None,
phenotype_ids=None, build='37'):
Expand Down
41 changes: 41 additions & 0 deletions scout/adapter/mongo/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,47 @@

class QueryHandler(object):

def build_variant_query(self, query=None, category='snv', variant_type=None):
    """Build a mongo query across multiple cases.

    Translate query options from a form into a complete mongo query dictionary.

    Beware that unindexed queries against a large variant collection will
    be extremely slow.

    Currently indexed query options:
        hgnc_symbols
        rank_score
        variant_type
        category

    Args:
        query(dict): A query dictionary for the database, from a query form.
            The keys read here are 'hgnc_symbols' and 'rank_score'.
        category(str): 'snv', 'sv', 'str' or 'cancer'
        variant_type(list(str) or str): 'clinical' and/or 'research'.
            Defaults to ['clinical'] when omitted.

    Returns:
        mongo_query : A dictionary in the mongo query format.
    """
    query = query or {}

    # Always build a fresh list: the list ends up inside the returned query
    # dict, so sharing a default (or the caller's own list) would let later
    # mutations of the result leak into other calls.
    if variant_type is None:
        variant_types = ['clinical']
    elif isinstance(variant_type, str):
        # '$in' requires an array; accept a single type given as a string.
        variant_types = [variant_type]
    else:
        variant_types = list(variant_type)

    mongo_variant_query = {}

    logger.debug("Building a mongo query for %s", query)

    if query.get('hgnc_symbols'):
        mongo_variant_query['hgnc_symbols'] = {'$in': query['hgnc_symbols']}

    mongo_variant_query['variant_type'] = {'$in': variant_types}
    mongo_variant_query['category'] = category

    # A missing or falsy rank score (including 0) falls back to 15.
    rank_score = query.get('rank_score') or 15
    mongo_variant_query['rank_score'] = {'$gte': rank_score}

    logger.debug("Querying %s", mongo_variant_query)

    return mongo_variant_query

def build_query(self, case_id, query=None, variant_ids=None, category='snv'):
"""Build a mongo query
Expand Down
33 changes: 31 additions & 2 deletions scout/adapter/mongo/variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def add_gene_info(self, variant_obj, gene_panels=None):
gene_panels(list(dict)): List of panels from database
"""
gene_panels = gene_panels or []

# Add a variable that checks if there are any refseq transcripts
variant_obj['has_refseq'] = False

Expand Down Expand Up @@ -251,6 +251,36 @@ def variant(self, document_id, gene_panels=None, case_id=None):
variant_obj['position'])
return variant_obj

def gene_variants(self, query=None,
                  category='snv', variant_type=None,
                  nr_of_variants=50, skip=0):
    """Return all variants seen in a given gene.

    If skip not equal to 0 skip the first n variants.

    Arguments:
        query(dict): A dictionary with queries for the database, as
            accepted by build_variant_query
        category(str): 'sv', 'str', 'snv' or 'cancer'
        variant_type(list(str)): 'clinical' and/or 'research'.
            Defaults to ['clinical'] when omitted.
        nr_of_variants(int): if -1 return all variants
        skip(int): How many variants to skip

    Returns:
        The cursor produced by variant_collection.find(), sorted on
        descending rank score with skip and limit applied.
    """
    # Fresh list per call instead of a shared mutable default argument.
    if variant_type is None:
        variant_type = ['clinical']

    mongo_variant_query = self.build_variant_query(
        query=query, category=category, variant_type=variant_type)

    sorting = [('rank_score', pymongo.DESCENDING)]

    # The limit is applied after the skip, so it must be the page size
    # itself; adding `skip` to it would return too many documents.
    # A limit of 0 means "no limit", i.e. return all variants.
    limit = 0 if nr_of_variants == -1 else nr_of_variants

    result = self.variant_collection.find(
        mongo_variant_query
    ).sort(sorting).skip(skip).limit(limit)

    return result

def verified(self, institute_id):
"""Return all verified variants for a given institute
Expand Down Expand Up @@ -281,7 +311,6 @@ def verified(self, institute_id):

return res


def get_causatives(self, institute_id, case_id=None):
"""Return all causative variants for an institute
Expand Down
12 changes: 11 additions & 1 deletion scout/constants/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,16 @@
name="caseid_category_varianttype_rankscore",
background=True,
),
IndexModel([
('hgnc_symbols', ASCENDING),
('rank_score', DESCENDING),
('category', ASCENDING),
('variant_type', ASCENDING)],
name="hgncsymbol_rankscore_category_varianttype",
background=True,
partialFilterExpression={ 'rank_score': { '$gt': 5 } ,
'category': 'snv' }
),
IndexModel([
('case_id', ASCENDING),
('category', ASCENDING),
Expand All @@ -68,7 +78,7 @@
background=True,
sparse=True,
),

],
'hpo_term': [
IndexModel([
Expand Down
72 changes: 71 additions & 1 deletion scout/server/blueprints/cases/controllers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,26 @@
import requests
import datetime

import logging

from bs4 import BeautifulSoup
from xlsxwriter import Workbook
from flask import url_for
from flask_mail import Message
import query_phenomizer
from flask_login import current_user

from scout.constants import (CASE_STATUSES, PHENOTYPE_GROUPS, COHORT_TAGS, SEX_MAP, PHENOTYPE_MAP, VERBS_MAP, MT_EXPORT_HEADER)
from scout.constants.variant_tags import MANUAL_RANK_OPTIONS, DISMISS_VARIANT_OPTIONS, GENETIC_MODELS
from scout.export.variant import export_mt_variants
from scout.server.utils import institute_and_case
from scout.server.utils import institute_and_case, user_institutes
from scout.parse.clinvar import clinvar_submission_header, clinvar_submission_lines
from scout.server.blueprints.variants.controllers import variant as variant_decorator
from scout.server.blueprints.variants.controllers import sv_variant
from scout.server.blueprints.variants.controllers import get_predictions
from scout.server.blueprints.genes.controllers import gene

log = logging.getLogger(__name__)

STATUS_MAP = {'solved': 'bg-success', 'archived': 'bg-warning'}

Expand Down Expand Up @@ -426,6 +433,69 @@ def vcf2cytosure(store, institute_id, case_name, individual_id):

return (individual_obj['display_name'], individual_obj['vcf2cytosure'])

def gene_variants(store, variants_query, page=1, per_page=50):
    """Pre-process list of variants.

    Takes one page out of the variants query, drops variants whose institute
    is not among the current user's institutes and decorates the remaining
    ones with the case display name, an HGVS description (only when the
    variant has exactly one gene) and the result of get_predictions().

    Args:
        store: adapter providing case(); also handed to user_institutes()
            and gene()
        variants_query: cursor-like object supporting count(), skip()
            and limit()
        page(int): 1-based page number
        per_page(int): number of variants fetched per page

    Returns:
        dict: {'variants': list of decorated variant dicts,
               'more_variants': True if variants remain after this page}
    """
    variant_count = variants_query.count()
    skip_count = per_page * max(page - 1, 0)
    more_variants = variant_count > (skip_count + per_page)
    variant_res = variants_query.skip(skip_count).limit(per_page)

    my_institutes = [inst['_id'] for inst in user_institutes(store, current_user)]

    log.debug("Institutes allowed: {}.".format(my_institutes))

    variants = []
    for variant_obj in variant_res:
        # hide other institutes for now
        if variant_obj['institute'] not in my_institutes:
            log.debug("Institute {} not allowed.".format(variant_obj['institute']))
            continue

        variant_case_obj = store.case(case_id=variant_obj['case_id'])
        variant_obj['case_display_name'] = variant_case_obj['display_name']

        variant_genes = variant_obj.get('genes')
        if variant_genes is not None:
            gene_symbols = []
            hgvs_c = []
            hgvs_p = []

            for gene_obj in variant_genes:
                hgnc_id = gene_obj['hgnc_id']
                gene_symbols.append(gene(store, hgnc_id)['symbol'])

                # Reset both descriptions for every gene so a gene without a
                # canonical transcript neither raises on an unbound name nor
                # inherits the protein change of the previous gene.
                hgvs_nucleotide = '-'
                hgvs_protein = '-'
                # gather HGVS info from gene transcripts
                for transcript_obj in gene_obj.get('transcripts') or []:
                    if transcript_obj.get('is_canonical') is True:
                        hgvs_nucleotide = str(transcript_obj.get('coding_sequence_name'))
                        hgvs_protein = str(transcript_obj.get('protein_sequence_name'))
                hgvs_c.append(hgvs_nucleotide)
                hgvs_p.append(hgvs_protein)

            log.debug("HGVS: {} {} {}.".format(gene_symbols, hgvs_c, hgvs_p))

            if len(gene_symbols) == 1:
                # str(None) == "None" marks a description that is missing on
                # the canonical transcript; prefer protein over nucleotide.
                if hgvs_p[0] != "None":
                    hgvs = hgvs_p[0]
                elif hgvs_c[0] != "None":
                    hgvs = hgvs_c[0]
                else:
                    hgvs = "-"
                variant_obj['hgvs'] = hgvs

            variant_obj.update(get_predictions(variant_genes))

        variants.append(variant_obj)

    return {
        'variants': variants,
        'more_variants': more_variants,
    }

def multiqc(store, institute_id, case_name):
"""Find MultiQC report for the case."""
Expand Down
29 changes: 29 additions & 0 deletions scout/server/blueprints/cases/forms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
import decimal

from flask_wtf import FlaskForm
from wtforms import (BooleanField, DecimalField, Field, TextField, SelectMultipleField,
IntegerField, SubmitField)
from wtforms.widgets import TextInput

# make a base class or other utility with this instead..
class TagListField(Field):
    """Text input field whose data is a list of comma separated tags."""

    widget = TextInput()

    def _value(self):
        """Return the stored tags joined by ', ', or '' when there are none."""
        return ', '.join(self.data) if self.data else ''

    def process_formdata(self, valuelist):
        """Split the first submitted value on commas into stripped, non-empty tags."""
        if not valuelist:
            self.data = []
            return

        stripped_tags = (tag.strip() for tag in valuelist[0].split(','))
        self.data = [tag for tag in stripped_tags if tag]

class GeneVariantFiltersForm(FlaskForm):
    """Filter form with a variant type selection, HGNC symbols and a submit button."""

    variant_type = SelectMultipleField(
        choices=[
            ('clinical', 'clinical'),
            ('research', 'research'),
        ],
    )
    hgnc_symbols = TagListField(label='HGNC Symbols/Ids (case sensitive)')
    filter_variants = SubmitField('Filter variants')
Loading

0 comments on commit 1ca6056

Please sign in to comment.