Merge 0b1e668 into a69a287

Clinical-Genomics · Mar 7, 2019 · 1ca6056 · 1ca6056
2 parents a69a287 + 0b1e668
commit 1ca6056
Show file tree

Hide file tree

Showing 29 changed files with 875 additions and 132 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -17,6 +17,9 @@ Add stuff here
 - Display cases with number of variants from CLI
 - Only display cases with number of variants above certain threshold. (Also CLI)
 - Export of verified variants by CLI or from the dashboard
+- Extend case level queries with default panels, cohorts and phenotype groups.
+- Slice dashboard statistics display using case level queries
+- Add a view where all variants for an institute can be searched across cases, filtering on gene and rank score. Allows searching research variants for cases that have research open.
 
 ### Fixed
 - Fixed code to extract variant conservation (gerp, phyloP, phastCons)

diff --git a/requirements.txt b/requirements.txt
@@ -23,6 +23,7 @@ WeasyPrint>=0.42.3
 xlsxwriter
 click<7.0
 requests-oauthlib<1.2.0
+oauthlib<3.0.0
 
 # unclear
 Flask-Login

diff --git a/scout/adapter/mongo/case.py b/scout/adapter/mongo/case.py
@@ -19,14 +19,16 @@
 class CaseHandler(object):
     """Part of the pymongo adapter that handles cases and institutes"""
 
-    def cases(self, collaborator=None, query=None, skip_assigned=False,
+    def cases(self, owner=None, collaborator=None, query=None, skip_assigned=False,
               has_causatives=False, reruns=False, finished=False,
               research_requested=False, is_research=False, status=None,
-              phenotype_terms=False, pinned=False, cohort=False, name_query=None):
+              phenotype_terms=False, pinned=False, cohort=False, name_query=None,
+              yield_query=False):
         """Fetches all cases from the backend.
 
         Args:
             collaborator(str): If collaborator should be considered
+            owner(str): Query cases for specified case owner only
             query(dict): If a specific query is used
             skip_assigned(bool)
             has_causatives(bool)
@@ -37,11 +39,16 @@ def cases(self, collaborator=None, query=None, skip_assigned=False,
             status(str)
             phenotype_terms(bool): Fetch all cases with phenotype terms
             pinned(bool): Fetch all cases with pinned variants
-            name_query(str): Could be hpo term, user, part of display name, 
+            name_query(str): Could be hpo term, HPO-group, user, part of display name,
                              part of inds or part of synopsis
+            yield_query(bool): If true, only return mongo query dict for use in
+                                compound querying.
 
-        Yields:
-            Cases ordered by date
+        Returns:
+            Cases ordered by date.
+            If yield_query is True, does not pose query to db;
+                instead returns corresponding query dict
+                that can be reused in compound queries or for testing.
         """
         LOG.debug("Fetch all cases")
         query = query or {}
@@ -50,6 +57,10 @@ def cases(self, collaborator=None, query=None, skip_assigned=False,
             LOG.debug("Use collaborator {0}".format(collaborator))
             query['collaborators'] = collaborator
 
+        if owner:
+            LOG.debug("Use owner {0}".format(owner))
+            query['owner'] = owner
+
         if skip_assigned:
             query['assignees'] = {'$exists': False}
 
@@ -87,39 +98,53 @@ def cases(self, collaborator=None, query=None, skip_assigned=False,
             elif name_query.startswith('HP:'):
                 LOG.debug("HPO case query")
                 query['phenotype_terms.phenotype_id'] = name_query
+            elif name_query.startswith('PG:'):
+                LOG.debug("PG case query")
+                phenotype_group_query = name_query.replace('PG:', 'HP:')
+                query['phenotype_groups.phenotype_id'] = phenotype_group_query
             elif name_query.startswith('synopsis:'):
                 synopsis_query=name_query.replace('synopsis:','')
                 query['$text']={'$search':synopsis_query}
+            elif name_query.startswith('cohort:'):
+                cohort_query = name_query.replace('cohort:','')
+                query['cohorts'] = cohort_query
+            elif name_query.startswith('panel:'):
+                panel_name_query = name_query.replace('panel:','')
+                query['panels'] = {'$elemMatch': {'panel_name': panel_name_query,
+                                    'is_default': True }}
             else:
                 query['$or'] = [
                     {'display_name': {'$regex': name_query}},
                     {'individuals.display_name': {'$regex': name_query}},
                 ]
 
+        if yield_query:
+            return query
+
         LOG.info("Get cases with query {0}".format(query))
         return self.case_collection.find(query).sort('updated_at', -1)
 
     def nr_cases(self, institute_id=None):
         """Return the number of cases
-        
+
         This function will change when we migrate to 3.7.1
-        
+
         Args:
             collaborator(str): Institute id
-        
+
         Returns:
             nr_cases(int)
         """
         query = {}
 
         if institute_id:
             query['collaborators'] = institute_id
-        
+
         LOG.debug("Fetch all cases with query {0}".format(query))
         nr_cases = self.case_collection.find(query).count()
 
         return nr_cases
-    
+
 
     def update_dynamic_gene_list(self, case, hgnc_symbols=None, hgnc_ids=None,
                                  phenotype_ids=None, build='37'):

diff --git a/scout/adapter/mongo/query.py b/scout/adapter/mongo/query.py
@@ -6,6 +6,47 @@
 
 class QueryHandler(object):
 
+    def build_variant_query(self, query=None, category='snv', variant_type=None):
+        """Build a mongo query across multiple cases.
+
+        Translate query options from a form into a complete mongo query
+        dictionary.
+
+        Beware that unindexed queries against a large variant collection will
+        be extremely slow.
+
+        Currently indexed query options:
+            hgnc_symbols
+            rank_score
+            variant_type
+            category
+
+        Args:
+            query(dict): A query dictionary for the database, from a query form.
+            category(str): 'snv', 'sv', 'str' or 'cancer'
+            variant_type(list(str)): Variant types to include, 'clinical'
+                and/or 'research'. A single string is accepted and treated
+                as a one element list. Defaults to ['clinical'].
+
+        Returns:
+            mongo_query : A dictionary in the mongo query format.
+        """
+        query = query or {}
+
+        # Build a fresh list on every call: the list ends up inside the
+        # returned query dict, so a shared default could be modified by callers.
+        if variant_type is None:
+            variant_type = ['clinical']
+        elif isinstance(variant_type, str):
+            # '$in' requires a list; wrap a lone variant type
+            variant_type = [variant_type]
+        else:
+            variant_type = list(variant_type)
+
+        logger.debug("Building a mongo query for %s", query)
+
+        mongo_variant_query = {}
+
+        if query.get('hgnc_symbols'):
+            mongo_variant_query['hgnc_symbols'] = {'$in': query['hgnc_symbols']}
+
+        mongo_variant_query['variant_type'] = {'$in': variant_type}
+        mongo_variant_query['category'] = category
+
+        # A missing or falsy rank score (None, 0, '') falls back to 15
+        rank_score = query.get('rank_score') or 15
+        mongo_variant_query['rank_score'] = {'$gte': rank_score}
+
+        logger.debug("Querying %s", mongo_variant_query)
+
+        return mongo_variant_query
+
     def build_query(self, case_id, query=None, variant_ids=None, category='snv'):
         """Build a mongo query
 

diff --git a/scout/adapter/mongo/variant.py b/scout/adapter/mongo/variant.py
@@ -44,7 +44,7 @@ def add_gene_info(self, variant_obj, gene_panels=None):
             gene_panels(list(dict)): List of panels from database
         """
         gene_panels = gene_panels or []
-        
+
         # Add a variable that checks if there are any refseq transcripts
         variant_obj['has_refseq'] = False
 
@@ -251,6 +251,36 @@ def variant(self, document_id, gene_panels=None, case_id=None):
                                                variant_obj['position'])
         return variant_obj
 
+    def gene_variants(self, query=None,
+                   category='snv', variant_type=None,
+                   nr_of_variants=50, skip=0):
+        """Return variants matching a gene level query, best rank score first.
+
+        The mongo query is built with build_variant_query, so the search is
+        not restricted to a single case.
+
+        If skip not equal to 0 skip the first n variants.
+
+        Arguments:
+            query(dict): A dictionary with queries for the database, passed
+                on to build_variant_query
+            category(str): 'sv', 'str', 'snv' or 'cancer'
+            variant_type(list(str)): 'clinical' and/or 'research'.
+                Defaults to ['clinical'].
+            nr_of_variants(int): Maximum number of variants to return,
+                if -1 return all variants
+            skip(int): How many variants to skip
+
+        Returns:
+            result: The cursor from variant_collection.find, sorted on
+                descending rank score with skip and limit applied
+        """
+        # Resolve the default here so no mutable default list is shared
+        # between calls
+        if variant_type is None:
+            variant_type = ['clinical']
+
+        mongo_variant_query = self.build_variant_query(query=query,
+                                   category=category, variant_type=variant_type)
+
+        sorting = [('rank_score', pymongo.DESCENDING)]
+
+        # A limit of 0 means "no limit". Otherwise the limit counts the
+        # documents returned after the skip, so skip must not be added to it.
+        limit = 0 if nr_of_variants == -1 else nr_of_variants
+
+        result = self.variant_collection.find(
+            mongo_variant_query
+            ).sort(sorting).skip(skip).limit(limit)
+
+        return result
 
     def verified(self, institute_id):
         """Return all verified variants for a given institute
@@ -281,7 +311,6 @@ def verified(self, institute_id):
 
         return res
 
-
     def get_causatives(self, institute_id, case_id=None):
         """Return all causative variants for an institute
 

diff --git a/scout/constants/indexes.py b/scout/constants/indexes.py
@@ -44,6 +44,16 @@
             name="caseid_category_varianttype_rankscore",
             background=True,
             ),
+        IndexModel([
+            ('hgnc_symbols', ASCENDING),
+            ('rank_score', DESCENDING),
+            ('category', ASCENDING),
+            ('variant_type', ASCENDING)],
+            name="hgncsymbol_rankscore_category_varianttype",
+            background=True,
+            partialFilterExpression={ 'rank_score': { '$gt': 5 } ,
+                                     'category': 'snv' }
+            ),
         IndexModel([
             ('case_id', ASCENDING),
             ('category', ASCENDING),
@@ -68,7 +78,7 @@
             background=True,
             sparse=True,
             ),
-        
+
     ],
     'hpo_term': [
         IndexModel([

diff --git a/scout/server/blueprints/cases/controllers.py b/scout/server/blueprints/cases/controllers.py
@@ -4,19 +4,26 @@
 import requests
 import datetime
 
+import logging
+
 from bs4 import BeautifulSoup
 from xlsxwriter import Workbook
 from flask import url_for
 from flask_mail import Message
 import query_phenomizer
+from flask_login import current_user
 
 from scout.constants import (CASE_STATUSES, PHENOTYPE_GROUPS, COHORT_TAGS, SEX_MAP, PHENOTYPE_MAP, VERBS_MAP, MT_EXPORT_HEADER)
 from scout.constants.variant_tags import MANUAL_RANK_OPTIONS, DISMISS_VARIANT_OPTIONS, GENETIC_MODELS
 from scout.export.variant import export_mt_variants
-from scout.server.utils import institute_and_case
+from scout.server.utils import institute_and_case, user_institutes
 from scout.parse.clinvar import clinvar_submission_header, clinvar_submission_lines
 from scout.server.blueprints.variants.controllers import variant as variant_decorator
 from scout.server.blueprints.variants.controllers import sv_variant
+from scout.server.blueprints.variants.controllers import get_predictions
+from scout.server.blueprints.genes.controllers import gene
+
+log = logging.getLogger(__name__)
 
 STATUS_MAP = {'solved': 'bg-success', 'archived': 'bg-warning'}
 
@@ -426,6 +433,69 @@ def vcf2cytosure(store, institute_id, case_name, individual_id):
 
     return (individual_obj['display_name'], individual_obj['vcf2cytosure'])
 
+def gene_variants(store, variants_query, page=1, per_page=50):
+    """Pre-process one page of variants from a gene level variant search.
+
+    Variants belonging to an institute the current user has no access to are
+    dropped. The remaining variants get the display name of their case, an
+    HGVS description (only when the variant has exactly one gene) and the
+    output of get_predictions added.
+
+    Args:
+        store: adapter used to fetch cases (store.case); also handed to
+            user_institutes and gene
+        variants_query: query result supporting count(), skip() and limit()
+            -- presumably a pymongo cursor, verify against caller
+        page(int): page to show, pages below 1 are treated as the first page
+        per_page(int): number of variants per page
+
+    Returns:
+        dict: 'variants' (list of pre-processed variant dicts) and
+            'more_variants' (bool, True if there are variants beyond this page)
+    """
+    variant_count = variants_query.count()
+    skip_count = per_page * max(page - 1, 0)
+    more_variants = variant_count > (skip_count + per_page)
+    variant_res = variants_query.skip(skip_count).limit(per_page)
+
+    my_institutes = [inst['_id'] for inst in user_institutes(store, current_user)]
+
+    log.debug("Institutes allowed: %s.", my_institutes)
+
+    variants = []
+    for variant_obj in variant_res:
+        # hide other institutes for now
+        if variant_obj['institute'] not in my_institutes:
+            log.debug("Institute %s not allowed.", variant_obj['institute'])
+            continue
+
+        variant_case_obj = store.case(case_id=variant_obj['case_id'])
+        variant_obj['case_display_name'] = variant_case_obj['display_name']
+
+        variant_genes = variant_obj.get('genes')
+
+        if variant_genes is not None:
+            gene_symbols = []
+            hgvs_c = []
+            hgvs_p = []
+            for gene_obj in variant_genes:
+                hgnc_id = gene_obj['hgnc_id']
+                gene_symbols.append(gene(store, hgnc_id)['symbol'])
+
+                # Reset both descriptions for every gene so that a gene
+                # without a canonical transcript gets '-' instead of an
+                # unbound name or the value of the previous gene.
+                hgvs_nucleotide = '-'
+                hgvs_protein = '-'
+                # gather HGVS info from gene transcripts
+                for transcript_obj in gene_obj.get('transcripts') or []:
+                    if transcript_obj.get('is_canonical') is True:
+                        # str() turns a missing name into "None", checked below
+                        hgvs_nucleotide = str(transcript_obj.get('coding_sequence_name'))
+                        hgvs_protein = str(transcript_obj.get('protein_sequence_name'))
+                hgvs_c.append(hgvs_nucleotide)
+                hgvs_p.append(hgvs_protein)
+
+            log.debug("HGVS: %s %s %s.", gene_symbols, hgvs_c, hgvs_p)
+
+            if len(gene_symbols) == 1:
+                # Prefer the protein description, then the coding one
+                if hgvs_p[0] != "None":
+                    hgvs = hgvs_p[0]
+                elif hgvs_c[0] != "None":
+                    hgvs = hgvs_c[0]
+                else:
+                    hgvs = "-"
+                variant_obj['hgvs'] = hgvs
+            variant_obj.update(get_predictions(variant_genes))
+        variants.append(variant_obj)
+
+    return {
+        'variants': variants,
+        'more_variants': more_variants,
+    }
 
 def multiqc(store, institute_id, case_name):
     """Find MultiQC report for the case."""

diff --git a/scout/server/blueprints/cases/forms.py b/scout/server/blueprints/cases/forms.py
@@ -0,0 +1,29 @@
+# -*- coding: utf-8 -*-
+import decimal
+
+from flask_wtf import FlaskForm
+from wtforms import (BooleanField, DecimalField, Field, TextField, SelectMultipleField,
+                     IntegerField, SubmitField)
+from wtforms.widgets import TextInput
+
+# make a base class or other utility with this instead..
+class TagListField(Field):
+    """Text input field whose data is a list of tags.
+
+    The list is displayed as one comma separated string and submitted
+    text is split on commas back into a list.
+    """
+    widget = TextInput()
+
+    def _value(self):
+        """Return the tags joined with ', ', or '' when there is no data."""
+        return ', '.join(self.data) if self.data else ''
+
+    def process_formdata(self, valuelist):
+        """Store the first submitted value as a list of stripped, non-empty tags."""
+        if not valuelist:
+            self.data = []
+            return
+        stripped_tags = (tag.strip() for tag in valuelist[0].split(','))
+        self.data = [tag for tag in stripped_tags if tag]
+
+class GeneVariantFiltersForm(FlaskForm):
+    """Filter form for gene level variant searches.
+
+    Fields:
+        variant_type: multiple choice between 'clinical' and 'research'
+        hgnc_symbols: comma separated tags, parsed into a list by TagListField
+        filter_variants: submit button
+    """
+    # Both the stored value and the displayed label are the variant type name
+    variant_type = SelectMultipleField(choices=[('clinical','clinical'),('research','research')])
+    hgnc_symbols = TagListField('HGNC Symbols/Ids (case sensitive)')
+    filter_variants = SubmitField(label='Filter variants')