Skip to content

Commit

Permalink
Merge d83889c into a69a287
Browse files Browse the repository at this point in the history
  • Loading branch information
northwestwitch committed Mar 5, 2019
2 parents a69a287 + d83889c commit a7e4c6e
Show file tree
Hide file tree
Showing 4 changed files with 187 additions and 42 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ About changelog [here](https://keepachangelog.com/en/1.0.0/)
Add stuff here


### Fixed
- Clinsig filter is fixed so clinsig numerical values are returned
- Split multi clinsig string values in different elements of clinsig array
- Regex to search in multi clinsig string values or multi revstat string values


## [4.3.1]

### Added
Expand Down
36 changes: 29 additions & 7 deletions scout/adapter/mongo/query.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import re

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -267,33 +268,54 @@ def build_query(self, case_id, query=None, variant_ids=None, category='snv'):

if query.get('clinsig'):
rank = []
str_rank = []

for item in query['clinsig']:
rank.append(item)
rank.append(int(item))
# search for human readable clinsig values in newer cases
rank.append(CLINSIG_MAP[int(item)])
str_rank.append(CLINSIG_MAP[int(item)])

if query.get('clinsig_confident_always_returned') == True:

trusted_revision_level = ['mult', 'single', 'exp', 'guideline']

mongo_query_major = { "clnsig":
{
'$elemMatch': { 'value':
{ '$in': rank },
'revstat':
{ '$in': trusted_revision_level }
'$elemMatch': {
'$or' : [
{ 'value' : { '$in': rank }},
{ 'value' : re.compile('|'.join(str_rank)) }
],
'$or' : [
{'revstat': { '$in': trusted_revision_level }},
{'revstat' : re.compile('|'.join(trusted_revision_level)) }
]
}
}
}

else:
logger.debug("add CLINSIG filter for rank: %s" %
', '.join(str(query['clinsig'])))
clnsig_query = {
"clnsig":
{
'$elemMatch': {
'$or' : [
{ 'value' : { '$in': rank }},
{ 'value' : re.compile('|'.join(str_rank)) }
]
}
}
}
if mongo_query_minor:
mongo_query_minor.append({'clnsig.value': {'$in': rank}})
#mongo_query_minor.append({'clnsig.value': {'$in': rank}})
mongo_query_minor.append(clnsig_query)

else:
# if this is the only minor criterion, use implicit and.
mongo_query['clnsig.value'] = {'$in': rank}
mongo_query['clnsig'] = clnsig_query['clnsig']

if mongo_query_minor and mongo_query_major:
if gene_query:
Expand Down
17 changes: 9 additions & 8 deletions scout/parse/variant/clnsig.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def parse_clnsig(acc, sig, revstat, transcripts):
clnsig_accsessions(list): A list with clnsig accessions
"""
clnsig_accsessions = []

if acc:
# New format of clinvar always has integers as accession numbers
try:
Expand All @@ -31,18 +31,19 @@ def parse_clnsig(acc, sig, revstat, transcripts):
revstat_groups = []
if revstat:
revstat_groups = [rev.lstrip('_') for rev in revstat.split(',')]

sig_groups = []
if sig:
for significance in sig.split('/'):
splitted_word = significance.split('_')
sig_groups.append(' '.join(splitted_word[:2]))

clnsig_accsessions.append({
'value': ', '.join(sig_groups),
'accession': int(acc),
'revstat': ', '.join(revstat_groups),
})

for sign_term in sig_groups:
clnsig_accsessions.append({
'value': sign_term,
'accession': int(acc),
'revstat': ', '.join(revstat_groups),
})
else:
# There are sometimes different separators so we need to check which
# one to use
Expand Down
170 changes: 143 additions & 27 deletions tests/adapter/mongo/test_query.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from scout.constants import CLINSIG_MAP
import re

def test_build_query(adapter):
case_id = 'cust000'
Expand Down Expand Up @@ -95,27 +96,36 @@ def test_build_gnomad_and_cadd(adapter):
def test_build_clinsig(adapter):
case_id = 'cust000'
clinsig_items = [ 3, 4, 5 ]
clinsig_mapped_items = []
all_clinsig = [] # both numerical and human readable values

for item in clinsig_items:
all_clinsig.append(item)
all_clinsig.append(CLINSIG_MAP[item])
clinsig_mapped_items.append(CLINSIG_MAP[item])

query = {'clinsig': clinsig_items}

mongo_query = adapter.build_query(case_id, query=query)

assert mongo_query['clnsig.value'] == {
'$in': all_clinsig
}
assert mongo_query['clnsig'] == {
'$elemMatch': {
'$or' : [
{ 'value' : { '$in': all_clinsig }},
{ 'value' : re.compile('|'.join(clinsig_mapped_items)) }
]
}
}

def test_build_clinsig_filter(adapter):
def test_build_clinsig_filter(adapter, real_variant_database):
case_id = 'cust000'
clinsig_items = [ 4, 5 ]
clinsig_mapped_items = []
all_clinsig = [] # both numerical and human readable values
for item in clinsig_items:
all_clinsig.append(item)
all_clinsig.append(CLINSIG_MAP[item])
clinsig_mapped_items.append(CLINSIG_MAP[item])

region_annotation = ['exonic', 'splicing']

Expand All @@ -128,19 +138,82 @@ def test_build_clinsig_filter(adapter):
{ 'genes.region_annotation':
{'$in': region_annotation }
},
{ 'clnsig.value':
{ '$in': all_clinsig }
}
{ 'clnsig': {
'$elemMatch': {
'$or' : [
{ 'value' : { '$in': all_clinsig }},
{ 'value' : re.compile('|'.join(clinsig_mapped_items)) }
]
}
}}
]

def test_build_clinsig_always(adapter):

assert real_variant_database.variant_collection.find_one()

# Test that the query works with real data:

case_obj = real_variant_database.case_collection.find_one()
case_id = case_obj['_id']

# Execute a raw query to collect variants that should pass the filter
n_results_raw_query = real_variant_database.variant_collection.find({
'$and' : [
{'genes.region_annotation' : {'$in' : region_annotation}},
{'clnsig.value' : {'$in' : [4, 'Likely pathogenic', 5, 'Pathogenic']}},
{'case_id' : case_id},
{'category' : 'snv'},
{'variant_type' : 'clinical'}
]}).count()
assert n_results_raw_query

adapter = real_variant_database

# filter real variants using query:
filtered_variants = adapter.variants(case_id=case_id, nr_of_variants=-1, query=query)

# number of variants returned by raw query and filtered variants should be the same:
assert filtered_variants.count() == n_results_raw_query

# Check if query works on clnsig.value that contains comma separated values:
a_variant = list(filtered_variants)[0]
assert a_variant['_id']

# there should be no variant with clnsig.value=='Pathogenic, Likely pathogenic'
assert real_variant_database.variant_collection.find({'clnsig.value' : 'Pathogenic, Likely pathogenic'}).count() == 0

# Modify clnsig value of this variant to 'Pathogenic, Likely pathogenic'
real_variant_database.variant_collection.update_one({'_id' : a_variant['_id']}, {'$set' : {'clnsig.0.value': 'Pathogenic, Likely pathogenic'}})

# One variant has multiple clnsig values now:
real_variant_database.variant_collection.find({'clnsig.value' : 'Pathogenic, Likely pathogenic'}).count() == 1

# Update raw query to find this variant as well
n_results_raw_query = real_variant_database.variant_collection.find({
'$and' : [
{'genes.region_annotation' : {'$in' : region_annotation}},
{'clnsig.value' : {'$in' : [4, 'Likely pathogenic', 5, 'Pathogenic', 'Pathogenic, Likely pathogenic']}},
{'case_id' : case_id},
{'category' : 'snv'},
{'variant_type' : 'clinical'}
]}).count()

# Makes sure that the variant is found anyway by the query:
n_filtered_variants = adapter.variants(case_id=case_id, nr_of_variants=-1, query=query).count()
assert n_results_raw_query == n_filtered_variants


def test_build_clinsig_always(adapter, real_variant_database):
case_id = 'cust000'
clinsig_confident_always_returned = True
trusted_revstat_lev = ['mult', 'single', 'exp', 'guideline']
clinsig_items = [ 4, 5 ]
clinsig_mapped_items = []
all_clinsig = [] # both numerical and human readable values
for item in clinsig_items:
all_clinsig.append(item)
all_clinsig.append(CLINSIG_MAP[item])
clinsig_mapped_items.append(CLINSIG_MAP[item])

region_annotation = ['exonic', 'splicing']
freq=0.01
Expand All @@ -167,19 +240,59 @@ def test_build_clinsig_always(adapter):
]},
{ 'clnsig':
{
'$elemMatch': { 'value':
{ '$in' : all_clinsig },
'revstat':
{ '$in' : ['mult',
'single',
'exp',
'guideline']
}
}
'$elemMatch': {
'$or' :[
{ 'value' : { '$in': all_clinsig }},
{ 'value' : re.compile('|'.join(clinsig_mapped_items)) }
],
'$or' : [
{ 'revstat' : {'$in' : trusted_revstat_lev }},
{ 'revstat' : re.compile('|'.join(trusted_revstat_lev)) }
]

}
}
}
]

# Test that the query works with real data

case_obj = real_variant_database.case_collection.find_one()
case_id = case_obj['_id']

adapter = real_variant_database
assert adapter.variants(case_id=case_id, nr_of_variants=-1).count()

# filter variants using query:
filtered_variants = list(adapter.variants(case_id=case_id, nr_of_variants=-1, query=query))
assert filtered_variants

# Make sure that variants are filtered as they should:
for var in filtered_variants:

gnomad_filter = False
anno_filter = False
clisig_filter = False

if 'gnomad_frequency' in var:
if var['gnomad_frequency'] < freq:
gnomad_filter = True
else:
gnomad_filter = True

for gene in var['genes']:
if gene['region_annotation'] in region_annotation:
anno_filter = True

if 'clnsig' in var:
for clnsig in var['clnsig']:
if clnsig['value'] in [4, 'Likely pathogenic', 5, 'Pathogenic']:
clisig_filter = True

# Assert that variant passes gnomad filter + anno_filter or has the required clinsig
assert (gnomad_filter and anno_filter) or clisig_filter


def test_build_spidex_not_reported(adapter):
case_id = 'cust000'
spidex_human = ['not_reported']
Expand Down Expand Up @@ -208,11 +321,14 @@ def test_build_spidex_high(adapter):
def test_build_clinsig_always_only(adapter):
case_id = 'cust000'
clinsig_confident_always_returned = True
trusted_revstat_lev = ['mult', 'single', 'exp', 'guideline']
clinsig_items = [ 4, 5 ]
clinsig_mapped_items = []
all_clinsig = [] # both numerical and human readable values
for item in clinsig_items:
all_clinsig.append(item)
all_clinsig.append(CLINSIG_MAP[item])
clinsig_mapped_items.append(CLINSIG_MAP[item])

query = {'clinsig': clinsig_items,
'clinsig_confident_always_returned': clinsig_confident_always_returned
Expand All @@ -221,16 +337,16 @@ def test_build_clinsig_always_only(adapter):
mongo_query = adapter.build_query(case_id, query=query)

assert mongo_query['clnsig'] == {
'$elemMatch': { 'value':
{ '$in' : all_clinsig },
'revstat':
{ '$in' : ['mult',
'single',
'exp',
'guideline']
}
}
}
'$elemMatch': {
'$or' : [
{ 'value' : { '$in': all_clinsig }},
{ 'value' : re.compile('|'.join(clinsig_mapped_items)) }
],
'$or': [
{ 'revstat' : {'$in' : trusted_revstat_lev }},
{'revstat' : re.compile('|'.join(trusted_revstat_lev)) }
]
}}

def test_build_chrom(adapter):
case_id = 'cust000'
Expand Down

0 comments on commit a7e4c6e

Please sign in to comment.