From efca7db75d4be462a90258607454f38b89ae23bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Wed, 12 Jun 2019 09:42:51 +0200 Subject: [PATCH 1/5] Change error handling to mainly three exceptions: NotFoundError - no hits in db or similar ParsingError - unable to parse request (e. g. bad region) MalformedRequest - other error, currently only too large region --- backend/modules/browser/browser_handlers.py | 68 +++++--- backend/modules/browser/error.py | 11 ++ backend/modules/browser/lookups.py | 161 ++++++++++-------- .../browser/tests/test_browser_handlers.py | 13 +- backend/modules/browser/tests/test_lookups.py | 139 +++++++++------ backend/modules/browser/tests/test_utils.py | 80 +++++++-- backend/modules/browser/utils.py | 37 ++-- test/data/browser_test_data.sql | 3 +- 8 files changed, 325 insertions(+), 187 deletions(-) create mode 100644 backend/modules/browser/error.py diff --git a/backend/modules/browser/browser_handlers.py b/backend/modules/browser/browser_handlers.py index 6abc27649..f3d9b4fc0 100644 --- a/backend/modules/browser/browser_handlers.py +++ b/backend/modules/browser/browser_handlers.py @@ -5,6 +5,7 @@ import db import handlers +from . import error from . import lookups from . import utils @@ -24,7 +25,7 @@ def get(self, dataset:str, query:str, ds_version:str=None): dataset, ds_version = utils.parse_dataset(dataset, ds_version) ret = {} - results = lookups.get_autocomplete(dataset, query, ds_version) + results = lookups.autocomplete(dataset, query, ds_version) ret = {'values': sorted(list(set(results)))[:20]} self.finish(ret) @@ -87,12 +88,13 @@ def get(self, dataset:str, datatype:str, item:str, ds_version:str=None): ds_version (str): dataset version """ dataset, ds_version = utils.parse_dataset(dataset, ds_version) - ret = utils.get_coverage(dataset, datatype, item, ds_version) - if 'bad_region' in ret: - self.send_error(status_code=400, reason="Unable to parse the region") + try: + ret = utils.get_coverage(dataset, datatype, item, ds_version) + except error.NotFoundError as err: + self.send_error(status_code=404, reason=str(err)) return - if 'region_too_large' in ret: - self.send_error(status_code=400, reason="The region is too large") + except (error.ParsingError, error.MalformedRequest) as err: + self.send_error(status_code=400, reason=str(err)) return self.finish(ret) @@ -114,7 +116,7 @@ def get(self, dataset:str, datatype:str, item:str, ds_version:str=None): try: ret = utils.get_coverage_pos(dataset, datatype, item, ds_version) except ValueError: - logging.error('GetCoveragePos: unable to parse region ({})'.format(region)) + logging.error('GetCoveragePos: unable to parse region ({})'.format(item)) self.send_error(status_code=400, reason='Unable to parse region') return @@ -139,7 +141,15 @@ def get(self, dataset:str, gene:str, ds_version:str=None): ret = {'gene':{'gene_id': gene_id}} # Gene - gene = lookups.get_gene(dataset, gene_id, ds_version) + try: + gene = lookups.get_gene(dataset, gene_id, ds_version) + except error.NotFoundError as err: + self.send_error(status_code=404, reason=str(err)) + return + except (error.ParsingError, error.MalformedRequest) as err: + self.send_error(status_code=400, reason=str(err)) + return + if not gene: self.send_error(status_code=404, reason='Gene not found') return @@ -181,9 +191,9 @@ def get(self, dataset:str, region:str, ds_version:str=None): try: chrom, start, stop = utils.parse_region(region) - except ValueError: - logging.error('GetRegion: unable to parse region ({})'.format(region)) - self.send_error(status_code=400, reason='Unable to parse region') + except error.ParsingError as err: + self.send_error(status_code=400, reason=str(err)) + logging.warning('GetRegion: unable to parse region ({})'.format(region)) return ret = {'region':{'chrom': chrom, @@ -193,7 +203,7 @@ def get(self, dataset:str, region:str, ds_version:str=None): } if utils.is_region_too_large(start, stop): - self.send_error(status_code=400, reason="The region is too large") + self.send_error(status_code=400, reason='Region too large') return genes_in_region = lookups.get_genes_in_region(dataset, chrom, start, stop, ds_version) @@ -229,10 +239,12 @@ def get(self, dataset:str, transcript:str, ds_version:str=None): } # Add transcript information - transcript = lookups.get_transcript(dataset, transcript_id, ds_version) - if not transcript: - self.send_error(status_code=404, reason='Transcript not found') + try: + transcript = lookups.get_transcript(dataset, transcript_id, ds_version) + except error.NotFoundError as err: + self.send_error(status_code=404, reason=str(err)) return + ret['transcript']['id'] = transcript['transcript_id'] ret['transcript']['number_of_CDS'] = len([t for t in transcript['exons'] if t['feature_type'] == 'CDS']) @@ -270,18 +282,21 @@ def get(self, dataset:str, variant:str, ds_version:str=None): ret = {'variant':{}} # Variant v = variant.split('-') + if len(v) != 4: + logging.error('GetVariant: unable to parse variant ({})'.format(variant)) + self.send_error(status_code=400, reason=f'Unable to parse variant {variant}') try: v[1] = int(v[1]) except ValueError: - logging.error('GetVariant: unable to parse variant ({})'.format(variant)) - self.send_error(status_code=400, reason="Unable to parse variant") + logging.error('GetVariant: position not an integer ({})'.format(variant)) + self.send_error(status_code=400, reason=f'Position is not an integer in variant {variant}') return orig_variant = variant - variant = lookups.get_variant(dataset, v[1], v[0], v[2], v[3], ds_version) - - if not variant: + try: + variant = lookups.get_variant(dataset, v[1], v[0], v[2], v[3], ds_version) + except error.NotFoundError as err: logging.error('Variant not found ({})'.format(orig_variant)) - self.send_error(status_code=404, reason='Variant not found') + self.send_error(status_code=404, reason=str(err)) return # Just get the information we need @@ -379,12 +394,13 @@ def get(self, dataset:str, datatype:str, item:str, ds_version:str=None): item (str): item to query """ dataset, ds_version = utils.parse_dataset(dataset, ds_version) - ret = utils.get_variant_list(dataset, datatype, item, ds_version) - if not ret: - self.send_error(status_code=500, reason='Unable to retrieve variants') + try: + ret = utils.get_variant_list(dataset, datatype, item, ds_version) + except error.NotFoundError as err: + self.send_error(status_code=404, reason=str(err)) return - if 'region_too_large' in ret: - self.send_error(status_code=400, reason="The region is too large") + except (error.ParsingError, error.MalformedRequest) as err: + self.send_error(status_code=400, reason=str(err)) return # inconvenient way of doing humpBack-conversion diff --git a/backend/modules/browser/error.py b/backend/modules/browser/error.py new file mode 100644 index 000000000..606bb5c2e --- /dev/null +++ b/backend/modules/browser/error.py @@ -0,0 +1,11 @@ +class NotFoundError(Exception): + """The query returned nothing from the database.""" + pass + +class ParsingError(Exception): + """Failed to parse the request.""" + pass + +class MalformedRequest(Exception): + """Bad request (e.g. too large region).""" + pass diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 652e7f359..112dbb262 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -1,17 +1,17 @@ """Lookup functions for the variant browser.""" - - import logging import re import db +from . import error + SEARCH_LIMIT = 10000 REGION_REGEX = re.compile(r'^\s*(\d+|X|Y|M|MT)\s*([-:]?)\s*(\d*)-?([\dACTG]*)-?([ACTG]*)') -def get_autocomplete(dataset:str, query:str, ds_version:str=None): +def autocomplete(dataset:str, query:str, ds_version:str=None): """ Provide autocomplete suggestions based on the query. @@ -27,7 +27,7 @@ def get_autocomplete(dataset:str, query:str, ds_version:str=None): try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set except AttributeError: - return None + raise error.NotFoundError(f'Reference set not found for dataset {dataset}.') query = (db.Gene.select(db.Gene.name) .where(((db.Gene.name.startswith(query)) & (db.Gene.reference_set == ref_set)))) @@ -68,32 +68,46 @@ def get_awesomebar_result(dataset:str, query:str, ds_version:str=None): query = query.strip() # Parse Variant types - variant = get_variants_by_rsid(dataset, query.lower(), ds_version=ds_version) - if variant: + try: + variant = get_variants_by_rsid(dataset, query.lower(), ds_version=ds_version) + except (error.NotFoundError, error.ParsingError): + pass + else: if len(variant) == 1: - retval = ('variant', variant[0]['variant_id']) - else: - retval = ('dbsnp_variant_set', variant[0]['rsid']) - return retval + return ('variant', variant[0]['variant_id']) + return ('dbsnp_variant_set', variant[0]['rsid']) - gene = get_gene_by_name(dataset, query) - # From here out, all should be uppercase (gene, tx, region, variant_id) - query = query.upper() - if not gene: + # Gene + try: gene = get_gene_by_name(dataset, query) - if gene: + except error.NotFoundError: + pass + else: return 'gene', gene['gene_id'] + # Capital letters for all other queries + query = query.upper() + try: + gene = get_gene_by_name(dataset, query) + except error.NotFoundError: + pass + else: + return 'gene', gene['gene_id'] # Ensembl formatted queries if query.startswith('ENS'): # Gene - gene = get_gene(dataset, query) - if gene: + try: + gene = get_gene(dataset, query) + except error.NotFoundError: + pass + else: return 'gene', gene['gene_id'] - # Transcript - transcript = get_transcript(dataset, query) - if transcript: + try: + transcript = get_transcript(dataset, query) + except error.NotFoundError: + pass + else: return 'transcript', transcript['transcript_id'] # Region and variant queries @@ -130,17 +144,19 @@ def get_coverage_for_bases(dataset:str, chrom:str, start_pos:int, end_pos:int=No """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: - return [] + raise error.NotFoundError(f'Unable to find the dataset version in the database') if end_pos is None: end_pos = start_pos - return [values for values in (db.Coverage - .select() - .where((db.Coverage.pos >= start_pos) & - (db.Coverage.pos <= end_pos) & - (db.Coverage.chrom == chrom) & - (db.Coverage.dataset_version == dataset_version.id)) - .dicts())] + coverage = (db.Coverage.select() + .where((db.Coverage.pos >= start_pos) & + (db.Coverage.pos <= end_pos) & + (db.Coverage.chrom == chrom) & + (db.Coverage.dataset_version == dataset_version.id)) + .dicts()) + if not coverage: + raise error.NotFoundError('No coverage found for the region') + return coverage def get_coverage_for_transcript(dataset:str, chrom:str, start_pos:int, end_pos:int=None, ds_version:str=None): @@ -186,7 +202,8 @@ def get_exons_in_transcript(dataset:str, transcript_id:str, ds_version=None): ref_set = db.get_dataset_version(dataset, ds_version).reference_set except AttributeError: logging.info('get_exons_in_transcript({}, {}): unable to find dataset dbid'.format(dataset, transcript_id)) - return None + raise error.NotFoundError(f'Reference set not found for dataset {dataset}.') + try: transcript = (db.Transcript .select() @@ -196,11 +213,14 @@ def get_exons_in_transcript(dataset:str, transcript_id:str, ds_version=None): .get()) except db.Transcript.DoesNotExist: logging.info('get_exons_in_transcript({}, {}): unable to retrieve transcript'.format(dataset, transcript_id)) - return None + raise error.NotFoundError(f'Transcript {transcript_id} not found in reference data.') wanted_types = ('CDS', 'UTR', 'exon') - return sorted(list(db.Feature.select().where((db.Feature.transcript == transcript) & - (db.Feature.feature_type in wanted_types)).dicts()), - key=lambda k: k['start']) + features = sorted(list(db.Feature.select().where((db.Feature.transcript == transcript) & + (db.Feature.feature_type in wanted_types)).dicts()), + key=lambda k: k['start']) + if not features: + raise error.NotFoundError(f'No features found for transcript {transcript_id} in reference data.') + return features def get_gene(dataset:str, gene_id:str, ds_version:str=None): @@ -219,12 +239,13 @@ def get_gene(dataset:str, gene_id:str, ds_version:str=None): try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set except AttributeError: - return None + raise error.NotFoundError(f'Reference set not found for dataset {dataset}.') + try: return db.Gene.select().where((db.Gene.gene_id == gene_id) & (db.Gene.reference_set == ref_set)).dicts().get() except db.Gene.DoesNotExist: - return None + raise error.NotFoundError(f'Gene {gene_id} not found in reference data.') def get_gene_by_dbid(gene_dbid:str): @@ -262,7 +283,8 @@ def get_gene_by_name(dataset:str, gene_name:str, ds_version=None): try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set except AttributeError: - return {} + raise error.NotFoundError(f'Reference set not found for dataset {dataset}.') + try: return (db.Gene.select() .where((db.Gene.reference_set == ref_set) & @@ -278,8 +300,8 @@ def get_gene_by_name(dataset:str, gene_name:str, ds_version=None): .dicts() .get()) except db.GeneOtherNames.DoesNotExist: - logging.error('get_gene_by_name({}, {}): unable to retrieve gene'.format(dataset, gene_name)) - return {} + logging.info('get_gene_by_name({}, {}): unable to retrieve gene'.format(dataset, gene_name)) + raise error.NotFoundError(f'Gene {gene_name} not found in reference data') def get_genes_in_region(dataset:str, chrom:str, start_pos:int, stop_pos:int, ds_version:str=None): @@ -300,13 +322,13 @@ def get_genes_in_region(dataset:str, chrom:str, start_pos:int, stop_pos:int, ds_ try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set except AttributeError: - return {} + raise error.NotFoundError(f'Reference set not found for dataset {dataset}.') - gene_query = db.Gene.select().where((db.Gene.reference_set == ref_set) & - (db.Gene.start <= stop_pos) & - (db.Gene.stop >= start_pos) & - (db.Gene.chrom == chrom)).dicts() - return [gene for gene in gene_query] + genes = db.Gene.select().where((db.Gene.reference_set == ref_set) & + (db.Gene.start <= stop_pos) & + (db.Gene.stop >= start_pos) & + (db.Gene.chrom == chrom)).dicts() + return genes def get_raw_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_version:str=None): @@ -327,7 +349,7 @@ def get_raw_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_versio """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: - return None + raise error.NotFoundError(f'Unable to find the dataset version in the database') try: variant = (db.Variant @@ -351,9 +373,9 @@ def get_raw_variant(dataset:str, pos:int, chrom:str, ref:str, alt:str, ds_versio .dicts()] return variant except db.Variant.DoesNotExist: - logging.error('get_raw_variant({}, {}, {}, {}, {}, {}): unable to retrieve variant' - .format(dataset, pos, chrom, ref, alt, dataset_version.id)) - return None + logging.info('get_raw_variant({}, {}, {}, {}, {}, {}): unable to retrieve variant' + .format(dataset, pos, chrom, ref, alt, dataset_version.id)) + raise error.NotFoundError(f'Variant {chrom}-{pos}-{ref}-{alt} not found') def get_transcript(dataset:str, transcript_id:str, ds_version:str=None): @@ -374,7 +396,7 @@ def get_transcript(dataset:str, transcript_id:str, ds_version:str=None): try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set except AttributeError: - return None + raise error.NotFoundError(f'Reference set not found for dataset {dataset}.') try: transcript = (db.Transcript .select(db.Transcript, db.Gene.gene_id) @@ -386,7 +408,8 @@ def get_transcript(dataset:str, transcript_id:str, ds_version:str=None): transcript['exons'] = get_exons_in_transcript(dataset, transcript_id) return transcript except db.Transcript.DoesNotExist: - return None + logging.info('get_transcript({}, {}): unable to retrieve transcript'.format(dataset, transcript_id)) + raise error.NotFoundError(f'Transcript {transcript_id} not found in reference data') def get_transcripts_in_gene(dataset:str, gene_id:str, ds_version:str=None): @@ -405,14 +428,15 @@ def get_transcripts_in_gene(dataset:str, gene_id:str, ds_version:str=None): try: ref_set = db.get_dataset_version(dataset, ds_version).reference_set except AttributeError: - logging.error('get_transcripts_in_gene({}, {}): unable to get referenceset dbid'.format(dataset, gene_id)) - return [] + logging.warning('get_transcripts_in_gene({}, {}): unable to get referenceset dbid'.format(dataset, gene_id)) + raise error.NotFoundError(f'Reference set not found for dataset {dataset}.') + try: gene = db.Gene.select().where((db.Gene.reference_set == ref_set) & (db.Gene.gene_id == gene_id)).dicts().get() except db.Gene.DoesNotExist: - logging.error('get_transcripts_in_gene({}, {}): unable to retrieve gene'.format(dataset, gene_id)) - return [] + logging.info('get_transcripts_in_gene({}, {}): unable to retrieve gene'.format(dataset, gene_id)) + raise error.NotFoundError(f'Gene {gene_id} not found in reference data') return [transcript for transcript in db.Transcript.select().where(db.Transcript.gene == gene['id']).dicts()] @@ -469,24 +493,25 @@ def get_variants_by_rsid(dataset:str, rsid:str, ds_version:str=None): """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: - return None + raise error.NotFoundError(f'Unable to find the dataset version in the database') if not rsid.startswith('rs'): logging.error('get_variants_by_rsid({}, {}): rsid not starting with rs'.format(dataset, rsid)) - return None + raise error.ParsingError('rsid not starting with rs') try: rsid = int(rsid.lstrip('rs')) except ValueError: logging.error('get_variants_by_rsid({}, {}): not an integer after rs'.format(dataset, rsid)) - return None - query = (db.Variant - .select() - .where((db.Variant.rsid == rsid) & - (db.Variant.dataset_version == dataset_version)) - .dicts()) + raise error.ParsingError('Not an integer after rs') + variants = (db.Variant + .select() + .where((db.Variant.rsid == rsid) & + (db.Variant.dataset_version == dataset_version)) + .dicts()) - variants = [variant for variant in query] + if not variants: + raise error.NotFoundError('No variants found for rsid {rsid}') return variants @@ -505,10 +530,10 @@ def get_variants_in_gene(dataset:str, gene_id:str, ds_version:str=None): """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: - return None + raise error.NotFoundError(f'Unable to find the dataset version in the database') gene = get_gene(dataset, gene_id, ds_version) if not gene: - return None + raise error.NotFoundError(f'Gene {gene_id} not found in reference data') variants = [variant for variant in db.Variant.select() .join(db.VariantGenes) @@ -544,7 +569,7 @@ def get_variants_in_region(dataset:str, chrom:str, start_pos:int, end_pos:int, d """ dataset_version = db.get_dataset_version(dataset, ds_version) if not dataset_version: - return None + raise error.NotFoundError(f'Unable to find the dataset version in the database') query = (db.Variant .select() .where((db.Variant.pos >= start_pos) & @@ -582,10 +607,12 @@ def get_variants_in_transcript(dataset:str, transcript_id:str, ds_version:str=No """ dataset_version = db.get_dataset_version(dataset, ds_version) + if not dataset_version: + raise error.NotFoundError(f'Unable to find the dataset version in the database') transcript = get_transcript(dataset, transcript_id, ds_version) if not transcript: - return None + raise error.NotFoundError(f'Transcript {transcript_id} not found in reference data') variants = [variant for variant in db.Variant.select() .join(db.VariantTranscripts) diff --git a/backend/modules/browser/tests/test_browser_handlers.py b/backend/modules/browser/tests/test_browser_handlers.py index 19d174884..b69797afb 100644 --- a/backend/modules/browser/tests/test_browser_handlers.py +++ b/backend/modules/browser/tests/test_browser_handlers.py @@ -1,7 +1,6 @@ """ Test the browser handlers """ - import requests import json @@ -63,7 +62,7 @@ def test_get_coverage(): assert response.status_code == 400 data_item = '1-1-5' response = requests.get('{}/api/dataset/{}/browser/coverage/{}/{}'.format(BASE_URL, dataset, data_type, data_item)) - assert response.status_code == 200 + assert response.status_code == 404 def test_get_coverage_pos(): @@ -244,9 +243,15 @@ def test_search(): assert data['type'] == 'dbsnp' assert data['value'] == 142856307 - query = '21-9411281-T-C' + query = '22-1234321-A-T' + response = requests.get('{}/api/dataset/{}/browser/search/{}'.format(BASE_URL, dataset, query)) + data = json.loads(response.text) + assert data['type'] == 'not_found' + assert data['value'] == '22-1234321-A-T' + + query = '21-29461622-G-A' version = '20161223' response = requests.get('{}/api/dataset/{}/version/{}/browser/search/{}'.format(BASE_URL, dataset, version, query)) data = json.loads(response.text) assert data['type'] == 'variant' - assert data['value'] == '21-9411281-T-C' + assert data['value'] == '21-29461622-G-A' diff --git a/backend/modules/browser/tests/test_lookups.py b/backend/modules/browser/tests/test_lookups.py index 21a13f5c9..6de69d606 100644 --- a/backend/modules/browser/tests/test_lookups.py +++ b/backend/modules/browser/tests/test_lookups.py @@ -2,19 +2,22 @@ Tests for the functions available in lookups.py """ +import pytest + +from .. import error from .. import lookups -def test_get_autocomplete(): +def test_autocomplete(): """ Test get_autocomplete() """ - res = lookups.get_autocomplete('SweGen', 'PA') + res = lookups.autocomplete('SweGen', 'PA') expected = set(["PABPC1P9", "PACSIN2", "PANX2", "PARP4P3", "PARVB", "PARVG", "PATZ1", "PAXBP1", "PAXBP1-AS1"]) assert set(res) == expected - res = lookups.get_autocomplete('Bad_dataset', 'PA') - assert not res + with pytest.raises(error.NotFoundError): + res = lookups.autocomplete('Bad_dataset', 'PA') def test_get_awesomebar_result(): @@ -33,6 +36,8 @@ def test_get_awesomebar_result(): assert result == ('transcript', 'ENST00000457709') result = lookups.get_awesomebar_result('SweGen', '22-46615715-46615880') assert result == ('region', '22-46615715-46615880') + result = lookups.get_awesomebar_result('SweGen', '22-1234321-A-A') + assert result == ('not_found', '22-1234321-A-A') result = lookups.get_awesomebar_result('SweGen', 'CHR22:46615715-46615880') assert result == ('region', '22-46615715-46615880') result = lookups.get_awesomebar_result('SweGen', 'CHR22-29461622-G-A') @@ -64,11 +69,12 @@ def test_get_coverage_for_bases(): assert len(lookups.get_coverage_for_bases('SweGen', '22', 46615715, 46615880)) == 17 # no hits - coverage = lookups.get_coverage_for_bases('SweGen', '1', 55500283, 55500285) - assert not coverage + with pytest.raises(error.NotFoundError): + lookups.get_coverage_for_bases('SweGen', '1', 55500283, 55500285) # incorrect dataset - assert not lookups.get_coverage_for_bases('BAD_DATASET', '1', 55500283, 55500320) + with pytest.raises(error.NotFoundError): + lookups.get_coverage_for_bases('BAD_DATASET', '1', 55500283, 55500320) def test_get_coverage_for_transcript(): @@ -93,11 +99,12 @@ def test_get_coverage_for_transcript(): assert len(lookups.get_coverage_for_transcript('SweGen', '22', 46615715, 46615880)) == 17 # no hits - coverage = lookups.get_coverage_for_transcript('SweGen', '1', 55500283, 55500285) - assert not coverage + with pytest.raises(error.NotFoundError): + coverage = lookups.get_coverage_for_transcript('SweGen', '1', 55500283, 55500285) # incorrect dataset - assert not lookups.get_coverage_for_transcript('BAD_DATASET', '1', 55500283, 55500320) + with pytest.raises(error.NotFoundError): + assert not lookups.get_coverage_for_transcript('BAD_DATASET', '1', 55500283, 55500320) def test_get_exons_in_transcript(): @@ -108,12 +115,12 @@ def test_get_exons_in_transcript(): assert len(result) == 14 # bad dataset - result = lookups.get_exons_in_transcript('NO_DATASET', 'ENST00000215855') - assert not result + with pytest.raises(error.NotFoundError): + result = lookups.get_exons_in_transcript('NO_DATASET', 'ENST00000215855') # bad transcript - result = lookups.get_exons_in_transcript('SweGen', 'BAD_TRANSCRIPT') - assert not result + with pytest.raises(error.NotFoundError): + result = lookups.get_exons_in_transcript('SweGen', 'BAD_TRANSCRIPT') def test_get_gene(): @@ -135,12 +142,12 @@ def test_get_gene(): assert result[val] == expected[val] # non-existing gene - result = lookups.get_gene('SweGen', 'NOT_A_GENE') - assert not result + with pytest.raises(error.NotFoundError): + result = lookups.get_gene('SweGen', 'NOT_A_GENE') # non-existing dataset - result = lookups.get_gene('NoDataset', 'ENSG00000223972') - assert not result + with pytest.raises(error.NotFoundError): + result = lookups.get_gene('NoDataset', 'ENSG00000223972') def test_get_gene_by_dbid(): @@ -186,17 +193,15 @@ def test_get_gene_by_name(caplog): assert result[val] == expected[val] # non-existing gene - result = lookups.get_gene_by_name('SweGen', 'NOT_A_GENE') - assert not result - assert caplog.messages[0] == 'get_gene_by_name(SweGen, NOT_A_GENE): unable to retrieve gene' + with pytest.raises(error.NotFoundError): + lookups.get_gene_by_name('SweGen', 'NOT_A_GENE') # non-existing dataset - result = lookups.get_gene_by_name('NoDataset', 'ENSG00000223972') - assert not result + with pytest.raises(error.NotFoundError): + lookups.get_gene_by_name('NoDataset', 'ENSG00000223972') # name in other_names result = lookups.get_gene_by_name('SweGen', 'BCL8C') - print(result) assert result['gene_id'] == 'ENSG00000223875' @@ -214,10 +219,10 @@ def test_get_genes_in_region(): expected_ids = ['ENSG00000231565'] assert [gene['gene_id'] for gene in res] == expected_ids # bad dataset - res = lookups.get_genes_in_region('bad_dataset', '22', 25595800, 25615800) + with pytest.raises(error.NotFoundError): + lookups.get_genes_in_region('bad_dataset', '22', 25595800, 25615800) # nothing found - res = lookups.get_genes_in_region('SweGen', '22', 25595800, 25595801) - assert not res + assert not lookups.get_genes_in_region('SweGen', '22', 25595800, 25595801) def test_get_transcript(): @@ -237,7 +242,8 @@ def test_get_transcript(): assert len(result['exons']) == 1 # non-existing - assert not lookups.get_transcript('SweGen', 'INCORRECT') + with pytest.raises(error.NotFoundError): + lookups.get_transcript('SweGen', 'INCORRECT') def test_get_transcripts_in_gene(): @@ -247,8 +253,10 @@ def test_get_transcripts_in_gene(): res = lookups.get_transcripts_in_gene('SweGen', 'ENSG00000228314') assert len(res) == 3 - assert not lookups.get_transcripts_in_gene('bad_dataset', 'ENSG00000241670') - assert not lookups.get_transcripts_in_gene('SweGen', 'ENSGASDFG') + with pytest.raises(error.NotFoundError): + lookups.get_transcripts_in_gene('bad_dataset', 'ENSG00000241670') + with pytest.raises(error.NotFoundError): + lookups.get_transcripts_in_gene('SweGen', 'ENSGASDFG') def test_get_raw_variant(): @@ -260,8 +268,10 @@ def test_get_raw_variant(): assert len(result['genes']) == len(['ENSG00000229286', 'ENSG00000235265']) assert set(result['transcripts']) == set(['ENST00000448070', 'ENST00000413156']) assert len(result['transcripts']) == len(['ENST00000448070', 'ENST00000413156']) - assert not lookups.get_raw_variant('SweGen', 55500281, '1', 'A', 'T') - assert not lookups.get_raw_variant('bad_dataset', 55500283, '1', 'A', 'T') + with pytest.raises(error.NotFoundError): + assert not lookups.get_raw_variant('SweGen', 55500281, '1', 'A', 'T') + with pytest.raises(error.NotFoundError): + assert not lookups.get_raw_variant('bad_dataset', 55500283, '1', 'A', 'T') def test_get_transcripts_in_gene_by_dbid(): @@ -285,15 +295,20 @@ def test_get_variant(): assert len(result['genes']) == len(['ENSG00000229286', 'ENSG00000235265']) assert set(result['transcripts']) == set(['ENST00000448070', 'ENST00000413156']) assert len(result['transcripts']) == len(['ENST00000448070', 'ENST00000413156']) - result = lookups.get_variant('SweGen', 9411609, '21', 'G', 'T') - assert not result + + # not found + with pytest.raises(error.NotFoundError): + result = lookups.get_variant('SweGen', 12321, '21', 'G', 'G') + with pytest.raises(error.NotFoundError): + result = lookups.get_variant('SweGen', 9411609, '21', 'G', 'T') # incorrect position - assert not lookups.get_variant('SweGen', -1, '1', 'A', 'T') + with pytest.raises(error.NotFoundError): + assert not lookups.get_variant('SweGen', -1, '1', 'A', 'T') # with version - result = lookups.get_variant('SweGen', 16057464, '22', 'G', 'A', "20161223") - assert not result + with pytest.raises(error.NotFoundError): + result = lookups.get_variant('SweGen', 16057464, '22', 'G', 'A', "20161223") result = lookups.get_variant('SweGen', 9411609, '21', 'G', 'T', "20161223") assert result['variant_id'] == '21-9411609-G-T' @@ -306,19 +321,25 @@ def test_get_variants_by_rsid(): result = lookups.get_variants_by_rsid('SweGen', 'rs142856307') assert result[0]['pos'] == 16285954 assert len(result) == 5 - assert not lookups.get_variants_by_rsid('SweGen', 'rs76676778') + with pytest.raises(error.NotFoundError): + assert not lookups.get_variants_by_rsid('SweGen', 'rs76676778') # with version - assert not lookups.get_variants_by_rsid('SweGen', 'rs185758992', '20161223') + with pytest.raises(error.NotFoundError): + lookups.get_variants_by_rsid('SweGen', 'rs185758992', '20161223') result = lookups.get_variants_by_rsid('SweGen', 'rs76676778', '20161223') assert result[0]['variant_id'] == '21-9411609-G-T' # errors - assert lookups.get_variants_by_rsid('incorrect_name', 'rs373706802') is None - assert lookups.get_variants_by_rsid('SweGen', '373706802') is None - assert lookups.get_variants_by_rsid('SweGen', 'rs3737o68o2') is None + with pytest.raises(error.NotFoundError): + lookups.get_variants_by_rsid('incorrect_name', 'rs373706802') + with pytest.raises(error.ParsingError): + lookups.get_variants_by_rsid('SweGen', '373706802') + with pytest.raises(error.ParsingError): + lookups.get_variants_by_rsid('SweGen', 'rs3737o68o2') # no variants with rsid available - assert not lookups.get_variants_by_rsid('SweGen', 'rs1') + with pytest.raises(error.NotFoundError): + lookups.get_variants_by_rsid('SweGen', 'rs1') def test_get_variants_in_gene(): @@ -327,9 +348,17 @@ def test_get_variants_in_gene(): """ res = lookups.get_variants_in_gene('SweGen', 'ENSG00000198062') assert len(res) == 512 - assert not lookups.get_variants_in_gene('bad_dataset', 'ENSG00000198062') - assert not lookups.get_variants_in_gene('bad_dataset', 'ENSGASDFG') - assert not lookups.get_variants_in_gene('SweGen', 'ENSG00000198062', "BAD_VERSION") + + # existing gene without variants + assert not lookups.get_variants_in_gene('SweGen', 'ENSG00000128298') + + # bad requests + with pytest.raises(error.NotFoundError): + lookups.get_variants_in_gene('bad_dataset', 'ENSG00000198062') + with pytest.raises(error.NotFoundError): + lookups.get_variants_in_gene('bad_dataset', 'ENSGASDFG') + with pytest.raises(error.NotFoundError): + lookups.get_variants_in_gene('SweGen', 'ENSG00000198062', "BAD_VERSION") def test_get_variants_in_region(): @@ -342,12 +371,14 @@ def test_get_variants_in_region(): assert [res['pos'] for res in result] == expected_pos # no positions covered - result = lookups.get_variants_in_region('SweGen', '22', 16079200, 16079000) - assert not result + assert not lookups.get_variants_in_region('SweGen', '22', 16079200, 16079000) + + # no variants found + assert not lookups.get_variants_in_region('SweGen', '22', 106079000, 106079200) # incorrect dataset - result = lookups.get_variants_in_region('Incorrect_dataset', '22', 16079200, 16079400) - assert not result + with pytest.raises(error.NotFoundError): + lookups.get_variants_in_region('Incorrect_dataset', '22', 16079200, 16079400) def test_get_variants_in_transcript(): @@ -356,5 +387,9 @@ def test_get_variants_in_transcript(): """ res = lookups.get_variants_in_transcript('SweGen', 'ENST00000452800') assert len(res) == 508 - assert not lookups.get_variants_in_transcript('BAD_DATASET', 'ENST00000452800') - assert not lookups.get_variants_in_transcript('SweGen', 'ENST123') + + # bad requests + with pytest.raises(error.NotFoundError): + assert not lookups.get_variants_in_transcript('BAD_DATASET', 'ENST00000452800') + with pytest.raises(error.NotFoundError): + assert not lookups.get_variants_in_transcript('SweGen', 'ENST123') diff --git a/backend/modules/browser/tests/test_utils.py b/backend/modules/browser/tests/test_utils.py index 74cbbd7f5..83361475e 100644 --- a/backend/modules/browser/tests/test_utils.py +++ b/backend/modules/browser/tests/test_utils.py @@ -2,6 +2,9 @@ Tests for utils.py """ +import pytest + +from .. import error from .. import lookups from .. import utils @@ -52,7 +55,7 @@ def test_add_consequence_to_variant(): assert variant['major_consequence'] == '' # bad variant - variant = lookups.get_variant('SweGen', 38481311, '444', 'C', 'T') + variant = {} utils.add_consequence_to_variant(variant) assert not variant @@ -74,18 +77,22 @@ def test_get_coverage(): assert len(res['coverage']) == 144 res = utils.get_coverage('SweGen', 'region', '22-46615715-46615880') assert len(res['coverage']) == 17 - res = utils.get_coverage('SweGen', 'region', '22:46615715-46615880') - assert not res['coverage'] - res = utils.get_coverage('SweGen', 'region', '22-46615715asd-46615880') - assert not res['coverage'] - assert res['bad_region'] res = utils.get_coverage('SweGen', 'transcript', 'ENST00000438441') assert len(res['coverage']) == 144 - assert not utils.get_coverage('BAD_SET', 'transcript', 'ENST00000438441')['coverage'] + # bad regions + with pytest.raises(error.ParsingError): + res = utils.get_coverage('SweGen', 'region', '22-46615715asd-46615880') + # is seen as 22:46615715-46615880-46615880 + with pytest.raises(error.NotFoundError): + utils.get_coverage('SweGen', 'region', '22:46615715-46615880') + + # no coverage found + with pytest.raises(error.NotFoundError): + utils.get_coverage('BAD_SET', 'transcript', 'ENST00000438441')['coverage'] - res = utils.get_coverage('SweGen', 'region', '22-1-1000000') - assert res['region_too_large'] + with pytest.raises(error.MalformedRequest): + res = utils.get_coverage('SweGen', 'region', '22-1-1000000') def test_get_coverage_pos(): @@ -105,9 +112,20 @@ def test_get_coverage_pos(): assert res['start'] == 16364817 assert res['stop'] == 16366254 - res = utils.get_coverage_pos('BAD_SET', 'transcript', 'ENST00000438441') - for value in res.values(): - assert not value + # bad requests + with pytest.raises(error.NotFoundError): + utils.get_coverage_pos('BAD_SET', 'transcript', 'ENST00000438441') + with pytest.raises(error.NotFoundError): + utils.get_coverage_pos('SweGen', 'transcript', 'ENST1234321') + with pytest.raises(error.NotFoundError): + utils.get_coverage_pos('SweGen', 'gene', 'ENSG1234321') + with pytest.raises(error.ParsingError): + utils.get_coverage_pos('BAD_SET', 'region', '1:1:1:1') + + # too large request + with pytest.raises(error.MalformedRequest): + utils.get_coverage_pos('SweGen', 'region', '1-1-10000000') + def test_data_structures(): @@ -191,15 +209,25 @@ def test_get_variant_list(): assert len(res['variants']) == 13 res = utils.get_variant_list('SweGen', 'transcript', 'ENST00000438441') assert len(res['variants']) == 178 - res = utils.get_variant_list('SweGen', 'transcript', 'ENSTWEIRD') - assert not res - res = utils.get_variant_list('SweGen', 'region', '22-1-1000000') - assert res['region_too_large'] - res = utils.get_variant_list('SweGen', 'region', '22-16272587') assert len(res['variants']) == 4 + # bad requests + with pytest.raises(error.NotFoundError): + utils.get_variant_list('SweGen', 'transcript', 'ENSTWEIRD') + with pytest.raises(error.NotFoundError): + utils.get_variant_list('Bad_dataset', 'transcript', 'ENSTWEIRD') + with pytest.raises(error.NotFoundError): + utils.get_variant_list('SweGen', 'gene', 'ENSG1234321') + with pytest.raises(error.ParsingError): + utils.get_variant_list('SweGen', 'region', '1-1-1-1-1') + + # too large region + with pytest.raises(error.MalformedRequest): + utils.get_variant_list('SweGen', 'region', '22-1-1000000') + + def test_order_vep_by_csq(): """ Test order_vep_by_csq() @@ -224,6 +252,24 @@ def test_parse_dataset(): assert utils.parse_dataset('hg19:SweGen:180101') == ('SweGen', '180101') +def test_parse_region(): + assert utils.parse_region('1-2-3') == ('1', 2, 3) + assert utils.parse_region('X-15-30') == ('X', 15, 30) + assert utils.parse_region('1-2') == ('1', 2, 2) + + # bad regions + with pytest.raises(error.ParsingError): + print(utils.parse_region('1:2:2')) + with pytest.raises(error.ParsingError): + utils.parse_region('1-2-2-2') + with pytest.raises(error.ParsingError): + utils.parse_region('asdfgh') + with pytest.raises(error.ParsingError): + utils.parse_region('X-15-z') + with pytest.raises(error.ParsingError): + utils.parse_region('X-y-15') + + def test_remove_extraneous_vep_annotations(): """ Test remove_extraneous_vep_annotations() diff --git a/backend/modules/browser/utils.py b/backend/modules/browser/utils.py index 589e2fe72..d3c15aaf8 100644 --- a/backend/modules/browser/utils.py +++ b/backend/modules/browser/utils.py @@ -2,6 +2,7 @@ import logging +from . import error from . import lookups # for coverage @@ -176,12 +177,10 @@ def get_coverage(dataset:str, datatype:str, item:str, ds_version:str=None): ret['coverage'] = lookups.get_coverage_for_transcript(dataset, transcript['chrom'], start, stop, ds_version) elif datatype == 'region': - try: - chrom, start, stop = parse_region(item) - except ValueError: - return {'coverage': [], 'bad_region':True} + chrom, start, stop = parse_region(item) + if is_region_too_large(start, stop): - return {'coverage': [], 'region_too_large': True} + raise error.MalformedRequest('Region too large') ret['coverage'] = lookups.get_coverage_for_bases(dataset, chrom, start, stop, ds_version) elif datatype == 'transcript': @@ -211,15 +210,15 @@ def get_coverage_pos(dataset:str, datatype:str, item:str, ds_version:str=None): if datatype == 'region': chrom, start, stop = parse_region(item) + if is_region_too_large(start, stop): + raise error.MalformedRequest('Region too large') ret['start'] = start ret['stop'] = stop ret['chrom'] = chrom else: if datatype == 'gene': gene = lookups.get_gene(dataset, item) - if gene: - transcript = lookups.get_transcript(dataset, gene['canonical_transcript'], ds_version) - else: transcript = None + transcript = lookups.get_transcript(dataset, gene['canonical_transcript'], ds_version) elif datatype == 'transcript': transcript = lookups.get_transcript(dataset, item, ds_version) if transcript: @@ -343,15 +342,10 @@ def get_variant_list(dataset:str, datatype:str, item:str, ds_version:str=None): variants = lookups.get_variants_in_gene(dataset, item, ds_version) elif datatype == 'region': - try: - chrom, start, stop = parse_region(item) - start = int(start) - stop = int(stop) - except ValueError: - return None + chrom, start, stop = parse_region(item) if is_region_too_large(start, stop): - return {'variants': [], 'headers': [], 'region_too_large': True} + raise error.MalformedRequest('Region too large') variants = lookups.get_variants_in_region(dataset, chrom, start, stop, ds_version) elif datatype == 'transcript': @@ -453,7 +447,7 @@ def parse_region(region:str): Parse a region with either one or two positions Args: - region (str): region, e.g. `3:1000000` or `3:100100` + region (str): region, e.g. `3-100-200` or `3-100` Returns: tuple: (chrom, start, pos) @@ -465,11 +459,14 @@ def parse_region(region:str): elif len(parts) == 3: chrom, start, stop = parts else: - raise ValueError - - start = int(start) - stop = int(stop) + raise error.ParsingError(f'Unable to parse region {region}.') + try: + start = int(start) + stop = int(stop) + except ValueError: + raise error.ParsingError(f'Unable to parse region {region} (positions not integers).') + return chrom, start, stop diff --git a/test/data/browser_test_data.sql b/test/data/browser_test_data.sql index 8f62af24f..5240fc76e 100644 --- a/test/data/browser_test_data.sql +++ b/test/data/browser_test_data.sql @@ -25,9 +25,9 @@ COPY data.reference_sets (id, reference_build, reference_name, ensembl_version, \. COPY data.dataset_versions (id, dataset, reference_set, dataset_version, dataset_description, terms, available_from, ref_doi, data_contact_name, data_contact_link, num_variants, coverage_levels, portal_avail, file_access, beacon_access) FROM stdin; +1 1 1 20161223 desc terms 2001-01-01 00:00:00 doi place email \N {1,5,10,15,20,25,30,50,100} TRUE REGISTERED PUBLIC 2 1 1 20170823 desc terms 2001-01-01 00:00:00 doi place email \N {1,5,10,15,20,25,30,50,100} TRUE REGISTERED PUBLIC 3 1 1 20171025 desc terms 2001-01-01 00:00:00 doi place email \N {1,5,10,15,20,25,30,50,100} TRUE REGISTERED PUBLIC -1 1 1 20161223 desc terms 2001-01-01 00:00:00 doi place email \N {1,5,10,15,20,25,30,50,100} TRUE REGISTERED PUBLIC 4 1 1 20180409 desc terms 2001-01-01 00:00:00 doi place email \N {1,5,10,15,20,25,30,50,100} TRUE REGISTERED PUBLIC \. @@ -2647,6 +2647,7 @@ COPY data.variants (id, dataset_version, variant_type, rsid, chrom, pos, ref, al 1665 4 \N 75186185 22 16371114 A G 191977 {22-16371114-A-G} 0 0.30399999 VQSRTrancheSNP99.90to100.00 22-16371114-A-G 608 2000 {"DP": "84373", "FS": "2.211", "MQ": "31.17", "QD": "3.14", "VQSLOD": "-50.73", "MQRankSum": "-1.184", "BaseQRankSum": "-4.11", "ReadPosRankSum": "1.07", "ClippingRankSum": "0.012"} [{"LoF": "", "TSL": "", "CCDS": "", "ENSP": "", "EXON": "", "GMAF": "", "Gene": "ENSG00000231565", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "G", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "1", "SYMBOL": "NEK2P2", "TREMBL": "", "AFR_MAF": "", "AMR_MAF": "", "BIOTYPE": "processed_pseudogene", "DOMAINS": "", "EAS_MAF": "", "EUR_MAF": "", "Feature": "ENST00000438441", "HGNC_ID": "37816", "SAS_MAF": "", "SOMATIC": "", "UNIPARC": "", "CLIN_SIG": "", "DISTANCE": "4910", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "YES", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "downstream_gene_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "Transcript", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "HGNC", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs4068944", "MOTIF_SCORE_CHANGE": ""}, {"LoF": "", "TSL": "", "CCDS": "", "ENSP": "", "EXON": "", "GMAF": "", "Gene": "ENSG00000230471", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "G", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "1", "SYMBOL": "LA16c-2F2.8", "TREMBL": "", "AFR_MAF": "", "AMR_MAF": "", "BIOTYPE": "lincRNA", "DOMAINS": "", "EAS_MAF": "", "EUR_MAF": "", "Feature": "ENST00000428118", "HGNC_ID": "", "SAS_MAF": "", "SOMATIC": "", "UNIPARC": "", "CLIN_SIG": "", "DISTANCE": "1967", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "YES", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "upstream_gene_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "Transcript", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "Clone_based_vega_gene", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs4068944", "MOTIF_SCORE_CHANGE": ""}] 1668 4 \N 783 22 29461622 G A 715011 {22-29461622-G-A} 772 0.62349999 PASS 22-29461622-G-A 1247 2000 {"DP": "36991", "FS": "0", "MQ": "60", "QD": "22.28", "VQSLOD": "22.38", "MQRankSum": "0.023", "BaseQRankSum": "2.44", "ReadPosRankSum": "0.313", "ClippingRankSum": "-0.031"} [{"LoF": "", "TSL": "", "CCDS": "CCDS13848.1", "ENSP": "ENSP00000216071", "EXON": "", "GMAF": "G:0.4289", "Gene": "ENSG00000100249", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "-1", "SYMBOL": "C22orf31", "TREMBL": "", "AFR_MAF": "A:0.5681", "AMR_MAF": "A:0.4654", "BIOTYPE": "protein_coding", "DOMAINS": "", "EAS_MAF": "A:0.5466", "EUR_MAF": "A:0.664", "Feature": "ENST00000216071", "HGNC_ID": "26931", "SAS_MAF": "A:0.5798", "SOMATIC": "", "UNIPARC": "UPI0000073FE0", "CLIN_SIG": "", "DISTANCE": "3790", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "YES", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "CV031_HUMAN", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "upstream_gene_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "Transcript", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "HGNC", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs783", "MOTIF_SCORE_CHANGE": ""}, {"LoF": "", "TSL": "", "CCDS": "", "ENSP": "", "EXON": "", "GMAF": "G:0.4289", "Gene": "", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "", "SYMBOL": "", "TREMBL": "", "AFR_MAF": "A:0.5681", "AMR_MAF": "A:0.4654", "BIOTYPE": "promoter_flanking_region", "DOMAINS": "", "EAS_MAF": "A:0.5466", "EUR_MAF": "A:0.664", "Feature": "ENSR00001731804", "HGNC_ID": "", "SAS_MAF": "A:0.5798", "SOMATIC": "", "UNIPARC": "", "CLIN_SIG": "", "DISTANCE": "", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "regulatory_region_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "RegulatoryFeature", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs783", "MOTIF_SCORE_CHANGE": ""}] 1669 4 \N \N 22 29465622 G A 288.69 {22-29465622-G-A} \N 0.00166667 PASS 22-25275494-G-A 15 1000 {"DP": "10377", "FS": "0", "MQ": "60", "QD": "9.31", "VQSLOD": "0.894", "MQRankSum": "0.58", "BaseQRankSum": "1.34", "ReadPosRankSum": "-0.54", "ClippingRankSum": "-0.821", "InbreedingCoeff": "-0.0017"} [{"AF": "", "LoF": "HC", "TSL": "", "CCDS": "CCDS46675.1", "ENSP": "ENSP00000383211", "EXON": "", "Gene": "ENSG00000167037", "SIFT": "", "AA_AF": "", "EA_AF": "", "FLAGS": "", "HGVSc": "ENST00000400358.4:c.1495+1G>A", "HGVSp": "", "PHENO": "", "miRNA": "", "AFR_AF": "", "AMR_AF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EAS_AF": "", "EUR_AF": "", "IMPACT": "HIGH", "INTRON": "14/24", "MAX_AF": "", "PUBMED": "", "SAS_AF": "", "SOURCE": "Ensembl", "STRAND": "1", "SYMBOL": "SGSM1", "TREMBL": "", "BIOTYPE": "protein_coding", "DOMAINS": "", "Feature": "ENST00000400358", "HGNC_ID": "29410", "SOMATIC": "", "UNIPARC": "UPI0001533DB1", "BAM_EDIT": "", "CLIN_SIG": "", "DISTANCE": "", "LoF_info": "BRANCHPOINT_DISTANCE:NA&DONOR_ESE:17&DONOR_ISS:9&EXON_END:25275493&DONOR_ISE:4&EXON_START:25275429&DONOR_ESS:9&MUTANT_DONOR_MES:-1.73390323294901&INTRON_START:25275494&DONOR_GERP_DIFF:0&DONOR_DISRUPTION_PROB:0.995351102026242&INTRON_END:25280019&DONOR_MES_DIFF:8.18202723619546&DONOR_DISRUPTING&RESCUE_DONOR_MES:-1.73390323294901&RESCUE_DONOR_POS:0&CRYPTIC_DONOR_MES:-6.95778366793159&CRYPTIC_DONOR_POS:-2&INTRON_SIZE:4526", "PolyPhen": "", "USED_REF": "G", "CANONICAL": "", "GIVEN_REF": "G", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "Q2NKQ1", "gnomAD_AF": "0", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "splice_donor_variant", "HGVS_OFFSET": "", "MAX_AF_POPS": "gnomAD_AFR&gnomAD_AMR&gnomAD_ASJ&gnomAD_EAS&gnomAD_FIN&gnomAD_NFE&gnomAD_OTH&gnomAD_SAS", "CDS_position": "", "Feature_type": "Transcript", "HIGH_INF_POS": "", "REFSEQ_MATCH": "", "SYMBOL_SOURCE": "HGNC", "VARIANT_CLASS": "SNV", "cDNA_position": "", "gnomAD_AFR_AF": "0", "gnomAD_AMR_AF": "0", "gnomAD_ASJ_AF": "0", "gnomAD_EAS_AF": "0", "gnomAD_FIN_AF": "0", "gnomAD_NFE_AF": "0", "gnomAD_OTH_AF": "0", "gnomAD_SAS_AF": "0", "Protein_position": "", "Existing_variation": "rs1299387256", "MOTIF_SCORE_CHANGE": ""}] +1670 1 \N \N 21 29461622 G A 715011 {22-29461622-G-A} 772 0.62349999 PASS 22-29461622-G-A 1247 2000 {"DP": "36991", "FS": "0", "MQ": "60", "QD": "22.28", "VQSLOD": "22.38", "MQRankSum": "0.023", "BaseQRankSum": "2.44", "ReadPosRankSum": "0.313", "ClippingRankSum": "-0.031"} [{"LoF": "", "TSL": "", "CCDS": "CCDS13848.1", "ENSP": "ENSP00000216071", "EXON": "", "GMAF": "G:0.4289", "Gene": "ENSG00000100249", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "-1", "SYMBOL": "C22orf31", "TREMBL": "", "AFR_MAF": "A:0.5681", "AMR_MAF": "A:0.4654", "BIOTYPE": "protein_coding", "DOMAINS": "", "EAS_MAF": "A:0.5466", "EUR_MAF": "A:0.664", "Feature": "ENST00000216071", "HGNC_ID": "26931", "SAS_MAF": "A:0.5798", "SOMATIC": "", "UNIPARC": "UPI0000073FE0", "CLIN_SIG": "", "DISTANCE": "3790", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "YES", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "CV031_HUMAN", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "upstream_gene_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "Transcript", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "HGNC", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs783", "MOTIF_SCORE_CHANGE": ""}, {"LoF": "", "TSL": "", "CCDS": "", "ENSP": "", "EXON": "", "GMAF": "G:0.4289", "Gene": "", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "", "SYMBOL": "", "TREMBL": "", "AFR_MAF": "A:0.5681", "AMR_MAF": "A:0.4654", "BIOTYPE": "promoter_flanking_region", "DOMAINS": "", "EAS_MAF": "A:0.5466", "EUR_MAF": "A:0.664", "Feature": "ENSR00001731804", "HGNC_ID": "", "SAS_MAF": "A:0.5798", "SOMATIC": "", "UNIPARC": "", "CLIN_SIG": "", "DISTANCE": "", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "regulatory_region_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "RegulatoryFeature", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs783", "MOTIF_SCORE_CHANGE": ""}] \. COPY data.variant_genes (id, variant, gene) FROM stdin; From 4cdb4efc47fe787f0828630d82435528fa3d9ece Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 14 Jun 2019 10:57:00 +0200 Subject: [PATCH 2/5] Perform actual searches for all example queries in browser. Perform a lookup of the variant to match other queries with no results found. --- backend/modules/browser/lookups.py | 6 ++++++ frontend/templates/ng-templates/dataset-browser.html | 9 ++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 112dbb262..6eadf5186 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -1,4 +1,5 @@ """Lookup functions for the variant browser.""" + import logging import re @@ -119,8 +120,13 @@ def get_awesomebar_result(dataset:str, query:str, ds_version:str=None): target_type = 'region' if match.group(2) == ":": target = target.replace(":","-") + if match.group(5) and set(match.group(4)).issubset(set("ACGT")): target_type = 'variant' + try: + get_raw_variant(dataset, match.group(3), match.group(1), match.group(4), match.group(5), ds_version) + except error.NotFoundError as err: + target_type = 'not_found' return target_type, target diff --git a/frontend/templates/ng-templates/dataset-browser.html b/frontend/templates/ng-templates/dataset-browser.html index c4e2a7a5a..aa6a1dcc0 100644 --- a/frontend/templates/ng-templates/dataset-browser.html +++ b/frontend/templates/ng-templates/dataset-browser.html @@ -26,12 +26,11 @@

Examples - Gene: - PCSK9, Transcript: - ENST00000407236, Variant: - 22-46615880-T-C, Multi-allelic - variant: + PCSK9, Transcript: + ENST00000407236, Variant: + 22-46615880-T-C, Reference SNP ID: rs1800234, Region: - 22:46615715-46615880 + 22:46615715-46615880

From a137326341a89129edc15a13c69f75aff3d8899c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Fri, 14 Jun 2019 11:12:24 +0200 Subject: [PATCH 3/5] Add empty lines to make lists render as actual lists in sphinx. --- backend/modules/browser/lookups.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backend/modules/browser/lookups.py b/backend/modules/browser/lookups.py index 6eadf5186..72f0fcc79 100644 --- a/backend/modules/browser/lookups.py +++ b/backend/modules/browser/lookups.py @@ -41,6 +41,7 @@ def get_awesomebar_result(dataset:str, query:str, ds_version:str=None): Parse the search input. Datatype is one of: + * `gene` * `transcript` * `variant` @@ -48,11 +49,13 @@ def get_awesomebar_result(dataset:str, query:str, ds_version:str=None): * `region` Identifier is one of: + * ensembl ID for gene * variant ID string for variant (eg. 1-1000-A-T) * region ID string for region (eg. 1-1000-2000) Follow these steps: + * if query is an ensembl ID, return it * if a gene symbol, return that gene's ensembl ID * if an RSID, return that variant's string From da93f7ed5dda80be231992f582126236aaddbfe1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Mon, 17 Jun 2019 19:32:39 +0200 Subject: [PATCH 4/5] Add handling of exception when generating dataset frequencies. --- backend/modules/browser/browser_handlers.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/backend/modules/browser/browser_handlers.py b/backend/modules/browser/browser_handlers.py index f3d9b4fc0..26da598b4 100644 --- a/backend/modules/browser/browser_handlers.py +++ b/backend/modules/browser/browser_handlers.py @@ -335,7 +335,6 @@ def get(self, dataset:str, variant:str, ds_version:str=None): 'canonical': annotation['CANONICAL'], 'modification': annotation['HGVSp'].split(":")[1] if ':' in annotation['HGVSp'] else None}] - # Dataset frequencies. # This is reported per variable in the database data, with dataset # information inside the variables, so here we reorder to make the @@ -347,9 +346,11 @@ def get(self, dataset:str, variant:str, ds_version:str=None): dsvs = [dsv for dsv in dsvs if dsv.reference_set == curr_dsv.reference_set] dsv_groups = [(curr_dsv, variant)] for dsv in dsvs: - hit = lookups.get_variant(dsv.dataset.short_name, v[1], v[0], v[2], v[3], dsv.version) - if hit: - dsv_groups.append((dsv, hit)) + try: + hit = lookups.get_variant(dsv.dataset.short_name, v[1], v[0], v[2], v[3], dsv.version) + except error.NotFoundError: + continue + dsv_groups.append((dsv, hit)) frequencies = {'headers':[['Dataset','pop'], ['Allele Count','acs'], From 70d75faa451826da9d1ae0469dcdbc40ec56bead Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20=C3=96stberg?= Date: Tue, 18 Jun 2019 07:58:58 +0200 Subject: [PATCH 5/5] Add a second dataset to browser test data. --- test/data/browser_test_data.sql | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/test/data/browser_test_data.sql b/test/data/browser_test_data.sql index 5240fc76e..c5966b1f6 100644 --- a/test/data/browser_test_data.sql +++ b/test/data/browser_test_data.sql @@ -14,10 +14,12 @@ COPY data.collections (id, study_name, ethnicity) FROM stdin; COPY data.studies (id, pi_name, pi_email, contact_name, contact_email, title, study_description, publication_date, ref_doi) FROM stdin; 1 name email name email SweGen \N 2001-01-01 00:00:00 doi +2 name2 email2 name2 email2 SweGen2 \N 2001-01-02 00:00:00 doi \. COPY data.datasets (id, study, short_name, full_name, browser_uri, beacon_uri, beacon_description, avg_seq_depth, seq_type, seq_tech, seq_center, dataset_size) FROM stdin; 1 1 SweGen SweGen url \N \N 0 type method place 0 +2 1 SweGen2 SweGen2 url \N \N 0 type method place 0 \. COPY data.reference_sets (id, reference_build, reference_name, ensembl_version, gencode_version, dbnsfp_version) FROM stdin; @@ -26,9 +28,10 @@ COPY data.reference_sets (id, reference_build, reference_name, ensembl_version, COPY data.dataset_versions (id, dataset, reference_set, dataset_version, dataset_description, terms, available_from, ref_doi, data_contact_name, data_contact_link, num_variants, coverage_levels, portal_avail, file_access, beacon_access) FROM stdin; 1 1 1 20161223 desc terms 2001-01-01 00:00:00 doi place email \N {1,5,10,15,20,25,30,50,100} TRUE REGISTERED PUBLIC -2 1 1 20170823 desc terms 2001-01-01 00:00:00 doi place email \N {1,5,10,15,20,25,30,50,100} TRUE REGISTERED PUBLIC -3 1 1 20171025 desc terms 2001-01-01 00:00:00 doi place email \N {1,5,10,15,20,25,30,50,100} TRUE REGISTERED PUBLIC -4 1 1 20180409 desc terms 2001-01-01 00:00:00 doi place email \N {1,5,10,15,20,25,30,50,100} TRUE REGISTERED PUBLIC +2 1 1 20170823 desc terms 2001-01-02 00:00:00 doi place email \N {1,5,10,15,20,25,30,50,100} TRUE REGISTERED PUBLIC +3 1 1 20171025 desc terms 2001-01-03 00:00:00 doi place email \N {1,5,10,15,20,25,30,50,100} TRUE REGISTERED PUBLIC +4 1 1 20180409 desc terms 2001-01-04 00:00:00 doi place email \N {1,5,10,15,20,25,30,50,100} TRUE REGISTERED PUBLIC +5 2 1 20190409 desc terms 2001-01-05 00:00:00 doi place email \N {1,5,10,15,20,25,30,50,100} TRUE REGISTERED PUBLIC \. COPY data.coverage (id, dataset_version, chrom, pos, mean, median, coverage) FROM stdin; @@ -2648,6 +2651,9 @@ COPY data.variants (id, dataset_version, variant_type, rsid, chrom, pos, ref, al 1668 4 \N 783 22 29461622 G A 715011 {22-29461622-G-A} 772 0.62349999 PASS 22-29461622-G-A 1247 2000 {"DP": "36991", "FS": "0", "MQ": "60", "QD": "22.28", "VQSLOD": "22.38", "MQRankSum": "0.023", "BaseQRankSum": "2.44", "ReadPosRankSum": "0.313", "ClippingRankSum": "-0.031"} [{"LoF": "", "TSL": "", "CCDS": "CCDS13848.1", "ENSP": "ENSP00000216071", "EXON": "", "GMAF": "G:0.4289", "Gene": "ENSG00000100249", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "-1", "SYMBOL": "C22orf31", "TREMBL": "", "AFR_MAF": "A:0.5681", "AMR_MAF": "A:0.4654", "BIOTYPE": "protein_coding", "DOMAINS": "", "EAS_MAF": "A:0.5466", "EUR_MAF": "A:0.664", "Feature": "ENST00000216071", "HGNC_ID": "26931", "SAS_MAF": "A:0.5798", "SOMATIC": "", "UNIPARC": "UPI0000073FE0", "CLIN_SIG": "", "DISTANCE": "3790", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "YES", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "CV031_HUMAN", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "upstream_gene_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "Transcript", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "HGNC", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs783", "MOTIF_SCORE_CHANGE": ""}, {"LoF": "", "TSL": "", "CCDS": "", "ENSP": "", "EXON": "", "GMAF": "G:0.4289", "Gene": "", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "", "SYMBOL": "", "TREMBL": "", "AFR_MAF": "A:0.5681", "AMR_MAF": "A:0.4654", "BIOTYPE": "promoter_flanking_region", "DOMAINS": "", "EAS_MAF": "A:0.5466", "EUR_MAF": "A:0.664", "Feature": "ENSR00001731804", "HGNC_ID": "", "SAS_MAF": "A:0.5798", "SOMATIC": "", "UNIPARC": "", "CLIN_SIG": "", "DISTANCE": "", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "regulatory_region_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "RegulatoryFeature", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs783", "MOTIF_SCORE_CHANGE": ""}] 1669 4 \N \N 22 29465622 G A 288.69 {22-29465622-G-A} \N 0.00166667 PASS 22-25275494-G-A 15 1000 {"DP": "10377", "FS": "0", "MQ": "60", "QD": "9.31", "VQSLOD": "0.894", "MQRankSum": "0.58", "BaseQRankSum": "1.34", "ReadPosRankSum": "-0.54", "ClippingRankSum": "-0.821", "InbreedingCoeff": "-0.0017"} [{"AF": "", "LoF": "HC", "TSL": "", "CCDS": "CCDS46675.1", "ENSP": "ENSP00000383211", "EXON": "", "Gene": "ENSG00000167037", "SIFT": "", "AA_AF": "", "EA_AF": "", "FLAGS": "", "HGVSc": "ENST00000400358.4:c.1495+1G>A", "HGVSp": "", "PHENO": "", "miRNA": "", "AFR_AF": "", "AMR_AF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EAS_AF": "", "EUR_AF": "", "IMPACT": "HIGH", "INTRON": "14/24", "MAX_AF": "", "PUBMED": "", "SAS_AF": "", "SOURCE": "Ensembl", "STRAND": "1", "SYMBOL": "SGSM1", "TREMBL": "", "BIOTYPE": "protein_coding", "DOMAINS": "", "Feature": "ENST00000400358", "HGNC_ID": "29410", "SOMATIC": "", "UNIPARC": "UPI0001533DB1", "BAM_EDIT": "", "CLIN_SIG": "", "DISTANCE": "", "LoF_info": "BRANCHPOINT_DISTANCE:NA&DONOR_ESE:17&DONOR_ISS:9&EXON_END:25275493&DONOR_ISE:4&EXON_START:25275429&DONOR_ESS:9&MUTANT_DONOR_MES:-1.73390323294901&INTRON_START:25275494&DONOR_GERP_DIFF:0&DONOR_DISRUPTION_PROB:0.995351102026242&INTRON_END:25280019&DONOR_MES_DIFF:8.18202723619546&DONOR_DISRUPTING&RESCUE_DONOR_MES:-1.73390323294901&RESCUE_DONOR_POS:0&CRYPTIC_DONOR_MES:-6.95778366793159&CRYPTIC_DONOR_POS:-2&INTRON_SIZE:4526", "PolyPhen": "", "USED_REF": "G", "CANONICAL": "", "GIVEN_REF": "G", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "Q2NKQ1", "gnomAD_AF": "0", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "splice_donor_variant", "HGVS_OFFSET": "", "MAX_AF_POPS": "gnomAD_AFR&gnomAD_AMR&gnomAD_ASJ&gnomAD_EAS&gnomAD_FIN&gnomAD_NFE&gnomAD_OTH&gnomAD_SAS", "CDS_position": "", "Feature_type": "Transcript", "HIGH_INF_POS": "", "REFSEQ_MATCH": "", "SYMBOL_SOURCE": "HGNC", "VARIANT_CLASS": "SNV", "cDNA_position": "", "gnomAD_AFR_AF": "0", "gnomAD_AMR_AF": "0", "gnomAD_ASJ_AF": "0", "gnomAD_EAS_AF": "0", "gnomAD_FIN_AF": "0", "gnomAD_NFE_AF": "0", "gnomAD_OTH_AF": "0", "gnomAD_SAS_AF": "0", "Protein_position": "", "Existing_variation": "rs1299387256", "MOTIF_SCORE_CHANGE": ""}] 1670 1 \N \N 21 29461622 G A 715011 {22-29461622-G-A} 772 0.62349999 PASS 22-29461622-G-A 1247 2000 {"DP": "36991", "FS": "0", "MQ": "60", "QD": "22.28", "VQSLOD": "22.38", "MQRankSum": "0.023", "BaseQRankSum": "2.44", "ReadPosRankSum": "0.313", "ClippingRankSum": "-0.031"} [{"LoF": "", "TSL": "", "CCDS": "CCDS13848.1", "ENSP": "ENSP00000216071", "EXON": "", "GMAF": "G:0.4289", "Gene": "ENSG00000100249", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "-1", "SYMBOL": "C22orf31", "TREMBL": "", "AFR_MAF": "A:0.5681", "AMR_MAF": "A:0.4654", "BIOTYPE": "protein_coding", "DOMAINS": "", "EAS_MAF": "A:0.5466", "EUR_MAF": "A:0.664", "Feature": "ENST00000216071", "HGNC_ID": "26931", "SAS_MAF": "A:0.5798", "SOMATIC": "", "UNIPARC": "UPI0000073FE0", "CLIN_SIG": "", "DISTANCE": "3790", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "YES", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "CV031_HUMAN", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "upstream_gene_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "Transcript", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "HGNC", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs783", "MOTIF_SCORE_CHANGE": ""}, {"LoF": "", "TSL": "", "CCDS": "", "ENSP": "", "EXON": "", "GMAF": "G:0.4289", "Gene": "", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "", "SYMBOL": "", "TREMBL": "", "AFR_MAF": "A:0.5681", "AMR_MAF": "A:0.4654", "BIOTYPE": "promoter_flanking_region", "DOMAINS": "", "EAS_MAF": "A:0.5466", "EUR_MAF": "A:0.664", "Feature": "ENSR00001731804", "HGNC_ID": "", "SAS_MAF": "A:0.5798", "SOMATIC": "", "UNIPARC": "", "CLIN_SIG": "", "DISTANCE": "", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "regulatory_region_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "RegulatoryFeature", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs783", "MOTIF_SCORE_CHANGE": ""}] +1671 5 \N 783 22 29461622 G A 715011 {22-29461622-G-A} 772 0.62349999 PASS 22-29461622-G-A 1247 2000 {"DP": "36991", "FS": "0", "MQ": "60", "QD": "22.28", "VQSLOD": "22.38", "MQRankSum": "0.023", "BaseQRankSum": "2.44", "ReadPosRankSum": "0.313", "ClippingRankSum": "-0.031"} [{"LoF": "", "TSL": "", "CCDS": "CCDS13848.1", "ENSP": "ENSP00000216071", "EXON": "", "GMAF": "G:0.4289", "Gene": "ENSG00000100249", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "-1", "SYMBOL": "C22orf31", "TREMBL": "", "AFR_MAF": "A:0.5681", "AMR_MAF": "A:0.4654", "BIOTYPE": "protein_coding", "DOMAINS": "", "EAS_MAF": "A:0.5466", "EUR_MAF": "A:0.664", "Feature": "ENST00000216071", "HGNC_ID": "26931", "SAS_MAF": "A:0.5798", "SOMATIC": "", "UNIPARC": "UPI0000073FE0", "CLIN_SIG": "", "DISTANCE": "3790", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "YES", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "CV031_HUMAN", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "upstream_gene_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "Transcript", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "HGNC", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs783", "MOTIF_SCORE_CHANGE": ""}, {"LoF": "", "TSL": "", "CCDS": "", "ENSP": "", "EXON": "", "GMAF": "G:0.4289", "Gene": "", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "", "SYMBOL": "", "TREMBL": "", "AFR_MAF": "A:0.5681", "AMR_MAF": "A:0.4654", "BIOTYPE": "promoter_flanking_region", "DOMAINS": "", "EAS_MAF": "A:0.5466", "EUR_MAF": "A:0.664", "Feature": "ENSR00001731804", "HGNC_ID": "", "SAS_MAF": "A:0.5798", "SOMATIC": "", "UNIPARC": "", "CLIN_SIG": "", "DISTANCE": "", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "regulatory_region_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "RegulatoryFeature", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs783", "MOTIF_SCORE_CHANGE": ""}] +1672 5 \N \N 22 29465622 G A 288.69 {22-29465622-G-A} \N 0.00166667 PASS 22-25275494-G-A 15 1000 {"DP": "10377", "FS": "0", "MQ": "60", "QD": "9.31", "VQSLOD": "0.894", "MQRankSum": "0.58", "BaseQRankSum": "1.34", "ReadPosRankSum": "-0.54", "ClippingRankSum": "-0.821", "InbreedingCoeff": "-0.0017"} [{"AF": "", "LoF": "HC", "TSL": "", "CCDS": "CCDS46675.1", "ENSP": "ENSP00000383211", "EXON": "", "Gene": "ENSG00000167037", "SIFT": "", "AA_AF": "", "EA_AF": "", "FLAGS": "", "HGVSc": "ENST00000400358.4:c.1495+1G>A", "HGVSp": "", "PHENO": "", "miRNA": "", "AFR_AF": "", "AMR_AF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EAS_AF": "", "EUR_AF": "", "IMPACT": "HIGH", "INTRON": "14/24", "MAX_AF": "", "PUBMED": "", "SAS_AF": "", "SOURCE": "Ensembl", "STRAND": "1", "SYMBOL": "SGSM1", "TREMBL": "", "BIOTYPE": "protein_coding", "DOMAINS": "", "Feature": "ENST00000400358", "HGNC_ID": "29410", "SOMATIC": "", "UNIPARC": "UPI0001533DB1", "BAM_EDIT": "", "CLIN_SIG": "", "DISTANCE": "", "LoF_info": "BRANCHPOINT_DISTANCE:NA&DONOR_ESE:17&DONOR_ISS:9&EXON_END:25275493&DONOR_ISE:4&EXON_START:25275429&DONOR_ESS:9&MUTANT_DONOR_MES:-1.73390323294901&INTRON_START:25275494&DONOR_GERP_DIFF:0&DONOR_DISRUPTION_PROB:0.995351102026242&INTRON_END:25280019&DONOR_MES_DIFF:8.18202723619546&DONOR_DISRUPTING&RESCUE_DONOR_MES:-1.73390323294901&RESCUE_DONOR_POS:0&CRYPTIC_DONOR_MES:-6.95778366793159&CRYPTIC_DONOR_POS:-2&INTRON_SIZE:4526", "PolyPhen": "", "USED_REF": "G", "CANONICAL": "", "GIVEN_REF": "G", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "Q2NKQ1", "gnomAD_AF": "0", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "splice_donor_variant", "HGVS_OFFSET": "", "MAX_AF_POPS": "gnomAD_AFR&gnomAD_AMR&gnomAD_ASJ&gnomAD_EAS&gnomAD_FIN&gnomAD_NFE&gnomAD_OTH&gnomAD_SAS", "CDS_position": "", "Feature_type": "Transcript", "HIGH_INF_POS": "", "REFSEQ_MATCH": "", "SYMBOL_SOURCE": "HGNC", "VARIANT_CLASS": "SNV", "cDNA_position": "", "gnomAD_AFR_AF": "0", "gnomAD_AMR_AF": "0", "gnomAD_ASJ_AF": "0", "gnomAD_EAS_AF": "0", "gnomAD_FIN_AF": "0", "gnomAD_NFE_AF": "0", "gnomAD_OTH_AF": "0", "gnomAD_SAS_AF": "0", "Protein_position": "", "Existing_variation": "rs1299387256", "MOTIF_SCORE_CHANGE": ""}] +1673 5 \N \N 21 29461622 G A 715011 {22-29461622-G-A} 772 0.62349999 PASS 22-29461622-G-A 1247 2000 {"DP": "36991", "FS": "0", "MQ": "60", "QD": "22.28", "VQSLOD": "22.38", "MQRankSum": "0.023", "BaseQRankSum": "2.44", "ReadPosRankSum": "0.313", "ClippingRankSum": "-0.031"} [{"LoF": "", "TSL": "", "CCDS": "CCDS13848.1", "ENSP": "ENSP00000216071", "EXON": "", "GMAF": "G:0.4289", "Gene": "ENSG00000100249", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "-1", "SYMBOL": "C22orf31", "TREMBL": "", "AFR_MAF": "A:0.5681", "AMR_MAF": "A:0.4654", "BIOTYPE": "protein_coding", "DOMAINS": "", "EAS_MAF": "A:0.5466", "EUR_MAF": "A:0.664", "Feature": "ENST00000216071", "HGNC_ID": "26931", "SAS_MAF": "A:0.5798", "SOMATIC": "", "UNIPARC": "UPI0000073FE0", "CLIN_SIG": "", "DISTANCE": "3790", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "YES", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "CV031_HUMAN", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "upstream_gene_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "Transcript", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "HGNC", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs783", "MOTIF_SCORE_CHANGE": ""}, {"LoF": "", "TSL": "", "CCDS": "", "ENSP": "", "EXON": "", "GMAF": "G:0.4289", "Gene": "", "SIFT": "", "FLAGS": "", "HGVSc": "", "HGVSp": "", "PHENO": "", "AA_MAF": "", "APPRIS": "", "Allele": "A", "Codons": "", "EA_MAF": "", "IMPACT": "MODIFIER", "INTRON": "", "PUBMED": "", "STRAND": "", "SYMBOL": "", "TREMBL": "", "AFR_MAF": "A:0.5681", "AMR_MAF": "A:0.4654", "BIOTYPE": "promoter_flanking_region", "DOMAINS": "", "EAS_MAF": "A:0.5466", "EUR_MAF": "A:0.664", "Feature": "ENSR00001731804", "HGNC_ID": "", "SAS_MAF": "A:0.5798", "SOMATIC": "", "UNIPARC": "", "CLIN_SIG": "", "DISTANCE": "", "ExAC_MAF": "", "LoF_info": "", "PolyPhen": "", "CANONICAL": "", "LoF_flags": "", "MOTIF_POS": "", "SWISSPROT": "", "ALLELE_NUM": "1", "GENE_PHENO": "", "LoF_filter": "", "MOTIF_NAME": "", "Amino_acids": "", "Consequence": "regulatory_region_variant", "HGVS_OFFSET": "", "CDS_position": "", "ExAC_AFR_MAF": "", "ExAC_AMR_MAF": "", "ExAC_Adj_MAF": "", "ExAC_EAS_MAF": "", "ExAC_FIN_MAF": "", "ExAC_NFE_MAF": "", "ExAC_OTH_MAF": "", "ExAC_SAS_MAF": "", "Feature_type": "RegulatoryFeature", "HIGH_INF_POS": "", "SYMBOL_SOURCE": "", "VARIANT_CLASS": "SNV", "cDNA_position": "", "Protein_position": "", "Existing_variation": "rs783", "MOTIF_SCORE_CHANGE": ""}] \. COPY data.variant_genes (id, variant, gene) FROM stdin;