Skip to content

Commit

Permalink
save SVs to beacon database
Browse files Browse the repository at this point in the history
  • Loading branch information
northwestwitch committed May 4, 2020
1 parent 7397aca commit 126a2ef
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 115 deletions.
2 changes: 2 additions & 0 deletions cgbeacon2/resources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@

###### Files ######
# Demo resources shipped with the package, given as paths that are resolved
# against the "cgbeacon2" package in the "Paths" section below.
test_snv_vcf = "resources/demo/643594.clinical.vcf.gz"
test_sv_vcf = "resources/demo/643594.clinical.SV.vcf.gz"
empty_vcf = "resources/demo/empty.clinical.SV.vcf.gz"
panel1 = "resources/demo/panel1.bed"
panel2 = "resources/demo/panel2.bed"

###### Paths ######
# Filesystem locations of the resources above, as returned by
# pkg_resources.resource_filename for the "cgbeacon2" package.
test_snv_vcf_path = pkg_resources.resource_filename("cgbeacon2", test_snv_vcf)
test_sv_vcf_path = pkg_resources.resource_filename("cgbeacon2", test_sv_vcf)
test_empty_vcf_path = pkg_resources.resource_filename("cgbeacon2", empty_vcf)
panel1_path = pkg_resources.resource_filename("cgbeacon2", panel1)
panel2_path = pkg_resources.resource_filename("cgbeacon2", panel2)
Binary file not shown.
7 changes: 4 additions & 3 deletions cgbeacon2/utils/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,16 +92,17 @@ def add_variants(database, vcf_obj, samples, assembly, dataset_id, nr_variants):
if len(sample_calls) == 0:
continue # variant was not called in samples of interest

if vcf_variant.var_type == "sv": # otherwise snp or indel
parsed_variant["variant_type"] = "sv" # fix later, this is not OK yet

parsed_variant = dict(
chromosome=vcf_variant.CHROM,
start=vcf_variant.start,
end=vcf_variant.end,
reference_bases=vcf_variant.REF,
alternate_bases=vcf_variant.ALT,
)

if vcf_variant.var_type == "sv":
parsed_variant["variant_type"] = vcf_variant.get("SVTYPE")

dataset_dict = {dataset_id: {"samples": sample_calls}}
# Create standard variant object with specific _id
variant = Variant(parsed_variant, dataset_dict, assembly)
Expand Down
122 changes: 122 additions & 0 deletions tests/server/blueprints/api_v1/test_request_errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# -*- coding: utf-8 -*-
import json
from cgbeacon2.constants import (
NO_MANDATORY_PARAMS,
NO_SECONDARY_PARAMS,
NO_POSITION_PARAMS,
NO_SV_END_PARAM,
INVALID_COORD_RANGE,
BUILD_MISMATCH,
)

# Endpoint name plus the assemblyId, referenceName and referenceBases params;
# the tests in this module append further params to it to build request URLs.
BASE_ARGS = "query?assemblyId=GRCh37&referenceName=1&referenceBases=TA"

################## TESTS FOR HANDLING WRONG REQUESTS ################


def test_query_get_request_missing_mandatory_params(mock_app):
    """Test the query endpoint by sending a request without mandatory params:
    referenceName, referenceBases, assemblyId
    """

    # When a request missing one or more required params is sent to the server
    response = mock_app.test_client().get("/apiv1.0/query?")

    # Then it should return error
    assert response.status_code == 400
    message = json.loads(response.data)["message"]
    assert message["error"] == NO_MANDATORY_PARAMS

    # None is a singleton, so it is compared by identity rather than equality
    assert message["exists"] is None
    assert message["datasetAlleleResponses"] == []

    # The error response should still identify the beacon and its API version
    assert message["beaconId"]
    assert message["apiVersion"] == "1.0.0"


def test_query_get_request_build_mismatch(mock_app, test_dataset_cli):
    """Query a dataset whose genome build differs from the assemblyId of the request and expect an error"""

    # GIVEN a database containing a dataset with genome build GRCh38
    test_dataset_cli["assembly_id"] = "GRCh38"
    mock_app.db["dataset"].insert_one(test_dataset_cli)

    # WHEN that dataset is queried by a request specifying genome build GRCh37
    dataset_arg = f"datasetIds={test_dataset_cli['_id']}"
    url = f"/apiv1.0/{BASE_ARGS}&{dataset_arg}"
    response = mock_app.test_client().get(url)

    # THEN the server should answer with a build mismatch error
    assert response.status_code == 400
    payload = json.loads(response.data)
    assert payload["message"]["error"] == BUILD_MISMATCH


def test_query_get_request_missing_secondary_params(mock_app):
    """Send a query that lacks both secondary params (alternateBases, variantType)
    and expect an error
    """
    # WHEN the request contains the base params only
    url = f"/apiv1.0/{BASE_ARGS}"
    response = mock_app.test_client().get(url)

    # THEN the server should complain about the missing secondary params
    assert response.status_code == 400
    payload = json.loads(response.data)
    assert payload["message"]["error"] == NO_SECONDARY_PARAMS


def test_query_get_request_missing_positions_params(mock_app):
    """Send a query with incomplete coordinate params and expect an error:
    neither start nor the full startMin + startMax + endMin + endMax set is provided
    """
    # GIVEN a query without start and with only 3 of the 4 range coordinates (no endMax)
    coords = "alternateBases=T&startMin=2&startMax=6&endMin=4"
    url = f"/apiv1.0/{BASE_ARGS}&{coords}"

    # WHEN the request is sent to the server
    response = mock_app.test_client().get(url)
    payload = json.loads(response.data)

    # THEN the server should complain about the missing position params
    assert response.status_code == 400
    assert payload["message"]["error"] == NO_POSITION_PARAMS


def test_query_get_request_non_numerical_sv_coordinates(mock_app):
    """Send a structural variant query that provides start but no end param
    and expect an error
    """
    # GIVEN a query for a DUP variant that specifies only the start position
    sv_args = "start=4&variantType=DUP"
    url = f"/apiv1.0/{BASE_ARGS}&{sv_args}"

    # WHEN the request is sent to the server
    response = mock_app.test_client().get(url)
    payload = json.loads(response.data)

    # THEN the server should complain about the missing end param
    assert response.status_code == 400
    assert payload["message"]["error"] == NO_SV_END_PARAM


def test_query_get_request_non_increasing_sv_coordinates(mock_app):
    """Send a range query whose coordinates are not ordered and expect an error"""

    # GIVEN range coordinates where endMin (7) is greater than endMax (5)
    range_coords = "&variantType=DUP&startMin=2&startMax=4&endMin=7&endMax=5"
    url = f"/apiv1.0/{BASE_ARGS}&{range_coords}"

    # WHEN the request is sent to the server
    response = mock_app.test_client().get(url)
    payload = json.loads(response.data)

    # THEN the server should reject the coordinate range
    assert response.status_code == 400
    assert payload["message"]["error"] == INVALID_COORD_RANGE


def test_query_get_request_non_numerical_range_coordinates(mock_app):
    """Send a range query containing a non-numerical coordinate and expect an error"""

    # GIVEN range coordinates where endMax is not an integer
    range_coords = "&variantType=DUP&startMin=2&startMax=3&endMin=6&endMax=FOO"
    url = f"/apiv1.0/{BASE_ARGS}&{range_coords}"

    # WHEN the request is sent to the server
    response = mock_app.test_client().get(url)
    payload = json.loads(response.data)

    # THEN the server should reject the coordinate range
    assert response.status_code == 400
    assert payload["message"]["error"] == INVALID_COORD_RANGE
112 changes: 0 additions & 112 deletions tests/server/blueprints/api_v1/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# Query-string fragments for the query endpoint.
# NOTE(review): presumably combined with "&" into request URLs by the tests
# in this module; confirm against their usage.
BASE_ARGS = "query?assemblyId=GRCh37&referenceName=1&referenceBases=TA"
COORDS_ARGS = "start=235826381&end=235826383"
ALT_ARG = "alternateBases=T"
DATASET_ARGS = "datasetIds=foo&datasetIds=test_ds"


def test_info(mock_app):
Expand All @@ -29,117 +28,6 @@ def test_info(mock_app):
assert data[field] is not None


################## TESTS FOR HANDLING WRONG REQUESTS ################


def test_query_get_request_missing_mandatory_params(mock_app):
    """Test the query endpoint by sending a request without mandatory params:
    referenceName, referenceBases, assemblyId
    """

    # When a request missing one or more required params is sent to the server
    response = mock_app.test_client().get("/apiv1.0/query?")

    # Then it should return error
    assert response.status_code == 400
    message = json.loads(response.data)["message"]
    assert message["error"] == NO_MANDATORY_PARAMS

    # None is a singleton, so it is compared by identity rather than equality
    assert message["exists"] is None
    assert message["datasetAlleleResponses"] == []

    # The error response should still identify the beacon and its API version
    assert message["beaconId"]
    assert message["apiVersion"] == "1.0.0"


def test_query_get_request_build_mismatch(mock_app, test_dataset_cli):
    """Query a dataset whose genome build differs from the assemblyId of the request and expect an error"""

    # GIVEN a database containing a dataset with genome build GRCh38
    test_dataset_cli["assembly_id"] = "GRCh38"
    mock_app.db["dataset"].insert_one(test_dataset_cli)

    # WHEN that dataset is queried by a request specifying genome build GRCh37
    dataset_arg = f"datasetIds={test_dataset_cli['_id']}"
    url = f"/apiv1.0/{BASE_ARGS}&{dataset_arg}"
    response = mock_app.test_client().get(url)

    # THEN the server should answer with a build mismatch error
    assert response.status_code == 400
    payload = json.loads(response.data)
    assert payload["message"]["error"] == BUILD_MISMATCH


def test_query_get_request_missing_secondary_params(mock_app):
    """Send a query that lacks both secondary params (alternateBases, variantType)
    and expect an error
    """
    # WHEN the request contains the base params only
    url = f"/apiv1.0/{BASE_ARGS}"
    response = mock_app.test_client().get(url)

    # THEN the server should complain about the missing secondary params
    assert response.status_code == 400
    payload = json.loads(response.data)
    assert payload["message"]["error"] == NO_SECONDARY_PARAMS


def test_query_get_request_missing_positions_params(mock_app):
    """Send a query with incomplete coordinate params and expect an error:
    neither start nor the full startMin + startMax + endMin + endMax set is provided
    """
    # GIVEN a query without start and with only 3 of the 4 range coordinates (no endMax)
    coords = "alternateBases=T&startMin=2&startMax=6&endMin=4"
    url = f"/apiv1.0/{BASE_ARGS}&{coords}"

    # WHEN the request is sent to the server
    response = mock_app.test_client().get(url)
    payload = json.loads(response.data)

    # THEN the server should complain about the missing position params
    assert response.status_code == 400
    assert payload["message"]["error"] == NO_POSITION_PARAMS


def test_query_get_request_non_numerical_sv_coordinates(mock_app):
    """Send a structural variant query that provides start but no end param
    and expect an error
    """
    # GIVEN a query for a DUP variant that specifies only the start position
    sv_args = "start=4&variantType=DUP"
    url = f"/apiv1.0/{BASE_ARGS}&{sv_args}"

    # WHEN the request is sent to the server
    response = mock_app.test_client().get(url)
    payload = json.loads(response.data)

    # THEN the server should complain about the missing end param
    assert response.status_code == 400
    assert payload["message"]["error"] == NO_SV_END_PARAM


def test_query_get_request_non_increasing_sv_coordinates(mock_app):
    """Send a range query whose coordinates are not ordered and expect an error"""

    # GIVEN range coordinates where endMin (7) is greater than endMax (5)
    range_coords = "&variantType=DUP&startMin=2&startMax=4&endMin=7&endMax=5"
    url = f"/apiv1.0/{BASE_ARGS}&{range_coords}"

    # WHEN the request is sent to the server
    response = mock_app.test_client().get(url)
    payload = json.loads(response.data)

    # THEN the server should reject the coordinate range
    assert response.status_code == 400
    assert payload["message"]["error"] == INVALID_COORD_RANGE


def test_query_get_request_non_numerical_range_coordinates(mock_app):
    """Send a range query containing a non-numerical coordinate and expect an error"""

    # GIVEN range coordinates where endMax is not an integer
    range_coords = "&variantType=DUP&startMin=2&startMax=3&endMin=6&endMax=FOO"
    url = f"/apiv1.0/{BASE_ARGS}&{range_coords}"

    # WHEN the request is sent to the server
    response = mock_app.test_client().get(url)
    payload = json.loads(response.data)

    # THEN the server should reject the coordinate range
    assert response.status_code == 400
    assert payload["message"]["error"] == INVALID_COORD_RANGE


################## TESTS FOR HANDLING SNV REQUESTS ################


Expand Down

0 comments on commit 126a2ef

Please sign in to comment.