Skip to content

Commit

Permalink
Merge 57b3082 into 7397aca
Browse files Browse the repository at this point in the history
  • Loading branch information
northwestwitch committed May 4, 2020
2 parents 7397aca + 57b3082 commit cfe7fd7
Show file tree
Hide file tree
Showing 7 changed files with 220 additions and 123 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@
- Added repository codeowners
- Added tests for queries with datasetAlleleResponses == HIT and MISS
- No conflicts between queried assembly and the assembly or queried datasets
- Parse SVs and save them to database
2 changes: 2 additions & 0 deletions cgbeacon2/resources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@

###### Files ######
# Locations of the demo resources, expressed relative to the root of the
# "cgbeacon2" package (they are resolved against that package below).
test_snv_vcf = "resources/demo/643594.clinical.vcf.gz"
test_sv_vcf = "resources/demo/643594.clinical.SV.vcf.gz"
empty_vcf = "resources/demo/empty.clinical.SV.vcf.gz"
panel1 = "resources/demo/panel1.bed"
panel2 = "resources/demo/panel2.bed"

###### Paths ######
# Filesystem paths of the resources above, as returned by
# pkg_resources.resource_filename for the "cgbeacon2" package.
# NOTE(review): pkg_resources is deprecated upstream in favour of
# importlib.resources -- consider migrating.
test_snv_vcf_path = pkg_resources.resource_filename("cgbeacon2", test_snv_vcf)
test_sv_vcf_path = pkg_resources.resource_filename("cgbeacon2", test_sv_vcf)
test_empty_vcf_path = pkg_resources.resource_filename("cgbeacon2", empty_vcf)
panel1_path = pkg_resources.resource_filename("cgbeacon2", panel1)
panel2_path = pkg_resources.resource_filename("cgbeacon2", panel2)
Binary file not shown.
7 changes: 4 additions & 3 deletions cgbeacon2/utils/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,16 +92,17 @@ def add_variants(database, vcf_obj, samples, assembly, dataset_id, nr_variants):
if len(sample_calls) == 0:
continue # variant was not called in samples of interest

if vcf_variant.var_type == "sv": # otherwise snp or indel
parsed_variant["variant_type"] = "sv" # fix later, this is not OK yet

parsed_variant = dict(
chromosome=vcf_variant.CHROM,
start=vcf_variant.start,
end=vcf_variant.end,
reference_bases=vcf_variant.REF,
alternate_bases=vcf_variant.ALT,
)

if vcf_variant.var_type == "sv":
parsed_variant["variant_type"] = vcf_variant.INFO["SVTYPE"]

dataset_dict = {dataset_id: {"samples": sample_calls}}
# Create standard variant object with specific _id
variant = Variant(parsed_variant, dataset_dict, assembly)
Expand Down
91 changes: 91 additions & 0 deletions tests/cli/add/test_add_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pytest
from cgbeacon2.resources import (
test_snv_vcf_path,
test_sv_vcf_path,
test_empty_vcf_path,
panel1_path,
panel2_path,
Expand Down Expand Up @@ -264,3 +265,93 @@ def test_add_other_sample_variants(mock_app, test_dataset_cli, database):
assert sample in dataset_obj["samples"]
assert sample2 in dataset_obj["samples"]
assert "updated" in dataset_obj


def test_add_sv_variants(mock_app, test_dataset_cli, database):
    """Test adding SV variants for one sample.

    Runs the ``add variants`` command on the demo SV VCF file and checks that
    every variant document saved to the database has one of the expected
    structural variant types.
    """
    runner = mock_app.test_cli_runner()

    # GIVEN a database containing a dataset
    dataset = test_dataset_cli
    database["dataset"].insert_one(dataset)

    sample = "ADM1059A1"

    # WHEN invoking the add variants command with a VCF file containing SVs
    # (the invocation result is not inspected: the outcome is verified
    # directly against the database content below)
    runner.invoke(
        cli,
        [
            "add",
            "variants",
            "-ds",
            dataset["_id"],
            "-vcf",
            test_sv_vcf_path,
            "-sample",
            sample,
        ],
    )

    # THEN a number of variants should have been saved to database
    saved_vars = list(database["variant"].find())
    assert saved_vars

    # AND all of them should have a valid SV variant type
    valid_types = ("INS", "DUP", "DEL", "INV")
    for var in saved_vars:
        assert var["variantType"] in valid_types


def test_add_snv_sv_variants(mock_app, test_dataset_cli, database):
    """Test adding SNV + SV variants for one sample.

    Variants from the demo SNV VCF file (filtered using 2 gene panels) are
    loaded first, then the demo SV VCF file is loaded for the same sample.
    The second load must increase the number of variant documents.
    """
    runner = mock_app.test_cli_runner()

    # GIVEN a database containing a dataset
    dataset = test_dataset_cli
    database["dataset"].insert_one(dataset)

    sample = "ADM1059A1"

    # WHEN invoking the add variants command on the SNV VCF file,
    # filtering using 2 gene panels
    runner.invoke(
        cli,
        [
            "add",
            "variants",
            "-ds",
            dataset["_id"],
            "-vcf",
            test_snv_vcf_path,
            "-sample",
            sample,
            "-panel",
            panel1_path,
            "-panel",
            panel2_path,
        ],
    )

    # THEN a number of SNV variants should have been saved to database
    saved_snvs = sum(1 for _ in database["variant"].find())

    # WHEN variants from another VCF file containing SVs are added
    runner.invoke(
        cli,
        [
            "add",
            "variants",
            "-ds",
            dataset["_id"],
            "-vcf",
            test_sv_vcf_path,
            "-sample",
            sample,
        ],
    )

    # THEN more variants should have been added to the database
    new_saved_vars = sum(1 for _ in database["variant"].find())
    assert new_saved_vars > saved_snvs
122 changes: 122 additions & 0 deletions tests/server/blueprints/api_v1/test_request_errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# -*- coding: utf-8 -*-
import json
from cgbeacon2.constants import (
NO_MANDATORY_PARAMS,
NO_SECONDARY_PARAMS,
NO_POSITION_PARAMS,
NO_SV_END_PARAM,
INVALID_COORD_RANGE,
BUILD_MISMATCH,
)

# Endpoint name plus query string shared by the tests in this module; it
# carries only the assemblyId, referenceName and referenceBases parameters.
BASE_ARGS = "query?assemblyId=GRCh37&referenceName=1&referenceBases=TA"

################## TESTS FOR HANDLING WRONG REQUESTS ################


def test_query_get_request_missing_mandatory_params(mock_app):
    """Test the query endpoint by sending a request without mandatory params:
    referenceName, referenceBases, assemblyId
    """

    # WHEN a request missing one or more required params is sent to the server
    response = mock_app.test_client().get("/apiv1.0/query?")

    # THEN it should return error
    assert response.status_code == 400
    data = json.loads(response.data)
    message = data["message"]
    assert message["error"] == NO_MANDATORY_PARAMS
    # Identity comparison is the correct check for the None singleton
    assert message["exists"] is None
    assert message["datasetAlleleResponses"] == []
    assert message["beaconId"]
    assert message["apiVersion"] == "1.0.0"


def test_query_get_request_build_mismatch(mock_app, test_dataset_cli):
    """Query a dataset whose genome build differs from the requested assemblyId.

    The dataset is stored with build GRCh38 while the request asks for
    GRCh37, so the server is expected to answer with BUILD_MISMATCH.
    """
    # GIVEN a dataset with genome build GRCh38 saved in the database
    test_dataset_cli["assembly_id"] = "GRCh38"
    mock_app.db["dataset"].insert_one(test_dataset_cli)

    # WHEN a GRCh37 request restricted to that GRCh38 dataset is sent
    dataset_filter = f"datasetIds={test_dataset_cli['_id']}"
    url = "/apiv1.0/" + BASE_ARGS + "&" + dataset_filter
    client = mock_app.test_client()
    response = client.get(url)

    # THEN the server should respond with a build mismatch error
    assert response.status_code == 400
    payload = json.loads(response.data)
    assert payload["message"]["error"] == BUILD_MISMATCH


def test_query_get_request_missing_secondary_params(mock_app):
    """Query the endpoint providing neither alternateBases nor variantType.

    Only the parameters contained in BASE_ARGS are sent, so the server is
    expected to answer with NO_SECONDARY_PARAMS.
    """
    # WHEN the request carries nothing more than the base arguments
    url = "/apiv1.0/" + BASE_ARGS
    client = mock_app.test_client()
    response = client.get(url)

    # THEN the server should respond with an error
    assert response.status_code == 400
    payload = json.loads(response.data)
    assert payload["message"]["error"] == NO_SECONDARY_PARAMS


def test_query_get_request_missing_positions_params(mock_app):
    """Query the endpoint with an incomplete set of coordinate params.

    A request needs either start or startMin + startMax + endMin + endMax.
    The one sent here has no start and lacks endMax, so the server is
    expected to answer with NO_POSITION_PARAMS.
    """
    # WHEN the request has startMin, startMax and endMin but no endMax
    incomplete_coords = "alternateBases=T&startMin=2&startMax=6&endMin=4"
    url = "/apiv1.0/" + BASE_ARGS + "&" + incomplete_coords
    client = mock_app.test_client()
    response = client.get(url)
    payload = json.loads(response.data)

    # THEN the server should respond with an error
    assert response.status_code == 400
    assert payload["message"]["error"] == NO_POSITION_PARAMS


def test_query_get_request_non_numerical_sv_coordinates(mock_app):
    """Query for an SV providing a start position but no end position.

    The server is expected to answer with NO_SV_END_PARAM.

    NOTE(review): the function name mentions non-numerical coordinates, but
    the request built here only omits the end parameter.
    """
    # WHEN a request for SV variants is missing the end position parameter
    sv_params = "start=4&variantType=DUP"
    url = "/apiv1.0/" + BASE_ARGS + "&" + sv_params
    client = mock_app.test_client()
    response = client.get(url)
    payload = json.loads(response.data)

    # THEN the server should respond with an error
    assert response.status_code == 400
    assert payload["message"]["error"] == NO_SV_END_PARAM


def test_query_get_request_non_increasing_sv_coordinates(mock_app):
    """Test the query endpoint by sending a request with non-ordered range coordinates.

    endMin (7) is greater than endMax (5), so the server is expected to
    answer with INVALID_COORD_RANGE.
    """
    # No leading "&" here: the separator is added by the join below, which
    # avoids sending an empty "&&" parameter in the query string
    range_coords = "variantType=DUP&startMin=2&startMax=4&endMin=7&endMax=5"
    query_string = "&".join([BASE_ARGS, range_coords])

    # WHEN a request for range coordinates doesn't contain ordered coordinates
    response = mock_app.test_client().get("".join(["/apiv1.0/", query_string]))
    data = json.loads(response.data)

    # THEN it should return error
    assert response.status_code == 400
    assert data["message"]["error"] == INVALID_COORD_RANGE


def test_query_get_request_non_numerical_range_coordinates(mock_app):
    """Test the query endpoint by sending a request with non-numerical range coordinates.

    endMax is set to the string "FOO", so the server is expected to answer
    with INVALID_COORD_RANGE.
    """
    # No leading "&" here: the separator is added by the join below, which
    # avoids sending an empty "&&" parameter in the query string
    range_coords = "variantType=DUP&startMin=2&startMax=3&endMin=6&endMax=FOO"
    query_string = "&".join([BASE_ARGS, range_coords])

    # WHEN a request for range coordinates doesn't contain integers
    response = mock_app.test_client().get("".join(["/apiv1.0/", query_string]))
    data = json.loads(response.data)

    # THEN it should return error
    assert response.status_code == 400
    assert data["message"]["error"] == INVALID_COORD_RANGE

0 comments on commit cfe7fd7

Please sign in to comment.