Skip to content

Commit

Permalink
allow fuzzy coordinate search
Browse files Browse the repository at this point in the history
  • Loading branch information
northwestwitch committed Jun 23, 2020
1 parent 73c1e14 commit 65a2a3d
Show file tree
Hide file tree
Showing 8 changed files with 112 additions and 43 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
### Fixed
- Added an init file inside the demo resources folder
- Accept variants annotated with build GRCh37 and GRCh38 (`chrN`) instead of just `N` (as in hg19)
- Improved calculation of structural variants end coordinates

### Changed
- Renamed SNV and SV demo VCF files
- Range queries allowing for start position < end position when variantType=BND
- Coordinate Range queries allowing fuzzy positions

### Added
- Demo VCF file containing BND SV variants
Expand Down
1 change: 0 additions & 1 deletion cgbeacon2/constants/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
NO_SECONDARY_PARAMS,
NO_POSITION_PARAMS,
INVALID_COORDINATES,
INVALID_COORD_RANGE,
BUILD_MISMATCH,
)
from .oauth_errors import (
Expand Down
5 changes: 0 additions & 5 deletions cgbeacon2/constants/query_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,6 @@
errorMessage="invalid coordinates. Variant start and stop positions must be numbers",
)

INVALID_COORD_RANGE = dict(
errorCode=400,
errorMessage="invalid coordinate range: startMin <= startMax <= endMin <= endMax",
)

BUILD_MISMATCH = dict(
errorCode=400,
errorMessage="Requested genome assembly is in conflict with the assembly of one or more requested datasets",
Expand Down
1 change: 1 addition & 0 deletions cgbeacon2/constants/response_objs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
####### QUERY PARAMS #######
QUERY_PARAMS_API_V1 = [
"referenceName",
"mateName",
"referenceBases",
"assemblyId",
"start",
Expand Down
33 changes: 20 additions & 13 deletions cgbeacon2/server/blueprints/api_v1/controllers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
NO_SECONDARY_PARAMS,
NO_POSITION_PARAMS,
INVALID_COORDINATES,
INVALID_COORD_RANGE,
BUILD_MISMATCH,
QUERY_PARAMS_API_V1,
)
Expand Down Expand Up @@ -144,7 +143,7 @@ def check_allele_request(resp_obj, customer_query, mongo_query):
# Check that genomic coordinates are provided (even rough)
if (
customer_query.get("start") is None
and all([coord in customer_query.keys() for coord in RANGE_COORDINATES])
and any([coord in customer_query.keys() for coord in RANGE_COORDINATES])
is False
):
# return a bad request 400 error with explanation message
Expand All @@ -166,25 +165,33 @@ def check_allele_request(resp_obj, customer_query, mongo_query):
error=INVALID_COORDINATES, allelRequest=customer_query,
)

elif all(
# Range query
elif any(
[coord in customer_query.keys() for coord in RANGE_COORDINATES]
): # range query
# check that startMin <= startMax <= endMin <= endMax
# In general startMin <= startMax <= endMin <= endMax, but allow fuzzy ends query

fuzzy_start_query = {}
fuzzy_end_query = {}
try:
unsorted_coords = [
int(customer_query[coord]) for coord in RANGE_COORDINATES
]
if "startMin" in customer_query:
fuzzy_start_query["$gte"] = int(customer_query["startMin"])
if "startMax" in customer_query:
fuzzy_start_query["$lte"] = int(customer_query["startMax"])
if "endMin" in customer_query:
fuzzy_end_query["$gte"] = int(customer_query["endMin"])
if "endMax" in customer_query:
fuzzy_end_query["$lte"] = int(customer_query["endMax"])
except ValueError:
unsorted_coords = [1, 0]
if unsorted_coords != sorted(unsorted_coords): # coordinates are not valid
# return a bad request 400 error with explanation message
resp_obj["message"] = dict(
error=INVALID_COORD_RANGE, allelRequest=customer_query,
error=INVALID_COORDINATES, allelRequest=customer_query,
)
return

mongo_query["start"] = {"$gte": unsorted_coords[0], "$lte": unsorted_coords[1]}
mongo_query["end"] = {"$gte": unsorted_coords[2], "$lte": unsorted_coords[3]}
if fuzzy_start_query:
mongo_query["start"] = fuzzy_start_query
if fuzzy_end_query:
mongo_query["end"] = fuzzy_end_query

if mongo_query.get("_id") is None:
# perform normal query
Expand Down
19 changes: 19 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,25 @@ def test_sv():
return variant


@pytest.fixture
def test_bnd_sv():
    """Provide a BND structural variant document, shaped as it is stored in the database."""
    # Returned as a fresh literal on every call, so tests may mutate it freely.
    return {
        "_id": "c0e355e7899e9fd765797c9f72d0cf7f",
        "referenceName": "17",
        "mateName": "2",
        "start": 198981,
        "end": 321680,
        "referenceBases": "A",
        "alternateBases": "A]2:321681]",
        "variantType": "BND",
        "assemblyId": "GRCh37",
        "datasetIds": {
            "test_public": {"samples": {"ADM1059A1": {"allele_count": 1}}},
        },
        "call_count": 1,
    }


@pytest.fixture
def public_dataset():
"""A test public dataset dictionary"""
Expand Down
25 changes: 3 additions & 22 deletions tests/server/blueprints/api_v1/test_request_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
NO_SECONDARY_PARAMS,
NO_POSITION_PARAMS,
INVALID_COORDINATES,
INVALID_COORD_RANGE,
BUILD_MISMATCH,
)

Expand Down Expand Up @@ -95,13 +94,11 @@ def test_query_get_request_non_numerical_sv_coordinates(mock_app):

def test_query_get_request_missing_positions_params(mock_app):
"""Test the query endpoint by sending a request missing coordinate params:
Either stat or startMin + startMax + endMin + endMax
Either start or any range coordinate
"""
# When a request is missing both the start position and all 4 range position coordinates (startMin, startMax, endMin, endMax)
query_string = "&".join(
[BASE_ARGS, "alternateBases=T&startMin=2&startMax=6&endMin=4"]
)
query_string = "&".join([BASE_ARGS, "alternateBases=T"])
response = mock_app.test_client().get(
"".join(["/apiv1.0/", query_string]), headers=HEADERS
)
Expand All @@ -111,22 +108,6 @@ def test_query_get_request_missing_positions_params(mock_app):
assert data["message"]["error"] == NO_POSITION_PARAMS


def test_query_get_request_non_increasing_sv_coordinates(mock_app):
"""Test the query endpoint by sending a request with non-ordered range coordinates"""

range_coords = "&variantType=DUP&startMin=2&startMax=4&endMin=7&endMax=5"
query_string = "&".join([BASE_ARGS, range_coords])

# When a request for range coordinates doesn't contain ordered coordinates
response = mock_app.test_client().get(
"".join(["/apiv1.0/", query_string]), headers=HEADERS
)
data = json.loads(response.data)
# Then it should return error
assert response.status_code == 400
assert data["message"]["error"] == INVALID_COORD_RANGE


def test_query_get_request_non_numerical_range_coordinates(mock_app):
"""Test the query endpoint by sending a request with non-numerical range coordinates"""

Expand All @@ -140,4 +121,4 @@ def test_query_get_request_non_numerical_range_coordinates(mock_app):
data = json.loads(response.data)
# Then it should return error
assert response.status_code == 400
assert data["message"]["error"] == INVALID_COORD_RANGE
assert data["message"]["error"] == INVALID_COORDINATES
68 changes: 67 additions & 1 deletion tests/server/blueprints/api_v1/test_views_query_no_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json

from cgbeacon2.cli.commands import cli
from cgbeacon2.resources import test_bnd_vcf_path

HEADERS = {"Content-type": "application/json", "Accept": "application/json"}

Expand All @@ -11,12 +12,77 @@
ALT_ARG = "alternateBases=T"


def test_post_range_coords_BND_SV_found(
    mock_app, public_dataset, database, test_bnd_sv
):
    r"""Test a POST range query that should find an existing BND structural variant.

    Only a fuzzy start interval (startMin/startMax) is sent; no end
    coordinates are provided. Example of a comparable curl request:

    curl -X POST \
    localhost:5000/apiv1.0/query \
    -H 'Content-Type: application/json' \
    -H 'Accept: application/json' \
    -d '{"referenceName": "17",
    "mateName": "2",
    "variantType" : "BND",
    "startMin": 198000,
    "startMax": 200000,
    "referenceBases": "A",
    "assemblyId": "GRCh37",
    "includeDatasetResponses": "ALL"}'
    """

    # GIVEN a database containing a public dataset
    database["dataset"].insert_one(public_dataset)

    sample = "ADM1059A1"

    # AND a number of BND variants loaded from the BND test VCF via the CLI
    runner = mock_app.test_cli_runner()
    result = runner.invoke(
        cli,
        [
            "add",
            "variants",
            "-ds",
            public_dataset["_id"],
            "-vcf",
            test_bnd_vcf_path,
            "-sample",
            sample,
        ],
    )
    # Fail here, with an obvious cause, if the variants could not be loaded;
    # otherwise a broken load would surface below as "variant not found".
    assert result.exit_code == 0

    # Query a window of +/- 1000 positions around the variant start,
    # deliberately omitting endMin/endMax (fuzzy range query)
    data = json.dumps(
        {
            "referenceName": test_bnd_sv["referenceName"],
            "referenceBases": test_bnd_sv["referenceBases"],
            "mateName": test_bnd_sv["mateName"],
            "variantType": test_bnd_sv["variantType"],  # BND
            "assemblyId": test_bnd_sv["assemblyId"],
            "startMin": test_bnd_sv["start"] - 1000,
            "startMax": test_bnd_sv["start"] + 1000,
            "includeDatasetResponses": "ALL",
        }
    )

    # WHEN calling the endpoint with the POST method
    response = mock_app.test_client().post("/apiv1.0/query", data=data, headers=HEADERS)

    # THEN it should not return an error
    assert response.status_code == 200
    resp_data = json.loads(response.data)

    # AND the variant should be found (JSON `true` decodes to the bool True)
    assert resp_data["datasetAlleleResponses"][0]["exists"] is True


def test_beacon_entrypoint(mock_app, registered_dataset):
"""Test the endpoint that returns the beacon info, when there is one dataset in database"""

runner = mock_app.test_cli_runner()

# Having a database containing a public dataset
# Having a database containing a registered_dataset dataset
database = mock_app.db

runner.invoke(
Expand Down

0 comments on commit 65a2a3d

Please sign in to comment.