Skip to content

Commit

Permalink
Implement atldld search img command (#85)
Browse files Browse the repository at this point in the history
  • Loading branch information
EmilieDel committed Oct 13, 2021
1 parent 01a9545 commit 07a250f
Show file tree
Hide file tree
Showing 4 changed files with 247 additions and 21 deletions.
51 changes: 49 additions & 2 deletions src/atldld/cli/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,59 @@ def search_dataset(dataset_id, specimen_id, gene_acronym, plane_of_section):
for meta in msg:
genes = ", ".join(gene["acronym"] for gene in meta["genes"])
plane_of_section = PlaneOfSection(meta["plane_of_section_id"])
print(
click.echo(
f"* id: {meta['id']:10d}, genes: {genes:>10s}, "
f"{plane_of_section}, {len(meta['section_images'])} section images"
)


@search_cmd.command("img", help="Search section images")
@click.option("-i", "--id", "image_id", help="The image ID")
@click.option("-d", "--dataset", "dataset_id", help="The dataset ID")
@click.option("-g", "--gene-name", "gene_acronym", help="The gene acronym")
@click.option("-s", "--specimen", "specimen_id", help="The specimen ID")
def search_img(image_id, dataset_id, gene_acronym, specimen_id):
    """Search for section images and print one summary line per match.

    Every option that was supplied becomes an equality filter of an RMA
    query on the ``SectionImage`` model. The image ID and the dataset ID
    filter the image itself, while the gene acronym and the specimen ID
    are nested under the ``data_set`` association.

    Parameters
    ----------
    image_id
        Value of the ``--id`` option, or ``None`` if not given.
    dataset_id
        Value of the ``--dataset`` option, or ``None`` if not given.
    gene_acronym
        Value of the ``--gene-name`` option, or ``None`` if not given.
    specimen_id
        Value of the ``--specimen`` option, or ``None`` if not given.

    Raises
    ------
    click.ClickException
        If no search criterion was specified, or if the RMA query raised
        ``requests.RMAError``.
    """
    from collections import defaultdict
    from typing import Any, DefaultDict

    from atldld import requests

    # The dict default factory lets the "data_set" association be created
    # on first access, so several nested filters can share it.
    criteria: DefaultDict[str, Any] = defaultdict(dict)
    if image_id is not None:
        criteria["id"] = image_id
    if dataset_id is not None:
        criteria["data_set_id"] = dataset_id
    if gene_acronym is not None:
        criteria["data_set"]["genes"] = {"acronym": gene_acronym}
    if specimen_id is not None:
        criteria["data_set"]["specimen_id"] = specimen_id

    if not criteria:
        raise click.ClickException(
            "At least one of the search criteria has to be specified. "
            "Use the --help flag to see all available criteria."
        )

    # Send request
    rma_parameters = requests.RMAParameters("SectionImage", criteria=criteria)
    click.secho("Searching...", fg="green")
    try:
        msg = requests.rma_all(rma_parameters)
    except requests.RMAError as exc:
        # Chain the original error so that the root cause stays attached
        # to the exception that click reports.
        raise click.ClickException(
            f"An error occurred while querying the AIBS servers: {exc}"
        ) from exc

    if not msg:
        click.secho("No images found", fg="red")
        return

    click.secho(f"{len(msg)} image(s) found:", fg="green")
    for meta in msg:
        click.echo(
            f"* id: {meta['id']:10d}, dataset: {meta['data_set_id']:>10d}, "
            f"h: {meta['height']:>5d}, w: {meta['width']:>5d}"
        )
66 changes: 52 additions & 14 deletions src/atldld/requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ class RMAParameters:
* Criteria filters only support the equality operator
* Associations (~nested criteria) are modeled by values in the `criteria`
dictionary that are dictionaries themselves.
* Nested associations are not supported.
* The include field does not support filters
"""

Expand All @@ -67,20 +66,20 @@ def __str__(self) -> str:
# criteria = {"specimen_id": 123, "genes": {"acronym": "Gad1"}}
# This should translate to the following URL part:
# "rma::criteria,[specimen_id$eq123],genes[acronym$eqGad1]"
# criteria = {"data_set":
# {"specimen_id": 123, "genes": {"acronym": "Gad1"}}}
# This should translate to the following URL part:
# "rma::criteria,data_set[specimen_id$eq123](genes[acronym$eqGad1])"
flags.append("rma::criteria")
criteria = {}
sub_criteria_fields = {}
for k, v in self.criteria.items():
if isinstance(v, dict):
sub_criteria_fields[k] = v
else:
criteria[k] = v
if criteria:
flags.append("".join(f"[{k}$eq{v}]" for k, v in criteria.items()))
for field, sub_criteria in sub_criteria_fields.items():
flags.append(
field + "".join(f"[{k}$eq{v}]" for k, v in sub_criteria.items())
)

fields, associations = self._split_criteria(self.criteria)

if fields:
flags.append("".join(f"[{k}$eq{v}]" for k, v in fields.items()))

for name, criteria in associations.items():
# data_set, {"specimen_id": 123, "genes": {"acronym": "Gad1"}}
flags.append(self._parse_association(name, criteria))

# Include
if self.include is not None:
Expand All @@ -98,6 +97,45 @@ def __str__(self) -> str:

return f'criteria={",".join(flags)}'

def _split_criteria(
    self, criteria: Dict[str, Any]
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Separate criteria into fields and associations.

    Entries whose value is a dictionary are treated as associations,
    all other entries as plain fields.

    For example: criteria: {"specimen_id": 123, "genes": {"acronym": "Gad1"}}
    --> fields: {"specimen_id": 123}
    --> associations: {"genes": {"acronym": "Gad1"}}

    Parameters
    ----------
    criteria
        The criteria to split. The mapping itself is not modified.

    Returns
    -------
    fields
        The entries of ``criteria`` whose value is not a dictionary.
    associations
        The entries of ``criteria`` whose value is a dictionary.
    """
    fields: Dict[str, Any] = {}
    associations: Dict[str, Any] = {}

    for key, value in criteria.items():
        target = associations if isinstance(value, dict) else fields
        target[key] = value

    return fields, associations

def _parse_association(self, name, criteria):
    """Build the URL fragment for one association and its criteria.

    The fragment starts with the association name, followed by one
    ``[key$eqvalue]`` filter per plain field. Nested associations are
    rendered recursively and appended as a comma-separated list wrapped
    in parentheses.

    For example, the name "data_set" with the criteria
    {"specimen_id": 123, "genes": {"acronym": "Gad1"}} gives
    "data_set[specimen_id$eq123](genes[acronym$eqGad1])".
    """
    plain_fields, nested = self._split_criteria(criteria)

    pieces = [name]
    pieces.extend(f"[{key}$eq{value}]" for key, value in plain_fields.items())

    if nested:
        rendered = ",".join(
            self._parse_association(sub_name, sub_criteria)
            for sub_name, sub_criteria in nested.items()
        )
        pieces.append(f"({rendered})")

    return "".join(pieces)


def rma_all(rma_parameters: RMAParameters) -> list:
"""Send one or multiple RMA requests to get all data for given parameters.
Expand Down
108 changes: 103 additions & 5 deletions tests/cli/test_search_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import pytest
from click.testing import CliRunner

from atldld.cli.search import search_cmd, search_dataset
from atldld.cli.search import search_cmd, search_dataset, search_img
from atldld.requests import RMAError


Expand All @@ -31,11 +31,12 @@ def test_running_without_arguments_prints_help(self):
assert result.output.startswith("Usage:")


class TestSearchDataset:
@pytest.fixture
def rma_all(self, mocker):
return mocker.patch("atldld.requests.rma_all", return_value=[])
@pytest.fixture
def rma_all(mocker):
    """Patch ``atldld.requests.rma_all`` and return the patch object.

    The patch is created with an empty list as its return value; tests
    can override ``return_value`` or set ``side_effect`` on it.
    """
    patched_rma_all = mocker.patch("atldld.requests.rma_all", return_value=[])
    return patched_rma_all


class TestSearchDataset:
def test_calling_without_parameters_produces_an_error(self):
runner = CliRunner()
result = runner.invoke(search_dataset)
Expand Down Expand Up @@ -135,3 +136,100 @@ def test_unknown_plane_of_section(self, rma_all):
)
assert result.exit_code == 0
assert f'Unknown plane of section name: "{plane_of_section}"' in result.output


class TestSearchImage:
    """Tests for the ``search_img`` command."""

    def test_calling_without_parameters_produces_an_error(self):
        """Invoking the command with no option must fail with a clear error."""
        runner = CliRunner()
        result = runner.invoke(search_img)
        assert result.exit_code != 0  # should exit with an error code
        assert result.output.startswith(
            "Error: At least one of the search criteria has to be specified."
        )

    def test_rma_errors_are_reported(self, rma_all):
        """An ``RMAError`` raised by ``rma_all`` must show up in the output."""
        error_msg = "Some error occurred"
        rma_all.side_effect = RMAError(error_msg)
        result = CliRunner().invoke(search_img, ["--id", "1"])
        assert result.exit_code != 0
        assert "error" in result.output
        assert error_msg in result.output

    def test_no_images_found(self, rma_all):
        """An empty result list is reported without failing the command."""
        rma_all.return_value = []
        result = CliRunner().invoke(search_img, ["--id", "1"])
        assert result.exit_code == 0
        assert "No images found" in result.output

    @pytest.mark.parametrize(
        ("cli_params", "expected_criteria"),
        (
            (["--id", "1"], {"id": "1"}),
            (["--dataset", "789"], {"data_set_id": "789"}),
            (["--specimen", "702694"], {"data_set": {"specimen_id": "702694"}}),
            (
                ["--gene-name", "my-gene"],
                {"data_set": {"genes": {"acronym": "my-gene"}}},
            ),
        ),
        ids=(
            "Filter by image ID",
            "Filter by dataset ID",
            "Filter by specimen ID",
            "Filter by gene acronym",
        ),
    )
    def test_search_filters(self, rma_all, cli_params, expected_criteria):
        """Test that CLI parameters are correctly translated to criteria."""
        result = CliRunner().invoke(search_img, cli_params)
        assert result.exit_code == 0
        # NOTE: `assert rma_all.called_once` would not check anything, since
        # `called_once` is not part of the mock assertion API. Use the real
        # assertion method instead.
        rma_all.assert_called_once()
        # Get the args of the last call to rma_all
        (rma_parameters,), _kwargs = rma_all.call_args
        assert rma_parameters.criteria == expected_criteria

    @pytest.mark.parametrize(
        "command",
        [
            "--id",
            "--dataset",
        ],
    )
    def test_all_results_are_shown(self, rma_all, command):
        """Every item returned by ``rma_all`` must be printed as a bullet."""
        msg = [
            {
                "id": 1,
                "data_set_id": 1,
                "height": 100,
                "width": 100,
            },
            {
                "id": 2,
                "data_set_id": 1,
                "height": 200,
                "width": 200,
            },
            {
                "id": 3,
                "data_set_id": 1,
                "height": 300,
                "width": 300,
            },
        ]
        rma_all.return_value = msg
        runner = CliRunner()
        result = runner.invoke(search_img, [command, "whatever"])
        assert result.exit_code == 0

        # Check the output contains the correct number of bullet points
        assert len(re.findall(r"\*", result.output)) == len(msg)

        # Check each bullet point has the correct content
        for item in msg:
            assert re.search(
                (
                    fr"id: +{item['id']}, dataset: +{item['data_set_id']}, "
                    fr"h: +{item['height']}, w: +{item['width']}"
                ),
                result.output.strip(),
            )
43 changes: 43 additions & 0 deletions tests/test_requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,45 @@ def test_model(self):
params = RMAParameters("my-model")
assert str(params) == "criteria=model::my-model"

@pytest.mark.parametrize(
    ("name", "criteria", "url_params"),
    (
        (
            "",
            {"id": 10, "name": "dataset"},
            "[id$eq10][name$eqdataset]",
        ),
        (
            "data_set",
            {"id": 123, "genes": {"acronym": "Gad1"}},
            "data_set[id$eq123](genes[acronym$eqGad1])",
        ),
        (
            "data_set",
            {"specimen_id": 123, "genes": {"acronym": "Gad1"}},
            "data_set[specimen_id$eq123](genes[acronym$eqGad1])",
        ),
        (
            "data_set",
            {
                "id": 479,
                "sphinx_id": 138444,
                "genes": {
                    "acronym": "Gad1",
                    "organism": {"id": 2, "ncbitaxonomyid": 10090},
                },
                "specimen": {"id": 702765},
            },
            "data_set[id$eq479][sphinx_id$eq138444](genes[acronym$eqGad1]"
            "(organism[id$eq2][ncbitaxonomyid$eq10090]),specimen[id$eq702765])",
        ),
    ),
)
def test_parse_association(self, name, criteria, url_params):
    """Check the URL fragment produced for flat and nested associations."""
    rma_parameters = RMAParameters("my-model")
    assert rma_parameters._parse_association(name, criteria) == url_params

@pytest.mark.parametrize(
("criteria", "url_params"),
(
Expand All @@ -42,6 +81,10 @@ def test_model(self):
{"specimen_id": 123, "genes": {"acronym": "Gad1"}},
"[specimen_id$eq123],genes[acronym$eqGad1]",
),
(
{"data_set": {"specimen_id": 123, "genes": {"acronym": "Gad1"}}},
"data_set[specimen_id$eq123](genes[acronym$eqGad1])",
),
),
)
def test_criteria(self, criteria, url_params):
Expand Down

0 comments on commit 07a250f

Please sign in to comment.