Implement atldld search img command (#85)

BlueBrain · Oct 13, 2021 · 07a250f · 07a250f
1 parent 01a9545
commit 07a250f
Show file tree

Hide file tree

Showing 4 changed files with 247 additions and 21 deletions.
diff --git a/src/atldld/cli/search.py b/src/atldld/cli/search.py
@@ -84,12 +84,59 @@ def search_dataset(dataset_id, specimen_id, gene_acronym, plane_of_section):
         for meta in msg:
             genes = ", ".join(gene["acronym"] for gene in meta["genes"])
             plane_of_section = PlaneOfSection(meta["plane_of_section_id"])
-            print(
+            click.echo(
                 f"* id: {meta['id']:10d}, genes: {genes:>10s}, "
                 f"{plane_of_section}, {len(meta['section_images'])} section images"
             )
 
 
 @search_cmd.command("img", help="Search section images")
-def search_img():
+@click.option("-i", "--id", "image_id", help="The image ID")
+@click.option("-d", "--dataset", "dataset_id", help="The dataset ID")
+@click.option("-g", "--gene-name", "gene_acronym", help="The gene acronym")
+@click.option("-s", "--specimen", "specimen_id", help="The specimen ID")
+def search_img(image_id, dataset_id, gene_acronym, specimen_id):
     """Run search subcommand."""
+    from collections import defaultdict
+    from typing import Any, DefaultDict
+
+    from atldld import requests
+
+    criteria: DefaultDict[str, Any] = defaultdict(dict)  # auto-creates "data_set"
+    if image_id is not None:
+        criteria["id"] = image_id
+    if dataset_id is not None:
+        criteria["data_set_id"] = dataset_id
+    if gene_acronym is not None:
+        criteria["data_set"]["genes"] = {"acronym": gene_acronym}
+    if specimen_id is not None:
+        criteria["data_set"]["specimen_id"] = specimen_id
+
+    if not criteria:
+        raise click.ClickException(
+            "At least one of the search criteria has to be specified. "
+            "Use the --help flag to see all available criteria."
+        )
+
+    # Send request
+    rma_parameters = requests.RMAParameters(
+        "SectionImage",
+        criteria=criteria,
+    )
+    click.secho("Searching...", fg="green")
+    try:
+        msg = requests.rma_all(rma_parameters)
+    except requests.RMAError as exc:
+        raise click.ClickException(
+            f"An error occurred while querying the AIBS servers: {exc}"
+        ) from exc
+
+    if not msg:
+        click.secho("No images found", fg="red")
+    else:
+        click.secho(f"{len(msg)} image(s) found:", fg="green")
+        for meta in msg:
+            click.echo(
+                f"* id: {meta['id']:10d}, dataset: {meta['data_set_id']:>10d}, "
+                f"h: {meta['height']:>5d}, w: {meta['width']:>5d}"
+            )
diff --git a/src/atldld/requests.py b/src/atldld/requests.py
@@ -42,7 +42,6 @@ class RMAParameters:
     * Criteria filters only support the equality operator
     * Associations (~nested criteria) are modeled by values in the `criteria`
       dictionary that are dictionaries themselves.
-    * Nested associations are not supported.
     * The include field does not support filters
     """
 
@@ -67,20 +66,20 @@ def __str__(self) -> str:
             # criteria = {"specimen_id": 123, "genes": {"acronym": "Gad1"}}
             # This should translate to the following URL part:
             # "rma::criteria,[specimen_id$eq123],genes[acronym$eqGad1]"
+            # criteria = {"data_set":
+            # {"specimen_id": 123, "genes": {"acronym": "Gad1"}}}
+            # This should translate to the following URL part:
+            # "rma::criteria,data_set[specimen_id$eq123](genes[acronym$eqGad1])"
             flags.append("rma::criteria")
-            criteria = {}
-            sub_criteria_fields = {}
-            for k, v in self.criteria.items():
-                if isinstance(v, dict):
-                    sub_criteria_fields[k] = v
-                else:
-                    criteria[k] = v
-            if criteria:
-                flags.append("".join(f"[{k}$eq{v}]" for k, v in criteria.items()))
-            for field, sub_criteria in sub_criteria_fields.items():
-                flags.append(
-                    field + "".join(f"[{k}$eq{v}]" for k, v in sub_criteria.items())
-                )
+
+            fields, associations = self._split_criteria(self.criteria)
+
+            if fields:
+                flags.append("".join(f"[{k}$eq{v}]" for k, v in fields.items()))
+
+            for name, criteria in associations.items():
+                # e.g. name="data_set", criteria={"specimen_id": 123, "genes": {...}}
+                flags.append(self._parse_association(name, criteria))
 
         # Include
         if self.include is not None:
@@ -98,6 +97,45 @@ def __str__(self) -> str:
 
         return f'criteria={",".join(flags)}'
 
+    def _split_criteria(
+        self, criteria: Dict[str, Any]
+    ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+        """Separate criteria into fields and associations.
+
+        For example: criteria: {"specimen_id": 123, "genes": {"acronym": "Gad1"}}
+        --> fields: {"specimen_id": 123}
+        --> associations: {"genes": {"acronym": "Gad1"}}
+        """
+        fields: Dict[str, Any] = {}
+        associations: Dict[str, Any] = {}
+
+        for k, v in criteria.items():
+            if isinstance(v, dict):
+                associations[k] = v
+            else:
+                fields[k] = v
+
+        return fields, associations
+
+    def _parse_association(self, name: str, criteria: Dict[str, Any]) -> str:
+        """Parse association for the creation of the URL.
+
+        Plain fields are rendered as ``[key$eqvalue]`` filters right after
+        ``name``; nested associations are parsed recursively, joined with
+        commas and wrapped in parentheses.
+        """
+        fields, associations = self._split_criteria(criteria)
+        result = name + "".join(f"[{k}$eq{v}]" for k, v in fields.items())
+
+        if associations:
+            nested = ",".join(
+                self._parse_association(sub_name, sub_criteria)
+                for sub_name, sub_criteria in associations.items()
+            )
+            result += f"({nested})"
+
+        return result
+
 
 def rma_all(rma_parameters: RMAParameters) -> list:
     """Send one or multiple RMA requests to get all data for given parameters.

diff --git a/tests/cli/test_search_cmd.py b/tests/cli/test_search_cmd.py
@@ -19,7 +19,7 @@
 import pytest
 from click.testing import CliRunner
 
-from atldld.cli.search import search_cmd, search_dataset
+from atldld.cli.search import search_cmd, search_dataset, search_img
 from atldld.requests import RMAError
 
 
@@ -31,11 +31,12 @@ def test_running_without_arguments_prints_help(self):
         assert result.output.startswith("Usage:")
 
 
-class TestSearchDataset:
-    @pytest.fixture
-    def rma_all(self, mocker):
-        return mocker.patch("atldld.requests.rma_all", return_value=[])
+@pytest.fixture
+def rma_all(mocker):
+    return mocker.patch("atldld.requests.rma_all", return_value=[])  # empty by default
+
 
+class TestSearchDataset:
     def test_calling_without_parameters_produces_an_error(self):
         runner = CliRunner()
         result = runner.invoke(search_dataset)
@@ -135,3 +136,100 @@ def test_unknown_plane_of_section(self, rma_all):
         )
         assert result.exit_code == 0
         assert f'Unknown plane of section name: "{plane_of_section}"' in result.output
+
+
+class TestSearchImage:
+    def test_calling_without_parameters_produces_an_error(self):
+        runner = CliRunner()
+        result = runner.invoke(search_img)
+        assert result.exit_code != 0  # should exit with an error code
+        assert result.output.startswith(
+            "Error: At least one of the search criteria has to be specified."
+        )
+
+    def test_rma_errors_are_reported(self, rma_all):
+        error_msg = "Some error occurred"
+        rma_all.side_effect = RMAError(error_msg)
+        result = CliRunner().invoke(search_img, ["--id", "1"])
+        assert result.exit_code != 0
+        assert "error" in result.output
+        assert error_msg in result.output
+
+    def test_no_images_found(self, rma_all):
+        rma_all.return_value = []
+        result = CliRunner().invoke(search_img, ["--id", "1"])
+        assert result.exit_code == 0
+        assert "No images found" in result.output
+
+    @pytest.mark.parametrize(
+        ("cli_params", "expected_criteria"),
+        (
+            (["--id", "1"], {"id": "1"}),
+            (["--dataset", "789"], {"data_set_id": "789"}),
+            (["--specimen", "702694"], {"data_set": {"specimen_id": "702694"}}),
+            (
+                ["--gene-name", "my-gene"],
+                {"data_set": {"genes": {"acronym": "my-gene"}}},
+            ),
+        ),
+        ids=(
+            "Filter by image ID",
+            "Filter by dataset ID",
+            "Filter by specimen ID",
+            "Filter by gene acronym",
+        ),
+    )
+    def test_search_filters(self, rma_all, cli_params, expected_criteria):
+        """Test that CLI parameters are correctly translated to criteria."""
+        result = CliRunner().invoke(search_img, cli_params)
+        assert result.exit_code == 0
+        rma_all.assert_called_once()
+        # Get the args of the last call to rma_all
+        (rma_parameters,), _kwargs = rma_all.call_args
+        assert rma_parameters.criteria == expected_criteria
+
+    @pytest.mark.parametrize(
+        "command",
+        [
+            "--id",
+            "--dataset",
+        ],
+    )
+    def test_all_results_are_shown(self, rma_all, command):
+        msg = [
+            {
+                "id": 1,
+                "data_set_id": 1,
+                "height": 100,
+                "width": 100,
+            },
+            {
+                "id": 2,
+                "data_set_id": 1,
+                "height": 200,
+                "width": 200,
+            },
+            {
+                "id": 3,
+                "data_set_id": 1,
+                "height": 300,
+                "width": 300,
+            },
+        ]
+        rma_all.return_value = msg
+        runner = CliRunner()
+        result = runner.invoke(search_img, [command, "whatever"])
+        assert result.exit_code == 0
+
+        # Check the output contains the correct number of bullet points
+        assert len(re.findall(r"\*", result.output)) == len(msg)
+
+        # Check each bullet point has the correct content
+        for item in msg:
+            assert re.search(
+                (
+                    fr"id: +{item['id']}, dataset: +{item['data_set_id']}, "
+                    fr"h: +{item['height']}, w: +{item['width']}"
+                ),
+                result.output.strip(),
+            )
diff --git a/tests/test_requests.py b/tests/test_requests.py
@@ -27,6 +27,45 @@ def test_model(self):
         params = RMAParameters("my-model")
         assert str(params) == "criteria=model::my-model"
 
+    @pytest.mark.parametrize(
+        ("name", "criteria", "url_params"),
+        (
+            (
+                "",  # empty name: only the field filters are emitted
+                {"id": 10, "name": "dataset"},
+                "[id$eq10][name$eqdataset]",
+            ),
+            (
+                "data_set",
+                {"id": 123, "genes": {"acronym": "Gad1"}},
+                "data_set[id$eq123](genes[acronym$eqGad1])",
+            ),
+            (
+                "data_set",
+                {"specimen_id": 123, "genes": {"acronym": "Gad1"}},
+                "data_set[specimen_id$eq123](genes[acronym$eqGad1])",
+            ),
+            (
+                "data_set",
+                {
+                    "id": 479,
+                    "sphinx_id": 138444,
+                    "genes": {  # has its own nested association
+                        "acronym": "Gad1",
+                        "organism": {"id": 2, "ncbitaxonomyid": 10090},
+                    },
+                    "specimen": {"id": 702765},  # sibling association
+                },
+                "data_set[id$eq479][sphinx_id$eq138444](genes[acronym$eqGad1]"
+                "(organism[id$eq2][ncbitaxonomyid$eq10090]),specimen[id$eq702765])",
+            ),
+        ),
+    )
+    def test_parse_association(self, name, criteria, url_params):
+        params = RMAParameters("my-model")
+        result = params._parse_association(name, criteria)
+        assert result == url_params
+
     @pytest.mark.parametrize(
         ("criteria", "url_params"),
         (
@@ -42,6 +81,10 @@ def test_model(self):
                 {"specimen_id": 123, "genes": {"acronym": "Gad1"}},
                 "[specimen_id$eq123],genes[acronym$eqGad1]",
             ),
+            (
+                {"data_set": {"specimen_id": 123, "genes": {"acronym": "Gad1"}}},
+                "data_set[specimen_id$eq123](genes[acronym$eqGad1])",
+            ),
         ),
     )
     def test_criteria(self, criteria, url_params):