Skip to content

Commit

Permalink
HDXDS-481 Allow override of countries when adding one or more configurable scrapers (#33)
Browse files Browse the repository at this point in the history

* Per scraper configuration of countries to process

* Add delete_scraper to Runner

* Add test for delete_scraper
  • Loading branch information
mcarans committed Feb 2, 2024
1 parent 80054ca commit 4957b61
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 4 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Expand Up @@ -185,7 +185,7 @@ python-dateutil==2.8.2
# pandas
python-io-wrapper==0.3.1
# via libhxl
python-slugify==8.0.2
python-slugify==8.0.3
# via
# ckanapi
# frictionless
Expand Down
24 changes: 23 additions & 1 deletion src/hdx/scraper/runner.py
Expand Up @@ -106,6 +106,7 @@ def add_configurable(
source_configuration: Dict = {},
suffix: Optional[str] = None,
force_add_to_run: bool = False,
countryiso3s: Optional[List[str]] = None,
) -> str:
"""Add configurable scraper to the run. If running specific scrapers rather than
all, and you want to force the inclusion of the scraper in the run regardless of
Expand All @@ -121,6 +122,7 @@ def add_configurable(
source_configuration (Dict): Configuration for sources. Defaults to empty dict (use defaults).
suffix (Optional[str]): Suffix to add to the scraper name
force_add_to_run (bool): Whether to force include the scraper in the next run
countryiso3s (Optional[List[str]]): Override list of country iso3s. Defaults to None.
Returns:
str: scraper name (including suffix if set)
Expand All @@ -129,11 +131,13 @@ def add_configurable(
scraper_name = f"{name}{suffix}"
else:
scraper_name = name
if not countryiso3s:
countryiso3s = self.countryiso3s
self.scrapers[scraper_name] = ConfigurableScraper(
name,
datasetinfo,
level,
self.countryiso3s,
countryiso3s,
adminlevel,
level_name,
source_configuration,
Expand All @@ -159,6 +163,7 @@ def add_configurables(
source_configuration: Dict = {},
suffix: Optional[str] = None,
force_add_to_run: bool = False,
countryiso3s: Optional[List[str]] = None,
) -> List[str]:
"""Add multiple configurable scrapers to the run. If running specific scrapers
rather than all, and you want to force the inclusion of the scraper in the run
Expand All @@ -173,6 +178,7 @@ def add_configurables(
source_configuration (Dict): Configuration for sources. Defaults to empty dict (use defaults).
suffix (Optional[str]): Suffix to add to the scraper name
force_add_to_run (bool): Whether to force include the scraper in the next run
countryiso3s (Optional[List[str]]): Override list of country iso3s. Defaults to None.
Returns:
List[str]: scraper names (including suffix if set)
Expand All @@ -190,6 +196,7 @@ def add_configurables(
source_configuration,
suffix,
force_add_to_run,
countryiso3s,
)
)
return keys
Expand Down Expand Up @@ -516,6 +523,21 @@ def get_scraper_exception(self, name: str) -> BaseScraper:
raise ValueError(f"No such scraper {name}!")
return scraper

def delete_scraper(self, name: str) -> bool:
    """Delete scraper with given name

    Removes the name from ``self.scraper_names`` and drops the matching
    entry from ``self.scrapers`` so the two containers stay in sync.

    Args:
        name (str): Name of scraper

    Returns:
        bool: True if the scraper was present, False if not
    """
    try:
        # EAFP: a single scan of the list instead of an ``in`` test
        # followed by ``remove`` (list.remove raises ValueError when the
        # value is absent).
        self.scraper_names.remove(name)
    except ValueError:
        return False
    # The name list decides whether the scraper "was present". Use pop with
    # a default so a missing dict entry cannot raise after the list has
    # already been modified, which would leave the runner half-updated.
    self.scrapers.pop(name, None)
    return True

def add_instance_variables(self, name: str, **kwargs: Any) -> None:
"""Add instance variables to scraper instance given scraper name
Expand Down
27 changes: 25 additions & 2 deletions tests/hdx/scraper/test_scrapers_national.py
Expand Up @@ -738,9 +738,27 @@ def test_get_national_use_hxl(self, configuration):
today = parse_date("2022-06-03")
level = "national"
scraper_configuration = configuration[f"scraper_{level}"]
iso3s = ("AFG",)
iso3s = ("AFG", "PHL", "ZMB")
runner = Runner(iso3s, today)
runner.add_configurables(scraper_configuration, level)
# We also test overriding the Runner country isos here
keys = runner.add_configurables(
scraper_configuration, level, countryiso3s=("AFG",)
)
expected_keys = [
"population",
"who_national",
"who_national2",
"who_national3",
"access",
"sadd",
"ourworldindata",
"broken_owd_url",
"covidtests",
"idps",
"casualties",
"oxcgrt",
]
assert keys == expected_keys
name = "oxcgrt"
headers = (["StringencyIndexForDisplay"], ["#severity+stringency+num"])
values = [{"AFG": "11.11"}]
Expand All @@ -753,3 +771,8 @@ def test_get_national_use_hxl(self, configuration):
)
]
run_check_scraper(name, runner, level, headers, values, sources)

assert runner.delete_scraper("sadd") is True
expected_keys.remove("sadd")
assert runner.get_scraper_names() == expected_keys
assert runner.get_scraper("sadd") is None

0 comments on commit 4957b61

Please sign in to comment.