From 67283a6d6655dc4b72e163f5fa036b02f15ef962 Mon Sep 17 00:00:00 2001 From: Mike Date: Mon, 13 Feb 2023 15:33:25 +1300 Subject: [PATCH] Change to reference period --- doc/main.md | 30 ++++++++++++------------- requirements.txt | 4 ++-- setup.cfg | 7 +++--- src/hdx/scraper/geonode/geonodetohdx.py | 16 ++++++------- test-requirements.txt | 4 ++-- 5 files changed, 30 insertions(+), 31 deletions(-) diff --git a/doc/main.md b/doc/main.md index d347256..6f24bc0 100755 --- a/doc/main.md +++ b/doc/main.md @@ -21,30 +21,30 @@ The library has detailed API documentation which can be found in the menu on the You should create an object of the GeoNodeToHDX class: - geonodetohdx = GeoNodeToHDX('https://geonode.wfp.org', downloader) - geonodetohdx = GeoNodeToHDX('https://geonode.themimu.info', downloader) + geonodetohdx = GeoNodeToHDX("https://geonode.wfp.org", downloader) + geonodetohdx = GeoNodeToHDX("https://geonode.themimu.info", downloader) It has high level methods generate_datasets_and_showcases and delete_other_datasets: # generate datasets and showcases reading country and layer information from the GeoNode - datasets = generate_datasets_and_showcases('maintainerid', 'orgid', 'orgname', updatefreq='Adhoc', + datasets = generate_datasets_and_showcases("maintainerid", "orgid", "orgname", updatefreq="Adhoc", subnational=True) # generate datasets and showcases reading layer information ignoring region (country) in layers call - countrydata = {'iso3': 'MMR', 'name': 'Myanmar', 'layers': None} - datasets = generate_datasets_and_showcases('maintainerid', 'orgid', 'orgname', updatefreq='Adhoc', + countrydata = {"iso3": "MMR", "name": "Myanmar", "layers": None} + datasets = generate_datasets_and_showcases("maintainerid", "orgid", "orgname", updatefreq="Adhoc", subnational=True, countrydata=countrydata) # delete any datasets and associated showcases from HDX that are not in the list datasets # (assuming matching organisation id, maintainer id and geonode url in the resource url) 
delete_other_datasets(datasets) -If you need more fine grained control, it has low level methods +If you need more fine-grained control, it has low-level methods get_locationsdata, get_layersdata, generate_dataset_and_showcase: # get countries where count > 0 countries = geonodetohdx.get_countries(use_count=True) # get layers for country with ISO 3 code SDN - layers = geonodetohdx.get_layers(countryiso='SDN') + layers = geonodetohdx.get_layers(countryiso="SDN") # get layers for all countries layers = get_layers(countryiso=None) @@ -55,20 +55,20 @@ creating a YAML configuration with the new configuration in this format: - deprecated category_mapping: - Elevation: 'elevation - topography - altitude' - 'Inland Waters': river + Elevation: "elevation - topography - altitude" + "Inland Waters": river titleabstract_mapping: bridges: - bridges - transportation - - 'facilities and infrastructure' + - "facilities and infrastructure" idp: camp: - - 'displaced persons locations - camps - shelters' - - 'internally displaced persons - idp' + - "displaced persons locations - camps - shelters" + - "internally displaced persons - idp" else: - - 'internally displaced persons - idp' + - "internally displaced persons - idp" ignore_data are any terms in the abstract that mean that the dataset should not be added to HDX. @@ -79,10 +79,10 @@ to HDX metadata tags. titleabstract_mapping are mappings from terms in the title or abstract to HDX metadata tags. 
-For more fine grained tuning of these, you retrieve the dictionaries and +For more fine-grained tuning of these, you retrieve the dictionaries and manipulate them directly: - geonodetohdx = GeoNodeToHDX('https://geonode.wfp.org', downloader) + geonodetohdx = GeoNodeToHDX("https://geonode.wfp.org", downloader) ignore_data = geonodetohdx.get_ignore_data() category_mapping = geonodetohdx.get_category_mapping() titleabstract_mapping = geonodetohdx.get_titleabstract_mapping() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 5b961a9..029d94f 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -python-slugify==6.1.2 -hdx-python-api==5.8.4 +python-slugify==8.0.0 +hdx-python-api==5.9.8 diff --git a/setup.cfg b/setup.cfg index 0340156..8fb9574 100755 --- a/setup.cfg +++ b/setup.cfg @@ -18,12 +18,11 @@ classifiers = Programming Language :: Python Programming Language :: Python :: 3 Programming Language :: Python :: 3 :: Only - Programming Language :: Python :: 3.6 - Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 + Programming Language :: Python :: 3.12 Intended Audience :: Developers License :: OSI Approved :: MIT License Natural Language :: English @@ -39,10 +38,10 @@ include_package_data = True package_dir = =src -python_requires = >=3.6 +python_requires = >=3.8 install_requires = - hdx-python-api>=5.8.4 + hdx-python-api>=5.9.8 python-slugify [options.packages.find] diff --git a/src/hdx/scraper/geonode/geonodetohdx.py b/src/hdx/scraper/geonode/geonodetohdx.py index c09b36b..8a2750a 100755 --- a/src/hdx/scraper/geonode/geonodetohdx.py +++ b/src/hdx/scraper/geonode/geonodetohdx.py @@ -261,7 +261,7 @@ def generate_dataset_and_showcase( dataset = Dataset({"title": origtitle}) if get_date_from_title: ranges = dataset.remove_dates_from_title( - change_title=True, set_dataset_date=True + 
change_title=True, set_reference_period=True ) else: ranges = list() @@ -273,15 +273,15 @@ def generate_dataset_and_showcase( dataset_notes = notes else: dataset_notes = f"{notes}\n\n{supplemental_information}" - dataset_date = parse_date(layer["date"]) + reference_period = parse_date(layer["date"]) if origtitle == title: - dataset.set_date_of_dataset(dataset_date) + dataset.set_reference_period(reference_period) else: dataset_notes = ( f"{dataset_notes}\n\nOriginal dataset title: {origtitle}" ) logger.info( - f"Using {ranges[0][0]}-{ranges[0][1]} instead of {dataset_date} for dataset date" + f"Using {ranges[0][0]}-{ranges[0][1]} instead of {reference_period} for reference period" ) slugified_name = slugify( f"{self.get_orgname(metadata)}_geonode_{title}" @@ -395,7 +395,7 @@ def generate_datasets_and_showcases( else: countries = self.get_countries() logger.info(f"Number of countries: {len(countries)}") - dataset_dates = OrderedDict() + reference_periods = OrderedDict() if "batch" not in kwargs: kwargs["batch"] = get_uuid() for countrydata in countries: @@ -418,7 +418,7 @@ def generate_datasets_and_showcases( for range in ranges: if range[1] > max_date: max_date = range[1] - prev_max = dataset_dates.get(dataset_name) + prev_max = reference_periods.get(dataset_name) if prev_max and prev_max > max_date: logger.warning( f'Ignoring {layer["title"]} with max date {max_date}!' @@ -426,8 +426,8 @@ def generate_datasets_and_showcases( ) continue create_dataset_showcase(dataset, showcase, **kwargs) - dataset_dates[dataset_name] = max_date - return list(dataset_dates.keys()) + reference_periods[dataset_name] = max_date + return list(reference_periods.keys()) def delete_other_datasets( self, diff --git a/test-requirements.txt b/test-requirements.txt index 309ffb0..da70283 100755 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,4 +1,4 @@ -pytest==7.2.0 +pytest==7.2.1 pytest-cov==4.0.0 -tox==4.2.6 +tox==4.4.5 -r requirements.txt