Change to reference period

Mike committed Feb 13, 2023
1 parent 8999dd5 commit 67283a6
Showing 5 changed files with 30 additions and 31 deletions.
30 changes: 15 additions & 15 deletions doc/main.md
@@ -21,30 +21,30 @@ The library has detailed API documentation which can be found in the menu on the

You should create an object of the GeoNodeToHDX class:

-geonodetohdx = GeoNodeToHDX('https://geonode.wfp.org', downloader)
-geonodetohdx = GeoNodeToHDX('https://geonode.themimu.info', downloader)
+geonodetohdx = GeoNodeToHDX("https://geonode.wfp.org", downloader)
+geonodetohdx = GeoNodeToHDX("https://geonode.themimu.info", downloader)

It has high level methods generate_datasets_and_showcases and
delete_other_datasets:

# generate datasets and showcases reading country and layer information from the GeoNode
-datasets = generate_datasets_and_showcases('maintainerid', 'orgid', 'orgname', updatefreq='Adhoc',
+datasets = generate_datasets_and_showcases("maintainerid", "orgid", "orgname", updatefreq="Adhoc",
subnational=True)
# generate datasets and showcases reading layer information ignoring region (country) in layers call
-countrydata = {'iso3': 'MMR', 'name': 'Myanmar', 'layers': None}
-datasets = generate_datasets_and_showcases('maintainerid', 'orgid', 'orgname', updatefreq='Adhoc',
+countrydata = {"iso3": "MMR", "name": "Myanmar", "layers": None}
+datasets = generate_datasets_and_showcases("maintainerid", "orgid", "orgname", updatefreq="Adhoc",
subnational=True, countrydata=countrydata)
# delete any datasets and associated showcases from HDX that are not in the list datasets
# (assuming matching organisation id, maintainer id and geonode url in the resource url)
delete_other_datasets(datasets)

-If you need more fine grained control, it has low level methods
+If you need more fine-grained control, it has low level methods
get_locationsdata, get_layersdata, generate_dataset_and_showcase:

# get countries where count > 0
countries = geonodetohdx.get_countries(use_count=True)
# get layers for country with ISO 3 code SDN
-layers = geonodetohdx.get_layers(countryiso='SDN')
+layers = geonodetohdx.get_layers(countryiso="SDN")
# get layers for all countries
layers = get_layers(countryiso=None)
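Putting the low-level calls together, a rough sketch (not from the library's docs): it assumes each country returned by get_countries is a dict with an "iso3" key, mirroring the countrydata example above, and it elides the arguments to generate_dataset_and_showcase, whose full signature is in the API documentation:

geonodetohdx = GeoNodeToHDX("https://geonode.wfp.org", downloader)
for country in geonodetohdx.get_countries(use_count=True):
    # "iso3" key assumed, mirroring the countrydata example above
    for layer in geonodetohdx.get_layers(countryiso=country["iso3"]):
        # build one dataset and showcase per layer; see the API docs
        # for the full generate_dataset_and_showcase signature
        ...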

@@ -55,20 +55,20 @@ creating a YAML configuration with the new configuration in this format:
  - deprecated

category_mapping:
-  Elevation: 'elevation - topography - altitude'
-  'Inland Waters': river
+  Elevation: "elevation - topography - altitude"
+  "Inland Waters": river

titleabstract_mapping:
  bridges:
    - bridges
    - transportation
-    - 'facilities and infrastructure'
+    - "facilities and infrastructure"
  idp:
    camp:
-      - 'displaced persons locations - camps - shelters'
-      - 'internally displaced persons - idp'
+      - "displaced persons locations - camps - shelters"
+      - "internally displaced persons - idp"
    else:
-      - 'internally displaced persons - idp'
+      - "internally displaced persons - idp"

ignore_data are any terms in the abstract that mean that the dataset
should not be added to HDX.
@@ -79,10 +79,10 @@ to HDX metadata tags.
titleabstract_mapping are mappings from terms in the title or abstract to
HDX metadata tags.

-For more fine grained tuning of these, you retrieve the dictionaries and
+For more fine-grained tuning of these, you retrieve the dictionaries and
manipulate them directly:

-geonodetohdx = GeoNodeToHDX('https://geonode.wfp.org', downloader)
+geonodetohdx = GeoNodeToHDX("https://geonode.wfp.org", downloader)
ignore_data = geonodetohdx.get_ignore_data()
category_mapping = geonodetohdx.get_category_mapping()
titleabstract_mapping = geonodetohdx.get_titleabstract_mapping()
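The returned structures mirror the YAML format above, so they can be extended in place (a sketch; the specific terms are illustrative):

ignore_data.append("deprecated")  # abstract terms that exclude a layer
category_mapping["Inland Waters"] = "river"  # GeoNode category -> HDX tag
titleabstract_mapping["bridges"] = [  # title/abstract term -> HDX tags
    "bridges",
    "transportation",
    "facilities and infrastructure",
]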
4 changes: 2 additions & 2 deletions requirements.txt
@@ -1,2 +1,2 @@
-python-slugify==6.1.2
-hdx-python-api==5.8.4
+python-slugify==8.0.0
+hdx-python-api==5.9.8
7 changes: 3 additions & 4 deletions setup.cfg
@@ -18,12 +18,11 @@ classifiers =
    Programming Language :: Python
    Programming Language :: Python :: 3
    Programming Language :: Python :: 3 :: Only
-    Programming Language :: Python :: 3.6
-    Programming Language :: Python :: 3.7
    Programming Language :: Python :: 3.8
    Programming Language :: Python :: 3.9
    Programming Language :: Python :: 3.10
    Programming Language :: Python :: 3.11
+    Programming Language :: Python :: 3.12
    Intended Audience :: Developers
    License :: OSI Approved :: MIT License
    Natural Language :: English
@@ -39,10 +38,10 @@ include_package_data = True
package_dir =
    =src

-python_requires = >=3.6
+python_requires = >=3.8

install_requires =
-    hdx-python-api>=5.8.4
+    hdx-python-api>=5.9.8
    python-slugify

[options.packages.find]
16 changes: 8 additions & 8 deletions src/hdx/scraper/geonode/geonodetohdx.py
@@ -261,7 +261,7 @@ def generate_dataset_and_showcase(
        dataset = Dataset({"title": origtitle})
        if get_date_from_title:
            ranges = dataset.remove_dates_from_title(
-                change_title=True, set_dataset_date=True
+                change_title=True, set_reference_period=True
            )
        else:
            ranges = list()
@@ -273,15 +273,15 @@
            dataset_notes = notes
        else:
            dataset_notes = f"{notes}\n\n{supplemental_information}"
-        dataset_date = parse_date(layer["date"])
+        reference_period = parse_date(layer["date"])
        if origtitle == title:
-            dataset.set_date_of_dataset(dataset_date)
+            dataset.set_reference_period(reference_period)
        else:
            dataset_notes = (
                f"{dataset_notes}\n\nOriginal dataset title: {origtitle}"
            )
            logger.info(
-                f"Using {ranges[0][0]}-{ranges[0][1]} instead of {dataset_date} for dataset date"
+                f"Using {ranges[0][0]}-{ranges[0][1]} instead of {reference_period} for reference period"
            )
        slugified_name = slugify(
            f"{self.get_orgname(metadata)}_geonode_{title}"
@@ -395,7 +395,7 @@ def generate_datasets_and_showcases(
        else:
            countries = self.get_countries()
        logger.info(f"Number of countries: {len(countries)}")
-        dataset_dates = OrderedDict()
+        reference_periods = OrderedDict()
        if "batch" not in kwargs:
            kwargs["batch"] = get_uuid()
        for countrydata in countries:
@@ -418,16 +418,16 @@
                for range in ranges:
                    if range[1] > max_date:
                        max_date = range[1]
-                prev_max = dataset_dates.get(dataset_name)
+                prev_max = reference_periods.get(dataset_name)
                if prev_max and prev_max > max_date:
                    logger.warning(
                        f'Ignoring {layer["title"]} with max date {max_date}!'
                        f" {dataset_name} (dates removed) with max date {prev_max} has been created already!"
                    )
                    continue
                create_dataset_showcase(dataset, showcase, **kwargs)
-                dataset_dates[dataset_name] = max_date
-        return list(dataset_dates.keys())
+                reference_periods[dataset_name] = max_date
+        return list(reference_periods.keys())

    def delete_other_datasets(
        self,
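The renames above track the hdx-python-api upgrade pinned in this commit (5.9.8), in which the "dataset date" terminology became "reference period". A minimal sketch of the updated calls, using only methods that appear in the diff (the title is made up for illustration):

from hdx.data.dataset import Dataset

dataset = Dataset({"title": "Myanmar Floods 2015-2016"})  # illustrative title
# strips the date range from the title and stores it as the reference period
ranges = dataset.remove_dates_from_title(
    change_title=True, set_reference_period=True
)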
4 changes: 2 additions & 2 deletions test-requirements.txt
@@ -1,4 +1,4 @@
-pytest==7.2.0
+pytest==7.2.1
pytest-cov==4.0.0
-tox==4.2.6
+tox==4.4.5
-r requirements.txt
