Commit

Merge 1fafbbe into 9312b23

mcarans committed Dec 11, 2023
2 parents 9312b23 + 1fafbbe commit a88e1f5
Showing 83 changed files with 14,459 additions and 14,264 deletions.
2 changes: 1 addition & 1 deletion .config/coveragerc
@@ -14,4 +14,4 @@ exclude_also =
if 0:
if __name__ == .__main__.:
if TYPE_CHECKING:
@(abc\.)?abstractmethod
8 changes: 4 additions & 4 deletions CONTRIBUTING.md
@@ -20,7 +20,7 @@ you make a git commit:

pre-commit install

The configuration file for this project is in a
non-standard location. Thus, you will need to edit your
`.git/hooks/pre-commit` file to reflect this. Change
the line that begins with `ARGS` to:
@@ -29,7 +29,7 @@ the line that begins with `ARGS` to:

With pre-commit, all code is formatted according to
[black](https://github.com/psf/black) and
[ruff](https://github.com/charliermarsh/ruff) guidelines.

To check if your changes pass pre-commit without committing, run:

@@ -46,8 +46,8 @@ Follow the example set out already in ``api.rst`` as you write the documentation
## Packages

[pip-tools](https://github.com/jazzband/pip-tools) is used for
package management. If you’ve introduced a new package to the
source code (i.e. anywhere in `src/`), please add it to the
`project.dependencies` section of
`pyproject.toml` with any known version constraints.
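For instance, a new dependency entry in `project.dependencies` might look like this (the `some-new-package` name and its constraint are illustrative only, not part of this project):

```toml
[project]
dependencies = [
    "hdx-python-api>=6.1.4",
    # hypothetical new package with a known lower bound:
    "some-new-package>=1.2",
]
```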

24 changes: 12 additions & 12 deletions README.md
@@ -4,19 +4,19 @@
[![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
[![Downloads](https://img.shields.io/pypi/dm/hdx-python-scraper.svg)](https://pypistats.org/packages/hdx-python-scraper)

The HDX Python Scraper Library is designed to enable you to easily develop code that
assembles data from one or more tabular sources that can be csv, xls, xlsx or JSON. It
uses a YAML file that specifies for each source what needs to be read and allows some
transformations to be performed on the data. The output is written to JSON, Google sheets
and/or Excel and includes the addition of
[Humanitarian Exchange Language (HXL)](https://hxlstandard.org/) hashtags specified in
the YAML file. Custom Python scrapers can also be written that conform to a defined
specification and the framework handles the execution of both configurable and custom
scrapers.

For more information, please read the
[documentation](https://hdx-python-scraper.readthedocs.io/en/latest/).

This library is part of the
[Humanitarian Data Exchange](https://data.humdata.org/) (HDX) project. If you have
humanitarian related data, please upload your datasets to HDX.
3 changes: 3 additions & 0 deletions documentation/main.md
@@ -26,6 +26,9 @@ install with:
pip install hdx-python-scraper[pandas]

## Breaking Changes
From 2.2.7, the resource name is used when available instead of creating a name from
the url, so tests that use saved data from the Read class may break
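To illustrate why such tests can break, here is a hypothetical sketch (not the actual Read class API; `saved_filename`, the slugification, and the fallback rule are all assumptions) of the naming change:

```python
from os.path import basename
from urllib.parse import urlparse


def saved_filename(url, resource_name=None):
    """Hypothetical illustration: prefer the resource name when available,
    otherwise fall back to a name derived from the url's last path segment."""
    if resource_name:
        # assumed slugification of the resource name for the saved-data file
        return resource_name.lower().replace(" ", "-")
    return basename(urlparse(url).path)


url = "https://example.com/files/data_v2.csv"
print(saved_filename(url))                         # url-derived: data_v2.csv
print(saved_filename(url, "Operational Dataset"))  # resource-name-derived
```

A test fixture saved under the url-derived name would no longer be found once the resource name takes precedence, which is the kind of breakage the note warns about.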

From 2.1.2, Python 3.7 no longer supported

From 2.0.1, all functions in outputs.update_tabs are methods in the new Writer class
5 changes: 3 additions & 2 deletions pyproject.toml
@@ -34,7 +34,8 @@ classifiers = [
requires-python = ">=3.8"

dependencies = [
"hdx-python-api>=6.1.3",
"hdx-python-api>=6.1.4",
"hdx-python-country>=3.6.3",
"gspread",
"regex",
]
@@ -48,7 +49,7 @@ content-type = "text/markdown"
Homepage = "https://github.com/OCHA-DAP/hdx-python-scraper"

[project.optional-dependencies]
pandas = ["pandas>=2.1.1"]
pandas = ["pandas>=2.1.3"]
test = ["pytest", "pytest-cov"]
dev = ["pre-commit"]

67 changes: 37 additions & 30 deletions requirements.txt
@@ -13,15 +13,15 @@ attrs==23.1.0
# jsonschema
cachetools==5.3.2
# via google-auth
certifi==2023.7.22
certifi==2023.11.17
# via requests
cffi==1.16.0
# via cryptography
cfgv==3.4.0
# via pre-commit
chardet==5.2.0
# via frictionless
charset-normalizer==3.3.1
charset-normalizer==3.3.2
# via requests
ckanapi==4.7
# via hdx-python-api
@@ -31,7 +31,7 @@ colorama==0.4.6
# via typer
coverage[toml]==7.3.2
# via pytest-cov
cryptography==41.0.5
cryptography==41.0.7
# via pyopenssl
defopt==6.4.0
# via hdx-python-api
@@ -49,29 +49,33 @@ email-validator==2.1.0.post1
# via hdx-python-api
et-xmlfile==1.1.0
# via openpyxl
filelock==3.13.0
filelock==3.13.1
# via virtualenv
frictionless==5.16.0
# via hdx-python-utilities
google-auth==2.23.3
google-auth==2.25.2
# via
# google-auth-oauthlib
# gspread
google-auth-oauthlib==1.1.0
# via gspread
gspread==5.12.0
gspread==5.12.2
# via hdx-python-scraper (pyproject.toml)
hdx-python-api==6.1.3
hdx-python-api==6.1.4
# via hdx-python-scraper (pyproject.toml)
hdx-python-country==3.5.8
# via hdx-python-api
hdx-python-country==3.6.3
# via
# hdx-python-api
# hdx-python-scraper (pyproject.toml)
hdx-python-utilities==3.6.2
# via hdx-python-country
humanize==4.8.0
# via
# hdx-python-api
# hdx-python-country
humanize==4.9.0
# via frictionless
identify==2.5.31
identify==2.5.33
# via pre-commit
idna==3.4
idna==3.6
# via
# email-validator
# requests
@@ -94,14 +98,16 @@ jsonschema==4.17.3
# frictionless
# tableschema-to-template
libhxl==5.1
# via hdx-python-country
# via
# hdx-python-api
# hdx-python-country
loguru==0.7.2
# via hdx-python-utilities
makefun==1.15.1
makefun==1.15.2
# via hdx-python-api
markdown-it-py==3.0.0
# via rich
marko==2.0.1
marko==2.0.2
# via frictionless
markupsafe==2.1.3
# via jinja2
@@ -113,19 +119,19 @@ nodeenv==1.8.0
# via pre-commit
num2words==0.5.13
# via quantulum3
numpy==1.26.1
numpy==1.26.2
# via pandas
oauthlib==3.2.2
# via requests-oauthlib
openpyxl==3.1.2
# via hdx-python-utilities
packaging==23.2
# via pytest
pandas==2.1.2
pandas==2.1.4
# via hdx-python-scraper (pyproject.toml)
petl==1.7.14
# via frictionless
platformdirs==3.11.0
platformdirs==4.1.0
# via virtualenv
pluggy==1.3.0
# via pytest
@@ -135,9 +141,9 @@ ply==3.11
# libhxl
pockets==0.9.1
# via sphinxcontrib-napoleon
pre-commit==3.5.0
pre-commit==3.6.0
# via hdx-python-scraper (pyproject.toml)
pyasn1==0.5.0
pyasn1==0.5.1
# via
# hdx-python-api
# ndg-httpsclient
@@ -147,13 +153,13 @@ pyasn1-modules==0.3.0
# via google-auth
pycparser==2.21
# via cffi
pydantic==2.4.2
pydantic==2.5.2
# via
# frictionless
# inflect
pydantic-core==2.10.1
pydantic-core==2.14.5
# via pydantic
pygments==2.16.1
pygments==2.17.2
# via rich
pyopenssl==23.3.0
# via
@@ -198,6 +204,7 @@ requests==2.31.0
# via
# ckanapi
# frictionless
# hdx-python-api
# libhxl
# requests-file
# requests-oauthlib
@@ -207,11 +214,11 @@ requests-oauthlib==1.3.1
# via google-auth-oauthlib
rfc3986==2.0.0
# via frictionless
rich==13.6.0
rich==13.7.0
# via typer
rsa==4.9
# via google-auth
ruamel-yaml==0.18.3
ruamel-yaml==0.18.5
# via hdx-python-utilities
ruamel-yaml-clib==0.2.8
# via ruamel-yaml
@@ -241,7 +248,7 @@ text-unidecode==1.3
# via python-slugify
typer[all]==0.9.0
# via frictionless
typing-extensions==4.8.0
typing-extensions==4.9.0
# via
# frictionless
# inflect
@@ -254,15 +261,15 @@ unidecode==1.3.7
# via
# libhxl
# pyphonetics
urllib3==2.0.7
urllib3==2.1.0
# via
# libhxl
# requests
validators==0.22.0
# via frictionless
virtualenv==20.24.6
virtualenv==20.25.0
# via pre-commit
wheel==0.41.2
wheel==0.42.0
# via libhxl
xlrd==2.0.1
# via hdx-python-utilities
2 changes: 1 addition & 1 deletion src/hdx/scraper/configurable/resource_downloader.py
@@ -33,7 +33,7 @@ def run(self) -> None:
"""
reader = self.get_reader("hdx")
resource = reader.read_hdx_metadata(self.datasetinfo)
url, path = reader.download_resource(self.name, resource)
url, path = reader.download_resource(resource, file_prefix=self.name)
logger.info(f"Downloading {url} to {path}")
copy2(path, join(self.folder, self.datasetinfo["filename"]))

4 changes: 3 additions & 1 deletion src/hdx/scraper/configurable/timeseries.py
@@ -50,7 +50,9 @@ def run(self) -> None:
"output_hxl"
]
rows = [headers, hxltags]
file_headers, iterator = self.get_reader().read(self.datasetinfo)
file_headers, iterator = self.get_reader(self.name).read(
self.datasetinfo
)
for inrow in iterator:
if isinstance(datecol, list):
dates = [str(inrow[x]) for x in datecol]
11 changes: 11 additions & 0 deletions src/hdx/scraper/runner.py
@@ -1167,6 +1167,17 @@ def get_hapi_metadata(
continue
hapi_dataset_metadata = scraper.get_hapi_dataset_metadata()
hapi_resource_metadata = scraper.get_hapi_resource_metadata()
reader = scraper.get_reader(name)
hxl_info = reader.hxl_info_hapi_resource_metadata(
hapi_resource_metadata
)
is_hxl = False
if hxl_info:
for sheet in hxl_info.get("sheets", ()):
if sheet["is_hxlated"]:
is_hxl = True
break
hapi_resource_metadata["is_hxl"] = is_hxl
dataset_id = hapi_dataset_metadata["hdx_id"]
resource_id = hapi_resource_metadata["hdx_id"]
hapi_metadata = results.get(
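The `is_hxl` determination added to `get_hapi_metadata` can be sketched standalone; the `hxl_info` dict shape (a `sheets` list whose entries carry an `is_hxlated` flag) is taken from the diff above, while the `is_hxlated` helper name is mine:

```python
def is_hxlated(hxl_info):
    # Mirrors the added scan: a resource counts as HXLated
    # if any sheet in its hxl_info reports is_hxlated.
    if not hxl_info:
        return False
    return any(sheet["is_hxlated"] for sheet in hxl_info.get("sheets", ()))


print(is_hxlated(None))                                                       # False
print(is_hxlated({"sheets": [{"is_hxlated": False}, {"is_hxlated": True}]}))  # True
```

Using `any` with a generator short-circuits on the first HXLated sheet, matching the `break` in the loop the commit adds.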