Commit

data: retrieve gdp and pop raw data for UA,MD
Irieo committed Jul 11, 2024
1 parent c50a377 commit a183b93
Showing 3 changed files with 87 additions and 0 deletions.
17 changes: 17 additions & 0 deletions rules/retrieve.smk
@@ -319,3 +319,20 @@ if config["enable"]["retrieve"]:
            "../envs/retrieve.yaml"
        script:
            "../scripts/retrieve_monthly_fuel_prices.py"


if config["enable"]["retrieve"] and any(c in ["UA", "MD"] for c in config["countries"]):

    rule retrieve_gdp_uamd:
        output:
            "data/GDP_per_capita_PPP_1990_2015_v2.nc",
            "data/ppp_2013_1km_Aggregated.tif",
        log:
            "logs/retrieve_gdp_uamd.log",
        resources:
            mem_mb=5000,
        retries: 2
        conda:
            "../envs/retrieve.yaml"
        script:
            "../scripts/retrieve_gdp_uamd.py"
21 changes: 21 additions & 0 deletions scripts/_helpers.py
@@ -370,6 +370,27 @@ def update_to(b=1, bsize=1, tsize=None):
urllib.request.urlretrieve(url, file, reporthook=update_to)


def retrieve_file(url, destination):
    """
    Download a file from a given URL to a local destination, using request
    headers that mimic a regular browser.

    This helps to overcome 'HTTP Error 403: Forbidden' responses from servers
    that only accept requests carrying browser-like headers.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
    }
    response = requests.get(url, headers=headers)
    response.raise_for_status()

    with open(destination, "wb") as f:
        f.write(response.content)
    logger.info(f"File downloaded and saved as {destination}")


def mock_snakemake(
    rulename,
    root_dir=None,
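For reference, the retrieve_file helper added above is a plain requests download with a browser-like User-Agent, so it can also be called directly outside Snakemake. A minimal usage sketch with the Dryad URL from scripts/retrieve_gdp_uamd.py (assumes the data/ directory already exists):

from _helpers import retrieve_file

# Fetch the Kummu et al. GDP dataset referenced in scripts/retrieve_gdp_uamd.py
retrieve_file(
    "https://datadryad.org/stash/downloads/file_stream/241947",
    "data/GDP_per_capita_PPP_1990_2015_v2.nc",
)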
49 changes: 49 additions & 0 deletions scripts/retrieve_gdp_uamd.py
@@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: : 2023-2024 The PyPSA-Eur Authors
#
# SPDX-License-Identifier: MIT
"""
Retrieve GDP and population raw data for Ukraine and Moldova.
"""

import logging
from pathlib import Path

from _helpers import retrieve_file

logger = logging.getLogger(__name__)


def retrieve(url, destination):
    logger.info(f"Downloading file from '{url}'.")
    retrieve_file(url, destination)
    logger.info("File downloaded.")


if __name__ == "__main__":
    if "snakemake" not in globals():
        from _helpers import mock_snakemake

        snakemake = mock_snakemake("retrieve_gdp_uamd")
        rootpath = Path("..")
    else:
        rootpath = Path(".")

    datasets = [
        # GDP_per_capita_PPP_1990_2015_v2.nc: raw GDP dataset. Source:
        # M. Kummu, M. Taka, J. H. A. Guillaume (2020), Data from: Gridded global
        # datasets for Gross Domestic Product and Human Development Index over
        # 1990-2015, Dryad, Dataset. doi: https://doi.org/10.5061/dryad.dk1j0
        (
            "https://datadryad.org/stash/downloads/file_stream/241947",
            "GDP_per_capita_PPP_1990_2015_v2.nc",
        ),
        # ppp_2013_1km_Aggregated.tif: raw population dataset. Available at:
        # https://data.humdata.org/dataset/
        (
            "https://data.worldpop.org/GIS/Population/Global_2000_2020/2013/0_Mosaicked/ppp_2013_1km_Aggregated.tif",
            "ppp_2013_1km_Aggregated.tif",
        ),
    ]

    # Download each dataset into the data/ directory
    for url, filename in datasets:
        file_path = rootpath / "data" / filename
        retrieve(url, file_path)
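The retrieved files are a NetCDF grid of GDP per capita and a 1 km GeoTIFF population raster. A short sketch of how downstream code might inspect them, assuming xarray and rasterio are available in the environment (an assumption, not part of this commit):

import rasterio
import xarray as xr

# Gridded GDP per capita (PPP), 1990-2015
gdp = xr.open_dataset("data/GDP_per_capita_PPP_1990_2015_v2.nc")
print(gdp)

# 1 km aggregated population raster
with rasterio.open("data/ppp_2013_1km_Aggregated.tif") as pop:
    print(pop.crs, pop.shape)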
