From a183b9356f6d32f36f5e2773aa43645f9148ba65 Mon Sep 17 00:00:00 2001
From: Irieo
Date: Thu, 11 Jul 2024 16:36:11 +0200
Subject: [PATCH] data: retrieve gdp and pop raw data for UA,MD

---
 rules/retrieve.smk           | 17 ++++++++++++++
 scripts/_helpers.py          | 21 ++++++++++++++++++
 scripts/retrieve_gdp_uamd.py | 43 ++++++++++++++++++++++++++++++++++++
 3 files changed, 81 insertions(+)
 create mode 100644 scripts/retrieve_gdp_uamd.py

diff --git a/rules/retrieve.smk b/rules/retrieve.smk
index 10ad9684a..aed2ec9dc 100644
--- a/rules/retrieve.smk
+++ b/rules/retrieve.smk
@@ -319,3 +319,20 @@ if config["enable"]["retrieve"]:
             "../envs/retrieve.yaml"
         script:
             "../scripts/retrieve_monthly_fuel_prices.py"
+
+
+if config["enable"]["retrieve"] and any(c in ["UA", "MD"] for c in config["countries"]):
+
+    rule retrieve_gdp_uamd:
+        output:
+            "data/GDP_per_capita_PPP_1990_2015_v2.nc",
+            "data/ppp_2013_1km_Aggregated.tif",
+        log:
+            "logs/retrieve_gdp_uamd.log",
+        resources:
+            mem_mb=5000,
+        retries: 2
+        conda:
+            "../envs/retrieve.yaml"
+        script:
+            "../scripts/retrieve_gdp_uamd.py"
diff --git a/scripts/_helpers.py b/scripts/_helpers.py
index 0bf92e396..c40945ad1 100644
--- a/scripts/_helpers.py
+++ b/scripts/_helpers.py
@@ -370,6 +370,27 @@ def update_to(b=1, bsize=1, tsize=None):
             urllib.request.urlretrieve(url, file, reporthook=update_to)
 
 
+def retrieve_file(url, destination):
+    """
+    Downloads a file from a specified URL to a local destination using custom
+    headers that mimic a desktop (Chrome) browser request.
+
+    This function is useful for overcoming 'HTTP Error 403: Forbidden'
+    issues, which often occur when the server requires more typical
+    browser-like headers for access.
+    """
+
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
+    }
+    response = requests.get(url, headers=headers)
+    response.raise_for_status()
+
+    with open(destination, "wb") as f:
+        f.write(response.content)
+    logger.info(f"File downloaded and saved as {destination}")
+
+
 def mock_snakemake(
     rulename,
     root_dir=None,
diff --git a/scripts/retrieve_gdp_uamd.py b/scripts/retrieve_gdp_uamd.py
new file mode 100644
index 000000000..2f6c115b7
--- /dev/null
+++ b/scripts/retrieve_gdp_uamd.py
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: : 2023-2024 The PyPSA-Eur Authors
+#
+# SPDX-License-Identifier: MIT
+"""
+Retrieve raw GDP and population datasets needed for Ukraine and Moldova.
+"""
+
+import logging
+from pathlib import Path
+
+from _helpers import retrieve_file
+
+logger = logging.getLogger(__name__)
+
+
+def retrieve(url, destination):
+    """
+    Download the file at ``url`` to ``destination``, logging start and end.
+    """
+    logger.info(f"Downloading file from '{url}'.")
+    retrieve_file(url, destination)
+    logger.info("File downloaded and validated.")
+
+
+if __name__ == "__main__":
+    if "snakemake" not in globals():
+        from _helpers import mock_snakemake
+
+        snakemake = mock_snakemake("retrieve_gdp_uamd")
+
+    # Source URLs, in the same order as the outputs declared by the
+    # `retrieve_gdp_uamd` rule. Each file is written to the path Snakemake
+    # expects, so the rule cannot finish with a missing output.
+    urls = [
+        # GDP_PPP_30arcsec_v3.nc: raw dataset. Available at: [M. Kummu, M. Taka, J. H. A. Guillaume. (2020), Data from: Gridded global datasets for Gross Domestic Product and Human Development Index over 1990-2015, Dryad, Dataset. doi: https://doi.org/10.5061/dryad.dk1j0]
+        "https://datadryad.org/stash/downloads/file_stream/241947",
+        # ppp_2020_1km_Aggregated.tif: raw dataset. Available at: https://data.humdata.org/dataset/
+        "https://data.worldpop.org/GIS/Population/Global_2000_2020/2020/0_Mosaicked/ppp_2020_1km_Aggregated.tif",
+    ]
+
+    for url, destination in zip(urls, snakemake.output):
+        retrieve(url, destination)