Commit

data: retrieve gdp and pop raw data for UA,MD
Irieo committed Jul 11, 2024
1 parent c50a377 commit a183b93
Showing 3 changed files with 87 additions and 0 deletions.
17 changes: 17 additions & 0 deletions rules/retrieve.smk
@@ -319,3 +319,20 @@ if config["enable"]["retrieve"]:
            "../envs/retrieve.yaml"
        script:
            "../scripts/retrieve_monthly_fuel_prices.py"


if config["enable"]["retrieve"] and any(c in ["UA", "MD"] for c in config["countries"]):

    rule retrieve_gdp_uamd:
        output:
            "data/GDP_per_capita_PPP_1990_2015_v2.nc",
            "data/ppp_2013_1km_Aggregated.tif",
        log:
            "logs/retrieve_gdp_uamd.log",
        resources:
            mem_mb=5000,
        retries: 2
        conda:
            "../envs/retrieve.yaml"
        script:
            "../scripts/retrieve_gdp_uamd.py"
21 changes: 21 additions & 0 deletions scripts/_helpers.py
@@ -370,6 +370,27 @@ def update_to(b=1, bsize=1, tsize=None):
urllib.request.urlretrieve(url, file, reporthook=update_to)


def retrieve_file(url, destination):
    """
    Download a file from a given URL to a local destination, using request
    headers that mimic a regular browser.

    This helps to overcome 'HTTP Error 403: Forbidden' responses from servers
    that only accept requests carrying browser-like headers.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
    }
    response = requests.get(url, headers=headers)
    response.raise_for_status()

    with open(destination, "wb") as f:
        f.write(response.content)
    logger.info(f"File downloaded and saved as {destination}")


def mock_snakemake(
    rulename,
    root_dir=None,
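For reference, the retrieve_file helper added above is a plain requests download with a browser-like User-Agent, so it can also be called directly outside Snakemake. A minimal usage sketch with the Dryad URL from scripts/retrieve_gdp_uamd.py (assumes the data/ directory already exists):

from _helpers import retrieve_file

# Fetch the Kummu et al. GDP dataset referenced in scripts/retrieve_gdp_uamd.py
retrieve_file(
    "https://datadryad.org/stash/downloads/file_stream/241947",
    "data/GDP_per_capita_PPP_1990_2015_v2.nc",
)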
49 changes: 49 additions & 0 deletions scripts/retrieve_gdp_uamd.py
@@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: : 2023-2024 The PyPSA-Eur Authors
#
# SPDX-License-Identifier: MIT
"""
Retrieve GDP and population raw data for Ukraine and Moldova.
"""

import logging
from pathlib import Path

from _helpers import retrieve_file

logger = logging.getLogger(__name__)


def retrieve(url, destination):
    logger.info(f"Downloading file from '{url}'.")
    retrieve_file(url, destination)
    logger.info("File downloaded.")


if __name__ == "__main__":
    if "snakemake" not in globals():
        from _helpers import mock_snakemake

        snakemake = mock_snakemake("retrieve_gdp_uamd")
        rootpath = Path("..")
    else:
        rootpath = Path(".")

    datasets = [
        # GDP_per_capita_PPP_1990_2015_v2.nc: raw GDP dataset. Source:
        # M. Kummu, M. Taka, J. H. A. Guillaume (2020), Data from: Gridded global
        # datasets for Gross Domestic Product and Human Development Index over
        # 1990-2015, Dryad, Dataset. doi: https://doi.org/10.5061/dryad.dk1j0
        (
            "https://datadryad.org/stash/downloads/file_stream/241947",
            "GDP_per_capita_PPP_1990_2015_v2.nc",
        ),
        # ppp_2013_1km_Aggregated.tif: raw population dataset. Available at:
        # https://data.humdata.org/dataset/
        (
            "https://data.worldpop.org/GIS/Population/Global_2000_2020/2013/0_Mosaicked/ppp_2013_1km_Aggregated.tif",
            "ppp_2013_1km_Aggregated.tif",
        ),
    ]

    # Download each dataset into the data/ directory
    for url, filename in datasets:
        file_path = rootpath / "data" / filename
        retrieve(url, file_path)
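The retrieved files are a NetCDF grid of GDP per capita and a 1 km GeoTIFF population raster. A short sketch of how downstream code might inspect them, assuming xarray and rasterio are available in the environment (an assumption, not part of this commit):

import rasterio
import xarray as xr

# Gridded GDP per capita (PPP), 1990-2015
gdp = xr.open_dataset("data/GDP_per_capita_PPP_1990_2015_v2.nc")
print(gdp)

# 1 km aggregated population raster
with rasterio.open("data/ppp_2013_1km_Aggregated.tif") as pop:
    print(pop.crs, pop.shape)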
