In [None]:
# initializer
import pathlib

# Make directories on shared (local) storage
for local_dir in [
    conf_local_root,
    conf_local_knmi,
    conf_local_odim,
    conf_local_vp,
    conf_local_conf,
]:
    local_dir = pathlib.Path(local_dir)
    # exist_ok=True already makes this a no-op for directories that exist,
    # so a separate exists() check (check-then-create) is not needed.
    # A regular file sitting at this path now raises FileExistsError instead
    # of being silently skipped.
    local_dir.mkdir(parents=True, exist_ok=True)
# Reference files: fetch the radar database from object storage, but only
# when it is not already present at conf_local_radar_db.
radar_db_missing = not pathlib.Path(conf_local_radar_db).exists()
if radar_db_missing:
    # Imported here so minio is only needed when a download is required
    from minio import Minio, S3Error

    # Client for the configured endpoint, connecting with secure=True
    minioClient = Minio(
        endpoint=param_minio_endpoint,
        access_key=param_minio_access_key,
        secret_key=param_minio_secret_key,
        secure=True,
    )
    print(f"{conf_local_radar_db} not found, downloading")
    # Store the object from the public bucket at conf_local_radar_db
    minioClient.fget_object(
        bucket_name=conf_minio_public_bucket_name,
        object_name=conf_minio_public_conf_radar_db_object_name,
        file_path=conf_local_radar_db,
    )

# Now produce a variable which acts as a marker for the workflow manager.
# We can then drag a line from the configuration / initializer cell
# and so time the start of the rest of the workflow.
# If you decide to make different sets of configurations, you can store them
# and decide per workflow which config to attach.
init_complete = "Yes"  # Can't send a bool, so a string is used instead
print("Finished initialization")

In [None]:
# list-knmi-files
"""
consume dummy var from config to signal workflow start
There is something dodgy going on with how
strings are being passed around.
The string "Yes" is being sent as '"Yes"'
So, to prevent extra quotes being introduced
we eval init_complete first before
we test if it contains "Yes"
"""
# Libraries
import requests

# Drop every single and double quote character that may have been added
# while the marker string was passed between cells
init_complete = init_complete.translate(str.maketrans("", "", "'\""))
if init_complete != "Yes":
    # The initializer did not hand over the expected marker: stop with a
    # non-zero exit status
    print("Workflow configuration was not complete, exitting")
    import sys

    sys.exit(1)
print("Workflow configuration succesfull")

# Notes:
# Timestamps are given in ISO 8601 format, e.g.
# 2020-01-01T00:00+00:00

# configure
start_ts = param_start_date
end_ts = param_end_date
# Look the radar up explicitly: .get() returns None for an unknown radar,
# which would otherwise surface as a cryptic "cannot unpack NoneType" error.
radar_conf = conf_radars.get(param_radar)
if radar_conf is None:
    raise ValueError(
        f"Unknown radar {param_radar!r}, expected one of {list(conf_radars)}"
    )
# Each radar entry holds four values; the last one is not needed here
datasetName, datasetVersion, api_url, _ = radar_conf
# Query parameters for the file-listing request
params = {
    "datasetName": datasetName,
    "datasetVersion": datasetVersion,
    "maxKeys": 10,
    "sorting": "asc",
    "orderBy": "created",
    "begin": start_ts,
    "end": end_ts,
}
# Request the file listing from the KNMI servers.
# The listing is paginated: keep requesting with the returned nextPageToken
# until the response no longer contains one.
dataset_files = []
while True:
    list_files_response = requests.get(
        url=api_url,
        headers={"Authorization": param_api_key},
        params=params,
        # Without a timeout a stalled connection would hang the workflow
        timeout=60,
    )
    # Fail with the real HTTP error (bad API key, quota, server error)
    # instead of a confusing failure while parsing the error body below
    list_files_response.raise_for_status()
    list_files = list_files_response.json()
    # A page without a "files" entry is treated as an empty page
    dset_files = list_files.get("files") or []
    # Keep only the values of every file record, in the order of the response
    dset_files = [list(dset_file.values()) for dset_file in dset_files]
    dataset_files += dset_files
    nextPageToken = list_files.get("nextPageToken")
    if not nextPageToken:
        break
    params["nextPageToken"] = nextPageToken

# KNMI outputs a file per 5 minutes; taking one per 15 minutes is less of a
# heavy hit on downloads and processing.
# Quick and dirty: keep a file only when the minute parsed from its name is
# one of the minutes in interval_list.
# TODO: check whether the API can filter for this on their end.
interval_list = list(range(0, 60, param_interval))
filtered_list = [
    entry
    for entry in dataset_files
    # NOTE(review): assumes entry[0] is the file name and that the last two
    # characters before its extension are the minute - confirm.
    if int(entry[0].rsplit("_", 1)[-1].split(".", 1)[0][-2:]) in interval_list
]

dataset_files = filtered_list
print(f"Found {len(dataset_files)} files")
print(dataset_files)