In [77]:
import pygrib
import pandas as pd
import numpy as np
import os
import tarfile
import logging
import sys
import requests
from datetime import datetime, timedelta

In [78]:
# Install the default root handler, then let the LOG_LEVEL environment
# variable (INFO when unset) decide how verbose this module's logger is.
logging.basicConfig()
_configured_level = os.environ.get("LOG_LEVEL", logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(_configured_level)


class OpenDataAPI:
    """Minimal client for the file endpoints of the KNMI Open Data API.

    Every request carries the supplied token in the ``Authorization`` header
    and the response body is returned decoded from JSON, unmodified. Callers
    are expected to inspect the returned payload (e.g. for an ``"error"`` key).
    """

    def __init__(self, api_token: str, timeout: float = 30.0):
        """Store the endpoint root, the auth header and the request timeout.

        Args:
            api_token: Value sent verbatim in the ``Authorization`` header.
            timeout: Seconds ``requests`` waits on the server before raising
                a timeout error instead of blocking indefinitely.
        """
        self.base_url = "https://api.dataplatform.knmi.nl/open-data/v1"
        self.headers = {"Authorization": api_token}
        self.timeout = timeout

    def __get_data(self, url, params=None):
        """GET ``url`` (with optional query ``params``) and return the JSON body."""
        # requests has no default timeout; without one a stalled connection
        # would hang the notebook cell forever.
        response = requests.get(
            url, headers=self.headers, params=params, timeout=self.timeout
        )
        return response.json()

    def list_files(self, dataset_name: str, dataset_version: str, params: dict):
        """Return the JSON file listing of one dataset version.

        Args:
            dataset_name: Dataset identifier placed in the URL path.
            dataset_version: Dataset version placed in the URL path.
            params: Query parameters forwarded unchanged to the API.
        """
        return self.__get_data(
            f"{self.base_url}/datasets/{dataset_name}/versions/{dataset_version}/files",
            params=params,
        )

    def get_file_url(self, dataset_name: str, dataset_version: str, file_name: str):
        """Return the JSON payload of the ``.../files/<file_name>/url`` endpoint.

        Args:
            dataset_name: Dataset identifier placed in the URL path.
            dataset_version: Dataset version placed in the URL path.
            file_name: Name of the file whose download URL is requested.
        """
        return self.__get_data(
            f"{self.base_url}/datasets/{dataset_name}/versions/{dataset_version}/files/{file_name}/url"
        )


def download_file_from_temporary_download_url(download_url, filename, timeout=60):
    """Stream the content behind ``download_url`` into the local file ``filename``.

    Args:
        download_url: URL to fetch with a streaming GET request.
        filename: Path of the file to (over)write with the downloaded bytes.
        timeout: Seconds ``requests`` waits for the server (connect and each
            read) before giving up, so a stalled transfer cannot hang forever.

    Raises:
        SystemExit: With status 1 when the request or the write fails; the
            underlying exception is logged with its traceback first.
    """
    try:
        with requests.get(download_url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            with open(filename, "wb") as f:
                # Write in 8 KiB chunks so the whole file is never held in memory.
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
    except Exception:
        logger.exception("Unable to download file using download URL")
        sys.exit(1)

    logger.info(f"Successfully downloaded dataset file to {filename}")


def main():
    """Download the most recently created file of the configured dataset.

    Lists the dataset's files sorted by creation time (newest first, one
    result), asks the API for a temporary download URL for that file and
    streams it to a local file with the same name in the working directory.

    Raises:
        SystemExit: With status 1 when either API call reports an error or
            the listing contains no files.
    """
    # NOTE(review): credential embedded in the notebook — confirm it is a
    # shared/public key and not a personal secret. Set KNMI_API_KEY in the
    # environment to use a different token without editing this cell.
    api_key = os.environ.get(
        "KNMI_API_KEY",
        "eyJvcmciOiI1ZTU1NGUxOTI3NGE5NjAwMDEyYTNlYjEiLCJpZCI6ImE1OGI5NGZmMDY5NDRhZDNhZjFkMDBmNDBmNTQyNjBkIiwiaCI6Im11cm11cjEyOCJ9",
    )
    dataset_name = "harmonie_arome_cy40_p1"
    dataset_version = "0.2"
    logger.info(f"Fetching latest file of {dataset_name} version {dataset_version}")

    api = OpenDataAPI(api_token=api_key)

    # sort the files in descending order and only retrieve the first file
    params = {"maxKeys": 1, "orderBy": "created", "sorting": "desc"}
    response = api.list_files(dataset_name, dataset_version, params)
    if "error" in response:
        logger.error(f"Unable to retrieve list of files: {response['error']}")
        sys.exit(1)

    # An empty listing would otherwise surface as an opaque IndexError.
    files = response.get("files") or []
    if not files:
        logger.error(f"No files found for {dataset_name} version {dataset_version}")
        sys.exit(1)

    latest_file = files[0].get("filename")
    logger.info(f"Latest file is: {latest_file}")

    # fetch the download url and download the file
    response = api.get_file_url(dataset_name, dataset_version, latest_file)
    # Check this response the same way as the listing; otherwise a failed
    # request shows up as a KeyError on "temporaryDownloadUrl".
    if "error" in response:
        logger.error(f"Unable to retrieve download url: {response['error']}")
        sys.exit(1)
    download_file_from_temporary_download_url(response["temporaryDownloadUrl"], latest_file)


# Trigger the download when this code is executed directly (module name is
# "__main__", as it is in the notebook kernel); skipped when imported.
if __name__ == "__main__":
    main()

INFO:__main__:Fetching latest file of harmonie_arome_cy40_p1 version 0.2
INFO:__main__:Latest file is: harm40_v1_p1_2024052906.tar
INFO:__main__:Successfully downloaded dataset file to harm40_v1_p1_2024052906.tar


In [79]:

def _is_within_directory(root, candidate):
    """Return True when ``candidate`` is ``root`` itself or a path below it."""
    try:
        return os.path.commonpath([root, candidate]) == root
    except ValueError:
        # commonpath raises e.g. for paths on different drives; such a path
        # cannot be inside root.
        return False


def unpack_tar_file(tar_path):
    """Extract a tar archive into a folder named after the archive.

    The destination is created next to the archive and is the archive's file
    name with its last extension removed (``foo.tar`` -> ``foo``).

    Args:
        tar_path: Path to the tar archive to extract.

    Raises:
        tarfile.TarError: If a member would be written outside the
            destination folder; nothing is extracted in that case.
    """
    # Create destination folder path
    archive_stem = os.path.basename(tar_path).rsplit('.', 1)[0]
    dest_folder = os.path.join(os.path.dirname(tar_path), archive_stem)
    os.makedirs(dest_folder, exist_ok=True)  # Ensure the destination folder exists
    dest_root = os.path.realpath(dest_folder)

    with tarfile.open(tar_path, "r") as tar:
        members = tar.getmembers()

        # The archive comes from a download, so reject member names such as
        # "../x" or absolute paths before anything is written to disk.
        for member in members:
            target = os.path.realpath(os.path.join(dest_root, member.name))
            if not _is_within_directory(dest_root, target):
                raise tarfile.TarError(
                    f"Refusing to extract {member.name!r} outside {dest_folder!r}"
                )

        # Extract all contents of file in destination folder path. Where the
        # interpreter provides extraction filters, "data" additionally blocks
        # links that point outside the destination and special files.
        if hasattr(tarfile, "data_filter"):
            tar.extractall(path=dest_folder, members=members, filter="data")
        else:
            tar.extractall(path=dest_folder, members=members)

# Path of the tar archive to unpack — replace with the path to your tar file
# (here: the file name reported by the download step's log output). The
# contents end up in a folder next to it named after the archive.
tar_path = "harm40_v1_p1_2024052906.tar"
unpack_tar_file(tar_path)


In [83]:
# Eindhoven
eindhoven_lat = 51.4416
eindhoven_lon = 5.4697

# Folder with the unpacked GRIB files. Kept relative to the working directory
# (it must match the folder created by the unpack step) so the notebook is not
# tied to one user's machine; set GRIB_FOLDER to point somewhere else.
grib_folder = os.environ.get("GRIB_FOLDER", "harm40_v1_p1_2024052906")

# List needed parameters, see code matrix KNMI
parameters = {"temperature": "11", "windU": "33", "windV": "34", "globalRadiation":  "117"}

# Kelvin -> degrees Celsius offset.
# NOTE(review): assumes the GRIB temperature field is in Kelvin — confirm
# against the KNMI code matrix. The previous value 272.15 was off by one degree.
KELVIN_TO_CELSIUS_OFFSET = 273.15

# Initialize list to hold the data
data_list = []

# Loop over each file; sorted so rows come out in a reproducible order
# (os.listdir gives no ordering guarantee).
for file_name in sorted(os.listdir(grib_folder)):
    if not file_name.endswith('_GB'):
        continue

    grib_file = os.path.join(grib_folder, file_name)
    grbs = pygrib.open(grib_file)
    try:
        # Retrieve the lat/lon grid
        first_message = grbs.message(1)
        lats, lons = first_message.latlons()

        # Find the closest grid point (distance measured in plain degrees)
        distance = np.sqrt((lats - eindhoven_lat)**2 + (lons - eindhoven_lon)**2)
        min_index = distance.argmin()
        nearest_point_lat = lats.flat[min_index]
        nearest_point_lon = lons.flat[min_index]

        data_date = str(first_message.dataDate)  # Format: YYYYMMDD
        data_time = first_message.dataTime  # Format: HHMM

        # Create the base datetime object from dataDate and dataTime, then
        # shift it by the forecast step (in hours) to get the valid time
        base_datetime = datetime.strptime(f"{data_date} {data_time:04d}", "%Y%m%d %H%M")
        step_range = float(first_message.stepRange)
        valid_datetime = base_datetime + timedelta(hours=step_range)

        # Initialize a dictionary to hold the data for this file
        data_dict = {
            'file_name': file_name,
            'datetime': valid_datetime,
            'latitude': nearest_point_lat,
            'longitude': nearest_point_lon
        }

        # Extract data for each parameter
        for key in parameters:
            try:
                # Look the code up in `parameters` (defined in this cell); the
                # old `params` name only existed as leftover kernel state.
                grb_message = grbs.select(parameterName=parameters[key])[0]  # First instance
                data_dict[key] = grb_message.values.flat[min_index]
            except (IndexError, ValueError):
                # When parameter is not found in grib file. Store NaN under the
                # current key, never under a name left over from a previous
                # iteration.
                data_dict[key] = np.nan
    finally:
        # Release the file handle even if reading this file failed
        grbs.close()

    # Append dictionary to list
    data_list.append(data_dict)

if not data_list:
    raise FileNotFoundError(f"No '*_GB' GRIB files found in {grib_folder!r}")

# Convert list of dictionaries to DF
gribData = pd.DataFrame(data_list)
df = gribData.copy()
df['windSpeed'] = np.sqrt(df['windU']**2 + df['windV']**2)
df['temperature'] = df['temperature'] - KELVIN_TO_CELSIUS_OFFSET
df = df.drop(['windU', 'windV'], axis=1)

print(df)

                         file_name            datetime  latitude  longitude  \
0   HA40_N25_202405290600_00000_GB 2024-05-29 06:00:00    51.438      5.476   
1   HA40_N25_202405290600_00100_GB 2024-05-29 07:00:00    51.438      5.476   
2   HA40_N25_202405290600_00200_GB 2024-05-29 08:00:00    51.438      5.476   
3   HA40_N25_202405290600_00300_GB 2024-05-29 09:00:00    51.438      5.476   
4   HA40_N25_202405290600_00400_GB 2024-05-29 10:00:00    51.438      5.476   
5   HA40_N25_202405290600_00500_GB 2024-05-29 11:00:00    51.438      5.476   
6   HA40_N25_202405290600_00600_GB 2024-05-29 12:00:00    51.438      5.476   
7   HA40_N25_202405290600_00700_GB 2024-05-29 13:00:00    51.438      5.476   
8   HA40_N25_202405290600_00800_GB 2024-05-29 14:00:00    51.438      5.476   
9   HA40_N25_202405290600_00900_GB 2024-05-29 15:00:00    51.438      5.476   
10  HA40_N25_202405290600_01000_GB 2024-05-29 16:00:00    51.438      5.476   
11  HA40_N25_202405290600_01100_GB 2024-05-29 17:00: