In [77]:
import xarray as xr
import subprocess
import os
import numpy
from datetime import timedelta
import datetime
import pandas as pd

In [94]:
def concatenate_variables(dataset, path, date, variable, bandName, tz=None):
    """
    Description:
        Stacks every band of a netCDF file whose ``GRIB_COMMENT`` attribute
        equals ``variable`` along a new ``time`` dimension (built from each
        band's ``GRIB_VALID_TIME`` attribute) and keeps the time steps that
        fall on ``date``.

    Args:
        dataset (str): File name of the dataset to process.
        path (str): The path to the directory containing the dataset.
        date (str): Day to keep, formatted as ``YYYYMMDD`` (e.g. "20230823").
        variable (str): ``GRIB_COMMENT`` value of the bands to select
            (e.g. 'u-component of wind [m/s]'). Required; there is no default.
        bandName (str): The name to assign to the concatenated variable in the
            output dataset (e.g. 'u10'). Required; there is no default.
        tz (datetime.tzinfo, optional): Time zone used to turn the epoch
            seconds in ``GRIB_VALID_TIME`` into wall-clock times. The default
            ``None`` uses the local time zone of the machine running the code,
            which is what this function has always done. Pass
            ``datetime.timezone.utc`` for a machine-independent time axis.

    Returns:
        dataset (xarray.Dataset): A dataset, loaded into memory, holding the
        single variable ``bandName`` with a leading ``time`` dimension, sorted
        by time and restricted to 00:00 through 23:59 of ``date``.

    Raises:
        AttributeError: If a selected band has no ``GRIB_VALID_TIME`` attribute.
        ValueError: If no band has the requested ``GRIB_COMMENT``, or a
            text-valued ``GRIB_VALID_TIME`` does not start with an integer.

    Execution
        dataset = "dataset.nc"
        path = "/pat/to/folder/"
        date = "20230823"
        variable = "u-component of wind [m/s]" | "v-component of wind [m/s]"
        bandName = "u10" | "v10"

    Notes:
        The time coordinate holds naive datetimes. ``date`` is therefore
        interpreted in the same zone as ``tz`` (local time by default), so the
        selected window shifts with the machine's time zone unless ``tz`` is
        given explicitly.
    """
    filePath = os.path.join(path, dataset)
    print("Opening dataset:", filePath)

    # The context manager releases the file handle; the result is loaded into
    # memory before the block ends so it stays usable after the file is closed.
    with xr.open_dataset(filePath, engine="netcdf4") as ds:
        matchingBands = [
            name for name in ds.data_vars
            if ds[name].attrs.get("GRIB_COMMENT") == variable
        ]
        print("Selecting variables with GRIB_COMMENT:", variable)
        if not matchingBands:
            # Fail here with a clear message instead of inside xr.concat([]).
            raise ValueError(
                "No variable in %s has GRIB_COMMENT == %r" % (filePath, variable)
            )
        selectedVars = ds[matchingBands]

        # One valid time per band, in the same order as the bands themselves.
        validTimes = []
        for name in matchingBands:
            rawTime = ds[name].GRIB_VALID_TIME
            if isinstance(rawTime, bytes):
                rawTime = rawTime.decode()
            if isinstance(rawTime, str):
                # Presumably some converter versions store the attribute as
                # text such as "1692619200 sec UTC" (the case the earlier
                # commented-out fallback targeted) - keep the leading integer.
                try:
                    rawTime = int(rawTime.split()[0])
                except (IndexError, ValueError) as err:
                    raise ValueError(
                        "Band %r has an unparseable GRIB_VALID_TIME: %r"
                        % (name, rawTime)
                    ) from err
            # With tz=None this is the local-time conversion used originally;
            # dropping tzinfo keeps the coordinate naive in every case.
            stamp = datetime.datetime.fromtimestamp(rawTime, tz)
            validTimes.append(stamp.replace(tzinfo=None))

        # Concatenate the selected variables along the time dimension.
        print("Concatenating selected variables along the time dimension.")
        # A named pandas index makes xr.concat create the "time" dimension and
        # coordinate directly, instead of the implicit "concat_dim" name.
        stacked = xr.concat(
            [selectedVars[name] for name in matchingBands],
            dim=pd.Index(validTimes, name="time"),
        )

        # Name the stacked variable.
        print("Renaming time dimension and variables.")
        result = stacked.to_dataset(name=bandName).load()

    # Sort by time and keep only the requested day (00:00 through 23:59).
    print("Sorting and slicing the dataset based on the time dimension.")
    dayStart = pd.to_datetime(date, format='%Y%m%d')
    dayEnd = pd.to_datetime(date + "2359", format='%Y%m%d%H%M')
    result = result.sortby("time").sel(time=slice(dayStart, dayEnd))

    print("Dataset processing completed.")
    return result


In [88]:
# Scratch version of the concatenation step. It relies on `ds` and
# `selectedVars` already existing in the kernel (they are created in other cells).

# Map every selected band to its valid time, converted with the kernel's local zone.
timeDict = {
    band: datetime.datetime.fromtimestamp(ds[band].GRIB_VALID_TIME)
    for band in selectedVars.data_vars
}

print("Concatenating selected variables along the time dimension.")
bandArrays = [selectedVars[varName] for varName in selectedVars.data_vars]
newVariable = xr.concat(bandArrays, dim=list(timeDict.values()))

# Rename the time dimension and variables.
print("Renaming time dimension and variables.")
bandName = "u10"
# A plain list passed as `dim` yields a dimension called "concat_dim".
newVariable = newVariable.rename({"concat_dim": "time"})
dataset = newVariable.to_dataset()
firstVarName = list(dataset.data_vars)[0]
dataset = dataset.rename({firstVarName: bandName})


Concatenating selected variables along the time dimension.
Renaming time dimension and variables.


In [89]:
print("Sorting and slicing the dataset based on the time dimension.")

# Bounds of the day to keep: 2023-08-23 00:00 through 23:59.
dayStart = pd.to_datetime("20230823", format='%Y%m%d')
dayEnd = pd.to_datetime("202308232359", format='%Y%m%d%H%M')

# Order the time steps, then keep only those inside the bounds.
dataset = dataset.sortby("time")
dataset = dataset.sel(time=slice(dayStart, dayEnd))


Sorting and slicing the dataset based on the time dimension.


In [104]:
# Report the first and last time step of the sliced dataset.
# `!s` forces str() on the 0-d numpy values, matching what `%s` produced.
timeAxis = dataset['time']
print(f"Time limits wind:\nFirst: {timeAxis[0].values!s}\nLast: {timeAxis[-1].values!s}")

Time limits wind:
First: 2023-08-23T00:00:00.000000000
Last: 2023-08-23T23:00:00.000000000


In [36]:
# Convert the .grb2 file to netCDF with the gdal_translate command-line tool.
inp="/mnt/nvme2tb/ffp/datasets/weather/WRF-20230821.grb2"
of="/mnt/nvme2tb/ffp/datasets/weather/WRF-20230821.nc"
# Passed as an argument list (no shell involved).
command = ["gdal_translate", "-of", "netCDF", inp, of]
# check=True raises CalledProcessError if gdal_translate exits with a non-zero status.
subprocess.run(command, check=True)

Input file size is 480, 400
0...10...20...30...40...50...60...70...80...90...100 - done.


CompletedProcess(args=['gdal_translate', '-of', 'netCDF', '/mnt/nvme2tb/ffp/datasets/weather/WRF-20230821.grb2', '/mnt/nvme2tb/ffp/datasets/weather/WRF-20230821.nc'], returncode=0)

In [37]:
# Read the converted netCDF file; `ds` is reused by the exploratory cells in this notebook.
ds = xr.load_dataset("/mnt/nvme2tb/ffp/datasets/weather/WRF-20230821.nc")

In [39]:
# Show the loaded dataset as the cell output.
ds

In [95]:
# u-component
datasetU = concatenate_variables(
    dataset="WRF-20230821.nc",
    path="/mnt/nvme2tb/ffp/datasets/weather/",
    date="20230823",
    variable="u-component of wind [m/s]",
    bandName="u10",
)
# v-component
datasetV = concatenate_variables(
    dataset="WRF-20230821.nc",
    path="/mnt/nvme2tb/ffp/datasets/weather/",
    date="20230823",
    variable="v-component of wind [m/s]",
    bandName="v10",
)


Opening dataset: /mnt/nvme2tb/ffp/datasets/weather/WRF-20230821.nc
Selecting variables with GRIB_COMMENT: u-component of wind [m/s]
Concatenating selected variables along the time dimension.
Renaming time dimension and variables.
Sorting and slicing the dataset based on the time dimension.
Dataset processing completed.
Opening dataset: /mnt/nvme2tb/ffp/datasets/weather/WRF-20230821.nc
Selecting variables with GRIB_COMMENT: v-component of wind [m/s]
Concatenating selected variables along the time dimension.
Renaming time dimension and variables.
Sorting and slicing the dataset based on the time dimension.
Dataset processing completed.


In [96]:
# Show the u-component dataset as the cell output.
datasetU

In [64]:
# Show the time coordinate of the u-component dataset.
datasetU.time

In [40]:
# Keep only the bands whose GRIB_COMMENT attribute names the wanted variable.
# Requires `ds` to have been loaded in an earlier cell.
variable = "u-component of wind [m/s]"
selectedVars = ds[
    [
        name
        for name, band in ds.data_vars.items()
        if band.attrs.get("GRIB_COMMENT") == variable
    ]
]


In [41]:
# Inspect the GRIB_VALID_TIME attribute of one band; the recorded output is an
# integer (presumably seconds since the Unix epoch - the next cells treat it as such).
selectedVars.data_vars['Band3'].GRIB_VALID_TIME

1692619200

In [45]:
# List the valid time of every selected band, converted with the kernel's local time zone.
for b in selectedVars.data_vars:
    validTime = datetime.datetime.fromtimestamp(selectedVars[b].GRIB_VALID_TIME)
    print(validTime)

2023-08-21 15:00:00
2023-08-21 16:00:00
2023-08-21 17:00:00
2023-08-21 18:00:00
2023-08-21 19:00:00
2023-08-21 20:00:00
2023-08-21 21:00:00
2023-08-21 22:00:00
2023-08-21 23:00:00
2023-08-22 00:00:00
2023-08-22 01:00:00
2023-08-22 02:00:00
2023-08-22 03:00:00
2023-08-22 04:00:00
2023-08-22 05:00:00
2023-08-22 06:00:00
2023-08-22 07:00:00
2023-08-22 08:00:00
2023-08-22 09:00:00
2023-08-22 10:00:00
2023-08-22 11:00:00
2023-08-22 12:00:00
2023-08-22 13:00:00
2023-08-22 14:00:00
2023-08-22 15:00:00
2023-08-22 16:00:00
2023-08-22 17:00:00
2023-08-22 18:00:00
2023-08-22 19:00:00
2023-08-22 20:00:00
2023-08-22 21:00:00
2023-08-22 22:00:00
2023-08-22 23:00:00
2023-08-23 00:00:00
2023-08-23 01:00:00
2023-08-23 02:00:00
2023-08-23 03:00:00
2023-08-23 04:00:00
2023-08-23 05:00:00
2023-08-23 06:00:00
2023-08-23 07:00:00
2023-08-23 08:00:00
2023-08-23 09:00:00
2023-08-23 10:00:00
2023-08-23 11:00:00
2023-08-23 12:00:00
2023-08-23 13:00:00
2023-08-23 14:00:00
2023-08-23 15:00:00
2023-08-23 16:00:00


In [47]:
# Check the kernel's local offset: fromtimestamp() without a tz argument returns
# local wall-clock time, so epoch 0 is shown shifted by the local UTC offset.
datetime.datetime.fromtimestamp(0)

datetime.datetime(1970, 1, 1, 2, 0)