In [8]:
import datetime
import os
import subprocess

import xarray as xr

In [None]:
def _grib_valid_time_to_utc(rawValue):
    """Convert a GRIB_VALID_TIME attribute value into a naive UTC datetime."""
    try:
        # Kept from the original code path: values exposing `.dt.total_seconds()`.
        epochSeconds = int(rawValue.dt.total_seconds())
    except AttributeError:
        if isinstance(rawValue, str):
            # Textual form such as "1692576000 sec UTC": keep what precedes the unit.
            epochSeconds = int(rawValue.split("s")[0])
        else:
            # Numeric attribute (plain or numpy scalar).
            # NOTE(review): presumably also epoch seconds -- confirm against the converter.
            epochSeconds = int(rawValue)

    # Epoch seconds are timezone-independent; converting through UTC (instead of
    # the machine's local zone) keeps the result identical on every host.
    # tzinfo is stripped so the values stay naive, as they were before.
    return datetime.datetime.fromtimestamp(epochSeconds, tz=datetime.timezone.utc).replace(tzinfo=None)


def concatenate_variables(dataset, path, variable="u-component of wind [m/s]", bandName="u10", timeSlice=slice(10, 34)):
    """
    Description:
        Opens a NetCDF file, selects every data variable whose GRIB_COMMENT
        attribute equals `variable`, stacks those variables along a new `time`
        dimension built from their GRIB_VALID_TIME attributes, and returns the
        result as a single-variable dataset named `bandName`.

    Args:
        dataset (str): File name of the dataset, joined onto `path`.
        path (str): The directory containing the dataset.
        variable (str): GRIB_COMMENT value to select (default: 'u-component of wind [m/s]').
        bandName (str): Name of the concatenated variable in the output dataset (default: 'u10').
        timeSlice (slice | None): Positional slice applied to the sorted `time`
            dimension (default: slice(10, 34), i.e. positions 10..33).
            Pass None to keep every time step.

    Returns:
        xarray.Dataset: The concatenated dataset, sorted by `time`, sliced with
        `timeSlice` and loaded into memory (the source file is closed on return).

    Throws:
        ValueError: No variable carries the requested GRIB_COMMENT.
        AttributeError: A selected variable has no GRIB_VALID_TIME attribute.

    Execution
        dataset = "dataset.nc"
        path = "/path/to/folder/"
        variable = "u-component of wind [m/s]" | "v-component of wind [m/s]"
        bandName = "u10" | "v10"

    Notes:
        Time stamps are interpreted as epoch seconds and converted to naive UTC
        datetimes, so the output does not depend on the machine's timezone.
    """
    filePath = os.path.join(path, dataset)
    print("Opening dataset:", filePath)

    # The context manager guarantees the file handle is released, even on error.
    with xr.open_dataset(filePath, engine="netcdf4") as ds:
        # Select the variables based on the given variable name.
        print("Selecting variables with GRIB_COMMENT:", variable)
        bandNames = [name for name in ds.data_vars if ds[name].attrs.get("GRIB_COMMENT") == variable]
        if not bandNames:
            raise ValueError(f"No variable in {filePath!r} has GRIB_COMMENT == {variable!r}.")
        selectedVars = ds[bandNames]

        # One valid time per selected band, in the same order as the bands.
        # Attribute-style access raises AttributeError when the attribute is missing.
        validTimes = [_grib_valid_time_to_utc(selectedVars[name].GRIB_VALID_TIME) for name in bandNames]

        # Concatenate the selected variables along the time dimension.
        print("Concatenating selected variables along the time dimension.")
        stacked = xr.concat([selectedVars[name] for name in bandNames], dim=validTimes)

        # A plain list passed as `dim` yields a dimension named "concat_dim".
        print("Renaming time dimension and variables.")
        stacked = stacked.rename({"concat_dim": "time"})
        result = stacked.to_dataset()
        result = result.rename({list(result.data_vars)[0]: bandName})

        # Sort and slice the dataset if `time` dimension exists.
        if "time" in result.dims:
            print("Sorting and slicing the dataset based on the time dimension.")
            result = result.sortby("time")
            if timeSlice is not None:
                result = result.isel(time=timeSlice)

        # Materialize the values before the file is closed.
        result = result.load()

    print("Dataset processing completed.")
    return result


In [9]:
# Convert the WRF GRIB2 file to NetCDF with GDAL's gdal_translate command-line tool.
inp = "/mnt/nvme2tb/ffp/datasets/weather/WRF-20230821.grb2"
of = "/mnt/nvme2tb/ffp/datasets/weather/WRF-20230821.nc"
command = [
    "gdal_translate",
    "-of", "netCDF",  # output format
    inp,
    of,
]
# check=True raises CalledProcessError if gdal_translate exits with a non-zero status.
subprocess.run(command, check=True)

Input file size is 480, 400
0...10...20...30...40...50...60...70...80...90...100 - done.


CompletedProcess(args=['gdal_translate', '-of', 'netCDF', '/mnt/nvme2tb/ffp/datasets/weather/WRF-20230821.grb2', '/mnt/nvme2tb/ffp/datasets/weather/WRF-20230821.nc'], returncode=0)

In [10]:
# Read the converted NetCDF file fully into memory.
ds = xr.load_dataset(
    "/mnt/nvme2tb/ffp/datasets/weather/WRF-20230821.nc"
)

In [11]:
# Display the loaded dataset's summary (dimensions, coordinates, variables, attributes).
ds