# DFS5.2
The `units` attribute of the `time` coordinate is missing in the DFS5.2 files, and different fields have different time resolutions.  
This notebook shows how to add the missing units and create a single multi-year dataset containing all fields.

In [1]:
# Import modules
import intake
import xarray as xr
import pandas as pd

# Import DFS5_2 entry
# Open the catalog file (relative path, resolved from the current working
# directory) and pick the "DFS5.2" entry under `NOCS`. `DFS` is used below to
# read the entry's description and default arguments and to load the data.
cat = intake.open_catalog("../catalog.yaml")
DFS = cat.NOCS["DFS5.2"]


# Function to add time units
def add_time_units(ds):
    """
    Add the missing time units, decode the time axis, and rename the time
    dimension after its inferred frequency.

    The year is parsed from the ``Identification`` global attribute: the text
    after the last ``": "`` is taken as the file name, and the year is what
    follows the last ``"_y"`` in it once ``".nc"`` is removed (e.g. a name
    ending in ``_y1990.nc`` gives ``1990``).

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset with a ``time`` coordinate and an ``Identification``
        attribute. The ``units`` attribute is written onto this object's
        ``time`` coordinate in place.

    Returns
    -------
    xarray.Dataset
        CF-decoded dataset whose ``time`` dimension is renamed to
        ``time<freq>``, where ``<freq>`` is the string returned by
        ``pandas.infer_freq`` for the decoded time values.

    Raises
    ------
    KeyError
        If the ``Identification`` attribute is absent.
    ValueError
        If no regular frequency can be inferred from the decoded time axis.
    """
    filename = ds.attrs["Identification"].split(": ")[-1]
    year = filename.split("_y")[-1].replace(".nc", "")
    ds.time.attrs["units"] = f"days since {year}"

    # Decode time and infer frequency
    ds = xr.decode_cf(ds)
    freq = pd.infer_freq(ds["time"].values)
    if freq is None:
        # Without this guard the dimension would silently become "timeNone",
        # which only surfaces later as an obscure concat/merge problem.
        raise ValueError(
            f"Could not infer a regular time frequency for {filename!r}"
        )

    # Rename time dimension
    return ds.rename(time=f"time{freq}")


# xarray arguments: reuse the ones set in the catalog entry and add the
# preprocess hook, so the time units are added to each file when it is opened.
xarray_kwargs = dict(
    **DFS.describe()["args"]["xarray_kwargs"],
    preprocess=add_time_units,
)

# Settings
time_freqs = ["3H", "D"]  # values passed to the catalog's `time_freq` parameter
years = range(1990, 2000)
concat_kwargs = dict(data_vars="minimal", coords="minimal", compat="override")

# Create one dataset
ds2merge = []
print(f"Reading {DFS.description}")
# Loop over time frequencies
for freq in time_freqs:
    ds2concat = []
    print(f"\t{freq}", end=": ")
    # Loop over years
    for year in years:
        print(year, end=", " if year != years[-1] else ".\n")
        ds2concat.append(
            DFS(year=year, time_freq=freq, xarray_kwargs=xarray_kwargs).to_dask()
        )

    # Take the concat dimension from the data instead of rebuilding it as
    # f"time{freq}": the preprocess step names the dimension after the label
    # returned by pd.infer_freq, which need not match the catalog's
    # `time_freq` string character-for-character (pandas has changed the
    # spelling of its frequency aliases between releases).
    time_dims = {
        str(dim)
        for yearly in ds2concat
        for dim in yearly.dims
        if str(dim).startswith("time")
    }
    if len(time_dims) != 1:
        raise ValueError(
            f"Expected exactly one time dimension for time_freq={freq!r}, "
            f"found {sorted(time_dims)}"
        )
    ds2merge.append(xr.concat(ds2concat, time_dims.pop(), **concat_kwargs))

# Each frequency keeps its own time dimension, so merging puts all fields in
# one dataset without forcing them onto a common time axis.
ds = xr.merge(ds2merge)
print(ds)

Reading Drakkar Forcing Set version 5.2
	3H: 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999.
	D: 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999.
<xarray.Dataset>
Dimensions:  (lat0: 256, lon0: 512, time3H: 29216, timeD: 3652)
Coordinates:
  * lon0     (lon0) float32 0.0 0.703125 1.40625 ... 358.59375 359.29688
  * lat0     (lat0) float32 89.46282 88.76695 88.06697 ... -88.76695 -89.46282
  * time3H   (time3H) datetime64[ns] 1990-01-01T03:00:00 ... 2000-01-01
  * timeD    (timeD) datetime64[ns] 1990-01-01 1990-01-02 ... 1999-12-31
Data variables:
    q2       (time3H, lat0, lon0) float32 dask.array<chunksize=(2920, 256, 512), meta=np.ndarray>
    t2       (time3H, lat0, lon0) float32 dask.array<chunksize=(2920, 256, 512), meta=np.ndarray>
    u10      (time3H, lat0, lon0) float32 dask.array<chunksize=(2920, 256, 512), meta=np.ndarray>
    v10      (time3H, lat0, lon0) float32 dask.array<chunksize=(2920, 256, 512), meta=np.ndarray>
    precip   (timeD, lat0, lon0)