
Adapt to xscen-509 #51

Merged
merged 12 commits on Aug 23, 2022
8 changes: 8 additions & 0 deletions HISTORY.rst
@@ -2,6 +2,14 @@
History
=======

+v0.3.2 (2022-08-23)
+-------------------
+Contributor to this version: Pascal Bourgault (:user:`aulemahal`).
+
+Breaking changes
+^^^^^^^^^^^^^^^^
+* Following a change in intake-esm, xscen now uses "cat:" to prefix the dataset attributes extracted from the catalog. All catalog-generated attributes should now be valid when saving to netCDF. (:issue:`13`, :pull:`51`).
+
v0.3.0 (2022-08-23)
-------------------
Contributors to this version: Gabriel Rondeau-Genesse (:user:`RondeauG`), Juliette Lavoie (:user:`juliettelavoie`), Trevor James Smith (:user:`Zeitsperre`) and Pascal Bourgault (:user:`aulemahal`).
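A minimal sketch of the new convention (the filename is hypothetical, and this assumes an HDF5-based netCDF backend, which rejects "/" in attribute names but accepts ":"):

    import xarray as xr

    ds = xr.Dataset(attrs={"cat:domain": "QC", "cat:processing_level": "raw"})
    ds.to_netcdf("example.nc")  # passes with "cat:"; the old "cat/" names would error out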
2 changes: 1 addition & 1 deletion docs/notebooks
Submodule notebooks updated from af2c95 to 66447d
8 changes: 4 additions & 4 deletions xscen/aggregate.py
@@ -128,7 +128,7 @@ def climatological_mean(
        ds_rolling[vv].attrs["history"] = history

    if to_level is not None:
-        ds_rolling.attrs["cat/processing_level"] = to_level
+        ds_rolling.attrs["cat:processing_level"] = to_level

    return ds_rolling

@@ -237,7 +237,7 @@ def compute_deltas(
    deltas = deltas.reindex_like(ds)

    if to_level is not None:
-        deltas.attrs["cat/processing_level"] = to_level
+        deltas.attrs["cat:processing_level"] = to_level

    return deltas

@@ -448,8 +448,8 @@ def spatial_mean(

    # Attrs
    if to_domain is not None:
-        ds_agg.attrs["cat/domain"] = to_domain
+        ds_agg.attrs["cat:domain"] = to_domain
    if to_level is not None:
-        ds_agg.attrs["cat/processing_level"] = to_level
+        ds_agg.attrs["cat:processing_level"] = to_level

    return ds_agg
12 changes: 6 additions & 6 deletions xscen/biasadjust.py
@@ -171,9 +171,9 @@ def train(
    }

    # attrs that are needed to open with .to_dataset_dict()
-    for a in ["cat/xrfreq", "cat/domain", "cat/id"]:
+    for a in ["cat:xrfreq", "cat:domain", "cat:id"]:
        ds.attrs[a] = dhist.attrs[a] if a in dhist.attrs else None
-    ds.attrs["cat/processing_level"] = f"training_{var[0]}"
+    ds.attrs["cat:processing_level"] = f"training_{var[0]}"

    return ds

@@ -287,12 +287,12 @@ def adjust(
    dscen = xr.Dataset(data_vars={var: dscen}, attrs=dsim.attrs)
    # TODO: History, attrs, etc. (TODO kept from previous version of `biasadjust`)
    # TODO: Check for variables to add (grid_mapping, etc.) (TODO kept from previous version of `biasadjust`)
-    dscen.attrs["cat/processing_level"] = to_level
-    dscen.attrs["cat/variable"] = parse_from_ds(dscen, ["variable"])["variable"]
+    dscen.attrs["cat:processing_level"] = to_level
+    dscen.attrs["cat:variable"] = parse_from_ds(dscen, ["variable"])["variable"]
    if bias_adjust_institution is not None:
-        dscen.attrs["cat/bias_adjust_institution"] = bias_adjust_institution
+        dscen.attrs["cat:bias_adjust_institution"] = bias_adjust_institution
    if bias_adjust_project is not None:
-        dscen.attrs["cat/bias_adjust_project"] = bias_adjust_project
+        dscen.attrs["cat:bias_adjust_project"] = bias_adjust_project

    if moving_yearly_window:
        dscen = unpack_moving_yearly_window(dscen)
6 changes: 3 additions & 3 deletions xscen/catalog.py
@@ -466,7 +466,7 @@ def update_from_ds(
        ----------
        ds : xarray.Dataset
            Dataset that we want to add to the catalog.
-            The columns of the catalog will be filled from the global attributes starting with 'cat/' of the dataset.
+            The columns of the catalog will be filled from the global attributes starting with 'cat:' of the dataset.
        info_dict : dict
            Optional extra information to fill the catalog.
        path : str
@@ -475,8 +475,8 @@
        d = {}

        for col in self.df.columns:
-            if f"cat/{col}" in ds.attrs:
-                d[col] = ds.attrs[f"cat/{col}"]
+            if f"cat:{col}" in ds.attrs:
+                d[col] = ds.attrs[f"cat:{col}"]
        if info_dict:
            d.update(info_dict)

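A hedged sketch of the method above, assuming `catalog` is an xscen ProjectCatalog-like object; the attribute value, info_dict keys and path are illustrative only:

    # update_from_ds reads every "cat:" attribute into the matching catalog column
    ds.attrs["cat:processing_level"] = "biasadjusted"
    catalog.update_from_ds(
        ds,
        info_dict={"format": "nc"},  # extra columns not carried as attrs (illustrative)
        path="/tmp/ds.nc",           # hypothetical destination of the dataset
    )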
8 changes: 4 additions & 4 deletions xscen/ensembles.py
@@ -81,7 +81,7 @@ def ensemble_stats(
    for a_key, a_val in attributes.items():
        if (
            (a_key not in ds.attrs)
-            or (a_key in ["cat/date_start", "cat/date_end"])
+            or (a_key in ["cat:date_start", "cat:date_end"])
            or (a_val != ds.attrs[a_key])
        ):
            del ens_stats.attrs[a_key]
@@ -90,11 +90,11 @@
        {
            key[4:]: [value]
            for key, value in ens_stats.attrs.items()
-            if key[:4] == "cat/"
+            if key.startswith("cat:")
        }
    )
-    ens_stats.attrs["cat/id"] = generate_id(df)[0]
+    ens_stats.attrs["cat:id"] = generate_id(df)[0]

-    ens_stats.attrs["cat/processing_level"] = to_level
+    ens_stats.attrs["cat:processing_level"] = to_level

    return ens_stats
16 changes: 8 additions & 8 deletions xscen/extract.py
@@ -301,11 +301,11 @@ def extract_dataset(

        ds.attrs = attrs
        if "time" not in ds.dims:
-            ds.attrs["cat/frequency"] = "fx"
-            ds.attrs["cat/xrfreq"] = "fx"
+            ds.attrs["cat:frequency"] = "fx"
+            ds.attrs["cat:xrfreq"] = "fx"
        else:
-            ds.attrs["cat/xrfreq"] = xrfreq
-            ds.attrs["cat/frequency"] = CV.xrfreq_to_frequency(xrfreq)
+            ds.attrs["cat:xrfreq"] = xrfreq
+            ds.attrs["cat:frequency"] = CV.xrfreq_to_frequency(xrfreq)

        # Subset time on the periods
        if periods is None and hasattr(catalog, "_requested_periods"):
@@ -321,12 +321,12 @@
        # Custom call to clisops
        if region is not None:
            ds = clisops_subset(ds, region)
-            ds.attrs["cat/domain"] = region["name"]
+            ds.attrs["cat:domain"] = region["name"]

        # add relevant attrs
-        ds.attrs["cat/processing_level"] = to_level
-        if "cat/variable" not in ds.attrs:
-            ds.attrs["cat/variable"] = parse_from_ds(ds, ["variable"])["variable"]
+        ds.attrs["cat:processing_level"] = to_level
+        if "cat:variable" not in ds.attrs:
+            ds.attrs["cat:variable"] = parse_from_ds(ds, ["variable"])["variable"]

        out_dict[xrfreq] = ds

8 changes: 4 additions & 4 deletions xscen/indicators.py
@@ -148,11 +148,11 @@ def compute_indicators(

            # TODO: Double-check History, units, attrs, add missing variables (grid_mapping), etc.
            out_dict[key].attrs = ds.attrs
-            out_dict[key].attrs.pop("cat/variable", None)
-            out_dict[key].attrs["cat/xrfreq"] = freq
-            out_dict[key].attrs["cat/frequency"] = CV.xrfreq_to_frequency(freq, None)
+            out_dict[key].attrs.pop("cat:variable", None)
+            out_dict[key].attrs["cat:xrfreq"] = freq
+            out_dict[key].attrs["cat:frequency"] = CV.xrfreq_to_frequency(freq, None)
            if to_level is not None:
-                out_dict[key].attrs["cat/processing_level"] = to_level
+                out_dict[key].attrs["cat:processing_level"] = to_level

        else:
            if isinstance(out, tuple):  # In the case of multiple outputs
8 changes: 4 additions & 4 deletions xscen/regrid.py
@@ -100,7 +100,7 @@ def regrid_dataset(
    # if weights_location does not exist, create it
    if not os.path.exists(weights_location):
        os.makedirs(weights_location)
-    id = ds.attrs["cat/id"] if "cat/id" in ds.attrs else "weights"
+    id = ds.attrs["cat:id"] if "cat:id" in ds.attrs else "weights"
    # give unique name to weights file
    weights_filename = os.path.join(
        weights_location,
@@ -190,9 +190,9 @@

    out = out.drop_vars("latitude_longitude", errors="ignore")
    # Attrs
-    out.attrs["cat/processing_level"] = to_level
-    out.attrs["cat/domain"] = (
-        ds_grid.attrs["cat/domain"] if "cat/domain" in ds_grid.attrs else None
+    out.attrs["cat:processing_level"] = to_level
+    out.attrs["cat:domain"] = (
+        ds_grid.attrs["cat:domain"] if "cat:domain" in ds_grid.attrs else None
    )
    return out

17 changes: 13 additions & 4 deletions xscen/utils.py
@@ -183,6 +183,15 @@ def natural_sort(_list: list):
    return sorted(_list, key=alphanum_key)


+def get_cat_attrs(ds: xr.Dataset | dict):
+    """Return the catalog-specific attributes from a dataset or dictionary."""
+    if isinstance(ds, (xr.Dataset, xr.DataArray)):
+        attrs = ds.attrs
+    else:
+        attrs = ds
+    return {k[4:]: v for k, v in attrs.items() if k.startswith("cat:")}
+
+
def maybe_unstack(
    ds: xr.Dataset,
    coords: str = None,
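A quick usage sketch for the new get_cat_attrs helper added above: the prefix "cat:" is four characters long, which is why k[4:] recovers the bare column name.

    import xarray as xr

    ds = xr.Dataset(attrs={"cat:domain": "QC", "cat:xrfreq": "D", "title": "ignored"})
    print(get_cat_attrs(ds))               # {'domain': 'QC', 'xrfreq': 'D'}
    print(get_cat_attrs({"cat:id": "x"}))  # plain dicts work too -> {'id': 'x'}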
@@ -369,13 +378,13 @@ def clean_up(
        or use the same substring matching rules as intake_esm:
        - ending with a '*' means check if the substring is contained in the string
        - starting with a '^' means check if the string starts with the substring.
-        eg. {'global': ['necessary note', '^cat/'], 'tasmax': 'new_name'}
+        eg. {'global': ['necessary note', '^cat:'], 'tasmax': 'new_name'}
    add_attrs : dict
        Dictionary where the keys are the variables and the values are another dictionary of attributes.
        For global attrs, use the key 'global'.
        eg. {'global': {'title': 'amazing new dataset'}, 'tasmax': {'note': 'important info about tasmax'}}
    change_attr_prefix : str
-        Replace "cat/" in the catalogue global attrs by this new string
+        Replace "cat:" in the catalogue global attrs with this new string.
    to_level : str
        The processing level to assign to the output.

@@ -439,7 +448,7 @@ def _search(a, b):
        else:
            return a == b

-    ds.attrs["cat/processing_level"] = to_level
+    ds.attrs["cat:processing_level"] = to_level

    # remove attrs
    if attrs_to_remove:
@@ -472,7 +481,7 @@

    if change_attr_prefix:
        for ds_attr in list(ds.attrs.keys()):
-            new_name = ds_attr.replace("cat/", change_attr_prefix)
+            new_name = ds_attr.replace("cat:", change_attr_prefix)
            if new_name:
                ds.attrs[new_name] = ds.attrs.pop(ds_attr)

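To close, a hedged sketch of clean_up with the renamed prefix, using only the parameters documented above for a dataset `ds` carrying "cat:" attributes; the attribute names and values are illustrative:

    from xscen.utils import clean_up

    # assuming clean_up returns the modified dataset
    ds_out = clean_up(
        ds,
        attrs_to_remove={"global": ["^cat:id"]},  # drop global attrs starting with "cat:id"
        change_attr_prefix="dataset:",            # "cat:domain" becomes "dataset:domain"
        to_level="final",
    )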