
Adapt to xscen-509 #51

Merged
merged 12 commits on Aug 23, 2022
8 changes: 8 additions & 0 deletions HISTORY.rst
@@ -2,6 +2,14 @@
History
=======

+v0.3.2 (2022-08-23)
+-------------------
+Contributor to this version: Pascal Bourgault (:user:`aulemahal`).
+
+Breaking changes
+^^^^^^^^^^^^^^^^
+* Following a change in intake-esm, xscen now uses "cat:" to prefix the dataset attributes extracted from the catalog. All catalog-generated attributes should now be valid when saving to netCDF. (:issue:`13`, :pull:`51`).
+
v0.3.0 (2022-08-23)
-------------------
Contributors to this version: Gabriel Rondeau-Genesse (:user:`RondeauG`), Juliette Lavoie (:user:`juliettelavoie`), Trevor James Smith (:user:`Zeitsperre`) and Pascal Bourgault (:user:`aulemahal`).
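A minimal sketch of the new convention (the filename is hypothetical, and this assumes an HDF5-based netCDF backend, which rejects "/" in attribute names but accepts ":"):

    import xarray as xr

    ds = xr.Dataset(attrs={"cat:domain": "QC", "cat:processing_level": "raw"})
    ds.to_netcdf("example.nc")  # passes with "cat:"; the old "cat/" names would error out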
2 changes: 1 addition & 1 deletion docs/notebooks
Submodule notebooks updated from af2c95 to 66447d
8 changes: 4 additions & 4 deletions xscen/aggregate.py
@@ -128,7 +128,7 @@ def climatological_mean(
        ds_rolling[vv].attrs["history"] = history

    if to_level is not None:
-        ds_rolling.attrs["cat/processing_level"] = to_level
+        ds_rolling.attrs["cat:processing_level"] = to_level

    return ds_rolling

@@ -237,7 +237,7 @@ def compute_deltas(
    deltas = deltas.reindex_like(ds)

    if to_level is not None:
-        deltas.attrs["cat/processing_level"] = to_level
+        deltas.attrs["cat:processing_level"] = to_level

    return deltas

@@ -448,8 +448,8 @@ def spatial_mean(

    # Attrs
    if to_domain is not None:
-        ds_agg.attrs["cat/domain"] = to_domain
+        ds_agg.attrs["cat:domain"] = to_domain
    if to_level is not None:
-        ds_agg.attrs["cat/processing_level"] = to_level
+        ds_agg.attrs["cat:processing_level"] = to_level

    return ds_agg
12 changes: 6 additions & 6 deletions xscen/biasadjust.py
@@ -171,9 +171,9 @@ def train(
    }

    # attrs that are needed to open with .to_dataset_dict()
-    for a in ["cat/xrfreq", "cat/domain", "cat/id"]:
+    for a in ["cat:xrfreq", "cat:domain", "cat:id"]:
        ds.attrs[a] = dhist.attrs[a] if a in dhist.attrs else None
-    ds.attrs["cat/processing_level"] = f"training_{var[0]}"
+    ds.attrs["cat:processing_level"] = f"training_{var[0]}"

    return ds

@@ -287,12 +287,12 @@ def adjust(
    dscen = xr.Dataset(data_vars={var: dscen}, attrs=dsim.attrs)
    # TODO: History, attrs, etc. (TODO kept from previous version of `biasadjust`)
    # TODO: Check for variables to add (grid_mapping, etc.) (TODO kept from previous version of `biasadjust`)
-    dscen.attrs["cat/processing_level"] = to_level
-    dscen.attrs["cat/variable"] = parse_from_ds(dscen, ["variable"])["variable"]
+    dscen.attrs["cat:processing_level"] = to_level
+    dscen.attrs["cat:variable"] = parse_from_ds(dscen, ["variable"])["variable"]
    if bias_adjust_institution is not None:
-        dscen.attrs["cat/bias_adjust_institution"] = bias_adjust_institution
+        dscen.attrs["cat:bias_adjust_institution"] = bias_adjust_institution
    if bias_adjust_project is not None:
-        dscen.attrs["cat/bias_adjust_project"] = bias_adjust_project
+        dscen.attrs["cat:bias_adjust_project"] = bias_adjust_project

    if moving_yearly_window:
        dscen = unpack_moving_yearly_window(dscen)
6 changes: 3 additions & 3 deletions xscen/catalog.py
@@ -466,7 +466,7 @@ def update_from_ds(
        ----------
        ds : xarray.Dataset
            Dataset that we want to add to the catalog.
-            The columns of the catalog will be filled from the global attributes starting with 'cat/' of the dataset.
+            The columns of the catalog will be filled from the global attributes starting with 'cat:' of the dataset.
        info_dict : dict
            Optional extra information to fill the catalog.
        path : str
@@ -475,8 +475,8 @@
        d = {}

        for col in self.df.columns:
-            if f"cat/{col}" in ds.attrs:
-                d[col] = ds.attrs[f"cat/{col}"]
+            if f"cat:{col}" in ds.attrs:
+                d[col] = ds.attrs[f"cat:{col}"]
        if info_dict:
            d.update(info_dict)

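A hedged sketch of the method above, assuming `catalog` is an xscen ProjectCatalog-like object; the attribute value, info_dict keys and path are illustrative only:

    # update_from_ds reads every "cat:" attribute into the matching catalog column
    ds.attrs["cat:processing_level"] = "biasadjusted"
    catalog.update_from_ds(
        ds,
        info_dict={"format": "nc"},  # extra columns not carried as attrs (illustrative)
        path="/tmp/ds.nc",           # hypothetical destination of the dataset
    )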
8 changes: 4 additions & 4 deletions xscen/ensembles.py
@@ -81,7 +81,7 @@ def ensemble_stats(
    for a_key, a_val in attributes.items():
        if (
            (a_key not in ds.attrs)
-            or (a_key in ["cat/date_start", "cat/date_end"])
+            or (a_key in ["cat:date_start", "cat:date_end"])
            or (a_val != ds.attrs[a_key])
        ):
            del ens_stats.attrs[a_key]
@@ -90,11 +90,11 @@
        {
            key[4:]: [value]
            for key, value in ens_stats.attrs.items()
-            if key[:4] == "cat/"
+            if key.startswith("cat:")
        }
    )
-    ens_stats.attrs["cat/id"] = generate_id(df)[0]
+    ens_stats.attrs["cat:id"] = generate_id(df)[0]

-    ens_stats.attrs["cat/processing_level"] = to_level
+    ens_stats.attrs["cat:processing_level"] = to_level

    return ens_stats
16 changes: 8 additions & 8 deletions xscen/extract.py
@@ -301,11 +301,11 @@ def extract_dataset(

        ds.attrs = attrs
        if "time" not in ds.dims:
-            ds.attrs["cat/frequency"] = "fx"
-            ds.attrs["cat/xrfreq"] = "fx"
+            ds.attrs["cat:frequency"] = "fx"
+            ds.attrs["cat:xrfreq"] = "fx"
        else:
-            ds.attrs["cat/xrfreq"] = xrfreq
-            ds.attrs["cat/frequency"] = CV.xrfreq_to_frequency(xrfreq)
+            ds.attrs["cat:xrfreq"] = xrfreq
+            ds.attrs["cat:frequency"] = CV.xrfreq_to_frequency(xrfreq)

        # Subset time on the periods
        if periods is None and hasattr(catalog, "_requested_periods"):
@@ -321,12 +321,12 @@
        # Custom call to clisops
        if region is not None:
            ds = clisops_subset(ds, region)
-            ds.attrs["cat/domain"] = region["name"]
+            ds.attrs["cat:domain"] = region["name"]

        # add relevant attrs
-        ds.attrs["cat/processing_level"] = to_level
-        if "cat/variable" not in ds.attrs:
-            ds.attrs["cat/variable"] = parse_from_ds(ds, ["variable"])["variable"]
+        ds.attrs["cat:processing_level"] = to_level
+        if "cat:variable" not in ds.attrs:
+            ds.attrs["cat:variable"] = parse_from_ds(ds, ["variable"])["variable"]

        out_dict[xrfreq] = ds

8 changes: 4 additions & 4 deletions xscen/indicators.py
@@ -148,11 +148,11 @@ def compute_indicators(

            # TODO: Double-check History, units, attrs, add missing variables (grid_mapping), etc.
            out_dict[key].attrs = ds.attrs
-            out_dict[key].attrs.pop("cat/variable", None)
-            out_dict[key].attrs["cat/xrfreq"] = freq
-            out_dict[key].attrs["cat/frequency"] = CV.xrfreq_to_frequency(freq, None)
+            out_dict[key].attrs.pop("cat:variable", None)
+            out_dict[key].attrs["cat:xrfreq"] = freq
+            out_dict[key].attrs["cat:frequency"] = CV.xrfreq_to_frequency(freq, None)
            if to_level is not None:
-                out_dict[key].attrs["cat/processing_level"] = to_level
+                out_dict[key].attrs["cat:processing_level"] = to_level

        else:
            if isinstance(out, tuple):  # In the case of multiple outputs
8 changes: 4 additions & 4 deletions xscen/regrid.py
@@ -100,7 +100,7 @@ def regrid_dataset(
    # if weights_location does not exist, create it
    if not os.path.exists(weights_location):
        os.makedirs(weights_location)
-    id = ds.attrs["cat/id"] if "cat/id" in ds.attrs else "weights"
+    id = ds.attrs["cat:id"] if "cat:id" in ds.attrs else "weights"
    # give unique name to weights file
    weights_filename = os.path.join(
        weights_location,
@@ -190,9 +190,9 @@

    out = out.drop_vars("latitude_longitude", errors="ignore")
    # Attrs
-    out.attrs["cat/processing_level"] = to_level
-    out.attrs["cat/domain"] = (
-        ds_grid.attrs["cat/domain"] if "cat/domain" in ds_grid.attrs else None
+    out.attrs["cat:processing_level"] = to_level
+    out.attrs["cat:domain"] = (
+        ds_grid.attrs["cat:domain"] if "cat:domain" in ds_grid.attrs else None
    )
    return out

17 changes: 13 additions & 4 deletions xscen/utils.py
@@ -183,6 +183,15 @@ def natural_sort(_list: list):
    return sorted(_list, key=alphanum_key)


+def get_cat_attrs(ds: xr.Dataset | dict):
+    """Return the catalog-specific attributes from a dataset or dictionary."""
+    if isinstance(ds, (xr.Dataset, xr.DataArray)):
+        attrs = ds.attrs
+    else:
+        attrs = ds
+    return {k[4:]: v for k, v in attrs.items() if k.startswith("cat:")}
+
+
def maybe_unstack(
    ds: xr.Dataset,
    coords: str = None,
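A quick usage sketch for the new get_cat_attrs helper added above: the prefix "cat:" is four characters long, which is why k[4:] recovers the bare column name.

    import xarray as xr

    ds = xr.Dataset(attrs={"cat:domain": "QC", "cat:xrfreq": "D", "title": "ignored"})
    print(get_cat_attrs(ds))               # {'domain': 'QC', 'xrfreq': 'D'}
    print(get_cat_attrs({"cat:id": "x"}))  # plain dicts work too -> {'id': 'x'}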
@@ -369,13 +378,13 @@ def clean_up(
        or use the same substring matching rules as intake_esm:
        - ending with a '*' means check if the substring is contained in the string
        - starting with a '^' means check if the string starts with the substring.
-        eg. {'global': ['necessary note', '^cat/'], 'tasmax': 'new_name'}
+        eg. {'global': ['necessary note', '^cat:'], 'tasmax': 'new_name'}
    add_attrs : dict
        Dictionary where the keys are the variables and the values are another dictionary of attributes.
        For global attrs, use the key 'global'.
        eg. {'global': {'title': 'amazing new dataset'}, 'tasmax': {'note': 'important info about tasmax'}}
    change_attr_prefix : str
-        Replace "cat/" in the catalogue global attrs by this new string
+        Replace "cat:" in the catalogue global attrs with this new string.
    to_level : str
        The processing level to assign to the output.

@@ -439,7 +448,7 @@ def _search(a, b):
        else:
            return a == b

-    ds.attrs["cat/processing_level"] = to_level
+    ds.attrs["cat:processing_level"] = to_level

    # remove attrs
    if attrs_to_remove:
@@ -472,7 +481,7 @@

    if change_attr_prefix:
        for ds_attr in list(ds.attrs.keys()):
-            new_name = ds_attr.replace("cat/", change_attr_prefix)
+            new_name = ds_attr.replace("cat:", change_attr_prefix)
            if new_name:
                ds.attrs[new_name] = ds.attrs.pop(ds_attr)

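To close, a hedged sketch of clean_up with the renamed prefix, using only the parameters documented above for a dataset `ds` carrying "cat:" attributes; the attribute names and values are illustrative:

    from xscen.utils import clean_up

    # assuming clean_up returns the modified dataset
    ds_out = clean_up(
        ds,
        attrs_to_remove={"global": ["^cat:id"]},  # drop global attrs starting with "cat:id"
        change_attr_prefix="dataset:",            # "cat:domain" becomes "dataset:domain"
        to_level="final",
    )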