Merge pull request #51 from Ouranosinc/adapt-to-xscen509
Adapt to xscen-509
aulemahal committed Aug 23, 2022
2 parents bd48318 + 2deb4ca commit 61baccc
Showing 16 changed files with 62 additions and 43 deletions.
2 changes: 1 addition & 1 deletion .cruft.json
@@ -11,7 +11,7 @@
"project_slug": "xscen",
"project_short_description": "A climate change scenario-building analysis framework, built with xclim/xarray.",
"pypi_username": "RondeauG",
"version": "0.3.1-beta",
"version": "0.3.2",
"use_pytest": "y",
"use_black": "y",
"use_pypi_deployment_with_travis": "n",
2 changes: 1 addition & 1 deletion .github/workflows/bump-version.yml
@@ -15,7 +15,7 @@ on:
- .yamllint.yaml
- AUTHORS.rst
- CONTRIBUTING.rst
- docs/**.ipynb
- docs/notebooks
- docs/*.py
- docs/*.rst
- environment-dev.yml
8 changes: 8 additions & 0 deletions HISTORY.rst
@@ -2,6 +2,14 @@
History
=======

v0.3.2 (2022-08-23)
-------------------
Contributor to this version: Pascal Bourgault (:user:`aulemahal`).

Breaking changes
^^^^^^^^^^^^^^^^
* Following a change in intake-esm, xscen now uses "cat:" to prefix the dataset attributes extracted from the catalog. All catalog-generated attributes should now be valid when saving to netCDF. (:issue:`13`, :pull:`51`).

v0.3.0 (2022-08-23)
-------------------
Contributors to this version: Gabriel Rondeau-Genesse (:user:`RondeauG`), Juliette Lavoie (:user:`juliettelavoie`), Trevor James Smith (:user:`Zeitsperre`) and Pascal Bourgault (:user:`aulemahal`).
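To illustrate the breaking change above (an editor's sketch, not part of the diff: the attribute names are taken from the changes below, the values are hypothetical). Since "/" is not a legal character in netCDF attribute names, the move to "cat:" lets datasets carrying catalog attributes be saved directly:

```python
import xarray as xr

ds = xr.Dataset()
# Old prefix; "/" is not allowed in netCDF attribute names, so these
# attrs had to be renamed or dropped before saving:
#   ds.attrs["cat/processing_level"] = "extracted"
# New prefix, following intake-esm:
ds.attrs["cat:processing_level"] = "extracted"  # hypothetical value
ds.attrs["cat:domain"] = "NAM"                  # hypothetical value
ds.to_netcdf("example.nc")  # catalog attributes now serialize as-is
```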
2 changes: 1 addition & 1 deletion docs/notebooks
Submodule notebooks updated from af2c95 to 19fa91
1 change: 1 addition & 0 deletions environment-dev.yml
@@ -35,6 +35,7 @@ dependencies:
- requests
- xcollection
# Dev
- bumpversion
- ipykernel
- ipython
- jupyter_client
2 changes: 1 addition & 1 deletion setup.cfg
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.3.1-beta
current_version = 0.3.2
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+).(?P<patch>\d+)(\-(?P<release>[a-z]+))?
2 changes: 1 addition & 1 deletion setup.py
@@ -61,6 +61,6 @@
test_suite="tests",
tests_require=["pytest", "pytest-cov"],
url="https://github.com/Ouranosinc/xscen",
version="0.3.1-beta",
version="0.3.2",
zip_safe=False,
)
2 changes: 1 addition & 1 deletion xscen/__init__.py
@@ -38,7 +38,7 @@

__author__ = """Gabriel Rondeau-Genesse"""
__email__ = "rondeau-genesse.gabriel@ouranos.ca"
__version__ = "0.3.1-beta"
__version__ = "0.3.2"


# monkeypatch so that warnings.warn() doesn't mention itself
8 changes: 4 additions & 4 deletions xscen/aggregate.py
@@ -128,7 +128,7 @@ def climatological_mean(
ds_rolling[vv].attrs["history"] = history

if to_level is not None:
ds_rolling.attrs["cat/processing_level"] = to_level
ds_rolling.attrs["cat:processing_level"] = to_level

return ds_rolling

@@ -237,7 +237,7 @@ def compute_deltas(
deltas = deltas.reindex_like(ds)

if to_level is not None:
deltas.attrs["cat/processing_level"] = to_level
deltas.attrs["cat:processing_level"] = to_level

return deltas

@@ -448,8 +448,8 @@ def spatial_mean(

# Attrs
if to_domain is not None:
ds_agg.attrs["cat/domain"] = to_domain
ds_agg.attrs["cat:domain"] = to_domain
if to_level is not None:
ds_agg.attrs["cat/processing_level"] = to_level
ds_agg.attrs["cat:processing_level"] = to_level

return ds_agg
12 changes: 6 additions & 6 deletions xscen/biasadjust.py
@@ -171,9 +171,9 @@ def train(
}

# attrs that are needed to open with .to_dataset_dict()
for a in ["cat/xrfreq", "cat/domain", "cat/id"]:
for a in ["cat:xrfreq", "cat:domain", "cat:id"]:
ds.attrs[a] = dhist.attrs[a] if a in dhist.attrs else None
ds.attrs["cat/processing_level"] = f"training_{var[0]}"
ds.attrs["cat:processing_level"] = f"training_{var[0]}"

return ds
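
A sketch of the loop above (editor's illustration; attribute values are hypothetical): `train` copies the minimal set of "cat:" attributes from the historical simulation (`dhist`) so the training dataset can later be reopened through the catalog's `to_dataset_dict()`:

```python
import xarray as xr

dhist = xr.Dataset(
    attrs={"cat:xrfreq": "D", "cat:domain": "NAM", "cat:id": "example-id"}
)
ds = xr.Dataset()

# Same logic as above: copy the attrs needed by .to_dataset_dict(),
# defaulting to None when the source dataset lacks them.
for a in ["cat:xrfreq", "cat:domain", "cat:id"]:
    ds.attrs[a] = dhist.attrs[a] if a in dhist.attrs else None
ds.attrs["cat:processing_level"] = "training_tas"  # hypothetical variable
```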

@@ -287,12 +287,12 @@ def adjust(
dscen = xr.Dataset(data_vars={var: dscen}, attrs=dsim.attrs)
# TODO: History, attrs, etc. (TODO kept from previous version of `biasadjust`)
# TODO: Check for variables to add (grid_mapping, etc.) (TODO kept from previous version of `biasadjust`)
dscen.attrs["cat/processing_level"] = to_level
dscen.attrs["cat/variable"] = parse_from_ds(dscen, ["variable"])["variable"]
dscen.attrs["cat:processing_level"] = to_level
dscen.attrs["cat:variable"] = parse_from_ds(dscen, ["variable"])["variable"]
if bias_adjust_institution is not None:
dscen.attrs["cat/bias_adjust_institution"] = bias_adjust_institution
dscen.attrs["cat:bias_adjust_institution"] = bias_adjust_institution
if bias_adjust_project is not None:
dscen.attrs["cat/bias_adjust_project"] = bias_adjust_project
dscen.attrs["cat:bias_adjust_project"] = bias_adjust_project

if moving_yearly_window:
dscen = unpack_moving_yearly_window(dscen)
6 changes: 3 additions & 3 deletions xscen/catalog.py
@@ -466,7 +466,7 @@ def update_from_ds(
----------
ds : xarray.Dataset
Dataset that we want to add to the catalog.
The columns of the catalog will be filled from the global attributes starting with 'cat/' of the dataset.
The columns of the catalog will be filled from the global attributes starting with 'cat:' of the dataset.
info_dict: dict
Optional extra information to fill the catalog.
path: str
@@ -475,8 +475,8 @@
d = {}

for col in self.df.columns:
if f"cat/{col}" in ds.attrs:
d[col] = ds.attrs[f"cat/{col}"]
if f"cat:{col}" in ds.attrs:
d[col] = ds.attrs[f"cat:{col}"]
if info_dict:
d.update(info_dict)

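As an illustration of the loop above (editor's sketch; the column list and attribute values are hypothetical), `update_from_ds` fills a catalog row only from the global attributes that carry the "cat:" prefix:

```python
import xarray as xr

ds = xr.Dataset(
    attrs={
        "cat:id": "example-id",               # hypothetical values
        "cat:processing_level": "extracted",
        "title": "skipped: no 'cat:' prefix",
    }
)

# Stand-in for self.df.columns; the real columns come from the catalog.
columns = ["id", "domain", "processing_level"]
d = {col: ds.attrs[f"cat:{col}"] for col in columns if f"cat:{col}" in ds.attrs}
# d == {"id": "example-id", "processing_level": "extracted"}
```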
8 changes: 4 additions & 4 deletions xscen/ensembles.py
@@ -81,7 +81,7 @@ def ensemble_stats(
for a_key, a_val in attributes.items():
if (
(a_key not in ds.attrs)
or (a_key in ["cat/date_start", "cat/date_end"])
or (a_key in ["cat:date_start", "cat:date_end"])
or (a_val != ds.attrs[a_key])
):
del ens_stats.attrs[a_key]
@@ -90,11 +90,11 @@
{
key[4:]: [value]
for key, value in ens_stats.attrs.items()
if key[:4] == "cat/"
if key.startswith("cat:")
}
)
ens_stats.attrs["cat/id"] = generate_id(df)[0]
ens_stats.attrs["cat:id"] = generate_id(df)[0]

ens_stats.attrs["cat/processing_level"] = to_level
ens_stats.attrs["cat:processing_level"] = to_level

return ens_stats
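
A sketch of the attribute round trip above (editor's illustration; attribute values are hypothetical): the 4-character "cat:" prefix is stripped to rebuild a one-row DataFrame, from which `generate_id` derives the ensemble's new catalog id:

```python
import pandas as pd
import xarray as xr

ens_stats = xr.Dataset(
    attrs={
        "cat:mip_era": "CMIP6",  # hypothetical attrs shared by all members
        "cat:domain": "NAM",
        "history": "untouched: not a catalog attribute",
    }
)

# As in the snippet above: drop the "cat:" prefix and wrap each value
# in a list so pandas builds a single-row DataFrame.
df = pd.DataFrame.from_dict(
    {k[4:]: [v] for k, v in ens_stats.attrs.items() if k.startswith("cat:")}
)
# list(df.columns) == ["mip_era", "domain"]; generate_id(df)[0] would
# then rebuild "cat:id" from these columns.
```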
16 changes: 8 additions & 8 deletions xscen/extract.py
@@ -301,11 +301,11 @@ def extract_dataset(

ds.attrs = attrs
if "time" not in ds.dims:
ds.attrs["cat/frequency"] = "fx"
ds.attrs["cat/xrfreq"] = "fx"
ds.attrs["cat:frequency"] = "fx"
ds.attrs["cat:xrfreq"] = "fx"
else:
ds.attrs["cat/xrfreq"] = xrfreq
ds.attrs["cat/frequency"] = CV.xrfreq_to_frequency(xrfreq)
ds.attrs["cat:xrfreq"] = xrfreq
ds.attrs["cat:frequency"] = CV.xrfreq_to_frequency(xrfreq)

# Subset time on the periods
if periods is None and hasattr(catalog, "_requested_periods"):
@@ -321,12 +321,12 @@
# Custom call to clisops
if region is not None:
ds = clisops_subset(ds, region)
ds.attrs["cat/domain"] = region["name"]
ds.attrs["cat:domain"] = region["name"]

# add relevant attrs
ds.attrs["cat/processing_level"] = to_level
if "cat/variable" not in ds.attrs:
ds.attrs["cat/variable"] = parse_from_ds(ds, ["variable"])["variable"]
ds.attrs["cat:processing_level"] = to_level
if "cat:variable" not in ds.attrs:
ds.attrs["cat:variable"] = parse_from_ds(ds, ["variable"])["variable"]

out_dict[xrfreq] = ds

8 changes: 4 additions & 4 deletions xscen/indicators.py
@@ -148,11 +148,11 @@ def compute_indicators(

# TODO: Double-check History, units, attrs, add missing variables (grid_mapping), etc.
out_dict[key].attrs = ds.attrs
out_dict[key].attrs.pop("cat/variable", None)
out_dict[key].attrs["cat/xrfreq"] = freq
out_dict[key].attrs["cat/frequency"] = CV.xrfreq_to_frequency(freq, None)
out_dict[key].attrs.pop("cat:variable", None)
out_dict[key].attrs["cat:xrfreq"] = freq
out_dict[key].attrs["cat:frequency"] = CV.xrfreq_to_frequency(freq, None)
if to_level is not None:
out_dict[key].attrs["cat/processing_level"] = to_level
out_dict[key].attrs["cat:processing_level"] = to_level

else:
if isinstance(out, tuple): # In the case of multiple outputs
8 changes: 4 additions & 4 deletions xscen/regrid.py
@@ -100,7 +100,7 @@ def regrid_dataset(
# if weights_location does no exist, create it
if not os.path.exists(weights_location):
os.makedirs(weights_location)
id = ds.attrs["cat/id"] if "cat/id" in ds.attrs else "weights"
id = ds.attrs["cat:id"] if "cat:id" in ds.attrs else "weights"
# give unique name to weights file
weights_filename = os.path.join(
weights_location,
@@ -190,9 +190,9 @@ def regrid_dataset(

out = out.drop_vars("latitude_longitude", errors="ignore")
# Attrs
out.attrs["cat/processing_level"] = to_level
out.attrs["cat/domain"] = (
ds_grid.attrs["cat/domain"] if "cat/domain" in ds_grid.attrs else None
out.attrs["cat:processing_level"] = to_level
out.attrs["cat:domain"] = (
ds_grid.attrs["cat:domain"] if "cat:domain" in ds_grid.attrs else None
)
return out

18 changes: 14 additions & 4 deletions xscen/utils.py
@@ -23,6 +23,7 @@
__all__ = [
"change_units",
"clean_up",
"get_cat_attrs",
"maybe_unstack",
"minimum_calendar",
"natural_sort",
@@ -183,6 +184,15 @@ def natural_sort(_list: list):
return sorted(_list, key=alphanum_key)


def get_cat_attrs(ds: xr.Dataset | dict):
"""Return the catalog-specific attributes from a dataset or dictionary."""
if isinstance(ds, (xr.Dataset, xr.DataArray)):
attrs = ds.attrs
else:
attrs = ds
return {k[4:]: v for k, v in attrs.items() if k.startswith("cat:")}


def maybe_unstack(
ds: xr.Dataset,
coords: str = None,
@@ -369,13 +379,13 @@ def clean_up(
or use the same substring matching rules as intake_esm:
- ending with a '*' means checks if the substring is contained in the string
- starting with a '^' means check if the string starts with the substring.
eg. {'global': ['necessary note', '^cat/'], 'tasmax': 'new_name'}
eg. {'global': ['necessary note', '^cat:'], 'tasmax': 'new_name'}
add_attrs: dict
Dictionary where the keys are the variables and the values are a another dictionary of attributes.
For global attrs, use the key 'global'.
eg. {'global': {'title': 'amazing new dataset'}, 'tasmax': {'note': 'important info about tasmax'}}
change_attr_prefix: str
Replace "cat/" in the catalogue global attrs by this new string
Replace "cat:" in the catalog global attrs by this new string
to_level: str
The processing level to assign to the output.
Expand Down Expand Up @@ -439,7 +449,7 @@ def _search(a, b):
else:
return a == b

ds.attrs["cat/processing_level"] = to_level
ds.attrs["cat:processing_level"] = to_level

# remove attrs
if attrs_to_remove:
Expand Down Expand Up @@ -472,7 +482,7 @@ def _search(a, b):

if change_attr_prefix:
for ds_attr in list(ds.attrs.keys()):
new_name = ds_attr.replace("cat/", change_attr_prefix)
new_name = ds_attr.replace("cat:", change_attr_prefix)
if new_name:
ds.attrs[new_name] = ds.attrs.pop(ds_attr)

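Tying the utils.py changes together (editor's sketch; attribute values are hypothetical): the new `get_cat_attrs` returns the catalog-specific attributes with their prefix stripped, and `clean_up`'s `change_attr_prefix` now rewrites "cat:" rather than "cat/":

```python
import xarray as xr

ds = xr.Dataset(
    attrs={
        "cat:domain": "NAM",             # hypothetical values
        "cat:processing_level": "final",
        "comment": "kept: not a catalog attribute",
    }
)

# Equivalent to get_cat_attrs(ds) as defined above:
cat_attrs = {k[4:]: v for k, v in ds.attrs.items() if k.startswith("cat:")}
assert cat_attrs == {"domain": "NAM", "processing_level": "final"}

# clean_up(ds, change_attr_prefix="dataset:") would instead rename the
# attributes in place, e.g. "cat:domain" -> "dataset:domain".
```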
