Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update get_warming_level #215

Merged
merged 4 commits into from Jun 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions HISTORY.rst
Expand Up @@ -33,6 +33,7 @@ Internal changes
* Implemented a template to be used for unit tests. (:pull:`187`).
* Updated GitHub Actions to remove deprecation warnings. (:pull:`187`).
* Updated the cookiecutter used to generate boilerplate documentation and code via `cruft`. (:pull:`212`).
* A few changes to `subset_warming_level` so it doesn't need `driving_institution`. (:pull:`215`).

v0.6.0 (2023-05-04)
-------------------
Expand Down
2 changes: 1 addition & 1 deletion docs/notebooks/6_config.ipynb
Expand Up @@ -200,7 +200,7 @@
" argument:\n",
"```\n",
"\n",
"The most up-to-date list of modules can be consulted [here](https://xscen.readthedocs.io/en/latest/modules.html), as well as at the start of this tutorial. A simple example would be as follows:\n",
"The most up-to-date list of modules can be consulted [here](https://xscen.readthedocs.io/en/latest/apidoc/modules.html), as well as at the start of this tutorial. A simple example would be as follows:\n",
"```\n",
"aggregate:\n",
" compute_deltas:\n",
Expand Down
224 changes: 122 additions & 102 deletions xscen/extract.py
Expand Up @@ -23,7 +23,7 @@
from .spatial import subset
from .utils import CV
from .utils import ensure_correct_time as _ensure_correct_time
from .utils import natural_sort, standardize_periods
from .utils import get_cat_attrs, natural_sort, standardize_periods

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -822,7 +822,7 @@ def search_data_catalogs(

@parse_config
def get_warming_level(
realization: Union[xr.Dataset, str, list],
realization: Union[xr.Dataset, dict, str, list],
wl: float,
*,
window: int = 20,
Expand All @@ -835,11 +835,13 @@ def get_warming_level(

Parameters
----------
realization : Union[xr.Dataset, str, list]
Input dataset, string or list of strings indicating the models to be evaluated.
Strings should follow this formatting: mip-era_source_experiment_member. Regex wildcards (.*) are accepted, but may lead to unexpected results.
realization : xr.Dataset, dict, str or list of those
Model to be evaluated. Needs the four fields mip_era, source, experiment and member,
as a dict or in a Dataset's attributes. Strings should follow this formatting: {mip_era}_{source}_{experiment}_{member}.
Lists of dicts, strings or Datasets are also accepted, in which case the output will be a dict.
Regex wildcards (.*) are accepted, but may lead to unexpected results.
Datasets should include the catalogue attributes (starting by "cat:") required to create such a string: 'cat:mip_era', 'cat:experiment',
'cat:member', and either 'cat:source' for global models or 'cat:driving_institution' (optional) + 'cat:driving_model' for regional models.
'cat:member', and either 'cat:source' for global models or 'cat:driving_model' for regional models.
e.g. 'CMIP5_CanESM2_rcp85_r1i1p1'
wl : float
Warming level.
Expand All @@ -862,119 +864,137 @@ def get_warming_level(
Returns
-------
dict, list or str
If `models` is a Dataset or a string, the output will follow the format indicated by `return_period`.
If `models` is a list, the output will be a dictionary where the keys are the elements of `models`and the values follow the format indicated by `return_period`.
If `realization` is a Dataset, a dict or a string, the output will follow the format indicated by `return_period`.
If `realization` is a list, the output will be a dictionary where the keys are the selected columns from the csv and the values follow the format indicated by `return_period`.
"""
if tas_baseline_period is None:
tas_baseline_period = ["1850", "1900"]
tas_baseline_period = standardize_periods(tas_baseline_period, multiple=False)

if tas_csv is None:
tas_csv = Path(__file__).parent / "data/IPCC_annual_global_tas.csv"

if isinstance(realization, xr.Dataset):
# get info on ds
if pd.isna(realization.attrs.get("cat:driving_model", None)):
source_ds = realization.attrs["cat:source"]
else:
institution_ds = realization.attrs.get("cat:driving_institution", None)
source_ds = realization.attrs["cat:driving_model"]
if (institution_ds is not None) and (
source_ds[0 : len(institution_ds)] == institution_ds
):
source_ds = source_ds.replace(f"{institution_ds}-", "", 1)
exp_ds = realization.attrs["cat:experiment"]
member_ds = ".*" if ignore_member else realization.attrs["cat:member"]
mip_era_ds = realization.attrs["cat:mip_era"]
tas_baseline_period = standardize_periods(
tas_baseline_period or ["1850", "1900"], multiple=False
)

info_models = [f"{mip_era_ds}_{source_ds}_{exp_ds}_{member_ds}"]
if (window % 2) not in {0, 1}:
raise ValueError(f"window should be an integer, received {type(window)}")

elif isinstance(realization, str):
info_models = [realization]
elif isinstance(realization, list):
info_models = realization
else:
raise ValueError(
f"'realization' must be a Dataset, string or list. Received {type(realization)}."
)
FIELDS = ["mip_era", "source", "experiment", "member"]

# open csv
annual_tas = pd.read_csv(tas_csv, index_col="year")
out = dict()
for model in info_models:
if len(model.split("_")) != 4:
if tas_csv is None:
tas_csv = Path(__file__).parent / "data" / "IPCC_annual_global_tas.csv"

info = {}
if isinstance(realization, (xr.Dataset, str, dict)):
realization = [realization]
info_models = []
for real in realization:
info = {}
if isinstance(real, xr.Dataset):
attrs = get_cat_attrs(real)
# get info on ds
if attrs.get("driving_model") is None:
info["source"] = attrs["source"]
else:
info["source"] = attrs["driving_model"]
info["experiment"] = attrs["experiment"]
info["member"] = ".*" if ignore_member else attrs["member"]
info["mip_era"] = attrs["mip_era"]
elif isinstance(real, str):
(
info["mip_era"],
info["source"],
info["experiment"],
info["member"],
) = real.split("_")
aulemahal marked this conversation as resolved.
Show resolved Hide resolved
elif isinstance(real, dict) and set(real.keys()).issuperset(
(set(FIELDS) - {"member"}) if ignore_member else FIELDS
):
info = real
aulemahal marked this conversation as resolved.
Show resolved Hide resolved
if ignore_member:
info["member"] = ".*"
else:
raise ValueError(
"'realization' should follow the format: 'mip-era_source_experiment_member'."
f"'realization' must be a Dataset, dict, string or list. Received {type(real)}."
)
info_models.append(info)

# choose colum based in ds cat attrs
right_column = annual_tas.filter(regex=re.compile(model, re.IGNORECASE), axis=1)

if len(right_column.columns) > 1:
logger.info(
"More than one column of the csv fits the dataset metadata. Choosing the first one."
)
right_column = pd.DataFrame(right_column.iloc[:, 0])
# open csv, split column names for easier usage
annual_tas = pd.read_csv(tas_csv, index_col="year")
models = pd.DataFrame.from_records(
[c.split("_") for c in annual_tas.columns],
index=annual_tas.columns,
columns=FIELDS,
)

if len(right_column.columns) == 0:
out = {}
for model in info_models:
# choose colum based in ds cat attrs
mip = models.mip_era.str.match(model["mip_era"])
src = models.source.str.match(model["source"])
if not src.any():
# Maybe it's an RCM, then source may contain the institute
src = models.source.apply(lambda s: model["source"].endswith(s))
exp = models.experiment.str.match(model["experiment"])
mem = models.member.str.match(model["member"])

candidates = models[mip & src & exp & mem]
if candidates.empty:
warnings.warn(
f"No columns fit the attributes of the input dataset ({model})."
)
out[model] = [None, None] if return_horizon else None
else:
selected = "_".join([model[c] for c in FIELDS])
out[selected] = [None, None] if return_horizon else None
continue
if len(candidates) > 1:
logger.info(
f"Computing warming level +{wl}°C for {model} from column: {right_column.columns[0]}."
"More than one column of the csv fits the dataset metadata. Choosing the first one."
)
selected = candidates.index[0]
right_column = annual_tas.loc[:, selected]

# compute reference temperature for the warming
mean_base = right_column.loc[
tas_baseline_period[0] : tas_baseline_period[1]
].mean()

yearly_diff = right_column - mean_base # difference from reference

# get the start and end date of the window when the warming level is first reached

# shift(-1) is needed to reproduce IPCC results.
# rolling defines the window as [n-10,n+9], but the the IPCC defines it as [n-9,n+10], where n is the center year.
if window % 2 == 0: # Even window
rolling_diff = (
yearly_diff.rolling(window=window, min_periods=window, center=True)
.mean()
.shift(-1)
)
elif window % 2 == 1: # Odd windows do not require the shift
rolling_diff = yearly_diff.rolling(
window=window, min_periods=window, center=True
).mean()
else:
raise ValueError(
f"window should be an integer, received {type(window)}"
)

yr = rolling_diff.where(rolling_diff >= wl).first_valid_index()
if yr is None:
start_yr = np.nan
end_yr = np.nan
else:
start_yr = int(yr - window / 2 + 1)
end_yr = int(yr + window / 2)
logger.debug(
f"Computing warming level +{wl}°C for {model} from column: {selected}."
)

if np.isnan(start_yr):
logger.info(
f"Global warming level of +{wl}C is not reached by the last year of the provided 'tas_csv' file for {model}."
)
out[model] = [None, None] if return_horizon else None
else:
out[model] = (
standardize_periods([start_yr, end_yr], multiple=False)
if return_horizon
else str(yr)
)
# compute reference temperature for the warming
mean_base = right_column.loc[
tas_baseline_period[0] : tas_baseline_period[1]
].mean()

yearly_diff = right_column - mean_base # difference from reference

# get the start and end date of the window when the warming level is first reached
# shift(-1) is needed to reproduce IPCC results.
# rolling defines the window as [n-10,n+9], but the the IPCC defines it as [n-9,n+10], where n is the center year.
if window % 2 == 0: # Even window
rolling_diff = (
yearly_diff.rolling(window=window, min_periods=window, center=True)
.mean()
.shift(-1)
)
else: # window % 2 == 1: # Odd windows do not require the shift
rolling_diff = yearly_diff.rolling(
window=window, min_periods=window, center=True
).mean()

if len(out) == 1:
out = next(iter(out.values()))
yr = rolling_diff.where(rolling_diff >= wl).first_valid_index()
if yr is None:
logger.info(
f"Global warming level of +{wl}C is not reached by the last year of the provided 'tas_csv' file for {selected}."
)
out[selected] = [None, None] if return_horizon else None
else:
start_yr = int(yr - window / 2 + 1)
end_yr = int(yr + window / 2)
out[selected] = (
standardize_periods([start_yr, end_yr], multiple=False)
if return_horizon
else str(yr)
)

if len(out) != len(realization):
warnings.warn(
"Two or more input model specifications pointed towards the same column in the CSV, "
"the length of the output is different from the input."
)
if len(realization) == 1:
out = out.popitem()[1]
return out
aulemahal marked this conversation as resolved.
Show resolved Hide resolved


Expand Down