diff --git a/optim_esm_tools/__init__.py b/optim_esm_tools/__init__.py index 5b581da8..c846bde1 100644 --- a/optim_esm_tools/__init__.py +++ b/optim_esm_tools/__init__.py @@ -2,9 +2,10 @@ __version__ = '0.1.1' __author__ = 'Joran R. Angevaare' +from . import utils +from . import config from . import analyze from . import cmip_files from . import synda_files -from . import utils from . import _test_utils from . import plotting diff --git a/optim_esm_tools/_test_utils.py b/optim_esm_tools/_test_utils.py index 553b083d..101dc1b8 100644 --- a/optim_esm_tools/_test_utils.py +++ b/optim_esm_tools/_test_utils.py @@ -4,8 +4,12 @@ EXMPLE_DATA_SET = 'CMIP6/ScenarioMIP/CCCma/CanESM5/ssp585/r3i1p2f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_ssp585_r3i1p2f1_gn_201501-210012.nc' +def get_synda_loc(): + return os.path.join(os.environ.get('ST_HOME'), 'data') + + def get_example_data_loc(): - return os.path.join(os.environ.get('ST_HOME'), 'data', EXMPLE_DATA_SET) + return os.path.join(get_synda_loc(), EXMPLE_DATA_SET) def synda_test_available(): diff --git a/optim_esm_tools/analyze/cmip_handler.py b/optim_esm_tools/analyze/cmip_handler.py index 83ed8f0e..00a77cd0 100644 --- a/optim_esm_tools/analyze/cmip_handler.py +++ b/optim_esm_tools/analyze/cmip_handler.py @@ -11,9 +11,9 @@ from optim_esm_tools.utils import depricated -from .globals import __OPTIM_VERSION__, folder_fmt +from .globals import _CMIP_HANDLER_VERSION, _FOLDER_FMT from .xarray_tools import _native_date_fmt -from optim_esm_tools.plotting.map_maker import MapMaker +from optim_esm_tools.plotting.map_maker import MapMaker, make_title def read_ds( @@ -45,7 +45,7 @@ def read_ds( min_time, max_time, _ma_window, - __OPTIM_VERSION__, + _CMIP_HANDLER_VERSION, ) if os.path.exists(post_processed_file) and _cache: @@ -72,7 +72,7 @@ def read_ds( folders = base.split(os.sep) # start with -1 (for i==0) - metadata = {k: folders[-i - 1] for i, k in enumerate(folder_fmt[::-1])} + metadata = {k: folders[-i - 1] for i, k in enumerate(_FOLDER_FMT[::-1])} metadata.update( dict(path=base, file=post_processed_file, running_mean_period=_ma_window) ) @@ -140,6 +140,8 @@ def _calculate_variables( detrended_run_mean.attrs['units'] = data_set[variable].attrs.get( 'units', '{units}' ) + for det in detrended, detrended_run_mean: + det.attrs.update(data_set[variable].attrs.copy()) data_set[f'{variable}_run_mean_{_ma_window}'] = run_mean data_set[f'{variable}_detrend'] = detrended data_set[f'{variable}_detrend_run_mean_{_ma_window}'] = detrended_run_mean diff --git a/optim_esm_tools/analyze/globals.py b/optim_esm_tools/analyze/globals.py index 42e7c97f..f43ae941 100644 --- a/optim_esm_tools/analyze/globals.py +++ b/optim_esm_tools/analyze/globals.py @@ -1,3 +1,5 @@ -_seconds_to_year = 365.25 * 24 * 3600 -folder_fmt = 'model_group model scenario run domain variable grid version'.split() -__OPTIM_VERSION__ = '0.1.11' +from optim_esm_tools.config import config + +_SECONDS_TO_YEAR = int(config['constants']['seconds_to_year']) +_FOLDER_FMT = config['CMIP_files']['folder_fmt'].split() +_CMIP_HANDLER_VERSION = config['versions']['cmip_handler'] diff --git a/optim_esm_tools/analyze/tipping_criteria.py b/optim_esm_tools/analyze/tipping_criteria.py index 14ef81e3..1418ef49 100644 --- a/optim_esm_tools/analyze/tipping_criteria.py +++ b/optim_esm_tools/analyze/tipping_criteria.py @@ -1,10 +1,9 @@ -from .xarray_tools import apply_abs +from .xarray_tools import apply_abs, _native_date_fmt from optim_esm_tools.utils import check_accepts import xarray as xr import numpy as np import typing as ty - -from .globals import _seconds_to_year +from .globals import _SECONDS_TO_YEAR @apply_abs() @@ -169,7 +168,7 @@ def max_derivative( var_name = naming.format(variable=variable, running_mean=running_mean) data_array = _remove_any_none_times(data_set[var_name], time_var) - result = data_array.differentiate(time_var).max(dim=time_var) * _seconds_to_year + result = data_array.differentiate(time_var).max(dim=time_var) * _SECONDS_TO_YEAR var_unit = data_array.attrs.get('units', '{units}') name = data_array.attrs.get(rename_to, variable) diff --git a/optim_esm_tools/cmip_files/__init__.py b/optim_esm_tools/cmip_files/__init__.py index 588ef18f..56ec5a33 100644 --- a/optim_esm_tools/cmip_files/__init__.py +++ b/optim_esm_tools/cmip_files/__init__.py @@ -1 +1,2 @@ from . import io +from . import find_matches diff --git a/optim_esm_tools/cmip_files/find_matches.py b/optim_esm_tools/cmip_files/find_matches.py new file mode 100644 index 00000000..111ef1cb --- /dev/null +++ b/optim_esm_tools/cmip_files/find_matches.py @@ -0,0 +1,134 @@ +import os +import glob +from optim_esm_tools.utils import check_accepts +from optim_esm_tools.config import config, get_logger +from collections import defaultdict + + +@check_accepts( + accepts=dict( + activity_id=('ScenarioMIP', 'CMIP', '*'), + experiment_id=('piControl', 'historical', 'ssp585', '*'), + ) +) +def find_matches( + base, + activity_id='ScenarioMIP', + institution_id='*', + source_id='*', + experiment_id='ssp585', + variant_label='*', + domain='Ayear', + variable_id='tas', + grid='*', + version='*', + max_versions=1, + max_members=1, +): + """Follow synda folder format to find matches""" + if max_versions is None: + max_versions = int(9e9) + if max_members is None: + max_members = int(9e9) + variantes = sorted( + glob.glob( + os.path.join( + base, + activity_id, + institution_id, + source_id, + experiment_id, + variant_label, + domain, + variable_id, + grid, + version, + ) + ), + key=_variant_label_id_and_version, + ) + seen = dict() + for candidate in variantes: + folders = candidate.split(os.sep) + group = folders[-7] + member = folders[-5] + version = folders[-1] + + if group not in seen: + seen[group] = defaultdict(list) + seen_members = seen[group] + if len(seen_members) < max_members or member in seen_members: + if len(seen_members.get(version, [])) == max_versions: + continue + + seen_members[version].append(candidate) + + return [ + folder + for group_dict in seen.values() + for versions in group_dict.values() + for folder in versions + ] + + +def _get_head(path): + log = get_logger() + if path.endswith(os.sep): + log.debug(f'Stripping tailing "/" from {path}') + path = path[: -len(os.sep)] + + if os.path.isfile(path): + log.debug(f'Splitting file from {path}') + path = os.path.split(path)[0] + return path + + +def is_excluded(path): + from fnmatch import fnmatch + + path = _get_head(path) + + for excluded in config['CMIP_files']['excluded'].split('\n'): + if not excluded: + continue + folders = excluded.split() + + path_ends_with = os.path.join(*path.split(os.sep)[-len(folders) :]) + match_to = os.path.join(*folders) + if fnmatch(path_ends_with, match_to): + return True + return False + + +def _variant_label_id_and_version(full_path): + run_variant_number = None + grid_version = None + for folder in full_path.split(os.sep): + if len(folder): + if folder[0] == 'r' and run_variant_number is None: + index = folder.split('i') + if len(index) == 2: + run_variant_number = int(index[0][1:]) + if ( + folder[0] == 'v' + and len(folder) == len('v20190731') + and grid_version is None + ): + grid_version = int(folder[1:]) + if run_variant_number is None or grid_version is None: + raise ValueError( + f'could not find run and version from {full_path} {run_variant_number} {grid_version}' + ) + return run_variant_number, -grid_version + + +def folder_to_dict(path): + path = _get_head(path) + folders = path.split(os.sep) + if folders[-1][0] == 'v' and len(folders[-1]) == len('v20190731'): + return { + k: folders[-i - 1] + for i, k in enumerate(config['CMIP_files']['folder_fmt'].split()[::-1]) + } + # great + raise NotImplementedError(f'folder {path} does not end with a version') diff --git a/optim_esm_tools/config.py b/optim_esm_tools/config.py new file mode 100644 index 00000000..74399334 --- /dev/null +++ b/optim_esm_tools/config.py @@ -0,0 +1,44 @@ +"""Shared common methods for reprocessing, not useful in itself""" +from optim_esm_tools.utils import root_folder +import configparser +import logging +import os +import warnings + +if 'OPTIM_ESM_CONFIG' in os.environ: + config_path = os.environ['OPTIM_ESM_CONFIG'] +else: + _warn_later = True + config_path = os.path.join(root_folder, 'optim_esm_tools', 'optim_esm_conf.ini') + +config = configparser.ConfigParser() +config.sections() +config.read(config_path) + +_logger = {} + + +def get_logger(name='oet'): + if name not in _logger: + logging.basicConfig( + level=getattr(logging, config['log']['logging_level'].upper()), + format=( + '%(asctime)s ' + '| %(name)-12s ' + '| %(levelname)-8s ' + '| %(message)s ' + '| %(funcName)s (l. %(lineno)d)' + ), + datefmt='%m-%d %H:%M', + ) + + log = logging.getLogger(name) + _logger[name] = log + return _logger[name] + + +if _warn_later: + get_logger().info( + f'Using {config_path}-config. Overwrite by setting "OPTIM_ESM_CONFIG" ' + f'as an environment variable' + ) diff --git a/optim_esm_tools/optim_esm_conf.ini b/optim_esm_tools/optim_esm_conf.ini new file mode 100644 index 00000000..bc7b4c0b --- /dev/null +++ b/optim_esm_tools/optim_esm_conf.ini @@ -0,0 +1,23 @@ +## Config file with defaults for optim_esm_tools + +[constants] +# 365.25 * 24 * 3600 +seconds_to_year = 31557600 + +[versions] +cmip_handler = 0.1.13 + +[display] +progress_bar = True + +[CMIP_files] +folder_fmt = institution_id source_id experiment_id variant_label domain variable_id grid version +excluded = + # This one only has a dataset which is 5 years long, rendering it quire useless for 10yr running means + E3SM-Project E3SM-1-1-ECA piControl r1i1p1f1 * * gr v20201204 + + # Bad data https://errata.es-doc.org/static/index.html?experiment=ssp585&institute=thu&project=cmip6&source=ciesm + THU CIESM ssp585 r1i1p1f1 * * * v20200417 + +[log] +logging_level=WARNING diff --git a/optim_esm_tools/plotting/map_maker.py b/optim_esm_tools/plotting/map_maker.py index 720322a4..bd5b1621 100644 --- a/optim_esm_tools/plotting/map_maker.py +++ b/optim_esm_tools/plotting/map_maker.py @@ -13,7 +13,7 @@ # import xrft -from optim_esm_tools.analyze.globals import __OPTIM_VERSION__, _seconds_to_year +from optim_esm_tools.analyze.globals import _SECONDS_TO_YEAR from optim_esm_tools.analyze.tipping_criteria import ( running_mean_diff, running_mean_std, @@ -54,8 +54,8 @@ def set_kw(self): import cartopy.crs as ccrs self.kw = immutabledict( - fig=dict(dpi=200, figsize=(12, 8)), - title=dict(fontsize=8), + fig=dict(dpi=200, figsize=(14, 10)), + title=dict(fontsize=12), gridspec=dict(hspace=0.3), cbar=dict(orientation='horizontal', extend='both'), plot=dict(transform=ccrs.PlateCarree()), @@ -192,15 +192,15 @@ def __getattr__(self, item): def _ts_single(time_val, mean, std, plot_kw, fill_kw): if fill_kw is None: fill_kw = dict(alpha=0.4, step='mid') - l = mean.plot(**plot_kw) if std is not None: # TODO, make this more elegant! # import cftime # plt.fill_between( [cftime.real_datetime(dd.year, dd.month, dd.day) for dd in time_val], mean - std, mean+std, **fill_kw) - (mean - std).plot(color=l[0]._color, alpha=0.4, drawstyle='steps-mid') - (mean + std).plot(color=l[0]._color, alpha=0.4, drawstyle='steps-mid') + + (mean - std).plot(color=l[0]._color, alpha=0.4) + (mean + std).plot(color=l[0]._color, alpha=0.4) def _ts( self, @@ -284,18 +284,24 @@ def _ddt_ts( ds = self.data_set variable_rm = f'{variable}_run_mean_{running_mean}' + da = ds[variable] + da_rm = ds[variable_rm] + + if other_dim: + da = da.mean(other_dim) + da_rm = da_rm.mean(other_dim) if not only_rm: # Dropna should take care of any nones in the data-array - dy_dt = ds[variable].mean(other_dim).dropna(time).differentiate(time) - dy_dt *= _seconds_to_year + dy_dt = da.dropna(time).differentiate(time) + dy_dt *= _SECONDS_TO_YEAR # mean, std = self._mean_and_std(dy_dt, variable=None, other_dim=other_dim) # plot_kw['label'] = variable # self._ts_single(ds[time].values, mean, std, plot_kw, fill_kw) label = f'd/dt {labels.get(variable, variable)}' dy_dt.plot(label=label, **plot_kw) - dy_dt_rm = ds[variable_rm].mean(other_dim).dropna(time).differentiate(time) - dy_dt_rm *= _seconds_to_year + dy_dt_rm = da_rm.dropna(time).differentiate(time) + dy_dt_rm *= _SECONDS_TO_YEAR label = ( f"d/dt {labels.get(variable_rm, f'{variable} running mean {running_mean}')}" ) @@ -316,7 +322,7 @@ def _mean_and_std(ds, variable, other_dim): else: da = ds[variable] if other_dim is None: - return da.mean(other_dim), None + return da, None return da.mean(other_dim), da.std(other_dim) def time_series( @@ -375,4 +381,6 @@ def title(self): def make_title(ds): - return '{model_group} {model} {scenario} {run} {variable}'.format(**ds.attrs) + return '{institution_id} {source_id} {experiment_id} {variant_label} {variable_id} {version}'.format( + **ds.attrs + ) diff --git a/optim_esm_tools/utils.py b/optim_esm_tools/utils.py index 08dca877..40788f87 100644 --- a/optim_esm_tools/utils.py +++ b/optim_esm_tools/utils.py @@ -55,13 +55,13 @@ def setup_plt(use_tex=True, register_as='custom_map'): from cycler import cycler params = { - 'axes.grid': False, - 'font.size': 20, - 'axes.titlesize': 22, - 'axes.labelsize': 20, + 'axes.grid': True, + 'font.size': 18, + 'axes.titlesize': 20, + 'axes.labelsize': 18, 'axes.linewidth': 2, - 'xtick.labelsize': 20, - 'ytick.labelsize': 20, + 'xtick.labelsize': 18, + 'ytick.labelsize': 18, 'ytick.major.size': 8, 'ytick.minor.size': 4, 'xtick.major.size': 8, @@ -72,7 +72,7 @@ def setup_plt(use_tex=True, register_as='custom_map'): 'ytick.minor.width': 2, 'xtick.direction': 'in', 'ytick.direction': 'in', - 'legend.fontsize': 20, + 'legend.fontsize': 18, 'figure.facecolor': 'w', 'figure.figsize': (8, 6), 'image.cmap': 'viridis', diff --git a/test/test_find_matches.py b/test/test_find_matches.py new file mode 100644 index 00000000..bfbc76e3 --- /dev/null +++ b/test/test_find_matches.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- +import unittest +import optim_esm_tools as oet +import os +import matplotlib.pyplot as plt +import subprocess +from optim_esm_tools._test_utils import ( + synda_test_available, + get_example_data_loc, + get_synda_loc, +) + + +@unittest.skipIf(not synda_test_available(), 'synda data not available') +class TestMatches(unittest.TestCase): + def test_find_matches(self): + head = os.path.join(get_synda_loc(), 'CMIP6') + kw = oet.cmip_files.find_matches.folder_to_dict(get_example_data_loc()) + assert len( + oet.cmip_files.find_matches.find_matches( + base=head, + **kw, + ) + ) diff --git a/test/test_workflow.py b/test/test_workflow.py index b0daf0f3..b8b400d0 100644 --- a/test/test_workflow.py +++ b/test/test_workflow.py @@ -54,7 +54,7 @@ def test_apply_relative_units(self, unit='relative'): from immutabledict import immutabledict from functools import partial - mm.kw = immutabledict( + mm.conditions = immutabledict( { 'i ii iii iv v vi vii viii ix x'.split()[i]: props for i, props in enumerate( @@ -79,7 +79,7 @@ def test_apply_relative_units(self, unit='relative'): ) } ) - for i in mm.kw.keys(): + for i in mm.conditions.keys(): getattr(mm, i) def test_apply_std_unit(self):