Add new file tools (#48)
* refactor code

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* more cleanup (#47)

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* more cleanup

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* add some test

* more file finding tools [skip ci]

* clean

* add test

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* add more tests
JoranAngevaare committed Jun 14, 2023
1 parent f2ca292 commit e1a6b78
Showing 13 changed files with 276 additions and 34 deletions.
3 changes: 2 additions & 1 deletion optim_esm_tools/__init__.py
@@ -2,9 +2,10 @@
__version__ = '0.1.1'
__author__ = 'Joran R. Angevaare'

from . import utils
from . import config
from . import analyze
from . import cmip_files
from . import synda_files
from . import utils
from . import _test_utils
from . import plotting
6 changes: 5 additions & 1 deletion optim_esm_tools/_test_utils.py
@@ -4,8 +4,12 @@
EXMPLE_DATA_SET = 'CMIP6/ScenarioMIP/CCCma/CanESM5/ssp585/r3i1p2f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_ssp585_r3i1p2f1_gn_201501-210012.nc'


def get_synda_loc():
return os.path.join(os.environ.get('ST_HOME'), 'data')


def get_example_data_loc():
return os.path.join(os.environ.get('ST_HOME'), 'data', EXMPLE_DATA_SET)
return os.path.join(get_synda_loc(), EXMPLE_DATA_SET)


def synda_test_available():
10 changes: 6 additions & 4 deletions optim_esm_tools/analyze/cmip_handler.py
@@ -11,9 +11,9 @@

from optim_esm_tools.utils import depricated

from .globals import __OPTIM_VERSION__, folder_fmt
from .globals import _CMIP_HANDLER_VERSION, _FOLDER_FMT
from .xarray_tools import _native_date_fmt
from optim_esm_tools.plotting.map_maker import MapMaker
from optim_esm_tools.plotting.map_maker import MapMaker, make_title


def read_ds(
@@ -45,7 +45,7 @@ def read_ds(
min_time,
max_time,
_ma_window,
__OPTIM_VERSION__,
_CMIP_HANDLER_VERSION,
)

if os.path.exists(post_processed_file) and _cache:
@@ -72,7 +72,7 @@ def read_ds(
folders = base.split(os.sep)

# start with -1 (for i==0)
metadata = {k: folders[-i - 1] for i, k in enumerate(folder_fmt[::-1])}
metadata = {k: folders[-i - 1] for i, k in enumerate(_FOLDER_FMT[::-1])}
metadata.update(
dict(path=base, file=post_processed_file, running_mean_period=_ma_window)
)
@@ -140,6 +140,8 @@ def _calculate_variables(
detrended_run_mean.attrs['units'] = data_set[variable].attrs.get(
'units', '{units}'
)
for det in detrended, detrended_run_mean:
det.attrs.update(data_set[variable].attrs.copy())
data_set[f'{variable}_run_mean_{_ma_window}'] = run_mean
data_set[f'{variable}_detrend'] = detrended
data_set[f'{variable}_detrend_run_mean_{_ma_window}'] = detrended_run_mean
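For orientation, here is how the metadata comprehension in read_ds resolves a folder path against _FOLDER_FMT. This is a minimal, hypothetical sketch: the path is made up and the format string is copied from optim_esm_conf.ini further down in this commit.

import os

_FOLDER_FMT = 'institution_id source_id experiment_id variant_label domain variable_id grid version'.split()
base = '/data/CMIP6/ScenarioMIP/CCCma/CanESM5/ssp585/r3i1p2f1/Amon/tas/gn/v20190429'  # hypothetical

folders = base.split(os.sep)
# Walk the format back to front so the last path component maps to 'version', and so on.
metadata = {k: folders[-i - 1] for i, k in enumerate(_FOLDER_FMT[::-1])}
# {'version': 'v20190429', 'grid': 'gn', 'variable_id': 'tas', 'domain': 'Amon',
#  'variant_label': 'r3i1p2f1', 'experiment_id': 'ssp585', 'source_id': 'CanESM5',
#  'institution_id': 'CCCma'}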
8 changes: 5 additions & 3 deletions optim_esm_tools/analyze/globals.py
@@ -1,3 +1,5 @@
_seconds_to_year = 365.25 * 24 * 3600
folder_fmt = 'model_group model scenario run domain variable grid version'.split()
__OPTIM_VERSION__ = '0.1.11'
from optim_esm_tools.config import config

_SECONDS_TO_YEAR = int(config['constants']['seconds_to_year'])
_FOLDER_FMT = config['CMIP_files']['folder_fmt'].split()
_CMIP_HANDLER_VERSION = config['versions']['cmip_handler']
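Given the defaults in optim_esm_conf.ini (added later in this commit), these module constants resolve to roughly the following; shown here only as a sketch of the expected values.

_SECONDS_TO_YEAR = 31557600
_FOLDER_FMT = ['institution_id', 'source_id', 'experiment_id', 'variant_label',
               'domain', 'variable_id', 'grid', 'version']
_CMIP_HANDLER_VERSION = '0.1.13'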
7 changes: 3 additions & 4 deletions optim_esm_tools/analyze/tipping_criteria.py
@@ -1,10 +1,9 @@
from .xarray_tools import apply_abs
from .xarray_tools import apply_abs, _native_date_fmt
from optim_esm_tools.utils import check_accepts
import xarray as xr
import numpy as np
import typing as ty

from .globals import _seconds_to_year
from .globals import _SECONDS_TO_YEAR


@apply_abs()
@@ -169,7 +168,7 @@ def max_derivative(
var_name = naming.format(variable=variable, running_mean=running_mean)

data_array = _remove_any_none_times(data_set[var_name], time_var)
result = data_array.differentiate(time_var).max(dim=time_var) * _seconds_to_year
result = data_array.differentiate(time_var).max(dim=time_var) * _SECONDS_TO_YEAR

var_unit = data_array.attrs.get('units', '{units}')
name = data_array.attrs.get(rename_to, variable)
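max_derivative rescales the gradient returned by differentiate into a per-year rate by multiplying with _SECONDS_TO_YEAR. A rough, self-contained sketch of that conversion on made-up data, using a plain numeric time axis in seconds instead of a CMIP calendar:

import numpy as np
import xarray as xr

SECONDS_TO_YEAR = 365.25 * 24 * 3600  # 31557600, as in optim_esm_conf.ini

# Hypothetical: 50 yearly samples of a field warming by 0.02 units per year.
time_s = np.arange(50) * SECONDS_TO_YEAR
da = xr.DataArray(0.02 * np.arange(50), coords={'time': time_s}, dims='time')

per_second = da.differentiate('time')                    # gradient in units / s
per_year = per_second.max(dim='time') * SECONDS_TO_YEAR  # ~0.02 units / yr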
1 change: 1 addition & 0 deletions optim_esm_tools/cmip_files/__init__.py
@@ -1 +1,2 @@
from . import io
from . import find_matches
134 changes: 134 additions & 0 deletions optim_esm_tools/cmip_files/find_matches.py
@@ -0,0 +1,134 @@
import os
import glob
from optim_esm_tools.utils import check_accepts
from optim_esm_tools.config import config, get_logger
from collections import defaultdict


@check_accepts(
    accepts=dict(
        activity_id=('ScenarioMIP', 'CMIP', '*'),
        experiment_id=('piControl', 'historical', 'ssp585', '*'),
    )
)
def find_matches(
    base,
    activity_id='ScenarioMIP',
    institution_id='*',
    source_id='*',
    experiment_id='ssp585',
    variant_label='*',
    domain='Ayear',
    variable_id='tas',
    grid='*',
    version='*',
    max_versions=1,
    max_members=1,
):
    """Follow synda folder format to find matches"""
    if max_versions is None:
        max_versions = int(9e9)
    if max_members is None:
        max_members = int(9e9)
    variantes = sorted(
        glob.glob(
            os.path.join(
                base,
                activity_id,
                institution_id,
                source_id,
                experiment_id,
                variant_label,
                domain,
                variable_id,
                grid,
                version,
            )
        ),
        key=_variant_label_id_and_version,
    )
    seen = dict()
    for candidate in variantes:
        folders = candidate.split(os.sep)
        group = folders[-7]
        member = folders[-5]
        version = folders[-1]

        if group not in seen:
            seen[group] = defaultdict(list)
        seen_members = seen[group]
        if len(seen_members) < max_members or member in seen_members:
            if len(seen_members.get(version, [])) == max_versions:
                continue

            seen_members[version].append(candidate)

    return [
        folder
        for group_dict in seen.values()
        for versions in group_dict.values()
        for folder in versions
    ]


def _get_head(path):
    log = get_logger()
    if path.endswith(os.sep):
        log.debug(f'Stripping trailing "/" from {path}')
        path = path[: -len(os.sep)]

    if os.path.isfile(path):
        log.debug(f'Splitting file from {path}')
        path = os.path.split(path)[0]
    return path


def is_excluded(path):
    from fnmatch import fnmatch

    path = _get_head(path)

    for excluded in config['CMIP_files']['excluded'].split('\n'):
        if not excluded:
            continue
        folders = excluded.split()

        path_ends_with = os.path.join(*path.split(os.sep)[-len(folders) :])
        match_to = os.path.join(*folders)
        if fnmatch(path_ends_with, match_to):
            return True
    return False


def _variant_label_id_and_version(full_path):
    run_variant_number = None
    grid_version = None
    for folder in full_path.split(os.sep):
        if len(folder):
            if folder[0] == 'r' and run_variant_number is None:
                index = folder.split('i')
                if len(index) == 2:
                    run_variant_number = int(index[0][1:])
            if (
                folder[0] == 'v'
                and len(folder) == len('v20190731')
                and grid_version is None
            ):
                grid_version = int(folder[1:])
    if run_variant_number is None or grid_version is None:
        raise ValueError(
            f'could not find run and version from {full_path} {run_variant_number} {grid_version}'
        )
    return run_variant_number, -grid_version


def folder_to_dict(path):
    path = _get_head(path)
    folders = path.split(os.sep)
    if folders[-1][0] == 'v' and len(folders[-1]) == len('v20190731'):
        return {
            k: folders[-i - 1]
            for i, k in enumerate(config['CMIP_files']['folder_fmt'].split()[::-1])
        }
    raise NotImplementedError(f'folder {path} does not end with a version')
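A hedged usage sketch of the new module; the data root below is hypothetical and nothing is guaranteed to exist on a given machine, but it shows how the pieces above are meant to fit together:

from optim_esm_tools.cmip_files.find_matches import find_matches, folder_to_dict

# Hypothetical synda data root; find_matches globs
# <base>/<activity_id>/<institution_id>/.../<grid>/<version> below it.
paths = find_matches(
    '/data/synda/CMIP6',
    activity_id='ScenarioMIP',
    experiment_id='ssp585',
    variable_id='tas',
)

for path in paths:
    print(folder_to_dict(path))
    # e.g. {'institution_id': 'CCCma', 'source_id': 'CanESM5', ..., 'version': 'v20190429'}

# The sort key prefers low realization numbers and recent versions, e.g.
# _variant_label_id_and_version('.../CanESM5/ssp585/r3i1p2f1/Amon/tas/gn/v20190429')
# returns (3, -20190429).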
44 changes: 44 additions & 0 deletions optim_esm_tools/config.py
@@ -0,0 +1,44 @@
"""Shared common methods for reprocessing, not useful in itself"""
from optim_esm_tools.utils import root_folder
import configparser
import logging
import os
import warnings

if 'OPTIM_ESM_CONFIG' in os.environ:
    config_path = os.environ['OPTIM_ESM_CONFIG']
    _warn_later = False
else:
    _warn_later = True
    config_path = os.path.join(root_folder, 'optim_esm_tools', 'optim_esm_conf.ini')

config = configparser.ConfigParser()
config.sections()
config.read(config_path)

_logger = {}


def get_logger(name='oet'):
    if name not in _logger:
        logging.basicConfig(
            level=getattr(logging, config['log']['logging_level'].upper()),
            format=(
                '%(asctime)s '
                '| %(name)-12s '
                '| %(levelname)-8s '
                '| %(message)s '
                '| %(funcName)s (l. %(lineno)d)'
            ),
            datefmt='%m-%d %H:%M',
        )

        log = logging.getLogger(name)
        _logger[name] = log
    return _logger[name]


if _warn_later:
    get_logger().info(
        f'Using {config_path}-config. Overwrite by setting "OPTIM_ESM_CONFIG" '
        f'as an environment variable'
    )
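A short usage sketch of the new helpers; the keys and values shown are the ones defined in optim_esm_conf.ini below, and config is a plain configparser.ConfigParser, so values come back as strings.

from optim_esm_tools.config import config, get_logger

seconds_to_year = int(config['constants']['seconds_to_year'])  # 31557600
folder_fmt = config['CMIP_files']['folder_fmt'].split()        # ['institution_id', ..., 'version']

log = get_logger()  # cached logger named 'oet', level taken from [log] logging_level
log.warning('One year is %s seconds', seconds_to_year)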
23 changes: 23 additions & 0 deletions optim_esm_tools/optim_esm_conf.ini
@@ -0,0 +1,23 @@
## Config file with defaults for optim_esm_tools

[constants]
# 365.25 * 24 * 3600
seconds_to_year = 31557600

[versions]
cmip_handler = 0.1.13

[display]
progress_bar = True

[CMIP_files]
folder_fmt = institution_id source_id experiment_id variant_label domain variable_id grid version
excluded =
    # This one only has a dataset which is 5 years long, rendering it quite useless for 10yr running means
    E3SM-Project E3SM-1-1-ECA piControl r1i1p1f1 * * gr v20201204

    # Bad data https://errata.es-doc.org/static/index.html?experiment=ssp585&institute=thu&project=cmip6&source=ciesm
    THU CIESM ssp585 r1i1p1f1 * * * v20200417

[log]
logging_level=WARNING
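The excluded entries are whitespace-separated folder patterns; is_excluded in find_matches.py above compares them with fnmatch against the tail of a path. A small sketch with hypothetical paths:

from optim_esm_tools.cmip_files.find_matches import is_excluded

# Matches the "THU CIESM ssp585 r1i1p1f1 * * * v20200417" pattern above.
is_excluded('/data/CMIP6/ScenarioMIP/THU/CIESM/ssp585/r1i1p1f1/Amon/tas/gr/v20200417')     # True
# No pattern matches this path.
is_excluded('/data/CMIP6/ScenarioMIP/CCCma/CanESM5/ssp585/r3i1p2f1/Amon/tas/gn/v20190429')  # False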
32 changes: 20 additions & 12 deletions optim_esm_tools/plotting/map_maker.py
@@ -13,7 +13,7 @@

# import xrft

from optim_esm_tools.analyze.globals import __OPTIM_VERSION__, _seconds_to_year
from optim_esm_tools.analyze.globals import _SECONDS_TO_YEAR
from optim_esm_tools.analyze.tipping_criteria import (
running_mean_diff,
running_mean_std,
@@ -54,8 +54,8 @@ def set_kw(self):
import cartopy.crs as ccrs

self.kw = immutabledict(
fig=dict(dpi=200, figsize=(12, 8)),
title=dict(fontsize=8),
fig=dict(dpi=200, figsize=(14, 10)),
title=dict(fontsize=12),
gridspec=dict(hspace=0.3),
cbar=dict(orientation='horizontal', extend='both'),
plot=dict(transform=ccrs.PlateCarree()),
@@ -192,15 +192,15 @@ def __getattr__(self, item):
def _ts_single(time_val, mean, std, plot_kw, fill_kw):
if fill_kw is None:
fill_kw = dict(alpha=0.4, step='mid')

l = mean.plot(**plot_kw)

if std is not None:
# TODO, make this more elegant!
# import cftime
# plt.fill_between( [cftime.real_datetime(dd.year, dd.month, dd.day) for dd in time_val], mean - std, mean+std, **fill_kw)
(mean - std).plot(color=l[0]._color, alpha=0.4, drawstyle='steps-mid')
(mean + std).plot(color=l[0]._color, alpha=0.4, drawstyle='steps-mid')

(mean - std).plot(color=l[0]._color, alpha=0.4)
(mean + std).plot(color=l[0]._color, alpha=0.4)

def _ts(
self,
@@ -284,18 +284,24 @@ def _ddt_ts(
ds = self.data_set
variable_rm = f'{variable}_run_mean_{running_mean}'

da = ds[variable]
da_rm = ds[variable_rm]

if other_dim:
da = da.mean(other_dim)
da_rm = da_rm.mean(other_dim)
if not only_rm:
# Dropna should take care of any nones in the data-array
dy_dt = ds[variable].mean(other_dim).dropna(time).differentiate(time)
dy_dt *= _seconds_to_year
dy_dt = da.dropna(time).differentiate(time)
dy_dt *= _SECONDS_TO_YEAR
# mean, std = self._mean_and_std(dy_dt, variable=None, other_dim=other_dim)
# plot_kw['label'] = variable
# self._ts_single(ds[time].values, mean, std, plot_kw, fill_kw)
label = f'd/dt {labels.get(variable, variable)}'
dy_dt.plot(label=label, **plot_kw)

dy_dt_rm = ds[variable_rm].mean(other_dim).dropna(time).differentiate(time)
dy_dt_rm *= _seconds_to_year
dy_dt_rm = da_rm.dropna(time).differentiate(time)
dy_dt_rm *= _SECONDS_TO_YEAR
label = (
f"d/dt {labels.get(variable_rm, f'{variable} running mean {running_mean}')}"
)
@@ -316,7 +322,7 @@ def _mean_and_std(ds, variable, other_dim):
else:
da = ds[variable]
if other_dim is None:
return da.mean(other_dim), None
return da, None
return da.mean(other_dim), da.std(other_dim)

def time_series(
@@ -375,4 +381,6 @@ def title(self):


def make_title(ds):
return '{model_group} {model} {scenario} {run} {variable}'.format(**ds.attrs)
return '{institution_id} {source_id} {experiment_id} {variant_label} {variable_id} {version}'.format(
**ds.attrs
)
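The new make_title simply formats the attributes that read_ds stores from the folder metadata; a tiny sketch with made-up attrs matching the new _FOLDER_FMT keys:

attrs = dict(
    institution_id='CCCma', source_id='CanESM5', experiment_id='ssp585',
    variant_label='r3i1p2f1', variable_id='tas', version='v20190429',
)
title = '{institution_id} {source_id} {experiment_id} {variant_label} {variable_id} {version}'.format(**attrs)
# 'CCCma CanESM5 ssp585 r3i1p2f1 tas v20190429'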