-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* refactor code * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * more cleanup (#47) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * more cleanup * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add some test * more file finding tools [skip ci] * clean * add test * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add more tests
- Loading branch information
1 parent
f2ca292
commit e1a6b78
Showing
13 changed files
with
276 additions
and
34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
# NOTE(review): the hunk header above reads "-1,3 +1,5", so the next three
# assignments look like the removed hard-coded values and the lines after them
# like their config-backed replacements -- the diff markers were lost, confirm
# which lines are live before relying on either set of names.
_seconds_to_year = 365.25 * 24 * 3600 | ||
folder_fmt = 'model_group model scenario run domain variable grid version'.split() | ||
__OPTIM_VERSION__ = '0.1.11' | ||
from optim_esm_tools.config import config | ||
|
||
# `seconds_to_year` from the [constants] config section, cast to int.
_SECONDS_TO_YEAR = int(config['constants']['seconds_to_year']) | ||
# `folder_fmt` from the [CMIP_files] config section, split on whitespace.
_FOLDER_FMT = config['CMIP_files']['folder_fmt'].split() | ||
# `cmip_handler` from the [versions] config section, kept as a string.
_CMIP_HANDLER_VERSION = config['versions']['cmip_handler'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
from . import io | ||
from . import find_matches |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
import os | ||
import glob | ||
from optim_esm_tools.utils import check_accepts | ||
from optim_esm_tools.config import config, get_logger | ||
from collections import defaultdict | ||
|
||
|
||
@check_accepts(
    accepts=dict(
        activity_id=('ScenarioMIP', 'CMIP', '*'),
        experiment_id=('piControl', 'historical', 'ssp585', '*'),
    )
)
def find_matches(
    base,
    activity_id='ScenarioMIP',
    institution_id='*',
    source_id='*',
    experiment_id='ssp585',
    variant_label='*',
    domain='Ayear',
    variable_id='tas',
    grid='*',
    version='*',
    max_versions=1,
    max_members=1,
):
    """Find dataset folders below ``base`` that follow the synda folder format.

    The keyword arguments are joined, in signature order, into the glob
    pattern ``base/activity_id/institution_id/source_id/experiment_id/
    variant_label/domain/variable_id/grid/version``; each of them may contain
    glob wildcards.

    Matches are ordered with ``_variant_label_id_and_version`` and then
    thinned per ``source_id`` folder: at most ``max_members`` distinct
    ``variant_label`` folders are kept, and for each of those at most
    ``max_versions`` version folders.

    Args:
        base: root folder to search in.
        max_versions: versions to keep per member, ``None`` for no limit.
        max_members: members to keep per source_id, ``None`` for no limit.

    Returns:
        Flat list of matching folder paths, grouped by source_id and member
        in the order they were first encountered.
    """
    if max_versions is None:
        max_versions = int(9e9)
    if max_members is None:
        max_members = int(9e9)

    pattern = os.path.join(
        base,
        activity_id,
        institution_id,
        source_id,
        experiment_id,
        variant_label,
        domain,
        variable_id,
        grid,
        version,
    )
    candidates = sorted(glob.glob(pattern), key=_variant_label_id_and_version)

    # Bookkeeping of accepted folders: {source_id: {variant_label: [folder, ...]}}
    seen = dict()
    for candidate in candidates:
        folders = candidate.split(os.sep)
        # Counted from the end of the path: version is -1, variant_label -5
        # and source_id -7.
        group = folders[-7]
        member = folders[-5]

        if group not in seen:
            seen[group] = defaultdict(list)
        seen_members = seen[group]

        # A member that was not accepted before only fits while there is
        # still room for another member of this source_id.
        if member not in seen_members and len(seen_members) >= max_members:
            continue
        # Use .get() so a rejected candidate never registers a new member.
        if len(seen_members.get(member, ())) >= max_versions:
            continue

        seen_members[member].append(candidate)

    return [
        folder
        for group_dict in seen.values()
        for member_folders in group_dict.values()
        for folder in member_folders
    ]
|
||
|
||
def _get_head(path):
    """Return ``path`` reduced to a folder.

    One trailing path separator is removed, and if the remaining path is an
    existing file, the file name is dropped so its parent folder is returned.
    """
    logger = get_logger()

    if path.endswith(os.sep):
        logger.debug(f'Stripping tailing "/" from {path}')
        path = path[: len(path) - len(os.sep)]

    if not os.path.isfile(path):
        return path

    logger.debug(f'Splitting file from {path}')
    return os.path.dirname(path)
|
||
|
||
def is_excluded(path):
    """Tell whether ``path`` matches one of the configured exclusion patterns.

    Every non-empty line of ``config['CMIP_files']['excluded']`` is a
    whitespace separated list of folder names (wildcards allowed). The line
    is compared, using ``fnmatch``, to the same number of trailing folders
    of ``path``.

    Returns:
        True as soon as one pattern matches, False otherwise.
    """
    from fnmatch import fnmatch

    path_parts = _get_head(path).split(os.sep)

    for line in config['CMIP_files']['excluded'].split('\n'):
        if line:
            pattern_parts = line.split()
            # Only the last len(pattern_parts) folders take part in the match
            tail = os.path.join(*path_parts[-len(pattern_parts) :])
            pattern = os.path.join(*pattern_parts)
            if fnmatch(tail, pattern):
                return True
    return False
|
||
|
||
def _variant_label_id_and_version(full_path):
    """Return the sort key ``(run number, -version)`` parsed from a path.

    The run number is taken from the first folder that looks like
    ``r<digits>i<rest>`` (exactly one ``i``), the version from the first
    folder that looks like ``v<8 digits>`` (same length as ``v20190731``).
    The version is negated so that sorting ascending puts the highest
    version number first within one run.

    Folders that merely start with ``r`` or ``v`` (e.g. ``replica`` or
    ``variables``) are ignored instead of being fed to ``int()``.

    Raises:
        ValueError: if no run folder or no version folder is found.
    """
    run_variant_number = None
    grid_version = None
    for folder in full_path.split(os.sep):
        if run_variant_number is None and folder.startswith('r'):
            # 'r12i1p1f1' -> ('r12', 'i', '1p1f1'); demand exactly one 'i'
            # and a purely numeric run index before converting.
            head, separator, tail = folder.partition('i')
            run_digits = head[1:]
            if separator and 'i' not in tail and run_digits.isdecimal():
                run_variant_number = int(run_digits)
        if (
            grid_version is None
            and folder.startswith('v')
            and len(folder) == len('v20190731')
            and folder[1:].isdecimal()
        ):
            grid_version = int(folder[1:])
    if run_variant_number is None or grid_version is None:
        raise ValueError(
            f'could not find run and version from {full_path} {run_variant_number} {grid_version}'
        )
    return run_variant_number, -grid_version
|
||
|
||
def folder_to_dict(path):
    """Map the trailing folders of ``path`` onto the configured field names.

    The field names come from ``config['CMIP_files']['folder_fmt']``; the
    last name is paired with the last folder, the one before it with the
    second to last folder, and so on.

    Raises:
        NotImplementedError: if the last folder does not look like a version
            (leading ``v`` and the same length as ``v20190731``).
    """
    path = _get_head(path)
    parts = path.split(os.sep)
    last = parts[-1]

    looks_like_version = last[0] == 'v' and len(last) == len('v20190731')
    if not looks_like_version:
        raise NotImplementedError(f'folder {path} does not end with a version')

    field_names = config['CMIP_files']['folder_fmt'].split()
    result = {}
    # Walk names and folders from the end of the path towards the front
    for offset, field in enumerate(reversed(field_names), start=1):
        result[field] = parts[-offset]
    return result
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
"""Shared common methods for reprocessing, not useful in itself""" | ||
from optim_esm_tools.utils import root_folder | ||
import configparser | ||
import logging | ||
import os | ||
import warnings | ||
|
||
# Use the config file named by the OPTIM_ESM_CONFIG environment variable when
# it is set, otherwise fall back to the ini file inside the package folder.
# `_warn_later` is always defined, so the check after `get_logger` cannot
# raise a NameError when the environment variable is set.
_warn_later = 'OPTIM_ESM_CONFIG' not in os.environ
if _warn_later:
    config_path = os.path.join(root_folder, 'optim_esm_tools', 'optim_esm_conf.ini')
else:
    config_path = os.environ['OPTIM_ESM_CONFIG']

config = configparser.ConfigParser()
config.read(config_path)

# Cache of loggers handed out by get_logger, keyed by logger name
_logger = {}
|
||
|
||
def get_logger(name='oet'):
    """Return the logger called ``name``, creating and caching it on first use.

    On first use of a name ``logging.basicConfig`` is called with the level
    from ``config['log']['logging_level']`` and the message format below.
    """
    cached = _logger.get(name)
    if cached is not None:
        return cached

    level_name = config['log']['logging_level'].upper()
    message_format = (
        '%(asctime)s | %(name)-12s | %(levelname)-8s | %(message)s '
        '| %(funcName)s (l. %(lineno)d)'
    )
    logging.basicConfig(
        level=getattr(logging, level_name),
        format=message_format,
        datefmt='%m-%d %H:%M',
    )

    _logger[name] = logging.getLogger(name)
    return _logger[name]
|
||
|
||
# Tell the user which config file is in use and how to point to another one.
if _warn_later:
    get_logger().info(
        f'Using {config_path}-config. Overwrite by setting "OPTIM_ESM_CONFIG" as an environment variable'
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
## Config file with defaults for optim_esm_tools | ||
|
||
[constants] | ||
# 365.25 * 24 * 3600 | ||
seconds_to_year = 31557600 | ||
|
||
[versions] | ||
cmip_handler = 0.1.13 | ||
|
||
[display] | ||
progress_bar = True | ||
|
||
[CMIP_files] | ||
folder_fmt = institution_id source_id experiment_id variant_label domain variable_id grid version | ||
excluded = | ||
# This one only has a dataset which is 5 years long, rendering it quite useless for 10yr running means | ||
E3SM-Project E3SM-1-1-ECA piControl r1i1p1f1 * * gr v20201204 | ||
|
||
# Bad data https://errata.es-doc.org/static/index.html?experiment=ssp585&institute=thu&project=cmip6&source=ciesm | ||
THU CIESM ssp585 r1i1p1f1 * * * v20200417 | ||
|
||
[log] | ||
logging_level=WARNING |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.