From 0384c3bba0acddb936aa986da92ed03958b7d7e6 Mon Sep 17 00:00:00 2001 From: Rahul Mahajan Date: Wed, 21 Sep 2022 10:40:47 -0400 Subject: [PATCH 1/4] initial commit for yaml work --- ush/python/README.md | 21 +++ ush/python/setup.cfg | 62 +++++++++ ush/python/setup.py | 4 + ush/python/src/pygw/__init__.py | 9 ++ ush/python/src/pygw/attrdict.py | 169 +++++++++++++++++++++++ ush/python/src/pygw/jinja.py | 58 ++++++++ ush/python/src/pygw/template.py | 191 ++++++++++++++++++++++++++ ush/python/src/pygw/timetools.py | 111 +++++++++++++++ ush/python/src/pygw/yaml_file.py | 144 +++++++++++++++++++ ush/python/src/tests/test_template.py | 148 ++++++++++++++++++++ 10 files changed, 917 insertions(+) create mode 100644 ush/python/README.md create mode 100644 ush/python/setup.cfg create mode 100644 ush/python/setup.py create mode 100644 ush/python/src/pygw/__init__.py create mode 100644 ush/python/src/pygw/attrdict.py create mode 100644 ush/python/src/pygw/jinja.py create mode 100644 ush/python/src/pygw/template.py create mode 100644 ush/python/src/pygw/timetools.py create mode 100644 ush/python/src/pygw/yaml_file.py create mode 100644 ush/python/src/tests/test_template.py diff --git a/ush/python/README.md b/ush/python/README.md new file mode 100644 index 0000000000..6a36cbb72f --- /dev/null +++ b/ush/python/README.md @@ -0,0 +1,21 @@ +# global workflow specific tools + +Python tools specifically for global applications + +## Installation +Simple installation instructions +```sh +$> git clone https://github.com/noaa-emc/global-workflow +$> cd global-workflow/ush/python +$> pip install . +``` + +It is not required to install this package. Instead, +```sh +$> cd global-workflow/ush/python +$> export PYTHONPATH=$PWD/src +``` +would put this package in the `PYTHONPATH` + +### Note: +These instructions will be updated and the tools are under development. 
diff --git a/ush/python/setup.cfg b/ush/python/setup.cfg new file mode 100644 index 0000000000..bb288b56a0 --- /dev/null +++ b/ush/python/setup.cfg @@ -0,0 +1,62 @@ +[metadata] +name = pygw +version = 0.0.1 +description = Global applications specific workflow related tools +long_description = file: README.md +long_description_content_type = text/markdown +author = "NOAA/NCEP/EMC" +#author_email = first.last@domain.tld +keywords = NOAA, NCEP, EMC, GFS, GEFS +home_page = https://github.com/noaa-emc/global-workflow +license = GNU Lesser General Public License +classifiers = + Development Status :: 1 - Beta + Intended Audience :: Developers + Intended Audience :: Science/Research + License :: OSI Approved :: GNU Lesser General Public License + Natural Language :: English + Operating System :: OS Independent + Programming Language :: Python + Programming Language :: Python :: 3 + Programming Language :: Python :: 3.6 + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Topic :: Software Development :: Libraries :: Python Modules + Operating System :: OS Independent + Typing :: Typed +project_urls = + Bug Tracker = https://github.com/noaa-emc/global-workflow/issues + CI = https://github.com/noaa-emc/global-workflow/actions + +[options] +zip_safe = False +include_package_data = True +package_dir = + =src +packages = find_namespace: +python_requires = >= 3.6 +setup_requires = + setuptools +install_requires = + numpy==1.21.6 + PyYAML==6.0 + Jinja2==3.1.2 +tests_require = + pytest + +[options.packages.find] +where=src + +[options.package_data] +* = *.txt, *.md + +[options.extras_require] +dev = pytest-cov>=3 + +[green] +file-pattern = test_*.py +verbose = 2 +no-skip-report = true +quiet-stdout = true +run-coverage = true diff --git a/ush/python/setup.py b/ush/python/setup.py new file mode 100644 index 0000000000..e748ce0b71 --- /dev/null +++ b/ush/python/setup.py @@ -0,0 +1,4 @@ +''' Standard file for 
class AttrDict(dict):
    """Dictionary whose items can also be read and written as attributes.

    Nested ``dict`` values passed to the constructor are converted to
    ``AttrDict`` (see ``_hook``).  Looking up a missing key returns a new,
    detached child that attaches itself to its parent on first assignment,
    which is what makes ``d.a.b.c = 1`` work.  ``freeze()`` disables that
    auto-creation and forbids adding new keys.

    Bookkeeping values (``__parent``, ``__key``, ``__frozen``) live in the
    instance ``__dict__`` and are written with ``object.__setattr__`` so they
    never appear as dictionary items.
    """

    def __init__(__self, *args, **kwargs):
        object.__setattr__(__self, '__parent', kwargs.pop('__parent', None))
        object.__setattr__(__self, '__key', kwargs.pop('__key', None))
        object.__setattr__(__self, '__frozen', False)
        for arg in args:
            if not arg:
                continue
            elif isinstance(arg, dict):
                for key, val in arg.items():
                    __self[key] = __self._hook(val)
            elif isinstance(arg, tuple) and (not isinstance(arg[0], tuple)):
                # A single (key, value) pair.
                __self[arg[0]] = __self._hook(arg[1])
            else:
                # Any iterable of (key, value) pairs.
                for key, val in iter(arg):
                    __self[key] = __self._hook(val)

        for key, val in kwargs.items():
            __self[key] = __self._hook(val)

    def __setattr__(self, name, value):
        # Names defined on the class (methods etc.) must not be shadowed.
        if hasattr(self.__class__, name):
            raise AttributeError("'AttrDict' object attribute "
                                 "'{0}' is read-only".format(name))
        else:
            self[name] = value

    def __setitem__(self, name, value):
        # Read the flag straight from the instance __dict__ with a default:
        # objects rebuilt by copy/pickle bypass __init__ and have no flag yet,
        # and hasattr() cannot be used because __getattr__ answers for any name.
        is_frozen = vars(self).get('__frozen', False)
        if is_frozen and name not in super(AttrDict, self).keys():
            raise KeyError(name)
        super(AttrDict, self).__setitem__(name, value)
        try:
            p = object.__getattribute__(self, '__parent')
            key = object.__getattribute__(self, '__key')
        except AttributeError:
            p = None
            key = None
        if p is not None:
            # First assignment on a detached child: attach it to its parent,
            # then forget the link so this happens only once.
            p[key] = self
            object.__delattr__(self, '__parent')
            object.__delattr__(self, '__key')

    def __add__(self, other):
        # An empty (auto-created) node acts as the identity for "+", so
        # ``d.counter += 1`` works on a key that does not exist yet.
        if not self.keys():
            return other
        else:
            self_type = type(self).__name__
            other_type = type(other).__name__
            msg = "unsupported operand type(s) for +: '{}' and '{}'"
            raise TypeError(msg.format(self_type, other_type))

    @classmethod
    def _hook(cls, item):
        """Recursively convert dicts (also inside lists/tuples) to ``cls``."""
        if isinstance(item, dict):
            return cls(item)
        elif isinstance(item, (list, tuple)):
            return type(item)(cls._hook(elem) for elem in item)
        return item

    def __getattr__(self, item):
        return self.__getitem__(item)

    def __missing__(self, name):
        # Same defensive flag lookup as in __setitem__: an instance recreated
        # by copy.copy()/pickle has no '__frozen' entry and must behave like
        # an unfrozen dictionary instead of raising AttributeError.
        if vars(self).get('__frozen', False):
            raise KeyError(name)
        return self.__class__(__parent=self, __key=name)

    def __delattr__(self, name):
        del self[name]

    def to_dict(self):
        """Return a plain ``dict`` copy with nested AttrDicts converted too."""
        base = {}
        for key, value in self.items():
            if isinstance(value, type(self)):
                base[key] = value.to_dict()
            elif isinstance(value, (list, tuple)):
                base[key] = type(value)(
                    item.to_dict() if isinstance(item, type(self)) else
                    item for item in value)
            else:
                base[key] = value
        return base

    def copy(self):
        return copy.copy(self)

    def deepcopy(self):
        return copy.deepcopy(self)

    def __deepcopy__(self, memo):
        other = self.__class__()
        memo[id(self)] = other
        for key, value in self.items():
            other[copy.deepcopy(key, memo)] = copy.deepcopy(value, memo)
        return other

    def update(self, *args, **kwargs):
        """Merge another mapping in place; dict values are merged recursively."""
        other = {}
        if args:
            if len(args) > 1:
                raise TypeError()
            other.update(args[0])
        other.update(kwargs)
        for k, v in other.items():
            if ((k not in self) or
                    (not isinstance(self[k], dict)) or
                    (not isinstance(v, dict))):
                self[k] = v
            else:
                self[k].update(v)

    def __getnewargs__(self):
        return tuple(self.items())

    def __getstate__(self):
        return self

    def __setstate__(self, state):
        self.update(state)

    def __or__(self, other):
        if not isinstance(other, (AttrDict, dict)):
            return NotImplemented
        new = AttrDict(self)
        new.update(other)
        return new

    def __ror__(self, other):
        if not isinstance(other, (AttrDict, dict)):
            return NotImplemented
        new = AttrDict(other)
        new.update(self)
        return new

    def __ior__(self, other):
        self.update(other)
        return self

    def setdefault(self, key, default=None):
        if key in self:
            return self[key]
        else:
            self[key] = default
            return default

    def freeze(self, shouldFreeze=True):
        """Forbid (or, with ``False``, re-allow) new keys, recursing into AttrDict values."""
        object.__setattr__(self, '__frozen', shouldFreeze)
        for key, val in self.items():
            if isinstance(val, AttrDict):
                val.freeze(shouldFreeze)

    def unfreeze(self):
        self.freeze(False)
class Jinja:
    """Render a Jinja2 template file with a data mapping.

    The rendered text is produced once, in the constructor, and kept in
    ``self.output``; ``save`` and ``dump`` only write it out.
    """

    def __init__(self, template_path, data, allow_missing=True):
        """
        Given a path to a (jinja2) template and a data object, substitute the
        template file with data.

        :param template_path: path to an existing template file
        :param data: mapping expanded as keyword arguments to ``render``
        :param allow_missing: when True, undefined variables use
            ``jinja2.Undefined``; when False, ``jinja2.StrictUndefined`` is
            used and rendering fails on the first undefined variable
        :raises NotImplementedError: if ``template_path`` is not a file
        """

        self.data = data
        self.undefined = jinja2.Undefined if allow_missing else jinja2.StrictUndefined

        if Path(template_path).is_file():
            self.template_path = Path(template_path)
            self.output = self._render_file()
        else:
            raise NotImplementedError("Unable to handle templates other than files")

    def _render_file(self):
        """Render ``self.template_path`` using its parent directory as the search path."""
        template_dir = self.template_path.parent
        template_file = self.template_path.relative_to(template_dir)

        loader = jinja2.FileSystemLoader(template_dir)

        return self._render(str(template_file), loader)

    def _render(self, template_name, loader):
        """Load ``template_name`` through ``loader`` and render it with ``self.data``."""
        env = jinja2.Environment(loader=loader, undefined=self.undefined)
        template = env.get_template(template_name)
        try:
            output = template.render(**self.data)
        except jinja2.UndefinedError as ee:
            # Keep the original error attached so the traceback still shows
            # which variable was undefined and where.
            raise Exception(f"Undefined variable in jinja template\n{ee}") from ee

        return output

    def save(self, output_file):
        """Write the rendered text to ``output_file`` encoded as UTF-8."""
        with open(output_file, 'wb') as fh:
            fh.write(self.output.encode("utf-8"))

    def dump(self):
        """Write the rendered text to standard output encoded as UTF-8."""
        # Write bytes to the existing binary layer instead of wrapping it in a
        # temporary TextIOWrapper: such a wrapper closes sys.stdout.buffer when
        # it is garbage collected, breaking every later write to stdout.
        sys.stdout.flush()
        sys.stdout.buffer.write(self.output.encode("utf-8"))
        sys.stdout.buffer.flush()
class TemplateConstants:
    """Keys of ``Template.substitutions`` (one per delimiter style) and the pair type."""
    DOLLAR_CURLY_BRACE = '${}'
    DOLLAR_PARENTHESES = '$()'
    DOUBLE_CURLY_BRACES = '{{}}'
    AT_SQUARE_BRACES = '@[]'
    AT_ANGLE_BRACKETS = '@<>'

    SubPair = namedtuple('SubPair', ['regex', 'slice'])


class Template:

    """
    Utility for substituting variables in a template. The template can be the contents of a whole file
    as a string (substitute_string) or a complex dictionary (substitute_structure).
    substitutions defines the different types of variables with a regex and a slice:
    - the regex is supposed to find the whole variable, e.g. $(variable)
    - the slice indicates how to slice the value returned by the regex to obtain the variable name; in the
      case of $(variable), the slice is 2, -1 to remove $( and ).
    New types of variables can be added following those rules.

    The regexes allow for at least one nested variable: $($(variable)) is processed correctly, but the
    substitution needs more than one pass.

    If the data is deeper than a simple dictionary or has lists in it, build_index creates a flat
    dictionary holding the keys from all levels (lists, dicts); pass index.get to the substitute methods.
    substitute_with_dependencies does this automatically.
    """

    # Raw strings: the previous non-raw literals relied on invalid escape
    # sequences such as '\$' and '\(' which newer Pythons warn about.
    substitutions = {
        TemplateConstants.DOLLAR_CURLY_BRACE: TemplateConstants.SubPair(re.compile(r'\$\{.*?\}+'), slice(2, -1)),
        TemplateConstants.DOLLAR_PARENTHESES: TemplateConstants.SubPair(re.compile(r'\$\(.*?\)+'), slice(2, -1)),
        TemplateConstants.DOUBLE_CURLY_BRACES: TemplateConstants.SubPair(re.compile(r'\{\{.*?\}\}+'), slice(2, -2)),
        TemplateConstants.AT_SQUARE_BRACES: TemplateConstants.SubPair(re.compile(r'@\[.*?\]+'), slice(2, -1)),
        TemplateConstants.AT_ANGLE_BRACKETS: TemplateConstants.SubPair(re.compile(r'@<.*?>+'), slice(2, -1))
    }

    @staticmethod
    def _is_container_sequence(obj):
        """True for list-like sequences; str and bytes count as plain values."""
        return isinstance(obj, Sequence) and not isinstance(obj, (str, bytes))

    @staticmethod
    def _is_scalar(obj):
        """True for strings and for any object that has no length."""
        if isinstance(obj, str):
            return True
        try:
            len(obj)
        except TypeError:
            return True
        return False

    @classmethod
    def find_variables(cls, variable_to_substitute: str, var_type: str):
        """Return the names of all var_type variables found in the string."""
        pair = cls.substitutions[var_type]
        return [x[pair.slice] for x in pair.regex.findall(variable_to_substitute)]

    @classmethod
    def substitute_string(cls, variable_to_substitute, var_type: str, get_value):
        """
        Substitute variables of the form var_type (e.g. DOLLAR_CURLY_BRACE) with the value returned by
        get_value(variable_name); variables for which get_value returns None are left untouched.
        If substituting from a dictionary my_dict, pass my_dict.get.

        - str values are inserted as is; int and float values are converted with str().
        - a container value (dict, list, ...) replaces the whole result, and only when the template
          consists of exactly that one variable; otherwise the variable is left in place.
        - any other object without a length (e.g. a date) is returned as is when the template is exactly
          that one variable, and inserted via str() when it is part of a larger string.
        - a nested variable, e.g. $($(name)), has its inner part resolved in this pass; the outer
          variable is resolved by a later pass.

        Non-string input is returned unchanged.
        """
        if not isinstance(variable_to_substitute, str):
            return variable_to_substitute

        pair = cls.substitutions[var_type]
        variables = pair.regex.findall(variable_to_substitute)
        for variable in variables:
            if not isinstance(variable_to_substitute, str):
                # The template was replaced by an object; nothing left to edit.
                break
            name = variable[pair.slice]
            value = get_value(name)

            if value is None:
                # Unknown name: it may itself contain a variable to resolve.
                if pair.regex.search(name) is not None:
                    inner = cls.substitute_string(name, var_type, get_value)
                    if isinstance(inner, str):
                        variable_to_substitute = variable_to_substitute.replace(name, inner)
                continue

            # True when the template is nothing but this single variable.
            is_whole = (len(variables) == 1 and
                        variable_to_substitute.replace(name, '') == var_type)

            if isinstance(value, str):
                variable_to_substitute = variable_to_substitute.replace(variable, value)
            elif cls._is_scalar(value):
                if isinstance(value, (int, float)) or not is_whole:
                    variable_to_substitute = variable_to_substitute.replace(variable, str(value))
                else:
                    variable_to_substitute = value
            elif is_whole:
                variable_to_substitute = value
        return variable_to_substitute

    @classmethod
    def substitute_structure(cls, structure_to_substitute, var_type: str, get_value):
        """
        Traverse a structure and substitute variables in strings, lists, tuples
        and nested dictionaries. Dictionaries and mutable sequences are updated
        in place; tuples are rebuilt. The (possibly new) structure is returned.
        """
        if isinstance(structure_to_substitute, dict):
            for key, item in structure_to_substitute.items():
                structure_to_substitute[key] = cls.substitute_structure(
                    item, var_type, get_value)
        elif isinstance(structure_to_substitute, tuple):
            items = [cls.substitute_structure(item, var_type, get_value)
                     for item in structure_to_substitute]
            if hasattr(structure_to_substitute, '_fields'):  # namedtuple
                structure_to_substitute = type(structure_to_substitute)(*items)
            else:
                structure_to_substitute = type(structure_to_substitute)(items)
        elif cls._is_container_sequence(structure_to_substitute):
            for i, item in enumerate(structure_to_substitute):
                structure_to_substitute[i] = cls.substitute_structure(
                    item, var_type, get_value)
        else:
            structure_to_substitute = cls.substitute_string(
                structure_to_substitute, var_type, get_value)
        return structure_to_substitute

    @classmethod
    def substitute_structure_from_environment(cls, structure_to_substitute):
        """Substitute ${VAR} variables using the process environment."""
        return cls.substitute_structure(structure_to_substitute, TemplateConstants.DOLLAR_CURLY_BRACE, os.environ.get)

    @classmethod
    def substitute_with_dependencies(cls, dictionary, keys, var_type: str, shallow_precedence=True, excluded=()):
        """
        Given a dictionary with a complex (deep) structure, substitute variables using keys, another
        dictionary that may also have a deep structure (dictionary and keys can be the same object to
        substitute in place).
        An index is built from keys (see build_index) and used to substitute values in dictionary.
        Variables may refer to other variables, so substitution is repeated until a pass leaves the
        dictionary unchanged (convergence).
        """
        all_variables = cls.build_index(keys, excluded, shallow_precedence)
        previous = {}
        while dictionary != previous:
            previous = copy.deepcopy(dictionary)
            dictionary = cls.substitute_structure(
                dictionary, var_type, all_variables.get)
        return dictionary

    @classmethod
    def build_index(cls, dictionary, excluded=None, shallow_precedence=True):
        """
        Build a flat dictionary of every key found at any depth with its value.
        If the same key occurs more than once, the value nearest the root wins
        when shallow_precedence is True; otherwise the value visited last wins.
        Keys listed in excluded are never indexed (their values are still traversed).
        """
        if excluded is None:
            excluded = set()
        index = {}

        def build(structure):
            if isinstance(structure, dict):
                for key, item in structure.items():
                    if key not in excluded and (key not in index or not shallow_precedence):
                        index[key] = item
                    build(item)
            elif cls._is_container_sequence(structure):
                for item in structure:
                    build(item)

        build(dictionary)
        return index
# Group names double as keyword arguments of datetime.datetime / datetime.timedelta.
_DATETIME_RE = re.compile(
    r"(?P<year>\d{4})(-)?(?P<month>\d{2})(-)?(?P<day>\d{2})"
    r"(T)?(?P<hour>\d{2})?(:)?(?P<minute>\d{2})?(:)?(?P<second>\d{2})?(Z)?")

_TIMEDELTA_HOURS_RE = re.compile(
    r"(?P<sign>[+-])?((?P<days>\d+)[d])?(T)?((?P<hours>\d+)[H])?"
    r"((?P<minutes>\d+)[M])?((?P<seconds>\d+)[S])?(Z)?")
_TIMEDELTA_TIME_RE = re.compile(
    r"(?P<sign>[+-])?((?P<days>\d+)\s+day(s)?,\s)?(T)?(?P<hours>\d{1,2})?"
    r"(:(?P<minutes>\d{1,2}))?(:(?P<seconds>\d{1,2}))?")


def to_datetime(dtstr):
    """
    Translate a string into a datetime object in a generic way.
    The string can also support ISO 8601 representation.

    Formats accepted (T, Z, -, :) are optional:
    YYYY-mm-dd
    YYYY-mm-ddTHHZ
    YYYY-mm-ddTHH:MMZ
    YYYY-mm-ddTHH:MM:SSZ

    Raises Exception when the string does not start with a date, and
    ValueError (from datetime) when a field is out of range.
    """

    mm = _DATETIME_RE.match(dtstr)
    if mm:
        return datetime.datetime(**{kk: int(vv) for kk, vv in mm.groupdict().items() if vv})
    else:
        raise Exception(f"Bad datetime string: '{dtstr}'")


def to_timedelta(tdstr):
    """
    Translate a string into a timedelta object in a generic way

    Formats accepted (<[+-]>, T, Z) are optional:
    <[+-]><number>dT<number>H<number>M<number>S<Z>
    <[+-]><number> day(s), hh:mm:ss

    <[+-]> can be +/-, default is +; a leading '-' negates the whole duration.

    Raises Exception when the string is not entirely in one of these formats
    or contains no numeric component at all.
    """

    if any(x in tdstr for x in ('day', ':')):
        mm = _TIMEDELTA_TIME_RE.fullmatch(tdstr)  # timedelta representation
    else:
        mm = _TIMEDELTA_HOURS_RE.fullmatch(tdstr)  # ISO 8601 representation

    # Every part of both patterns is optional, so a prefix match would accept
    # any text (as an empty match). Require the whole string to match and at
    # least one number to be present.
    parts = mm.groupdict() if mm else {}
    sign = parts.pop('sign', None)
    if not any(vv is not None for vv in parts.values()):
        raise Exception(f"Bad timedelta string: '{tdstr}'")

    dt = datetime.timedelta(**{kk: float(vv) for kk, vv in parts.items() if vv is not None})
    return -dt if sign == '-' else dt


def to_YMDH(dt):
    """
    Translate a datetime object to 'YYYYmmddHH' format.
    """
    try:
        return dt.strftime('%Y%m%d%H')
    except Exception as ee:
        raise Exception(f"Bad datetime: '{dt}'") from ee


def to_YMD(dt):
    """
    Translate a datetime object to 'YYYYmmdd' format.
    """
    try:
        return dt.strftime('%Y%m%d')
    except Exception as ee:
        raise Exception(f"Bad datetime: '{dt}'") from ee


def strftime(dt, fmt):
    """
    Return a formatted string from a datetime object.
    """
    try:
        return dt.strftime(fmt)
    except Exception as ee:
        raise Exception(f"Bad datetime (format): '{dt} ({fmt})'") from ee


def strptime(dtstr, fmt):
    """
    Translate a formatted string into datetime object.
    """
    try:
        return datetime.datetime.strptime(dtstr, fmt)
    except Exception as ee:
        raise Exception(f"Bad datetime string (format): '{dtstr} ({fmt})'") from ee
class YAMLFile(AttrDict):
    """
    Reads a YAML file (or YAML text) and stores its top-level mapping in
    this AttrDict via ``update``.
    This is the entry point for all YAML files.
    """

    def __init__(self, path=None, data=None):
        """
        :param path: path to a YAML file; takes precedence over ``data``
        :param data: YAML text or stream, used only when ``path`` is not given

        With neither argument the object is simply empty.
        """
        super().__init__()
        # Must be bound even when neither input is supplied; it used to be
        # referenced unassigned in that case (UnboundLocalError).
        config = None
        if path:
            config = parse_yaml(path=path)
        elif data:
            config = parse_yaml(data=data)

        if config:
            self.update(config)

    def save(self, target):
        save_as_yaml(self, target)

    def dump(self):
        return dump_as_yaml(self)

    def as_dict(self):
        return vanilla_yaml(self)


def save_as_yaml(data, target):
    """Write ``data`` (converted by vanilla_yaml) to the file ``target``."""
    # specifies a wide file so that long strings are on one line.
    with open(target, 'w') as fh:
        yaml.safe_dump(vanilla_yaml(data), fh,
                       width=100000, sort_keys=False)


def dump_as_yaml(data):
    """Return ``data`` (converted by vanilla_yaml) as YAML text."""
    return yaml.dump(vanilla_yaml(data),
                     width=100000, sort_keys=False)


def parse_yaml(path=None, data=None,
               encoding='utf-8', loader=yaml.SafeLoader):
    """
    Load a yaml configuration file and resolve any environment variables.
    Scalars tagged !ENV, and untagged plain scalars containing ${VAR_NAME},
    have each ${VAR_NAME} replaced by the value of that environment variable;
    variables that are not set are left as ${VAR_NAME}.
    A scalar tagged !INC is expanded the same way and then treated as the path
    of another YAML file whose parsed content becomes the value.
    E.g.:
    database:
        host: !ENV ${HOST}
        port: !ENV ${PORT}
    app:
        log_path: !ENV '/var/${LOG_PATH}'
        something_else: !ENV '${AWESOME_ENV_VAR}/var/${A_SECOND_AWESOME_VAR}'
    :param str path: the path to the yaml file
    :param str data: the yaml data itself as a stream
    :param Type[yaml.loader] loader: Specify which loader to use. Defaults to yaml.SafeLoader
    :param str encoding: the encoding of the data if a path is specified, defaults to utf-8
    :return: the dict configuration
    :rtype: Dict[str, Any]
    :raises ValueError: if neither path nor data is given

    Adopted from:
    https://dev.to/mkaranasou/python-yaml-configuration-with-environment-variables-parsing-2ha6
    """
    # define tags
    envtag = '!ENV'
    inctag = '!INC'
    # pattern for global vars: look for ${word}
    pattern = re.compile(r'.*?\$\{(\w+)\}.*?')
    base_loader = loader or yaml.SafeLoader

    class _EnvLoader(base_loader):
        """Private loader so the tags below are not registered on the shared class."""

    # PyYAML stores resolvers and constructors on the loader class. Registering
    # them on yaml.SafeLoader itself (as before) changed every later safe_load
    # in the process and grew the resolver list on each call.
    # Only the !ENV resolver is registered: a second implicit resolver with the
    # same pattern for !INC could never be selected, the first match wins.
    _EnvLoader.add_implicit_resolver(envtag, pattern, None)

    def expand_env_variables(line):
        for name in pattern.findall(line):  # every ${name} in the line
            line = line.replace(
                f'${{{name}}}', os.environ.get(name, f'${{{name}}}')
            )
        return line

    def constructor_env_variables(node_loader, node):
        """
        Extracts the environment variable from the node's value
        :param yaml.Loader node_loader: the yaml loader
        :param node: the current node in the yaml
        :return: the parsed string that contains the value of the environment
        variable
        """
        value = node_loader.construct_scalar(node)
        return expand_env_variables(value)

    def constructor_include_variables(node_loader, node):
        """
        Parses the file named by the node's value
        :param yaml.Loader node_loader: the yaml loader
        :param node: the current node in the yaml
        :return: the content of the file to be included
        """
        value = node_loader.construct_scalar(node)
        value = expand_env_variables(value)
        # Included files are read with the caller's encoding and loader.
        return parse_yaml(path=value, encoding=encoding, loader=base_loader)

    _EnvLoader.add_constructor(envtag, constructor_env_variables)
    _EnvLoader.add_constructor(inctag, constructor_include_variables)

    if path:
        with open(path, 'r', encoding=encoding) as conf_data:
            return yaml.load(conf_data, Loader=_EnvLoader)
    elif data:
        return yaml.load(data, Loader=_EnvLoader)
    else:
        raise ValueError(
            "Either a path or data should be defined as input")


def vanilla_yaml(ctx):
    """
    Transform an input object of complex type as a plain type:
    mappings (AttrDict or dict) become plain dicts, lists are converted
    element by element, datetimes become 'YYYY-mm-ddTHH:MM:SSZ' strings and
    everything else is returned unchanged.
    """
    if isinstance(ctx, dict):
        return {kk: vanilla_yaml(vv) for kk, vv in ctx.items()}
    elif isinstance(ctx, list):
        return [vanilla_yaml(vv) for vv in ctx]
    elif isinstance(ctx, datetime.datetime):
        return ctx.strftime("%Y-%m-%dT%H:%M:%SZ")
    else:
        return ctx
def test_substitute_string_from_dict_double_curly():
    """
    Substitute with {{v}}
    """
    template = '{{greeting}} to {{the_world}}'
    dictionary = {
        'greeting': 'Hello',
        'the_world': 'the world'
    }
    final = 'Hello to the world'
    assert Template.substitute_structure(template,
                                         TemplateConstants.DOUBLE_CURLY_BRACES,
                                         dictionary.get) == final


def test_substitute_string_from_dict_at_square():
    """
    Substitute with @[v]
    """
    template = '@[greeting] to @[the_world]'
    dictionary = {
        'greeting': 'Hello',
        'the_world': 'the world'
    }
    final = 'Hello to the world'
    assert Template.substitute_structure(template,
                                         TemplateConstants.AT_SQUARE_BRACES,
                                         dictionary.get) == final


def test_substitute_string_from_dict_at_carrots():
    """
    Substitute with @<v>
    """
    # The variables must be wrapped in @<...>; without them the template
    # contains nothing to substitute and can never equal `final`.
    template = '@<greeting> to @<the_world>'
    dictionary = {
        'greeting': 'Hello',
        'the_world': 'the world'
    }
    final = 'Hello to the world'
    assert Template.substitute_structure(template,
                                         TemplateConstants.AT_ANGLE_BRACKETS,
                                         dictionary.get) == final


def test_substitute_string_from_environment():
    """
    Substitute from environment
    """
    template = '${GREETING} to ${THE_WORLD}'
    os.environ['GREETING'] = 'Hello'
    os.environ['THE_WORLD'] = 'the world'
    final = 'Hello to the world'
    try:
        assert Template.substitute_structure_from_environment(template) == final
    finally:
        # Do not leak the variables into tests that run afterwards.
        del os.environ['GREETING']
        del os.environ['THE_WORLD']


def test_substitute_with_dependencies():
    """
    Variables may refer to other variables at any depth; root-level keys win.
    """
    # Named `config`/`expected` rather than `input`/`output` so the builtin
    # input() is not shadowed.
    config = {
        'root': '/home/user',
        'config_file': 'config.yaml',
        'config': '$(root)/config/$(config_file)',
        'greeting': 'hello $(world)',
        'world': 'world',
        'complex': '$(dictionary)',
        'dictionary': {
            'a': 1,
            'b': 2
        },
        'dd': {'2': 'a', '1': 'b'},
        'ee': {'3': 'a', '1': 'b'},
        'ff': {'4': 'a', '1': 'b $(greeting)'},
        'host': {
            'name': 'xenon',
            'config': '$(root)/hosts',
            'config_file': '$(config)/$(name).config.yaml',
            'proxy2': {
                'config': '$(root)/$(name).$(greeting).yaml',
                'list': [['$(root)/$(name)', 'toto.$(name).$(greeting)'], '$(config_file)']
            }
        }
    }
    expected = {'complex': {'a': 1, 'b': 2},
                'config': '/home/user/config/config.yaml',
                'config_file': 'config.yaml',
                'dd': {'1': 'b', '2': 'a'},
                'dictionary': {'a': 1, 'b': 2},
                'ee': {'1': 'b', '3': 'a'},
                'ff': {'1': 'b hello world', '4': 'a'},
                'greeting': 'hello world',
                'host': {'config': '/home/user/hosts',
                         'config_file': '/home/user/config/config.yaml/xenon.config.yaml',
                         'name': 'xenon',
                         'proxy2': {'config': '/home/user/xenon.hello world.yaml',
                                    'list': [['/home/user/xenon', 'toto.xenon.hello world'],
                                             'config.yaml']}},
                'root': '/home/user',
                'world': 'world'}

    assert Template.substitute_with_dependencies(config, config, TemplateConstants.DOLLAR_PARENTHESES) == expected
def mkdir_p(path):
    """Create ``path`` and any missing parents; an existing directory is fine.

    Raises OSError (chained to the underlying error) when the directory
    cannot be created, e.g. because ``path`` exists as a regular file.
    """
    try:
        os.makedirs(path, exist_ok=True)
    except OSError as exc:
        raise OSError(f"unable to create directory at {path}") from exc


mkdir = mkdir_p


def rmdir(dir_path):
    """Remove ``dir_path`` and everything below it.

    Raises OSError (chained to the underlying error) on failure, including
    when ``dir_path`` does not exist.
    """
    try:
        shutil.rmtree(dir_path)
    except OSError as exc:
        raise OSError(f"unable to remove {dir_path}") from exc


@contextlib.contextmanager
def chdir(path):
    """Context manager: run the ``with`` body in ``path``, then return to the previous directory."""
    cwd = os.getcwd()
    try:
        os.chdir(path)
        yield
    finally:
        # Runs on success and on error alike, so the caller always ends up
        # back where it started.
        os.chdir(cwd)


def rm_p(path):
    """Remove the file ``path``; a file that does not exist is not an error.

    Raises OSError (chained to the underlying error) for any other failure.
    """
    try:
        os.unlink(path)
    except OSError as exc:
        if exc.errno == errno.ENOENT:
            pass
        else:
            raise OSError(f"unable to remove {path}") from exc
use logging os.chdir(cwd) diff --git a/ush/python/pygw/src/pygw/timetools.py b/ush/python/pygw/src/pygw/timetools.py index 4a9e5f8e27..40f4a6c5df 100644 --- a/ush/python/pygw/src/pygw/timetools.py +++ b/ush/python/pygw/src/pygw/timetools.py @@ -3,7 +3,8 @@ __all__ = ["to_datetime", "to_timedelta", - "to_YMDH", "to_YMD", + "datetime_to_YMDH", "datetime_to_YMD", + "timedelta_to_HMS", "strftime", "strptime"] @@ -71,7 +72,7 @@ def to_timedelta(tdstr): raise Exception(f"Bad timedelta string: '{tdstr}'") -def to_YMDH(dt): +def datetime_to_YMDH(dt): """ Translate a datetime object to 'YYYYmmddHH' format. """ @@ -81,7 +82,7 @@ def to_YMDH(dt): raise Exception(f"Bad datetime: '{dt}'") -def to_YMD(dt): +def datetime_to_YMD(dt): """ Translate a datetime object to 'YYYYmmdd' format. """ @@ -91,6 +92,18 @@ def to_YMD(dt): raise Exception(f"Bad datetime: '{dt}'") +def timedelta_to_HMS(td): + """ + Translate a timedelta object to 'HHMMSS' format. + """ + try: + hours, remainder = divmod(int(td.total_seconds()), 3600) + minutes, seconds = divmod(remainder, 60) + return f"{hours:02d}:{minutes:02d}:{seconds:02d}" + except Exception as ee: + raise Exception(f"Bad timedelta: '{td}'") + + def strftime(dt, fmt): """ Return a formatted string from a datetime object. diff --git a/ush/python/pygw/src/pygw/yaml_file.py b/ush/python/pygw/src/pygw/yaml_file.py index db8cb213d6..e25c18619f 100644 --- a/ush/python/pygw/src/pygw/yaml_file.py +++ b/ush/python/pygw/src/pygw/yaml_file.py @@ -17,6 +17,8 @@ class YAMLFile(AttrDict): def __init__(self, path=None, data=None): super().__init__() + if path and data: + print("Ignoring 'data' and using 'path' argument") # TODO: use logging if path: config = parse_yaml(path=path) elif data: