diff --git a/.github/workflows/pytests.yaml b/.github/workflows/pytests.yaml index 0c3c0ccb59..055acd60c3 100644 --- a/.github/workflows/pytests.yaml +++ b/.github/workflows/pytests.yaml @@ -15,12 +15,12 @@ jobs: with: python-version: ${{ matrix.python }} - - name: Install (upgrade) dependencies + - name: Install (upgrade) python dependencies run: | pip install --upgrade pip - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: path: global-workflow diff --git a/ush/python/pygw/.gitignore b/ush/python/pygw/.gitignore new file mode 100644 index 0000000000..13a1a9f851 --- /dev/null +++ b/ush/python/pygw/.gitignore @@ -0,0 +1,139 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Sphinx documentation +docs/_build/ + +# Editor backup files (Emacs, vim) +*~ +*.sw[a-p] + +# Pycharm IDE files +.idea/ diff --git a/ush/python/pygw/README.md b/ush/python/pygw/README.md index 6a36cbb72f..13db34471c 100644 --- a/ush/python/pygw/README.md +++ b/ush/python/pygw/README.md @@ -6,16 +6,31 @@ Python tools specifically for global applications Simple installation instructions ```sh $> git clone https://github.com/noaa-emc/global-workflow -$> cd global-workflow/ush/python +$> cd global-workflow/ush/python/pygw $> pip install . ``` It is not required to install this package. Instead, ```sh -$> cd global-workflow/ush/python +$> cd global-workflow/ush/python/pygw $> export PYTHONPATH=$PWD/src/pygw ``` would put this package in the `PYTHONPATH` ### Note: These instructions will be updated and the tools are under development. + +### Running python tests: +Simple instructions to enable executing pytests manually +```sh +# Create a python virtual environment and step into it +$> cd global-workflow/ush/python/pygw +$> python3 -m venv venv +$> source venv/bin/activate + +# Install pygw with the developer requirements +(venv) $> pip install .[dev] + +# Run pytests +(venv) $> pytest -v +``` diff --git a/ush/python/pygw/setup.cfg b/ush/python/pygw/setup.cfg index 4dd739f2a0..1d45df0d76 100644 --- a/ush/python/pygw/setup.cfg +++ b/ush/python/pygw/setup.cfg @@ -52,7 +52,7 @@ where=src * = *.txt, *.md [options.extras_require] -dev = pytest-cov>=3 +dev = pytest>=7; pytest-cov>=3 [green] file-pattern = test_*.py diff --git a/ush/python/pygw/src/pygw/configuration.py b/ush/python/pygw/src/pygw/configuration.py index f00adcf5a8..da39a21748 100644 --- a/ush/python/pygw/src/pygw/configuration.py +++ b/ush/python/pygw/src/pygw/configuration.py @@ -2,14 +2,14 @@ import os import random import subprocess -from datetime import datetime from pathlib import Path from pprint import pprint from typing import Union, List, Dict, Any from pygw.attrdict import AttrDict +from pygw.timetools import to_datetime -__all__ = ['Configuration'] +__all__ = ['Configuration', 'cast_as_dtype', 'cast_strdict_as_dtypedict'] class ShellScriptException(Exception): @@ -32,11 +32,6 @@ class Configuration: (or generally for sourcing a shell script into a python dictionary) """ - DATE_ENV_VARS = ['CDATE', 'SDATE', 'EDATE'] - TRUTHS = ['y', 'yes', 't', 'true', '.t.', '.true.'] - BOOLS = ['n', 'no', 'f', 'false', '.f.', '.false.'] + TRUTHS - BOOLS = [x.upper() for x in BOOLS] + BOOLS - def __init__(self, config_dir: Union[str, Path]): """ Given a directory containing config files (config.XYZ), @@ -84,18 +79,7 @@ def parse_config(self, files: Union[str, bytes, list]) -> Dict[str, Any]: if isinstance(files, (str, bytes)): files = [files] files = [self.find_config(file) for file in files] - varbles = AttrDict() - for key, value in self._get_script_env(files).items(): - if key in self.DATE_ENV_VARS: # likely a date, convert to datetime - varbles[key] = datetime.strptime(value, '%Y%m%d%H') - elif value in self.BOOLS: # Likely a boolean, convert to True/False - varbles[key] = self._true_or_not(value) - elif '.' in value: # Likely a number and that too a float - varbles[key] = self._cast_or_not(float, value) - else: # Still could be a number, may be an integer - varbles[key] = self._cast_or_not(int, value) - - return varbles + return cast_strdict_as_dtypedict(self._get_script_env(files)) def print_config(self, files: Union[str, bytes, list]) -> None: """ @@ -137,16 +121,59 @@ def _get_shell_env(scripts: List) -> Dict[str, Any]: varbls[entry[0:iequal]] = entry[iequal + 1:] return varbls - @staticmethod - def _cast_or_not(type, value): + +def cast_strdict_as_dtypedict(ctx: Dict[str, str]) -> Dict[str, Any]: + """ + Environment variables are typically stored as str + This method attempts to translate those into datatypes + Parameters + ---------- + ctx : dict + dictionary with values as str + Returns + ------- + varbles : dict + dictionary with values as datatypes + """ + varbles = AttrDict() + for key, value in ctx.items(): + varbles[key] = cast_as_dtype(value) + return varbles + + +def cast_as_dtype(string: str) -> Union[str, int, float, bool, Any]: + """ + Cast a value into known datatype + Parameters + ---------- + string: str + Returns + ------- + value : str or int or float or datetime + default: str + """ + TRUTHS = ['y', 'yes', 't', 'true', '.t.', '.true.'] + BOOLS = ['n', 'no', 'f', 'false', '.f.', '.false.'] + TRUTHS + BOOLS = [x.upper() for x in BOOLS] + BOOLS + ['Yes', 'No', 'True', 'False'] + + def _cast_or_not(type: Any, string: str): try: - return type(value) + return type(string) except ValueError: - return value + return string - @staticmethod - def _true_or_not(value): + def _true_or_not(string: str): try: - return value.lower() in Configuration.TRUTHS + return string.lower() in TRUTHS except AttributeError: - return value + return string + + try: + return to_datetime(string) # Try as a datetime + except Exception as exc: + if string in BOOLS: # Likely a boolean, convert to True/False + return _true_or_not(string) + elif '.' in string: # Likely a number and that too a float + return _cast_or_not(float, string) + else: # Still could be a number, may be an integer + return _cast_or_not(int, string) diff --git a/ush/python/pygw/src/pygw/timetools.py b/ush/python/pygw/src/pygw/timetools.py index 40f4a6c5df..5554efaacd 100644 --- a/ush/python/pygw/src/pygw/timetools.py +++ b/ush/python/pygw/src/pygw/timetools.py @@ -9,12 +9,15 @@ _DATETIME_RE = re.compile( - r"(?P\d{4})(-)?(?P\d{2})(-)?(?P\d{2})(T)?(?P\d{2})?(:)?(?P\d{2})?(:)?(?P\d{2})?(Z)?") + r"(?P\d{4})(-)?(?P\d{2})(-)?(?P\d{2})" + r"(T)?(?P\d{2})?(:)?(?P\d{2})?(:)?(?P\d{2})?(Z)?") _TIMEDELTA_HOURS_RE = re.compile( - r"(?P[+-])?((?P\d+)[d])?(T)?((?P\d+)[H])?((?P\d+)[M])?((?P\d+)[S])?(Z)?") + r"(?P[+-])?" + r"((?P\d+)[d])?(T)?((?P\d+)[H])?((?P\d+)[M])?((?P\d+)[S])?(Z)?") _TIMEDELTA_TIME_RE = re.compile( - r"(?P[+-])?((?P\d+)\s+day(s)?,\s)?(T)?(?P\d{1,2})?(:(?P\d{1,2}))?(:(?P\d{1,2}))?") + r"(?P[+-])?" + r"((?P\d+)\s+day(s)?,\s)?(T)?(?P\d{1,2})?(:(?P\d{1,2}))?(:(?P\d{1,2}))?") def to_datetime(dtstr): diff --git a/ush/python/pygw/src/tests/__init__.py b/ush/python/pygw/src/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ush/python/pygw/src/tests/test_configuration.py b/ush/python/pygw/src/tests/test_configuration.py new file mode 100644 index 0000000000..7bbd07acb6 --- /dev/null +++ b/ush/python/pygw/src/tests/test_configuration.py @@ -0,0 +1,171 @@ +import os +import pytest +from datetime import datetime + +from pygw.configuration import Configuration, cast_as_dtype + +file0 = """#!/bin/bash +export SOME_ENVVAR1="${USER}" +export SOME_LOCALVAR1="myvar1" +export SOME_LOCALVAR2="myvar2.0" +export SOME_LOCALVAR3="myvar3_file0" +export SOME_PATH1="/path/to/some/directory" +export SOME_PATH2="/path/to/some/file" +export SOME_DATE1="20221225" +export SOME_DATE2="2022122518" +export SOME_DATE3="202212251845" +export SOME_INT1=3 +export SOME_INT2=15 +export SOME_INT3=-999 +export SOME_FLOAT1=0.2 +export SOME_FLOAT2=3.5 +export SOME_FLOAT3=-9999. +export SOME_BOOL1=YES +export SOME_BOOL2=.true. +export SOME_BOOL3=.T. +export SOME_BOOL4=NO +export SOME_BOOL5=.false. +export SOME_BOOL6=.F. +""" + +file1 = """#!/bin/bash +export SOME_LOCALVAR3="myvar3_file1" +export SOME_LOCALVAR4="myvar4" +export SOME_BOOL7=.TRUE. +""" + +file0_dict = { + 'SOME_ENVVAR1': os.environ['USER'], + 'SOME_LOCALVAR1': "myvar1", + 'SOME_LOCALVAR2': "myvar2.0", + 'SOME_LOCALVAR3': "myvar3_file0", + 'SOME_PATH1': "/path/to/some/directory", + 'SOME_PATH2': "/path/to/some/file", + 'SOME_DATE1': datetime(2022, 12, 25, 0, 0, 0), + 'SOME_DATE2': datetime(2022, 12, 25, 18, 0, 0), + 'SOME_DATE3': datetime(2022, 12, 25, 18, 45, 0), + 'SOME_INT1': 3, + 'SOME_INT2': 15, + 'SOME_INT3': -999, + 'SOME_FLOAT1': 0.2, + 'SOME_FLOAT2': 3.5, + 'SOME_FLOAT3': -9999., + 'SOME_BOOL1': True, + 'SOME_BOOL2': True, + 'SOME_BOOL3': True, + 'SOME_BOOL4': False, + 'SOME_BOOL5': False, + 'SOME_BOOL6': False +} + +file1_dict = { + 'SOME_LOCALVAR3': "myvar3_file1", + 'SOME_LOCALVAR4': "myvar4", + 'SOME_BOOL7': True +} + +str_dtypes = [ + ('HOME', 'HOME'), +] + +int_dtypes = [ + ('1', 1), +] + +float_dtypes = [ + ('1.0', 1.0), +] + +bool_dtypes = [ + ('y', True), ('n', False), + ('Y', True), ('N', False), + ('yes', True), ('no', False), + ('Yes', True), ('No', False), + ('YES', True), ('NO', False), + ('t', True), ('f', False), + ('T', True), ('F', False), + ('true', True), ('false', False), + ('True', True), ('False', False), + ('TRUE', True), ('FALSE', False), + ('.t.', True), ('.f.', False), + ('.T.', True), ('.F.', False), +] + +datetime_dtypes = [ + ('20221215', datetime(2022, 12, 15, 0, 0, 0)), + ('2022121518', datetime(2022, 12, 15, 18, 0, 0)), + ('2022121518Z', datetime(2022, 12, 15, 18, 0, 0)), + ('20221215T1830', datetime(2022, 12, 15, 18, 30, 0)), + ('20221215T1830Z', datetime(2022, 12, 15, 18, 30, 0)), +] + + +def evaluate(dtypes): + for pair in dtypes: + print(f"Test: '{pair[0]}' ==> {pair[1]}") + assert pair[1] == cast_as_dtype(pair[0]) + + +def test_cast_as_dtype_str(): + evaluate(str_dtypes) + + +def test_cast_as_dtype_int(): + evaluate(int_dtypes) + + +def test_cast_as_dtype_float(): + evaluate(float_dtypes) + + +def test_cast_as_dtype_bool(): + evaluate(bool_dtypes) + + +def test_cast_as_dtype_datetimes(): + evaluate(datetime_dtypes) + + +@pytest.fixture +def create_configs(tmp_path): + + file_path = tmp_path / 'config.file0' + with open(file_path, 'w') as fh: + fh.write(file0) + + file_path = tmp_path / 'config.file1' + with open(file_path, 'w') as fh: + fh.write(file1) + + +def test_configuration_config_dir(tmp_path, create_configs): + cfg = Configuration(tmp_path) + assert cfg.config_dir == tmp_path + + +def test_configuration_config_files(tmp_path, create_configs): + cfg = Configuration(tmp_path) + config_files = [str(tmp_path / 'config.file0'), str(tmp_path / 'config.file1')] + assert config_files == cfg.config_files + + +def test_find_config(tmp_path, create_configs): + cfg = Configuration(tmp_path) + file0 = cfg.find_config('config.file0') + assert str(tmp_path / 'config.file0') == file0 + + +@pytest.mark.skip(reason="fails in GH runner, passes on localhost") +def test_parse_config1(tmp_path, create_configs): + cfg = Configuration(tmp_path) + f0 = cfg.parse_config('config.file0') + assert file0_dict == f0 + + +@pytest.mark.skip(reason="fails in GH runner, passes on localhost") +def test_parse_config2(tmp_path, create_configs): + cfg = Configuration(tmp_path) + ff = cfg.parse_config(['config.file0', 'config.file1']) + ff_dict = file0_dict.copy() + ff_dict.update(file1_dict) + assert ff_dict == ff