From 38431f948594da1d7e4efb45db52a62e57477c72 Mon Sep 17 00:00:00 2001 From: Ken Kroenlein Date: Mon, 30 Jan 2023 09:00:35 -0700 Subject: [PATCH 1/3] Add support for scaling factors to gemd-python --- gemd/units/impl.py | 52 ++++++++++++++++++++++++++++----- gemd/units/tests/test_parser.py | 6 +++- setup.py | 2 +- 3 files changed, 51 insertions(+), 9 deletions(-) diff --git a/gemd/units/impl.py b/gemd/units/impl.py index cbcf74ea..fcb74d40 100644 --- a/gemd/units/impl.py +++ b/gemd/units/impl.py @@ -1,21 +1,59 @@ """Implementation of units.""" -from pint import UnitRegistry, Unit -import pkg_resources - -import functools -from typing import Union +import re +from pint import UnitRegistry, Unit +from pint.compat import tokenizer +from tokenize import NAME, NUMBER, OP # alias the error that is thrown when units are incompatible # this helps to isolate the dependence on pint from pint.errors import DimensionalityError as IncompatibleUnitsError # noqa Import from pint.errors import UndefinedUnitError +import functools +import pkg_resources +from typing import Union + # use the default unit registry for now DEFAULT_FILE = pkg_resources.resource_filename("gemd.units", "citrine_en.txt") -registry = UnitRegistry(filename=DEFAULT_FILE) -@functools.lru_cache(maxsize=None) +def _scaling_preprocessor(input_string: str) -> str: + global registry + tokens = tokenizer(input_string) + exponent = False + division = False + tight_division = False + scales = [] + + for token in tokens: + # Note that while this prevents adding a bunch of numbers to the registry, + # no test would break if the `exponent` logic were removed + if tight_division: + # A unit for a scaling factor is in the denominator if the factor is + scales[-1][-1] = token.type == NAME + tight_division = False + if not exponent and token.type == NUMBER: + scales.append([token.string, False]) + tight_division = division + exponent = token.type == OP and token.string in {"^", "**"} + division = token.type == OP and token.string in {"/", "//"} + + for scale, division in scales: + # There's probably something to be said for stashing these, but this sin + # should be ameliorated by the LRU cache + regex = rf"\b{re.escape(scale)}\b" + valid = "_" + scale.replace(".", "o").replace("+", "p").replace("-", "m") + trailing = "/" if division else "" + registry.define(f"{scale} = {scale} = {scale} = {valid}") + input_string = re.sub(regex, valid + trailing, input_string) + + return input_string + + +registry = UnitRegistry(filename=DEFAULT_FILE, preprocessors=[_scaling_preprocessor]) + + +@functools.lru_cache(maxsize=1024) def parse_units(units: Union[str, Unit, None]) -> Union[str, Unit, None]: """ Parse a string or Unit into a standard string representation of the unit. diff --git a/gemd/units/tests/test_parser.py b/gemd/units/tests/test_parser.py index 42b7dade..439dc0b0 100644 --- a/gemd/units/tests/test_parser.py +++ b/gemd/units/tests/test_parser.py @@ -20,11 +20,15 @@ def test_parse_expected(): reg("kg").u, "amu", # A line that was edited "Seconds", # Added support for some title-case units - "delta_Celsius / hour" # Added to make sure pint version is right (>0.10) + "delta_Celsius / hour", # Added to make sure pint version is right (>0.10) + "g / 2.5 cm", # Scaling factors are acceptable ] for unit in expected: parse_units(unit) assert parse_units("") == 'dimensionless' + # Scaling factors bind tightly to trailing units + assert parse_units("g / 2.5 cm") == parse_units("g / (2.5 cm)") + assert parse_units("g / 2.5 * cm") == parse_units("g cm / 2.5") def test_parse_unexpected(): diff --git a/setup.py b/setup.py index b92edfe3..251c3378 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup(name='gemd', - version='1.12.1', + version='1.12.2', url='http://github.com/CitrineInformatics/gemd-python', description="Python binding for Citrine's GEMD data model", author='Citrine Informatics', From da55b2990a7e930f983bbd1b1ce62d61120a9955 Mon Sep 17 00:00:00 2001 From: Ken Kroenlein Date: Mon, 30 Jan 2023 10:17:49 -0700 Subject: [PATCH 2/3] pint 0.20 requires fullnames be valid Python identifiers --- gemd/units/impl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gemd/units/impl.py b/gemd/units/impl.py index fcb74d40..849a3379 100644 --- a/gemd/units/impl.py +++ b/gemd/units/impl.py @@ -42,9 +42,9 @@ def _scaling_preprocessor(input_string: str) -> str: # There's probably something to be said for stashing these, but this sin # should be ameliorated by the LRU cache regex = rf"\b{re.escape(scale)}\b" - valid = "_" + scale.replace(".", "o").replace("+", "p").replace("-", "m") + valid = "_" + scale.replace(".", "_").replace("+", "").replace("-", "_") trailing = "/" if division else "" - registry.define(f"{scale} = {scale} = {scale} = {valid}") + registry.define(f"{valid} = {scale} = {scale}") input_string = re.sub(regex, valid + trailing, input_string) return input_string From f3dec09f939516445abb563035893bee58feff73 Mon Sep 17 00:00:00 2001 From: Ken Kroenlein Date: Mon, 30 Jan 2023 13:26:28 -0700 Subject: [PATCH 3/3] Make parse_units format consistent with historical behavior --- .travis.yml | 4 +-- gemd/units/impl.py | 47 ++++++++++++++++++++++++++------- gemd/units/tests/test_parser.py | 29 ++++++++++++++++++++ requirements.txt | 2 +- setup.py | 2 +- 5 files changed, 71 insertions(+), 13 deletions(-) diff --git a/.travis.yml b/.travis.yml index cf243a8c..b02ad411 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,7 @@ python: - '3.9' - '3.10' env: -- PINT_VERSION=0.13 +- PINT_VERSION=0.18 - PINT_VERSION=0.20 jobs: exclude: @@ -32,7 +32,7 @@ deploy: on: tags: true python: '3.7' # only need this to run once - env: PINT_VERSION=0.13 + env: PINT_VERSION=0.18 - provider: pypi user: "CitrineInformatics" password: "$PYPI_PASSWORD" diff --git a/gemd/units/impl.py b/gemd/units/impl.py index 849a3379..08f23d91 100644 --- a/gemd/units/impl.py +++ b/gemd/units/impl.py @@ -1,7 +1,7 @@ """Implementation of units.""" import re -from pint import UnitRegistry, Unit +from pint import UnitRegistry, Unit, register_unit_format from pint.compat import tokenizer from tokenize import NAME, NUMBER, OP # alias the error that is thrown when units are incompatible @@ -18,7 +18,8 @@ def _scaling_preprocessor(input_string: str) -> str: - global registry + """Preprocessor that turns scaling factors into non-dimensional units.""" + global _REGISTRY tokens = tokenizer(input_string) exponent = False division = False @@ -41,16 +42,44 @@ def _scaling_preprocessor(input_string: str) -> str: for scale, division in scales: # There's probably something to be said for stashing these, but this sin # should be ameliorated by the LRU cache - regex = rf"\b{re.escape(scale)}\b" + regex = rf"\b{re.escape(scale)}(?!=[0-9.])" valid = "_" + scale.replace(".", "_").replace("+", "").replace("-", "_") trailing = "/" if division else "" - registry.define(f"{valid} = {scale} = {scale}") + _REGISTRY.define(f"{valid} = {scale} = {scale}") input_string = re.sub(regex, valid + trailing, input_string) return input_string -registry = UnitRegistry(filename=DEFAULT_FILE, preprocessors=[_scaling_preprocessor]) +_REGISTRY = UnitRegistry(filename=DEFAULT_FILE, preprocessors=[_scaling_preprocessor]) + + +@register_unit_format("clean") +def _format_clean(unit, registry, **options): + """Formatter that turns scaling-factor-units into numbers again.""" + numerator = [] + denominator = [] + for u, p in unit.items(): + if re.match(r"_[\d_]+$", u): + # Munged scaling factor; drop leading underscore, restore . and - + u = re.sub(r"(?<=\d)_(?=\d)", ".", u[1:]).replace("_", "-") + + if p == 1: + numerator.append(u) + elif p > 1: + numerator.append(f"{u} ** {p}") + elif p == -1: + denominator.append(u) + elif p < -1: + denominator.append(f"{u} ** {-p}") + + if len(numerator) == 0: + numerator = ["1"] + + if len(denominator) > 0: + return " / ".join((" * ".join(numerator), " / ".join(denominator))) + else: + return " * ".join(numerator) @functools.lru_cache(maxsize=1024) @@ -74,7 +103,7 @@ def parse_units(units: Union[str, Unit, None]) -> Union[str, Unit, None]: elif units == '': return 'dimensionless' elif isinstance(units, str): - return str(registry(units).units) + return f"{_REGISTRY(units).u:clean}" elif isinstance(units, Unit): return units else: @@ -104,7 +133,7 @@ def convert_units(value: float, starting_unit: str, final_unit: str) -> float: if starting_unit == final_unit: return value # skip computation else: - return registry.Quantity(value, starting_unit).to(final_unit).magnitude + return _REGISTRY.Quantity(value, starting_unit).to(final_unit).magnitude def change_definitions_file(filename: str = None): @@ -117,8 +146,8 @@ def change_definitions_file(filename: str = None): The file to use """ - global registry + global _REGISTRY convert_units.cache_clear() # Units will change if filename is None: filename = DEFAULT_FILE - registry = UnitRegistry(filename=filename) + _REGISTRY = UnitRegistry(filename=filename) diff --git a/gemd/units/tests/test_parser.py b/gemd/units/tests/test_parser.py index 439dc0b0..3e96e764 100644 --- a/gemd/units/tests/test_parser.py +++ b/gemd/units/tests/test_parser.py @@ -1,3 +1,5 @@ +import re + import pytest import pkg_resources from contextlib import contextmanager @@ -28,6 +30,8 @@ def test_parse_expected(): assert parse_units("") == 'dimensionless' # Scaling factors bind tightly to trailing units assert parse_units("g / 2.5 cm") == parse_units("g / (2.5 cm)") + assert parse_units("g / 2.5cm") == parse_units("g / (2.5 cm)") + assert parse_units("g / 25.mm") == parse_units("g / (25. mm)") assert parse_units("g / 2.5 * cm") == parse_units("g cm / 2.5") @@ -50,6 +54,31 @@ def test_parse_none(): assert parse_units(None) is None +def test_format(): + """Test that custom formatting behaves as we hope.""" + # use the default unit registry for now + reg = UnitRegistry(filename=pkg_resources.resource_filename("gemd.units", "citrine_en.txt")) + + result = parse_units("K^-2 m^-1 C^0 g^1 s^2") + assert "-" not in result + assert "[time]" in reg(result).dimensionality + assert "[current]" not in reg(result).dimensionality + kelvin = str(reg("K").units) + gram = str(reg("g").units) + second = str(reg("s").units) + assert kelvin in result + assert gram in result + assert second in result + assert result.index(gram) < result.index(kelvin) + assert result.index(gram) < result.index(second) + + assert not re.search(r"\d", parse_units("m kg / s")) + assert "/" not in parse_units("m kg s") + assert "1" not in parse_units("s") + assert "1" in parse_units("s^-1") + assert "2.5" in parse_units("g / 2.5 cm") + + def test_conversion(): """Tests that check if particular units are interoperable.""" conversions = {"in_lb": "foot_pound"} diff --git a/requirements.txt b/requirements.txt index 38cd57e0..3ccdcf7c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ toolz==0.10.0 -pint==0.13 +pint==0.18 sphinx==4.3.0 sphinxcontrib-apidoc==0.3.0 sphinx-rtd-theme==1.0.0 diff --git a/setup.py b/setup.py index 251c3378..ba47a188 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ }, install_requires=[ "toolz>=0.10.0,<1", - "pint>=0.13,<0.21", + "pint>=0.18,<0.21", "deprecation>=2.0.7,<3" ], extras_require={