-
Notifications
You must be signed in to change notification settings - Fork 36
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add initial `extract_udf_dependencies()` to extract UDF dependencies from UDF code (Open-EO/openeo-geopyspark-driver#237)
- Loading branch information
Showing 5 changed files with 298 additions and 4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import json | ||
import re | ||
from typing import Union | ||
|
||
try: | ||
import tomllib | ||
except ImportError: | ||
try: | ||
import tomli as tomllib | ||
except ImportError: | ||
# Will be assigned with fallback implementation below | ||
tomllib = None | ||
|
||
|
||
class FlimsyTomlParser:
    """
    This is a rudimentary, low-tech, incomplete implementation of TOML parsing functionality
    for simple TOML use cases where the dependency on a full-fledged TOML library is not justified.
    It is intended to be used as a best-effort drop-in replacement
    for the `loads()` functionality from full-fledged TOML libraries
    like `tomllib` (part of standard library since Python 3.11)
    or `tomli` (`tomllib` backport for earlier Python versions).

    Supported: top-level bare keys (letters, digits, ``_`` and ``-``) with
    number, boolean, single-line string and (nested, possibly multi-line) array values.
    Continuation lines of a multi-line value must be indented,
    except for a closing ``]`` which may sit at the start of a line.
    Not supported (raises `TomlParseError`): tables and dotted keys.
    """

    class TomlParseError(ValueError):
        """Raised when the input is invalid or uses TOML features this parser does not support."""

    # A `key = value` pair: the value is the rest of the line, plus all following
    # indented lines, plus an optional closing bracket at the start of a line.
    KEY_PAIR_REGEX = re.compile(
        r"(?P<key>^[a-z0-9_-]+)\s*=\s*(?P<value>.*(\s+^\s+.*)*(\s+^])?)",
        flags=re.MULTILINE | re.VERBOSE | re.IGNORECASE,
    )

    # Detection of unsupported constructs: table headers and dotted keys.
    _TABLE_REGEX = re.compile(r"^\[", flags=re.MULTILINE)
    _DOTTED_KEY_REGEX = re.compile(r"^[a-z0-9_-]+\.[a-z0-9_.-]+\s*=", flags=re.MULTILINE | re.IGNORECASE)

    # First preprocessing pass: recognise strings and comments in one left-to-right scan,
    # so that a `#` or a quote character inside a string is never mistaken for syntax.
    _STRING_OR_COMMENT_REGEX = re.compile(
        r"""
        (?P<basic>"(?:[^"\\\n]|\\.)*")    # basic (double quoted) string, with backslash escapes
        | '(?P<literal>[^'\n]*)'          # literal (single quoted) string, no escapes
        | \#[^\n]*                        # comment, up to the end of the line
        """,
        flags=re.VERBOSE,
    )

    # Second preprocessing pass: a comma that is only followed by whitespace
    # and a closing bracket (strings are matched too, so they can be skipped).
    _STRING_OR_TRAILING_COMMA_REGEX = re.compile(r'"(?:[^"\\\n]|\\.)*"|,(?=\s*\])')

    @classmethod
    def loads(cls, data: str) -> dict:
        """
        Parse a TOML document with only simple top-level key-value pairs.

        :param data: TOML document as string
        :return: dictionary mapping each key to its parsed value
        :raises TomlParseError: when tables or dotted keys are used,
            or when a value can not be parsed.
        """
        if cls._TABLE_REGEX.search(data):
            raise cls.TomlParseError("Tables are not supported")
        if cls._DOTTED_KEY_REGEX.search(data):
            raise cls.TomlParseError("Dotted keys are not supported")
        return {
            match.group("key"): cls._parse_toml_value_like_json(match.group("value"))
            for match in cls.KEY_PAIR_REGEX.finditer(data)
        }

    @classmethod
    def _parse_toml_value_like_json(cls, value: str) -> Union[bool, int, float, str, list]:
        """
        Try to parse a TOML value by pretending it's (almost) JSON,
        which covers the basics (simple strings, numbers, arrays, a bit of nesting, ...)

        :param value: raw TOML value (possibly spanning multiple lines, possibly with comments)
        :return: the parsed value
        :raises TomlParseError: when the preprocessed value is not valid JSON
        """

        def _normalize(match: "re.Match") -> str:
            basic = match.group("basic")
            if basic is not None:
                # Basic strings already look like JSON strings: keep them untouched.
                return basic
            literal = match.group("literal")
            if literal is not None:
                # Literal strings have no escapes: let `json.dumps` do the quoting,
                # which also handles embedded double quotes and backslashes.
                return json.dumps(literal)
            # Anything else matched by the regex is a comment: drop it.
            return ""

        def _drop_trailing_comma(match: "re.Match") -> str:
            token = match.group(0)
            return "" if token == "," else token

        # A bit of preprocessing to make it more JSON-like.
        # Comments have to be stripped first: a comment can sit between
        # a trailing comma and the closing bracket of a multi-line array.
        value = cls._STRING_OR_COMMENT_REGEX.sub(_normalize, value)
        value = cls._STRING_OR_TRAILING_COMMA_REGEX.sub(_drop_trailing_comma, value)
        try:
            data = json.loads(value)
        except json.JSONDecodeError as e:
            raise cls.TomlParseError(f"Failed to parse TOML value {value!r}") from e
        return data
|
||
|
||
# Neither `tomllib` nor `tomli` could be imported:
# expose the bundled best-effort parser under the `tomllib` name instead.
tomllib = FlimsyTomlParser if tomllib is None else tomllib
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
import textwrap | ||
|
||
import pytest | ||
|
||
from openeo.udf._compat import FlimsyTomlParser | ||
|
||
|
||
class TestFlimsyTomlLib:
    """Tests for `FlimsyTomlParser`: parsing of single values and of whole documents."""

    @pytest.mark.parametrize(
        ["value", "expected"],
        [
            # Numbers
            ("123", 123),
            ("12.5", 12.5),
            # Strings
            ('"Basic string"', "Basic string"),
            ("'Literal string'", "Literal string"),
            ('''"I'm a string"''', "I'm a string"),
            (r'''"You can \"quote\" me"''', 'You can "quote" me'),
            # Arrays (aka lists)
            ("[]", []),
            ("[1, 2, 3]", [1, 2, 3]),
            ("[1.5, 2.5, 3.5]", [1.5, 2.5, 3.5]),
            ("[1, 2, 3,]", [1, 2, 3]),
            ("[\n 1,\n 2,\n 3,\n]", [1, 2, 3]),
            ('["blue", "yellow"]', ["blue", "yellow"]),
            ("['blue', 'yellow']", ["blue", "yellow"]),
            (
                """
                [
                    "blue",
                    "yellow",
                ]
                """,
                ["blue", "yellow"],
            ),
            ("[1, 'two', 3.0, \"four\"]", [1, "two", 3.0, "four"]),
            (
                """
                [
                    'one',
                    [2, 3],
                ]
                """,
                ["one", [2, 3]],
            ),
        ],
    )
    def test_parse_toml_value_like_json(self, value, expected):
        """A single raw TOML value is parsed to the equivalent Python value."""
        parsed = FlimsyTomlParser._parse_toml_value_like_json(value)
        assert parsed == expected

    def test_loads_basic(self):
        """Single-line string, array and number values."""
        toml_text = textwrap.dedent(
            """
            title = "TOML Example"
            colors = ["blue", "yellow"]
            size = 132
            """
        )
        expected = {
            "title": "TOML Example",
            "colors": ["blue", "yellow"],
            "size": 132,
        }
        assert FlimsyTomlParser.loads(toml_text) == expected

    def test_loads_multiline_values(self):
        """Arrays spread over multiple lines, with comments in between."""
        toml_text = textwrap.dedent(
            """
            # Some colors
            colors = [
                "blue",
                "yellow",
            ]
            sizes = [
                12,
                34,
                # This closing bracket is intentionally indented too
                ]
            shape = "round"
            """
        )
        expected = {
            "colors": ["blue", "yellow"],
            "sizes": [12, 34],
            "shape": "round",
        }
        assert FlimsyTomlParser.loads(toml_text) == expected

    def test_loads_special_keys(self):
        """Bare keys may consist of digits, underscores and dashes."""
        toml_text = textwrap.dedent(
            """
            1234 = "one two three four"
            bare_key = "underscore"
            another-key = "dash"
            """
        )
        expected = {
            "1234": "one two three four",
            "another-key": "dash",
            "bare_key": "underscore",
        }
        assert FlimsyTomlParser.loads(toml_text) == expected

    def test_loads_tables(self):
        """Table headers are rejected explicitly."""
        toml_text = textwrap.dedent(
            """
            title = "Vroom"
            [car]
            brand = "HobbleBlob"
            """
        )
        with pytest.raises(FlimsyTomlParser.TomlParseError, match="Tables are not supported"):
            FlimsyTomlParser.loads(toml_text)

    def test_loads_dotted_keys(self):
        """Dotted keys are rejected explicitly."""
        toml_text = textwrap.dedent(
            """
            title = "Vroom"
            car.brand = "HobbleBlob"
            """
        )
        with pytest.raises(FlimsyTomlParser.TomlParseError, match="Dotted keys are not supported"):
            FlimsyTomlParser.loads(toml_text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters