Skip to content

Commit

Permalink
Merge pull request #91 from PickwickSoft/feature/#71/data-loader-for-…
Browse files Browse the repository at this point in the history
…yaml

✨ Add data loader for YAML
  • Loading branch information
garlontas committed May 14, 2024
2 parents 7d90acd + dfd11af commit 471b6ea
Show file tree
Hide file tree
Showing 8 changed files with 215 additions and 44 deletions.
84 changes: 64 additions & 20 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@ packages = [

[tool.poetry.dependencies]
python = ">=3.7,<4.0"
joblib = ">=1.2,<1.4"
joblib = ">=1.2,<=1.4.2"
defusedxml = { version = ">=0.7,<0.8", optional = true }
pyyaml = "^6.0.1"

[tool.poetry.extras]
xml_loader = ["defusedxml"]
all = ["defusedxml"]
yaml_loader = ["pyyaml"]
all = ["defusedxml", "pyyaml"]

[tool.poetry.group.test.dependencies]
parameterized = "*"
Expand Down
4 changes: 3 additions & 1 deletion pystreamapi/loaders/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from pystreamapi.loaders.__csv.__csv_loader import csv
from pystreamapi.loaders.__json.__json_loader import json
from pystreamapi.loaders.__xml.__xml_loader import xml
from pystreamapi.loaders.__yaml.__yaml_loader import yaml

__all__ = [
'csv',
'json',
'xml'
'xml',
'yaml'
]
Empty file.
56 changes: 56 additions & 0 deletions pystreamapi/loaders/__yaml/__yaml_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
try:
import yaml as yaml_lib
except ImportError as exc:
raise ImportError(
"Please install the yaml_loader extra dependency to use the yaml loader."
) from exc
from collections import namedtuple

from pystreamapi.loaders.__lazy_file_iterable import LazyFileIterable
from pystreamapi.loaders.__loader_utils import LoaderUtils


def yaml(src: str, read_from_src=False) -> LazyFileIterable:
    """
    Build a LazyFileIterable over YAML data given as a file path or as raw YAML text.

    Mappings in the YAML are converted to namedtuples and sequences to lists.

    Args:
        src (str): A path to a YAML file, or the YAML document itself.
        read_from_src (bool): When True, src is parsed directly as YAML text. When False
            (the default), src is treated as a file path and validated before use.

    Returns:
        LazyFileIterable: Wraps a zero-argument loader callable that parses the YAML;
            presumably the callable is only invoked when the data is first accessed.
    """
    if not read_from_src:
        # The path is validated right away; only the file read itself is handed
        # to the LazyFileIterable.
        validated_path = LoaderUtils.validate_path(src)
        return LazyFileIterable(lambda: __load_yaml_file(validated_path))
    return LazyFileIterable(lambda: __load_yaml_string(src))


def __load_yaml_file(file_path):
    """
    Load a YAML file and convert its content into namedtuples.

    Args:
        file_path: Path of the YAML file to read (opened as UTF-8 text).

    Returns:
        The converted YAML content, or an empty list when the file holds no
        YAML document (empty file, only whitespace, or only comments).
    """
    # skipcq: PTC-W6004
    with open(file_path, mode='r', encoding='utf-8') as yamlfile:
        src = yamlfile.read()
    # safe_load yields None for an empty document, which covers the empty-file
    # case as well as files containing only whitespace or comments. Mapping
    # None to [] keeps this loader consistent with the string loader.
    data = yaml_lib.safe_load(src)
    return [] if data is None else __convert_to_namedtuples(data)


def __load_yaml_string(yaml_string):
    """
    Parse a YAML string and convert the result into namedtuples.

    Returns an empty list when the parser produces None (no document in the string).
    """
    parsed = yaml_lib.safe_load(yaml_string)
    if parsed is None:
        return []
    return __convert_to_namedtuples(parsed)


def __convert_to_namedtuples(data, name='Item'):
    """
    Recursively convert parsed YAML data into namedtuples.

    Args:
        data: Parsed YAML content. Dicts become namedtuples, lists are converted
            element by element, and any other value is returned unchanged.
        name: Type name for the namedtuple built from a dict. Nested dicts use the
            key they are stored under; list elements inherit the list's name.

    Returns:
        A namedtuple (for a dict), a list (for a list), or the original value.

    Notes:
        Keys that are not valid Python identifiers (e.g. ``first-name``, ``class``,
        ``1``) are renamed to positional names (``_0``, ``_1``, ...) instead of
        raising ValueError. A type name that is not a valid identifier falls back
        to ``'Item'``.
    """
    if isinstance(data, dict):
        fields = list(data.keys())
        try:
            # rename=True replaces invalid field names with positional ones.
            record_type = namedtuple(name, fields, rename=True)
        except ValueError:
            # rename does not cover the type name itself, so retry with the default.
            record_type = namedtuple('Item', fields, rename=True)
        # Build positionally: renamed or non-string keys cannot be passed as keywords.
        return record_type(*(__convert_to_namedtuples(v, k) for k, v in data.items()))
    if isinstance(data, list):
        return [__convert_to_namedtuples(item, name) for item in data]
    return data
44 changes: 23 additions & 21 deletions tests/_loaders/test_json_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,31 @@

file_content = """
[
{
"attr1": 1,
"attr2": 2.0
},
{
"attr1": "a",
"attr2": "b"
}
{
"attr1": 1,
"attr2": 2.0
},
{
"attr1": [
{
"attr1": "a"
}
],
"attr2": "b"
}
]
"""
file_path = 'path/to/data.json'


class TestJsonLoader(TestCase):

def test_json_loader_from_file(self):
with (patch(OPEN, mock_open(read_data=file_content)),
patch(PATH_EXISTS, return_value=True),
patch(PATH_ISFILE, return_value=True)):
data = json(file_path)
self.assertEqual(len(data), 2)
self.assertEqual(data[0].attr1, 1)
self.assertIsInstance(data[0].attr1, int)
self.assertEqual(data[0].attr2, 2.0)
self.assertIsInstance(data[0].attr2, float)
self.assertEqual(data[1].attr1, 'a')
self.assertIsInstance(data[1].attr1, str)
self._check_extracted_data(data)

def test_json_loader_is_iterable(self):
with (patch(OPEN, mock_open(read_data=file_content)),
Expand All @@ -59,14 +58,17 @@ def test_json_loader_with_no_file(self):

def test_json_loader_from_string(self):
data = json(file_content, read_from_src=True)
self._check_extracted_data(data)

def test_json_loader_from_empty_string(self):
with self.assertRaises(JSONDecodeError):
len(json('', read_from_src=True))

def _check_extracted_data(self, data):
self.assertEqual(len(data), 2)
self.assertEqual(data[0].attr1, 1)
self.assertIsInstance(data[0].attr1, int)
self.assertEqual(data[0].attr2, 2.0)
self.assertIsInstance(data[0].attr2, float)
self.assertEqual(data[1].attr1, 'a')
self.assertIsInstance(data[1].attr1, str)

def test_json_loader_from_empty_string(self):
with self.assertRaises(JSONDecodeError):
len(json('', read_from_src=True))
self.assertIsInstance(data[1].attr1, list)
self.assertEqual(data[1].attr1[0].attr1, 'a')
Loading

0 comments on commit 471b6ea

Please sign in to comment.