In [34]:
import unittest
from datetime import datetime, date, time
import uuid
import ulid
import pyarrow as pa
import json
from data_object import DataObject, ArrowConversionError
from typing import List, Dict, Any

In [39]:
pip install pydantic

2289.14s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


Note: you may need to restart the kernel to use updated packages.


In [37]:
class TestDataObject(unittest.TestCase):
    """Construction, serialization and Arrow round-trip tests for ``DataObject``.

    ``setUp`` defines a throwaway ``DataObject`` subclass with one field per
    annotation exercised here, plus a dict of constructor keyword arguments
    (``self.test_data``) whose keys mirror those fields.
    """

    # Field names spot-checked by the schema / JSON key-presence tests.
    SAMPLED_FIELDS = ("string_field", "int_field", "float_field")

    def setUp(self):
        """Create a fresh model class and sample payload before every test."""

        class TestObject(DataObject):
            string_field: str = "test"
            int_field: int = 42
            float_field: float = 3.14
            bool_field: bool = True
            datetime_field: datetime = datetime.now()
            date_field: date = date.today()
            time_field: time = datetime.now().time()
            uuid_field: uuid.UUID = uuid.uuid4()
            ulid_field: str = str(ulid.ulid())
            list_field: List[str] = ["a", "b", "c"]
            dict_field: Dict[str, Any] = {"key": "value"}

        self.TestObject = TestObject

        self.test_data = {
            "string_field": "test",
            "int_field": 42,
            "float_field": 3.14,
            "bool_field": True,
            "datetime_field": datetime.now(),
            "date_field": date.today(),
            "time_field": datetime.now().time(),
            "uuid_field": uuid.uuid4(),
            # Stringified to match the ``str`` annotation and the class default.
            "ulid_field": str(ulid.ulid()),
            "list_field": ["a", "b", "c"],
            "dict_field": {"key": "value"},
        }

    def _make_object(self):
        """Return one ``TestObject`` built from ``self.test_data``."""
        return self.TestObject(**self.test_data)

    def _make_pair(self):
        """Return a list of two independently constructed ``TestObject``s."""
        return [self._make_object(), self._make_object()]

    def test_create_data_object(self):
        """Every constructor argument is readable back as an attribute."""
        obj = self._make_object()
        self.assertIsInstance(obj, DataObject)
        for name, expected in self.test_data.items():
            # subTest reports each mismatching field instead of only the first.
            with self.subTest(field=name):
                self.assertEqual(getattr(obj, name), expected)

    def test_to_arrow(self):
        """``to_arrow`` yields a one-row table with the expected column count."""
        arrow_table = self._make_object().to_arrow()
        self.assertIsInstance(arrow_table, pa.Table)
        self.assertEqual(len(arrow_table), 1)
        self.assertEqual(
            len(arrow_table.schema), len(self.test_data) + 3
        )  # +3 for id, updated_at, and metadata

    def test_from_arrow(self):
        """An object survives a ``to_arrow`` / ``from_arrow`` round trip."""
        obj = self._make_object()
        new_obj = self.TestObject.from_arrow(obj.to_arrow())
        self.assertEqual(obj.dict(), new_obj.dict())

    def test_to_json_schema(self):
        """The JSON schema is a dict listing the sampled fields as properties."""
        schema = self.TestObject.to_json_schema()
        self.assertIsInstance(schema, dict)
        self.assertIn("properties", schema)
        for name in self.SAMPLED_FIELDS:
            self.assertIn(name, schema["properties"])

    def test_from_json(self):
        """An object survives a ``to_json`` / ``from_json`` round trip."""
        obj = self._make_object()
        new_obj = self.TestObject.from_json(obj.to_json())
        self.assertEqual(obj.dict(), new_obj.dict())

    def test_to_json(self):
        """``to_json`` returns a JSON string containing the sampled fields."""
        json_data = self._make_object().to_json()
        self.assertIsInstance(json_data, str)
        parsed_data = json.loads(json_data)
        for name in self.SAMPLED_FIELDS:
            self.assertIn(name, parsed_data)

    def test_get_arrow_schema(self):
        """The Arrow schema is a ``pa.Schema`` naming the sampled fields."""
        schema = self.TestObject.get_arrow_schema()
        self.assertIsInstance(schema, pa.Schema)
        for name in self.SAMPLED_FIELDS:
            self.assertIn(name, schema.names)

    def test_to_arrow_batch(self):
        """Two objects become a two-row ``pa.RecordBatch``."""
        batch = self.TestObject.to_arrow_batch(self._make_pair())
        self.assertIsInstance(batch, pa.RecordBatch)
        self.assertEqual(len(batch), 2)

    def test_from_arrow_batch(self):
        """Objects survive a record-batch round trip, in order."""
        originals = self._make_pair()
        batch = self.TestObject.to_arrow_batch(originals)
        restored = self.TestObject.from_arrow_batch(batch)
        self.assertEqual(len(restored), 2)
        for got, want in zip(restored, originals):
            self.assertEqual(got.dict(), want.dict())

    def test_to_arrow_table(self):
        """Two objects become a two-row ``pa.Table``."""
        table = self.TestObject.to_arrow_table(self._make_pair())
        self.assertIsInstance(table, pa.Table)
        self.assertEqual(len(table), 2)

    def test_from_arrow_table(self):
        """Objects survive a table round trip, in order."""
        originals = self._make_pair()
        table = self.TestObject.to_arrow_table(originals)
        restored = self.TestObject.from_arrow_table(table)
        self.assertEqual(len(restored), 2)
        for got, want in zip(restored, originals):
            self.assertEqual(got.dict(), want.dict())

    def test_to_dataframe(self):
        """``to_dataframe`` produces one row per object."""
        df = self.TestObject.to_dataframe(self._make_pair())
        self.assertEqual(len(df), 2)

    def test_invalid_arrow_conversion(self):
        """Constructing from an unknown field and converting should raise."""
        # NOTE(review): every field on TestObject has a default, so whether this
        # raises ArrowConversionError depends on how DataObject treats unknown
        # keyword arguments -- confirm against DataObject's implementation.
        invalid_data = {"invalid_field": "invalid_value"}
        with self.assertRaises(ArrowConversionError):
            self.TestObject(**invalid_data).to_arrow()

In [38]:

# Gather every test_* method of TestDataObject through the shared default loader.
suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestDataObject)

# Execute with one line of output per test; the returned TestResult is the
# cell's last expression, so the notebook displays its summary repr.
unittest.TextTestRunner(verbosity=2).run(suite)


test_create_data_object (__main__.TestDataObject.test_create_data_object) ... ERROR
test_from_arrow (__main__.TestDataObject.test_from_arrow) ... ERROR
test_from_arrow_batch (__main__.TestDataObject.test_from_arrow_batch) ... ERROR
test_from_arrow_table (__main__.TestDataObject.test_from_arrow_table) ... ERROR
test_from_json (__main__.TestDataObject.test_from_json) ... ERROR
test_get_arrow_schema (__main__.TestDataObject.test_get_arrow_schema) ... ERROR
test_invalid_arrow_conversion (__main__.TestDataObject.test_invalid_arrow_conversion) ... ERROR
test_to_arrow (__main__.TestDataObject.test_to_arrow) ... ERROR
test_to_arrow_batch (__main__.TestDataObject.test_to_arrow_batch) ... ERROR
test_to_arrow_table (__main__.TestDataObject.test_to_arrow_table) ... ERROR
test_to_dataframe (__main__.TestDataObject.test_to_dataframe) ... ERROR
test_to_json (__main__.TestDataObject.test_to_json) ... ERROR
test_to_json_schema (__main__.TestDataObject.test_to_json_schema) ... ERROR

ERROR: test_create_

<unittest.runner.TextTestResult run=13 errors=13 failures=0>