Exabyte-io · VsevolodX · Apr 4, 2025 · Mar 27, 2025 · Mar 27, 2025 · Mar 27, 2025
diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-20.04
     strategy:
       matrix:
-        python-version: [3.8.6]
+        python-version: [3.10.13]
 
     steps:
       - name: Checkout this repository
@@ -37,10 +37,9 @@ jobs:
     strategy:
       matrix:
         python-version:
-          - 3.8.x
-          - 3.9.x
           - 3.10.x
           - 3.11.x
+          - 3.12.x
 
     steps:
       - name: Checkout this repository

diff --git a/.gitignore b/.gitignore
@@ -126,7 +126,7 @@ celerybeat.pid
 
 # Environments
 .env
-.venv
+.venv*
 env/
 venv/
 ENV/
@@ -176,3 +176,4 @@ node_modules/
 *.DS_Store
 
 tsconfig.tsbuildinfo
+.python-version
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,9 +4,9 @@ dynamic = ["version"]
 description = "COre DEfinitions."
 readme = "README.md"
 requires-python = ">=3.8"
-license = {file = "LICENSE.md"}
+license = { file = "LICENSE.md" }
 authors = [
-    {name = "Exabyte Inc.", email = "info@mat3ra.com"}
+    { name = "Exabyte Inc.", email = "info@mat3ra.com" }
 ]
 classifiers = [
     "Programming Language :: Python",
@@ -18,6 +18,8 @@ dependencies = [
     # add requirements here
     "numpy",
     "jsonschema>=2.6.0",
+    "pydantic>=2.10.5",
+    "mat3ra-esse",
     "mat3ra-utils>=2024.5.15.post0",
 ]
 
@@ -79,3 +81,11 @@ target-version = "py38"
 profile = "black"
 multi_line_output = 3
 include_trailing_comma = true
+
+[tool.pytest.ini_options]
+pythonpath = [
+    "src/py",
+]
+testpaths = [
+    "tests/py"
+]
diff --git a/src/py/mat3ra/__init__.py b/src/py/mat3ra/__init__.py
@@ -0,0 +1,2 @@
+__path__ = __import__("pkgutil").extend_path(__path__, __name__)
+# Extend the package search path; otherwise, the `mat3ra.utils` path resolves to the empty __init__.py file in the code.py package.
diff --git a/src/py/mat3ra/code/array_with_ids.py b/src/py/mat3ra/code/array_with_ids.py
@@ -0,0 +1,119 @@
+import json
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+
+from mat3ra.utils.mixins import RoundNumericValuesMixin
+from pydantic import BaseModel, model_serializer
+
+from .value_with_id import RoundedValueWithId, ValueWithId
+
+
+class ArrayWithIds(BaseModel):
+    # Two parallel lists: values[i] is paired with ids[i].
+    # The filter_*, map_array_in_place, add_item and remove_item methods mutate the instance in place.
+    # (Kept as comments rather than a class docstring so the generated JSON schema is unchanged.)
+    values: List[Any]
+    ids: List[int]
+
+    @classmethod
+    def from_values(cls, values: List[Any]) -> "ArrayWithIds":
+        """Build an instance whose ids are the positions ``0 .. len(values) - 1``.
+
+        Raises:
+            ValueError: if ``values`` has no length (e.g. it is not a list).
+        """
+        try:
+            ids = list(range(len(values)))
+        except TypeError as err:
+            # len() signals unsized input with TypeError, never KeyError.
+            raise ValueError("Values must be a list") from err
+        return cls(values=values, ids=ids)
+
+    @classmethod
+    def get_values_and_ids_from_list_of_dicts(cls, list_of_dicts: List[Dict[str, Any]]) -> Tuple[List[Any], List[int]]:
+        """Split ``[{"id": ..., "value": ...}, ...]`` into a ``(values, ids)`` pair.
+
+        Raises:
+            ValueError: if any item lacks the "id" or "value" key.
+        """
+        try:
+            values = [item["value"] for item in list_of_dicts]
+            ids = [item["id"] for item in list_of_dicts]
+        except KeyError as err:
+            raise ValueError("List of dictionaries must contain 'id' and 'value' keys") from err
+        return values, ids
+
+    @classmethod
+    def from_list_of_dicts(cls, list_of_dicts: List[Dict[str, Any]]) -> "ArrayWithIds":
+        """Build an instance from a list of ``{"id": ..., "value": ...}`` dictionaries.
+
+        Raises:
+            ValueError: if any item lacks the "id" or "value" key.
+        """
+        # The helper already turns a missing key into ValueError, so no extra handler is needed here.
+        values, ids = cls.get_values_and_ids_from_list_of_dicts(list_of_dicts)
+        return cls(values=values, ids=ids)
+
+    @model_serializer
+    def to_dict(self) -> List[Dict[str, Any]]:
+        """Serialize as a list with one ``to_dict()`` result per (id, value) pair."""
+        return [item.to_dict() for item in self.to_array_of_values_with_ids()]
+
+    def to_json(self, skip_rounding=True) -> str:
+        """Return ``to_dict()`` encoded as a JSON string.
+
+        ``skip_rounding`` is accepted for signature compatibility but is not used by this method.
+        """
+        return json.dumps(self.to_dict())
+
+    def to_array_of_values_with_ids(self) -> List[ValueWithId]:
+        """Return one ``ValueWithId`` per (id, value) pair, in order."""
+        return [ValueWithId(id=item_id, value=item) for item_id, item in zip(self.ids, self.values)]
+
+    def get_element_value_by_index(self, index: int) -> Any:
+        """Return the value at ``index``, or ``None`` when the index is out of range.
+
+        Negative indices count from the end; an out-of-range negative index also yields ``None``.
+        """
+        size = len(self.values)
+        return self.values[index] if -size <= index < size else None
+
+    def get_element_id_by_value(self, value: Any) -> Optional[int]:
+        """Return the id paired with the first occurrence of ``value``, or ``None`` if absent."""
+        try:
+            return self.ids[self.values.index(value)]
+        except ValueError:
+            return None
+
+    def filter_by_values(self, values: Union[List[Any], Any]):
+        """Keep only the items whose value matches one of ``values`` (in place).
+
+        A list argument is treated as a collection of values to keep; any other argument
+        is treated as a single value. Values that are lists are compared as tuples.
+        """
+
+        def make_hashable(value):
+            return tuple(value) if isinstance(value, list) else value
+
+        candidates = values if isinstance(values, list) else [values]
+        values_to_keep = {make_hashable(v) for v in candidates}
+        kept = [(v, i) for v, i in zip(self.values, self.ids) if make_hashable(v) in values_to_keep]
+        self.values = [v for v, _ in kept]
+        self.ids = [i for _, i in kept]
+
+    def filter_by_indices(self, indices: Union[List[int], int]):
+        """Keep only the items at the given positions (in place); original order is preserved."""
+        index_set = set(indices) if isinstance(indices, list) else {indices}
+        self.values = [value for position, value in enumerate(self.values) if position in index_set]
+        self.ids = [item_id for position, item_id in enumerate(self.ids) if position in index_set]
+
+    def filter_by_ids(self, ids: Union[List[int], int], invert: bool = False):
+        """Keep only the items whose id is in ``ids`` (in place).
+
+        With ``invert=True`` the items whose id is in ``ids`` are dropped instead.
+        """
+        if isinstance(ids, int):
+            ids = [ids]
+        ids_set = set(self.ids) - set(ids) if invert else set(ids)
+        keep_indices = [index for index, id_ in enumerate(self.ids) if id_ in ids_set]
+        self.values = [self.values[index] for index in keep_indices]
+        self.ids = [self.ids[index] for index in keep_indices]
+
+    def __eq__(self, other: object) -> bool:
+        """Two arrays are equal when both their values and their ids are equal."""
+        return isinstance(other, ArrayWithIds) and self.values == other.values and self.ids == other.ids
+
+    def map_array_in_place(self, func: Callable):
+        """Replace every value with ``func(value)``; ids are left untouched."""
+        self.values = [func(value) for value in self.values]
+
+    def add_item(self, element: Any, id: Optional[int] = None):
+        """Append ``element``; when ``id`` is omitted, use the current maximum id plus one (0 if empty)."""
+        new_id = max(self.ids, default=-1) + 1 if id is None else id
+        self.values.append(element)
+        self.ids.append(new_id)
+
+    def remove_item(self, index: int, id: Optional[int] = None):
+        """Remove one item by position, or by id when ``id`` is given (``index`` is then ignored).
+
+        Raises:
+            ValueError: if ``id`` is given but not present.
+            IndexError: if the position is out of range.
+        """
+        if id is not None:
+            try:
+                index = self.ids.index(id)
+            except ValueError as err:
+                raise ValueError("ID not found in the list") from err
+        if index >= len(self.values):
+            raise IndexError("Index out of range")
+        del self.values[index]
+        del self.ids[index]
+
+
+class RoundedArrayWithIds(RoundNumericValuesMixin, ArrayWithIds):
+    # ArrayWithIds variant whose items are emitted as RoundedValueWithId objects.
+    def to_array_of_values_with_ids(self) -> List[ValueWithId]:
+        """Return one ``RoundedValueWithId`` per (id, value) pair, in order."""
+        class_reference = RoundedValueWithId
+        # NOTE(review): this assigns the attribute on the shared RoundedValueWithId class itself
+        # (class_reference is an alias, not a copy), so the precision set here remains in effect
+        # for any later use of that class -- confirm this global side effect is intended.
+        class_reference.__round_precision__ = self.__round_precision__
+        return [class_reference(id=id, value=item) for id, item in zip(self.ids, self.values)]
diff --git a/src/py/mat3ra/code/constants.py b/src/py/mat3ra/code/constants.py
@@ -1,5 +1,9 @@
 from math import pi
 
+from mat3ra.esse.models.definitions.constants import FundamentalConstants
+
+CONSTANTS = FundamentalConstants()
+
 
 class Coefficients:
     # Same as used in: JS/TS
@@ -13,18 +17,19 @@ class Coefficients:
     # and originally taken from https://github.com/hplgit/physical-quantities/blob/master/PhysicalQuantities.py
 
     # Internal, for convenience purposes
-    _c = 299792458.0  # speed of light, m/s
-    _mu0 = 4.0e-7 * pi  # permeability of vacuum
-    _eps0 = 1 / _mu0 / _c**2  # permittivity of vacuum
-    _Grav = 6.67259e-11  # gravitational constant
-    _hplanck = 6.6260755e-34  # Planck constant, J s
-    _hbar = _hplanck / (2 * pi)  # Planck constant / 2pi, J s
-    _e = 1.60217733e-19  # elementary charge
-    _me = 9.1093897e-31  # electron mass
+    _c = CONSTANTS.c  # speed of light, m/s
+    _Grav = CONSTANTS.G  # gravitational constant
+    _hplanck = CONSTANTS.h  # Planck constant, J s
+    _e = CONSTANTS.e  # elementary charge
+    _me = CONSTANTS.me  # electron mass
+    _mu0 = 4.0e-7 * pi  # permeability of vacuum, SI units (H/m)
+
     _mp = 1.6726231e-27  # proton mass
     _Nav = 6.0221367e23  # Avogadro number
     _k = 1.380658e-23  # Boltzmann constant, J/K
     _amu = 1.6605402e-27  # atomic mass unit, kg
+    _eps0 = 1 / _mu0 / _c**2  # permittivity of vacuum
+    _hbar = _hplanck / (2 * pi)  # Planck constant / 2pi, J s
 
     # External
     BOHR = 4e10 * pi * _eps0 * _hbar**2 / _me / _e**2  # Bohr radius in angstrom

diff --git a/src/py/mat3ra/code/entity.py b/src/py/mat3ra/code/entity.py
@@ -1,30 +1,90 @@
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Type, TypeVar
 
 import jsonschema
 from mat3ra.utils import object as object_utils
+from pydantic import BaseModel
+from typing_extensions import Self
 
 from . import BaseUnderscoreJsonPropsHandler
 from .mixins import DefaultableMixin, HasDescriptionMixin, HasMetadataMixin, NamedMixin
 
+T = TypeVar("T", bound="InMemoryEntityPydantic")
+B = TypeVar("B", bound="BaseModel")
 
+
+# TODO: remove in the next PR
 class ValidationErrorCode:
+    # Namespace for error-code string constants used with EntityError.
     IN_MEMORY_ENTITY_DATA_INVALID = "IN_MEMORY_ENTITY_DATA_INVALID"
 
 
+# TODO: remove in the next PR
 class ErrorDetails:
+    # Plain container that stores the `error`, `json` and `schema` arguments as attributes.
     def __init__(self, error: Optional[Dict[str, Any]], json: Dict[str, Any], schema: Dict):
         self.error = error
         self.json = json
         self.schema = schema
 
 
+# TODO: remove in the next PR
 class EntityError(Exception):
+    # Exception that stores an error `code` and optional `details` as attributes.
     def __init__(self, code: ValidationErrorCode, details: Optional[ErrorDetails] = None):
+        # The code is also forwarded to Exception, so it becomes the exception's first argument.
         super().__init__(code)
         self.code = code
         self.details = details
 
 
+class InMemoryEntityPydantic(BaseModel):
+    # Base class for entities that are validated and (de)serialized through pydantic.
+    # (Kept as a comment rather than a class docstring so the generated JSON schema is unchanged.)
+    model_config = {"arbitrary_types_allowed": True}
+
+    @classmethod
+    def create(cls: Type[T], config: Dict[str, Any]) -> T:
+        """Validate ``config`` and return the resulting instance."""
+        return cls.validate(config)
+
+    @classmethod
+    def validate(cls, value: Any) -> Self:
+        """Clean and validate ``value`` via ``model_validate`` and return the instance."""
+        return cls.model_validate(value)
+
+    @classmethod
+    def is_valid(cls, value: Any) -> bool:
+        """Return ``True`` when ``value`` validates, ``False`` when validation raises."""
+        try:
+            cls.validate(value)
+        except Exception:
+            # Deliberately broad: any failure during validation counts as "not valid".
+            return False
+        return True
+
+    @classmethod
+    def from_json(cls: Type[T], json_str: str) -> T:
+        """Validate a JSON string and return the resulting instance."""
+        return cls.model_validate_json(json_str)
+
+    @classmethod
+    def clean(cls: Type[T], config: Dict[str, Any]) -> Dict[str, Any]:
+        """Validate ``config`` and return it dumped back to a dictionary."""
+        validated_model = cls.model_validate(config)
+        return validated_model.model_dump()
+
+    def get_schema(self) -> Dict[str, Any]:
+        """Return the JSON schema produced by ``model_json_schema``."""
+        return self.model_json_schema()
+
+    def get_data_model(self) -> Type[B]:
+        """Return the first direct base class that is a pydantic ``BaseModel``.
+
+        Raises:
+            ValueError: if no direct base class derives from ``BaseModel``.
+        """
+        # A direct base can never be the class itself, so only the BaseModel check is needed.
+        for base in self.__class__.__bases__:
+            if issubclass(base, BaseModel):
+                return base
+        raise ValueError(f"No schema base model found for {self.__class__.__name__}")
+
+    def get_cls_name(self) -> str:
+        """Return the name of the instance's class."""
+        return self.__class__.__name__
+
+    def to_dict(self, exclude: Optional[List[str]] = None) -> Dict[str, Any]:
+        """Dump the model to a dictionary, omitting the field names listed in ``exclude``."""
+        return self.model_dump(exclude=set(exclude) if exclude else None)
+
+    def to_json(self, exclude: Optional[List[str]] = None) -> str:
+        """Dump the model to a JSON string, omitting the field names listed in ``exclude``."""
+        return self.model_dump_json(exclude=set(exclude) if exclude else None)
+
+    def clone(self: T, extra_context: Optional[Dict[str, Any]] = None, deep=True) -> T:
+        """Return a copy (deep by default) with the ``extra_context`` entries applied as updates."""
+        return self.model_copy(update=extra_context or {}, deep=deep)
+
+
+# TODO: remove in the next PR
 class InMemoryEntity(BaseUnderscoreJsonPropsHandler):
     jsonSchema: Optional[Dict] = None
 
@@ -97,7 +157,7 @@ def get_as_entity_reference(self, by_id_only: bool = False) -> Dict[str, str]:
             return {"_id": self.id, "slug": self.slug, "cls": self.get_cls_name()}
 
 
-class HasDescriptionHasMetadataNamedDefaultableInMemoryEntity(
-    InMemoryEntity, DefaultableMixin, NamedMixin, HasMetadataMixin, HasDescriptionMixin
+class HasDescriptionHasMetadataNamedDefaultableInMemoryEntityPydantic(
+    InMemoryEntityPydantic, DefaultableMixin, NamedMixin, HasMetadataMixin, HasDescriptionMixin
 ):
+    # InMemoryEntityPydantic combined with the defaultable, named, metadata and description mixins.
+    # NOTE(review): this replaces the former `HasDescriptionHasMetadataNamedDefaultableInMemoryEntity`
+    # class and no alias for the old name is kept here -- confirm nothing still imports it.
     pass
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		__path__ = __import__("pkgutil").extend_path(__path__, __name__)
		# otherwise, `mat3ra.utils` path leads to an empty __init__.py file in the code.py package