diff --git a/.github/workflows/black-and-clang-format.yml b/.github/workflows/check-code-quality.yml similarity index 59% rename from .github/workflows/black-and-clang-format.yml rename to .github/workflows/check-code-quality.yml index 22dc34c785..9697889682 100644 --- a/.github/workflows/black-and-clang-format.yml +++ b/.github/workflows/check-code-quality.yml @@ -4,12 +4,12 @@ # GitHub Action that uses -# Black to reformat the Python code in an incoming pull request. +# isort, black, mypy and pylint to reformat the Python code in an incoming pull request. # clang-format to reformat the C++ code in an incoming pull request. # If all code in the pull request is compliant with Black and clang-format then this Action # does nothing. Otherwise, it will print the files which need to be reformatted and raise an error. -name: Format Code +name: Check Code Quality on: # run pipeline on push event of main or release branch @@ -21,7 +21,7 @@ on: pull_request: jobs: - code-format-check: + check-code-quality: if: (github.event_name == 'push') || (!startsWith(github.head_ref, 'release')) runs-on: ubuntu-latest @@ -33,23 +33,41 @@ jobs: uses: actions/setup-python@v4 with: python-version: 3.8 - - - name: Install Black and clang-format + + - name: Upgrade pip + run: pip install --upgrade pip + + - name: Install and run isort + run: | + pip install isort + isort . + + - name: Install and run black run: | pip install black - sudo apt-get update && sudo apt-get install -y clang-format + black . - - name: Run black - run: black . + - name: Install and run mypy + run: | + pip install mypy + mypy . + + - name: Install and run pylint + run: | + pip install pylint . + pylint power_grid_model + git restore README.md - - name: Run clang-format - run: find . -regex '.*\.\(cpp\|hpp\|cc\|cxx\)' -exec clang-format -style=file -i {} \; + - name: Install and run clang-format + run: | + sudo apt-get update && sudo apt-get install -y clang-format + find . -regex '.*\.\(cpp\|hpp\|cc\|cxx\)' -exec clang-format -style=file -i {} \; - name: If needed raise error run: | if [[ `git status --porcelain --untracked-files=no` ]]; then - echo "Formatting not correct! See blow the files which need to be reformatted!" + echo "Formatting not correct! See below the files which need to be reformatted!" git status --porcelain --untracked-files=no exit 1 fi diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0be33d4caf..317c13edfe 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,11 +10,16 @@ repos: - repo: https://github.com/pycqa/isort rev: 5.10.1 hooks: - - id: isort + - id: isort - repo: https://github.com/psf/black rev: 22.6.0 hooks: - id: black + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v0.971 + hooks: + - id: mypy + files: ^(src|tests|scripts)/.+\.py$ - repo: local hooks: - id: pylint diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4e1f89e8a4..0e423a7f27 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -64,9 +64,10 @@ corrections) to your code (style) before each commit. It is up to the developer use this tool or not. The goal is to make sure that each commit will pass the quality checks in the github actions workflow. 
Currently, these hooks are defined in [`.pre-commit-config.yaml`](.pre-commit-config.yaml): * **reuse**: check if all licence headers and files are in place -* **isort**: sort import statements -* **black**: check and correct code style -* **pylint**: check code style +* **isort**: group and sort import statements +* **black**: check and correct code style in a very strict manner +* **mypy**: checks type hinting and data types in general (static type checker) +* **pylint**: check code style and comments * **pytest**: run all unit tests You can manually run pre-commit whenever you like: diff --git a/README.md b/README.md index 51c0528f86..9c5317c437 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ SPDX-License-Identifier: MPL-2.0 [![PyPI version](https://badge.fury.io/py/power-grid-model.svg)](https://badge.fury.io/py/power-grid-model) [![License: MIT](https://img.shields.io/badge/License-MPL2.0-informational.svg)](https://github.com/alliander-opensource/power-grid-model/blob/main/LICENSE) [![Build and Test C++ and Python](https://github.com/alliander-opensource/power-grid-model/actions/workflows/main.yml/badge.svg)](https://github.com/alliander-opensource/power-grid-model/actions/workflows/main.yml) -[![Format Code](https://github.com/alliander-opensource/power-grid-model/actions/workflows/black-and-clang-format.yml/badge.svg)](https://github.com/alliander-opensource/power-grid-model/actions/workflows/black-and-clang-format.yml) +[![Check Code Quality](https://github.com/alliander-opensource/power-grid-model/actions/workflows/check-code-quality.yml/badge.svg)](https://github.com/alliander-opensource/power-grid-model/actions/workflows/check-code-quality.yml) [![REUSE Compliance Check](https://github.com/alliander-opensource/power-grid-model/actions/workflows/reuse-compliance.yml/badge.svg)](https://github.com/alliander-opensource/power-grid-model/actions/workflows/reuse-compliance.yml) [![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=alliander-opensource_power-grid-model&metric=alert_status)](https://sonarcloud.io/summary/new_code?id=alliander-opensource_power-grid-model) diff --git a/docs/python-api-reference.md b/docs/python-api-reference.md index 7c0eb809b6..c61a7976c2 100644 --- a/docs/python-api-reference.md +++ b/docs/python-api-reference.md @@ -17,7 +17,7 @@ The Python API consists of the following components: This is a pure Python module. * `power_grid_model.PowerGridModel`: the main class. This is inside a C++ extension module. -* `power_grid_model.manual_testing`: containing the functions for load and save test dataset. +* `power_grid_model.utils`: containing the functions for load and save test dataset. See [Make Test Dataset](../examples/Make%20Test%20Dataset.ipynb) for examples of how to make test datasets. * `power_grid_model.validation`: optional validation and assertion functions. See [Validation Examples](../examples/Validation%20Examples.ipynb) for more information on how to validate input diff --git a/examples/Make Test Dataset.ipynb b/examples/Make Test Dataset.ipynb index 60b7496dd3..43a2486618 100644 --- a/examples/Make Test Dataset.ipynb +++ b/examples/Make Test Dataset.ipynb @@ -342,9 +342,9 @@ "source": [ "# Helper Functions to Import and Export\n", "\n", - "In the module `power_grid_model.manual_testing` we have some helper functions to import a json file to a `power-grid-model` compatible dataset, or the other way around. 
\n", + "In the module `power_grid_model.utils` we have some helper functions to import a json file to a `power-grid-model` compatible dataset, or the other way around. \n", "\n", - "Please refer to the [source code](../src/power_grid_model/manual_testing.py) for detailed function signature.\n", + "Please refer to the [source code](../src/power_grid_model/utils.py) for detailed function signature.\n", "\n", "In this notebook we export the example network from [Power Flow](./Power%20Flow%20Example.ipynb) to json. " ] @@ -422,7 +422,7 @@ "metadata": {}, "outputs": [], "source": [ - "from power_grid_model.manual_testing import export_json_data\n", + "from power_grid_model.utils import export_json_data\n", "import tempfile\n", "from pathlib import Path\n", "\n", @@ -560,7 +560,7 @@ "source": [ "# round trip and run power flow\n", "\n", - "from power_grid_model.manual_testing import import_json_data\n", + "from power_grid_model.utils import import_json_data\n", "\n", "imported_data = import_json_data(temp_path / \"input.json\", \"input\")\n", "\n", diff --git a/pyproject.toml b/pyproject.toml index 8e95bd8cdd..26ca4c6e77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,7 @@ dynamic = ["version"] [project.optional-dependencies] dev = [ "pre-commit", + "pylint", "pytest", "pytest-cov", ] @@ -69,8 +70,15 @@ line-length = 120 target-version = ['py38'] [tool.isort] -src_paths = ["src", "tests/unit", "scripts"] profile = "black" +line_length = 120 [tool.pylint] max-line-length = 120 + +[tool.mypy] +follow_imports = "silent" +ignore_missing_imports = true +show_column_numbers = true +non_interactive = true +install_types = true diff --git a/scripts/validate_batch_data.py b/scripts/validate_batch_data.py index fd3df70ecb..05411b8293 100644 --- a/scripts/validate_batch_data.py +++ b/scripts/validate_batch_data.py @@ -3,22 +3,16 @@ # SPDX-License-Identifier: MPL-2.0 from pathlib import Path -from typing import cast from power_grid_model import CalculationType -from power_grid_model.manual_testing import import_json_data -from power_grid_model.validation import ( - InputData, - UpdateData, - errors_to_string, - validate_batch_data, -) +from power_grid_model.utils import import_input_data, import_update_data +from power_grid_model.validation import errors_to_string, validate_batch_data input_file = Path("../tests/data/power_flow/dummy-test-batch/input.json") update_file = Path("../tests/data/power_flow/dummy-test-batch/update_batch.json") -input_data = cast(InputData, import_json_data(json_file=input_file, data_type="input")) -update_data = cast(UpdateData, import_json_data(json_file=update_file, data_type="update")) +input_data = import_input_data(json_file=input_file) +update_data = import_update_data(json_file=update_file) update_errors = validate_batch_data( input_data=input_data, update_data=update_data, calculation_type=CalculationType.power_flow, symmetric=True diff --git a/scripts/validate_input_data.py b/scripts/validate_input_data.py index dba7ba8ddb..9a24ed3c86 100644 --- a/scripts/validate_input_data.py +++ b/scripts/validate_input_data.py @@ -3,15 +3,14 @@ # SPDX-License-Identifier: MPL-2.0 from pathlib import Path -from typing import cast from power_grid_model import CalculationType -from power_grid_model.manual_testing import import_json_data -from power_grid_model.validation import InputData, errors_to_string, validate_input_data +from power_grid_model.utils import import_input_data +from power_grid_model.validation import errors_to_string, validate_input_data input_file = 
Path("../tests/data/state_estimation/dummy-test-sym/input.json")
 
-input_data = cast(InputData, import_json_data(json_file=input_file, data_type="input"))
+input_data = import_input_data(json_file=input_file)
 
 input_errors = validate_input_data(
     input_data=input_data, calculation_type=CalculationType.state_estimation, symmetric=True
diff --git a/setup.py b/setup.py
index 6b3adcefee..d698a6c4e2 100644
--- a/setup.py
+++ b/setup.py
@@ -9,6 +9,7 @@
 from itertools import chain
 from pathlib import Path
 from sysconfig import get_paths
+from typing import List
 
 # noinspection PyPackageRequirements
 import Cython.Compiler.Main as CythonCompiler
@@ -88,10 +89,10 @@ def generate_build_ext(pkg_dir: Path, pkg_name: str):
         str(pkg_dir / "include"),  # The include-folder of the repo self
     ]
     # compiler and link flag
-    cflags = []
-    lflags = []
-    library_dirs = []
-    libraries = []
+    cflags: List[str] = []
+    lflags: List[str] = []
+    library_dirs: List[str] = []
+    libraries: List[str] = []
     # macro
     define_macros = [
         ("EIGEN_MPL2_ONLY", "1"),  # only MPL-2 part of eigen3
diff --git a/src/power_grid_model/__init__.py b/src/power_grid_model/__init__.py
index 791d027d44..cc700a3a6d 100644
--- a/src/power_grid_model/__init__.py
+++ b/src/power_grid_model/__init__.py
@@ -6,8 +6,8 @@
 
 # pylint: disable=no-name-in-module
 
-from ._power_grid_core import PowerGridModel, initialize_array, power_grid_meta_data
-from .enum import (
+from power_grid_model._power_grid_core import PowerGridModel, initialize_array, power_grid_meta_data
+from power_grid_model.enum import (
     BranchSide,
     CalculationMethod,
     CalculationType,
diff --git a/src/power_grid_model/data_types.py b/src/power_grid_model/data_types.py
new file mode 100644
index 0000000000..7b533521be
--- /dev/null
+++ b/src/power_grid_model/data_types.py
@@ -0,0 +1,181 @@
+# SPDX-FileCopyrightText: 2022 Contributors to the Power Grid Model project
+#
+# SPDX-License-Identifier: MPL-2.0
+"""
+Many data types are used throughout the power grid model project. In an attempt to clarify type hints, some types
+have been defined and explained in this file
+"""
+
+from typing import Any, Dict, List, Tuple, Union
+
+import numpy as np
+
+# When we're dropping python 3.8, we should introduce
+# SingleArray = np.ndarray (ndim=1)
+# DenseBatchArray = np.ndarray (ndim=2)
+
+SparseBatchArray = Dict[str, np.ndarray]
+"""
+A sparse batch array is a dictionary containing the keys "indptr" and "data".
+    indptr: a one-dimensional numpy int32 array
+    data: a one-dimensional structured numpy array. The exact dtype depends on the type of component.
+
+Example: {"indptr": <1d-array>, "data": <1d-array>}
+"""
+
+BatchArray = Union[np.ndarray, SparseBatchArray]
+"""
+A batch is either a dense or a sparse batch array
+
+Examples:
+    dense: <2d-array>
+    sparse: {"indptr": <1d-array>, "data": <1d-array>}
+"""
+
+SingleDataset = Dict[str, np.ndarray]
+"""
+A single dataset is a dictionary where the keys are the component types and the values are one-dimensional
+structured numpy arrays.
+
+Example: {"node": <1d-array>, "line": <1d-array>}
+"""
+
+BatchDataset = Dict[str, BatchArray]
+"""
+A batch dataset is a dictionary where the keys are the component types and the values are either two-dimensional
+structured numpy arrays (dense batch array) or dictionaries with an indptr and a one-dimensional structured numpy
+array (sparse batch array).
+ +Example: {"node": <2d-array>, "line": {"indptr": <1d-array>, "data": <1d-array>}} +""" + +Dataset = Union[SingleDataset, BatchDataset] +""" +A general data set can be a single or a batch dataset. + +Examples: + single: {"node": <1d-array>, "line": <1d-array>} + batch: {"node": <2d-array>, "line": {"indptr": <1d-array>, "data": <1d-array>}} + +""" + +BatchList = List[SingleDataset] +""" +A batch list is an alternative representation of a batch. It is a list of single datasets, where each single dataset +is actually a batch. The batch list is intended as an intermediate data type, during conversions. + +Example: [{"node": <1d-array>, "line": <1d-array>}, {"node": <1d-array>, "line": <1d-array>}] +""" + +Nominal = int +""" +Nominal values can be IDs, booleans, enums, tap pos + +Example: 123 +""" + +RealValue = float +""" +Symmetrical values can be anything like cable properties, symmetric loads, etc. + +Example: 10500.0 +""" + +AsymValue = Tuple[RealValue, RealValue, RealValue] +""" +Asymmetrical values are three-phase values like p or u_measured. + +Example: (10400.0, 10500.0, 10600.0) +""" + +AttributeValue = Union[Nominal, RealValue, AsymValue] +""" +When representing a grid as a native python structure, each attribute (u_rated etc) is either a nominal value, +a real value, or a tuple of three real values. + +Examples: + nominal: 123 + real: 10500.0 + asym: (10400.0, 10500.0, 10600.0) + +""" + +Component = Dict[str, AttributeValue] +""" +A component, when represented in native python format, is a dictionary, where the keys are the attributes and the values +are the corresponding values. + +Example: {"id": 1, "u_rated": 10500.0} +""" + +ComponentList = List[Component] +""" +A component list is a list containing components. In essence it stores the same information as a np.ndarray, +but in a native python format, without using numpy. + +Example: [{"id": 1, "u_rated": 10500.0}, {"id": 2, "u_rated": 10500.0}] +""" + +SinglePythonDataset = Dict[str, ComponentList] +""" +A single dataset in native python representation is a dictionary, where the keys are the component names and the +values are a list of all the instances of such a component. In essence it stores the same information as a +SingleDataset, but in a native python format, without using numpy. + +Example: + { + "node": [{"id": 1, "u_rated": 10500.0}, {"id": 2, "u_rated": 10500.0}], + "line": [{"id": 3, "from_node": 1, "to_node": 2, ...}], + } +""" + +BatchPythonDataset = List[SinglePythonDataset] +""" +A batch dataset in native python representation is a list of dictionaries, where the keys are the component names and +the values are a list of all the instances of such a component. In essence it stores the same information as a +BatchDataset, but in a native python format, without using numpy. Actually it looks more like the BatchList. + +Example: + [ + { + "line": [{"id": 3, "from_status": 0, "to_status": 0, ...}], + }, + { + "line": [{"id": 3, "from_status": 1, "to_status": 1, ...}], + } + ] +""" + +PythonDataset = Union[SinglePythonDataset, BatchPythonDataset] +""" +A general python data set can be a single or a batch python dataset. 
+ +Examples: + single: + { + "node": [{"id": 1, "u_rated": 10500.0}, {"id": 2, "u_rated": 10500.0}], + "line": [{"id": 3, "from_node": 1, "to_node": 2, ...}], + } + batch: + [ + { + "line": [{"id": 3, "from_status": 0, "to_status": 0, ...}], + }, + { + "line": [{"id": 3, "from_status": 1, "to_status": 1, ...}], + } + ] +""" + +ExtraInfo = Dict[int, Any] +""" +Extra info is a dictionary that contains information about the objects. It is indexed on the object IDs and the +actual information can be anything. + +Example: + { + 1: "First Node", + 2: "Second Node", + 3: {"name": "Cable", "material": "Aluminum"} + } +""" diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py deleted file mode 100644 index a289618e4a..0000000000 --- a/src/power_grid_model/manual_testing.py +++ /dev/null @@ -1,382 +0,0 @@ -# SPDX-FileCopyrightText: 2022 Contributors to the Power Grid Model project -# -# SPDX-License-Identifier: MPL-2.0 - -""" -This file contains all the helper functions for testing purpose -""" - -import json -from pathlib import Path -from typing import IO, Any, Dict, List, Optional, Union - -import numpy as np - -from . import initialize_array - - -def is_nan(data) -> bool: - """ - Determine if the data point is valid - Args: - data: a single scaler or numpy array - - Returns: - True if all the data points are invalid - False otherwise - """ - nan_func = { - np.dtype("f8"): lambda x: np.all(np.isnan(x)), - np.dtype("i4"): lambda x: np.all(x == np.iinfo("i4").min), - np.dtype("i1"): lambda x: np.all(x == np.iinfo("i1").min), - } - return nan_func[data.dtype](data) - - -def convert_list_to_batch_data( - list_data: List[Dict[str, np.ndarray]] -) -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: - """ - Convert a list of datasets to one single batch dataset - - Example data formats: - input: [{"node": <1d-array>, "line": <1d-array>}, {"node": <1d-array>, "line": <1d-array>}] - output: {"node": <2d-array>, "line": <2d-array>} - -or-: {"indptr": <1d-array>, "data": <1d-array>} - Args: - list_data: list of dataset - - Returns: - batch dataset - For a certain component, if all the length is the same for all the batches, a 2D array is used - Otherwise use a dict of indptr/data key - """ - - # List all *unique* types - components = {x for dataset in list_data for x in dataset.keys()} - - batch_data = {} - for component in components: - - # Create a 2D array if the component exists in all datasets and number of objects is the same in each dataset - comp_exists_in_all_datasets = all(component in x for x in list_data) - if comp_exists_in_all_datasets: - all_sizes_are_the_same = all(x[component].size == list_data[0][component].size for x in list_data) - if all_sizes_are_the_same: - batch_data[component] = np.stack([x[component] for x in list_data], axis=0) - continue - - # otherwise use indptr/data dict - indptr = [0] - data = [] - for dataset in list_data: - - if component in dataset: - # If the current dataset contains the component, increase the indptr for this batch and append the data - objects = dataset[component] - indptr.append(indptr[-1] + len(objects)) - data.append(objects) - - else: - # If the current dataset does not contain the component, add the last indptr again. - indptr.append(indptr[-1]) - - # Convert the index pointers to a numpy array and combine the list of object numpy arrays into a singe - # numpy array. 
All objects of all batches are now stores in one large array, the index pointers define - # which elemets of the array (rows) belong to which batch. - batch_data[component] = {"indptr": np.array(indptr, dtype=np.int32), "data": np.concatenate(data, axis=0)} - - return batch_data - - -def convert_python_to_numpy( - data: Union[Dict, List], data_type: str -) -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: - """ - Convert native python data to internal numpy - Args: - data: data in dict or list - data_type: type of data: input, update, sym_output, or asym_output - - Returns: - A single or batch dataset for power-grid-model - - """ - - # If the inpute data is a list, we are dealing with batch data. Each element in the list is a batch. We'll - # first convert each batch seperately, by recusively calling this function for each batch. Then the numpy - # data for all batches in converted into a proper and compact numpy structure. - if isinstance(data, list): - list_data = [convert_python_to_numpy(json_dict, data_type=data_type) for json_dict in data] - return convert_list_to_batch_data(list_data) - - # This should be a normal (non-batch) structure, with a list of objects (dictionaries) per component. - if not isinstance(data, dict): - raise TypeError("Only list or dict is allowed in JSON data!") - - dataset: Dict[str, np.ndarray] = {} - for component, objects in data.items(): - - # We'll initialize an 1d-array with NaN values for all the objects of this component type - dataset[component] = initialize_array(data_type, component, len(objects)) - - for i, obj in enumerate(objects): - # As each object is a separate dictionary, and the attributes may differ per object, we need to check - # all attributes. Non-existing attributes - for attribute, value in obj.items(): - if attribute == "extra": - # The "extra" attribute is a special one. It can store any type of information associated with - # an object, but it will not be used in the calculations. Therefore it is not included in the - # numpy array, so we can skip this attribute - continue - - if attribute not in dataset[component].dtype.names: - # If a attribute doen't exist, the user made a mistake. Let's be merciless in that case, - # for their own good. - raise ValueError(f"Invalid attribute '{attribute}' for {component} {data_type} data.") - - # Now just assign the value and raise an error if the value cannot be stored in the specific - # numpy array data format for this attribute. - try: - dataset[component][i][attribute] = value - except ValueError as ex: - raise ValueError(f"Invalid '{attribute}' value for {component} {data_type} data: {ex}") from ex - return dataset - - -def convert_batch_to_list_data( - batch_data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] -) -> List[Dict[str, np.ndarray]]: - """ - Convert list of dataset to one single batch dataset - Args: - batch_data: a batch dataset for power-grid-model - - Returns: - list of single dataset - """ - - # If the batch data is empty, return an empty list - if not batch_data: - return [] - - # Get the data for an arbitrary component; assuming that the number of batches of each component is the same. 
- # The structure may differ per component - example_batch_data = next(iter(batch_data.values())) - - if isinstance(example_batch_data, np.ndarray): - # We expect the batch data to be a 2d numpy array of n_batches x n_objects - if len(example_batch_data.shape) != 2: - raise ValueError("Invalid batch data format") - n_batches = example_batch_data.shape[0] - elif isinstance(example_batch_data, dict): - # If the batch data is a dictionary, we assume that it is an indptr/data structure (otherwise it is an - # invalid dictionary). There is always one indptr more than there are batches. - if "indptr" not in example_batch_data: - raise ValueError("Invalid batch data format") - n_batches = example_batch_data["indptr"].size - 1 - else: - # If the batch data is not a numpy array and not a dictionary, it is invalid - raise ValueError("Invalid batch data format") - - # Initialize an empty list with dictionaries - # Note that [{}] * n_batches would result in n copies of the same dict. - list_data = [{} for _ in range(n_batches)] - - # While the number of batches must be the same for each component, the structure (2d numpy array or indptr/data) - # doesn't have to be. Therefore, we'll check the structure for each component and copy the data accordingly. - for component, data in batch_data.items(): - if isinstance(data, np.ndarray): - # For 2d numpy arrays, copy each batch into an element of the list - for i, batch in enumerate(data): - list_data[i][component] = batch - else: - # For indptr/data structures, use the indptr to select the items for each batch. - indptr = data["indptr"] - for i, (idx0, idx1) in enumerate(zip(indptr[:-1], indptr[1:])): - list_data[i][component] = data["data"][idx0:idx1] - return list_data - - -def convert_numpy_to_python( - data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] -) -> Union[Dict[str, List[Dict[str, Union[int, float]]]], List[Dict[str, List[Dict[str, Union[int, float]]]]]]: - """ - Convert internal numpy arrays to native python data - If an attribute is not available (NaN value), it will not be exported. - Args: - data: A single or batch dataset for power-grid-model - Returns: - A json dict for single dataset - A json list for batch dataset - - """ - # Check if the dataset is a single dataset or batch dataset - # It is batch dataset if it is 2D array or a indptr/data structure - example_data = next(iter(data.values())) - is_dense_batch = isinstance(example_data, np.ndarray) and example_data.ndim == 2 - is_sparse_batch = isinstance(example_data, dict) and "indptr" in example_data and "data" in example_data - - # If it is a batch, convert the batch data to a list of batches, then convert each batch individually. 
- if is_dense_batch or is_sparse_batch: - list_data = convert_batch_to_list_data(data) - return [convert_numpy_to_python(x) for x in list_data] - - # Otherwise it should be a single data set - if not isinstance(example_data, np.ndarray) or example_data.ndim != 1: - raise ValueError("Invalid data format") - - # Convert each numpy array to a list of objects, which contains only the non-NaN attributes: - # For example: {"node": [{"id": 0, ...}, {"id": 1, ...}], "line": [{"id": 2, ...}]} - return { - component: [ - {attribute: obj[attribute].tolist() for attribute in objects.dtype.names if not is_nan(obj[attribute])} - for obj in objects - ] - for component, objects in data.items() - } - - -def import_json_data(json_file: Path, data_type: str) -> Union[Dict[str, np.ndarray], List[Dict[str, np.ndarray]]]: - """ - import json data - for a list, import individual entry as dictionary of arrays - Args: - json_file: path to the json file - data_type: type of data: input, update, sym_output, or asym_output - - Returns: - A single or batch dataset for power-grid-model - """ - with open(json_file, mode="r", encoding="utf-8") as file_pointer: - json_data = json.load(file_pointer) - return convert_python_to_numpy(json_data, data_type) - - -def export_json_data( - json_file: Path, - data: Union[Dict[str, np.ndarray], List[Dict[str, np.ndarray]]], - indent: Optional[int] = 2, - compact: bool = False, - extra_info: Optional[Union[Dict[int, Any], List[Dict[int, Any]]]] = None, -): - """ - export json data - Args: - json_file: path to json file - data: a single or batch dataset for power-grid-model - indent: indent of the file, default 2 - compact: write components on a single line - extra_info: extra information (in any json-serializable format), indexed on the object ids - e.g. a string representing the original id, or a dictionary storing even more information. - - Returns: - Save to file - """ - json_data = convert_numpy_to_python(data) - if extra_info is not None: - _inject_extra_info(data=json_data, extra_info=extra_info) - - with open(json_file, mode="w", encoding="utf-8") as file_pointer: - if compact and indent: - is_batch_data = isinstance(json_data, list) - max_level = 4 if is_batch_data else 3 - _compact_json_dump(json_data, file_pointer, indent=indent, max_level=max_level) - else: - json.dump(json_data, file_pointer, indent=indent) - - -def _inject_extra_info( - data: Union[Dict[str, List[Dict[str, Union[float, int]]]], List[Dict[str, List[Dict[str, Union[float, int]]]]]], - extra_info: Union[Dict[int, Any], List[Dict[int, Any]]], -): - """ - Injects extra info to the objects by ID - - Args: - data: Power Grid Model Python data, as written to pgm json files. - extra_info: A dictionary indexed by object id. The value may be anything. 
- - """ - if isinstance(data, list): - if isinstance(extra_info, list): - # If both data and extra_info are lists, expect one extra info set per batch - for batch, info in zip(data, extra_info): - _inject_extra_info(batch, info) - else: - # If only data is a list, copy extra_info for each batch - for batch in data: - _inject_extra_info(batch, extra_info) - elif isinstance(data, dict): - if not isinstance(extra_info, dict): - raise TypeError("Invalid extra info data type") - for _, objects in data.items(): - for obj in objects: - if obj["id"] in extra_info: - obj["extra"] = extra_info[obj["id"]] - else: - raise TypeError("Invalid data type") - - -def _compact_json_dump(data: Any, io_stream: IO[str], indent: int, max_level: int, level: int = 0): - """Custom compact JSON writer that is intended to put data belonging to a single object on a single line. - - For example: - { - "node": [ - {"id": 0, "u_rated": 10500.0, "extra": {"original_id": 123}}, - {"id": 1, "u_rated": 10500.0, "extra": {"original_id": 456}}, - ], - "line": [ - {"id": 2, "node_from": 0, "node_to": 1, ...} - ] - } - - The function is being called recursively, starting at level 0 and recursing until max_level is reached. It is - basically a full json writer, but for efficiency reasons, on the last levels the native json.dump method is used. - """ - - # Let's define a 'tab' indent, depending on the level - tab = " " * level * indent - - # If we are at the max_level, or the data simply doesn't contain any more levels, write the indent and serialize - # the data on a single line. - if level >= max_level or not isinstance(data, (list, dict)): - io_stream.write(tab) - json.dump(data, io_stream, indent=None) - return - - # We'll need the number of objects later on - n_obj = len(data) - - # If the data is a list: - # 1. start with an opening bracket - # 2. dump each element in the list - # 3. add a comma and a new line after each element, except for the last element, there we don't need a comma. - # 4. finish with a closing bracket - if isinstance(data, list): - io_stream.write(tab + "[\n") - for i, obj in enumerate(data, start=1): - _compact_json_dump(obj, io_stream, indent, max_level, level + 1) - io_stream.write(",\n" if i < n_obj else "\n") - io_stream.write(tab + "]") - return - - # If the data is a dictionary: - # 1. start with an opening curly bracket - # 2. for each element: write it's key, plus a colon ':' - # 3. if the next level would be the max_level, add a space and dump the element on a single, - # else add a new line before dumping the element recursively. - # 4. add a comma and a new line after each element, except for the last element, there we don't need a comma. - # 5. 
finish with a closing curly bracket
-    io_stream.write(tab + "{\n")
-    for i, (key, obj) in enumerate(data.items(), start=1):
-        io_stream.write(tab + " " * indent + f'"{key}":')
-        if level == max_level - 1 or not isinstance(obj, (list, dict)):
-            io_stream.write(" ")
-            json.dump(obj, io_stream, indent=None)
-        else:
-            io_stream.write("\n")
-            _compact_json_dump(obj, io_stream, indent, max_level, level + 2)
-        io_stream.write(",\n" if i < n_obj else "\n")
-    io_stream.write(tab + "}")
diff --git a/src/power_grid_model/utils.py b/src/power_grid_model/utils.py
new file mode 100644
index 0000000000..f30f9f5077
--- /dev/null
+++ b/src/power_grid_model/utils.py
@@ -0,0 +1,614 @@
+# SPDX-FileCopyrightText: 2022 Contributors to the Power Grid Model project
+#
+# SPDX-License-Identifier: MPL-2.0
+
+"""
+This file contains all the helper functions for testing purposes
+"""
+
+import json
+from pathlib import Path
+from typing import IO, Any, List, Optional, Union, cast
+
+import numpy as np
+
+from power_grid_model import initialize_array
+from power_grid_model.data_types import (
+    BatchArray,
+    BatchDataset,
+    BatchList,
+    BatchPythonDataset,
+    ComponentList,
+    Dataset,
+    ExtraInfo,
+    Nominal,
+    PythonDataset,
+    SingleDataset,
+    SinglePythonDataset,
+    SparseBatchArray,
+)
+
+
+def is_nan(data) -> bool:
+    """
+    Determine if the data point is valid
+    Args:
+        data: a single scalar or numpy array
+
+    Returns:
+        True if all the data points are invalid
+        False otherwise
+    """
+    nan_func = {
+        np.dtype("f8"): lambda x: np.all(np.isnan(x)),
+        np.dtype("i4"): lambda x: np.all(x == np.iinfo("i4").min),
+        np.dtype("i1"): lambda x: np.all(x == np.iinfo("i1").min),
+    }
+    return bool(nan_func[data.dtype](data))
+
+
+def convert_list_to_batch_data(list_data: BatchList) -> BatchDataset:
+    """
+    Convert a list of datasets to one single batch dataset
+
+    Example data formats:
+        input:  [{"node": <1d-array>, "line": <1d-array>}, {"node": <1d-array>, "line": <1d-array>}]
+        output: {"node": <2d-array>, "line": <2d-array>}
+         -or-:  {"indptr": <1d-array>, "data": <1d-array>}
+    Args:
+        list_data: list of dataset
+
+    Returns:
+        batch dataset
+        For a certain component, if the length is the same for all the batches, a 2D array is used
+        Otherwise use a dict of indptr/data keys
+    """
+
+    # List all *unique* types
+    components = {x for dataset in list_data for x in dataset.keys()}
+
+    batch_data: BatchDataset = {}
+    for component in components:
+
+        # Create a 2D array if the component exists in all datasets and number of objects is the same in each dataset
+        comp_exists_in_all_datasets = all(component in x for x in list_data)
+        if comp_exists_in_all_datasets:
+            all_sizes_are_the_same = all(x[component].size == list_data[0][component].size for x in list_data)
+            if all_sizes_are_the_same:
+                batch_data[component] = np.stack([x[component] for x in list_data], axis=0)
+                continue
+
+        # otherwise use indptr/data dict
+        indptr = [0]
+        data = []
+        for dataset in list_data:
+
+            if component in dataset:
+                # If the current dataset contains the component, increase the indptr for this batch and append the data
+                objects = dataset[component]
+                indptr.append(indptr[-1] + len(objects))
+                data.append(objects)
+
+            else:
+                # If the current dataset does not contain the component, add the last indptr again.
+                indptr.append(indptr[-1])
+
+        # Convert the index pointers to a numpy array and combine the list of object numpy arrays into a single
+        # numpy array. All objects of all batches are now stored in one large array, the index pointers define
+        # which elements of the array (rows) belong to which batch.
+        batch_data[component] = {"indptr": np.array(indptr, dtype=np.int32), "data": np.concatenate(data, axis=0)}
+
+    return batch_data
+
+
+def convert_python_to_numpy(data: PythonDataset, data_type: str) -> Dataset:
+    """
+    Convert native python data to internal numpy
+    Args:
+        data: data in dict or list
+        data_type: type of data: input, update, sym_output, or asym_output
+
+    Returns:
+        A single or batch dataset for power-grid-model
+
+    """
+
+    # If the input data is a list, we are dealing with batch data. Each element in the list is a batch. We'll
+    # first convert each batch separately, by recursively calling this function for each batch. Then the numpy
+    # data for all batches is converted into a proper and compact numpy structure.
+    if isinstance(data, list):
+        list_data = [
+            convert_python_single_dataset_to_single_dataset(json_dict, data_type=data_type) for json_dict in data
+        ]
+        return convert_list_to_batch_data(list_data)
+
+    # Otherwise this should be a normal (non-batch) structure, with a list of objects (dictionaries) per component.
+    if not isinstance(data, dict):
+        raise TypeError("Data should be either a list or a dictionary!")
+
+    return convert_python_single_dataset_to_single_dataset(data=data, data_type=data_type)
+
+
+def convert_python_single_dataset_to_single_dataset(data: SinglePythonDataset, data_type: str) -> SingleDataset:
+    """
+    Convert native python data to internal numpy
+    Args:
+        data: data in dict
+        data_type: type of data: input, update, sym_output, or asym_output
+
+    Returns:
+        A single dataset for power-grid-model
+
+    """
+
+    dataset: SingleDataset = {}
+    for component, objects in data.items():
+        dataset[component] = convert_component_list_to_numpy(objects=objects, component=component, data_type=data_type)
+
+    return dataset
+
+
+def convert_component_list_to_numpy(objects: ComponentList, component: str, data_type: str) -> np.ndarray:
+    """
+    Convert native python data to internal numpy
+    Args:
+        objects: data in dict
+        component: the name of the component
+        data_type: type of data: input, update, sym_output, or asym_output
+
+    Returns:
+        A single numpy array
+
+    """
+
+    # We'll initialize a 1d-array with NaN values for all the objects of this component type
+    array = initialize_array(data_type, component, len(objects))
+
+    for i, obj in enumerate(objects):
+        # As each object is a separate dictionary, and the attributes may differ per object, we need to check
+        # all attributes. Non-existing attributes
+        for attribute, value in obj.items():
+            if attribute == "extra":
+                # The "extra" attribute is a special one. It can store any type of information associated with
+                # an object, but it will not be used in the calculations. Therefore it is not included in the
+                # numpy array, so we can skip this attribute
+                continue
+
+            if attribute not in array.dtype.names:
+                # If an attribute doesn't exist, the user made a mistake. Let's be merciless in that case,
+                # for their own good.
+                raise ValueError(f"Invalid attribute '{attribute}' for {component} {data_type} data.")
+
+            # Now just assign the value and raise an error if the value cannot be stored in the specific
+            # numpy array data format for this attribute.
+ try: + array[i][attribute] = value + except ValueError as ex: + raise ValueError(f"Invalid '{attribute}' value for {component} {data_type} data: {ex}") from ex + return array + + +def convert_batch_dataset_to_batch_list(batch_data: BatchDataset) -> BatchList: + """ + Convert batch datasets to a list of individual batches + Args: + batch_data: a batch dataset for power-grid-model + Returns: + A list of individual batches + """ + + # If the batch data is empty, return an empty list + if len(batch_data) == 0: + return [] + + n_batches = get_and_verify_batch_sizes(batch_data=batch_data) + + # Initialize an empty list with dictionaries + # Note that [{}] * n_batches would result in n copies of the same dict. + list_data: BatchList = [{} for _ in range(n_batches)] + + # While the number of batches must be the same for each component, the structure (2d numpy array or indptr/data) + # doesn't have to be. Therefore, we'll check the structure for each component and copy the data accordingly. + for component, data in batch_data.items(): + if isinstance(data, np.ndarray): + component_batches = split_numpy_array_in_batches(data, component) + elif isinstance(data, dict): + component_batches = split_sparse_batches_in_batches(data, component) + else: + raise TypeError( + f"Invalid data type {type(data).__name__} in batch data for '{component}' " + "(should be a Numpy structured array or a python dictionary)." + ) + for i, batch in enumerate(component_batches): + if batch.size > 0: + list_data[i][component] = batch + return list_data + + +def get_and_verify_batch_sizes(batch_data: BatchDataset) -> int: + """ + Determine the number of batches for each component and verify that each component has the same number of batches + Args: + batch_data: a batch dataset for power-grid-model + + Returns: + The number of batches + """ + + n_batch_size = 0 + checked_components: List[str] = [] + for component, data in batch_data.items(): + n_component_batch_size = get_batch_size(data) + if checked_components and n_component_batch_size != n_batch_size: + if len(checked_components) == 1: + checked_components_str = f"'{checked_components.pop()}'" + else: + checked_components_str = "/".join(sorted(checked_components)) + raise ValueError( + f"Inconsistent number of batches in batch data. " + f"Component '{component}' contains {n_component_batch_size} batches, " + f"while {checked_components_str} contained {n_batch_size} batches." + ) + n_batch_size = n_component_batch_size + checked_components.append(component) + return n_batch_size + + +def get_batch_size(batch_data: BatchArray) -> int: + """ + Determine the number of batches and verify the data structure while we're at it. + Args: + batch_data: a batch array for power-grid-model + + Returns: + The number of batches + """ + if isinstance(batch_data, np.ndarray): + # We expect the batch data to be a 2d numpy array of n_batches x n_objects. If it is a 1d numpy array instead, + # we assume that it is a single batch. + if batch_data.ndim == 1: + return 1 + n_batches = batch_data.shape[0] + elif isinstance(batch_data, dict): + # If the batch data is a dictionary, we assume that it is an indptr/data structure (otherwise it is an + # invalid dictionary). There is always one indptr more than there are batches. 
+ if "indptr" not in batch_data: + raise ValueError("Invalid batch data format, expected 'indptr' and 'data' entries") + n_batches = batch_data["indptr"].size - 1 + else: + # If the batch data is not a numpy array and not a dictionary, it is invalid + raise ValueError( + "Invalid batch data format, expected a 2-d numpy array or a dictionary with an 'indptr' and 'data' entry" + ) + return n_batches + + +def split_numpy_array_in_batches(data: np.ndarray, component: str) -> List[np.ndarray]: + """ + Split a single dense numpy array into one or more batches + + Args: + data: A 1D or 2D Numpy structured array. A 1D array is a single table / batch, a 2D array is a batch per table. + component: The name of the component to which the data belongs, only used for errors. + + Returns: + A list with a single numpy structured array per batch + + """ + if not isinstance(data, np.ndarray): + raise TypeError( + f"Invalid data type {type(data).__name__} in batch data for '{component}' " + "(should be a 1D/2D Numpy structured array)." + ) + if data.ndim == 1: + return [data] + if data.ndim == 2: + return [data[i, :] for i in range(data.shape[0])] + raise TypeError( + f"Invalid data dimension {data.ndim} in batch data for '{component}' " + "(should be a 1D/2D Numpy structured array)." + ) + + +def split_sparse_batches_in_batches(batch_data: SparseBatchArray, component: str) -> List[np.ndarray]: + """ + Split a single numpy array representing, a compressed sparse structure, into one or more batches + + Args: + batch_data: Sparse batch data + component: The name of the component to which the data belongs, only used for errors. + + Returns: + A list with a single numpy structured array per batch + + """ + + for key in ["indptr", "data"]: + if key not in batch_data: + raise KeyError( + f"Missing '{key}' in sparse batch data for '{component}' " + "(expected a python dictionary containing two keys: 'indptr' and 'data')." + ) + + data = batch_data["data"] + indptr = batch_data["indptr"] + + if not isinstance(data, np.ndarray) or data.ndim != 1: + raise TypeError( + f"Invalid data type {type(data).__name__} in sparse batch data for '{component}' " + "(should be a 1D Numpy structured array (i.e. a single 'table'))." + ) + + if not isinstance(indptr, np.ndarray) or indptr.ndim != 1 or not np.issubdtype(indptr.dtype, np.integer): + raise TypeError( + f"Invalid indptr data type {type(indptr).__name__} in batch data for '{component}' " + "(should be a 1D Numpy array (i.e. a single 'list'), " + "containing indices (i.e. integers))." + ) + + if indptr[0] != 0 or indptr[-1] != len(data) or any(indptr[i] > indptr[i + 1] for i in range(len(indptr) - 1)): + raise TypeError( + f"Invalid indptr in batch data for '{component}' " + f"(should start with 0, end with the number of objects ({len(data)}) " + "and be monotonic increasing)." + ) + + return [data[indptr[i] : indptr[i + 1]] for i in range(len(indptr) - 1)] + + +def convert_dataset_to_python_dataset(data: Dataset) -> PythonDataset: + """ + Convert internal numpy arrays to native python data + If an attribute is not available (NaN value), it will not be exported. 
+    Args:
+        data: A single or batch dataset for power-grid-model
+    Returns:
+        A python dict for single dataset
+        A python list for batch dataset
+
+    """
+
+    # Check if the dataset is a single dataset or batch dataset
+    # It is a batch dataset if it is a 2D array or an indptr/data structure
+    is_batch: Optional[bool] = None
+    for component, array in data.items():
+        is_dense_batch = isinstance(array, np.ndarray) and array.ndim == 2
+        is_sparse_batch = isinstance(array, dict) and "indptr" in array and "data" in array
+        if is_batch is not None and is_batch != (is_dense_batch or is_sparse_batch):
+            raise ValueError(
+                f"Mixed {'' if is_batch else 'non-'}batch data "
+                f"with {'non-' if is_batch else ''}batch data ({component})."
+            )
+        is_batch = is_dense_batch or is_sparse_batch
+
+    # If it is a batch, convert the batch data to a list of batches, then convert each batch individually.
+    if is_batch:
+        # We have established that this is batch data, so let's tell the type checker that this is a BatchDataset
+        data = cast(BatchDataset, data)
+        list_data = convert_batch_dataset_to_batch_list(data)
+        return [convert_single_dataset_to_python_single_dataset(data=x) for x in list_data]
+
+    # We have established that this is not batch data, so let's tell the type checker that this is a SingleDataset
+    data = cast(SingleDataset, data)
+    return convert_single_dataset_to_python_single_dataset(data=data)
+
+
+def convert_single_dataset_to_python_single_dataset(data: SingleDataset) -> SinglePythonDataset:
+    """
+    Convert internal numpy arrays to native python data
+    If an attribute is not available (NaN value), it will not be exported.
+    Args:
+        data: A single dataset for power-grid-model
+    Returns:
+        A python dict for single dataset
+    """
+
+    # This should be a single data set
+    for component, array in data.items():
+        if not isinstance(array, np.ndarray) or array.ndim != 1:
+            raise ValueError("Invalid data format")
+
+    # Convert each numpy array to a list of objects, which contains only the non-NaN attributes:
+    # For example: {"node": [{"id": 0, ...}, {"id": 1, ...}], "line": [{"id": 2, ...}]}
+    return {
+        component: [
+            {attribute: obj[attribute].tolist() for attribute in objects.dtype.names if not is_nan(obj[attribute])}
+            for obj in objects
+        ]
+        for component, objects in data.items()
+    }
+
+
+def import_json_data(json_file: Path, data_type: str) -> Dataset:
+    """
+    import json data
+    Args:
+        json_file: path to the json file
+        data_type: type of data: input, update, sym_output, or asym_output
+
+    Returns:
+        A single or batch dataset for power-grid-model
+    """
+    with open(json_file, mode="r", encoding="utf-8") as file_pointer:
+        json_data = json.load(file_pointer)
+    return convert_python_to_numpy(json_data, data_type)
+
+
+def import_input_data(json_file: Path) -> SingleDataset:
+    """
+    import input json data
+    Args:
+        json_file: path to the json file
+
+    Returns:
+        A single dataset for power-grid-model
+    """
+    data = import_json_data(json_file=json_file, data_type="input")
+    assert isinstance(data, dict)
+    assert all(isinstance(component, np.ndarray) and component.ndim == 1 for component in data.values())
+    return cast(SingleDataset, data)
+
+
+def import_update_data(json_file: Path) -> BatchDataset:
+    """
+    import update json data
+    Args:
+        json_file: path to the json file
+
+    Returns:
+        A batch dataset for power-grid-model
+    """
+    return cast(BatchDataset, import_json_data(json_file=json_file, data_type="update"))
+
+
+def export_json_data(
+    json_file: Path,
+    data: Dataset,
+    indent: Optional[int] = 2,
compact: bool = False, + extra_info: Optional[Union[ExtraInfo, List[ExtraInfo]]] = None, +): + """ + export json data + Args: + json_file: path to json file + data: a single or batch dataset for power-grid-model + indent: indent of the file, default 2 + compact: write components on a single line + extra_info: extra information (in any json-serializable format), indexed on the object ids + e.g. a string representing the original id, or a dictionary storing even more information. + + Returns: + Save to file + """ + json_data = convert_dataset_to_python_dataset(data) + if extra_info is not None: + inject_extra_info(data=json_data, extra_info=extra_info) + + with open(json_file, mode="w", encoding="utf-8") as file_pointer: + if compact and indent: + is_batch_data = isinstance(json_data, list) + max_level = 4 if is_batch_data else 3 + compact_json_dump(json_data, file_pointer, indent=indent, max_level=max_level) + else: + json.dump(json_data, file_pointer, indent=indent) + + +def inject_extra_info(data: PythonDataset, extra_info: Union[ExtraInfo, List[ExtraInfo]]): + """ + Injects extra info to the objects by ID + + Args: + data: Power Grid Model Python data, as written to pgm json files. + extra_info: A dictionary indexed by object id. The value may be anything. + + """ + if isinstance(data, list): + _inject_extra_info_batch(data=data, extra_info=extra_info) + elif isinstance(data, dict): + _inject_extra_info_single(data=data, extra_info=cast(ExtraInfo, extra_info)) + else: + raise TypeError("Invalid data type") + + +def _inject_extra_info_single(data: SinglePythonDataset, extra_info: ExtraInfo): + """ + Injects extra info to the objects by ID + + Args: + data: Power Grid Model Python data, as written to pgm json files. + extra_info: A dictionary indexed by object id. The value may be anything. + + """ + if not isinstance(extra_info, dict): + raise TypeError("Invalid extra info data type") + + for _, objects in data.items(): + for obj in objects: + if obj["id"] in extra_info: + # IDs are always nominal values, so let's tell the type checker: + obj_id = cast(Nominal, obj["id"]) + obj["extra"] = extra_info[obj_id] + + +def _inject_extra_info_batch(data: BatchPythonDataset, extra_info: Union[ExtraInfo, List[ExtraInfo]]): + """ + Injects extra info to the objects by ID + + Args: + data: Power Grid Model Python data, as written to pgm json files. + extra_info: A dictionary indexed by object id. The value may be anything. + + """ + if isinstance(extra_info, list): + # If both data and extra_info are lists, expect one extra info set per batch + for batch, info in zip(data, extra_info): + _inject_extra_info_single(batch, info) + else: + # If only data is a list, copy extra_info for each batch + for batch in data: + _inject_extra_info_single(batch, extra_info) + + +def compact_json_dump(data: Any, io_stream: IO[str], indent: int, max_level: int, level: int = 0): + """Custom compact JSON writer that is intended to put data belonging to a single object on a single line. + + For example: + { + "node": [ + {"id": 0, "u_rated": 10500.0, "extra": {"original_id": 123}}, + {"id": 1, "u_rated": 10500.0, "extra": {"original_id": 456}}, + ], + "line": [ + {"id": 2, "node_from": 0, "node_to": 1, ...} + ] + } + + The function is being called recursively, starting at level 0 and recursing until max_level is reached. It is + basically a full json writer, but for efficiency reasons, on the last levels the native json.dump method is used. 
+    """
+
+    # Let's define a 'tab' indent, depending on the level
+    tab = " " * level * indent
+
+    # If we are at the max_level, or the data simply doesn't contain any more levels, write the indent and serialize
+    # the data on a single line.
+    if level >= max_level or not isinstance(data, (list, dict)):
+        io_stream.write(tab)
+        json.dump(data, io_stream, indent=None)
+        return
+
+    # We'll need the number of objects later on
+    n_obj = len(data)
+
+    # If the data is a list:
+    # 1. start with an opening bracket
+    # 2. dump each element in the list
+    # 3. add a comma and a new line after each element, except for the last element, there we don't need a comma.
+    # 4. finish with a closing bracket
+    if isinstance(data, list):
+        io_stream.write(tab + "[\n")
+        for i, obj in enumerate(data, start=1):
+            compact_json_dump(obj, io_stream, indent, max_level, level + 1)
+            io_stream.write(",\n" if i < n_obj else "\n")
+        io_stream.write(tab + "]")
+        return
+
+    # If the data is a dictionary:
+    # 1. start with an opening curly bracket
+    # 2. for each element: write its key, plus a colon ':'
+    # 3. if the next level would be the max_level, add a space and dump the element on a single line,
+    #    else add a new line before dumping the element recursively.
+    # 4. add a comma and a new line after each element, except for the last element, there we don't need a comma.
+    # 5. finish with a closing curly bracket
+    io_stream.write(tab + "{\n")
+    for i, (key, obj) in enumerate(data.items(), start=1):
+        io_stream.write(tab + " " * indent + f'"{key}":')
+        if level == max_level - 1 or not isinstance(obj, (list, dict)):
+            io_stream.write(" ")
+            json.dump(obj, io_stream, indent=None)
+        else:
+            io_stream.write("\n")
+            compact_json_dump(obj, io_stream, indent, max_level, level + 2)
+        io_stream.write(",\n" if i < n_obj else "\n")
+    io_stream.write(tab + "}")
diff --git a/src/power_grid_model/validation/__init__.py b/src/power_grid_model/validation/__init__.py
index 5e75021484..0e19329a2e 100644
--- a/src/power_grid_model/validation/__init__.py
+++ b/src/power_grid_model/validation/__init__.py
@@ -4,11 +4,7 @@
 
 """Power Grid Model input/update data validation"""
 
-from .assertions import (
-    ValidationException,
-    assert_valid_batch_data,
-    assert_valid_input_data,
-)
-from .errors import ValidationError
-from .utils import InputData, UpdateData, errors_to_string
-from .validation import validate_batch_data, validate_input_data
+from power_grid_model.validation.assertions import ValidationException, assert_valid_batch_data, assert_valid_input_data
+from power_grid_model.validation.errors import ValidationError
+from power_grid_model.validation.utils import errors_to_string
+from power_grid_model.validation.validation import validate_batch_data, validate_input_data
diff --git a/src/power_grid_model/validation/assertions.py b/src/power_grid_model/validation/assertions.py
index af7f5b76b8..5b200579a0 100644
--- a/src/power_grid_model/validation/assertions.py
+++ b/src/power_grid_model/validation/assertions.py
@@ -8,10 +8,11 @@
 """
 from typing import Dict, List, Optional, Union
 
-from ..enum import CalculationType
-from .errors import ValidationError
-from .utils import InputData, UpdateData, errors_to_string
-from .validation import validate_batch_data, validate_input_data
+from power_grid_model.data_types import BatchDataset, SingleDataset
+from power_grid_model.enum import CalculationType
+from power_grid_model.validation.errors import ValidationError
+from power_grid_model.validation.utils import errors_to_string
+from
power_grid_model.validation.validation import validate_batch_data, validate_input_data class ValidationException(ValueError): @@ -30,7 +31,7 @@ def __str__(self): def assert_valid_input_data( - input_data: InputData, calculation_type: Optional[CalculationType] = None, symmetric: bool = True + input_data: SingleDataset, calculation_type: Optional[CalculationType] = None, symmetric: bool = True ): """ Validates the entire input dataset: @@ -57,8 +58,8 @@ def assert_valid_input_data( def assert_valid_batch_data( - input_data: InputData, - update_data: UpdateData, + input_data: SingleDataset, + update_data: BatchDataset, calculation_type: Optional[CalculationType] = None, symmetric: bool = True, ): diff --git a/src/power_grid_model/validation/errors.py b/src/power_grid_model/validation/errors.py index e5f7b21486..ba5974fb24 100644 --- a/src/power_grid_model/validation/errors.py +++ b/src/power_grid_model/validation/errors.py @@ -10,8 +10,6 @@ from enum import Enum from typing import Any, Dict, List, Optional, Tuple, Type, Union -# No need to explain each class, as the class name shoul dbe self explanatory - class ValidationError(ABC): """ @@ -43,7 +41,7 @@ class ValidationError(ABC): components are being addressed. """ - ids: Union[List[int], List[Tuple[str, int]]] = [] + ids: Optional[Union[List[int], List[Tuple[str, int]]]] = None """ The object identifiers to which the error applies. A field object identifier can also be a tuple (component, id) when multiple components are being addressed. @@ -77,14 +75,16 @@ def get_context(self, id_lookup: Optional[Union[List[str], Dict[int, str]]] = No if id_lookup: if isinstance(id_lookup, list): id_lookup = dict(enumerate(id_lookup)) - context["ids"] = {i: id_lookup.get(i[1] if isinstance(i, tuple) else i) for i in self.ids} + context["ids"] = ( + {i: id_lookup.get(i[1] if isinstance(i, tuple) else i) for i in self.ids} if self.ids else set() + ) for key in context: if hasattr(self, key + "_str"): context[key] = str(getattr(self, key + "_str")) return context def __str__(self) -> str: - n_objects = len(self.ids) + n_objects = len(self.ids) if self.ids else 0 context = self.get_context() context["n"] = n_objects context["objects"] = context.get("component", "object") diff --git a/src/power_grid_model/validation/rules.py b/src/power_grid_model/validation/rules.py index 969cbc5751..977cbe21d6 100644 --- a/src/power_grid_model/validation/rules.py +++ b/src/power_grid_model/validation/rules.py @@ -20,7 +20,7 @@ Input data: - data: Dict[str, np.ndarray] + data: SingleDataset The entire input/update data set component: str @@ -39,8 +39,9 @@ import numpy as np -from ..enum import WindingType -from .errors import ( +from power_grid_model.data_types import SingleDataset +from power_grid_model.enum import WindingType +from power_grid_model.validation.errors import ( ComparisonError, IdNotInDatasetError, InfinityError, @@ -61,9 +62,10 @@ TwoValuesZeroError, ValidationError, ) -from .utils import eval_expression, nan_type +from power_grid_model.validation.utils import eval_expression, nan_type Error = TypeVar("Error", bound=ValidationError) +CompError = TypeVar("CompError", bound=ComparisonError) class ComparisonFn(Protocol): # pylint: disable=too-few-public-methods @@ -75,7 +77,7 @@ def __call__(self, val: np.ndarray, *ref: np.ndarray) -> np.ndarray: ... 
-def all_greater_than_zero(data: Dict[str, np.ndarray], component: str, field: str) -> List[NotGreaterThanError]: +def all_greater_than_zero(data: SingleDataset, component: str, field: str) -> List[NotGreaterThanError]: """ Check that for all records of a particular type of component, the values in the 'field' column are greater than zero. Returns an empty list on success, or a list containing a single error object on failure. @@ -92,9 +94,7 @@ def all_greater_than_zero(data: Dict[str, np.ndarray], component: str, field: st return all_greater_than(data, component, field, 0.0) -def all_greater_than_or_equal_to_zero( - data: Dict[str, np.ndarray], component: str, field: str -) -> List[NotGreaterOrEqualError]: +def all_greater_than_or_equal_to_zero(data: SingleDataset, component: str, field: str) -> List[NotGreaterOrEqualError]: """ Check that for all records of a particular type of component, the values in the 'field' column are greater than, or equal to zero. Returns an empty list on success, or a list containing a single error object on failure. @@ -112,7 +112,7 @@ def all_greater_than_or_equal_to_zero( def all_greater_than( - data: Dict[str, np.ndarray], component: str, field: str, ref_value: Union[int, float, str] + data: SingleDataset, component: str, field: str, ref_value: Union[int, float, str] ) -> List[NotGreaterThanError]: """ Check that for all records of a particular type of component, the values in the 'field' column are greater than @@ -131,14 +131,14 @@ def all_greater_than( was less than, or equal to, the ref_value. """ - def not_greater(val: np.ndarray, ref: np.ndarray): - return np.less_equal(val, ref) + def not_greater(val: np.ndarray, *ref: np.ndarray): + return np.less_equal(val, *ref) return none_match_comparison(data, component, field, not_greater, ref_value, NotGreaterThanError) def all_greater_or_equal( - data: Dict[str, np.ndarray], component: str, field: str, ref_value: Union[int, float, str] + data: SingleDataset, component: str, field: str, ref_value: Union[int, float, str] ) -> List[NotGreaterOrEqualError]: """ Check that for all records of a particular type of component, the values in the 'field' column are greater than, @@ -159,14 +159,14 @@ def all_greater_or_equal( """ - def not_greater_or_equal(val: np.ndarray, ref: np.ndarray): - return np.less(val, ref) + def not_greater_or_equal(val: np.ndarray, *ref: np.ndarray): + return np.less(val, *ref) return none_match_comparison(data, component, field, not_greater_or_equal, ref_value, NotGreaterOrEqualError) def all_less_than( - data: Dict[str, np.ndarray], component: str, field: str, ref_value: Union[int, float, str] + data: SingleDataset, component: str, field: str, ref_value: Union[int, float, str] ) -> List[NotLessThanError]: """ Check that for all records of a particular type of component, the values in the 'field' column are less than the @@ -185,14 +185,14 @@ def all_less_than( greater than, or equal to, the ref_value. 
""" - def not_less(val: np.ndarray, ref: np.ndarray): - return np.greater_equal(val, ref) + def not_less(val: np.ndarray, *ref: np.ndarray): + return np.greater_equal(val, *ref) return none_match_comparison(data, component, field, not_less, ref_value, NotLessThanError) def all_less_or_equal( - data: Dict[str, np.ndarray], component: str, field: str, ref_value: Union[int, float, str] + data: SingleDataset, component: str, field: str, ref_value: Union[int, float, str] ) -> List[NotLessOrEqualError]: """ Check that for all records of a particular type of component, the values in the 'field' column are less than, @@ -213,14 +213,14 @@ def all_less_or_equal( """ - def not_less_or_equal(val: np.ndarray, ref: np.ndarray): - return np.greater(val, ref) + def not_less_or_equal(val: np.ndarray, *ref: np.ndarray): + return np.greater(val, *ref) return none_match_comparison(data, component, field, not_less_or_equal, ref_value, NotLessOrEqualError) def all_between( - data: Dict[str, np.ndarray], + data: SingleDataset, component: str, field: str, ref_value_1: Union[int, float, str], @@ -254,7 +254,7 @@ def outside(val: np.ndarray, *ref: np.ndarray) -> np.ndarray: def all_between_or_at( - data: Dict[str, np.ndarray], + data: SingleDataset, component: str, field: str, ref_value_1: Union[int, float, str], @@ -288,13 +288,13 @@ def outside(val: np.ndarray, *ref: np.ndarray) -> np.ndarray: def none_match_comparison( - data: Dict[str, np.ndarray], + data: SingleDataset, component: str, field: str, compare_fn: ComparisonFn, ref_value: ComparisonError.RefType, - error: Type[Error] = ComparisonError, -) -> List[Error]: + error: Type[CompError] = ComparisonError, # type: ignore +) -> List[CompError]: # pylint: disable=too-many-arguments """ For all records of a particular type of component, check if the value in the 'field' column match the comparison. @@ -315,21 +315,21 @@ def none_match_comparison( A list containing zero or one comparison errors (should be a sub class of ComparisonError), listing all ids where the value in the field of interest matched the comparison. """ - data = data[component] + component_data = data[component] if isinstance(ref_value, tuple): - ref = (eval_expression(data, v) for v in ref_value) + ref = tuple(eval_expression(component_data, v) for v in ref_value) else: - ref = (eval_expression(data, ref_value),) - matches = compare_fn(data[field], *ref) + ref = (eval_expression(component_data, ref_value),) + matches = compare_fn(component_data[field], *ref) if matches.any(): if matches.ndim > 1: matches = matches.any(axis=1) - ids = data["id"][matches].flatten().tolist() + ids = component_data["id"][matches].flatten().tolist() return [error(component, field, ids, ref_value)] return [] -def all_unique(data: Dict[str, np.ndarray], component: str, field: str) -> List[NotUniqueError]: +def all_unique(data: SingleDataset, component: str, field: str) -> List[NotUniqueError]: """ Check that for all records of a particular type of component, the values in the 'field' column are unique within the 'field' column of that component. 
@@ -357,7 +357,7 @@ def all_unique(data: Dict[str, np.ndarray], component: str, field: str) -> List[ def all_cross_unique( - data: Dict[str, np.ndarray], fields: List[Tuple[str, str]], cross_only=True + data: SingleDataset, fields: List[Tuple[str, str]], cross_only=True ) -> List[MultiComponentNotUniqueError]: """ Check that for all records of a particular type of component, the values in the 'field' column are unique within @@ -373,7 +373,7 @@ def all_cross_unique( A list containing zero or one MultiComponentNotUniqueError, listing all fields and ids where the value was not unique between the fields. """ - all_values = {} + all_values: Dict[int, List[Tuple[Tuple[str, str], int]]] = {} duplicate_ids = set() for component, field in fields: for obj_id, value in zip(data[component]["id"], data[component][field]): @@ -392,7 +392,7 @@ def all_cross_unique( def all_valid_enum_values( - data: Dict[str, np.ndarray], component: str, field: str, enum: Type[Enum] + data: SingleDataset, component: str, field: str, enum: Type[Enum] ) -> List[InvalidEnumValueError]: """ Check that for all records of a particular type of component, the values in the 'field' column are valid values for @@ -417,7 +417,7 @@ def all_valid_enum_values( def all_valid_ids( - data: Dict[str, np.ndarray], component: str, field: str, ref_components: Union[str, List[str]], **filters: Any + data: SingleDataset, component: str, field: str, ref_components: Union[str, List[str]], **filters: Any ) -> List[InvalidIdError]: """ For a column which should contain object identifiers (ids), check if the id exists in the data, for a specific set @@ -464,7 +464,7 @@ def all_valid_ids( return [] -def all_boolean(data: Dict[str, np.ndarray], component: str, field: str) -> List[NotBooleanError]: +def all_boolean(data: SingleDataset, component: str, field: str) -> List[NotBooleanError]: """ Check that for all records of a particular type of component, the values in the 'field' column are valid boolean values, i.e. 0 or 1. Returns an empty list on success, or a list containing a single error object on failure. @@ -486,7 +486,7 @@ def all_boolean(data: Dict[str, np.ndarray], component: str, field: str) -> List def all_not_two_values_zero( - data: Dict[str, np.ndarray], component: str, field_1: str, field_2: str + data: SingleDataset, component: str, field_1: str, field_2: str ) -> List[TwoValuesZeroError]: """ Check that for all records of a particular type of component, the values in the 'field_1' and 'field_2' column are @@ -510,9 +510,7 @@ def all_not_two_values_zero( return [] -def all_not_two_values_equal( - data: Dict[str, np.ndarray], component: str, field_1: str, field_2: str -) -> List[SameValueError]: +def all_not_two_values_equal(data: SingleDataset, component: str, field_1: str, field_2: str) -> List[SameValueError]: """ Check that for all records of a particular type of component, the values in the 'field_1' and 'field_2' column are not both the same value. E.g. from_node and to_node of a line. Returns an empty list on success, or a list @@ -537,7 +535,7 @@ def all_not_two_values_equal( def all_ids_exist_in_data_set( - data: Dict[str, np.ndarray], ref_data: Dict[str, np.ndarray], component: str, ref_name: str + data: SingleDataset, ref_data: SingleDataset, component: str, ref_name: str ) -> List[IdNotInDatasetError]: """ Check that for all records of a particular type of component, the ids exist in the reference data set. 
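# Minimal usage sketch (assumes the power-grid-model package is installed; not taken from
# this diff): a SingleDataset is just a dict that maps component names to structured numpy
# arrays, which is the `data` argument these rule functions receive.
from power_grid_model import initialize_array
from power_grid_model.validation.rules import all_greater_than_zero

node = initialize_array("input", "node", 2)
node["id"] = [1, 2]
node["u_rated"] = [10.5e3, 0.0]  # the second u_rated should be flagged

errors = all_greater_than_zero({"node": node}, "node", "u_rated")
print(errors)  # expected: a single NotGreaterThanError mentioning id 2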
@@ -558,7 +556,7 @@ def all_ids_exist_in_data_set( return [] -def all_finite(data: Dict[str, np.ndarray]) -> List[InfinityError]: +def all_finite(data: SingleDataset) -> List[InfinityError]: """ Check that for all records in all component, the values in all columns are finite value, i.e. float values other than inf, or -inf. Nan values are ignored, as in all other comparison functions. You can use non_missing() to @@ -584,7 +582,7 @@ def all_finite(data: Dict[str, np.ndarray]) -> List[InfinityError]: return errors -def none_missing(data: Dict[str, np.ndarray], component: str, fields: Union[str, List[str]]) -> List[MissingValueError]: +def none_missing(data: SingleDataset, component: str, fields: Union[str, List[str]]) -> List[MissingValueError]: """ Check that for all records of a particular type of component, the values in the 'fields' columns are not NaN. Returns an empty list on success, or a list containing a single error object on failure. @@ -616,8 +614,8 @@ def none_missing(data: Dict[str, np.ndarray], component: str, fields: Union[str, def all_clocks_valid( - data: Dict[str, np.ndarray], component: str, clock_field: str, winding_from_field: str, winding_to_field: str -) -> List[MissingValueError]: + data: SingleDataset, component: str, clock_field: str, winding_from_field: str, winding_to_field: str +) -> List[TransformerClockError]: """ Custom validation rule: Odd clock number is only allowed for Dy(n) or Y(N)d configuration. diff --git a/src/power_grid_model/validation/utils.py b/src/power_grid_model/validation/utils.py index ee82981cad..6b6df5edba 100644 --- a/src/power_grid_model/validation/utils.py +++ b/src/power_grid_model/validation/utils.py @@ -6,17 +6,13 @@ Utilities used for validation. Only errors_to_string() is intended for end users. """ import re -from itertools import chain from typing import Dict, List, Optional, Union import numpy as np -from .. import power_grid_meta_data -from .errors import ValidationError - -InputData = Dict[str, np.ndarray] -UpdateData = Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] -BatchData = List[Dict[str, np.ndarray]] +from power_grid_model import power_grid_meta_data +from power_grid_model.data_types import SingleDataset +from power_grid_model.validation.errors import ValidationError def eval_expression(data: np.ndarray, expression: Union[int, float, str]) -> np.ndarray: @@ -89,139 +85,36 @@ def eval_field_expression(data: np.ndarray, expression: str) -> np.ndarray: return np.true_divide(data[fields[0]], data[fields[1]]) -def split_update_data_in_batches(update_data: UpdateData) -> BatchData: +def update_input_data(input_data: SingleDataset, update_data: SingleDataset): """ - - Args: - update_data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] - - Returns: List[Dict[str, np.ndarray]] - - """ - batches = [] - for component, data in update_data.items(): - if isinstance(data, np.ndarray): - component_batches = split_numpy_array_in_batches(data, component) - elif isinstance(data, dict): - for key in ["indptr", "data"]: - if key not in data: - raise KeyError( - f"Missing '{key}' in sparse update data for '{component}' " - "(expected a python dictionary containing two keys: 'indptr' and 'data')." - ) - component_batches = split_compressed_sparse_structure_in_batches(data["data"], data["indptr"], component) - else: - raise TypeError( - f"Invalid data type {type(data).__name__} in update data for '{component}' " - "(should be a Numpy structured array or a python dictionary)." 
- ) - if not batches: - batches = [{} for _ in component_batches] - elif len(component_batches) != len(batches): - previous_components = set(chain(*(batch.keys() for batch in batches))) - if len(previous_components) == 1: - previous_components = f"'{previous_components.pop()}'" - else: - previous_components = "/".join(sorted(previous_components)) - raise ValueError( - f"Inconsistent number of batches in update data. " - f"Component '{component}' contains {len(component_batches)} batches, " - f"while {previous_components} contained {len(batches)} batches." - ) - - for i, batch_data in enumerate(component_batches): - if batch_data.size > 0: - batches[i][component] = batch_data - return batches - - -def split_numpy_array_in_batches(data: np.ndarray, component: str) -> List[np.ndarray]: - """ - Split a single dense numpy array into one or more batches - - Args: - data: A 1D or 2D Numpy structured array. A 1D array is a single table / batch, a 2D array is a batch per table. - component: The name of the component to which the data belongs, only used for errors. - - Returns: - A list with a single numpy structured array per batch - - """ - if not isinstance(data, np.ndarray): - raise TypeError( - f"Invalid data type {type(data).__name__} in update data for '{component}' " - "(should be a 1D/2D Numpy structured array)." - ) - if data.ndim == 1: - return [data] - if data.ndim == 2: - return [data[i, :] for i in range(data.shape[0])] - raise TypeError( - f"Invalid data dimension {data.ndim} in update data for '{component}' " - "(should be a 1D/2D Numpy structured array)." - ) - - -def split_compressed_sparse_structure_in_batches( - data: np.ndarray, indptr: np.ndarray, component: str -) -> List[np.ndarray]: - """ - Split a single numpy array representing, a compressed sparse structure, into one or more batches - - Args: - data: A 1D Numpy structured array - indptr: A 1D numpy integer array - component: The name of the component to which the data belongs, only used for errors. - - Returns: - A list with a single numpy structured array per batch - + Update the input data using the available non-nan values in the update data. """ - if not isinstance(data, np.ndarray) or data.ndim != 1: - raise TypeError( - f"Invalid data type {type(data).__name__} in sparse update data for '{component}' " - "(should be a 1D Numpy structured array (i.e. a single 'table'))." - ) - - if not isinstance(indptr, np.ndarray) or indptr.ndim != 1 or not np.issubdtype(indptr.dtype, np.integer): - raise TypeError( - f"Invalid indptr data type {type(indptr).__name__} in update data for '{component}' " - "(should be a 1D Numpy array (i.e. a single 'list'), " - "containing indices (i.e. integers))." - ) - if indptr[0] != 0 or indptr[-1] != len(data) or any(indptr[i] > indptr[i + 1] for i in range(len(indptr) - 1)): - raise TypeError( - f"Invalid indptr in update data for '{component}' " - f"(should start with 0, end with the number of objects ({len(data)}) " - "and be monotonic increasing)." 
- ) - - return [data[indptr[i] : indptr[i + 1]] for i in range(len(indptr) - 1)] + merged_data = {component: array.copy() for component, array in input_data.items()} + for component in update_data.keys(): + update_component_data(component, merged_data[component], update_data[component]) + return merged_data -def update_input_data(input_data: Dict[str, np.ndarray], update_data: Dict[str, np.ndarray]): +def update_component_data(component: str, input_data: np.ndarray, update_data: np.ndarray) -> None: """ - Update the input data using the available non-nan values in the update data. + Update the data in a numpy array with another numpy array, + indexed on the "id" field; only non-NaN values are overwritten. """ - - merged_data = {component: array.copy() for component, array in input_data.items()} - for component, array in update_data.items(): - for field in array.dtype.names: - if field == "id": - continue - nan = nan_type(component, field, "update") - if np.isnan(nan): - mask = ~np.isnan(array[field]) - else: - mask = np.not_equal(array[field], nan) - if mask.ndim == 2: - mask = np.any(mask, axis=1) - data = array[["id", field]][mask] - idx = np.where(merged_data[component]["id"] == np.reshape(data["id"], (-1, 1))) - if isinstance(idx, tuple): - merged_data[component][field][idx[1]] = data[field] - return merged_data + for field in update_data.dtype.names: + if field == "id": + continue + nan = nan_type(component, field, "update") + if np.isnan(nan): + mask = ~np.isnan(update_data[field]) + else: + mask = np.not_equal(update_data[field], nan) + if mask.ndim == 2: + mask = np.any(mask, axis=1) + data = update_data[["id", field]][mask] + idx = np.where(input_data["id"] == np.reshape(data["id"], (-1, 1))) + if isinstance(idx, tuple): + input_data[field][idx[1]] = data[field] def errors_to_string( diff --git a/src/power_grid_model/validation/validation.py b/src/power_grid_model/validation/validation.py index 8d19f1844b..12278ed286 100644 --- a/src/power_grid_model/validation/validation.py +++ b/src/power_grid_model/validation/validation.py @@ -9,25 +9,21 @@ """ from itertools import chain -from typing import Dict, List, Optional, Union +from typing import Dict, List, Optional import numpy as np -from ..
import power_grid_meta_data -from ..enum import ( - BranchSide, - CalculationType, - LoadGenType, - MeasuredTerminalType, - WindingType, -) -from .errors import ( +from power_grid_model import power_grid_meta_data +from power_grid_model.data_types import BatchDataset, Dataset, SingleDataset +from power_grid_model.enum import BranchSide, CalculationType, LoadGenType, MeasuredTerminalType, WindingType +from power_grid_model.utils import convert_batch_dataset_to_batch_list +from power_grid_model.validation.errors import ( IdNotInDatasetError, MissingValueError, MultiComponentNotUniqueError, ValidationError, ) -from .rules import ( +from power_grid_model.validation.rules import ( all_between, all_between_or_at, all_boolean, @@ -46,16 +42,11 @@ all_valid_ids, none_missing, ) -from .utils import ( - InputData, - UpdateData, - split_update_data_in_batches, - update_input_data, -) +from power_grid_model.validation.utils import update_input_data def validate_input_data( - input_data: InputData, calculation_type: Optional[CalculationType] = None, symmetric: bool = True + input_data: SingleDataset, calculation_type: Optional[CalculationType] = None, symmetric: bool = True ) -> Optional[List[ValidationError]]: """ Validates the entire input dataset: @@ -86,8 +77,8 @@ def validate_input_data( def validate_batch_data( - input_data: InputData, - update_data: UpdateData, + input_data: SingleDataset, + update_data: BatchDataset, calculation_type: Optional[CalculationType] = None, symmetric: bool = True, ) -> Optional[Dict[int, List[ValidationError]]]: @@ -119,16 +110,17 @@ def validate_batch_data( where the key is the batch number (0-indexed). """ assert_valid_data_structure(input_data, "input") - input_errors: List[ValidationError] = validate_unique_ids_across_components(input_data) + + input_errors: List[ValidationError] = list(validate_unique_ids_across_components(input_data)) # Splitting update_data_into_batches may raise TypeErrors and ValueErrors - batch_data = split_update_data_in_batches(update_data) + batch_data = convert_batch_dataset_to_batch_list(update_data) errors = {} for batch, batch_update_data in enumerate(batch_data): assert_valid_data_structure(batch_update_data, "update") - id_errors: List[ValidationError] = validate_ids_exist(batch_update_data, input_data) + id_errors: List[ValidationError] = list(validate_ids_exist(batch_update_data, input_data)) batch_errors = input_errors + id_errors if not id_errors: @@ -142,7 +134,7 @@ def validate_batch_data( return errors if errors else None -def assert_valid_data_structure(data: Union[InputData, UpdateData], data_type: str) -> None: +def assert_valid_data_structure(data: Dataset, data_type: str) -> None: """ Checks if all component names are valid and if the data inside the component matches the required Numpy structured array as defined in the Power Grid Model meta data. 
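# Minimal usage sketch of the public entry points touched above (assumes power-grid-model
# is installed; toy data only, not taken from this diff). validate_input_data returns None
# when the dataset is valid, otherwise a list of ValidationError objects; validate_batch_data
# returns a dict of errors keyed by batch number.
from power_grid_model import initialize_array
from power_grid_model.validation import validate_input_data

node = initialize_array("input", "node", 2)
node["id"] = [1, 2]
node["u_rated"] = [10.5e3, 10.5e3]

errors = validate_input_data({"node": node})
print(errors)  # None for this toy node-only dataset, or a list of ValidationError objects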
@@ -181,7 +173,7 @@ def assert_valid_data_structure(data: Union[InputData, UpdateData], data_type: s ) -def validate_unique_ids_across_components(data: InputData) -> List[MultiComponentNotUniqueError]: +def validate_unique_ids_across_components(data: SingleDataset) -> List[MultiComponentNotUniqueError]: """ Checks if all ids in the input dataset are unique @@ -195,7 +187,7 @@ def validate_unique_ids_across_components(data: InputData) -> List[MultiComponen return all_cross_unique(data, [(component, "id") for component in data]) -def validate_ids_exist(update_data: Dict[str, np.ndarray], input_data: InputData) -> List[IdNotInDatasetError]: +def validate_ids_exist(update_data: Dict[str, np.ndarray], input_data: SingleDataset) -> List[IdNotInDatasetError]: """ Checks if all ids of the components in the update data exist in the input data. This needs to be true, because you can only update existing components. @@ -215,7 +207,7 @@ def validate_ids_exist(update_data: Dict[str, np.ndarray], input_data: InputData def validate_required_values( - data: InputData, calculation_type: Optional[CalculationType] = None, symmetric: bool = True + data: SingleDataset, calculation_type: Optional[CalculationType] = None, symmetric: bool = True ) -> List[MissingValueError]: """ Checks if all required data is available. @@ -288,7 +280,7 @@ def validate_required_values( return list(chain(*(none_missing(data, component, required.get(component, [])) for component in data))) -def validate_values(data: InputData) -> List[ValidationError]: # pylint: disable=too-many-branches +def validate_values(data: SingleDataset) -> List[ValidationError]: # pylint: disable=too-many-branches """ For each component supplied in the data, call the appropriate validation function @@ -298,7 +290,7 @@ def validate_values(data: InputData) -> List[ValidationError]: # pylint: disabl Returns: an empty list if all required data is valid, or a list of ValidationErrors. 
""" - errors: List[ValidationError] = all_finite(data) + errors: List[ValidationError] = list(all_finite(data)) if "node" in data: errors += validate_node(data) if "line" in data: @@ -333,18 +325,18 @@ def validate_values(data: InputData) -> List[ValidationError]: # pylint: disabl # pylint: disable=missing-function-docstring -def validate_base(data: InputData, component: str) -> List[ValidationError]: - errors = all_unique(data, component, "id") +def validate_base(data: SingleDataset, component: str) -> List[ValidationError]: + errors: List[ValidationError] = list(all_unique(data, component, "id")) return errors -def validate_node(data: InputData) -> List[ValidationError]: +def validate_node(data: SingleDataset) -> List[ValidationError]: errors = validate_base(data, "node") errors += all_greater_than_zero(data, "node", "u_rated") return errors -def validate_branch(data: InputData, component: str) -> List[ValidationError]: +def validate_branch(data: SingleDataset, component: str) -> List[ValidationError]: errors = validate_base(data, component) errors += all_valid_ids(data, component, "from_node", "node") errors += all_valid_ids(data, component, "to_node", "node") @@ -354,7 +346,7 @@ def validate_branch(data: InputData, component: str) -> List[ValidationError]: return errors -def validate_line(data: InputData) -> List[ValidationError]: +def validate_line(data: SingleDataset) -> List[ValidationError]: errors = validate_branch(data, "line") errors += all_not_two_values_zero(data, "line", "r1", "x1") errors += all_not_two_values_zero(data, "line", "r0", "x0") @@ -362,7 +354,7 @@ def validate_line(data: InputData) -> List[ValidationError]: return errors -def validate_transformer(data: InputData) -> List[ValidationError]: +def validate_transformer(data: SingleDataset) -> List[ValidationError]: errors = validate_branch(data, "transformer") errors += all_greater_than_zero(data, "transformer", "u1") errors += all_greater_than_zero(data, "transformer", "u2") @@ -392,14 +384,14 @@ def validate_transformer(data: InputData) -> List[ValidationError]: return errors -def validate_appliance(data: InputData, component: str) -> List[ValidationError]: +def validate_appliance(data: SingleDataset, component: str) -> List[ValidationError]: errors = validate_base(data, component) errors += all_boolean(data, component, "status") errors += all_valid_ids(data, component, "node", "node") return errors -def validate_source(data: InputData) -> List[ValidationError]: +def validate_source(data: SingleDataset) -> List[ValidationError]: errors = validate_appliance(data, "source") errors += all_greater_than_zero(data, "source", "u_ref") errors += all_greater_than_zero(data, "source", "sk") @@ -408,18 +400,18 @@ def validate_source(data: InputData) -> List[ValidationError]: return errors -def validate_generic_load_gen(data: InputData, component: str) -> List[ValidationError]: +def validate_generic_load_gen(data: SingleDataset, component: str) -> List[ValidationError]: errors = validate_appliance(data, component) errors += all_valid_enum_values(data, component, "type", LoadGenType) return errors -def validate_shunt(data: InputData) -> List[ValidationError]: +def validate_shunt(data: SingleDataset) -> List[ValidationError]: errors = validate_appliance(data, "shunt") return errors -def validate_generic_voltage_sensor(data: InputData, component: str) -> List[ValidationError]: +def validate_generic_voltage_sensor(data: SingleDataset, component: str) -> List[ValidationError]: errors = validate_base(data, component) errors += 
all_greater_than_zero(data, component, "u_sigma") errors += all_greater_than_zero(data, component, "u_measured") @@ -427,7 +419,7 @@ def validate_generic_voltage_sensor(data: InputData, component: str) -> List[Val return errors -def validate_generic_power_sensor(data: InputData, component: str) -> List[ValidationError]: +def validate_generic_power_sensor(data: SingleDataset, component: str) -> List[ValidationError]: errors = validate_base(data, component) errors += all_greater_than_zero(data, component, "power_sigma") errors += all_valid_enum_values(data, component, "measured_terminal_type", MeasuredTerminalType) diff --git a/tests/unit/test_0Z_model_validation.py b/tests/unit/test_0Z_model_validation.py index 9ae117321f..26a069a57d 100644 --- a/tests/unit/test_0Z_model_validation.py +++ b/tests/unit/test_0Z_model_validation.py @@ -8,15 +8,9 @@ import pytest from power_grid_model import PowerGridModel -from power_grid_model.manual_testing import convert_batch_to_list_data +from power_grid_model.utils import convert_batch_dataset_to_batch_list -from .utils import ( - EXPORT_OUTPUT, - compare_result, - import_case_data, - pytest_cases, - save_json_data, -) +from .utils import EXPORT_OUTPUT, compare_result, import_case_data, pytest_cases, save_json_data calculation_function_map = { "power_flow": PowerGridModel.calculate_power_flow, @@ -76,9 +70,9 @@ def test_batch_validation( case_data = import_case_data(case_path, sym=sym) model = PowerGridModel(case_data["input"], system_frequency=50.0) update_batch = case_data["update_batch"] - update_list = convert_batch_to_list_data(update_batch) + update_list = convert_batch_dataset_to_batch_list(update_batch) reference_output_batch = case_data["output_batch"] - reference_output_list = convert_batch_to_list_data(reference_output_batch) + reference_output_list = convert_batch_dataset_to_batch_list(reference_output_batch) # execute batch calculation by applying update method for update_data, reference_result in zip(update_list, reference_output_list): @@ -94,7 +88,7 @@ def test_batch_validation( result_batch = calculation_function_map[calculation_type]( model, symmetric=sym, calculation_method=calculation_method, update_data=update_batch, threading=threading ) - result_list = convert_batch_to_list_data(result_batch) + result_list = convert_batch_dataset_to_batch_list(result_batch) for result, reference_result in zip(result_list, reference_output_list): compare_result(result, reference_result, rtol, atol) # assert batch parameters diff --git a/tests/unit/test_manual_testing.py b/tests/unit/test_manual_testing.py deleted file mode 100644 index 02c50868cf..0000000000 --- a/tests/unit/test_manual_testing.py +++ /dev/null @@ -1,326 +0,0 @@ -# SPDX-FileCopyrightText: 2022 Contributors to the Power Grid Model project -# -# SPDX-License-Identifier: MPL-2.0 - -import io -from pathlib import Path -from unittest.mock import MagicMock, mock_open, patch - -import numpy as np -import pytest - -from power_grid_model.manual_testing import ( - _compact_json_dump, - _inject_extra_info, - convert_batch_to_list_data, - convert_numpy_to_python, - convert_python_to_numpy, - export_json_data, - is_nan, -) - - -@pytest.fixture(name="two_nodes_one_line") -def two_nodes_one_line_fixture(): - return { - "node": [{"id": 11, "u_rated": 10.5e3}, {"id": 12, "u_rated": 10.5e3}], - "line": [ - { - "id": 21, - "from_node": 11, - "to_node": 12, - "from_status": 1, - "to_status": 1, - "r1": 0.11, - "x1": 0.12, - "c1": 4.1380285203892784e-05, - "tan1": 0.1076923076923077, - "i_n": 510.0, - } 
- ], - } - - -@pytest.fixture(name="two_nodes_two_lines") -def two_nodes_two_lines_fixture(): - return { - "node": [{"id": 11, "u_rated": 10.5e3}, {"id": 12, "u_rated": 10.5e3}], - "line": [ - { - "id": 21, - "from_node": 11, - "to_node": 12, - "from_status": 1, - "to_status": 1, - "r1": 0.11, - "x1": 0.12, - "c1": 4.1380285203892784e-05, - "tan1": 0.1076923076923077, - "i_n": 510.0, - }, - { - "id": 31, - "from_node": 11, - "to_node": 12, - "from_status": 1, - "to_status": 1, - "r1": 0.11, - "x1": 0.12, - "c1": 4.1380285203892784e-05, - "tan1": 0.1076923076923077, - "i_n": 510.0, - }, - ], - } - - -def test_is_nan(): - single_value = np.array([np.nan]) - assert is_nan(single_value) - array_f8 = np.array([0.1, 0.2, np.nan], dtype=np.dtype("f8")) - assert not is_nan(array_f8) - array_i4 = np.array([10, 2, -(2**31), 40], dtype=np.dtype("i4")) - assert not is_nan(array_i4) - array_i1 = np.array([1, 0, -(2**7), 1], dtype=np.dtype("i1")) - assert not is_nan(array_i1) - nan_array = np.array([np.nan, np.nan, np.nan]) - assert is_nan(nan_array) - - -def test_convert_json_to_numpy(two_nodes_one_line, two_nodes_two_lines): - pgm_data = convert_python_to_numpy(two_nodes_one_line, "input") - assert len(pgm_data) == 2 - assert len(pgm_data["node"]) == 2 - assert pgm_data["node"][0]["id"] == 11 - assert pgm_data["node"][0]["u_rated"] == 10.5e3 - assert len(pgm_data["line"]) == 1 - - json_list = [two_nodes_one_line, two_nodes_two_lines, two_nodes_one_line] - pgm_data_batch = convert_python_to_numpy(json_list, "input") - assert pgm_data_batch["node"].shape == (3, 2) - assert np.allclose(pgm_data_batch["line"]["indptr"], [0, 1, 3, 4]) - - -def test_round_trip_json_numpy_json(two_nodes_one_line, two_nodes_two_lines): - pgm_data = convert_python_to_numpy(two_nodes_one_line, "input") - json_dict = convert_numpy_to_python(pgm_data) - assert json_dict == two_nodes_one_line - - json_list = [two_nodes_one_line, two_nodes_two_lines, two_nodes_one_line] - pgm_data_list = convert_python_to_numpy(json_list, "input") - json_return_list = convert_numpy_to_python(pgm_data_list) - assert json_return_list == json_list - - -def test_convert_python_to_numpy__raises_value_error(): - with pytest.raises(ValueError, match="Invalid attribute 'u' for line input data."): - convert_python_to_numpy({"line": [{"id": 1, "u": 10.5e3}]}, "input") - with pytest.raises(ValueError, match="Invalid 'id' value for line input data."): - convert_python_to_numpy({"line": [{"id": "my_line", "u_rated": 10.5e3}]}, "input") - - -def test_convert_python_to_numpy__raises_type_error(): - with pytest.raises(TypeError, match="Only list or dict is allowed in JSON data!"): - convert_python_to_numpy(123, "input") - - -def test_convert_batch_to_list_data__zero_batches(): - assert convert_batch_to_list_data({}) == [] - - -@patch("json.dump") -@patch("builtins.open", new_callable=mock_open) -@patch("power_grid_model.manual_testing.convert_numpy_to_python") -def test_export_json_data(convert_mock: MagicMock, open_mock: MagicMock, json_dump_mock: MagicMock): - convert_mock.return_value = {"foo": [{"val": 123}]} - export_json_data(json_file=Path("output.json"), data={}, indent=2) - convert_mock.assert_called_once() - json_dump_mock.assert_called_once_with({"foo": [{"val": 123}]}, open_mock(), indent=2) - - -@patch("json.dump") -@patch("builtins.open", new_callable=mock_open) -@patch("power_grid_model.manual_testing.convert_numpy_to_python") -@patch("power_grid_model.manual_testing._inject_extra_info") -def test_export_json_data_extra_info( - extra_info_mock: 
MagicMock, convert_mock: MagicMock, _open_mock: MagicMock, _json_dump_mock: MagicMock -): - convert_mock.return_value = {"foo": [{"id": 123}]} - export_json_data(json_file=Path(), data={}, extra_info={123: "Extra information"}) - extra_info_mock.assert_called_once_with(data={"foo": [{"id": 123}]}, extra_info={123: "Extra information"}) - - -def test_inject_extra_info_single(): - data = {"node": [{"id": 0, "foo": 123}, {"id": 1, "bar": 456}], "line": [{"id": 2, "baz": 789}]} - extra_info = {2: 42, 1: {"sheet": "Nodes", "Number": "00123"}} - _inject_extra_info(data=data, extra_info=extra_info) - assert data == { - "node": [{"id": 0, "foo": 123}, {"id": 1, "bar": 456, "extra": {"sheet": "Nodes", "Number": "00123"}}], - "line": [{"id": 2, "baz": 789, "extra": 42}], - } - - -def test_inject_extra_info_batch(): - data = [ - {"node": [{"id": 0, "foo": 111}, {"id": 1, "bar": 222}], "line": [{"id": 2, "baz": 333}]}, - {"node": [{"id": 0, "foo": 444}, {"id": 1, "bar": 555}], "line": [{"id": 2, "baz": 666}]}, - ] - extra_info = [{2: 42, 1: {"sheet": "Nodes", "Number": "00123"}}, {2: 43, 0: None}] - _inject_extra_info(data=data, extra_info=extra_info) - assert data == [ - { - "node": [{"id": 0, "foo": 111}, {"id": 1, "bar": 222, "extra": {"sheet": "Nodes", "Number": "00123"}}], - "line": [{"id": 2, "baz": 333, "extra": 42}], - }, - { - "node": [{"id": 0, "foo": 444, "extra": None}, {"id": 1, "bar": 555}], - "line": [{"id": 2, "baz": 666, "extra": 43}], - }, - ] - - -def test_inject_extra_info_batch_copy_info(): - data = [ - {"node": [{"id": 0, "foo": 111}, {"id": 1, "bar": 222}], "line": [{"id": 2, "baz": 333}]}, - {"node": [{"id": 0, "foo": 444}, {"id": 1, "bar": 555}], "line": [{"id": 2, "baz": 666}]}, - ] - extra_info = {2: 42, 1: {"sheet": "Nodes", "Number": "00123"}} - _inject_extra_info(data=data, extra_info=extra_info) - assert data == [ - { - "node": [{"id": 0, "foo": 111}, {"id": 1, "bar": 222, "extra": {"sheet": "Nodes", "Number": "00123"}}], - "line": [{"id": 2, "baz": 333, "extra": 42}], - }, - { - "node": [{"id": 0, "foo": 444}, {"id": 1, "bar": 555, "extra": {"sheet": "Nodes", "Number": "00123"}}], - "line": [{"id": 2, "baz": 666, "extra": 42}], - }, - ] - - -def test_inject_extra_info_single_dataset_with_batch_info(): - data = {"node": [{"id": 0, "foo": 123}, {"id": 1, "bar": 456}], "line": [{"id": 2, "baz": 789}]} - extra_info = [{2: 42, 1: {"sheet": "Nodes", "Number": "00123"}}, {2: 43, 0: None}] - with pytest.raises(TypeError): - _inject_extra_info(data=data, extra_info=extra_info) - - -def test_compact_json_dump(): - data = { - "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], - "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}], - } - - string_stream = io.StringIO() - _compact_json_dump(data, string_stream, indent=2, max_level=0) - assert ( - string_stream.getvalue() - == """{"node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}]}""" - ) - - string_stream = io.StringIO() - _compact_json_dump(data, string_stream, indent=2, max_level=1) - assert ( - string_stream.getvalue() - == """{ - "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], - "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}] -}""" - ) - - string_stream = io.StringIO() - _compact_json_dump(data, string_stream, indent=2, max_level=2) - assert ( - string_stream.getvalue() - == """{ - "node": - [{"id": 1, "x": 2}, {"id": 3, "x": 4}], - "line": - [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}] -}""" - ) - - string_stream = 
io.StringIO() - _compact_json_dump(data, string_stream, indent=2, max_level=3) - assert ( - string_stream.getvalue() - == """{ - "node": - [ - {"id": 1, "x": 2}, - {"id": 3, "x": 4} - ], - "line": - [ - {"id": 5, "x": 6}, - {"id": 7, "x": {"y": 8.1, "z": 8.2}} - ] -}""" - ) - - -def test_compact_json_dump_string(): - data = "test" - - string_stream = io.StringIO() - _compact_json_dump(data, string_stream, indent=2, max_level=2) - assert string_stream.getvalue() == '"test"' - - -def test_compact_json_dump_deep(): - data = { - "foo": 1, - "bar": {"x": 2, "y": 3}, - } - - string_stream = io.StringIO() - _compact_json_dump(data, string_stream, indent=2, max_level=10) - assert ( - string_stream.getvalue() - == """{ - "foo": 1, - "bar": - { - "x": 2, - "y": 3 - } -}""" - ) - - -def test_compact_json_dump_batch(): - data = [ - { - "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], - "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}], - }, - { - "line": [{"id": 9, "x": 10}, {"id": 11, "x": 12}], - }, - ] - string_stream = io.StringIO() - _compact_json_dump(data, string_stream, indent=2, max_level=4) - assert ( - string_stream.getvalue() - == """[ - { - "node": - [ - {"id": 1, "x": 2}, - {"id": 3, "x": 4} - ], - "line": - [ - {"id": 5, "x": 6}, - {"id": 7, "x": {"y": 8.1, "z": 8.2}} - ] - }, - { - "line": - [ - {"id": 9, "x": 10}, - {"id": 11, "x": 12} - ] - } -]""" - ) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py new file mode 100644 index 0000000000..e0bb4940e8 --- /dev/null +++ b/tests/unit/test_utils.py @@ -0,0 +1,642 @@ +# SPDX-FileCopyrightText: 2022 Contributors to the Power Grid Model project +# +# SPDX-License-Identifier: MPL-2.0 + +import io +from pathlib import Path +from unittest.mock import MagicMock, mock_open, patch + +import numpy as np +import pytest + +from power_grid_model.data_types import BatchDataset, BatchList, Dataset +from power_grid_model.utils import ( + compact_json_dump, + convert_batch_dataset_to_batch_list, + convert_dataset_to_python_dataset, + convert_python_to_numpy, + export_json_data, + get_and_verify_batch_sizes, + inject_extra_info, + is_nan, + split_numpy_array_in_batches, + split_sparse_batches_in_batches, +) + + +@pytest.fixture(name="two_nodes_one_line") +def two_nodes_one_line_fixture(): + return { + "node": [{"id": 11, "u_rated": 10.5e3}, {"id": 12, "u_rated": 10.5e3}], + "line": [ + { + "id": 21, + "from_node": 11, + "to_node": 12, + "from_status": 1, + "to_status": 1, + "r1": 0.11, + "x1": 0.12, + "c1": 4.1380285203892784e-05, + "tan1": 0.1076923076923077, + "i_n": 510.0, + } + ], + } + + +@pytest.fixture(name="two_nodes_two_lines") +def two_nodes_two_lines_fixture(): + return { + "node": [{"id": 11, "u_rated": 10.5e3}, {"id": 12, "u_rated": 10.5e3}], + "line": [ + { + "id": 21, + "from_node": 11, + "to_node": 12, + "from_status": 1, + "to_status": 1, + "r1": 0.11, + "x1": 0.12, + "c1": 4.1380285203892784e-05, + "tan1": 0.1076923076923077, + "i_n": 510.0, + }, + { + "id": 31, + "from_node": 11, + "to_node": 12, + "from_status": 1, + "to_status": 1, + "r1": 0.11, + "x1": 0.12, + "c1": 4.1380285203892784e-05, + "tan1": 0.1076923076923077, + "i_n": 510.0, + }, + ], + } + + +def assert_list_of_numpy_arrays_equal(expected, actual): + assert type(actual) is type(expected) + assert len(actual) == len(expected) + for i in range(len(expected)): + assert isinstance(expected[i], type(actual[i])) + np.testing.assert_array_equal(expected[i], actual[i]) + + +def assert_list_of_dicts_of_numpy_arrays_equal(expected, actual): + assert 
type(actual) is type(expected) + assert len(actual) == len(expected) + for i in range(len(expected)): + assert isinstance(expected[i], type(actual[i])) + assert actual[i].keys() == expected[i].keys() + for key in expected[i]: + np.testing.assert_array_equal(expected[i][key], actual[i][key]) + + +def test_is_nan(): + single_value = np.array([np.nan]) + assert is_nan(single_value) + array_f8 = np.array([0.1, 0.2, np.nan], dtype=np.dtype("f8")) + assert not is_nan(array_f8) + array_i4 = np.array([10, 2, -(2**31), 40], dtype=np.dtype("i4")) + assert not is_nan(array_i4) + array_i1 = np.array([1, 0, -(2**7), 1], dtype=np.dtype("i1")) + assert not is_nan(array_i1) + nan_array = np.array([np.nan, np.nan, np.nan]) + assert is_nan(nan_array) + + +def test_convert_json_to_numpy(two_nodes_one_line, two_nodes_two_lines): + pgm_data = convert_python_to_numpy(two_nodes_one_line, "input") + assert len(pgm_data) == 2 + assert len(pgm_data["node"]) == 2 + assert pgm_data["node"][0]["id"] == 11 + assert pgm_data["node"][0]["u_rated"] == 10.5e3 + assert len(pgm_data["line"]) == 1 + + json_list = [two_nodes_one_line, two_nodes_two_lines, two_nodes_one_line] + pgm_data_batch = convert_python_to_numpy(json_list, "input") + assert pgm_data_batch["node"].shape == (3, 2) + assert np.allclose(pgm_data_batch["line"]["indptr"], [0, 1, 3, 4]) + + +def test_round_trip_json_numpy_json(two_nodes_one_line, two_nodes_two_lines): + pgm_data = convert_python_to_numpy(two_nodes_one_line, "input") + json_dict = convert_dataset_to_python_dataset(pgm_data) + assert json_dict == two_nodes_one_line + + json_list = [two_nodes_one_line, two_nodes_two_lines, two_nodes_one_line] + pgm_data_list = convert_python_to_numpy(json_list, "input") + json_return_list = convert_dataset_to_python_dataset(pgm_data_list) + assert json_return_list == json_list + + +def test_convert_python_to_numpy__raises_value_error(): + with pytest.raises(ValueError, match="Invalid attribute 'u' for line input data."): + convert_python_to_numpy({"line": [{"id": 1, "u": 10.5e3}]}, "input") + with pytest.raises(ValueError, match="Invalid 'id' value for line input data."): + convert_python_to_numpy({"line": [{"id": "my_line", "u_rated": 10.5e3}]}, "input") + + +def test_convert_python_to_numpy__raises_type_error(): + with pytest.raises(TypeError, match="Data should be either a list or a dictionary!"): + convert_python_to_numpy(123, "input") + + +def test_convert_batch_to_list_data__zero_batches(): + assert convert_batch_dataset_to_batch_list({}) == [] + + +@patch("json.dump") +@patch("builtins.open", new_callable=mock_open) +@patch("power_grid_model.utils.convert_dataset_to_python_dataset") +def test_export_json_data(convert_mock: MagicMock, open_mock: MagicMock, json_dump_mock: MagicMock): + convert_mock.return_value = {"foo": [{"val": 123}]} + data: Dataset = {} # type: ignore + export_json_data(json_file=Path("output.json"), data=data, indent=2) + convert_mock.assert_called_once() + json_dump_mock.assert_called_once_with({"foo": [{"val": 123}]}, open_mock(), indent=2) + + +@patch("json.dump") +@patch("builtins.open", new_callable=mock_open) +@patch("power_grid_model.utils.convert_dataset_to_python_dataset") +@patch("power_grid_model.utils.inject_extra_info") +def test_export_json_data_extra_info( + extra_info_mock: MagicMock, convert_mock: MagicMock, _open_mock: MagicMock, _json_dump_mock: MagicMock +): + data: Dataset = {} # type: ignore + convert_mock.return_value = {"foo": [{"id": 123}]} + export_json_data(json_file=Path(), data=data, extra_info={123: "Extra 
information"}) + extra_info_mock.assert_called_once_with(data={"foo": [{"id": 123}]}, extra_info={123: "Extra information"}) + + +def test_inject_extra_info_single(): + data = {"node": [{"id": 0, "foo": 123}, {"id": 1, "bar": 456}], "line": [{"id": 2, "baz": 789}]} + extra_info = {2: 42, 1: {"sheet": "Nodes", "Number": "00123"}} + inject_extra_info(data=data, extra_info=extra_info) + assert data == { + "node": [{"id": 0, "foo": 123}, {"id": 1, "bar": 456, "extra": {"sheet": "Nodes", "Number": "00123"}}], + "line": [{"id": 2, "baz": 789, "extra": 42}], + } + + +def test_inject_extra_info_batch(): + data = [ + {"node": [{"id": 0, "foo": 111}, {"id": 1, "bar": 222}], "line": [{"id": 2, "baz": 333}]}, + {"node": [{"id": 0, "foo": 444}, {"id": 1, "bar": 555}], "line": [{"id": 2, "baz": 666}]}, + ] + extra_info = [{2: 42, 1: {"sheet": "Nodes", "Number": "00123"}}, {2: 43, 0: None}] + inject_extra_info(data=data, extra_info=extra_info) + assert data == [ + { + "node": [{"id": 0, "foo": 111}, {"id": 1, "bar": 222, "extra": {"sheet": "Nodes", "Number": "00123"}}], + "line": [{"id": 2, "baz": 333, "extra": 42}], + }, + { + "node": [{"id": 0, "foo": 444, "extra": None}, {"id": 1, "bar": 555}], + "line": [{"id": 2, "baz": 666, "extra": 43}], + }, + ] + + +def test_inject_extra_info_batch_copy_info(): + data = [ + {"node": [{"id": 0, "foo": 111}, {"id": 1, "bar": 222}], "line": [{"id": 2, "baz": 333}]}, + {"node": [{"id": 0, "foo": 444}, {"id": 1, "bar": 555}], "line": [{"id": 2, "baz": 666}]}, + ] + extra_info = {2: 42, 1: {"sheet": "Nodes", "Number": "00123"}} + inject_extra_info(data=data, extra_info=extra_info) + assert data == [ + { + "node": [{"id": 0, "foo": 111}, {"id": 1, "bar": 222, "extra": {"sheet": "Nodes", "Number": "00123"}}], + "line": [{"id": 2, "baz": 333, "extra": 42}], + }, + { + "node": [{"id": 0, "foo": 444}, {"id": 1, "bar": 555, "extra": {"sheet": "Nodes", "Number": "00123"}}], + "line": [{"id": 2, "baz": 666, "extra": 42}], + }, + ] + + +def test_inject_extra_info_single_dataset_with_batch_info(): + data = {"node": [{"id": 0, "foo": 123}, {"id": 1, "bar": 456}], "line": [{"id": 2, "baz": 789}]} + extra_info = [{2: 42, 1: {"sheet": "Nodes", "Number": "00123"}}, {2: 43, 0: None}] + with pytest.raises(TypeError): + inject_extra_info(data=data, extra_info=extra_info) + + +def test_compact_json_dump(): + data = { + "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], + "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}], + } + + string_stream = io.StringIO() + compact_json_dump(data, string_stream, indent=2, max_level=0) + assert ( + string_stream.getvalue() + == """{"node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}]}""" + ) + + string_stream = io.StringIO() + compact_json_dump(data, string_stream, indent=2, max_level=1) + assert ( + string_stream.getvalue() + == """{ + "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], + "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}] +}""" + ) + + string_stream = io.StringIO() + compact_json_dump(data, string_stream, indent=2, max_level=2) + assert ( + string_stream.getvalue() + == """{ + "node": + [{"id": 1, "x": 2}, {"id": 3, "x": 4}], + "line": + [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}] +}""" + ) + + string_stream = io.StringIO() + compact_json_dump(data, string_stream, indent=2, max_level=3) + assert ( + string_stream.getvalue() + == """{ + "node": + [ + {"id": 1, "x": 2}, + {"id": 3, "x": 4} + ], + "line": + [ + {"id": 5, "x": 6}, + {"id": 
7, "x": {"y": 8.1, "z": 8.2}} + ] +}""" + ) + + +def test_compact_json_dump_string(): + data = "test" + + string_stream = io.StringIO() + compact_json_dump(data, string_stream, indent=2, max_level=2) + assert string_stream.getvalue() == '"test"' + + +def test_compact_json_dump_deep(): + data = { + "foo": 1, + "bar": {"x": 2, "y": 3}, + } + + string_stream = io.StringIO() + compact_json_dump(data, string_stream, indent=2, max_level=10) + assert ( + string_stream.getvalue() + == """{ + "foo": 1, + "bar": + { + "x": 2, + "y": 3 + } +}""" + ) + + +def test_compact_json_dump_batch(): + data = [ + { + "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], + "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}], + }, + { + "line": [{"id": 9, "x": 10}, {"id": 11, "x": 12}], + }, + ] + string_stream = io.StringIO() + compact_json_dump(data, string_stream, indent=2, max_level=4) + assert ( + string_stream.getvalue() + == """[ + { + "node": + [ + {"id": 1, "x": 2}, + {"id": 3, "x": 4} + ], + "line": + [ + {"id": 5, "x": 6}, + {"id": 7, "x": {"y": 8.1, "z": 8.2}} + ] + }, + { + "line": + [ + {"id": 9, "x": 10}, + {"id": 11, "x": 12} + ] + } +]""" + ) + + +def test_split_numpy_array_in_batches_n1(): + foo = [("a", "i4"), ("b", "i4"), ("c", "i4")] + update_data = np.array([(111, 121, 131), (112, 122, 132), (113, 123, 133), (114, 124, 134)], dtype=foo) + expected = [np.array([(111, 121, 131), (112, 122, 132), (113, 123, 133), (114, 124, 134)], dtype=foo)] + actual = split_numpy_array_in_batches(update_data, "") + assert_list_of_numpy_arrays_equal(expected, actual) + + +def test_split_numpy_array_in_batches_n2(): + foo = [("a", "i4"), ("b", "i4"), ("c", "i4")] + update_data = np.array( + [ + [(1111, 1121, 1131), (1112, 1122, 132), (1113, 1123, 1133), (1114, 1124, 1134)], + [(2111, 2121, 2131), (2112, 2122, 232), (2113, 2123, 2133), (2114, 2124, 2134)], + ], + dtype=foo, + ) + expected = [ + np.array([(1111, 1121, 1131), (1112, 1122, 132), (1113, 1123, 1133), (1114, 1124, 1134)], dtype=foo), + np.array([(2111, 2121, 2131), (2112, 2122, 232), (2113, 2123, 2133), (2114, 2124, 2134)], dtype=foo), + ] + actual = split_numpy_array_in_batches(update_data, "") + assert_list_of_numpy_arrays_equal(expected, actual) + + +def test_split_numpy_array_in_batches_wrong_data_type(): + update_data = [1, 2, 3] + with pytest.raises( + TypeError, + match="Invalid data type list in batch data for 'foo' " r"\(should be a 1D/2D Numpy structured array\).", + ): + split_numpy_array_in_batches(update_data, "foo") # type: ignore + + +def test_split_numpy_array_in_batches_wrong_data_dim(): + update_date = np.array([[[1, 2, 3]]]) + with pytest.raises( + TypeError, + match="Invalid data dimension 3 in batch data for 'foo' " r"\(should be a 1D/2D Numpy structured array\).", + ): + split_numpy_array_in_batches(update_date, "foo") + + +def test_normalize_batch_data_structure_n3_sparse(): + foo = [("a", "i4"), ("b", "i4"), ("c", "i4")] + update_data = np.array( + [ + (1111, 1121, 1131), + (1112, 1122, 132), + (1113, 1123, 1133), + (1114, 1124, 1134), + (2111, 2121, 2131), + (2112, 2122, 232), + (2113, 2123, 2133), + (2114, 2124, 2134), + ], + dtype=foo, + ) + indptr = np.array([0, 4, 4, 8]) + expected = [ + np.array([(1111, 1121, 1131), (1112, 1122, 132), (1113, 1123, 1133), (1114, 1124, 1134)], dtype=foo), + np.array([], dtype=foo), + np.array([(2111, 2121, 2131), (2112, 2122, 232), (2113, 2123, 2133), (2114, 2124, 2134)], dtype=foo), + ] + actual = split_sparse_batches_in_batches(batch_data={"data": update_data, "indptr": indptr}, 
component="") + assert_list_of_numpy_arrays_equal(expected, actual) + + +def test_split_compressed_sparse_structure_in_batches_wrong_data(): + data_1 = [1, 2, 3, 4, 5, 6, 7, 8] # wrong type + data_2 = np.array([[1, 2, 3, 4], [5, 6, 7, 8]]) # wrong dimension + indptr = np.array([0, 4, 4, 8]) + with pytest.raises(TypeError, match="Invalid data type list in sparse batch data for 'foo' "): + split_sparse_batches_in_batches(batch_data={"data": data_1, "indptr": indptr}, component="foo") # type: ignore + with pytest.raises(TypeError, match="Invalid data type ndarray in sparse batch data for 'bar' "): + split_sparse_batches_in_batches(batch_data={"data": data_2, "indptr": indptr}, component="bar") + + +def test_split_compressed_sparse_structure_in_batches_wrong_indptr(): + foo = [("a", "i4"), ("b", "i4"), ("c", "i4")] + update_data = np.array( + [ + (1111, 1121, 1131), + (1112, 1122, 132), + (1113, 1123, 1133), + (1114, 1124, 1134), + (2111, 2121, 2131), + (2112, 2122, 232), + (2113, 2123, 2133), + (2114, 2124, 2134), + ], + dtype=foo, + ) + indptr_1 = [0, 4, 4, 8] # wrong type + indptr_2 = np.array([[0, 4], [4, 8]]) # wrong dimension + indptr_3 = np.array([0.0, 4.0, 4.0, 8.0]) # wrong dtype + with pytest.raises(TypeError, match="Invalid indptr data type list in batch data for 'foo' "): + split_sparse_batches_in_batches(batch_data={"data": update_data, "indptr": indptr_1}, component="foo") # type: ignore + with pytest.raises(TypeError, match="Invalid indptr data type ndarray in batch data for 'foo' "): + split_sparse_batches_in_batches(batch_data={"data": update_data, "indptr": indptr_2}, component="foo") # type: ignore + with pytest.raises(TypeError, match="Invalid indptr data type ndarray in batch data for 'foo' "): + split_sparse_batches_in_batches(batch_data={"data": update_data, "indptr": indptr_3}, component="foo") # type: ignore + + +def test_split_compressed_sparse_structure_in_batches_wrong_indptr_values(): + foo = [("a", "i4"), ("b", "i4"), ("c", "i4")] + update_data = np.array( + [ + (1111, 1121, 1131), + (1112, 1122, 132), + (1113, 1123, 1133), + (1114, 1124, 1134), + (2111, 2121, 2131), + (2112, 2122, 232), + (2113, 2123, 2133), + (2114, 2124, 2134), + ], + dtype=foo, + ) + indptr_1 = np.array([1, 4, 4, 8]) + indptr_2 = np.array([0, 4, 4, 9]) + indptr_3 = np.array([0, 4, 3, 8]) + with pytest.raises( + TypeError, + match="Invalid indptr in batch data for 'foo' " + r"\(should start with 0, end with the number of objects \(8\) " + r"and be monotonic increasing\).", + ): + split_sparse_batches_in_batches(batch_data={"data": update_data, "indptr": indptr_1}, component="foo") + with pytest.raises( + TypeError, + match="Invalid indptr in batch data for 'foo' " + r"\(should start with 0, end with the number of objects \(8\) " + r"and be monotonic increasing\).", + ): + split_sparse_batches_in_batches(batch_data={"data": update_data, "indptr": indptr_2}, component="foo") + with pytest.raises( + TypeError, + match="Invalid indptr in batch data for 'foo' " + r"\(should start with 0, end with the number of objects \(8\) " + r"and be monotonic increasing\).", + ): + split_sparse_batches_in_batches(batch_data={"data": update_data, "indptr": indptr_3}, component="foo") + + +def test_convert_batch_dataset_to_batch_list_one_batch_dense(): + foo = [("a", "i4"), ("b", "i4"), ("c", "i4")] + bar = [("x", "i4"), ("y", "i4"), ("z", "i4")] + update_data: BatchDataset = { + "foo": np.array([(111, 121, 131), (112, 122, 132), (113, 123, 133), (114, 124, 134)], dtype=foo), + "bar": np.array([(211, 221, 231), 
(212, 222, 232), (213, 223, 233), (214, 224, 234)], dtype=bar), + } + expected: BatchList = [ + { + "foo": np.array([(111, 121, 131), (112, 122, 132), (113, 123, 133), (114, 124, 134)], dtype=foo), + "bar": np.array([(211, 221, 231), (212, 222, 232), (213, 223, 233), (214, 224, 234)], dtype=bar), + } + ] + actual = convert_batch_dataset_to_batch_list(update_data) + assert_list_of_dicts_of_numpy_arrays_equal(expected, actual) + + +def test_convert_batch_dataset_to_batch_list_two_batches_dense(): + foo = [("a", "i4"), ("b", "i4"), ("c", "i4")] + bar = [("x", "i4"), ("y", "i4"), ("z", "i4")] + update_data: BatchDataset = { + "foo": np.array( + [ + [(1111, 1121, 1131), (1112, 1122, 132), (1113, 1123, 1133), (1114, 1124, 1134)], + [(2111, 2121, 2131), (2112, 2122, 232), (2113, 2123, 2133), (2114, 2124, 2134)], + ], + dtype=foo, + ), + "bar": np.array( + [ + [(1211, 1221, 1231), (1212, 1222, 232), (1213, 1223, 1233), (1214, 1224, 1234)], + [(2211, 2221, 2231), (2212, 2222, 232), (2213, 2223, 2233), (2214, 2224, 2234)], + ], + dtype=bar, + ), + } + expected: BatchList = [ + { + "foo": np.array([(1111, 1121, 1131), (1112, 1122, 132), (1113, 1123, 1133), (1114, 1124, 1134)], dtype=foo), + "bar": np.array([(1211, 1221, 1231), (1212, 1222, 232), (1213, 1223, 1233), (1214, 1224, 1234)], dtype=bar), + }, + { + "foo": np.array([(2111, 2121, 2131), (2112, 2122, 232), (2113, 2123, 2133), (2114, 2124, 2134)], dtype=foo), + "bar": np.array([(2211, 2221, 2231), (2212, 2222, 232), (2213, 2223, 2233), (2214, 2224, 2234)], dtype=bar), + }, + ] + actual = convert_batch_dataset_to_batch_list(update_data) + assert_list_of_dicts_of_numpy_arrays_equal(expected, actual) + + +def test_convert_batch_dataset_to_batch_list_three_batches_sparse(): + foo = [("a", "i4"), ("b", "i4"), ("c", "i4")] + bar = [("x", "i4"), ("y", "i4"), ("z", "i4")] + update_data: BatchDataset = { + "foo": { + "indptr": np.array([0, 4, 8, 8]), + "data": np.array( + [ + (1111, 1121, 1131), + (1112, 1122, 132), + (1113, 1123, 1133), + (1114, 1124, 1134), + (2111, 2121, 2131), + (2112, 2122, 232), + (2113, 2123, 2133), + (2114, 2124, 2134), + ], + dtype=foo, + ), + }, + "bar": { + "indptr": np.array([0, 4, 4, 8]), + "data": np.array( + [ + (1211, 1221, 1231), + (1212, 1222, 232), + (1213, 1223, 1233), + (1214, 1224, 1234), + (3211, 3221, 3231), + (3212, 3222, 332), + (3213, 3223, 3233), + (3214, 3224, 3234), + ], + dtype=bar, + ), + }, + } + expected: BatchList = [ + { + "foo": np.array([(1111, 1121, 1131), (1112, 1122, 132), (1113, 1123, 1133), (1114, 1124, 1134)], dtype=foo), + "bar": np.array([(1211, 1221, 1231), (1212, 1222, 232), (1213, 1223, 1233), (1214, 1224, 1234)], dtype=bar), + }, + { + "foo": np.array([(2111, 2121, 2131), (2112, 2122, 232), (2113, 2123, 2133), (2114, 2124, 2134)], dtype=foo), + }, + {"bar": np.array([(3211, 3221, 3231), (3212, 3222, 332), (3213, 3223, 3233), (3214, 3224, 3234)], dtype=bar)}, + ] + actual = convert_batch_dataset_to_batch_list(update_data) + assert_list_of_dicts_of_numpy_arrays_equal(expected, actual) + + +def test_get_and_verify_batch_sizes_inconsistent_batch_sizes_two_components(): + update_data: BatchDataset = {"foo": np.empty(shape=(3, 3)), "bar": np.empty(shape=(2, 3))} + with pytest.raises( + ValueError, + match="Inconsistent number of batches in batch data. 
" + "Component 'bar' contains 2 batches, while 'foo' contained 3 batches.", + ): + get_and_verify_batch_sizes(update_data) + + +def test_convert_get_and_verify_batch_sizes_inconsistent_batch_sizes_more_than_two_components(): + update_data: BatchDataset = { + "foo": np.empty(shape=(3, 3)), + "bar": np.empty(shape=(3, 3)), + "baz": np.empty(shape=(2, 3)), + } + with pytest.raises( + ValueError, + match="Inconsistent number of batches in batch data. " + "Component 'baz' contains 2 batches, while bar/foo contained 3 batches.", + ): + get_and_verify_batch_sizes(update_data) + + +@patch("power_grid_model.utils.get_and_verify_batch_sizes") +def test_convert_batch_dataset_to_batch_list_missing_key_sparse(_mock: MagicMock): + update_data: BatchDataset = {"foo": {"a": np.empty(3), "data": np.empty(3)}} + with pytest.raises( + KeyError, + match="Missing 'indptr' in sparse batch data for 'foo' " + r"\(expected a python dictionary containing two keys: 'indptr' and 'data'\).", + ): + convert_batch_dataset_to_batch_list(update_data) + + +@patch("power_grid_model.utils.get_and_verify_batch_sizes") +def test_convert_batch_dataset_to_batch_list_invalid_type_sparse(_mock: MagicMock): + update_data: BatchDataset = {"foo": "wrong type"} # type: ignore + with pytest.raises( + TypeError, + match="Invalid data type str in batch data for 'foo' " + r"\(should be a Numpy structured array or a python dictionary\).", + ): + convert_batch_dataset_to_batch_list(update_data) diff --git a/tests/unit/utils.py b/tests/unit/utils.py index 15c56dd931..968eaae15f 100644 --- a/tests/unit/utils.py +++ b/tests/unit/utils.py @@ -11,7 +11,8 @@ import numpy as np import pytest -from power_grid_model.manual_testing import export_json_data, import_json_data +from power_grid_model.data_types import Dataset, SingleDataset +from power_grid_model.utils import export_json_data, import_json_data BASE_PATH = Path(__file__).parent.parent DATA_PATH = BASE_PATH / "data" @@ -101,16 +102,14 @@ def import_case_data(data_path: Path, sym: bool): return return_dict -def save_json_data(json_file: str, data: Union[dict, list]): +def save_json_data(json_file: str, data: Dataset): OUPUT_PATH.mkdir(parents=True, exist_ok=True) data_file = OUPUT_PATH / json_file data_file.parent.mkdir(parents=True, exist_ok=True) export_json_data(data_file, data) -def compare_result( - actual: Dict[str, np.ndarray], expected: Dict[str, np.ndarray], rtol: float, atol: Union[float, Dict[str, float]] -): +def compare_result(actual: SingleDataset, expected: SingleDataset, rtol: float, atol: Union[float, Dict[str, float]]): for key, expected_data in expected.items(): for col_name in expected_data.dtype.names: actual_col = actual[key][col_name] diff --git a/tests/unit/validation/test_assertions.py b/tests/unit/validation/test_assertions.py index a7b71b7571..ed6c478219 100644 --- a/tests/unit/validation/test_assertions.py +++ b/tests/unit/validation/test_assertions.py @@ -8,11 +8,7 @@ import pytest from power_grid_model.enum import CalculationType -from power_grid_model.validation.assertions import ( - ValidationException, - assert_valid_batch_data, - assert_valid_input_data, -) +from power_grid_model.validation.assertions import ValidationException, assert_valid_batch_data, assert_valid_input_data from power_grid_model.validation.errors import ValidationError diff --git a/tests/unit/validation/test_batch_validation.py b/tests/unit/validation/test_batch_validation.py index c3f9bf7eb2..074c765357 100644 --- a/tests/unit/validation/test_batch_validation.py +++ 
b/tests/unit/validation/test_batch_validation.py @@ -9,10 +9,7 @@ from power_grid_model import LoadGenType, initialize_array from power_grid_model.validation import validate_batch_data -from power_grid_model.validation.errors import ( - MultiComponentNotUniqueError, - NotBooleanError, -) +from power_grid_model.validation.errors import MultiComponentNotUniqueError, NotBooleanError @pytest.fixture diff --git a/tests/unit/validation/test_input_validation.py b/tests/unit/validation/test_input_validation.py index b5bb4804ba..df97e5b571 100644 --- a/tests/unit/validation/test_input_validation.py +++ b/tests/unit/validation/test_input_validation.py @@ -7,13 +7,7 @@ import numpy as np import pytest -from power_grid_model import ( - BranchSide, - LoadGenType, - MeasuredTerminalType, - WindingType, - initialize_array, -) +from power_grid_model import BranchSide, LoadGenType, MeasuredTerminalType, WindingType, initialize_array from power_grid_model.validation import validate_input_data from power_grid_model.validation.errors import ( InvalidEnumValueError, diff --git a/tests/unit/validation/test_utils.py b/tests/unit/validation/test_utils.py index a897a2450e..bfe1015a31 100644 --- a/tests/unit/validation/test_utils.py +++ b/tests/unit/validation/test_utils.py @@ -6,17 +6,9 @@ import pytest from power_grid_model import initialize_array +from power_grid_model.data_types import BatchDataset, BatchList from power_grid_model.validation.errors import NotGreaterThanError -from power_grid_model.validation.utils import ( - BatchData, - UpdateData, - errors_to_string, - eval_field_expression, - split_compressed_sparse_structure_in_batches, - split_numpy_array_in_batches, - split_update_data_in_batches, - update_input_data, -) +from power_grid_model.validation.utils import errors_to_string, eval_field_expression, update_input_data def test_eval_field_expression(): @@ -69,296 +61,6 @@ def assert_list_of_dicts_of_numpy_arrays_equal(expected, actual): np.testing.assert_array_equal(expected[i][key], actual[i][key]) -def test_split_numpy_array_in_batches_n1(): - foo = [("a", "i4"), ("b", "i4"), ("c", "i4")] - update_data = np.array([(111, 121, 131), (112, 122, 132), (113, 123, 133), (114, 124, 134)], dtype=foo) - expected = [np.array([(111, 121, 131), (112, 122, 132), (113, 123, 133), (114, 124, 134)], dtype=foo)] - actual = split_numpy_array_in_batches(update_data, "") - assert_list_of_numpy_arrays_equal(expected, actual) - - -def test_split_numpy_array_in_batches_n2(): - foo = [("a", "i4"), ("b", "i4"), ("c", "i4")] - update_data = np.array( - [ - [(1111, 1121, 1131), (1112, 1122, 132), (1113, 1123, 1133), (1114, 1124, 1134)], - [(2111, 2121, 2131), (2112, 2122, 232), (2113, 2123, 2133), (2114, 2124, 2134)], - ], - dtype=foo, - ) - expected = [ - np.array([(1111, 1121, 1131), (1112, 1122, 132), (1113, 1123, 1133), (1114, 1124, 1134)], dtype=foo), - np.array([(2111, 2121, 2131), (2112, 2122, 232), (2113, 2123, 2133), (2114, 2124, 2134)], dtype=foo), - ] - actual = split_numpy_array_in_batches(update_data, "") - assert_list_of_numpy_arrays_equal(expected, actual) - - -def test_split_numpy_array_in_batches_wrong_data_type(): - update_data = [1, 2, 3] - with pytest.raises( - TypeError, - match="Invalid data type list in update data for 'foo' " r"\(should be a 1D/2D Numpy structured array\).", - ): - split_numpy_array_in_batches(update_data, "foo") # type: ignore - - -def test_split_numpy_array_in_batches_wrong_data_dim(): - update_date = np.array([[[1, 2, 3]]]) - with pytest.raises( - TypeError, - match="Invalid data 
dimension 3 in update data for 'foo' " r"\(should be a 1D/2D Numpy structured array\).", - ): - split_numpy_array_in_batches(update_date, "foo") - - -def test_normalize_batch_data_structure_n3_sparse(): - foo = [("a", "i4"), ("b", "i4"), ("c", "i4")] - update_data = np.array( - [ - (1111, 1121, 1131), - (1112, 1122, 132), - (1113, 1123, 1133), - (1114, 1124, 1134), - (2111, 2121, 2131), - (2112, 2122, 232), - (2113, 2123, 2133), - (2114, 2124, 2134), - ], - dtype=foo, - ) - indptr = np.array([0, 4, 4, 8]) - expected = [ - np.array([(1111, 1121, 1131), (1112, 1122, 132), (1113, 1123, 1133), (1114, 1124, 1134)], dtype=foo), - np.array([], dtype=foo), - np.array([(2111, 2121, 2131), (2112, 2122, 232), (2113, 2123, 2133), (2114, 2124, 2134)], dtype=foo), - ] - actual = split_compressed_sparse_structure_in_batches(update_data, indptr, "") - assert_list_of_numpy_arrays_equal(expected, actual) - - -def test_split_compressed_sparse_structure_in_batches_wrong_data(): - data_1 = [1, 2, 3, 4, 5, 6, 7, 8] # wrong type - data_2 = np.array([[1, 2, 3, 4], [5, 6, 7, 8]]) # wrong dimension - indptr = np.array([0, 4, 4, 8]) - with pytest.raises(TypeError, match="Invalid data type list in sparse update data for 'foo' "): - split_compressed_sparse_structure_in_batches(data=data_1, indptr=indptr, component="foo") # type: ignore - with pytest.raises(TypeError, match="Invalid data type ndarray in sparse update data for 'bar' "): - split_compressed_sparse_structure_in_batches(data=data_2, indptr=indptr, component="bar") - - -def test_split_compressed_sparse_structure_in_batches_wrong_indptr(): - foo = [("a", "i4"), ("b", "i4"), ("c", "i4")] - update_data = np.array( - [ - (1111, 1121, 1131), - (1112, 1122, 132), - (1113, 1123, 1133), - (1114, 1124, 1134), - (2111, 2121, 2131), - (2112, 2122, 232), - (2113, 2123, 2133), - (2114, 2124, 2134), - ], - dtype=foo, - ) - indptr_1 = [0, 4, 4, 8] # wrong type - indptr_2 = np.array([[0, 4], [4, 8]]) # wrong dimension - indptr_3 = np.array([0.0, 4.0, 4.0, 8.0]) # wrong dtype - with pytest.raises(TypeError, match="Invalid indptr data type list in update data for 'foo' "): - split_compressed_sparse_structure_in_batches(data=update_data, indptr=indptr_1, component="foo") # type: ignore - with pytest.raises(TypeError, match="Invalid indptr data type ndarray in update data for 'foo' "): - split_compressed_sparse_structure_in_batches(data=update_data, indptr=indptr_2, component="foo") # type: ignore - with pytest.raises(TypeError, match="Invalid indptr data type ndarray in update data for 'foo' "): - split_compressed_sparse_structure_in_batches(data=update_data, indptr=indptr_3, component="foo") # type: ignore - - -def test_split_compressed_sparse_structure_in_batches_wrong_indptr_values(): - foo = [("a", "i4"), ("b", "i4"), ("c", "i4")] - update_data = np.array( - [ - (1111, 1121, 1131), - (1112, 1122, 132), - (1113, 1123, 1133), - (1114, 1124, 1134), - (2111, 2121, 2131), - (2112, 2122, 232), - (2113, 2123, 2133), - (2114, 2124, 2134), - ], - dtype=foo, - ) - indptr_1 = np.array([1, 4, 4, 8]) - indptr_2 = np.array([0, 4, 4, 9]) - indptr_3 = np.array([0, 4, 3, 8]) - with pytest.raises( - TypeError, - match="Invalid indptr in update data for 'foo' " - r"\(should start with 0, end with the number of objects \(8\) " - r"and be monotonic increasing\).", - ): - split_compressed_sparse_structure_in_batches(data=update_data, indptr=indptr_1, component="foo") - with pytest.raises( - TypeError, - match="Invalid indptr in update data for 'foo' " - r"\(should start with 0, end with the 
number of objects \(8\) " - r"and be monotonic increasing\).", - ): - split_compressed_sparse_structure_in_batches(data=update_data, indptr=indptr_2, component="foo") - with pytest.raises( - TypeError, - match="Invalid indptr in update data for 'foo' " - r"\(should start with 0, end with the number of objects \(8\) " - r"and be monotonic increasing\).", - ): - split_compressed_sparse_structure_in_batches(data=update_data, indptr=indptr_3, component="foo") - - -def test_split_update_data_in_batches_one_batch_dense(): - foo = [("a", "i4"), ("b", "i4"), ("c", "i4")] - bar = [("x", "i4"), ("y", "i4"), ("z", "i4")] - update_data: UpdateData = { - "foo": np.array([(111, 121, 131), (112, 122, 132), (113, 123, 133), (114, 124, 134)], dtype=foo), - "bar": np.array([(211, 221, 231), (212, 222, 232), (213, 223, 233), (214, 224, 234)], dtype=bar), - } - expected: BatchData = [ - { - "foo": np.array([(111, 121, 131), (112, 122, 132), (113, 123, 133), (114, 124, 134)], dtype=foo), - "bar": np.array([(211, 221, 231), (212, 222, 232), (213, 223, 233), (214, 224, 234)], dtype=bar), - } - ] - actual = split_update_data_in_batches(update_data) - assert_list_of_dicts_of_numpy_arrays_equal(expected, actual) - - -def test_split_update_data_in_batches_two_batches_dense(): - foo = [("a", "i4"), ("b", "i4"), ("c", "i4")] - bar = [("x", "i4"), ("y", "i4"), ("z", "i4")] - update_data: UpdateData = { - "foo": np.array( - [ - [(1111, 1121, 1131), (1112, 1122, 132), (1113, 1123, 1133), (1114, 1124, 1134)], - [(2111, 2121, 2131), (2112, 2122, 232), (2113, 2123, 2133), (2114, 2124, 2134)], - ], - dtype=foo, - ), - "bar": np.array( - [ - [(1211, 1221, 1231), (1212, 1222, 232), (1213, 1223, 1233), (1214, 1224, 1234)], - [(2211, 2221, 2231), (2212, 2222, 232), (2213, 2223, 2233), (2214, 2224, 2234)], - ], - dtype=bar, - ), - } - expected: BatchData = [ - { - "foo": np.array([(1111, 1121, 1131), (1112, 1122, 132), (1113, 1123, 1133), (1114, 1124, 1134)], dtype=foo), - "bar": np.array([(1211, 1221, 1231), (1212, 1222, 232), (1213, 1223, 1233), (1214, 1224, 1234)], dtype=bar), - }, - { - "foo": np.array([(2111, 2121, 2131), (2112, 2122, 232), (2113, 2123, 2133), (2114, 2124, 2134)], dtype=foo), - "bar": np.array([(2211, 2221, 2231), (2212, 2222, 232), (2213, 2223, 2233), (2214, 2224, 2234)], dtype=bar), - }, - ] - actual = split_update_data_in_batches(update_data) - assert_list_of_dicts_of_numpy_arrays_equal(expected, actual) - - -def test_split_update_data_in_batches_three_batches_sparse(): - foo = [("a", "i4"), ("b", "i4"), ("c", "i4")] - bar = [("x", "i4"), ("y", "i4"), ("z", "i4")] - update_data: UpdateData = { - "foo": { - "indptr": np.array([0, 4, 8, 8]), - "data": np.array( - [ - (1111, 1121, 1131), - (1112, 1122, 132), - (1113, 1123, 1133), - (1114, 1124, 1134), - (2111, 2121, 2131), - (2112, 2122, 232), - (2113, 2123, 2133), - (2114, 2124, 2134), - ], - dtype=foo, - ), - }, - "bar": { - "indptr": np.array([0, 4, 4, 8]), - "data": np.array( - [ - (1211, 1221, 1231), - (1212, 1222, 232), - (1213, 1223, 1233), - (1214, 1224, 1234), - (3211, 3221, 3231), - (3212, 3222, 332), - (3213, 3223, 3233), - (3214, 3224, 3234), - ], - dtype=bar, - ), - }, - } - expected: BatchData = [ - { - "foo": np.array([(1111, 1121, 1131), (1112, 1122, 132), (1113, 1123, 1133), (1114, 1124, 1134)], dtype=foo), - "bar": np.array([(1211, 1221, 1231), (1212, 1222, 232), (1213, 1223, 1233), (1214, 1224, 1234)], dtype=bar), - }, - { - "foo": np.array([(2111, 2121, 2131), (2112, 2122, 232), (2113, 2123, 2133), (2114, 2124, 2134)], dtype=foo), - }, - 
{"bar": np.array([(3211, 3221, 3231), (3212, 3222, 332), (3213, 3223, 3233), (3214, 3224, 3234)], dtype=bar)}, - ] - actual = split_update_data_in_batches(update_data) - assert_list_of_dicts_of_numpy_arrays_equal(expected, actual) - - -def test_split_update_data_in_batches_inconsistent_batch_sizes_two_components(): - update_data: UpdateData = {"foo": np.empty(shape=(3, 3)), "bar": np.empty(shape=(2, 3))} - with pytest.raises( - ValueError, - match="Inconsistent number of batches in update data. " - "Component 'bar' contains 2 batches, while 'foo' contained 3 batches.", - ): - split_update_data_in_batches(update_data) - - -def test_split_update_data_in_batches_inconsistent_batch_sizes_more_than_two_components(): - update_data: UpdateData = { - "foo": np.empty(shape=(3, 3)), - "bar": np.empty(shape=(3, 3)), - "baz": np.empty(shape=(2, 3)), - } - with pytest.raises( - ValueError, - match="Inconsistent number of batches in update data. " - "Component 'baz' contains 2 batches, while bar/foo contained 3 batches.", - ): - split_update_data_in_batches(update_data) - - -def test_split_update_data_in_batches_missing_key_sparse(): - update_data: UpdateData = {"foo": {"a": np.empty(3), "data": np.empty(3)}} - with pytest.raises( - KeyError, - match="Missing 'indptr' in sparse update data for 'foo' " - r"\(expected a python dictionary containing two keys: 'indptr' and 'data'\).", - ): - split_update_data_in_batches(update_data) - - -def test_split_update_data_in_batches_invalid_type_sparse(): - update_data: UpdateData = {"foo": "wrong type"} # type: ignore - with pytest.raises( - TypeError, - match="Invalid data type str in update data for 'foo' " - r"\(should be a Numpy structured array or a python dictionary\).", - ): - split_update_data_in_batches(update_data) - - def test_update_input_data(): input_test = np.array( [(4, 4.0, 4.1), (5, 5.0, 5.1), (6, 6.0, 6.1), (1, 1.0, np.nan), (2, 2.0, np.nan), (3, 3.0, np.nan)], diff --git a/tests/unit/validation/test_validation_functions.py b/tests/unit/validation/test_validation_functions.py index cb131901f0..0a58969b44 100644 --- a/tests/unit/validation/test_validation_functions.py +++ b/tests/unit/validation/test_validation_functions.py @@ -7,11 +7,7 @@ from power_grid_model import initialize_array, power_grid_meta_data from power_grid_model.enum import CalculationType -from power_grid_model.validation.errors import ( - IdNotInDatasetError, - MissingValueError, - MultiComponentNotUniqueError, -) +from power_grid_model.validation.errors import IdNotInDatasetError, MissingValueError, MultiComponentNotUniqueError from power_grid_model.validation.validation import ( assert_valid_data_structure, validate_ids_exist,