diff --git a/.gitignore b/.gitignore
index 0e7aa8e1..58cfaf60 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,9 +5,10 @@ tests/post/*.yaml
 tests/problem_definition/*.csv
 examples/**/*.png
 tests/problem_definition/split.csv
+
 # Heavy data
 *.cgns
-!notebooks/ex_rotor37_pv.cgns
+!tests/post/**/*.cgns
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
@@ -59,10 +60,6 @@ report.xml
 
 # VisualStudioCode
 .vscode/*
-!.vscode/settings.json
-!.vscode/tasks.json
-!.vscode/launch.json
-!.vscode/extensions.json
 *.code-workspace
 **/.vscode
 *-jvsc-*.ipynb
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f9461f22..3fc6f88f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,16 +9,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
-- (docs) explain release process in Contributing page
-
 ### Changed
 
+- (sample) Restructuring of the Sample class to store globals (tensors of arbitrary order) at a given time step: replaces scalars and time_series. All Sample data is now stored in CGNS trees.
+
 ### Fixes
 
+- (docs) explain release process in Contributing page
+
 ### Removed
 
+- (sample) time_series support; globals at time steps are now handled directly.
+
 
 ## [0.1.9] - 2025-09-24
 
diff --git a/docs/source/core_concepts/dataset.md b/docs/source/core_concepts/dataset.md
index 2c4b67c1..700051b4 100644
--- a/docs/source/core_concepts/dataset.md
+++ b/docs/source/core_concepts/dataset.md
@@ -63,7 +63,6 @@ ids = dataset.get_sample_ids()
 ```python
 dataset.get_scalar_names(ids=None)
-dataset.get_time_series_names(ids=None)
 dataset.get_field_names(ids=None, zone_name=None, base_name=None)
 
 # Structured, hashable descriptors of features (recommended)
diff --git a/docs/source/core_concepts/feature_identifiers.md b/docs/source/core_concepts/feature_identifiers.md
index 9a3a49c8..da9afaf5 100644
--- a/docs/source/core_concepts/feature_identifiers.md
+++ b/docs/source/core_concepts/feature_identifiers.md
@@ -6,7 +6,7 @@ title: Feature identifiers
 
 Feature identifiers are a concise, unambiguous way to point to any feature in PLAID. They replace legacy name-only APIs (now deprecated) and make it possible to uniquely address features across time steps, bases, zones and locations.
 
-- A feature is one of: scalar, time_series, field, nodes.
+- A feature is one of: scalar, field, nodes.
 - A FeatureIdentifier is a small dictionary that encodes the feature type and, when relevant, its context (e.g., base, zone, location, time).
 
 Why this matters:
@@ -19,7 +19,6 @@ Why this matters:
 FeatureIdentifier is a `dict[str, str | float]` with a mandatory `type` key. Depending on the feature type, other keys are required or optional:
 
 - scalar: `{"type": "scalar", "name": <name>}`
-- time_series: `{"type": "time_series", "name": <name>}`
 - field: `{"type": "field", "name": <name>, "base_name": <base_name>, "zone_name": <zone_name>, "location": <location>, "time": <time>}`
   - `location` must be one of: `Vertex`, `EdgeCenter`, `FaceCenter`, `CellCenter`.
   - `base_name`, `zone_name`, `location`, `time` are optional if default value mechanics apply (see {doc}`defaults`).
@@ -37,7 +36,6 @@ Minimal identifiers: from plaid.types import FeatureIdentifier fid_scalar = FeatureIdentifier({"type": "scalar", "name": "Re"}) -fid_ts = FeatureIdentifier({"type": "time_series", "name": "load_curve"}) fid_field = FeatureIdentifier({ "type": "field", diff --git a/docs/source/core_concepts/sample.md b/docs/source/core_concepts/sample.md index 3da146fa..ff01c058 100644 --- a/docs/source/core_concepts/sample.md +++ b/docs/source/core_concepts/sample.md @@ -6,7 +6,6 @@ title: Sample {py:class}`~plaid.containers.sample.Sample` represents one observation. It contains {doc}`feature_identifiers` among (all optional): - scalars: name → value -- time series: name → (time_sequence, values) - meshes containing: - nodes: mesh node coordinates, that can be located: - in different bases @@ -17,8 +16,8 @@ title: Sample - in different locations in each base/zone among: `Vertex`, `EdgeCenter`, `FaceCenter`, or `CellCenter` Key APIs include: -- Feature accessors: {py:meth}`~plaid.containers.sample.Sample.get_scalar`, {py:meth}`~plaid.containers.sample.Sample.get_time_series`, {py:meth}`~plaid.containers.sample.Sample.get_field`, {py:meth}`~plaid.containers.sample.Sample.get_nodes` -- Feature updates: {py:meth}`~plaid.containers.sample.Sample.add_scalar`, {py:meth}`~plaid.containers.sample.Sample.add_time_series`, {py:meth}`~plaid.containers.sample.Sample.add_field`, {py:meth}`~plaid.containers.sample.Sample.set_nodes`, and high-level identifier-based updates +- Feature accessors: {py:meth}`~plaid.containers.sample.Sample.get_scalar`, {py:meth}`~plaid.containers.sample.Sample.get_field`, {py:meth}`~plaid.containers.sample.Sample.get_nodes` +- Feature updates: {py:meth}`~plaid.containers.sample.Sample.add_scalar`, {py:meth}`~plaid.containers.sample.Sample.add_field`, {py:meth}`~plaid.containers.sample.Sample.set_nodes`, and high-level identifier-based updates - Discovery: {py:meth}`~plaid.containers.sample.Sample.get_all_features_identifiers()` or {py:meth}`~plaid.containers.sample.Sample.get_all_features_identifiers_by_type()` to list all available features with their context See also: {doc}`../notebooks` for hands-on examples. 
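To make the identifier-based workflow concrete, here is a minimal sketch condensing the calls used in the updated docs above and in `examples/bridges/huggingface_example.py` below; the signatures of `update_features_from_identifier` and `get_all_features_identifiers` are taken from this changeset and shown for illustration only:

```python
import numpy as np

from plaid import Sample
from plaid.types import FeatureIdentifier

# A FeatureIdentifier pins down a feature by type, name and (optional) context
scalar_id = FeatureIdentifier({"type": "scalar", "name": "Re"})

sample = Sample()
# Identifier-based update, replacing the legacy name-only add_scalar(...) call
sample.update_features_from_identifier(scalar_id, np.random.randn(), in_place=True)

# Discovery: list every available feature together with its context
print(sample.get_all_features_identifiers())
```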
diff --git a/environment.yml b/environment.yml index 9f99a29b..31d8cc44 100644 --- a/environment.yml +++ b/environment.yml @@ -1,6 +1,7 @@ name: plaid-dev channels: - conda-forge + - nodefaults channel_priority: strict dependencies: - python=3.11 @@ -16,6 +17,7 @@ dependencies: - datasets >=3.0 - numpy - matplotlib-base + - optree #---# data validation - pydantic ##### DEV/TESTS/EXAMPLES ##### diff --git a/examples/bridges/huggingface_example.py b/examples/bridges/huggingface_example.py index 8372225e..442d64d3 100644 --- a/examples/bridges/huggingface_example.py +++ b/examples/bridges/huggingface_example.py @@ -1,12 +1,13 @@ # --- # jupyter: # jupytext: +# custom_cell_magics: kql # formats: ipynb,py:percent # text_representation: # extension: .py # format_name: percent # format_version: '1.3' -# jupytext_version: 1.17.3 +# jupytext_version: 1.11.2 # kernelspec: # display_name: plaid-dev # language: python @@ -39,6 +40,7 @@ from plaid import Dataset from plaid import Sample from plaid import ProblemDefinition +from plaid.types import FeatureIdentifier # %% @@ -51,7 +53,7 @@ def show_sample(sample: Sample): # %% [markdown] -# ## Initialize plaid dataset and problem_definition +# ## Initialize plaid dataset, infos and problem_definition # %% # Input data @@ -73,21 +75,23 @@ def show_sample(sample: Sample): ] ) - dataset = Dataset() +scalar_feat_id = FeatureIdentifier({"type": "scalar", "name": "scalar"}) +node_field_feat_id = FeatureIdentifier({"type": "field", "name": "node_field", "location": "Vertex"}) +cell_field_feat_id = FeatureIdentifier({"type": "field", "name": "cell_field", "location": "CellCenter"}) + print("Creating meshes dataset...") for _ in range(3): mesh = MCT.CreateMeshOfTriangles(points, triangles) sample = Sample() - sample.meshes.add_tree(MeshToCGNS(mesh)) - sample.add_scalar("scalar", np.random.randn()) - sample.add_field("node_field", np.random.rand(len(points)), location="Vertex") - sample.add_field( - "cell_field", np.random.rand(len(triangles)), location="CellCenter" - ) + sample.add_tree(MeshToCGNS(mesh, exportOriginalIDs = False)) + + sample.update_features_from_identifier(scalar_feat_id, np.random.randn(), in_place=True) + sample.update_features_from_identifier(node_field_feat_id, np.random.rand(len(points)), in_place=True) + sample.update_features_from_identifier(cell_field_feat_id, np.random.rand(len(triangles)), in_place=True) dataset.add_sample(sample) @@ -99,36 +103,39 @@ def show_sample(sample: Sample): dataset.set_infos(infos) print(f" {dataset = }") +print(f" {infos = }") -problem = ProblemDefinition() -problem.add_output_scalars_names(["scalar"]) -problem.add_output_fields_names(["node_field", "cell_field"]) -problem.add_input_meshes_names(["/Base/Zone"]) +pb_def = ProblemDefinition() +pb_def.add_in_features_identifiers([scalar_feat_id, node_field_feat_id]) +pb_def.add_out_features_identifiers([cell_field_feat_id]) -problem.set_task("regression") -problem.set_split({"train": [0, 1], "test": [2]}) +pb_def.set_task("regression") +pb_def.set_split({"train": [0, 1], "test": [2]}) -print(f" {problem = }") +print(f" {pb_def = }") # %% [markdown] -# ## Section 1: Convert plaid dataset to Hugging Face -# -# The description field of Hugging Face dataset is automatically configured to include data from the plaid dataset info and problem_definition to prevent loss of information and equivalence of format. 
+# ## Section 1: Convert plaid dataset to Hugging Face Dataset
 
 # %%
-hf_dataset = huggingface_bridge.plaid_dataset_to_huggingface(dataset, problem)
+hf_dataset = huggingface_bridge.plaid_dataset_to_huggingface(dataset)
 print()
 print(f"{hf_dataset = }")
-print(f"{hf_dataset.description = }")
 
 # %% [markdown]
-# The previous code generates a Hugging Face dataset containing all the samples from the plaid dataset, the splits being defined in the hf_dataset descriptions. For splits, Hugging Face proposes `DatasetDict`, which are dictionaries of hf datasets, with keys being the name of the corresponding splits. It is possible de generate a hf datasetdict directly from plaid:
+# By default, all the sample ids of the dataset are taken into account. One can generate a Hugging Face dataset for a given split by providing the corresponding ids from the problem_definition:
 
 # %%
-hf_datasetdict = huggingface_bridge.plaid_dataset_to_huggingface_datasetdict(dataset, problem, main_splits = ['train', 'test'])
+hf_dataset = huggingface_bridge.plaid_dataset_to_huggingface(dataset, pb_def.get_split("train"), split_name="train")
+print(hf_dataset)
+
+# %% [markdown]
+# The previous calls generate Hugging Face datasets holding the requested samples. For named splits, Hugging Face proposes `DatasetDict`, a dictionary of hf datasets whose keys are the split names. It is possible to generate a hf datasetdict directly from plaid:
+
+# %%
+hf_datasetdict = huggingface_bridge.plaid_dataset_to_huggingface_datasetdict(dataset, main_splits=pb_def.get_split())
 print()
-print(f"{hf_datasetdict['train'] = }")
-print(f"{hf_datasetdict['test'] = }")
+print(f"{hf_datasetdict = }")
 
 
 # %% [markdown]
@@ -143,59 +150,62 @@ def generator():
 
 
 hf_dataset_gen = huggingface_bridge.plaid_generator_to_huggingface(
-    generator, infos, problem
+    generator
 )
-print()
 print(f"{hf_dataset_gen = }")
-print(f"{hf_dataset_gen.description = }")
 
 
 # %% [markdown]
 # The same is available with datasetdict:
 
 # %%
-hf_datasetdict_gen = huggingface_bridge.plaid_generator_to_huggingface_datasetdict(
-    generator, infos, problem, main_splits = ['train', 'test']
+generators = {}
+for split_name, ids in pb_def.get_split().items():
+    def generator_(ids=ids):
+        for id in ids:
+            yield {"sample": pickle.dumps(dataset[id])}
+    generators[split_name] = generator_
+
+hf_datasetdict = huggingface_bridge.plaid_generator_to_huggingface_datasetdict(
+    generators
 )
-print()
-print(f"{hf_datasetdict['train'] = }")
-print(f"{hf_datasetdict['test'] = }")
+print(f"{hf_datasetdict = }")
 
 
 # %% [markdown]
 # ## Section 3: Convert a Hugging Face dataset to plaid
-#
-# Plaid dataset infos and problem_defitinion are recovered from the huggingface dataset
 
 # %%
-dataset_2, problem_2 = huggingface_bridge.huggingface_dataset_to_plaid(hf_dataset)
+dataset_2 = huggingface_bridge.huggingface_dataset_to_plaid(hf_dataset)
 print()
 print(f"{dataset_2 = }")
-print(f"{dataset_2.get_infos() = }")
-print(f"{problem_2 = }")
 
 # %% [markdown]
 # ## Section 4: Save and Load Hugging Face datasets
 #
 # ### From and to disk
+#
+# Saving datasetdict, infos and problem definition to disk:
 
 # %%
-# Save to disk
-hf_dataset.save_to_disk("/tmp/path/to/dir")
+huggingface_bridge.save_dataset_dict_to_disk("/tmp/test_dir", hf_datasetdict)
+huggingface_bridge.save_dataset_infos_to_disk("/tmp/test_dir", infos)
+huggingface_bridge.save_problem_definition_to_disk("/tmp/test_dir", "task_1", pb_def)
 
-# %%
-# Load from disk
-from datasets import load_from_disk
+# %% [markdown]
+# Loading datasetdict, infos and problem definition from disk:
 
-loaded_hf_dataset = load_from_disk("/tmp/path/to/dir")
+# %%
+loaded_hf_datasetdict = huggingface_bridge.load_dataset_dict_from_disk("/tmp/test_dir")
+loaded_infos = huggingface_bridge.load_dataset_infos_from_disk("/tmp/test_dir")
+loaded_pb_def = huggingface_bridge.load_problem_definition_from_disk("/tmp/test_dir", "task_1")
 
-print()
-print(f"{loaded_hf_dataset = }")
-print(f"{loaded_hf_dataset.description = }")
+print(f"{loaded_hf_datasetdict = }")
+print(f"{loaded_infos = }")
+print(f"{loaded_pb_def = }")
 
 # %% [markdown]
 # ### From and to the Hugging Face hub
 #
-# You need an huggingface account, with a configured access token, and to install huggingface_hub[cli].
-# Pushing and loading a huggingface dataset without loss of information requires the configuration of a DatasetCard.
+# To save a dataset on the Hub, you need a Hugging Face account with a configured access token, and huggingface_hub[cli] installed.
 #
 # Find below example of instruction (not executed by this notebook).
 #
@@ -207,106 +217,78 @@ def generator():
 # ```
 # and enter you access token.
 #
-# Then, the following python instruction enable pushing a dataset to the hub:
+# Then, the following python instructions enable pushing the datasetdict, infos and problem definition to the hub:
 # ```python
-# hf_dataset.push_to_hub("chanel/dataset")
-#
-# from datasets import load_dataset_builder
-#
-# datasetInfo = load_dataset_builder("chanel/dataset").__getstate__()['info']
-#
-# from huggingface_hub import DatasetCard
-#
-# card_text = create_string_for_huggingface_dataset_card(
-#     description = description,
-#     download_size_bytes = datasetInfo.download_size,
-#     dataset_size_bytes = datasetInfo.dataset_size,
-#     ...)
-# dataset_card = DatasetCard(card_text)
-# dataset_card.push_to_hub("chanel/dataset")
+# huggingface_bridge.push_dataset_dict_to_hub("chanel/dataset", hf_datasetdict)
+# huggingface_bridge.push_dataset_infos_to_hub("chanel/dataset", infos)
+# huggingface_bridge.push_problem_definition_to_hub("chanel/dataset", "task_1", pb_def)
 # ```
 #
-# The second upload of the dataset_card is required to ensure that load_dataset from the hub will populate
-# the hf-dataset.description field, and be compatible for conversion to plaid. Wihtout a dataset_card, the description field is lost.
-#
-#
+# The dataset card can then be customized online, directly on the dataset repo page.
+
+# %% [markdown]
 # ### Load from hub
 #
-# #### General case
-#
+# To load datasetdict, infos and problem_definitions from the hub:
 # ```python
-# dataset = load_dataset("chanel/dataset", split="all_samples")
+# huggingface_bridge.load_hf_dataset_from_hub("chanel/dataset", *args, **kwargs)
+# huggingface_bridge.load_hf_infos_from_hub("chanel/dataset")
+# huggingface_bridge.load_hf_problem_definition_from_hub("chanel/dataset", "name")
 # ```
 #
-# More efficient retrieval are made possible by partial loads and split loads (in the case of a datasetdict):
-#
+# Partial retrieval and streaming are also supported:
 # ```python
-# dataset_train = load_dataset("chanel/dataset", split="train")
-# dataset_train_extract = load_dataset("chanel/dataset", split="train[:10]")
+# huggingface_bridge.load_hf_dataset_from_hub("chanel/dataset", split="train[:10]", *args, **kwargs)
+# huggingface_bridge.load_hf_dataset_from_hub("chanel/dataset", streaming=True, *args, **kwargs)
 # ```
 #
-# #### Proxy
-#
-# A retrieval function robust to cases where you are behind a proxy and relying on a private mirror is avalable;
+# Native HF datasets commands are also possible:
 #
 # ```python
-# from plaid.bridges.huggingface_bridge import load_hf_dataset_from_hub
-# hf_dataset = load_hf_dataset_from_hub("chanel/dataset", *args, **kwargs)
+# dataset_train = load_dataset("chanel/dataset", split="train")
+# dataset_train = load_dataset("chanel/dataset", split="train", streaming=True)
+# dataset_train_extract = load_dataset("chanel/dataset", split="train[:10]")
 # ```
 #
-# - Streaming mode is not supported when using a private mirror.
-# - Falls back to local download if streaming or public loading fails.
-# - To use behind a proxy, you may need to set:
-#   - `HF_ENDPOINT` to your private mirror address
-#   - `CURL_CA_BUNDLE` to your trusted CA certificates
-#   - `HF_HOME` to a shared cache directory if needed
-
+# If you are behind a proxy and rely on a private mirror, the function `load_hf_dataset_from_hub` works provided the following environment variables are set:
+# - `HF_ENDPOINT` to your private mirror address
+# - `CURL_CA_BUNDLE` to your trusted CA certificates
+# - `HF_HOME` to a shared cache directory if needed
 
 # %% [markdown]
 # ## Section 5: Handle plaid samples from Hugging Face datasets without converting the complete dataset to plaid
 #
-# To fully exploit optimzed data handling of the Hugging Face datasets library, it is possible to extract information from the huggingface dataset without converting to plaid. The ``description`` atttribute includes the plaid dataset _infos attribute and plaid problem_definition attributes.
+# To fully exploit optimized data handling of the Hugging Face datasets library, it is possible to extract information from the huggingface dataset without converting to plaid.
 
-# %%
-print(f"{loaded_hf_dataset.description = }")
 
 # %% [markdown]
 # Get the first sample of the first split
 
 # %%
-split_names = list(loaded_hf_dataset.description["split"].keys())
-id = loaded_hf_dataset.description["split"][split_names[0]]
-hf_sample = loaded_hf_dataset[id[0]]
+hf_sample = hf_dataset[0]
 
 print(f"{hf_sample = }")
 
 # %% [markdown]
-# We notice that ``hf_sample`` is a binary object efficiently handled by huggingface datasets. It can be converted into a plaid sample using a specific constructor relying on a pydantic validator.
+# We notice that ``hf_sample`` contains a binary object efficiently handled by huggingface datasets. It can be converted into a plaid sample using a specific constructor relying on a pydantic validator.
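For reference, the binary convention at play here is the one used by the generators above: each Hugging Face row stores one pickled plaid Sample under the `"sample"` key. A minimal round-trip sketch (illustrative only, mirroring the generator code and `to_plaid_sample` from this changeset):

```python
import pickle

from plaid import Sample
from plaid.bridges import huggingface_bridge

# A hf row stores one pickled plaid Sample under the "sample" key,
# exactly as the generators above produce it
hf_row = {"sample": pickle.dumps(Sample())}

# to_plaid_sample unpickles the payload and validates it back into a Sample
plaid_sample = huggingface_bridge.to_plaid_sample(hf_row)
```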
 # %%
 plaid_sample = huggingface_bridge.to_plaid_sample(hf_sample)
 
 show_sample(plaid_sample)
 
+
 # %% [markdown]
 # Very large datasets can be streamed directly from the Hugging Face hub:
 #
 # ```python
-# hf_dataset_stream = load_dataset("chanel/dataset", split="all_samples", streaming=True)
-#
+# hf_dataset_stream = load_dataset("chanel/dataset", split="train", streaming=True)
 # plaid_sample = huggingface_bridge.to_plaid_sample(next(iter(hf_dataset_stream)))
-#
-# show_sample(plaid_sample)
 # ```
 #
-# Or initialize a plaid dataset and problem definition for any number of samples relying on this streaming mechanisme:
-#
+# If you are behind a proxy:
 # ```python
-# from plaid.bridges.huggingface_bridge import streamed_huggingface_dataset_to_plaid
-#
-# dataset, pb_def = streamed_huggingface_dataset_to_plaid('PLAID-datasets/VKI-LS59', 2)
+# hf_dataset_stream = huggingface_bridge.load_hf_dataset_from_hub("chanel/dataset", split="train", streaming=True)
+# plaid_sample = huggingface_bridge.to_plaid_sample(next(iter(hf_dataset_stream)))
 # ```
-
-
-
diff --git a/examples/containers/dataset_example.py b/examples/containers/dataset_example.py
index 089aea08..f6fb3c24 100644
--- a/examples/containers/dataset_example.py
+++ b/examples/containers/dataset_example.py
@@ -1,12 +1,13 @@
 # ---
 # jupyter:
 #   jupytext:
+#     custom_cell_magics: kql
 #     formats: ipynb,py:percent
 #     text_representation:
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#     jupytext_version: 1.17.3
+#     jupytext_version: 1.11.2
 #   kernelspec:
 #     display_name: Python 3
 #     language: python
@@ -33,6 +34,7 @@
 import platform
 
 import numpy as np
+import copy
 
 # %%
 # Import necessary libraries and functions
@@ -105,7 +107,7 @@ def dprint(name: str, dictio: dict, end: str = "\n"):
 
 # %%
 # Add a CGNS tree structure to the Sample
-sample_01.meshes.add_tree(cgns_mesh)
+sample_01.features.add_tree(copy.deepcopy(cgns_mesh))
 print(f"{sample_01 = }")
 
 # %%
@@ -136,7 +138,7 @@ def dprint(name: str, dictio: dict, end: str = "\n"):
 sample_03 = Sample()
 sample_03.add_scalar("speed", np.random.randn())
 sample_03.add_scalar("rotation", sample_01.get_scalar("rotation"))
-sample_03.meshes.add_tree(cgns_mesh)
+sample_03.features.add_tree(copy.deepcopy(cgns_mesh))
 
 # Show Sample CGNS content
 sample_03.show_tree()
@@ -353,6 +355,7 @@ def dprint(name: str, dictio: dict, end: str = "\n"):
 dataset.set_infos(infos)
 dataset.print_infos()
 
+
 # %% [markdown]
 # ## Section 4: Saving and Loading Dataset
 #
@@ -367,6 +370,7 @@ def dprint(name: str, dictio: dict, end: str = "\n"):
 
 dataset._save_to_dir_(tmpdir)
 
+
 # %% [markdown]
 # ### Get the number of Samples that can be loaded from a directory
 
diff --git a/examples/containers/sample_example.py b/examples/containers/sample_example.py
index d604e8ef..75dbf64f 100644
--- a/examples/containers/sample_example.py
+++ b/examples/containers/sample_example.py
@@ -56,7 +56,7 @@ def show_sample(sample: Sample):
 # %% [markdown]
 # ## Section 1: Initializing an Empty Sample and Adding Data
 #
-# This section demonstrates how to initialize an empty Sample and add scalars, time series data, and meshes / CGNS trees.
+# This section demonstrates how to initialize an empty Sample and add scalars and meshes / CGNS trees.
# %% [markdown] # ### Create and display CGNS tree from an unstructured mesh @@ -123,25 +123,12 @@ def show_sample(sample: Sample): show_sample(sample) -# %% [markdown] -# ### Add time series to a Sample - -# %% -# Add a time series named 'stuff' -sample.add_time_series("stuff", np.arange(10), np.random.randn(10)) - -# Add a time series named 'bluff' -sample.add_time_series("bluff", np.arange(2, 6), np.random.randn(4)) - -# As you can see it is not displayed when printing -show_sample(sample) - # %% [markdown] # ### Add a CGNS Tree to a Sample and display it # %% # Add the previously created CGNS tree to the sample -sample.meshes.add_tree(tree) +sample.features.add_tree(tree) # Display the Sample CGNS tree sample.show_tree() @@ -157,24 +144,15 @@ def show_sample(sample: Sample): meshes_dict = {0.0: tree, 0.5: tree, 1.0: tree} # Set meshes in the Sample -new_sample_mult_mesh.meshes.set_meshes(meshes_dict) +new_sample_mult_mesh.features.set_meshes(meshes_dict) -print(f"{new_sample_mult_mesh.meshes.get_all_mesh_times() = }") +print(f"{new_sample_mult_mesh.features.get_all_mesh_times() = }") -# %% [markdown] -# ### Link tree from another sample - -# %% -path_linked_sample = Path.cwd() / "dataset/samples/sample_000000000/meshes/mesh_000000000.cgns" -new_sample_mult_mesh.link_tree( - path_linked_sample, linked_sample=sample, linked_time=0.0, time=1.5 -) -print(f"{new_sample_mult_mesh.meshes.get_all_mesh_times() = }") # %% [markdown] # ## Section 2: Accessing and Modifying Sample Data # -# This section demonstrates how to access and modify base, zone, node, scalar, field and time series data within the Sample. +# This section demonstrates how to access and modify base, zone, node, scalar and field data within the Sample. # %% [markdown] # ### Initialize CGNS tree base @@ -265,25 +243,14 @@ def show_sample(sample: Sample): print(f"{sample.get_field('T') = }") print(f"{sample.get_field('Temperature') = }") -# %% [markdown] -# ### Access time series data in Sample - -# %% -# It will look for a default base if no base and zone are given -sample.add_time_series("stuff", np.arange(10), np.random.randn(10)) - -print(f"{sample.get_time_series_names() = }") -print(f"{sample.get_time_series('S') = }") -print(f"{sample.get_time_series('stuff') = }") - # %% [markdown] # ### Access to points coordinates # %% # It will look for a default base if no base and zone are given print(f"{sample.get_nodes() = }") -print(f"{sample.meshes.get_points() = }") # same as get_nodes -print(f"{sample.meshes.get_vertices() = }") # same as get_nodes +print(f"{sample.features.get_points() = }") # same as get_nodes +print(f"{sample.features.get_vertices() = }") # same as get_nodes # %% [markdown] # ### Retrieve element connectivity data @@ -293,27 +260,27 @@ def show_sample(sample: Sample): tmp_sample = Sample() # Add the previously created CGNS tree in the Sample -tmp_sample.meshes.add_tree(tree) +tmp_sample.features.add_tree(tree) -print("element connectivity = \n", f"{tmp_sample.meshes.get_elements()}") +print("element connectivity = \n", f"{tmp_sample.features.get_elements()}") # %% [markdown] # ### Access the available base of the CGNS tree # %% # Get base names -bases_names = sample.meshes.get_base_names() +bases_names = sample.features.get_base_names() # Get full base path -full_bases_names = sample.meshes.get_base_names(full_path=True) +full_bases_names = sample.features.get_base_names(full_path=True) print(f"{bases_names=}") print(f"{full_bases_names=}") # %% # Get the first base name -base_name = 
sample.meshes.get_base_names()[0] +base_name = sample.features.get_base_names()[0] # Get base node -base_node_content = sample.meshes.get_base(base_name) +base_node_content = sample.features.get_base(base_name) print(f"{base_node_content = }") @@ -322,22 +289,22 @@ def show_sample(sample: Sample): # %% # Get the first base name -base_name = sample.meshes.get_base_names()[0] +base_name = sample.features.get_base_names()[0] -print(f"{sample.meshes.has_base(base_name) = }") -print(f"{sample.meshes.has_base('unknown_base_name') = }") +print(f"{sample.features.has_base(base_name) = }") +print(f"{sample.features.has_base('unknown_base_name') = }") # %% [markdown] # ### Access the available zone from a CGNS tree base # %% # Get the first base name -base_name = sample.meshes.get_base_names()[0] +base_name = sample.features.get_base_names()[0] # Get zones associated with the first base -zones_names = sample.meshes.get_zone_names(base_name) +zones_names = sample.features.get_zone_names(base_name) # Get full path of zones associated with the first base -full_zones_names = sample.meshes.get_zone_names(base_name, full_path=True) +full_zones_names = sample.features.get_zone_names(base_name, full_path=True) print(f" - Base : {base_name}") print(f" - Zone(s): {zones_names}") @@ -347,7 +314,7 @@ def show_sample(sample: Sample): # Get the first zone name from a base name zone_name = zones_names[0] # Get base node -zone_node_content = sample.meshes.get_zone(zone_name, base_name) +zone_node_content = sample.features.get_zone(zone_name, base_name) print(f"{zone_node_content = }") @@ -357,7 +324,7 @@ def show_sample(sample: Sample): # %% # Get the first zone name from a base name zone_name = zones_names[0] -z_type = sample.meshes.get_zone_type(zone_name, base_name) +z_type = sample.features.get_zone_type(zone_name, base_name) print(f"zone type = {z_type}") @@ -368,8 +335,8 @@ def show_sample(sample: Sample): # Get the first zone name from a base name zone_name = zones_names[0] -print(f"{sample.meshes.has_zone(zone_name, base_name) = }") -print(f"{sample.meshes.has_zone('unknown_zone_name', base_name) = }") +print(f"{sample.features.has_zone(zone_name, base_name) = }") +print(f"{sample.features.has_zone('unknown_zone_name', base_name) = }") # %% [markdown] # ### Get mesh from sample @@ -383,30 +350,30 @@ def show_sample(sample: Sample): # %% # Before adding new tree -print(f"{sample.meshes.get_all_mesh_times() = }") +print(f"{sample.features.get_all_mesh_times() = }") # Add one CGNS tree at time 1. -sample.meshes.add_tree(tree, 1.0) +sample.features.add_tree(tree, 1.0) # After adding new tree -print(f"{sample.meshes.get_all_mesh_times() = }") +print(f"{sample.features.get_all_mesh_times() = }") # %% [markdown] # ### Creating a Sample Hierarchy with bases, zones, and associated data. 
# %% -bases_names = sample.meshes.get_base_names() -full_bases_names = sample.meshes.get_base_names(full_path=True) +bases_names = sample.features.get_base_names() +full_bases_names = sample.features.get_base_names(full_path=True) print(f"{bases_names = }") print(f"{full_bases_names = }", end="\n\n") for b_name in bases_names: - zones_names = sample.meshes.get_zone_names(b_name) - full_zones_names = sample.meshes.get_zone_names(b_name, full_path=True) + zones_names = sample.features.get_zone_names(b_name) + full_zones_names = sample.features.get_zone_names(b_name, full_path=True) print(f" - Base : {b_name}") for z_name, f_z_name in zip(zones_names, full_zones_names): print( - f" - {z_name} -> type: {sample.meshes.get_zone_type(z_name, b_name)} | full: {f_z_name}" + f" - {z_name} -> type: {sample.features.get_zone_type(z_name, b_name)} | full: {f_z_name}" ) # %% [markdown] @@ -419,13 +386,13 @@ def show_sample(sample: Sample): # %% # Without a provided default time, it searches the first time available in all mesh times -print(f"{sample.meshes.get_all_mesh_times() = }") -print(f"{sample.meshes.get_time_assignment() = }", end="\n\n") +print(f"{sample.features.get_all_mesh_times() = }") +print(f"{sample.features.get_time_assignment() = }", end="\n\n") # Set default time sample.set_default_time(1.0) # Now that default time has been assigned, there's no need to specify it in function calls. -print(f"{sample.meshes.get_time_assignment() = }", end="\n\n") +print(f"{sample.features.get_time_assignment() = }", end="\n\n") # Print the tree at time 1.0 sample.show_tree() # == sample.show_tree(1.0) @@ -439,21 +406,21 @@ def show_sample(sample: Sample): # %% # Reset default time -sample.meshes._default_active_time = None +sample.features._default_active_time = None # Without a provided default time, it searches the first time available in all mesh times -print(f"{sample.meshes.get_time_assignment() = }", end="\n\n") +print(f"{sample.features.get_time_assignment() = }", end="\n\n") # Create new bases sample.init_base(1, 1, "new_base", 0.0) -print(f"{sample.meshes.get_topological_dim('new_base', 0.0) = }") -print(f"{sample.meshes.get_physical_dim('new_base', 0.0) = }") +print(f"{sample.features.get_topological_dim('new_base', 0.0) = }") +print(f"{sample.features.get_physical_dim('new_base', 0.0) = }") # %% # Attempting to get a base when the default base is not set, and there are multiple bases available. -print(f"{sample.meshes.get_base_names() = }", end="\n\n") +print(f"{sample.features.get_base_names() = }", end="\n\n") try: - sample.meshes.get_base_assignment() + sample.features.get_base_assignment() except KeyError as e: print(str(e)) @@ -462,17 +429,17 @@ def show_sample(sample: Sample): sample.set_default_base("SurfaceMesh", 0.0) # Now that default base and time have been assigned, it is no longer necessary to specify them in function calls. -print(f"{sample.meshes.get_time_assignment() = }") -print(f"{sample.meshes.get_base_assignment() = }", end="\n\n") +print(f"{sample.features.get_time_assignment() = }") +print(f"{sample.features.get_base_assignment() = }", end="\n\n") # Print the topological and physical dim for the default base == 'SurfaceMesh' -print(f"{sample.meshes.get_topological_dim() = }") -print(f"{sample.meshes.get_physical_dim() = }") +print(f"{sample.features.get_topological_dim() = }") +print(f"{sample.features.get_physical_dim() = }") # %% # If base is specified as an argument in a function, it takes precedence over the default base. 
print( - f"{sample.meshes.get_physical_dim('new_base') = }" + f"{sample.features.get_physical_dim('new_base') = }" ) # Print the 'new_base' physical dim instead of the default base physical dim # %% [markdown] @@ -480,11 +447,11 @@ def show_sample(sample: Sample): # %% # Reset default base and time -sample.meshes._default_active_time = None -sample.meshes._default_active_base = None +sample.features._default_active_time = None +sample.features._default_active_base = None # Without a provided default time, it searches the first time available in all mesh times -print(f"{sample.meshes.get_time_assignment() = }", end="\n\n") +print(f"{sample.features.get_time_assignment() = }", end="\n\n") # Create a new zone in 'SurfaceMesh' base sample.init_zone( @@ -493,43 +460,43 @@ def show_sample(sample: Sample): zone_name="new_zone", base_name="SurfaceMesh", ) -print(f"{sample.meshes.get_zone_type('TestZoneName', 'SurfaceMesh') = }") -print(f"{sample.meshes.get_zone_type('new_zone', 'SurfaceMesh') = }") +print(f"{sample.features.get_zone_type('TestZoneName', 'SurfaceMesh') = }") +print(f"{sample.features.get_zone_type('new_zone', 'SurfaceMesh') = }") # %% # Set default base sample.set_default_base("SurfaceMesh") # Attempting to get a zone when the default zone is not set, and there are multiple zones available in the default base. -print(f"{sample.meshes.get_zone_names() = }", end="\n\n") +print(f"{sample.features.get_zone_names() = }", end="\n\n") try: - sample.meshes.get_zone_assignment() + sample.features.get_zone_assignment() except KeyError as e: print(str(e)) # %% # Reset default base and time -sample.meshes._default_active_time = None -sample.meshes._default_active_base = None +sample.features._default_active_time = None +sample.features._default_active_base = None # Set default base, zone and time sample.set_default_zone_base("TestZoneName", "SurfaceMesh", 0.0) # Now that default base, zone and time have been assigned, it is no longer necessary to specify them in function calls. -print(f"{sample.meshes.get_time_assignment() = }") -print(f"{sample.meshes.get_base_assignment() = }") -print(f"{sample.meshes.get_zone_assignment() = }", end="\n\n") +print(f"{sample.features.get_time_assignment() = }") +print(f"{sample.features.get_base_assignment() = }") +print(f"{sample.features.get_zone_assignment() = }", end="\n\n") # Print the type of the default zone (from the default base) -print(f"{sample.meshes.get_zone_type() = }") +print(f"{sample.features.get_zone_type() = }") # Print the default zone content (from the default base) -print(f"{sample.meshes.get_zone() = }") +print(f"{sample.features.get_zone() = }") # %% # If zone is specified as an argument in a function, it takes precedence over the default zone. 
print( - f"{sample.meshes.get_zone_type('new_zone') = }" + f"{sample.features.get_zone_type('new_zone') = }" ) # Print the 'new_zone' type instead of the default zone type # %% [markdown] diff --git a/examples/convert_users_data_example.py b/examples/convert_users_data_example.py index 98f30a89..59b7bdbb 100644 --- a/examples/convert_users_data_example.py +++ b/examples/convert_users_data_example.py @@ -158,7 +158,7 @@ def in_notebook(): # Add CGNS Meshe to samples with specific time steps sample = Sample() - sample.meshes.add_tree(cgns_tree) + sample.features.add_tree(cgns_tree) # Add random scalar values to the sample for sname in in_scalars_names: diff --git a/examples/pipelines/pipeline_example.py b/examples/pipelines/pipeline_example.py index a1a53355..4fb738fb 100644 --- a/examples/pipelines/pipeline_example.py +++ b/examples/pipelines/pipeline_example.py @@ -74,7 +74,7 @@ # %% hf_dataset = load_hf_dataset_from_hub("PLAID-datasets/VKI-LS59", split="all_samples[:24]") -dataset_train, _ = huggingface_dataset_to_plaid(hf_dataset, processes_number = n_processes, verbose = False) +dataset_train = huggingface_dataset_to_plaid(hf_dataset, processes_number = n_processes, verbose = False) # %% [markdown] diff --git a/examples/post/bisect_example.py b/examples/post/bisect_example.py index e2f5de03..8ed21b62 100644 --- a/examples/post/bisect_example.py +++ b/examples/post/bisect_example.py @@ -84,8 +84,8 @@ pred_path = dataset_directory / "dataset_pred" problem_path = dataset_directory / "problem_definition" -# Using file paths to generate bisect plot on scalar_2 -plot_bisect(ref_path, pred_path, problem_path, "scalar_2", "differ_bisect_plot") +# Using file paths to generate bisect plot on feature_2 +plot_bisect(ref_path, pred_path, problem_path, "feature_2", "differ_bisect_plot") # %% [markdown] # ## Plotting with PLAID @@ -100,8 +100,8 @@ pred_path = Dataset(dataset_directory / "dataset_pred") problem_path = ProblemDefinition(dataset_directory / "problem_definition") -# Using PLAID objects to generate bisect plot on scalar_2 -plot_bisect(ref_path, pred_path, problem_path, "scalar_2", "equal_bisect_plot") +# Using PLAID objects to generate bisect plot on feature_2 +plot_bisect(ref_path, pred_path, problem_path, "feature_2", "equal_bisect_plot") # %% [markdown] # ## Mixing with Scalar Index and Verbose diff --git a/examples/run_examples.bat b/examples/run_examples.bat index 91147c3e..47ec177a 100644 --- a/examples/run_examples.bat +++ b/examples/run_examples.bat @@ -1,5 +1,5 @@ @echo off -for %%f in (*.py utils\*.py containers\*.py post\*.py) do ( +for %%f in (*.py examples\*.py utils\*.py containers\*.py post\*.py) do ( echo -------------------------------------------------------------------------------------- echo #---# run python %%f python %%f || exit /b 1 diff --git a/examples/run_examples.sh b/examples/run_examples.sh index 379fd62e..8853bbd1 100755 --- a/examples/run_examples.sh +++ b/examples/run_examples.sh @@ -1,9 +1,9 @@ #!/bin/bash if [[ "$(uname)" == "Linux" ]]; then - FILES="*.py utils/*.py containers/*.py post/*.py pipelines/*.py" + FILES="*.py examples/*.py bridges/*.py utils/*.py containers/*.py post/*.py pipelines/*.py" else - FILES="*.py utils/*.py containers/*.py post/*.py" + FILES="*.py examples/*.py utils/*.py containers/*.py post/*.py" fi for file in $FILES diff --git a/pyproject.toml b/pyproject.toml index 13218f38..9fcd01fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ dependencies = [ "numpy", "matplotlib", "pydantic", + "optree", ] dynamic = 
["version"] diff --git a/src/plaid/bridges/huggingface_bridge.py b/src/plaid/bridges/huggingface_bridge.py index 1d217e02..d6f3ef07 100644 --- a/src/plaid/bridges/huggingface_bridge.py +++ b/src/plaid/bridges/huggingface_bridge.py @@ -6,13 +6,18 @@ # file 'LICENSE.txt', which is part of this source code package. # # +import io +import json import pickle import shutil import sys +from functools import partial from multiprocessing import Pool from pathlib import Path -from typing import Any, Callable, Optional +from typing import Callable, Optional +import numpy as np +import yaml from tqdm import tqdm if sys.version_info >= (3, 11): @@ -27,19 +32,20 @@ from typing import Union import datasets -from datasets import load_dataset -from huggingface_hub import snapshot_download +from datasets import Sequence, Value, load_dataset, load_from_disk +from huggingface_hub import HfApi, hf_hub_download, snapshot_download from pydantic import ValidationError from plaid import Dataset, ProblemDefinition, Sample -from plaid.containers.features import SampleMeshes, SampleScalars +from plaid.containers.features import SampleFeatures from plaid.types import IndexType +from plaid.utils.cgns_helper import unflatten_cgns_tree +from plaid.utils.deprecation import deprecated logger = logging.getLogger(__name__) """ Convention with hf (Hugging Face) datasets: -- hf-datasets contains a single Hugging Face split, named 'all_samples'. - samples contains a single Hugging Face feature, named called "sample". - Samples are instances of :ref:`Sample`. - Mesh objects included in samples follow the CGNS standard, and can be converted in Muscat.Containers.Mesh.Mesh. @@ -47,6 +53,7 @@ """ +# ------------------------------------------------------------------------------ def load_hf_dataset_from_hub( repo_id: str, streaming: bool = False, *args, **kwargs ) -> Union[ @@ -119,6 +126,241 @@ def _get_cached_path(repo_id_): return load_dataset(repo_id, streaming=streaming, *args, **kwargs) +def load_hf_infos_from_hub( + repo_id: str, +) -> dict[ + str, dict[str, str] +]: # pragma: no cover (to prevent testing from downloading, this is run by examples) + """Load dataset infos from the Hugging Face Hub. + + Downloads the infos.yaml file from the specified repository and parses it as a dictionary. + + Args: + repo_id (str): The repository ID on the Hugging Face Hub. + + Returns: + dict[str, dict[str, str]]: Dictionary containing dataset infos. + """ + # Download infos.yaml + yaml_path = hf_hub_download( + repo_id=repo_id, filename="infos.yaml", repo_type="dataset" + ) + with open(yaml_path, "r", encoding="utf-8") as f: + infos = yaml.safe_load(f) + + return infos + + +def load_hf_problem_definition_from_hub( + repo_id: str, name: str +) -> ( + ProblemDefinition +): # pragma: no cover (to prevent testing from downloading, this is run by examples) + """Load a ProblemDefinition from the Hugging Face Hub. + + Downloads the problem infos YAML and split JSON files from the specified repository and location, + then initializes a ProblemDefinition object with this information. + + Args: + repo_id (str): The repository ID on the Hugging Face Hub. + name (str): The name of the problem_definition stored in the repo. + + Returns: + ProblemDefinition: The loaded problem definition. 
+    """
+    # Download split.json
+    json_path = hf_hub_download(
+        repo_id=repo_id,
+        filename=f"problem_definitions/{name}/split.json",
+        repo_type="dataset",
+    )
+    with open(json_path, "r", encoding="utf-8") as f:
+        json_data = json.load(f)
+
+    # Download problem_infos.yaml
+    yaml_path = hf_hub_download(
+        repo_id=repo_id,
+        filename=f"problem_definitions/{name}/problem_infos.yaml",
+        repo_type="dataset",
+    )
+    with open(yaml_path, "r", encoding="utf-8") as f:
+        yaml_data = yaml.safe_load(f)
+
+    prob_def = ProblemDefinition()
+    prob_def._initialize_from_problem_infos_dict(yaml_data)
+    prob_def.set_split(json_data)
+
+    return prob_def
+
+
+def push_dataset_dict_to_hub(
+    repo_id: str, hf_dataset_dict: datasets.DatasetDict
+) -> None:  # pragma: no cover (push not tested)
+    """Push a Hugging Face dataset dict to the Hugging Face Hub.
+
+    Args:
+        repo_id (str): The repository ID on the Hugging Face Hub.
+        hf_dataset_dict (datasets.DatasetDict): The Hugging Face dataset dict to push.
+    """
+    hf_dataset_dict.push_to_hub(repo_id)
+
+
+def push_dataset_infos_to_hub(
+    repo_id: str, infos: dict[str, dict[str, str]]
+) -> None:  # pragma: no cover (push not tested)
+    """Upload dataset infos to the Hugging Face Hub.
+
+    Serializes the infos dictionary to YAML and uploads it to the specified repository as infos.yaml.
+
+    Args:
+        repo_id (str): The repository ID on the Hugging Face Hub.
+        infos (dict[str, dict[str, str]]): Dictionary containing dataset infos to upload.
+
+    Raises:
+        ValueError: If the infos dictionary is empty.
+    """
+    if len(infos) > 0:
+        api = HfApi()
+        yaml_str = yaml.dump(infos)
+        yaml_buffer = io.BytesIO(yaml_str.encode("utf-8"))
+        api.upload_file(
+            path_or_fileobj=yaml_buffer,
+            path_in_repo="infos.yaml",
+            repo_id=repo_id,
+            repo_type="dataset",
+            commit_message="Upload infos.yaml",
+        )
+    else:
+        raise ValueError("'infos' must not be empty")
+
+
+def push_problem_definition_to_hub(
+    repo_id: str, name: str, pb_def: ProblemDefinition
+) -> None:  # pragma: no cover (push not tested)
+    """Upload a ProblemDefinition and its split information to the Hugging Face Hub.
+
+    Args:
+        repo_id (str): The repository ID on the Hugging Face Hub.
+        name (str): The name of the problem_definition to store in the repo.
+        pb_def (ProblemDefinition): The problem definition to upload.
+    """
+    api = HfApi()
+    data = pb_def._generate_problem_infos_dict()
+    if data is not None:
+        yaml_str = yaml.dump(data)
+        yaml_buffer = io.BytesIO(yaml_str.encode("utf-8"))
+
+        api.upload_file(
+            path_or_fileobj=yaml_buffer,
+            path_in_repo=f"problem_definitions/{name}/problem_infos.yaml",
+            repo_id=repo_id,
+            repo_type="dataset",
+            commit_message=f"Upload problem_definitions/{name}/problem_infos.yaml",
+        )
+
+    data = pb_def.get_split()
+    json_str = json.dumps(data)
+    json_buffer = io.BytesIO(json_str.encode("utf-8"))
+
+    api.upload_file(
+        path_or_fileobj=json_buffer,
+        path_in_repo=f"problem_definitions/{name}/split.json",
+        repo_id=repo_id,
+        repo_type="dataset",
+        commit_message=f"Upload problem_definitions/{name}/split.json",
+    )
+
+
+# ------------------------------------------------------------------------------
+
+
+def load_dataset_dict_from_disk(path: Union[str, Path]) -> datasets.DatasetDict:
+    """Load a Hugging Face DatasetDict from disk.
+
+    Args:
+        path (Union[str, Path]): The directory path from which to load the dataset dict.
+
+    Returns:
+        datasets.DatasetDict: The loaded Hugging Face DatasetDict.
+    """
+    return load_from_disk(str(path))
+
+
+def load_dataset_infos_from_disk(path: Union[str, Path]) -> dict[str, dict[str, str]]:
+    """Load dataset infos from a YAML file on disk.
+
+    Args:
+        path (Union[str, Path]): The directory path containing the infos file.
+
+    Returns:
+        dict[str, dict[str, str]]: Dictionary containing dataset infos.
+    """
+    infos_fname = Path(path) / "infos.yaml"
+    with infos_fname.open("r") as file:
+        infos = yaml.safe_load(file)
+    return infos
+
+
+def load_problem_definition_from_disk(
+    path: Union[str, Path], name: Union[str, Path]
+) -> ProblemDefinition:
+    """Load a ProblemDefinition and its split information from disk.
+
+    Args:
+        path (Union[str, Path]): The root directory path for loading.
+        name (str): The name of the problem_definition stored in the disk directory.
+
+    Returns:
+        ProblemDefinition: The loaded problem definition.
+    """
+    pb_def = ProblemDefinition()
+    pb_def._load_from_dir_(Path(path) / Path("problem_definitions") / Path(name))
+    return pb_def
+
+
+def save_dataset_dict_to_disk(
+    path: Union[str, Path], hf_dataset_dict: datasets.DatasetDict
+) -> None:
+    """Save a Hugging Face DatasetDict to disk.
+
+    Args:
+        path (Union[str, Path]): The directory path where the dataset dict will be saved.
+        hf_dataset_dict (datasets.DatasetDict): The Hugging Face DatasetDict to save.
+    """
+    hf_dataset_dict.save_to_disk(str(path))
+
+
+def save_dataset_infos_to_disk(
+    path: Union[str, Path], infos: dict[str, dict[str, str]]
+) -> None:
+    """Save dataset infos as a YAML file to disk.
+
+    Args:
+        path (Union[str, Path]): The directory path where the infos file will be saved.
+        infos (dict[str, dict[str, str]]): Dictionary containing dataset infos.
+    """
+    infos_fname = Path(path) / "infos.yaml"
+    infos_fname.parent.mkdir(parents=True, exist_ok=True)
+    with open(infos_fname, "w") as file:
+        yaml.dump(infos, file, default_flow_style=False, sort_keys=False)
+
+
+def save_problem_definition_to_disk(
+    path: Union[str, Path], name: Union[str, Path], pb_def: ProblemDefinition
+) -> None:
+    """Save a ProblemDefinition and its split information to disk.
+
+    Args:
+        path (Union[str, Path]): The root directory path for saving.
+        name (str): The name of the problem_definition to store in the disk directory.
+        pb_def (ProblemDefinition): The problem definition to save.
+    """
+    pb_def._save_to_dir_(Path(path) / Path("problem_definitions") / Path(name))
+
+
+# ------------------------------------------------------------------------------
+
+
 def to_plaid_sample(hf_sample: dict[str, bytes]) -> Sample:
     """Convert a Hugging Face dataset sample to a plaid :class:`Sample <plaid.containers.sample.Sample>`.
 
     If it still fails because of a missing key, it raises a KeyError.
""" pickled_hf_sample = pickle.loads(hf_sample["sample"]) + try: # Try to validate the sample return Sample.model_validate(pickled_hf_sample) - except ValidationError: - # If it fails, try to build the sample from its components - try: - scalars = SampleScalars(scalars=pickled_hf_sample["scalars"]) - meshes = SampleMeshes( - meshes=pickled_hf_sample["meshes"], - mesh_base_name=pickled_hf_sample.get("mesh_base_name"), - mesh_zone_name=pickled_hf_sample.get("mesh_zone_name"), - links=pickled_hf_sample.get("links"), - paths=pickled_hf_sample.get("paths"), - ) - sample = Sample( - path=pickled_hf_sample.get("path"), - meshes=meshes, - scalars=scalars, - time_series=pickled_hf_sample.get("time_series"), - ) - return Sample.model_validate(sample) - except KeyError as e: - raise KeyError(f"Missing key {e!s} in HF data.") from e - - -def generate_huggingface_description( - infos: dict, problem_definition: ProblemDefinition -) -> dict[str, Any]: - """Generates a Hugging Face dataset description field from a plaid dataset infos and problem definition. - - The conventions chosen here ensure working conversion to and from huggingset datasets. - Args: - infos (dict): infos entry of the plaid dataset from which the Hugging Face description is to be generated - problem_definition (ProblemDefinition): of which the Hugging Face description is to be generated - - Returns: - dict[str]: Hugging Face dataset description - """ - # type hinting the values as Any because they can be of various types - description: dict[str, Any] = {} + except ValidationError: + features = SampleFeatures( + data=pickled_hf_sample.get("meshes"), + ) - description.update(infos) + sample = Sample( + path=pickled_hf_sample.get("path"), + features=features, + ) - split: dict[str, IndexType] = problem_definition.get_split(indices_name=None) # pyright: ignore[reportAssignmentType] - description["split"] = split - description["task"] = problem_definition.get_task() + scalars = pickled_hf_sample.get("scalars") + if scalars: + for sn, val in scalars.items(): + sample.add_scalar(sn, val) - description["in_scalars_names"] = problem_definition.in_scalars_names - description["out_scalars_names"] = problem_definition.out_scalars_names - description["in_timeseries_names"] = problem_definition.in_timeseries_names - description["out_timeseries_names"] = problem_definition.out_timeseries_names - description["in_fields_names"] = problem_definition.in_fields_names - description["out_fields_names"] = problem_definition.out_fields_names - description["in_meshes_names"] = problem_definition.in_meshes_names - description["out_meshes_names"] = problem_definition.out_meshes_names - return description + return Sample.model_validate(sample) def plaid_dataset_to_huggingface( dataset: Dataset, - problem_definition: ProblemDefinition, - split: str = "all_samples", + ids: Optional[list[IndexType]] = None, + split_name: str = "all_samples", processes_number: int = 1, ) -> datasets.Dataset: """Use this function for converting a Hugging Face dataset from a plaid dataset. @@ -197,8 +403,8 @@ def plaid_dataset_to_huggingface( Args: dataset (Dataset): the plaid dataset to be converted in Hugging Face format - problem_definition (ProblemDefinition): the problem definition is used to generate the description of the Hugging Face dataset. - split (str): The name of the split. Default: "all_samples". + ids (list, optional): The specific sample IDs to convert the dataset. Defaults to None. + split_name (str): The name of the split. Default: "all_samples". 
         processes_number (int): The number of processes used to generate the Hugging Face dataset. Default: 1.
 
     Returns:
@@ -211,10 +417,8 @@
             dataset.save_to_disk("path/to/dir)
             dataset.push_to_hub("chanel/dataset")
     """
-    if split == "all_samples":
+    if ids is None:
         ids = dataset.get_sample_ids()
-    else:
-        ids = problem_definition.get_split(split)
 
     def generator():
         for sample in dataset[ids]:
             yield {"sample": pickle.dumps(sample)}
 
     return plaid_generator_to_huggingface(
         generator=generator,
-        infos=dataset.get_infos(),
-        problem_definition=problem_definition,
-        split=split,
+        split_name=split_name,
         processes_number=processes_number,
     )
 
 
 def plaid_dataset_to_huggingface_datasetdict(
     dataset: Dataset,
-    problem_definition: ProblemDefinition,
-    main_splits: list[str],
+    main_splits: dict[str, IndexType],
     processes_number: int = 1,
 ) -> datasets.DatasetDict:
     """Use this function for converting a Hugging Face dataset dict from a plaid dataset.
@@ -258,36 +459,30 @@
         dataset.push_to_hub("chanel/dataset")
     """
     _dict = {}
-    for _, split in enumerate(main_splits):
+    for split_name, ids in main_splits.items():
         ds = plaid_dataset_to_huggingface(
             dataset=dataset,
-            problem_definition=problem_definition,
-            split=split,
+            ids=ids,
             processes_number=processes_number,
         )
-        _dict[split] = ds
+        _dict[split_name] = ds
 
     return datasets.DatasetDict(_dict)
 
 
 def plaid_generator_to_huggingface(
     generator: Callable,
-    infos: dict,
-    problem_definition: ProblemDefinition,
-    split: str = "all_samples",
+    split_name: str = "all_samples",
     processes_number: int = 1,
 ) -> datasets.Dataset:
     """Use this function for creating a Hugging Face dataset from a sample generator function.
 
     This function can be used when the plaid dataset cannot be loaded in RAM all at once due to its size. The generator enables loading samples one by one.
-    The dataset can then be saved to disk, or pushed to the Hugging Face hub.
 
     Args:
         generator (Callable): a function yielding a dict {"sample" : sample}, where sample is of type 'bytes'
-        infos (dict): the info is used to generate the description of the Hugging Face dataset.
-        problem_definition (ProblemDefinition): the problem definition is used to generate the description of the Hugging Face dataset.
-        split (str): The name of the split. Default: "all_samples".
+        split_name (str): The name of the split. Default: "all_samples".
         processes_number (int): The number of processes used to generate the Hugging Face dataset. Default: 1.
 
     Returns:
@@ -296,39 +491,21 @@
 
     Example:
         .. code-block:: python
 
-            dataset = plaid_generator_to_huggingface(generator, infos, split, problem_definition)
-            dataset.push_to_hub("chanel/dataset")
-            dataset.save_to_disk("path/to/dir")
+            dataset = plaid_generator_to_huggingface(generator, split_name="train")
     """
     ds: datasets.Dataset = datasets.Dataset.from_generator(  # pyright: ignore[reportAssignmentType]
-        generator,
+        generator=generator,
         features=datasets.Features({"sample": datasets.Value("binary")}),
         num_proc=processes_number,
         writer_batch_size=1,
-        split=datasets.splits.NamedSplit(split),
-    )
-
-    def update_dataset_description(
-        ds: datasets.Dataset, new_desc: dict[str, Any]
-    ) -> datasets.Dataset:
-        info = ds.info.copy()
-        info.description = new_desc  # pyright: ignore[reportAttributeAccessIssue] -> info.description is HF's DatasetInfo. We might want to correct this later.
-        ds._info = info
-        return ds
-
-    new_description: dict[str, Any] = generate_huggingface_description(
-        infos, problem_definition
+        split=datasets.splits.NamedSplit(split_name),
     )
-    ds = update_dataset_description(ds, new_description)
 
     return ds
 
 
 def plaid_generator_to_huggingface_datasetdict(
-    generator: Callable,
-    infos: dict,
-    problem_definition: ProblemDefinition,
-    main_splits: list,
+    generators: dict[str, Callable],
    processes_number: int = 1,
 ) -> datasets.DatasetDict:
     """Use this function for creating a Hugging Face dataset dict (containing multiple splits) from a sample generator function.
@@ -341,10 +518,7 @@
     Only the first split will contain the decription.
 
     Args:
-        generator (Callable): a function yielding a dict {"sample" : sample}, where sample is of type 'bytes'
-        infos (dict): infos entry of the plaid dataset from which the Hugging Face dataset is to be generated
-        problem_definition (ProblemDefinition): the problem definition is used to generate the description of the Hugging Face dataset.
-        main_splits (str, optional): The name of the main splits: defining a partitioning of the sample ids.
+        generators (dict[str, Callable]): a dict of functions yielding a dict {"sample" : sample}, where sample is of type 'bytes'
         processes_number (int): The number of processes used to generate the Hugging Face dataset. Default: 1.
 
     Returns:
@@ -353,57 +527,29 @@
 
     Example:
         .. code-block:: python
 
-            dataset = plaid_generator_to_huggingface_datasetdict(generator, infos, problem_definition, main_splits)
-            dataset.push_to_hub("chanel/dataset")
-            dataset.save_to_disk("path/to/dir")
+            hf_dataset_dict = plaid_generator_to_huggingface_datasetdict(generators)
+            push_dataset_dict_to_hub("chanel/dataset", hf_dataset_dict)
+            hf_dataset_dict.save_to_disk("path/to/dir")
     """
     _dict = {}
-    for _, split in enumerate(main_splits):
+    for split_name, generator in generators.items():
         ds = plaid_generator_to_huggingface(
-            generator,
-            infos,
-            problem_definition=problem_definition,
-            split=split,
+            generator=generator,
             processes_number=processes_number,
+            split_name=split_name,
         )
-        _dict[split] = ds
+        _dict[split_name] = ds
 
     return datasets.DatasetDict(_dict)
 
 
-def huggingface_description_to_problem_definition(
-    description: dict,
-) -> ProblemDefinition:
-    """Converts a Hugging Face dataset description to a plaid problem definition.
-
-    Args:
-        description (dict): the description field of a Hugging Face dataset, containing the problem definition
-
-    Returns:
-        problem_definition (ProblemDefinition): the plaid problem definition initialized from the Hugging Face dataset description
-    """
-    problem_definition = ProblemDefinition()
-    problem_definition.set_task(description["task"])
-    problem_definition.set_split(description["split"])
-    problem_definition.add_input_scalars_names(description["in_scalars_names"])
-    problem_definition.add_output_scalars_names(description["out_scalars_names"])
-    problem_definition.add_input_timeseries_names(description["in_timeseries_names"])
-    problem_definition.add_output_timeseries_names(description["out_timeseries_names"])
-    problem_definition.add_input_fields_names(description["in_fields_names"])
-    problem_definition.add_output_fields_names(description["out_fields_names"])
-    problem_definition.add_input_meshes_names(description["in_meshes_names"])
-    problem_definition.add_output_meshes_names(description["out_meshes_names"])
-
-    return problem_definition
-
-
 def huggingface_dataset_to_plaid(
     ds: datasets.Dataset,
     ids: Optional[list[int]] = None,
     processes_number: int = 1,
     large_dataset: bool = False,
     verbose: bool = True,
-) -> tuple[Dataset, ProblemDefinition]:
+) -> Dataset:
     """Use this function for converting a plaid dataset from a Hugging Face dataset.
 
     A Hugging Face dataset can be read from disk or the hub. From the hub, the
@@ -443,10 +589,9 @@
             "Trying to parallelize with more processes than selected samples in dataset"
         )
 
-    dataset = Dataset()
+    description = "Converting Hugging Face dataset to plaid"
 
-    if verbose:
-        print("Converting Hugging Face dataset to plaid dataset...")
+    dataset = Dataset()
 
     if large_dataset:
         if ids:
@@ -465,6 +610,7 @@ def parallel_convert(shard_path, n_workers):
                     pool.imap(converter, range(len(converter.hf_ds))),
                     total=len(converter.hf_ds),
                     disable=not verbose,
+                    desc=description,
                 )
             )
 
@@ -487,78 +633,282 @@
     else:
         indices = range(len(ds))
 
-    with Pool(processes=processes_number) as pool:
-        for idx, sample in enumerate(
-            tqdm(
-                pool.imap(_HFToPlaidSampleConverter(ds), indices),
-                total=len(indices),
-                disable=not verbose,
-            )
+    if processes_number == 1:
+        # Build the converter once and reuse it for every sample
+        converter = _HFToPlaidSampleConverter(ds)
+        for idx in tqdm(
+            indices, total=len(indices), disable=not verbose, desc=description
         ):
-            dataset.add_sample(sample, id=indices[idx])
+            dataset.add_sample(converter(idx), id=idx)
 
-    infos = {}
-    if "legal" in ds.description:
-        infos["legal"] = ds.description["legal"]
-    if "data_production" in ds.description:
-        infos["data_production"] = ds.description["data_production"]
+    else:
+        with Pool(processes=processes_number) as pool:
+            for idx, sample in enumerate(
+                tqdm(
+                    pool.imap(_HFToPlaidSampleConverter(ds), indices),
+                    total=len(indices),
+                    disable=not verbose,
+                    desc=description,
+                )
+            ):
+                dataset.add_sample(sample, id=indices[idx])
 
-    dataset.set_infos(infos)
+    return dataset
 
-    problem_definition = huggingface_description_to_problem_definition(ds.description)
 
-    return dataset, problem_definition
+# ------
 
 
+def _process_sample(
+    idx: int,
+    ds: datasets.Dataset,
+    dtypes: dict[str, str],
+    cgns_types: dict,
+) -> Sample:
+    """Process a single sample from a Hugging Face dataset and reconstruct it as a plaid Sample.
+
+    Args:
+        idx (int): Index of the sample in the Hugging Face dataset.
+        ds (datasets.Dataset): The Hugging Face dataset.
+        dtypes (dict[str, str]): Dictionary mapping feature names to numpy dtype strings.
+        cgns_types (dict): Dictionary describing CGNS types for reconstruction.

-def streamed_huggingface_dataset_to_plaid(
-    hf_repo: str,
-    number_of_samples: int,
-) -> tuple[
-    Dataset, ProblemDefinition
-]:  # pragma: no cover (to prevent testing from downloading, this is run by examples)
-    """Use this function for creating a plaid dataset by streaming on Hugging Face.
+    Returns:
+        Sample: The reconstructed plaid Sample object.
+    """
+    flat_tree = reconstruct_flat_tree_from_hf_sample(ds[idx], dtypes)
+    unflatten_tree = unflatten_cgns_tree(flat_tree, dtypes, cgns_types)
+    return Sample(features=SampleFeatures({0.0: unflatten_tree}))

-    The indices of the retrieved sample is not controled.

+def huggingface_dataset_to_plaid_new(
+    ds: datasets.Dataset,
+    dtypes: dict[str, str],
+    cgns_types: dict,
+    processes_number: int = 1,
+    verbose: bool = True,
+) -> Dataset:
+    """Convert a Hugging Face dataset to a plaid Dataset using explicit dtypes and CGNS type information.

    Args:
-        hf_repo (str): the name of the repo on Hugging Face
-        number_of_samples (int): The number of samples to retrieve.
+        ds (datasets.Dataset): The Hugging Face dataset to convert.
+        dtypes (dict[str, str]): Dictionary mapping feature names to numpy dtype strings.
+        cgns_types (dict): Dictionary describing CGNS types for reconstruction.
+        processes_number (int, optional): Number of processes for parallel conversion. Defaults to 1.
+        verbose (bool, optional): If True, prints progress using tqdm. Defaults to True.

    Returns:
-        dataset (Dataset): the converted dataset.
-        problem_definition (ProblemDefinition): the problem definition generated from the Hugging Face dataset
+        Dataset: The converted plaid Dataset.
+    """
+    description = "Converting Hugging Face dataset to plaid"
+
+    ds.set_format("numpy")
+
+    if processes_number == 1:
+        sample_list = [
+            _process_sample(idx, ds, dtypes, cgns_types)
+            for idx in tqdm(range(len(ds)), desc=description, disable=not verbose)
+        ]
+        return Dataset(samples=sample_list)
+
+    worker = partial(_process_sample, ds=ds, dtypes=dtypes, cgns_types=cgns_types)
+
+    with Pool(processes_number) as pool:
+        sample_list = list(
+            tqdm(
+                pool.imap(worker, range(len(ds))),
+                total=len(ds),
+                disable=not verbose,
+                desc=description,
+            )
+        )
+
+    return Dataset(samples=sample_list)


-    Notes:
-        .. code-block:: python

-            from plaid.bridges.huggingface_bridge import streamed_huggingface_dataset_to_plaid
-            dataset, pb_def = streamed_huggingface_dataset_to_plaid('PLAID-datasets/VKI-LS59', 2)
+# ------
+
+
+def reconstruct_flat_tree_from_hf_sample(
+    sample: dict[str, object],
+    dtypes: dict[str, str],
+) -> dict[str, object]:
+    """Reconstruct a flat tree (dict) from a Hugging Face sample using provided dtypes.
+
+    Args:
+        sample (dict[str, object]): The Hugging Face sample dictionary.
+        dtypes (dict[str, str]): Dictionary mapping feature names to numpy dtype strings.
+
+    Returns:
+        dict[str, object]: Flat tree with numpy arrays or scalars for each feature.
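+
+    Example:
+        A minimal sketch (the key names and dtype strings below are illustrative):
+
+        .. code-block:: python
+
+            hf_sample = {"Re": 3.2, "pressure": [1.0, 2.0, 3.0]}
+            dtypes = {"Re": "float64", "pressure": "float32"}
+            flat_tree = reconstruct_flat_tree_from_hf_sample(hf_sample, dtypes)
+            # flat_tree["Re"] -> np.float64 scalar, flat_tree["pressure"] -> float32 array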
""" - ds_stream = load_hf_dataset_from_hub(hf_repo, split="all_samples", streaming=True) + flat_tree = {} + + for key, value in sample.items(): + dtype = np.dtype(dtypes[key]) + + # Handle None + if value is None: + flat_tree[key] = None + continue + + # Convert list to np.array with correct dtype + if isinstance(value, (list, tuple)): + arr = np.array(value, dtype=dtype) + flat_tree[key] = arr + # Scalars + elif np.isscalar(value): + flat_tree[key] = dtype(type(value))(value) # ensure dtype matches + else: + # Already np.array + flat_tree[key] = np.array(value, dtype=dtype) - infos = {} - if "legal" in ds_stream.description: - infos["legal"] = ds_stream.description["legal"] - if "data_production" in ds_stream.description: - infos["data_production"] = ds_stream.description["data_production"] + return flat_tree - problem_definition = huggingface_description_to_problem_definition( - ds_stream.description - ) - samples = [] - for _ in range(number_of_samples): - hf_sample = next(iter(ds_stream)) - samples.append(to_plaid_sample(hf_sample)) +def reconstruct_flat_tree_from_hf_sample2( + ds: dict[str, object], + idx +) -> dict[str, object]: + """Reconstruct a flat tree (dict) from a Hugging Face sample using provided dtypes. + + Args: + sample (dict[str, object]): The Hugging Face sample dictionary. + dtypes (dict[str, str]): Dictionary mapping feature names to numpy dtype strings. + + Returns: + dict[str, object]: Flat tree with numpy arrays or scalars for each feature. + """ + import pyarrow as pa + import numpy as np + data_dict = {} + + for key in ds.column_names: + val = ds.data[key][idx] + + if val is None: + data_dict[key] = None + if isinstance(val, pa.Scalar): + data_dict[key] = val.as_py() + elif isinstance(val, (list, tuple)): + data_dict[key] = np.array(val) + elif isinstance(val, pa.Array): + data_dict[key] = val.to_numpy(zero_copy_only=False) + else: + data_dict[key] = val + + return data_dict + + +def infer_hf_features( + flat_tree: dict[str, object], + dtypes: dict[str, str], +) -> dict[str, object]: + """Infer Hugging Face dataset features from a flat tree and dtypes. + + Args: + flat_tree (dict[str, object]): Flat tree with numpy arrays or scalars for each feature. + dtypes (dict[str, str]): Dictionary mapping feature names to numpy dtype strings. + + Returns: + dict[str, object]: Dictionary mapping feature names to Hugging Face feature types. 
+ """ + features = {} + + for key, value in flat_tree.items(): + dtype = np.dtype(dtypes.get(key)) # get original dtype + hf_dtype = str(np.dtype(dtype)) + + # Strings + if hf_dtype.startswith("|S") or hf_dtype == "object": + features[key] = Sequence(Value("string")) + + # None values + elif value is None: + features[key] = Value("null") + + # Scalars + elif np.isscalar(value): + features[key] = Value(hf_dtype) + + # Lists or 1D/ND arrays + elif isinstance(value, (list, tuple, np.ndarray)): + arr = np.array(value, dtype=dtype) + if arr.ndim == 1: + features[key] = Sequence(Value(hf_dtype)) # variable-length 1D + elif arr.ndim == 2: + # 2D arrays: variable-length first dimension + features[key] = Sequence(Sequence(Value(hf_dtype))) + elif arr.ndim == 3: + # 3D arrays: variable-length first dimension + features[key] = Sequence(Sequence(Sequence(Value(hf_dtype)))) + else: + raise TypeError(f"Unsupported ndim for key={key}: {arr.ndim}") + + else: + raise TypeError(f"Unsupported type for key={key}: {type(value)}") + + return features - dataset = Dataset(samples=samples) - dataset.set_infos(infos) +# ---------------------------------------------------------------- - return dataset, problem_definition + +@deprecated("will be removed (no alternative)", version="0.1.9", removal="0.2.0") +def huggingface_description_to_problem_definition( + description: dict, +) -> ProblemDefinition: + """Converts a Hugging Face dataset description to a plaid problem definition. + + Args: + description (dict): the description field of a Hugging Face dataset, containing the problem definition + + Returns: + problem_definition (ProblemDefinition): the plaid problem definition initialized from the Hugging Face dataset description + """ + description = {} if description == "" else description + problem_definition = ProblemDefinition() + for func, key in [ + (problem_definition.set_task, "task"), + (problem_definition.set_split, "split"), + (problem_definition.add_input_scalars_names, "in_scalars_names"), + (problem_definition.add_output_scalars_names, "out_scalars_names"), + (problem_definition.add_input_fields_names, "in_fields_names"), + (problem_definition.add_output_fields_names, "out_fields_names"), + (problem_definition.add_input_meshes_names, "in_meshes_names"), + (problem_definition.add_output_meshes_names, "out_meshes_names"), + ]: + try: + func(description[key]) + except KeyError: + pass + + return problem_definition + + +@deprecated("will be removed (no alternative)", version="0.1.9", removal="0.2.0") +def huggingface_description_to_infos( + description: dict, +) -> dict[str, dict[str, str]]: + """Convert a Hugging Face dataset description dictionary to a PLAID infos dictionary. + + Extracts the "legal" and "data_production" sections from the Hugging Face description + and returns them in a format compatible with PLAID dataset infos. + + Args: + description (dict): The Hugging Face dataset description dictionary. + + Returns: + dict[str, dict[str, str]]: Dictionary containing "legal" and "data_production" infos if present. 
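+
+    Example:
+        A minimal sketch of the extraction (the values are illustrative):
+
+        .. code-block:: python
+
+            description = {"legal": {"owner": "CompX", "license": "li_X"}, "task": "regression"}
+            infos = huggingface_description_to_infos(description)
+            # infos == {"legal": {"owner": "CompX", "license": "li_X"}}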
+ """ + infos = {} + if "legal" in description: + infos["legal"] = description["legal"] + if "data_production" in description: + infos["data_production"] = description["data_production"] + return infos +@deprecated("will be removed (no alternative)", version="0.1.9", removal="0.2.0") def create_string_for_huggingface_dataset_card( description: dict, download_size_bytes: int, diff --git a/src/plaid/constants.py b/src/plaid/constants.py index 1f11d0d1..010ebbaf 100644 --- a/src/plaid/constants.py +++ b/src/plaid/constants.py @@ -8,7 +8,7 @@ It includes: - AUTHORIZED_TASKS: List of supported task types (e.g., regression, classification). -- AUTHORIZED_FEATURE_TYPES: List of supported feature types (e.g., scalar, time_series). +- AUTHORIZED_FEATURE_TYPES: List of supported feature types (e.g., scalar, field, nodes). - AUTHORIZED_FEATURE_INFOS: Dictionary specifying allowed metadata keys for various feature types. - AUTHORIZED_INFO_KEYS: Dictionary specifying allowed metadata keys for various information sections. - CGNS_FIELD_LOCATIONS: List of valid field locations as defined by the CGNS standard. @@ -19,15 +19,16 @@ AUTHORIZED_TASKS = ["regression", "classification"] -AUTHORIZED_FEATURE_TYPES = ["scalar", "time_series", "field", "nodes"] +AUTHORIZED_FEATURE_TYPES = ["scalar", "field", "nodes"] AUTHORIZED_FEATURE_INFOS = { "scalar": ["name"], - "time_series": ["name"], "field": ["name", "location", "zone_name", "base_name", "time"], "nodes": ["zone_name", "base_name", "time"], } +# Information keys for dataset metadata +# key ["plaid"]["version"] is not included as it is managed internally AUTHORIZED_INFO_KEYS = { "legal": ["owner", "license"], "data_production": [ diff --git a/src/plaid/containers/dataset.py b/src/plaid/containers/dataset.py index 858233c6..03c9e9bb 100644 --- a/src/plaid/containers/dataset.py +++ b/src/plaid/containers/dataset.py @@ -27,8 +27,11 @@ import numpy as np import yaml +from packaging.specifiers import SpecifierSet +from packaging.version import Version from tqdm import tqdm +import plaid from plaid.constants import AUTHORIZED_INFO_KEYS, CGNS_FIELD_LOCATIONS from plaid.containers.sample import Sample from plaid.containers.utils import check_features_size_homogeneity @@ -106,9 +109,9 @@ def __init__( >>> 3 for sample in dataset: print(sample) - >>> Sample(1 scalar, 0 time series, 1 timestamp, 2 fields) - Sample(1 scalar, 0 time series, 0 timestamps, 0 fields) - Sample(2 scalars, 0 time series, 1 timestamp, 2 fields) + >>> Sample(1 scalar, 1 timestamp, 2 fields) + Sample(1 scalar, 0 timestamps, 0 fields) + Sample(2 scalars, 1 timestamp, 2 fields) # 3. Create Dataset instance from a list of Samples dataset = Dataset(samples=[sample1, sample2, sample3]) @@ -126,7 +129,9 @@ def __init__( """ self._samples: dict[int, Sample] = {} # sample_id -> sample # info_name -> description - self._infos: dict[str, dict[str, str]] = {} + self._infos: dict[str, dict[str, Union[str, Version, SpecifierSet]]] = { + "plaid": {"version": Version(plaid.__version__)} + } if samples is not None and (directory_path is not None or path is not None): raise ValueError("'samples' and 'path' are mutually exclusive") @@ -404,28 +409,6 @@ def get_scalar_names(self, ids: Optional[list[int]] = None) -> list[str]: scalars_names.sort() return scalars_names - # -------------------------------------------------------------------------# - def get_time_series_names(self, ids: Optional[list[int]] = None) -> list[str]: - """Return union of time series names in all samples with id in ids. 
-
-        Args:
-            ids (list[int], optional): Select time series depending on sample id. If None, take all samples. Defaults to None.
-
-        Returns:
-            list[str]: List of all time series names
-        """
-        if ids is not None and len(set(ids)) != len(ids):
-            logger.warning("Provided ids are not unique")
-
-        time_series_names = []
-        for sample in self.get_samples(ids, as_list=True):
-            ts_names = sample.get_time_series_names()
-            for ts_name in ts_names:
-                if ts_name not in time_series_names:
-                    time_series_names.append(ts_name)
-        time_series_names.sort()
-        return time_series_names
-
    # -------------------------------------------------------------------------#
    def get_field_names(
        self,
@@ -452,18 +435,18 @@ def get_field_names(

        fields_names = []
        for sample in self.get_samples(ids, as_list=True):
-            times = [time] if time else sample.meshes.get_all_mesh_times()
+            times = [time] if time else sample.features.get_all_mesh_times()
            for time in times:
                base_names = (
                    [base_name]
                    if base_name
-                    else sample.meshes.get_base_names(time=time)
+                    else sample.features.get_base_names(time=time)
                )
                for base_name in base_names:
                    zone_names = (
                        [zone_name]
                        if zone_name
-                        else sample.meshes.get_zone_names(
+                        else sample.features.get_zone_names(
                            time=time, base_name=base_name
                        )
                    )
@@ -506,7 +489,7 @@ def get_all_features_identifiers(

    def get_all_features_identifiers_by_type(
        self,
-        feature_type: Literal["scalar", "nodes", "field", "time_series"],
+        feature_type: Literal["scalar", "nodes", "field"],
        ids: list[int] = None,
    ) -> list[FeatureIdentifier]:
        """Get all features identifiers from the dataset.
@@ -675,7 +658,7 @@ def update_features_from_identifier(
    ) -> Self:
        """Update one or several features of the dataset by their identifier(s).

-        This method applies updates to scalars, time series, fields, or nodes
+        This method applies updates to scalars, fields, or nodes
        using feature identifiers, and corresponding feature data.
        When `in_place=False`, a deep copy of the dataset is created before
        applying updates, ensuring full isolation from the original.
@@ -706,16 +689,14 @@ def update_features_from_identifier(
    def extract_dataset_from_identifier(
        self,
        feature_identifiers: Union[FeatureIdentifier, list[FeatureIdentifier]],
-        keep_cgns: bool = False,
    ) -> Self:
        """Extract features of the dataset by their identifier(s) and return a new dataset containing these features.

-        This method applies updates to scalars, time series, fields, or nodes
+        This method extracts scalars, fields, or nodes
        using feature identifiers

        Args:
            feature_identifiers (dict or list of dict): One or more feature identifiers.
-            keep_cgns (bool): If True, keeps the CGNS tree structure in the extracted dataset.
        Returns:
            Self: New dataset containing the provided feature identifiers
@@ -730,37 +711,6 @@ def extract_dataset_from_identifier(
            extracted_sample = self[id].extract_sample_from_identifier(
                feature_identifiers
            )
-
-            if keep_cgns and not extracted_sample.meshes.data and self[id].meshes.data:
-                for time in self[id].meshes.get_all_mesh_times():
-                    extracted_sample.meshes.init_tree(time=time)
-                    for base_name in self[id].meshes.get_base_names(time=time):
-                        original_base = self[id].meshes.get_base(
-                            base_name=base_name, time=time
-                        )
-                        extracted_sample.meshes.init_base(
-                            topological_dim=original_base[1][0],
-                            physical_dim=original_base[1][1],
-                            base_name=base_name,
-                            time=time,
-                        )
-                        for zone_name in self[id].meshes.get_zone_names(
-                            time=time, base_name=base_name
-                        ):
-                            original_zone = self[id].meshes.get_zone(
-                                zone_name=zone_name, base_name=base_name, time=time
-                            )
-                            original_zone_type = self[id].meshes.get_zone_type(
-                                zone_name=zone_name, base_name=base_name, time=time
-                            )
-                            extracted_sample.meshes.init_zone(
-                                zone_shape=original_zone[1],
-                                zone_type=original_zone_type,
-                                zone_name=zone_name,
-                                base_name=base_name,
-                                time=time,
-                            )
-
            dataset.add_sample(sample=extracted_sample, id=id)

        return dataset
@@ -785,7 +735,7 @@ def get_tabular_from_homogeneous_identifiers(
        """Extract features of the dataset by their identifier(s) and return an array containing these features.

        Features must have identical sizes to be cast into an array. The first dimension of the array is the number of samples in the dataset.
-        This method applies updates to scalars, time series, fields, or nodes using feature identifiers.
+        This method extracts scalars, fields, or nodes using feature identifiers.

        Args:
            feature_identifiers (list of dict): Feature identifiers.
@@ -793,9 +743,6 @@ def get_tabular_from_homogeneous_identifiers(

        Returns:
            Array: An array containing the requested features, of size (nb_sample, nb_features, dim_features)

-        Notes:
-            Not working with time_series for the moment (time series have 2 elements: time_sequence and values)
-
        Raises:
            AssertionError: If feature sizes are inconsistent.
        """
@@ -842,7 +789,7 @@ def get_tabular_from_stacked_identifiers(
    def add_features_from_tabular(
        self,
        tabular: Array,
-        feature_identifiers: Union[FeatureIdentifier, list[FeatureIdentifier]],
+        feature_identifiers: list[FeatureIdentifier],
        restrict_to_features: bool = True,
    ) -> Self:
        """Add or update features in the dataset from tabular data using feature identifiers.
@@ -854,7 +801,7 @@ def add_features_from_tabular(
        Parameters:
            tabular (Array): of size (nb_sample, nb_features) or (nb_sample, nb_features, dim_feature) if dim_feature>1
-            feature_identifiers (dict or list of dict): One or more feature identifiers specifying which features to update/add.
+            feature_identifiers (list of dict): One or more feature identifiers specifying which features to update/add.
            restrict_to_features (bool, optional): If True, only returns the features from feature identifiers, otherwise keep the other features as well. Defaults to True.

        Returns:
@@ -867,8 +814,6 @@ def add_features_from_tabular(
            If the number of rows in `tabular` does not match the number of samples
            in the dataset, or if the number of feature identifiers does not match the
            number of columns in `tabular`.
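+
+        Example:
+            A minimal sketch (the scalar names are illustrative):
+
+            .. code-block:: python
+
+                feat_ids = [
+                    FeatureIdentifier({"type": "scalar", "name": "Re"}),
+                    FeatureIdentifier({"type": "scalar", "name": "Mach"}),
+                ]
+                tabular = np.array([[1.0, 0.3], [2.0, 0.4]])  # (nb_sample, nb_features)
+                dataset = dataset.add_features_from_tabular(tabular, feat_ids)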
""" - if not isinstance(feature_identifiers, list): - feature_identifiers = [feature_identifiers] for i_id, feat_id in enumerate(feature_identifiers): feature_identifiers[i_id] = FeatureIdentifier(feat_id) @@ -1020,19 +965,25 @@ def set_infos(self, infos: dict[str, dict[str, str]]) -> None: >>> {'legal': {'owner': 'CompX', 'license': 'li_X'}} """ for cat_key in infos.keys(): # Format checking on "infos" - if cat_key not in AUTHORIZED_INFO_KEYS: - raise KeyError( - f"{cat_key=} not among authorized keys. Maybe you want to try among these keys {list(AUTHORIZED_INFO_KEYS.keys())}" - ) - for info_key in infos[cat_key].keys(): - if info_key not in AUTHORIZED_INFO_KEYS[cat_key]: + if cat_key != "plaid": + if cat_key not in AUTHORIZED_INFO_KEYS: raise KeyError( - f"{info_key=} not among authorized keys. Maybe you want to try among these keys {AUTHORIZED_INFO_KEYS[cat_key]}" + f"{cat_key=} not among authorized keys. Maybe you want to try among these keys {list(AUTHORIZED_INFO_KEYS.keys())}" ) + for info_key in infos[cat_key].keys(): + if info_key not in AUTHORIZED_INFO_KEYS[cat_key]: + raise KeyError( + f"{info_key=} not among authorized keys. Maybe you want to try among these keys {AUTHORIZED_INFO_KEYS[cat_key]}" + ) if len(self._infos) > 0: logger.warning("infos not empty, replacing it anyway") - self._infos = infos + self._infos = copy.deepcopy(infos) + + if "plaid" not in self._infos: + self._infos["plaid"] = {} + if "version" not in self._infos["plaid"]: + self._infos["plaid"]["version"] = Version(plaid.__version__) def get_infos(self) -> dict[str, dict[str, str]]: """Get information from an instance of :class:`Dataset `. @@ -1191,9 +1142,6 @@ def summarize_features(self) -> str: - mach_out: 32/32 samples (100.0%) - power: 30/32 samples (93.8%) - Time Series (0 unique): - None - Fields (8 unique): - M_iso: 30/32 samples (93.8%) - mach: 30/32 samples (93.8%) @@ -1212,12 +1160,10 @@ def summarize_features(self) -> str: # Collect all feature names across all samples all_scalar_names = set() - all_ts_names = set() all_field_names = set() # Count occurrences of each feature scalar_counts = {} - ts_counts = {} field_counts = {} for _, sample in self._samples.items(): @@ -1227,18 +1173,12 @@ def summarize_features(self) -> str: for name in scalar_names: scalar_counts[name] = scalar_counts.get(name, 0) + 1 - # Time series - ts_names = sample.get_time_series_names() - all_ts_names.update(ts_names) - for name in ts_names: - ts_counts[name] = ts_counts.get(name, 0) + 1 - # Fields - times = sample.meshes.get_all_mesh_times() + times = sample.features.get_all_mesh_times() for time in times: - base_names = sample.meshes.get_base_names(time=time) + base_names = sample.features.get_base_names(time=time) for base_name in base_names: - zone_names = sample.meshes.get_zone_names( + zone_names = sample.features.get_zone_names( base_name=base_name, time=time ) for zone_name in zone_names: @@ -1261,16 +1201,6 @@ def summarize_features(self) -> str: summary += " None\n" summary += "\n" - # Time series summary - summary += f"Time Series ({len(all_ts_names)} unique):\n" - if all_ts_names: - for name in sorted(all_ts_names): - count = ts_counts.get(name, 0) - summary += f" - {name}: {count}/{total_samples} samples ({count / total_samples * 100:.1f}%)\n" - else: - summary += " None\n" - summary += "\n" - # Fields summary summary += f"Fields ({len(all_field_names)} unique):\n" if all_field_names: @@ -1320,18 +1250,16 @@ def check_feature_completeness(self) -> str: # Collect all possible features across all samples 
all_scalar_names = set() - all_ts_names = set() all_field_names = set() for sample in self._samples.values(): all_scalar_names.update(sample.get_scalar_names()) - all_ts_names.update(sample.get_time_series_names()) - times = sample.meshes.get_all_mesh_times() + times = sample.features.get_all_mesh_times() for time in times: - base_names = sample.meshes.get_base_names(time=time) + base_names = sample.features.get_base_names(time=time) for base_name in base_names: - zone_names = sample.meshes.get_zone_names( + zone_names = sample.features.get_zone_names( base_name=base_name, time=time ) for zone_name in zone_names: @@ -1354,19 +1282,13 @@ def check_feature_completeness(self) -> str: if missing_scalars: missing_features.extend([f"scalar:{name}" for name in missing_scalars]) - # Check time series - sample_ts = set(sample.get_time_series_names()) - missing_ts = all_ts_names - sample_ts - if missing_ts: - missing_features.extend([f"time_series:{name}" for name in missing_ts]) - # Check fields sample_fields = set() - times = sample.meshes.get_all_mesh_times() + times = sample.features.get_all_mesh_times() for time in times: - base_names = sample.meshes.get_base_names(time=time) + base_names = sample.features.get_base_names(time=time) for base_name in base_names: - zone_names = sample.meshes.get_zone_names( + zone_names = sample.features.get_zone_names( base_name=base_name, time=time ) for zone_name in zone_names: @@ -1599,29 +1521,34 @@ def _save_to_dir_(self, path: Union[str, Path], verbose: bool = False) -> None: if verbose: # pragma: no cover print(f"Saving database to: {path}") + # Save infos + assert "plaid" in self._infos, f"{self._infos.keys()=} should contain 'plaid'" + assert "version" in self._infos["plaid"], ( + f"{self._infos['plaid'].keys()=} should contain 'version'" + ) + plaid_version = Version(plaid.__version__) + if ( + isinstance(self._infos["plaid"]["version"], SpecifierSet) + or self._infos["plaid"]["version"] != plaid_version + ): + logger.warning( + f"Version mismatch: Dataset was loaded from version: {self._infos['plaid']['version']}, and will be saved with version: {plaid_version}" + ) + self._infos["plaid"]["old_version"] = str(self._infos["plaid"]["version"]) + self._infos["plaid"]["version"] = str(plaid_version) + infos_fname = path / "infos.yaml" + with open(infos_fname, "w") as file: + yaml.dump(self._infos, file, default_flow_style=False, sort_keys=False) + + # Save samples samples_dir = path / "samples" if not (samples_dir.is_dir()): samples_dir.mkdir(parents=True) - # ---# save samples for i_sample, sample in tqdm(self._samples.items(), disable=not (verbose)): sample_fname = samples_dir / f"sample_{i_sample:09d}" sample.save(sample_fname) - # ---# save infos - if len(self._infos) > 0: - infos_fname = path / "infos.yaml" - with open(infos_fname, "w") as file: - yaml.dump(self._infos, file, default_flow_style=False, sort_keys=False) - - # #---# save stats - # stats_fname = path / 'stats.yaml' - # self._stats.save(stats_fname) - - # #---# save flags - # flags_fname = path / 'flags.yaml' - # self._flags.save(flags_fname) - def _load_from_dir_( self, path: Union[str, Path], @@ -1656,6 +1583,22 @@ def _load_from_dir_( if verbose: # pragma: no cover print(f"Reading database located at: {path}") + # Load infos + infos_fname = path / "infos.yaml" + if infos_fname.is_file(): + with open(infos_fname, "r") as file: + self._infos = yaml.safe_load(file) + if "plaid" not in self._infos or "version" not in self._infos["plaid"]: + self._infos.setdefault("plaid", {"version": 
Version(plaid.__version__)}) + self._infos["plaid"].setdefault("old_version", SpecifierSet("<=0.1.9")) + else: + if not isinstance(self._infos["plaid"]["version"], (Version, SpecifierSet)): + self._infos["plaid"]["version"] = Version( + self._infos["plaid"]["version"] + ) + print(f"=== Loaded dataset version: {self._infos['plaid']['version']}") + + # Load samples sample_paths = sorted( [path for path in (path / "samples").glob("sample_*") if path.is_dir()] ) @@ -1719,11 +1662,6 @@ def update(self, *a): self.set_sample(id, sample) """ - infos_fname = path / "infos.yaml" - if infos_fname.is_file(): - with open(infos_fname, "r") as file: - self._infos = yaml.safe_load(file) - if len(self) == 0: # pragma: no cover print("Warning: dataset contains no sample") @@ -1893,7 +1831,7 @@ def __getitem__( __call__ = __getitem__ - def __repr__(self) -> str: + def __str__(self) -> str: """Return a string representation of the dataset. Returns: @@ -1917,10 +1855,6 @@ def __repr__(self) -> str: nb_scalars = len(self.get_scalar_names()) str_repr += f"{nb_scalars} scalar{'' if nb_scalars == 1 else 's'}, " - # time series - nb_time_series = len(self.get_time_series_names()) - str_repr += f"{nb_time_series} time_series, " - # fields nb_fields = len(self.get_field_names()) str_repr += f"{nb_fields} field{'' if nb_fields == 1 else 's'}, " @@ -1929,3 +1863,5 @@ def __repr__(self) -> str: str_repr = str_repr[:-2] str_repr = str_repr + ")" return str_repr + + __repr__ = __str__ diff --git a/src/plaid/containers/features/meshes.py b/src/plaid/containers/features.py similarity index 77% rename from src/plaid/containers/features/meshes.py rename to src/plaid/containers/features.py index edbfbebd..e0018ccf 100644 --- a/src/plaid/containers/features/meshes.py +++ b/src/plaid/containers/features.py @@ -1,11 +1,8 @@ """Module for implementing collections of features within a Sample.""" -import copy import logging -from pathlib import Path from typing import Optional -import CGNS.MAP as CGM import CGNS.PAT.cgnskeywords as CGK import CGNS.PAT.cgnslib as CGL import CGNS.PAT.cgnsutils as CGU @@ -20,41 +17,173 @@ _check_names, _read_index, ) -from plaid.types import CGNSLink, CGNSNode, CGNSPath, CGNSTree, Field +from plaid.types import CGNSNode, CGNSTree, Field from plaid.utils import cgns_helper as CGH logger = logging.getLogger(__name__) -class SampleMeshes: +class SampleFeatures: """A container for meshes within a Sample. Args: - meshes (dict[float, CGNSTree], optional): A dictionary mapping time steps to CGNSTrees. Defaults to None. - mesh_base_name (str, optional): The base name for the mesh. Defaults to 'Base'. - mesh_zone_name (str, optional): The zone name for the mesh. Defaults to 'Zone'. - links (dict[float, list[CGNSLink]], optional): A dictionary mapping time steps to lists of links. Defaults to None. - paths (dict[float, list[CGNSPath]], optional): A dictionary mapping time steps to lists of paths. Defaults to None. + data (dict[float, CGNSTree], optional): A dictionary mapping time steps to CGNSTrees. Defaults to None. 
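+
+    Example:
+        A minimal sketch (assuming ``tree`` is a CGNS tree built elsewhere):
+
+        .. code-block:: python
+
+            from plaid.containers.features import SampleFeatures
+
+            features = SampleFeatures(data={0.0: tree})
+            print(features.get_all_mesh_times())
+            >>> [0.0]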
""" def __init__( self, - meshes: Optional[dict[float, CGNSTree]], - mesh_base_name: str = "Base", - mesh_zone_name: str = "Zone", - links: Optional[dict[float, list[CGNSLink]]] = None, - paths: Optional[dict[float, list[CGNSPath]]] = None, + data: Optional[dict[float, CGNSTree]], ): - self.data: dict[float, CGNSTree] = meshes if meshes is not None else {} - self._links = links if links is not None else {} - self._paths = paths if paths is not None else {} + self.data: dict[float, CGNSTree] = data if data is not None else {} self._default_active_base: Optional[str] = None self._default_active_zone: Optional[str] = None self._default_active_time: Optional[float] = None - self._mesh_base_name: str = mesh_base_name - self._mesh_zone_name: str = mesh_zone_name + # -------------------------------------------------------------------------# + + def set_default_base(self, base_name: str, time: Optional[float] = None) -> None: + """Set the default base for the specified time (that will also be set as default if provided). + + The default base is a reference point for various operations in the system. + + Args: + base_name (str): The name of the base to be set as the default. + time (float, optional): The time at which the base should be set as default. If not provided, the default base and active zone will be set with the default time. + + Raises: + ValueError: If the specified base does not exist at the given time. + + Note: + - Setting the default base and is important for synchronizing operations with a specific base in the system's data. + - The available mesh base can be obtained using the `get_base_names` method. + + Example: + .. code-block:: python + + from plaid import Sample + sample = Sample("path_to_plaid_sample") + print(sample) + >>> Sample(2 scalars, 1 timestamp, 5 fields) + print(sample.get_physical_dim("BaseA", 0.5)) + >>> 3 + + # Set "BaseA" as the default base for the default time + sample.set_default_base("BaseA") + + # You can now use class functions with "BaseA" as default base + print(sample.get_physical_dim(0.5)) + >>> 3 + + # Set "BaseB" as the default base for a specific time + sample.set_default_base("BaseB", 0.5) + + # You can now use class functions with "BaseB" as default base and 0.5 as default time + print(sample.get_physical_dim()) # Physical dim of the base "BaseB" + >>> 3 + """ + if time is not None: + self.set_default_time(time) + if base_name in (self._default_active_base, None): + return + if not self.has_base(base_name, time): + raise ValueError(f"base {base_name} does not exist at time {time}") + + self._default_active_base = base_name + + def set_default_zone_base( + self, zone_name: str, base_name: str, time: Optional[float] = None + ) -> None: + """Set the default base and active zone for the specified time (that will also be set as default if provided). + + The default base and active zone serve as reference points for various operations in the system. + + Args: + zone_name (str): The name of the zone to be set as the active zone. + base_name (str): The name of the base to be set as the default. + time (float, optional): The time at which the base and zone should be set as default. If not provided, the default base and active zone will be set with the default time. + + Raises: + ValueError: If the specified base or zone does not exist at the given time + + Note: + - Setting the default base and zone are important for synchronizing operations with a specific base/zone in the system's data. 
+ - The available mesh bases and zones can be obtained using the `get_base_names` and `get_base_zones` methods, respectively. + + Example: + .. code-block:: python + + from plaid import Sample + sample = Sample("path_to_plaid_sample") + print(sample) + >>> Sample(2 scalars, 1 timestamp, 5 fields) + print(sample.get_zone_type("ZoneX", "BaseA", 0.5)) + >>> Structured + + # Set "BaseA" as the default base and "ZoneX" as the active zone for the default time + sample.set_default_zone_base("ZoneX", "BaseA") + + # You can now use class functions with "BaseA" as default base with "ZoneX" as default zone + print(sample.get_zone_type(0.5)) # type of the zone "ZoneX" of base "BaseA" + >>> Structured + + # Set "BaseB" as the default base and "ZoneY" as the active zone for a specific time + sample.set_default_zone_base("ZoneY", "BaseB", 0.5) + + # You can now use class functions with "BaseB" as default base with "ZoneY" as default zone and 0.5 as default time + print(sample.get_zone_type()) # type of the zone "ZoneY" of base "BaseB" at 0.5 + >>> Unstructured + """ + self.set_default_base(base_name, time) + if zone_name in (self._default_active_zone, None): + return + if not self.has_zone(zone_name, base_name, time): + raise ValueError( + f"zone {zone_name} does not exist for the base {base_name} at time {time}" + ) + + self._default_active_zone = zone_name + + def set_default_time(self, time: float) -> None: + """Set the default time for the system. + + This function sets the default time to be used for various operations in the system. + + Args: + time (float): The time value to be set as the default. + + Raises: + ValueError: If the specified time does not exist in the available mesh times. + + Note: + - Setting the default time is important for synchronizing operations with a specific time point in the system's data. + - The available mesh times can be obtained using the `get_all_mesh_times` method. + + Example: + .. code-block:: python + + from plaid import Sample + sample = Sample("path_to_plaid_sample") + print(sample) + >>> Sample(2 scalars, 1 timestamp, 5 fields) + print(sample.show_tree(0.5)) + >>> ... + + # Set the default time to 0.5 seconds + sample.set_default_time(0.5) + + # You can now use class functions with 0.5 as default time + print(sample.show_tree()) # show the cgns tree at the time 0.5 + >>> ... + """ + if time in (self._default_active_time, None): + return + if time not in self.get_all_mesh_times(): + raise ValueError(f"time {time} does not exist in mesh times") + + self._default_active_time = time + + # -------------------------------------------------------------------------# def get_all_mesh_times(self) -> list[float]: """Retrieve all time steps corresponding to the meshes, if available. @@ -85,7 +214,10 @@ def get_time_assignment(self, time: Optional[float] = None) -> float: return self._default_active_time if time is None else time def get_base_assignment( - self, base_name: Optional[str] = None, time: Optional[float] = None + self, + base_name: Optional[str] = None, + time: Optional[float] = None, + globals: bool = False, ) -> str: """Retrieve the default base name for the CGNS operations. @@ -95,6 +227,7 @@ def get_base_assignment( Args: base_name (str, optional): The name of the base to attribute the operation to. If not provided, the default base set in the system will be used. time (str, optional): The time value provided for the operation. If not provided, the default time set in the system will be used. 
+ globals (bool, optional): If True, only consider the "Global" base. If False, consider all bases except "Global". Defaults to False. Raises: KeyError: If no default base can be determined based on the provided or default. @@ -112,7 +245,7 @@ def get_base_assignment( if base_name: return base_name - base_names = self.get_base_names(time=time) + base_names = self.get_base_names(time=time, globals=globals) if len(base_names) == 0: return None elif len(base_names) == 1: @@ -126,6 +259,7 @@ def get_zone_assignment( zone_name: Optional[str] = None, base_name: Optional[str] = None, time: Optional[float] = None, + globals: bool = False, ) -> str: """Retrieve the default zone name for the CGNS operations. @@ -136,6 +270,7 @@ def get_zone_assignment( zone_name (str, optional): The name of the zone to attribute the operation to. If not provided, the default zone set in the system within the specified base will be used. base_name (str, optional): The name of the base within which the zone should be attributed. If not provided, the default base set in the system will be used. time (str, optional): The time value provided for the operation. If not provided, the default time set in the system will be used. + globals (bool, optional): If True, only consider the "Global" base. If False, consider all bases except "Global". Defaults to False. Raises: KeyError: If no default zone can be determined based on the provided or default values. @@ -153,7 +288,7 @@ def get_zone_assignment( if zone_name: return zone_name - base_name = self.get_base_assignment(base_name, time) + base_name = self.get_base_assignment(base_name, time, globals) zone_names = self.get_zone_names(base_name, time=time) if len(zone_names) == 0: return None @@ -178,36 +313,16 @@ def init_tree(self, time: Optional[float] = None) -> CGNSTree: if not self.data: self.data = {time: CGL.newCGNSTree()} - self._links = {time: None} - self._paths = {time: None} elif time not in self.data: self.data[time] = CGL.newCGNSTree() - self._links[time] = None - self._paths[time] = None return self.data[time] - def get_links(self, time: Optional[float] = None) -> list[CGNSLink]: - """Retrieve the CGNS links for a specified time step, if available. - - Args: - time (float, optional): The time step for which to retrieve the CGNS links. If a specific time is not provided, the method will display the links for the default time step. - - Returns: - list: The CGNS links for the specified time step if available; otherwise, returns None. - """ - time = self.get_time_assignment(time) - return self._links[time] if (self._links) else None - - def get_mesh( - self, time: Optional[float] = None, apply_links: bool = False, in_memory=False - ) -> Optional[CGNSTree]: + def get_mesh(self, time: Optional[float] = None) -> Optional[CGNSTree]: """Retrieve the CGNS tree structure for a specified time step, if available. Args: time (float, optional): The time step for which to retrieve the CGNS tree structure. If a specific time is not provided, the method will display the tree structure for the default time step. - apply_links (bool, optional): Activates the following of the CGNS links to reconstruct the complete CGNS tree - in this case, a deepcopy of the tree is made to prevent from modifying the existing tree. - in_memory (bool, optional): Active if apply_links == True, ONLY WORKING if linked mesh is in the current sample. This option follows the link in memory from current sample. 
        Returns:
            CGNSTree: The CGNS tree structure for the specified time step if available; otherwise, returns None.
@@ -216,28 +331,7 @@ def get_mesh(
            return None

        time = self.get_time_assignment(time)
-        tree = self.data[time]
-
-        links = self.get_links(time)
-        if not apply_links or links is None:
-            return tree
-
-        tree = copy.deepcopy(tree)
-        for link in links:
-            if not in_memory:
-                subtree, _, _ = CGM.load(str(Path(link[0]) / link[1]), subtree=link[2])
-            else:
-                linked_timestep = int(link[1].split(".cgns")[0].split("_")[1])
-                linked_timestamp = list(self.data.keys())[linked_timestep]
-                subtree = self.get_mesh(linked_timestamp)
-            node_path = "/".join(link[2].split("/")[:-1])
-            node_to_append = CGU.getNodeByPath(tree, node_path)
-            assert node_to_append is not None, (
-                f"nodepath {node_path} not present in tree, cannot apply link"
-            )
-            node_to_append[2].append(CGU.getNodeByPath(subtree, link[2]))
-
-        return tree
+        return self.data[time]

    def set_meshes(self, meshes: dict[float, CGNSTree]) -> None:
        """Set all meshes with their corresponding time step.
@@ -250,11 +344,6 @@ def set_meshes(self, meshes: dict[float, CGNSTree]) -> None:
        """
        if not self.data:
            self.data = meshes
-            self._links = {}
-            self._paths = {}
-            for time in self.data.keys():
-                self._links[time] = None
-                self._paths[time] = None
        else:
            raise KeyError(
                "meshes is already set, you cannot overwrite it, delete it first or extend it with `Sample.add_tree`"
@@ -270,6 +359,9 @@ def add_tree(self, tree: CGNSTree, time: Optional[float] = None) -> CGNSTree:

        Raises:
            ValueError: If the provided CGNS tree is an empty list.

+        Note:
+            `tree` should not be reused afterwards, since it may be modified in place by operations on this container.
+
        Returns:
            CGNSTree: The merged CGNS tree.
        """
@@ -280,12 +372,8 @@ def add_tree(self, tree: CGNSTree, time: Optional[float] = None) -> CGNSTree:

        if not self.data:
            self.data = {time: tree}
-            self._links = {time: None}
-            self._paths = {time: None}
        elif time not in self.data:
            self.data[time] = tree
-            self._links[time] = None
-            self._paths[time] = None
        else:
            # TODO: handle the case where bases share the same name... + recursive
            # merge of the nodes
@@ -329,8 +417,6 @@ def del_tree(self, time: float) -> CGNSTree:

        if time not in self.data:
            raise KeyError(f"There is no CGNS tree for time {time}.")

-        self._links.pop(time, None)
-        self._paths.pop(time, None)
        return self.data.pop(time)

    # -------------------------------------------------------------------------#
@@ -388,6 +474,7 @@ def init_base(
        physical_dim: int,
        base_name: Optional[str] = None,
        time: Optional[float] = None,
+        globals: bool = False,
    ) -> CGNSNode:
        """Create a Base node named `base_name` if it doesn't already exist.

        Args:
            topological_dim (int): Cell dimension, see [CGNS standard](https://pycgns.github.io/PAT/lib.html#CGNS.PAT.cgnslib.newCGNSBase).
            physical_dim (int): Ambient space dimension, see [CGNS standard](https://pycgns.github.io/PAT/lib.html#CGNS.PAT.cgnslib.newCGNSBase).
            base_name (str): If not specified, defaults to `Base_<topological_dim>_<physical_dim>`. Defaults to None.
            time (float, optional): The time at which to initialize the base. If a specific time is not provided, the method will display the tree structure for the default time step.
+            globals (bool, optional): If True, only consider the "Global" base. If False, consider all bases except "Global". Defaults to False.

        Returns:
            CGNSNode: The created Base node.
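+
+        Example:
+            A minimal sketch (assuming ``features`` is a SampleFeatures instance; the base name follows the default naming scheme):
+
+            .. code-block:: python
+
+                features.init_base(topological_dim=3, physical_dim=3, time=0.0)
+                print(features.get_base_names(time=0.0))
+                >>> ['Base_3_3']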
@@ -405,13 +493,7 @@ def init_base( time = self.get_time_assignment(time) if base_name is None: - base_name = ( - self._mesh_base_name - + "_" - + str(topological_dim) - + "_" - + str(physical_dim) - ) + base_name = "Base_" + str(topological_dim) + "_" + str(physical_dim) self.init_tree(time) if not (self.has_base(base_name, time)): @@ -419,7 +501,7 @@ def init_base( self.data[time], base_name, topological_dim, physical_dim ) - base_names = self.get_base_names(time=time) + base_names = self.get_base_names(time=time, globals=globals) for base_name in base_names: base_node = self.get_base(base_name, time=time) if CGU.getValueByPath(base_node, "Time/TimeValues") is None: @@ -468,6 +550,7 @@ def get_base_names( full_path: bool = False, unique: bool = False, time: Optional[float] = None, + globals: bool = False, ) -> list[str]: """Return Base names. @@ -475,6 +558,7 @@ def get_base_names( full_path (bool, optional): If True, returns full paths instead of only Base names. Defaults to False. unique (bool, optional): If True, returns unique names instead of potentially duplicated names. Defaults to False. time (float, optional): The time at which to check for the Base. If a specific time is not provided, the method will display the tree structure for the default time step. + globals (bool, optional): If True, only consider the "Global" base. If False, consider all bases except "Global". Defaults to False. Returns: list[str]: @@ -482,9 +566,23 @@ def get_base_names( time = self.get_time_assignment(time) if self.data and time in self.data and self.data[time] is not None: - return CGH.get_base_names( - self.data[time], full_path=full_path, unique=unique - ) + if globals is False: + return [ + bn + for bn in CGH.get_base_names( + self.data[time], full_path=full_path, unique=unique + ) + if bn != "Global" + ] + else: + return ( + ["Global"] + if "Global" + in CGH.get_base_names( + self.data[time], full_path=full_path, unique=unique + ) + else [] + ) else: return [] @@ -516,12 +614,12 @@ def get_base( CGNSNode or None: The Base node with the specified name or None if it is not found. """ time = self.get_time_assignment(time) - base_name = self.get_base_assignment(base_name, time) - if time not in self.data or self.data[time] is None: logger.warning(f"No mesh exists in the sample at {time=}") return None + if base_name != "Global": + base_name = self.get_base_assignment(base_name, time, globals=False) return CGU.getNodeByPath(self.data[time], f"/CGNSTree/{base_name}") # -------------------------------------------------------------------------# @@ -561,8 +659,7 @@ def init_zone( zone_name = self.get_zone_assignment(zone_name, base_name, time) if zone_name is None: - zone_name = self._mesh_zone_name - + zone_name = "Zone" zone_node = CGL.newZone(base_node, zone_name, zone_shape, zone_type) return zone_node @@ -659,6 +756,7 @@ def get_zone( zone_name: Optional[str] = None, base_name: Optional[str] = None, time: Optional[float] = None, + globals: bool = False, ) -> CGNSNode: """Retrieve a CGNS Zone node by its name within a specific Base and time. @@ -666,6 +764,7 @@ def get_zone( zone_name (str, optional): The name of the Zone node to retrieve. If not specified, checks that there is **at most** one zone in the base, else raises an error. Defaults to None. base_name (str, optional): The Base in which to seek to zone retrieve. If not specified, checks that there is **at most** one base, else raises an error. Defaults to None. time (float, optional): Time at which you want to retrieve the Zone node. 
+            globals (bool, optional): If True, only consider the "Global" base. If False, consider all bases except "Global". Defaults to False.

        Returns:
            CGNSNode: Returns a CGNS Zone node if found; otherwise, returns None.
@@ -673,13 +772,13 @@
        # get_base will look for default base_name and time
        base_node = self.get_base(base_name, time)
        if base_node is None:
-            logger.warning(f"No base with name {base_name} in this tree")
            return None

        # get_zone_assignment will look for default base_name
-        zone_name = self.get_zone_assignment(zone_name, base_name, time)
+        zone_name = self.get_zone_assignment(zone_name, base_name, time, globals)
        if zone_name is None:
-            logger.warning(f"No zone with name {zone_name} in this base ({base_name})")
            return None

        return CGU.getNodeByPath(base_node, zone_name)
@@ -780,6 +879,111 @@

        return sorted_nodal_tags

+    # -------------------------------------------------------------------------#
+    def get_global(
+        self,
+        name: str,
+        time: Optional[float] = None,
+    ) -> Optional[np.ndarray]:
+        """Retrieve a global array by name at a specified time.
+
+        Args:
+            name (str): The name of the global array to retrieve.
+            time (float, optional): The time step for which to retrieve the global array. If not provided, uses the default time.
+
+        Returns:
+            Optional[np.ndarray]: The global array if found, otherwise None. Returns a scalar if the array has size 1.
+        """
+        base_names = self.get_base_names(time=time, globals=True)
+        if "Global" not in base_names:
+            return None
+        base_node = self.get_base("Global", time=time)
+        global_ = CGU.getValueByPath(base_node, name)
+        return global_.item() if getattr(global_, "size", None) == 1 else global_
+
+    def add_global(
+        self,
+        name: str,
+        global_array: np.ndarray,
+        time: Optional[float] = None,
+    ) -> None:
+        """Add or update a global array at a specified time.
+
+        Args:
+            name (str): The name of the global array to add or update.
+            global_array (np.ndarray): The array to store.
+            time (float, optional): The time step for which to add the global array. If not provided, uses the default time.
+
+        Note:
+            If the "Global" base does not exist, it will be created.
+            If an array with the same name exists, its value will be updated.
+        """
+        _check_names(name)
+        base_names = self.get_base_names(time=time, globals=True)
+        if "Global" in base_names:
+            base_node = self.get_base("Global", time=time)
+        else:
+            base_node = self.init_base(1, 1, "Global", time, globals=True)
+
+        if CGU.getValueByPath(base_node, name) is None:
+            CGL.newDataArray(base_node, name, value=global_array)
+        else:
+            global_node = CGU.getNodeByPath(base_node, name)
+            CGU.setValue(global_node, np.asfortranarray(global_array))
+
+    def del_global(
+        self,
+        name: str,
+        time: Optional[float] = None,
+    ) -> np.ndarray:
+        """Delete a global array by name at a specified time.
+
+        Args:
+            name (str): The name of the global array to delete.
+            time (float, optional): The time step for which to delete the global array. If not provided, uses the default time.
+
+        Raises:
+            KeyError: If the global array does not exist at the specified time.
+
+        Returns:
+            np.ndarray: The value of the deleted global array.
+        """
+        val = self.get_global(name, time)
+        if val is None:
+            raise KeyError(
+                f"There is no global with name {name} at the specified time."
+ ) + + base_node = self.get_base("Global", time=time) + CGU.nodeDelete(base_node, name) + + return val + + def get_global_names(self, time: Optional[float] = None) -> list[str]: + """Return a list of all global array names at the specified time(s). + + Args: + time (float, optional): The time step for which to retrieve global names. If not provided, returns names for all available times. + + Returns: + list[str]: List of global array names (excluding "Time" arrays). + """ + if time is None: + all_times = self.get_all_mesh_times() + else: + all_times = [time] + global_names = [] + for time in all_times: + base_names = self.get_base_names(time=time, globals=True) + if "Global" in base_names: + base_node = self.get_base("Global", time=time) + if base_node is not None: + global_paths = CGU.getAllNodesByTypeSet(base_node, ["DataArray_t"]) + for path in global_paths: + if "Time" not in path: + global_names.append(CGU.getNodeByPath(base_node, path)[0]) + return global_names + # -------------------------------------------------------------------------# def get_nodes( self, @@ -964,7 +1168,7 @@ def get_field_names_one_time_base_zone_location( ) -> list[str]: # get_zone will look for default zone_name, base_name, time search_node = self.get_zone( - zone_name=zone_name, base_name=base_name, time=time + zone_name=zone_name, base_name=base_name, time=time, globals=False ) if search_node is None: # pragma: no cover return [] @@ -990,7 +1194,9 @@ def get_field_names_one_time_base_zone_location( times = [time] if time is not None else self.get_all_mesh_times() for time in times: base_names = ( - [base_name] if base_name is not None else self.get_base_names(time=time) + [base_name] + if base_name is not None + else self.get_base_names(time=time, globals=False) ) for base_name in base_names: zone_names = ( @@ -1036,7 +1242,9 @@ def get_field( Field: A set containing the names of the fields that match the specified criteria. 
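+
+        Example:
+            A minimal sketch (assuming ``features`` is a SampleFeatures instance; field and base names are illustrative):
+
+            .. code-block:: python
+
+                mach = features.get_field("mach", base_name="Base_3_3")
+                # returns the "mach" field array at the default time, or None if it is absent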
""" # get_zone will look for default time - search_node = self.get_zone(zone_name=zone_name, base_name=base_name, time=time) + search_node = self.get_zone( + zone_name=zone_name, base_name=base_name, time=time, globals=False + ) if search_node is None: return None @@ -1094,7 +1302,9 @@ def add_field( # init_tree will look for default time self.init_tree(time) # get_zone will look for default zone_name, base_name and time - zone_node = self.get_zone(zone_name=zone_name, base_name=base_name, time=time) + zone_node = self.get_zone( + zone_name=zone_name, base_name=base_name, time=time, globals=False + ) if zone_node is None: raise KeyError( @@ -1171,7 +1381,9 @@ def del_field( CGNSTree: The tree at the provided time (without the deleted node) """ # get_zone will look for default zone_name, base_name, and time - zone_node = self.get_zone(zone_name=zone_name, base_name=base_name, time=time) + zone_node = self.get_zone( + zone_name=zone_name, base_name=base_name, time=time, globals=False + ) time = self.get_time_assignment(time) mesh_tree = self.data[time] diff --git a/src/plaid/containers/features/__init__.py b/src/plaid/containers/features/__init__.py deleted file mode 100644 index 8562ec9c..00000000 --- a/src/plaid/containers/features/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -"""Module that implements containers for features stored within a Sample.""" - -from plaid.containers.features.meshes import SampleMeshes -from plaid.containers.features.scalars import SampleScalars - -__all__ = [ - "SampleMeshes", - "SampleScalars", -] diff --git a/src/plaid/containers/features/scalars.py b/src/plaid/containers/features/scalars.py deleted file mode 100644 index f290cdff..00000000 --- a/src/plaid/containers/features/scalars.py +++ /dev/null @@ -1,72 +0,0 @@ -"""Module for implementing collections of features within a Sample.""" - -import logging -from typing import Optional - -from plaid.containers.utils import ( - _check_names, -) -from plaid.types import Scalar - -logger = logging.getLogger(__name__) - - -class SampleScalars: - """A container for scalar features within a Sample. - - Provides dict-like operations for adding, retrieving, and removing scalars. - - Names must be unique and may not contain the character ``/``. - - Args: - scalars (dict[str, Scalar], optional): a dict containing the pairs of (name, value) for each scalar in the `Sample`. - """ - - def __init__(self, scalars: Optional[dict[str, Scalar]]) -> None: - self.data: dict[str, Scalar] = scalars if scalars is not None else {} - - def add(self, name: str, value: Scalar) -> None: - """Add a scalar value to a dictionary. - - Args: - name (str): The name of the scalar value. - value (Scalar): The scalar value to add or update in the dictionary. - """ - _check_names(name) - self.data[name] = value - - def remove(self, name: str) -> Scalar: - """Delete a scalar value from the dictionary. - - Args: - name (str): The name of the scalar value to be deleted. - - Raises: - KeyError: Raised when there is no scalar / there is no scalar with the provided name. - - Returns: - Scalar: The value of the deleted scalar. - """ - if name not in self.data: - raise KeyError(f"There is no scalar value with name {name}.") - - return self.data.pop(name) - - def get(self, name: str) -> Optional[Scalar]: - """Retrieve a scalar value associated with the given name. - - Args: - name (str): The name of the scalar value to retrieve. - - Returns: - Scalar or None: The scalar value associated with the given name, or None if the name is not found. 
-        """
-        return self.data.get(name)
-
-    def get_names(self) -> list[str]:
-        """Get a set of scalar names available in the object.
-
-        Returns:
-            list[str]: A set containing the names of the available scalars.
-        """
-        return sorted(self.data.keys())
diff --git a/src/plaid/containers/sample.py b/src/plaid/containers/sample.py
index 19936ba5..6fae2a9a 100644
--- a/src/plaid/containers/sample.py
+++ b/src/plaid/containers/sample.py
@@ -24,9 +24,6 @@
 from typing import Any, Optional, Union

 import CGNS.MAP as CGM
-import CGNS.PAT.cgnskeywords as CGK
-import CGNS.PAT.cgnslib as CGL
-import CGNS.PAT.cgnsutils as CGU
 import numpy as np
 from pydantic import BaseModel, ConfigDict, PrivateAttr
 from pydantic import Field as PydanticField
@@ -36,45 +33,64 @@
     AUTHORIZED_FEATURE_TYPES,
     CGNS_FIELD_LOCATIONS,
 )
-from plaid.containers.features import SampleMeshes, SampleScalars
-from plaid.containers.utils import _check_names, get_feature_type_and_details_from
+from plaid.containers.features import SampleFeatures
+from plaid.containers.utils import get_feature_type_and_details_from
 from plaid.types import (
-    CGNSNode,
-    CGNSTree,
     Feature,
     FeatureIdentifier,
-    Field,
     Scalar,
-    TimeSequence,
-    TimeSeries,
 )
 from plaid.utils import cgns_helper as CGH
-from plaid.utils.base import safe_len
+from plaid.utils.base import delegate_methods, safe_len
 from plaid.utils.deprecation import deprecated

 logger = logging.getLogger(__name__)

+FEATURES_METHODS = [
+    "set_default_base",
+    "set_default_zone_base",
+    "get_base_assignment",
+    "get_zone_assignment",
+    "set_default_time",
+    "get_all_mesh_times",
+    "get_mesh",
+    "get_base_names",
+    "get_zone_names",
+    "get_nodal_tags",
+    "get_global",
+    "add_global",
+    "del_global",
+    "get_global_names",
+    "get_nodes",
+    "get_elements",
+    "get_field_names",
+    "get_field",
+    "show_tree",
+    "set_nodes",
+    "del_field",
+    "add_field",
+    "init_base",
+    "init_zone",
+    "add_tree",
+    "del_tree",
+]
+
+
+@delegate_methods("features", FEATURES_METHODS)
 class Sample(BaseModel):
     """Represents a single sample. It contains data and information related to a single observation or measurement within a dataset.

     By default, the sample is empty but:

     - You can provide a path to a folder containing the sample data, and it will be loaded during initialization.
-    - You can provide `SampleMeshes` and `SampleScalars` instances to initialize the sample with existing data.
-    - You can also provide a dictionary of time series data.
+    - You can provide a `SampleFeatures` instance to initialize the sample with existing data.

-    The default `SampleMeshes` instance is initialized with:
-    - `meshes=None`, `links=None`, and `paths=None` (i.e., no mesh data).
-    - `mesh_base_name="Base"` and `mesh_zone_name="Zone"`.
-
-    The default `SampleScalars` instance is initialized with:
-    - `scalars=None` (i.e., no scalar data).
+    The default `SampleFeatures` instance is initialized with `data=None` (i.e., no data).
     """

     # Pydantic configuration
     model_config = ConfigDict(
-        arbitrary_types_allowed=True,
-        revalidate_instances="always",
+        arbitrary_types_allowed=True, revalidate_instances="always", extra="forbid"
     )

     # Attributes
     path: Optional[Path] = PydanticField(
         None,
         description="Path to the folder containing the sample data. If provided, the sample will be loaded from this path during initialization. 
Defaults to None.", ) - meshes: Optional[SampleMeshes] = PydanticField( - default_factory=lambda _: SampleMeshes( - meshes=None, - mesh_base_name="Base", - mesh_zone_name="Zone", - links=None, - paths=None, - ), - description="An instance of SampleMeshes containing mesh data. Defaults to an empty `SampleMeshes` object.", - ) - scalars: Optional[SampleScalars] = PydanticField( - default_factory=lambda _: SampleScalars(scalars=None), - description="An instance of SampleScalars containing scalar data. Defaults to an empty `SampleScalars` object.", - ) - time_series: Optional[dict[str, TimeSeries]] = PydanticField( - None, - description="A dictionary mapping time series names to their corresponding data. Defaults to None.", + features: Optional[SampleFeatures] = PydanticField( + default_factory=lambda _: SampleFeatures(data=None), + description="An instance of SampleFeatures containing mesh data. Defaults to an empty `SampleFeatures` object.", ) # Private attributes @@ -118,7 +120,7 @@ def copy(self) -> Self: # pyright: ignore[reportIncompatibleMethodOverride] Usage of `model_copy(deep=True)` from Pydantic to ensure all internal data is deeply copied. Returns: - A new `Sample` instance with all internal data (scalars, time series, fields, meshes, etc.) + A new `Sample` instance with all internal data (scalars, fields, meshes, etc.) deeply copied to ensure full isolation from the original. Note: @@ -135,7 +137,7 @@ def get_scalar(self, name: str) -> Optional[Scalar]: Returns: Scalar or None: The scalar value associated with the given name, or None if the name is not found. """ - return self.scalars.get(name) + return self.features.get_global(name) def add_scalar(self, name: str, value: Scalar) -> None: """Add a scalar value to a dictionary. @@ -144,7 +146,7 @@ def add_scalar(self, name: str, value: Scalar) -> None: name (str): The name of the scalar value. value (Scalar): The scalar value to add or update in the dictionary. """ - self.scalars.add(name, value) + self.features.add_global(name, value) def del_scalar(self, name: str) -> Scalar: """Delete a scalar value from the dictionary. @@ -158,7 +160,7 @@ def del_scalar(self, name: str) -> Scalar: Returns: Scalar: The value of the deleted scalar. """ - return self.scalars.remove(name) + return self.features.del_global(name) def get_scalar_names(self) -> list[str]: """Get a set of scalar names available in the object. @@ -166,591 +168,7 @@ def get_scalar_names(self) -> list[str]: Returns: list[str]: A set containing the names of the available scalars. """ - return self.scalars.get_names() - - # -------------------------------------------------------------------------# - - def get_mesh( - self, time: Optional[float] = None, apply_links: bool = False, in_memory=False - ) -> Optional[CGNSTree]: - """Retrieve the CGNS tree structure for a specified time step, if available. - - Args: - time (float, optional): The time step for which to retrieve the CGNS tree structure. If a specific time is not provided, the method will display the tree structure for the default time step. - apply_links (bool, optional): Activates the following of the CGNS links to reconstruct the complete CGNS tree - in this case, a deepcopy of the tree is made to prevent from modifying the existing tree. - in_memory (bool, optional): Active if apply_links == True, ONLY WORKING if linked mesh is in the current sample. This option follows the link in memory from current sample. 
- - Returns: - CGNSTree: The CGNS tree structure for the specified time step if available; otherwise, returns None. - """ - return self.meshes.get_mesh(time, apply_links, in_memory) - - def set_default_base(self, base_name: str, time: Optional[float] = None) -> None: - """Set the default base for the specified time (that will also be set as default if provided). - - The default base is a reference point for various operations in the system. - - Args: - base_name (str): The name of the base to be set as the default. - time (float, optional): The time at which the base should be set as default. If not provided, the default base and active zone will be set with the default time. - - Raises: - ValueError: If the specified base does not exist at the given time. - - Note: - - Setting the default base and is important for synchronizing operations with a specific base in the system's data. - - The available mesh base can be obtained using the `get_base_names` method. - - Example: - .. code-block:: python - - from plaid import Sample - sample = Sample("path_to_plaid_sample") - print(sample) - >>> Sample(2 scalars, 1 timestamp, 5 fields) - print(sample.get_physical_dim("BaseA", 0.5)) - >>> 3 - - # Set "BaseA" as the default base for the default time - sample.set_default_base("BaseA") - - # You can now use class functions with "BaseA" as default base - print(sample.get_physical_dim(0.5)) - >>> 3 - - # Set "BaseB" as the default base for a specific time - sample.set_default_base("BaseB", 0.5) - - # You can now use class functions with "BaseB" as default base and 0.5 as default time - print(sample.get_physical_dim()) # Physical dim of the base "BaseB" - >>> 3 - """ - if time is not None: - self.set_default_time(time) - if base_name in (self.meshes._default_active_base, None): - return - if not self.meshes.has_base(base_name, time): - raise ValueError(f"base {base_name} does not exist at time {time}") - - self.meshes._default_active_base = base_name - - def set_default_zone_base( - self, zone_name: str, base_name: str, time: Optional[float] = None - ) -> None: - """Set the default base and active zone for the specified time (that will also be set as default if provided). - - The default base and active zone serve as reference points for various operations in the system. - - Args: - zone_name (str): The name of the zone to be set as the active zone. - base_name (str): The name of the base to be set as the default. - time (float, optional): The time at which the base and zone should be set as default. If not provided, the default base and active zone will be set with the default time. - - Raises: - ValueError: If the specified base or zone does not exist at the given time - - Note: - - Setting the default base and zone are important for synchronizing operations with a specific base/zone in the system's data. - - The available mesh bases and zones can be obtained using the `get_base_names` and `get_base_zones` methods, respectively. - - Example: - .. 
code-block:: python - - from plaid import Sample - sample = Sample("path_to_plaid_sample") - print(sample) - >>> Sample(2 scalars, 1 timestamp, 5 fields) - print(sample.get_zone_type("ZoneX", "BaseA", 0.5)) - >>> Structured - - # Set "BaseA" as the default base and "ZoneX" as the active zone for the default time - sample.set_default_zone_base("ZoneX", "BaseA") - - # You can now use class functions with "BaseA" as default base with "ZoneX" as default zone - print(sample.get_zone_type(0.5)) # type of the zone "ZoneX" of base "BaseA" - >>> Structured - - # Set "BaseB" as the default base and "ZoneY" as the active zone for a specific time - sample.set_default_zone_base("ZoneY", "BaseB", 0.5) - - # You can now use class functions with "BaseB" as default base with "ZoneY" as default zone and 0.5 as default time - print(sample.get_zone_type()) # type of the zone "ZoneY" of base "BaseB" at 0.5 - >>> Unstructured - """ - self.set_default_base(base_name, time) - if zone_name in (self.meshes._default_active_zone, None): - return - if not self.meshes.has_zone(zone_name, base_name, time): - raise ValueError( - f"zone {zone_name} does not exist for the base {base_name} at time {time}" - ) - - self.meshes._default_active_zone = zone_name - - def init_base( - self, - topological_dim: int, - physical_dim: int, - base_name: Optional[str] = None, - time: Optional[float] = None, - ) -> CGNSNode: - """Create a Base node named `base_name` if it doesn't already exists. - - Args: - topological_dim (int): Cell dimension, see [CGNS standard](https://pycgns.github.io/PAT/lib.html#CGNS.PAT.cgnslib.newCGNSBase). - physical_dim (int): Ambient space dimension, see [CGNS standard](https://pycgns.github.io/PAT/lib.html#CGNS.PAT.cgnslib.newCGNSBase). - base_name (str): If not specified, uses `mesh_base_name` specified in Sample initialization. Defaults to None. - time (float, optional): The time at which to initialize the base. If a specific time is not provided, the method will display the tree structure for the default time step. - - Returns: - CGNSNode: The created Base node. - """ - return self.meshes.init_base(topological_dim, physical_dim, base_name, time) - - def init_zone( - self, - zone_shape: np.ndarray, - zone_type: str = CGK.Unstructured_s, - zone_name: Optional[str] = None, - base_name: Optional[str] = None, - time: Optional[float] = None, - ) -> CGNSNode: - """Initialize a new zone within a CGNS base. - - Args: - zone_shape (np.ndarray): An array specifying the shape or dimensions of the zone. - zone_type (str, optional): The type of the zone. Defaults to CGK.Unstructured_s. - zone_name (str, optional): The name of the zone to initialize. If not provided, uses `mesh_zone_name` specified in Sample initialization. Defaults to None. - base_name (str, optional): The name of the base to which the zone will be added. If not provided, the zone will be added to the currently active base. Defaults to None. - time (float, optional): The time at which to initialize the zone. If a specific time is not provided, the method will display the tree structure for the default time step. - - Raises: - KeyError: If the specified base does not exist. You can create a base using `Sample.init_base(base_name)`. - - Returns: - CGLNode: The newly initialized zone node within the CGNS tree. - """ - return self.meshes.init_zone(zone_shape, zone_type, zone_name, base_name, time) - - def set_default_time(self, time: float) -> None: - """Set the default time for the system. 
- - This function sets the default time to be used for various operations in the system. - - Args: - time (float): The time value to be set as the default. - - Raises: - ValueError: If the specified time does not exist in the available mesh times. - - Note: - - Setting the default time is important for synchronizing operations with a specific time point in the system's data. - - The available mesh times can be obtained using the `get_all_mesh_times` method. - - Example: - .. code-block:: python - - from plaid import Sample - sample = Sample("path_to_plaid_sample") - print(sample) - >>> Sample(2 scalars, 1 timestamp, 5 fields) - print(sample.show_tree(0.5)) - >>> ... - - # Set the default time to 0.5 seconds - sample.set_default_time(0.5) - - # You can now use class functions with 0.5 as default time - print(sample.show_tree()) # show the cgns tree at the time 0.5 - >>> ... - """ - if time in (self.meshes._default_active_time, None): - return - if time not in self.meshes.get_all_mesh_times(): - raise ValueError(f"time {time} does not exist in mesh times") - - self.meshes._default_active_time = time - - def get_field_names( - self, - location: str = None, - zone_name: Optional[str] = None, - base_name: Optional[str] = None, - time: Optional[float] = None, - ) -> list[str]: - """Get a set of field names associated with a specified zone, base, location, and time. - - Args: - location (str, optional): The desired grid location where the field is defined. Defaults to None. - Possible values : :py:const:`plaid.constants.CGNS_FIELD_LOCATIONS` - zone_name (str, optional): The name of the zone to search for. Defaults to None. - base_name (str, optional): The name of the base to search for. Defaults to None. - time (float, optional): The specific time at which to retrieve field names. If a specific time is not provided, the method will display the tree structure for the default time step. - - Returns: - set[str]: A set containing the names of the fields that match the specified criteria. - """ - return self.meshes.get_field_names( - location=location, zone_name=zone_name, base_name=base_name, time=time - ) - - # -------------------------------------------------------------------------# - - def link_tree( - self, - path_linked_sample: Union[str, Path], - linked_sample: "Sample", - linked_time: float, - time: float, - ) -> CGNSTree: - """Link the geometrical features of the CGNS tree of the current sample at a given time, to the ones of another sample. - - Args: - path_linked_sample (Union[str,Path]): The absolute path of the folder containing the linked CGNS - linked_sample (Sample): The linked sample - linked_time (float): The time step of the linked CGNS in the linked sample - time (float): The time step the current sample to which the CGNS tree is linked. - - Returns: - CGNSTree: The deleted CGNS tree. - """ - # see https://pycgns.github.io/MAP/sids-to-python.html#links - # difficulty is to link only the geometrical objects, which can be complex - - # https://pycgns.github.io/MAP/examples.html#save-with-links - # When you load a file all the linked-to files are resolved to produce a full CGNS/Python tree with actual node data. - - path_linked_sample = Path(path_linked_sample) - - if linked_time not in linked_sample.meshes.data: # pragma: no cover - raise KeyError( - f"There is no CGNS tree for time {linked_time} in linked_sample." 
- ) - if time in self.meshes.data: # pragma: no cover - raise KeyError(f"A CGNS tree is already linked in self for time {time}.") - - tree = CGL.newCGNSTree() - - base_names = linked_sample.meshes.get_base_names(time=linked_time) - - for bn in base_names: - base_node = linked_sample.meshes.get_base(bn, time=linked_time) - base = [bn, base_node[1], [], "CGNSBase_t"] - tree[2].append(base) - - family = [ - "Bulk", - np.array([b"B", b"u", b"l", b"k"], dtype="|S1"), - [], - "FamilyName_t", - ] # maybe get this from linked_sample as well ? - base[2].append(family) - - zone_names = linked_sample.meshes.get_zone_names(bn, time=linked_time) - for zn in zone_names: - zone_node = linked_sample.meshes.get_zone( - zone_name=zn, base_name=bn, time=linked_time - ) - grid = [ - zn, - zone_node[1], - [ - [ - "ZoneType", - np.array( - [ - b"U", - b"n", - b"s", - b"t", - b"r", - b"u", - b"c", - b"t", - b"u", - b"r", - b"e", - b"d", - ], - dtype="|S1", - ), - [], - "ZoneType_t", - ] - ], - "Zone_t", - ] - base[2].append(grid) - zone_family = [ - "FamilyName", - np.array([b"B", b"u", b"l", b"k"], dtype="|S1"), - [], - "FamilyName_t", - ] - grid[2].append(zone_family) - - def find_feature_roots(sample: Sample, time: float, Type_t: str): - Types_t = CGU.getAllNodesByTypeSet(sample.meshes.get_mesh(time), Type_t) - # in case the type is not present in the tree - if Types_t == []: # pragma: no cover - return [] - types = [Types_t[0]] - for t in Types_t[1:]: - for tt in types: - if tt not in t: # pragma: no cover - types.append(t) - return types - - feature_paths = [] - for feature in ["ZoneBC_t", "Elements_t", "GridCoordinates_t"]: - feature_paths += find_feature_roots(linked_sample, linked_time, feature) - - self.meshes.add_tree(tree, time=time) - - dname = path_linked_sample.parent - bname = path_linked_sample.name - self.meshes._links[time] = [[str(dname), bname, fp, fp] for fp in feature_paths] - - return tree - - def show_tree(self, time: Optional[float] = None) -> None: - """Display the structure of the CGNS tree for a specified time. - - Args: - time (float, optional): The time step for which you want to display the CGNS tree structure. Defaults to None. If a specific time is not provided, the method will display the tree structure for the default time step. - - Examples: - .. code-block:: python - - # To display the CGNS tree structure for the default time step: - sample.show_tree() - - # To display the CGNS tree structure for a specific time step: - sample.show_tree(0.5) - """ - self.meshes.show_tree(time) - - def add_field( - self, - name: str, - field: Field, - location: str = "Vertex", - zone_name: Optional[str] = None, - base_name: Optional[str] = None, - time: Optional[float] = None, - warning_overwrite=True, - ) -> None: - """Add a field to a specified zone in the grid. - - Args: - name (str): The name of the field to be added. - field (Field): The field data to be added. - zone_name (str, optional): The name of the zone where the field will be added. Defaults to None. - base_name (str, optional): The name of the base where the zone is located. Defaults to None. - location (str, optional): The grid location where the field will be stored. Defaults to 'Vertex'. - Possible values : :py:const:`plaid.constants.CGNS_FIELD_LOCATIONS` - time (float, optional): The time associated with the field. Defaults to 0. - warning_overwrite (bool, optional): Show warning if an preexisting field is being overwritten - - Raises: - KeyError: Raised if the specified zone does not exist in the given base. 
- """ - self.meshes.add_field( - name, - field, - location=location, - zone_name=zone_name, - base_name=base_name, - time=time, - warning_overwrite=warning_overwrite, - ) - - def get_field( - self, - name: str, - location: str = "Vertex", - zone_name: Optional[str] = None, - base_name: Optional[str] = None, - time: Optional[float] = None, - ) -> Field: - """Retrieve a field with a specified name from a given zone, base, location, and time. - - Args: - name (str): The name of the field to retrieve. - location (str, optional): The location at which to retrieve the field. Defaults to 'Vertex'. - Possible values : :py:const:`plaid.constants.CGNS_FIELD_LOCATIONS` - zone_name (str, optional): The name of the zone to search for. Defaults to None. - base_name (str, optional): The name of the base to search for. Defaults to None. - time (float, optional): The time value to consider when searching for the field. If a specific time is not provided, the method will display the tree structure for the default time step. - - Returns: - Field: A set containing the names of the fields that match the specified criteria. - """ - return self.meshes.get_field( - name=name, - location=location, - zone_name=zone_name, - base_name=base_name, - time=time, - ) - - def del_field( - self, - name: str, - location: str = "Vertex", - zone_name: Optional[str] = None, - base_name: Optional[str] = None, - time: Optional[float] = None, - ) -> CGNSTree: - """Delete a field from a specified zone in the grid. - - Args: - name (str): The name of the field to be deleted. - location (str, optional): The grid location where the field is stored. Defaults to 'Vertex'. - Possible values : :py:const:`plaid.constants.CGNS_FIELD_LOCATIONS` - zone_name (str, optional): The name of the zone from which the field will be deleted. Defaults to None. - base_name (str, optional): The name of the base where the zone is located. Defaults to None. - time (float, optional): The time associated with the field. Defaults to 0. - - Raises: - KeyError: Raised if the specified zone or field does not exist in the given base. - - Returns: - CGNSTree: The tree at the provided time (without the deleted node) - """ - return self.meshes.del_field( - name=name, - location=location, - zone_name=zone_name, - base_name=base_name, - time=time, - ) - - def get_nodes( - self, - zone_name: Optional[str] = None, - base_name: Optional[str] = None, - time: Optional[float] = None, - ) -> Optional[np.ndarray]: - """Get grid node coordinates from a specified base, zone, and time. - - Args: - zone_name (str, optional): The name of the zone to search for. Defaults to None. - base_name (str, optional): The name of the base to search for. Defaults to None. - time (float, optional): The time value to consider when searching for the zone. If a specific time is not provided, the method will display the tree structure for the default time step. - - Raises: - TypeError: Raised if multiple nodes are found. Only one is expected. - - Returns: - Optional[np.ndarray]: A NumPy array containing the grid node coordinates. - If no matching zone or grid coordinates are found, None is returned. - - Seealso: - This function can also be called using `get_points()` or `get_vertices()`. - """ - return self.meshes.get_nodes(zone_name, base_name, time) - - def set_nodes( - self, - nodes: np.ndarray, - zone_name: Optional[str] = None, - base_name: Optional[str] = None, - time: Optional[float] = None, - ) -> None: - """Set the coordinates of nodes for a specified base and zone at a given time. 
- - Args: - nodes (np.ndarray): A numpy array containing the new node coordinates. - zone_name (str, optional): The name of the zone where the nodes should be updated. Defaults to None. - base_name (str, optional): The name of the base where the nodes should be updated. Defaults to None. - time (float, optional): The time at which the node coordinates should be updated. If a specific time is not provided, the method will display the tree structure for the default time step. - - Raises: - KeyError: Raised if the specified base or zone do not exist. You should first - create the base and zone using the `Sample.init_zone(zone_name,base_name)` method. - - Seealso: - This function can also be called using `set_points()` or `set_vertices()` - """ - self.meshes.set_nodes(nodes, zone_name, base_name, time) - - # -------------------------------------------------------------------------# - def get_time_series_names(self) -> set[str]: - """Get the names of time series associated with the object. - - Returns: - set[str]: A set of strings containing the names of the time series. - """ - if self.time_series is None: - return [] - else: - return list(self.time_series.keys()) - - def get_time_series(self, name: str) -> Optional[TimeSeries]: - """Retrieve a time series by name. - - Args: - name (str): The name of the time series to retrieve. - - Returns: - TimeSeries or None: If a time series with the given name exists, it returns the corresponding time series, or None otherwise. - - """ - if (self.time_series is None) or (name not in self.time_series): - return None - else: - return self.time_series[name] - - def add_time_series( - self, name: str, time_sequence: TimeSequence, values: Field - ) -> None: - """Add a time series to the sample. - - Args: - name (str): A descriptive name for the time series. - time_sequence (TimeSequence): The time sequence, array of time points. - values (Field): The values corresponding to the time sequence. - - Example: - .. code-block:: python - - from plaid import Sample - sample.add_time_series('stuff', np.arange(2), np.random.randn(2)) - print(sample.get_time_series('stuff')) - >>> (array([0, 1]), array([-0.59630135, -1.15572306])) - - Raises: - TypeError: Raised if the length of `time_sequence` is not equal to the length of `values`. - """ - _check_names([name]) - assert len(time_sequence) == len(values), ( - "time sequence and values do not have the same size" - ) - if self.time_series is None: - self.time_series = {name: (time_sequence, values)} - else: - self.time_series[name] = (time_sequence, values) - - def del_time_series(self, name: str) -> tuple[TimeSequence, Field]: - """Delete a time series from the sample. - - Args: - name (str): The name of the time series to be deleted. - - Raises: - KeyError: Raised when there is no time series / there is no time series with the provided name. - - Returns: - tuple[TimeSequence, Field]: A tuple containing the time sequence and values of the deleted time series. 
- """ - if self.time_series is None: - raise KeyError("There is no time series inside this sample.") - - if name not in self.time_series: - raise KeyError(f"There is no time series with name {name}.") - - return self.time_series.pop(name) + return self.features.get_global_names() # -------------------------------------------------------------------------# @@ -787,13 +205,11 @@ def get_all_features_identifiers( all_features_identifiers = [] for sn in self.get_scalar_names(): all_features_identifiers.append({"type": "scalar", "name": sn}) - for tsn in self.get_time_series_names(): - all_features_identifiers.append({"type": "time_series", "name": tsn}) - for t in self.meshes.get_all_mesh_times(): - for bn in self.meshes.get_base_names(time=t): - for zn in self.meshes.get_zone_names(base_name=bn, time=t): + for t in self.features.get_all_mesh_times(): + for bn in self.features.get_base_names(time=t): + for zn in self.features.get_zone_names(base_name=bn, time=t): if ( - self.meshes.get_nodes(base_name=bn, zone_name=zn, time=t) + self.features.get_nodes(base_name=bn, zone_name=zn, time=t) is not None ): all_features_identifiers.append( @@ -805,7 +221,7 @@ def get_all_features_identifiers( } ) for loc in CGNS_FIELD_LOCATIONS: - for fn in self.meshes.get_field_names( + for fn in self.features.get_field_names( location=loc, zone_name=zn, base_name=bn, time=t ): all_features_identifiers.append( @@ -849,7 +265,6 @@ def get_feature_from_string_identifier( Supported feature types: - "scalar": expects 1 detail → `scalars.get(name)` - - "time_series": expects 1 detail → `get_time_series(name)` - "field": up to 5 details → `get_field(name, base_name, zone_name, location, time)` - "nodes": up to 3 details → `get_nodes(base_name, zone_name, time)` @@ -864,7 +279,7 @@ def get_feature_from_string_identifier( Warnings: - If "time" is present in a field/nodes identifier, it is cast to float. - - `name` is required for scalar, time_series and field features. + - `name` is required for scalar and field features. - The order of the details must be respected. One cannot specify a detail in the feature_string_identifier string without specified the previous ones. 
""" splitted_identifier = feature_string_identifier.split("::") @@ -884,8 +299,6 @@ def get_feature_from_string_identifier( f"Unknown scalar {feature_details[0]}" ) # pragma: no cover return val - elif feature_type == "time_series": - return self.get_time_series(feature_details[0]) elif feature_type == "field": kwargs = {arg_names[i]: detail for i, detail in enumerate(feature_details)} for k in kwargs: @@ -910,12 +323,11 @@ def get_feature_from_identifier( The `feature_identifier` must include a `"type"` key specifying the feature kind: - `"scalar"` → calls `scalars.get(name)` - - `"time_series"` → calls `get_time_series(name)` - `"field"` → calls `get_field(name, base_name, zone_name, location, time)` - `"nodes"` → calls `get_nodes(base_name, zone_name, time)` Required keys: - - `"type"`: one of `"scalar"`, `"time_series"`, `"field"`, or `"nodes"` + - `"type"`: one of `"scalar"`, `"field"`, or `"nodes"` - `"name"`: required for all types except `"nodes"` Optional keys depending on type: @@ -936,8 +348,6 @@ def get_feature_from_identifier( if feature_type == "scalar": return self.get_scalar(**feature_details) - elif feature_type == "time_series": - return self.get_time_series(**feature_details) elif feature_type == "field": return self.get_field(**feature_details) elif feature_type == "nodes": @@ -950,12 +360,11 @@ def get_features_from_identifiers( Elements of `feature_identifiers` must include a `"type"` key specifying the feature kind: - `"scalar"` → calls `scalars.get(name)` - - `"time_series"` → calls `get_time_series(name)` - `"field"` → calls `get_field(name, base_name, zone_name, location, time)` - `"nodes"` → calls `get_nodes(base_name, zone_name, time)` Required keys: - - `"type"`: one of `"scalar"`, `"time_series"`, `"field"`, or `"nodes"` + - `"type"`: one of `"scalar"`, `"field"`, or `"nodes"` - `"name"`: required for all types except `"nodes"` Optional keys depending on type: @@ -979,8 +388,6 @@ def get_features_from_identifiers( for feature_type, feature_details in all_features_info: if feature_type == "scalar": features.append(self.get_scalar(**feature_details)) - elif feature_type == "time_series": - features.append(self.get_time_series(**feature_details)) elif feature_type == "field": features.append(self.get_field(**feature_details)) elif feature_type == "nodes": @@ -994,7 +401,7 @@ def _add_feature( ) -> Self: """Add a feature to current sample. - This method applies updates to scalars, time series, fields, or nodes + This method applies updates to scalars, fields, or nodes using feature identifiers, and corresponding feature data. Args: @@ -1015,17 +422,13 @@ def _add_feature( if safe_len(feature) == 1: feature = feature[0] self.add_scalar(**feature_details, value=feature) - elif feature_type == "time_series": - self.add_time_series( - **feature_details, time_sequence=feature[0], values=feature[1] - ) elif feature_type == "field": self.add_field(**feature_details, field=feature, warning_overwrite=False) elif feature_type == "nodes": physical_dim_arg = { k: v for k, v in feature_details.items() if k in ["base_name", "time"] } - phys_dim = self.meshes.get_physical_dim(**physical_dim_arg) + phys_dim = self.features.get_physical_dim(**physical_dim_arg) self.set_nodes(**feature_details, nodes=feature.reshape((-1, phys_dim))) return self @@ -1038,13 +441,13 @@ def update_features_from_identifier( ) -> Self: """Update one or several features of the sample by their identifier(s). 
- This method applies updates to scalars, time series, fields, or nodes + This method applies updates to scalars, fields, or nodes using feature identifiers, and corresponding feature data. When `in_place=False`, a deep copy of the sample is created before applying updates, ensuring full isolation from the original. Args: feature_identifiers (FeatureIdentifier or list of FeatureIdentifier): One or more feature identifiers. - features (Feature or list of Feature): One or more features corresponding + features (dict of Feature or list of Feature): One or more features corresponding to the identifiers. in_place (bool, optional): If True, modifies the current sample in place. If False, returns a deep copy with updated features. @@ -1075,7 +478,7 @@ def extract_sample_from_identifier( ) -> Self: """Extract features of the sample by their identifier(s) and return a new sample containing these features. - This method applies updates to scalars, time series, fields, or nodes + This method applies updates to scalars, fields, or nodes using feature identifiers Args: @@ -1093,12 +496,8 @@ def extract_sample_from_identifier( if isinstance(feature_identifiers, dict): feature_identifiers = [feature_identifiers] - feature_types = set([feat_id["type"] for feat_id in feature_identifiers]) - - # if field or node features are to extract, copy the source sample and delete all fields - if "field" in feature_types or "nodes" in feature_types: - source_sample = self.copy() - source_sample.del_all_fields() + source_sample = self.copy() + source_sample.del_all_fields() sample = Sample() @@ -1106,14 +505,14 @@ def extract_sample_from_identifier( feature = self.get_feature_from_identifier(feat_id) if feature is not None: - # if trying to add a field or nodes, must check if the corresponding tree exists, and add it if not - if feat_id["type"] in ["field", "nodes"]: - # get time of current feature - time = self.meshes.get_time_assignment(time=feat_id.get("time")) + # get time of current feature + time = self.features.get_time_assignment(time=feat_id.get("time")) - # if the constructed sample does not have a tree, add the one from the source sample, with no field - if not sample.meshes.get_mesh(time): - sample.meshes.add_tree(source_sample.meshes.get_mesh(time)) + # if the constructed sample does not have a tree, add the one from the source sample, with no field + if len(sample.features.get_base_names(time=time, globals=False)) == 0: + sample.features.add_tree(source_sample.features.get_mesh(time)) + for name in sample.features.get_global_names(time=time): + sample.features.del_global(name, time) sample._add_feature(feat_id, feature) @@ -1138,7 +537,7 @@ def from_features_identifier( def merge_features(self, sample: Self, in_place: bool = False) -> Self: """Merge features from another sample into the current sample. - This method applies updates to scalars, time series, fields, or nodes + This method applies updates to scalars, fields, or nodes using features from another sample. When `in_place=False`, a deep copy of the sample is created before applying updates, ensuring full isolation from the original. 
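The identifier-driven extraction and merge API refactored above is easiest to grasp with a short usage sketch. This is a minimal illustration, not a verified snippet: it relies only on methods visible in this diff (`extract_sample_from_identifier`, `merge_features`, and `FeatureIdentifier` from `plaid.types`), and the sample path and feature names ("Mach", "pressure") are hypothetical placeholders.

```python
from plaid import Sample
from plaid.types import FeatureIdentifier

# Hypothetical sample containing a scalar "Mach" and a vertex field "pressure"
sample = Sample(path="path_to_plaid_sample")

fid_scalar = FeatureIdentifier({"type": "scalar", "name": "Mach"})
fid_field = FeatureIdentifier(
    {"type": "field", "name": "pressure", "location": "Vertex"}
)

# Build a new Sample holding only the requested features; the source
# sample is deep-copied internally, so the original is left untouched.
sub_sample = sample.extract_sample_from_identifier([fid_scalar, fid_field])

# Merge those features into another sample, out of place.
merged = Sample().merge_features(sub_sample, in_place=False)
```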
@@ -1162,15 +561,16 @@ def merge_features(self, sample: Self, in_place: bool = False) -> Self: source_sample = sample.copy() source_sample.del_all_fields() - for feat_id in all_features_identifiers: - # if trying to add a field or nodes, must check if the corresponding tree exists, and add it if not - if feat_id["type"] in ["field", "nodes"]: - # get time of current feature - time = sample.meshes.get_time_assignment(time=feat_id.get("time")) + # DELETE LATER IF CONFIRMED THIS IS NOT NEEDED (WITH GLOBAL, THERE IS ALWAYS A TREE) + # for feat_id in all_features_identifiers: + # # if trying to add a field or nodes, must check if the corresponding tree exists, and add it if not + # if feat_id["type"] in ["field", "nodes"]: + # # get time of current feature + # time = sample.features.get_time_assignment(time=feat_id.get("time")) - # if the constructed sample does not have a tree, add the one from the source sample, with no field - if not merged_dataset.meshes.get_mesh(time): - merged_dataset.meshes.add_tree(source_sample.get_mesh(time)) + # # if the constructed sample does not have a tree, add the one from the source sample, with no field + # if not merged_dataset.features.get_mesh(time): + # merged_dataset.features.add_tree(source_sample.get_mesh(time)) return merged_dataset.update_features_from_identifier( feature_identifiers=all_features_identifiers, @@ -1201,46 +601,16 @@ def save(self, path: Union[str, Path], overwrite: bool = False) -> None: mesh_dir = path / "meshes" - if self.meshes.data: + if self.features.data: mesh_dir.mkdir() - for i, time in enumerate(self.meshes.data.keys()): + for i, time in enumerate(self.features.data.keys()): outfname = mesh_dir / f"mesh_{i:09d}.cgns" status = CGM.save( str(outfname), - self.meshes.data[time], - links=self.meshes._links.get(time), + self.features.data[time], ) logger.debug(f"save -> {status=}") - scalars_names = self.get_scalar_names() - if len(scalars_names) > 0: - scalars = [] - for s_name in scalars_names: - scalars.append(self.get_scalar(s_name)) - scalars = np.array(scalars).reshape((1, -1)) - header = ",".join(scalars_names) - np.savetxt( - path / "scalars.csv", - scalars, - header=header, - delimiter=",", - comments="", - ) - - time_series_names = self.get_time_series_names() - if len(time_series_names) > 0: - for ts_name in time_series_names: - ts = self.get_time_series(ts_name) - data = np.vstack((ts[0], ts[1])).T - header = ",".join(["t", ts_name]) - np.savetxt( - path / f"time_series_{ts_name}.csv", - data, - header=header, - delimiter=",", - comments="", - ) - @classmethod def load_from_dir(cls, path: Union[str, Path]) -> Self: """Load the Sample from directory `path`. 
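With the `scalars.csv` and `time_series_*.csv` sidecars removed by the `save` change above, a round trip reduces to the per-time-step CGNS files under `meshes/`. A minimal sketch, assuming (per the "with globals there is always a tree" note above) that `add_scalar` materializes a tree that `save` can write; the output directory is hypothetical:

```python
from pathlib import Path
from plaid import Sample

sample = Sample()
sample.add_scalar("Re", 6.5e6)  # now stored as a global in the CGNS tree

out_dir = Path("/tmp/plaid_sample")  # hypothetical location
sample.save(out_dir, overwrite=True)
# Expected layout: one CGNS file per time step, no scalars.csv sidecar:
#   /tmp/plaid_sample/meshes/mesh_000000000.cgns

reloaded = Sample.load_from_dir(out_dir)
assert reloaded.get_scalar("Re") == 6.5e6
```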
@@ -1301,46 +671,53 @@ def load(self, path: Union[str, Path]) -> None: if meshes_dir.is_dir(): meshes_names = list(meshes_dir.glob("*")) nb_meshes = len(meshes_names) - # self.meshes = {} - self.meshes._links = {} - self.meshes._paths = {} for i in range(nb_meshes): - tree, links, paths = CGM.load(str(meshes_dir / f"mesh_{i:09d}.cgns")) + tree, _, _ = CGM.load(str(meshes_dir / f"mesh_{i:09d}.cgns")) time = CGH.get_time_values(tree) - ( - self.meshes.data[time], - self.meshes._links[time], - self.meshes._paths[time], - ) = ( - tree, - links, - paths, - ) - for i in range(len(self.meshes._links[time])): # pragma: no cover - self.meshes._links[time][i][0] = str( - meshes_dir / self.meshes._links[time][i][0] - ) + (self.features.data[time],) = (tree,) - scalars_fname = path / "scalars.csv" - if scalars_fname.is_file(): - names = np.loadtxt( - scalars_fname, dtype=str, max_rows=1, delimiter="," - ).reshape((-1,)) - scalars = np.loadtxt( - scalars_fname, dtype=float, skiprows=1, delimiter="," - ).reshape((-1,)) - for name, value in zip(names, scalars): - self.add_scalar(name, value) - - time_series_files = list(path.glob("time_series_*.csv")) + old_scalars_file = path / "scalars.csv" + if old_scalars_file.is_file(): + self._load_old_scalars(old_scalars_file) + + old_time_series_files = list(path.glob("time_series_*.csv")) + if len(old_time_series_files) > 0: + self._load_old_time_series(old_time_series_files) + + @deprecated( + reason="This Sample was written with plaid<=0.1.9, save it with plaid>=0.1.10 to have all features embedded in the CGNS tree", + version="0.1.10", + removal="0.2.0", + ) + def _load_old_scalars(self, scalars_file: Path): + names = np.loadtxt(scalars_file, dtype=str, max_rows=1, delimiter=",").reshape( + (-1,) + ) + scalars = np.loadtxt( + scalars_file, dtype=float, skiprows=1, delimiter="," + ).reshape((-1,)) + for name, value in zip(names, scalars): + self.add_scalar(name, value) + + @deprecated( + reason="This Sample was written with plaid<=0.1.9, save it with plaid>=0.1.10 to have all features embedded in the CGNS tree", + version="0.1.10", + removal="0.2.0", + ) + def _load_old_time_series(self, time_series_files: list[Path]): for ts_fname in time_series_files: names = np.loadtxt(ts_fname, dtype=str, max_rows=1, delimiter=",").reshape( (-1,) ) assert names[0] == "t" times_and_val = np.loadtxt(ts_fname, dtype=float, skiprows=1, delimiter=",") - self.add_time_series(names[1], times_and_val[:, 0], times_and_val[:, 1]) + for i in range(times_and_val.shape[0]): + self.add_global( + name=names[1], + global_array=times_and_val[i, 1], + time=times_and_val[i, 0], + ) # # -------------------------------------------------------------------------# def __str__(self) -> str: @@ -1356,44 +733,35 @@ def __str__(self) -> str: nb_scalars = len(self.get_scalar_names()) str_repr += f"{nb_scalars} scalar{'' if nb_scalars == 1 else 's'}, " - # time series - nb_ts = len(self.get_time_series_names()) - str_repr += f"{nb_ts} time series, " - # fields - times = self.meshes.get_all_mesh_times() + times = self.features.get_all_mesh_times() nb_timestamps = len(times) str_repr += f"{nb_timestamps} timestamp{'' if nb_timestamps == 1 else 's'}, " field_names = set() for time in times: ## Need to include all possible location within the count - base_names = self.meshes.get_base_names(time=time) + base_names = self.features.get_base_names(time=time) for bn in base_names: - zone_names = self.meshes.get_zone_names(base_name=bn) + zone_names = self.features.get_zone_names(base_name=bn) for zn in 
zone_names: for location in CGNS_FIELD_LOCATIONS: field_names = field_names.union( - self.meshes.get_field_names( + self.features.get_field_names( location=location, zone_name=zn, base_name=bn, time=time ) ) nb_fields = len(field_names) str_repr += f"{nb_fields} field{'' if nb_fields == 1 else 's'}, " - # CGNS tree - if not self.meshes.data: - str_repr += "no tree, " - else: - # TODO - pass - if str_repr[-2:] == ", ": str_repr = str_repr[:-2] str_repr = str_repr + ")" return str_repr + __repr__ = __str__ + def summarize(self) -> str: """Provide detailed summary of the Sample content, showing feature names and mesh information. @@ -1444,26 +812,16 @@ def summarize(self) -> str: summary += f" - {name}: {value}\n" summary += "\n" - # Time series with names - ts_names = self.get_time_series_names() - if ts_names: - summary += f"Time Series ({len(ts_names)}):\n" - for name in ts_names: - ts = self.get_time_series(name) - if ts is not None: - summary += f" - {name}: {len(ts[0])} time points\n" - summary += "\n" - # Mesh information - times = self.meshes.get_all_mesh_times() + times = self.features.get_all_mesh_times() summary += f"Meshes ({len(times)} timestamps):\n" if times: for time in times: summary += f" Time: {time}\n" - base_names = self.meshes.get_base_names(time=time) + base_names = self.features.get_base_names(time=time) for base_name in base_names: summary += f" Base: {base_name}\n" - zone_names = self.meshes.get_zone_names( + zone_names = self.features.get_zone_names( base_name=base_name, time=time ) for zone_name in zone_names: @@ -1474,7 +832,7 @@ def summarize(self) -> str: ) if nodes is not None: nb_nodes = nodes.shape[0] - nodal_tags = self.meshes.get_nodal_tags( + nodal_tags = self.features.get_nodal_tags( zone_name=zone_name, base_name=base_name, time=time ) summary += f" Nodes ({nb_nodes})\n" @@ -1492,7 +850,7 @@ def summarize(self) -> str: summary += f" Location: {location}\n Fields ({len(field_names)}): {', '.join(field_names)}\n" # Elements and fields at elements - elements = self.meshes.get_elements( + elements = self.features.get_elements( zone_name=zone_name, base_name=base_name, time=time ) summary += f" Elements ({sum([v.shape[0] for v in elements.values()])})\n" @@ -1513,7 +871,6 @@ def check_completeness(self) -> str: Sample Completeness Check: ============================== Has scalars: True - Has time series: False Has meshes: True Total unique fields: 8 Field names: M_iso, mach, nut, ro, roe, rou, rov, sdf @@ -1523,20 +880,18 @@ def check_completeness(self) -> str: # Check if sample has basic features has_scalars = len(self.get_scalar_names()) > 0 - has_time_series = len(self.get_time_series_names()) > 0 - has_meshes = len(self.meshes.get_all_mesh_times()) > 0 + has_meshes = len(self.features.get_all_mesh_times()) > 0 report += f"Has scalars: {has_scalars}\n" - report += f"Has time series: {has_time_series}\n" report += f"Has meshes: {has_meshes}\n" if has_meshes: - times = self.meshes.get_all_mesh_times() + times = self.features.get_all_mesh_times() total_fields = set() for time in times: - base_names = self.meshes.get_base_names(time=time) + base_names = self.features.get_base_names(time=time) for base_name in base_names: - zone_names = self.meshes.get_zone_names( + zone_names = self.features.get_zone_names( base_name=base_name, time=time ) for zone_name in zone_names: diff --git a/src/plaid/containers/utils.py b/src/plaid/containers/utils.py index 86156a86..f3b346f9 100644 --- a/src/plaid/containers/utils.py +++ b/src/plaid/containers/utils.py @@ -155,14 
+155,13 @@ def get_feature_type_and_details_from( """Extract and validate the feature type and its associated metadata from a feature identifier. This utility function ensures that the `feature_identifier` dictionary contains a valid - "type" key (e.g., "scalar", "time_series", "field", "node") and returns the type along + "type" key (e.g., "scalar", "field", "node") and returns the type along with the remaining identifier keys, which are specific to the feature type. Args: feature_identifier (dict): A dictionary with a "type" key, and other keys (some optional) depending on the feature type. For example: - {"type": "scalar", "name": "Mach"} - - {"type": "time_series", "name": "AOA"} - {"type": "field", "name": "pressure"} - {"type": "field", "name": "pressure", "time":0.} - {"type": "nodes", "base_name": "Base_2_2"} @@ -207,7 +206,6 @@ def check_features_type_homogeneity( feature_identifiers (list[dict]): dict with a "type" key, and other keys (some optional) depending on the feature type. For example: - {"type": "scalar", "name": "Mach"} - - {"type": "time_series", "name": "AOA"} - {"type": "field", "name": "pressure"} Raises: @@ -238,7 +236,6 @@ def check_features_size_homogeneity( feature_identifiers (list[dict]): dict with a "type" key, and other keys (some optional) depending on the feature type. For example: - {"type": "scalar", "name": "Mach"} - - {"type": "time_series", "name": "AOA"} - {"type": "field", "name": "pressure"} features (dict): dict with sample index as keys and one or more features as values. diff --git a/src/plaid/examples/dataset.py b/src/plaid/examples/dataset.py index ac1af71a..031415a9 100644 --- a/src/plaid/examples/dataset.py +++ b/src/plaid/examples/dataset.py @@ -7,7 +7,7 @@ # # from plaid import Dataset -from plaid.bridges.huggingface_bridge import streamed_huggingface_dataset_to_plaid +from plaid.bridges.huggingface_bridge import load_hf_dataset_from_hub, to_plaid_sample from plaid.examples.config import _HF_REPOS @@ -40,7 +40,13 @@ def _load_dataset( return self._cache[ex_name] try: - dataset, _ = streamed_huggingface_dataset_to_plaid(hf_repo, 2) + ds_stream = load_hf_dataset_from_hub(hf_repo, split="all_samples", streaming=True) + ds_iter = iter(ds_stream) + samples = [] + for _ in range(2): + hf_sample = next(ds_iter) + samples.append(to_plaid_sample(hf_sample)) + dataset = Dataset(samples=samples) self._cache[ex_name] = dataset return dataset except Exception as e: # pragma: no cover diff --git a/src/plaid/pipelines/plaid_blocks.py b/src/plaid/pipelines/plaid_blocks.py index 82a3c916..b16392d8 100644 --- a/src/plaid/pipelines/plaid_blocks.py +++ b/src/plaid/pipelines/plaid_blocks.py @@ -178,9 +178,7 @@ def inverse_transform(self, dataset: Dataset) -> Dataset: if isinstance(transformer_, Pipeline) else transformer_.out_features_identifiers_ ) - sub_dataset = dataset.extract_dataset_from_identifier( - in_feat_id, keep_cgns=True - ) + sub_dataset = dataset.extract_dataset_from_identifier(in_feat_id) transformed = transformer_.inverse_transform(sub_dataset) transformed_datasets.append(transformed) return Dataset.merge_dataset_by_features(transformed_datasets) diff --git a/src/plaid/problem_definition.py b/src/plaid/problem_definition.py index ff4807b1..a2b44393 100644 --- a/src/plaid/problem_definition.py +++ b/src/plaid/problem_definition.py @@ -25,7 +25,10 @@ from typing import Optional, Union import yaml +from packaging.specifiers import SpecifierSet +from packaging.version import Version + +import plaid from plaid.constants import AUTHORIZED_TASKS from plaid.types import
IndexType from plaid.types.feature_types import FeatureIdentifier @@ -71,7 +74,8 @@ def __init__( print(problem_definition) >>> ProblemDefinition(input_scalars_names=['s_1'], output_scalars_names=['s_2'], input_meshes_names=['mesh'], task='regression') """ - self._task: str = None # list[task name] + self._version: Union[Version, SpecifierSet] = Version(plaid.__version__) + self._task: str = None self.in_features_identifiers: list[FeatureIdentifier] = [] self.out_features_identifiers: list[FeatureIdentifier] = [] self.in_scalars_names: list[str] = [] @@ -1126,6 +1130,41 @@ def get_all_indices(self) -> list[int]: return list(set(all_indices)) # -------------------------------------------------------------------------# + def _generate_problem_infos_dict(self) -> dict[str, Union[str, list]]: + """Generate a dictionary containing all relevant problem definition data. + + Returns: + dict[str, Union[str, list]]: A dictionary with keys for task, input/output features, scalars, fields, timeseries, and meshes. + """ + data = { + "version": str(self._version), + "task": self._task, + "input_features": [dict(**d) for d in self.in_features_identifiers], + "output_features": [dict(**d) for d in self.out_features_identifiers], + } + if Version(plaid.__version__) < Version("0.2.0"): + data.update( + { + "input_scalars": self.in_scalars_names, # list[input scalar name] + "output_scalars": self.out_scalars_names, # list[output scalar name] + "input_fields": self.in_fields_names, # list[input field name] + "output_fields": self.out_fields_names, # list[output field name] + "input_timeseries": self.in_timeseries_names, # list[input timeseries name] + "output_timeseries": self.out_timeseries_names, # list[output timeseries name] + "input_meshes": self.in_meshes_names, # list[input mesh name] + "output_meshes": self.out_meshes_names, # list[output mesh name] + } + ) + + # Save infos + if self._version != Version(plaid.__version__): + logger.warning( + f"Version mismatch: ProblemDefinition was loaded from version: {self._version}, and will be saved with version: {Version(plaid.__version__)}" + ) + data["old_version"] = str(self._version) + data["version"] = str(Version(plaid.__version__)) + return data + def _save_to_dir_(self, path: Union[str, Path]) -> None: """Save problem information, inputs, outputs, and split to the specified directory in YAML and CSV formats. 
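The version gating in `_generate_problem_infos_dict` above is worth spelling out: the legacy name-based keys are only emitted while the running plaid version is below 0.2.0, and a mismatch between the loaded and running versions is recorded under `old_version` before the `version` key is overwritten. A condensed sketch of that logic using `packaging` (the version strings and feature names are illustrative):

```python
from packaging.version import Version

running = Version("0.1.10")  # illustrative stand-in for plaid.__version__
loaded = Version("0.1.9")    # version the ProblemDefinition was loaded from

data = {"version": str(running), "task": "regression"}
if running < Version("0.2.0"):
    # legacy name-based keys are still written for backward compatibility
    data["input_scalars"] = ["Mach"]
if loaded != running:
    # keep a trace of the original version before overwriting it
    data["old_version"] = str(loaded)
    data["version"] = str(running)
```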
@@ -1142,30 +1181,21 @@ def _save_to_dir_(self, path: Union[str, Path]) -> None: path = Path(path) if not (path.is_dir()): - path.mkdir() + path.mkdir(parents=True) - data = { - "task": self._task, - "input_features": [dict(**d) for d in self.in_features_identifiers], - "output_features": [dict(**d) for d in self.out_features_identifiers], - "input_scalars": self.in_scalars_names, # list[input scalar name] - "output_scalars": self.out_scalars_names, # list[output scalar name] - "input_fields": self.in_fields_names, # list[input field name] - "output_fields": self.out_fields_names, # list[output field name] - "input_timeseries": self.in_timeseries_names, # list[input timeseries name] - "output_timeseries": self.out_timeseries_names, # list[output timeseries name] - "input_meshes": self.in_meshes_names, # list[input mesh name] - "output_meshes": self.out_meshes_names, # list[output mesh name] - } + problem_infos_dict = self._generate_problem_infos_dict() pbdef_fname = path / "problem_infos.yaml" with pbdef_fname.open("w") as file: - yaml.dump(data, file, default_flow_style=False, sort_keys=False) + yaml.dump( + problem_infos_dict, file, default_flow_style=False, sort_keys=False + ) + # Save split split_fname = path / "split.json" - if self._split is not None: + if self.get_split() is not None: with split_fname.open("w") as file: - json.dump(self._split, file) + json.dump(self.get_split(), file) @classmethod def load(cls, path: Union[str, Path]) -> Self: # pragma: no cover @@ -1181,6 +1211,52 @@ def load(cls, path: Union[str, Path]) -> Self: # pragma: no cover instance._load_from_dir_(path) return instance + def _initialize_from_problem_infos_dict( + self, data: dict[str, Union[str, list]] + ) -> None: + if "version" not in data: + self._version = SpecifierSet("<=0.1.7") + else: + self._version = Version(data["version"]) + + self._task = data["task"] + self.in_features_identifiers = ( + [FeatureIdentifier(**tup) for tup in data["input_features"]] + if "input_features" in data + else [] + ) + self.out_features_identifiers = ( + [FeatureIdentifier(**tup) for tup in data["output_features"]] + if "output_features" in data + else [] + ) + + if "version" not in data or Version(data["version"]) < Version("0.2.0"): + self.in_scalars_names = data["input_scalars"] + self.out_scalars_names = data["output_scalars"] + self.in_fields_names = data["input_fields"] + self.out_fields_names = data["output_fields"] + self.in_timeseries_names = data["input_timeseries"] + self.out_timeseries_names = data["output_timeseries"] + self.in_meshes_names = data["input_meshes"] + self.out_meshes_names = data["output_meshes"] + else: + old_keys = [ + "input_scalars", + "input_fields", + "input_timeseries", + "input_meshes", + "output_scalars", + "output_fields", + "output_timeseries", + "output_meshes", + ] + for k in old_keys: + if k in data: + logger.warning( + f"Key '{k}' is deprecated and will be ignored. You should convert your ProblemDefinition using FeatureIdentifiers." + ) + def _load_from_dir_(self, path: Union[str, Path]) -> None: """Load problem information, inputs, outputs, and split from the specified directory in YAML and CSV formats. @@ -1216,21 +1292,7 @@ def _load_from_dir_(self, path: Union[str, Path]) -> None: f"file with path `{pbdef_fname}` does not exist. 
Abort" ) - self._task = data["task"] - self.in_features_identifiers = [ - FeatureIdentifier(**tup) for tup in data["input_features"] - ] - self.out_features_identifiers = [ - FeatureIdentifier(**tup) for tup in data["output_features"] - ] - self.in_scalars_names = data["input_scalars"] - self.out_scalars_names = data["output_scalars"] - self.in_fields_names = data["input_fields"] - self.out_fields_names = data["output_fields"] - self.in_timeseries_names = data["input_timeseries"] - self.out_timeseries_names = data["output_timeseries"] - self.in_meshes_names = data["input_meshes"] - self.out_meshes_names = data["output_meshes"] + self._initialize_from_problem_infos_dict(data) # if it was saved with version <=0.1.7 it is a .csv else it is .json split = {} @@ -1244,11 +1306,11 @@ def _load_from_dir_(self, path: Union[str, Path]) -> None: elif split_fname_json.is_file(): with split_fname_json.open("r") as file: split = json.load(file) - else: + else: # pragma: no cover logger.warning( f"file with path `{split_fname_csv}` or `{split_fname_json}` does not exist. Splits will not be set" ) - self._split = split + self.set_split(split) def extract_problem_definition_from_identifiers( self, identifiers: list[FeatureIdentifier] diff --git a/src/plaid/types/__init__.py b/src/plaid/types/__init__.py index 780eae8e..d777be58 100644 --- a/src/plaid/types/__init__.py +++ b/src/plaid/types/__init__.py @@ -8,9 +8,7 @@ # from plaid.types.cgns_types import ( - CGNSLink, CGNSNode, - CGNSPath, CGNSTree, ) from plaid.types.common import Array, ArrayDType, IndexType @@ -20,7 +18,6 @@ Field, Scalar, TimeSequence, - TimeSeries, ) from plaid.types.sklearn_types import SklearnBlock @@ -30,12 +27,9 @@ "IndexType", "CGNSNode", "CGNSTree", - "CGNSLink", - "CGNSPath", "Scalar", "Field", "TimeSequence", - "TimeSeries", "Feature", "FeatureIdentifier", "SklearnBlock", diff --git a/src/plaid/types/cgns_types.py b/src/plaid/types/cgns_types.py index e48e31a4..bd8a898d 100644 --- a/src/plaid/types/cgns_types.py +++ b/src/plaid/types/cgns_types.py @@ -34,7 +34,3 @@ class CGNSNode(BaseModel): # A CGNSTree is simply the root CGNSNode CGNSTree: TypeAlias = CGNSNode - -# CGNS links and paths -CGNSLink: TypeAlias = list[str] # [dir, filename, source_path, target_path] -CGNSPath: TypeAlias = tuple[str, ...] 
# a path in the CGNS tree diff --git a/src/plaid/types/feature_types.py b/src/plaid/types/feature_types.py index 5e0d663e..9187d446 100644 --- a/src/plaid/types/feature_types.py +++ b/src/plaid/types/feature_types.py @@ -22,10 +22,9 @@ Scalar: TypeAlias = Union[float, int] Field: TypeAlias = Array TimeSequence: TypeAlias = Array -TimeSeries: TypeAlias = tuple[TimeSequence, Field] # Feature data types -Feature: TypeAlias = Union[Scalar, Field, TimeSeries, Array] +Feature: TypeAlias = Union[Scalar, Field, Array] # Identifiers diff --git a/src/plaid/utils/base.py b/src/plaid/utils/base.py index fa185adb..fbe1ea77 100644 --- a/src/plaid/utils/base.py +++ b/src/plaid/utils/base.py @@ -9,7 +9,11 @@ # %% Imports +import os +from functools import wraps + import numpy as np +import psutil # %% Functions @@ -50,6 +54,39 @@ def safe_len(obj): return len(obj) if hasattr(obj, "__len__") else 0 +def update_dict_only_new_keys(a: dict, b: dict): + """Update dictionary `a` with keys from `b` that are not already present in `a`.""" + new_keys = b.keys() - a.keys() # set difference is very fast + if new_keys: + a.update({k: b[k] for k in new_keys}) + + +def get_mem(): + """Get the current memory usage of the process in MB.""" + process = psutil.Process(os.getpid()) + # rss = resident set size = actual RAM usage + return process.memory_info().rss / (1024**2) # in MB + + +def delegate_methods(to: str, methods: list[str]): + """Class decorator to forward specific methods from a delegate attribute.""" + + def wrapper(cls): + for name in methods: + + def make_delegate(name): + @wraps(getattr(getattr(cls, to, None), name, lambda *_, **__: None)) + def method(self, *args, **kwargs): + return getattr(getattr(self, to), name)(*args, **kwargs) + + return method + + setattr(cls, name, make_delegate(name)) + return cls + + return wrapper + + class NotAllowedError(Exception): """Exception for not allowed usage.""" diff --git a/src/plaid/utils/cgns_helper.py b/src/plaid/utils/cgns_helper.py index 9de7d7d3..f4f87221 100644 --- a/src/plaid/utils/cgns_helper.py +++ b/src/plaid/utils/cgns_helper.py @@ -9,6 +9,7 @@ import CGNS.PAT.cgnsutils as CGU import numpy as np +import optree from plaid.types import CGNSTree @@ -67,11 +68,11 @@ def get_time_values(tree: CGNSTree) -> np.ndarray: return time_values[0] -def show_cgns_tree(pyTree: list, pre: str = ""): +def show_cgns_tree(pyTree: CGNSTree, pre: str = ""): """Pretty print for CGNS Tree. Args: - pyTree (list): CGNS tree to print + pyTree (CGNSTree): CGNS tree to print pre (str, optional): indentation of print. Defaults to ''. 
""" if not (isinstance(pyTree, list)): @@ -107,11 +108,354 @@ def printValue(node): np.set_printoptions(edgeitems=3, threshold=1000) -def summarize_cgns_tree(pyTree: list, verbose=True) -> str: +def flatten_cgns_tree_optree_dict(pyTree): + """ + Flatten CGNS tree: + - treedef: for unflatten + - data_dict: path -> data + - cgns_types: path -> CGNS type + """ + data_dict = {} + cgns_types = {} + + def visit(node, prefix=""): + name, data, children, cgns_type = node + path = f"{prefix}/{name}" if prefix else name + data_dict[path] = data + cgns_types[path] = cgns_type + + children_struct = tuple(visit(child, prefix=path) for child in (children or [])) + leaf = path # only the path is stored in leaves + return (leaf, children_struct) + + struct_tree = visit(pyTree) + _, treedef = optree.tree_flatten(struct_tree) + return treedef, data_dict, cgns_types + + +def unflatten_cgns_tree_optree_dict(treedef, data_dict, cgns_types): + """ + Reconstruct CGNS tree from: + - treedef: tree structure + - data_dict: path -> data + - cgns_types: path -> CGNS type + """ + + # Rebuild leaves as (path, data) using the path stored in leaves + leaves = [(path, data_dict[path]) for path in data_dict] + + struct_tree = optree.tree_unflatten(treedef, leaves) + + def build_node(struct_node): + leaf, children_tuple = struct_node + path, data = leaf + name = path.split("/")[-1] + cgns_type = cgns_types[path] + children = [build_node(child) for child in children_tuple] + return [name, data, children, cgns_type] + + return build_node(struct_tree) + + +def flatten_cgns_tree_optree(pyTree): + """Flatten CGNS tree.""" + + cgns_types = {} + + def visit(node): + name, data, children, cgns_type = node + cgns_types[name] = cgns_type + children_struct = tuple(visit(child) for child in (children or [])) + leaf = (name, data) + return (leaf, children_struct) + + struct_tree = visit(pyTree) + leaves, treedef = optree.tree_flatten(struct_tree) + return leaves, treedef, cgns_types + + +def unflatten_cgns_tree_optree(leaves, treedef, cgns_types): + """Reconstruct CGNS tree from leaves + treedef.""" + struct_tree = optree.tree_unflatten(treedef, leaves) + + def build_node(struct_node): + leaf, children_tuple = struct_node + name, data = leaf + cgns_type = cgns_types[name] + children = [build_node(child) for child in children_tuple] + return [name, data, children, cgns_type] + + return build_node(struct_tree) + + +# def flatten_cgns_tree_optree(pyTree) -> tuple[list, optree.PyTreeDef]: +# """Flatten CGNS tree.""" + +# def visit(node): +# name, data, children, cgns_type = node +# children_struct = tuple(visit(child) for child in (children or [])) +# # leaf will contain everything except children +# leaf = (name, data, cgns_type) +# return (leaf, children_struct) + +# struct_tree = visit(pyTree) +# leaves, treedef = optree.tree_flatten(struct_tree) +# return leaves, treedef + + +# def unflatten_cgns_tree_optree(leaves, treedef): +# """Reconstruct CGNS tree from leaves + treedef.""" +# struct_tree = optree.tree_unflatten(treedef, leaves) + +# def build_node(struct_node): +# leaf, children_tuple = struct_node +# name, data, cgns_type = leaf +# children = [build_node(child) for child in children_tuple] +# return [name, data, children, cgns_type] + +# return build_node(struct_tree) + + +# ------------- ORIGINAL --------------------------- + + +def flatten_cgns_tree( + pyTree: CGNSTree, +) -> tuple[dict[str, object], dict[str, str], dict[str, object]]: + """Flatten a CGNS tree into dictionaries of primitives for Hugging Face serialization. 
+ + Traverses the CGNS tree and produces: + - flat: a dictionary mapping paths to primitive values (lists, scalars, or None) + - dtypes: a dictionary mapping paths to dtype strings + - extras: a dictionary mapping paths to extra CGNS metadata + + Args: + pyTree (CGNSTree): The CGNS tree to flatten. + + Returns: + tuple[dict[str, object], dict[str, str], dict[str, object]]: + - flat: dict of paths to primitive values + - dtypes: dict of paths to dtype strings + - extras: dict of paths to extra CGNS metadata + + Example: + >>> flat, dtypes, extras = flatten_cgns_tree(pyTree) + >>> flat["Base1/Zone1/Solution1/Field1"] # [1.0, 2.0, ...] + >>> dtypes["Base1/Zone1/Solution1/Field1"] # 'float64' + """ + flat = {} + dtypes = {} + extras = {} + + def visit(tree, path=""): + for node in tree[2]: + name, data, children, extra = node + new_path = f"{path}/{name}" if path else name + + # Flatten values for HF: always primitive types + if isinstance(data, np.ndarray): + if data.dtype.kind == "S": # string arrays + flat[new_path] = [x.decode("utf-8") for x in data.tolist()] + else: + flat[new_path] = data.tolist() + dtypes[new_path] = str(data.dtype) + elif data is None: + flat[new_path] = None + dtypes[new_path] = None + else: + flat[new_path] = data + dtypes[new_path] = str(np.array(data).dtype) + + extras[new_path] = extra + + if children: + visit(node, new_path) + + visit(pyTree) + return flat, dtypes, extras + + +def unflatten_cgns_tree( + flat: dict[str, object], + dtypes: dict[str, str], + cgns_types: dict[str, str], +) -> CGNSTree: + """Reconstruct a CGNS tree from flattened primitives, dtypes, and CGNS type information. + + Args: + flat (dict[str, object]): Dictionary mapping paths to primitive values (lists, scalars, or None). + dtypes (dict[str, str]): Dictionary mapping paths to dtype strings. + cgns_types (dict[str, str]): Dictionary mapping paths to CGNS type names (ending in '_t'). + + Returns: + CGNSTree: The reconstructed CGNSTree node. 
+ + Example: + >>> tree = unflatten_cgns_tree(flat, dtypes, cgns_types) + """ + # Build all nodes from paths + nodes = {} + + for path, value in flat.items(): + dtype = np.dtype(dtypes.get(path)) + cgns_type = cgns_types.get(path) + + # reconstruct data as numpy array or None + if value is None: + data = None + else: + if dtype is None: + data = None + else: + data = np.array(value, dtype=dtype) + + # empty children for now + nodes[path] = [path.split("/")[-1], data, [], cgns_type] + + # Re-link nodes into tree structure + root = None + for path, node in nodes.items(): + parts = path.split("/") + if len(parts) == 1: + # root-level node + if root is None: + root = ["CGNSTree", None, [node], "CGNSTree_t"] + else: + root[2].append(node) + else: + parent_path = "/".join(parts[:-1]) + parent = nodes[parent_path] + parent[2].append(node) + + return root + + +# def unflatten_cgns_tree(flat: Dict[str, Any], +# dtypes: Dict[str, str], +# cgns_types: Dict[str, str]): + +# nodes = {} +# children_map = defaultdict(list) + +# # Precompute parent paths and node names +# parent_map = {} +# names = {} +# for path in flat: +# last_slash = path.rfind("/") +# if last_slash == -1: +# parent_map[path] = None +# names[path] = path +# else: +# parent_map[path] = path[:last_slash] +# names[path] = path[last_slash+1:] +# children_map[path[:last_slash]].append(path) + +# # Build all nodes: [name, data, empty children list, cgns_type] +# for path, value in flat.items(): +# dtype_str = dtypes.get(path) +# dtype = np.dtype(dtype_str) if dtype_str else None +# cgns_type = cgns_types.get(path) +# if value is None or dtype is None: +# data = None +# else: +# data = np.asarray(value, dtype=dtype) +# nodes[path] = [names[path], data, [], cgns_type] + +# # Link children +# for parent_path, child_paths in children_map.items(): +# parent_node = nodes[parent_path] +# parent_node[2].extend(nodes[child] for child in child_paths) + +# # Collect roots +# roots = [nodes[path] for path, p in parent_map.items() if p is None] +# if len(roots) == 1: +# return roots[0] +# else: +# return ["CGNSTree", None, roots, "CGNSTree_t"] + + +def compare_cgns_trees( + tree1: CGNSTree, + tree2: CGNSTree, + path: str = "CGNSTree", +) -> bool: + """Recursively compare two CGNS trees, ignoring the order of children. + + Checks: + - Node name + - Data (numpy arrays or scalars) with exact dtype and value + - Number and names of children + - CGNS type (extra field) + + Args: + tree1 (CGNSTree): The first CGNS tree node. + tree2 (CGNSTree): The second CGNS tree node. + path (str, optional): Path for error reporting. Defaults to "CGNSTree". + + Returns: + bool: True if trees are identical, False otherwise. 
+ + Example: + >>> identical = compare_cgns_trees(tree1, tree2) + """ + # Compare node name + if tree1[0] != tree2[0]: + print(f"Name mismatch at {path}: {tree1[0]} != {tree2[0]}") + return False + + # Compare data + data1, data2 = tree1[1], tree2[1] + + if data1 is None and data2 is None: + pass + elif isinstance(data1, np.ndarray) and isinstance(data2, np.ndarray): + if data1.dtype != data2.dtype: + print( + f"Dtype mismatch at {path}/{tree1[0]}: {data1.dtype} != {data2.dtype}" + ) + return False + if not np.array_equal(data1, data2): + print(f"Data mismatch at {path}/{tree1[0]}") + return False + else: + if isinstance(data1, np.ndarray) or isinstance(data2, np.ndarray): + print(f"Data type mismatch at {path}/{tree1[0]}") + return False + if data1 != data2: + print(f"Data mismatch at {path}/{tree1[0]}: {data1} != {data2}") + return False + + # Compare extra (CGNS type) + extra1, extra2 = tree1[3], tree2[3] + if extra1 != extra2: + print(f"Type mismatch at {path}/{tree1[0]}: {extra1} != {extra2}") + return False + + # Compare children ignoring order + children1_dict = {c[0]: c for c in tree1[2] or []} + children2_dict = {c[0]: c for c in tree2[2] or []} + + if set(children1_dict.keys()) != set(children2_dict.keys()): + print( + f"Children name mismatch at {path}/{tree1[0]}: {set(children1_dict.keys())} != {set(children2_dict.keys())}" + ) + return False + + # Recursively compare children + for name in children1_dict: + if not compare_cgns_trees( + children1_dict[name], children2_dict[name], path=f"{path}/{tree1[0]}" + ): + return False + + return True + + +def summarize_cgns_tree(pyTree: CGNSTree, verbose=True) -> str: """Provide a summary of a CGNS tree's contents. Args: - pyTree (list): The CGNS tree to summarize. + pyTree (CGNSTree): The CGNS tree to summarize. verbose (bool, optional): If True, include detailed field information. Defaults to True. Example: diff --git a/src/plaid/utils/deprecation.py b/src/plaid/utils/deprecation.py index 51f03ebe..c88cd1ce 100644 --- a/src/plaid/utils/deprecation.py +++ b/src/plaid/utils/deprecation.py @@ -11,7 +11,12 @@ import functools import warnings -from typing import Optional +from typing import Optional, Union + +from packaging.version import Version + +import plaid +from plaid.utils.base import DeprecatedError try: from warnings import deprecated as deprecated_builtin # Python 3.13+ @@ -22,7 +27,9 @@ def deprecated( - reason: str, version: Optional[str] = None, removal: Optional[str] = None + reason: str, + version: Optional[Union[str, Version]] = None, + removal: Optional[Union[str, Version]] = None, ): """Decorator to mark a function, method, or class as deprecated. @@ -31,16 +38,29 @@ def deprecated( Args: reason (str): Explanation and suggested replacement. - version (str, optional): Version since deprecation. - removal (str, optional): Planned removal version. + version (Union[str,Version], optional): Version since deprecation. + removal (Union[str,Version], optional): Planned removal version. 
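+
+    Example:
+        >>> # Illustrative sketch; `old_func` is a hypothetical function.
+        >>> @deprecated("Use `new_func` instead.", version="0.1", removal="99.0")
+        ... def old_func():
+        ...     pass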
""" message_parts = [reason] if version: + if isinstance(version, str): + version = Version(version) message_parts.append(f"[since v{version}]") if removal: + if isinstance(removal, str): + removal = Version(removal) message_parts.append(f"(will be removed in v{removal})") full_message = " ".join(message_parts) + if removal and Version(plaid.__version__) >= removal: + full_message = [f"Removed in v{removal}, {reason}"] + + def decorator(_func): + def wrapper(*_args, **_kwargs): + raise DeprecatedError(full_message) + + return wrapper + if deprecated_builtin is not None: # pragma: no cover def decorator(obj): @@ -84,8 +104,8 @@ def deprecated_argument( old_arg: str, new_arg: str, converter=lambda x: x, - version: Optional[str] = None, - removal: Optional[str] = None, + version: Optional[Union[str, Version]] = None, + removal: Optional[Union[str, Version]] = None, ): """Decorator to mark a function argument as deprecated and redirect it to a new argument. @@ -93,34 +113,53 @@ def deprecated_argument( old_arg (str): Name of the old argument. new_arg (str): Name of the new argument. converter (callable): Function to convert the old value into the new format. - version (str, optional): Version since deprecation. - removal (str, optional): Planned removal version. + version (Union[str,Version], optional): Version since deprecation. + removal (Union[str,Version], optional): Planned removal version. """ - message_parts = [f"Argument `{old_arg}` is deprecated, use `{new_arg}` instead."] - if version: - message_parts.append(f"[since v{version}]") - if removal: - message_parts.append(f"(will be removed in v{removal})") - full_message = " ".join(message_parts) + if isinstance(removal, str): + removal = Version(removal) + + if removal and Version(plaid.__version__) >= removal: + full_message = [ + f"Argument `{old_arg}` has been removed in v{removal}, use `{new_arg}` instead." + ] + + def decorator(_func): + def wrapper(*_args, **_kwargs): + raise DeprecatedError(full_message) - def decorator(func): - @functools.wraps(func) - def wrapper(*args, **kwargs): - if old_arg in kwargs: - # Emit deprecation warning - if deprecated_builtin is not None: # pragma: no cover - # In Python 3.13+, link warning to the function itself - decorated = deprecated_builtin( - full_message, category=DeprecationWarning, stacklevel=2 - )(func) - return decorated( - *args, **{new_arg: converter(kwargs.pop(old_arg)), **kwargs} - ) - else: - warnings.warn(full_message, DeprecationWarning, stacklevel=2) - kwargs[new_arg] = converter(kwargs.pop(old_arg)) - return func(*args, **kwargs) - - return wrapper + return wrapper + else: + if isinstance(version, str): + version = Version(version) + + message_parts = [ + f"Argument `{old_arg}` is deprecated, use `{new_arg}` instead." 
     """
-    message_parts = [f"Argument `{old_arg}` is deprecated, use `{new_arg}` instead."]
-    if version:
-        message_parts.append(f"[since v{version}]")
-    if removal:
-        message_parts.append(f"(will be removed in v{removal})")
-    full_message = " ".join(message_parts)
+    if isinstance(removal, str):
+        removal = Version(removal)
+
+    if removal and Version(plaid.__version__) >= removal:
+        full_message = (
+            f"Argument `{old_arg}` has been removed in v{removal}, "
+            f"use `{new_arg}` instead."
+        )
+
+        def decorator(_func):
+            def wrapper(*_args, **_kwargs):
+                raise DeprecatedError(full_message)
 
-    def decorator(func):
-        @functools.wraps(func)
-        def wrapper(*args, **kwargs):
-            if old_arg in kwargs:
-                # Emit deprecation warning
-                if deprecated_builtin is not None:  # pragma: no cover
-                    # In Python 3.13+, link warning to the function itself
-                    decorated = deprecated_builtin(
-                        full_message, category=DeprecationWarning, stacklevel=2
-                    )(func)
-                    return decorated(
-                        *args, **{new_arg: converter(kwargs.pop(old_arg)), **kwargs}
-                    )
-                else:
-                    warnings.warn(full_message, DeprecationWarning, stacklevel=2)
-                    kwargs[new_arg] = converter(kwargs.pop(old_arg))
-                return func(*args, **kwargs)
-
-        return wrapper
+            return wrapper
+    else:
+        if isinstance(version, str):
+            version = Version(version)
+
+        message_parts = [
+            f"Argument `{old_arg}` is deprecated, use `{new_arg}` instead."
+        ]
+        if version:
+            message_parts.append(f"[since v{version}]")
+        if removal:
+            message_parts.append(f"(will be removed in v{removal})")
+        full_message = " ".join(message_parts)
+
+        def decorator(func):
+            @functools.wraps(func)
+            def wrapper(*args, **kwargs):
+                if old_arg in kwargs:
+                    # Emit deprecation warning
+                    if deprecated_builtin is not None:  # pragma: no cover
+                        # In Python 3.13+, link warning to the function itself
+                        decorated = deprecated_builtin(
+                            full_message, category=DeprecationWarning, stacklevel=2
+                        )(func)
+                        return decorated(
+                            *args, **{new_arg: converter(kwargs.pop(old_arg)), **kwargs}
+                        )
+                    else:
+                        warnings.warn(full_message, DeprecationWarning, stacklevel=2)
+                        kwargs[new_arg] = converter(kwargs.pop(old_arg))
+                return func(*args, **kwargs)
+
+            return wrapper
 
     return decorator
diff --git a/src/plaid/utils/stats.py b/src/plaid/utils/stats.py
index 0f1794e8..23a8f9a6 100644
--- a/src/plaid/utils/stats.py
+++ b/src/plaid/utils/stats.py
@@ -277,11 +277,11 @@ def add_dataset(self, dset: Dataset) -> None:
     def add_samples(self, samples: Union[list[Sample], Dataset]) -> None:
         """Add samples or a dataset to compute statistics for.
 
-        Compute stats for each features present in the samples among scalars, fields and time_series.
-        For fields and time_series, as long as the added samples have the same shape as the existing ones,
+        Compute stats for each feature present in the samples among scalars and fields.
+        For fields, as long as the added samples have the same shape as the existing ones,
         the stats will be computed per-coordinates (n_features=x.shape[-1]).
-        But as soon as the shapes differ, the stats and added fields/time_series will be flattened (n_features=1),
-        then stats will be computed over all values of the field/time_series.
+        But as soon as the shapes differ, the stats and the added fields will be flattened (n_features=1),
+        and the stats will then be computed over all values of the field.
 
         Args:
             samples (Union[list[Sample], Dataset]): List of samples or dataset to process
@@ -304,9 +304,6 @@ def add_samples(self, samples: Union[list[Sample], Dataset]) -> None:
         # Process fields
         self._process_field_data(sample, new_data)
 
-        # ---# Time-Series
-        self._process_time_series_data(sample, new_data)
-
         # ---# SpatialSupport (Meshes)
         # TODO
 
@@ -384,50 +381,6 @@ def _process_scalar_data(self, sample: Sample, data_dict: dict[str, list]) -> No
         if value is not None:
             data_dict[name].append(np.array(value).reshape((1, -1)))
 
-    def _process_time_series_data(
-        self, sample: Sample, data_dict: dict[str, list]
-    ) -> None:
-        """Process time series data from a sample.
-
-        Args:
-            sample (Sample): Sample containing time series data
-            data_dict (dict[str, list]): Dictionary to store processed data
-        """
-        for name in sample.get_time_series_names():
-            timestamps, time_series = sample.get_time_series(name)
-            timestamps = timestamps.reshape((1, -1))
-            time_series = time_series.reshape((1, -1))
-
-            timestamps_name = f"timestamps/{name}"
-            time_series_name = f"time_series/{name}"
-            if timestamps_name not in data_dict:
-                assert time_series_name not in data_dict
-                data_dict[timestamps_name] = []
-                data_dict[time_series_name] = []
-            if timestamps is not None and time_series is not None:
-                # check if all previous arrays are the same shape as the new one that will be added to data_dict[stat_key]
-                if len(
-                    data_dict[time_series_name]
-                ) > 0 and not self._feature_is_flattened.get(time_series_name, False):
-                    prev_shape = data_dict[time_series_name][0].shape
-                    if time_series.shape != prev_shape:
-                        # set this stat as flattened
-                        self._feature_is_flattened[timestamps_name] = True
-                        self._feature_is_flattened[time_series_name] = True
-                        # flatten corresponding stat
-                        if time_series_name in self._stats:
-                            self._stats[time_series_name].flatten_array()
-
-                if self._feature_is_flattened.get(time_series_name, False):
-                    timestamps = timestamps.reshape((-1, 1))
-                    time_series = time_series.reshape((-1, 1))
-                else:
-                    timestamps = timestamps.reshape((1, -1))
-                    time_series = time_series.reshape((1, -1))
-
-            data_dict[timestamps_name].append(timestamps)
-            data_dict[time_series_name].append(time_series)
-
     def _process_field_data(self, sample: Sample, data_dict: dict[str, list]) -> None:
         """Process field data from a sample.
 
@@ -435,9 +388,9 @@ def _process_field_data(self, sample: Sample, data_dict: dict[str, list]) -> Non
             sample (Sample): Sample containing field data
             data_dict (dict[str, list]): Dictionary to store processed data
         """
-        for time in sample.meshes.get_all_mesh_times():
-            for base_name in sample.meshes.get_base_names(time=time):
-                for zone_name in sample.meshes.get_zone_names(
+        for time in sample.features.get_all_mesh_times():
+            for base_name in sample.features.get_base_names(time=time):
+                for zone_name in sample.features.get_zone_names(
                     base_name=base_name, time=time
                 ):
                     for location in CGNS_FIELD_LOCATIONS:
diff --git a/tests/bridges/generate_and_push_hf_dataset_new.py b/tests/bridges/generate_and_push_hf_dataset_new.py
new file mode 100644
index 00000000..2ad6ce34
--- /dev/null
+++ b/tests/bridges/generate_and_push_hf_dataset_new.py
@@ -0,0 +1,175 @@
+import io
+import os
+
+import yaml
+from datasets import Dataset, DatasetDict, Features
+from huggingface_hub import HfApi
+
+os.environ["HF_HUB_DISABLE_XET"] = "1"
+
+from tqdm import tqdm
+
+from plaid.bridges import huggingface_bridge
+from plaid.utils.base import update_dict_only_new_keys
+from plaid.utils.cgns_helper import flatten_cgns_tree
+
+if __name__ == "__main__":
+    print("Loading hf dataset old")
+    hf_dataset = huggingface_bridge.load_hf_dataset_from_hub(
+        "PLAID-datasets/Tensile2d", split="all_samples"
+    )
+    print("loaded")
+    pb_def = huggingface_bridge.huggingface_description_to_problem_definition(
+        hf_dataset.description
+    )
+    infos = huggingface_bridge.huggingface_description_to_infos(hf_dataset.description)
+
+    all_feat_names = (
+        pb_def.get_input_scalars_names()
+        + pb_def.get_output_scalars_names()
+        + pb_def.get_output_fields_names()
+    )
+
+    print("Converting hf dataset old to plaid dataset")
+    plaid_dataset = huggingface_bridge.huggingface_dataset_to_plaid(
+        hf_dataset, processes_number=5, verbose=True
+    )
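+
+    # Optional sanity check (a sketch, not part of the pipeline): flattening then
+    # unflattening a tree should reproduce it exactly. It assumes at least one
+    # sample, and would additionally need `unflatten_cgns_tree` and
+    # `compare_cgns_trees` imported from plaid.utils.cgns_helper:
+    #   tree0 = plaid_dataset[0].features.data[0]
+    #   flat0, dtypes0, cgns_types0 = flatten_cgns_tree(tree0)
+    #   assert compare_cgns_trees(tree0, unflatten_cgns_tree(flat0, dtypes0, cgns_types0))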
+
+    def flat_tree_generator(flat_trees):
+        """
+        Generator yielding samples from a list of flat_trees.
+        Each flat_tree is a dict {key -> value}.
+        """
+        for ft in flat_trees:
+            yield ft
+
+    def make_hf_dataset(flat_tree_list, hf_features):
+        """
+        Create a HuggingFace dataset from a list of flat_trees and an explicit
+        features schema.
+        """
+        dataset = Dataset.from_generator(
+            lambda: flat_tree_generator(flat_tree_list),
+            features=Features(hf_features),
+        )
+        return dataset
+
+    print("flattening trees and inferring hf features")
+
+    dtypes = {}
+    cgns_types = {}
+    hf_features = {}
+
+    flat_tree_list = {}
+
+    split_names = ["train_500", "test", "OOD"]
+
+    for split_name in split_names:
+        flat_tree_list[split_name] = []
+
+        for id in tqdm(pb_def.get_split(split_name), desc=f"Processing {split_name}"):
+            sample = plaid_dataset[id]
+            flat_tree, dtypes_, cgns_types_ = flatten_cgns_tree(sample.features.data[0])
+            update_dict_only_new_keys(dtypes, dtypes_)
+            update_dict_only_new_keys(cgns_types, cgns_types_)
+
+            hf_features_ = huggingface_bridge.infer_hf_features(flat_tree, dtypes)
+            update_dict_only_new_keys(hf_features, hf_features_)
+
+            flat_tree_list[split_name].append(flat_tree)
+
+    # Map each short feature name to its full CGNS path (first match wins).
+    features_names = {}
+    for fn in all_feat_names:
+        for large_name in cgns_types.keys():
+            if "/" + fn in large_name:
+                features_names[fn] = large_name
+                break
+
+    raise SystemExit("early exit while debugging; remove this line to push to the hub")
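+
+    # Note on the loop above: `update_dict_only_new_keys` keeps the first value
+    # seen for each path, e.g. (minimal sketch):
+    #   a = {"x": 1}
+    #   update_dict_only_new_keys(a, {"x": 2, "y": 3})
+    #   a == {"x": 1, "y": 3}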
+ + print("Pushing key_mappings, pb_def and infos to the hub") + + repo_id = "fabiencasenave/Tensile2d_test2" + + key_mappings = {} + key_mappings["features_names"] = features_names + key_mappings["dtypes"] = dtypes + key_mappings["cgns_types"] = cgns_types + + api = HfApi() + yaml_str = yaml.dump(key_mappings) + yaml_buffer = io.BytesIO(yaml_str.encode("utf-8")) + api.upload_file( + path_or_fileobj=yaml_buffer, + path_in_repo="key_mappings.yaml", + repo_id=repo_id, + repo_type="dataset", + commit_message="Upload key_mappings.yaml", + ) + + huggingface_bridge.push_dataset_infos_to_hub(repo_id, infos) + huggingface_bridge.push_problem_definition_to_hub(repo_id, "task_1", pb_def) + + print("making hf datasets and pushing to the hub") + + dict_of_hf_datasets = {} + for split_name in split_names: + dict_of_hf_datasets[split_name] = make_hf_dataset( + flat_tree_list[split_name], hf_features + ) + + dset_dict = DatasetDict(dict_of_hf_datasets) + # huggingface_bridge.push_dataset_dict_to_hub(repo_id, dset_dict) + + # ------------------------------------------------------------------------------------------------------------------- + # SOME TESTS BELOW + + # ds = dset_dict["train_500"] + + # arrow_table = ds.data # this is a pyarrow.Table + # arrow_table = arrow_table.select(["Base_2_2/Zone/ZoneBC/Bottom/PointList", "Base_2_2/Zone/PointData/sig11"]) + + # list_array = arrow_table["Base_2_2/Zone/PointData/sig11"][0] # pyarrow.ListArray + # print("arrow?", type(list_array)) + # print("list?", type(ds[0]["Base_2_2/Zone/PointData/sig11"])) + # values = list_array.values # contiguous buffer + + # print() + + # try: + # np_values = values.to_numpy(zero_copy_only=True) # true zero-copy NumPy + # print("zero copy retrieval OK!") + # except: + # print("zero copy retrieval not OK!") + + # print() + + # flat_tree0 = huggingface_bridge.reconstruct_flat_tree_from_hf_sample(ds[0], dtypes) + # unflatten_tree0 = unflatten_cgns_tree(flat_tree0, dtypes, cgns_types) + + # show_cgns_tree(unflatten_tree0) + + # print("trees identical?:", compare_cgns_trees(plaid_dataset[0].features.data[0], unflatten_tree0)) + + # from plaid import Sample + # from plaid.containers.features import SampleFeatures + # sample = Sample(features=SampleFeatures({0.:unflatten_tree0})) + + # print(sample.get_field("U1")) + # print(sample.get_scalar("p1")) diff --git a/tests/bridges/load_and_test_hf_dataset_new.py b/tests/bridges/load_and_test_hf_dataset_new.py new file mode 100644 index 00000000..aacadb33 --- /dev/null +++ b/tests/bridges/load_and_test_hf_dataset_new.py @@ -0,0 +1,145 @@ +from time import time + +import yaml +from huggingface_hub import hf_hub_download + +from plaid.bridges import huggingface_bridge +from plaid.utils.base import get_mem +from plaid.utils.cgns_helper import ( + compare_cgns_trees, + flatten_cgns_tree, + flatten_cgns_tree_optree, + unflatten_cgns_tree, + unflatten_cgns_tree_optree, +) + +if __name__ == "__main__": + print("Initializations:") + + hf_dataset_old = huggingface_bridge.load_hf_dataset_from_hub( + "PLAID-datasets/Tensile2d", split="all_samples" + ) + + repo_id = "fabiencasenave/Tensile2d_test2" + infos = huggingface_bridge.load_hf_infos_from_hub(repo_id) + pb_def = huggingface_bridge.load_hf_problem_definition_from_hub(repo_id, "task_1") + + train_id = range(500) + + yaml_path = hf_hub_download( + repo_id=repo_id, filename="key_mappings.yaml", repo_type="dataset" + ) + with open(yaml_path, "r", encoding="utf-8") as f: + key_mappings = yaml.safe_load(f) + + fn = key_mappings["features_names"] + fn["P"] 
= "Global/P" + dtypes = key_mappings["dtypes"] + cgns_types = key_mappings["cgns_types"] + + fnn = list(fn.keys()) + + print() + print("Experience 1: zero copy columnar retrieval") + print() + + start = time() + hf_dataset_new = huggingface_bridge.load_hf_dataset_from_hub( + repo_id, split="train_500" + ) + end = time() + print("Time to instanciate cached HF dataset =", end - start) + + print("Initial RAM usage:", get_mem(), "MB") + start = time() + all_data = {} + for i in range(len(hf_dataset_new)): + for n in fnn: + all_data[(i, n)] = hf_dataset_new.data[fn[n]][i].values.to_numpy( + zero_copy_only=True + ) + end = time() + print("Time to initiate numpy objects for all the data =", end - start) + print("RAM usage after loop:", get_mem(), "MB") + + print("check retrieval: sig11=", all_data[(256, "sig11")]) + + print() + + # arrow_table = hf_dataset_new.data # this is a pyarrow.Table + # arrow_table = arrow_table.select([fn["P"], fn["sig11"]]) + + print("Experience 2: plaid dataset generation from HF dataset: old vs new") + print() + + start = time() + plaid_dataset = huggingface_bridge.huggingface_dataset_to_plaid( + hf_dataset_old, ids=train_id, processes_number=1, verbose=True + ) + end = time() + print("binary blob conversion plaid dataset generation =", end - start) + + # tree = plaid_dataset[0].features.data[0] + + # leaves, treedef, data_dict, cgns_types_dict = flatten_cgns_tree_optree(tree) + # # print(leaves[0], leaves[1], leaves[2], leaves[3], leaves[4]) + # # print(type(leaves)) + # print(treedef) + # print(type(treedef)) + # start = time() + # for _ in range(1000): + # unflat = unflatten_cgns_tree_optree(leaves, treedef, data_dict, cgns_types_dict) + # end = time() + # print("1000 unflatten_cgns_tree_optree duration =", end - start) + + # flat, dtypes, cgns_types = flatten_cgns_tree(tree) + # start = time() + # for _ in range(1000): + # unflat = unflatten_cgns_tree(flat, dtypes, cgns_types) + # end = time() + # print("1000 unflatten_cgns_tree duration =", end - start) + + # print( + # "first sample CGNS trees identical?:", + # compare_cgns_trees(tree, unflat), + # ) + + # # show_cgns_tree(tree) + # # print("--------------") + # # show_cgns_tree(unflat) + + + # 1.0 / 0.0 + + start = time() + plaid_dataset_new = huggingface_bridge.huggingface_dataset_to_plaid_new( + hf_dataset_new, dtypes, cgns_types, processes_number=12 + ) + end = time() + + print("tree deflatenning plaid dataset generation =", end - start) + + print( + "first sample CGNS trees identical?:", + compare_cgns_trees( + plaid_dataset[0].features.data[0], plaid_dataset_new[0].features.data[0] + ), + ) + + 1.0 / 0.0 + + print() + print("Experience 3: new HF dataset streaming retrieval time") + print() + + start = time() + hf_dataset_test = huggingface_bridge.load_hf_dataset_from_hub( + repo_id, split="train_500", streaming=True + ) + hf_dataset_col = hf_dataset_test.select_columns(list(fn.values())) + print("Streaming hf dataset new") + for sample in hf_dataset_col: + for n in fnn: + sample[fn[n]] + end = time() + print("Duration streaming retrieval =", end - start) diff --git a/tests/bridges/temp.py b/tests/bridges/temp.py new file mode 100644 index 00000000..86b1f3cd --- /dev/null +++ b/tests/bridges/temp.py @@ -0,0 +1,361 @@ +import io +import os + +import yaml +from datasets import Dataset, DatasetDict, Features +from huggingface_hub import HfApi + +os.environ["HF_HUB_DISABLE_XET"] = "1" + +from tqdm import tqdm + +from plaid.bridges import huggingface_bridge +from plaid.utils.base import update_dict_only_new_keys 
+from plaid.utils.cgns_helper import ( + flatten_cgns_tree, flatten_cgns_tree_optree, + flatten_cgns_tree_optree_dict, unflatten_cgns_tree_optree_dict +) + +print("Loading hf dataset old") +hf_dataset = huggingface_bridge.load_hf_dataset_from_hub( + "PLAID-datasets/Tensile2d", split="all_samples" +) +print("loaded") +pb_def = huggingface_bridge.huggingface_description_to_problem_definition( + hf_dataset.description +) +infos = huggingface_bridge.huggingface_description_to_infos(hf_dataset.description) + +all_feat_names = ( + pb_def.get_input_scalars_names() + + pb_def.get_output_scalars_names() + + pb_def.get_output_fields_names() +) + +print("Converting hf dataset old to plaid dataet") +plaid_dataset = huggingface_bridge.huggingface_dataset_to_plaid( + hf_dataset, processes_number=5, verbose=True +) + + + +from datasets import Dataset, DatasetDict, Features, Value, Sequence +import numpy as np + +# -------------------------- +# Infer HF feature type from actual value +# -------------------------- +def infer_hf_features_from_value(value): + if value is None: + return Value("null") + + # Scalars + if np.isscalar(value): + dtype = np.array(value).dtype + if np.issubdtype(dtype, np.floating): + return Value("float32") + elif np.issubdtype(dtype, np.integer): + return Value("int64") + elif np.issubdtype(dtype, np.bool_): + return Value("bool") + else: + return Value("string") + + # Arrays / lists + elif isinstance(value, (list, tuple, np.ndarray)): + arr = np.array(value) + base_type = infer_hf_features_from_value(arr.flat[0] if arr.size > 0 else None) + if arr.ndim == 1: + return Sequence(base_type) + elif arr.ndim == 2: + return Sequence(Sequence(base_type)) + elif arr.ndim == 3: + return Sequence(Sequence(Sequence(base_type))) + else: + raise TypeError(f"Unsupported ndim: {arr.ndim}") + else: + return Value("string") + + + + +# -------------------------- +# Collect schema from all trees (union of paths) +# -------------------------- +# def collect_schema_from_trees_data(all_trees): +# """ +# Collect union of all paths and infer HF features from actual tree data. 
+# """ +# global_types = {} +# for tree in all_trees: +# _, data_dict, cgns_types = flatten_cgns_tree_optree_dict(tree) +# for path, value in data_dict.items(): +# if path not in global_types: +# global_types[path] = infer_hf_features_from_value(value) +# return global_types, Features(global_types) + + +import pickle +import base64 + +def serialize_treedef(treedef): + # Convert to bytes + data_bytes = pickle.dumps(treedef) + # Encode as base64 string so it can be stored as HF Value("string") + return base64.b64encode(data_bytes).decode("utf-8") + + + + + +def collect_schema_from_trees_data(all_trees): + """ + Collect union of all paths across all trees and produce: + - global_cgns_types: path → CGNS type + - hf_features: HuggingFace Features inferred from actual data + """ + global_cgns_types = {} + global_types = {} + + for tree in all_trees: + _, data_dict, cgns_types = flatten_cgns_tree_optree_dict(tree) + for path, value in data_dict.items(): + # Update CGNS types + if path not in global_cgns_types: + global_cgns_types[path] = cgns_types[path] + else: + # Optional: sanity check for conflicts + if global_cgns_types[path] != cgns_types[path]: + raise ValueError( + f"Conflict for path '{path}': {global_cgns_types[path]} vs {cgns_types[path]}" + ) + + # Infer HF feature from value + if path not in global_types: + global_types[path] = infer_hf_features_from_value(value) + # else: already inferred from previous tree + + global_types["treedef"] = Value("string") + hf_features = Features(global_types) + return global_cgns_types, hf_features + +# -------------------------- +# Sample generator +# -------------------------- +def sample_generator(trees, global_cgns_types): + for tree in trees: + treedef, data_dict, _ = flatten_cgns_tree_optree_dict(tree) + sample = {path: None for path in global_cgns_types.keys()} + for path, val in data_dict.items(): + sample[path] = val + sample["treedef"] = serialize_treedef(treedef) + yield sample + +# -------------------------- +# Build DatasetDict +# -------------------------- +def build_hf_dataset_dict(split_names, plaid_dataset, pb_def): + # First pass: collect schema across all splits + all_trees = [] + for split_name in split_names: + trees_list = [plaid_dataset[id].features.data[0.] for id in pb_def.get_split(split_name)] + all_trees.extend(trees_list) + + global_cgns_types, features = collect_schema_from_trees_data(all_trees) + + # Build each split + dict_of_hf_datasets = {} + for split_name in split_names: + trees_list = [plaid_dataset[id].features.data[0.] for id in pb_def.get_split(split_name)] + dict_of_hf_datasets[split_name] = Dataset.from_generator( + lambda trees=trees_list: sample_generator(trees, global_cgns_types), + features=features + ) + + return DatasetDict(dict_of_hf_datasets) + +# -------------------------- +# Usage example +# -------------------------- +split_names = ["train_500", "test", "OOD"] +dset_dict = build_hf_dataset_dict(split_names, plaid_dataset, pb_def) + +# Push to HuggingFace Hub +repo_id = "fabiencasenave/Tensile2d_test3" +huggingface_bridge.push_dataset_dict_to_hub(repo_id, dset_dict) + +1./0. + + + + + + +# tree = plaid_dataset[0].features.data[0.] + +# leaves, treedef, data_dict, cgns_types_dict = flatten_cgns_tree_optree(tree) + +# print(f"{leaves = }") +# print("------") +# print(f"{treedef = }") +# print("------") +# print(f"{data_dict = }") +# print("------") +# print(f"{cgns_types_dict = }") + + + + +# trees = [plaid_dataset[id].features.data[0.] 
for id in pb_def.get_split("train_500")] + +# global_types, features = collect_schema_from_trees(trees) + +# print(f"{global_types = }") +# print("------") +# print(f"{features = }") + + + + + + + + + + +def flat_tree_generator(flat_trees): + """ + Generator yielding samples from a list of flat_trees. + Each flat_tree is a dict {key -> value}. + """ + for ft in flat_trees: + yield ft + +def make_hf_dataset(flat_tree_list, hf_features): + """ + Create a HuggingFace dataset from a list of flat_trees and dtypes. + The features schema is inferred automatically. + """ + dataset = Dataset.from_generator( + lambda: flat_tree_generator(flat_tree_list), + features=Features(hf_features), + ) + return dataset + +print("flattening trees and infering hf features") + +dtypes = {} +cgns_types = {} +hf_features = {} + +flat_tree_list = {} + +split_names = ["train_500", "test", "OOD"] + +# for split_name in split_names: +# flat_tree_list[split_name] = [] + +# for id in tqdm(pb_def.get_split(split_name), desc=f"Processing {split_name}"): +# sample = plaid_dataset[id] +# flat_tree, dtypes_, cgns_types_ = flatten_cgns_tree(sample.features.data[0]) +# update_dict_only_new_keys(dtypes, dtypes_) +# update_dict_only_new_keys(cgns_types, cgns_types_) + +# hf_features_ = huggingface_bridge.infer_hf_features(flat_tree, dtypes) +# update_dict_only_new_keys(hf_features, hf_features_) + +# flat_tree_list[split_name].append(flat_tree) + +for split_name in split_names: + flat_tree_list[split_name] = [] + + for id in tqdm(pb_def.get_split(split_name), desc=f"Processing {split_name}"): + sample = plaid_dataset[id] + leaves, treedef = flatten_cgns_tree_optree(sample.features.data[0]) + # update_dict_only_new_keys(dtypes, dtypes_) + # update_dict_only_new_keys(cgns_types, cgns_types_) + + hf_features_ = huggingface_bridge.infer_hf_features(flat_tree, dtypes) + # update_dict_only_new_keys(hf_features, hf_features_) + + flat_tree_list[split_name].append(flat_tree) + +features_names = {} +for fn in all_feat_names: + for large_name in cgns_types.keys(): + if "/" + fn in large_name: + features_names[fn] = large_name + continue + +1./0. 
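+
+
+# Inverse of `serialize_treedef` defined above; the same helper lives in
+# temp2.py and is mirrored here for completeness (a sketch, assuming optree
+# treedefs pickle cleanly, as `serialize_treedef` already does).
+def deserialize_treedef(serialized_str):
+    # Decode the base64 string back to bytes, then unpickle the treedef.
+    data_bytes = base64.b64decode(serialized_str.encode("utf-8"))
+    return pickle.loads(data_bytes)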
+ +print("Pushing key_mappings, pb_def and infos to the hub") + +repo_id = "fabiencasenave/Tensile2d_test2" + +key_mappings = {} +key_mappings["features_names"] = features_names +key_mappings["dtypes"] = dtypes +key_mappings["cgns_types"] = cgns_types + +api = HfApi() +yaml_str = yaml.dump(key_mappings) +yaml_buffer = io.BytesIO(yaml_str.encode("utf-8")) +api.upload_file( + path_or_fileobj=yaml_buffer, + path_in_repo="key_mappings.yaml", + repo_id=repo_id, + repo_type="dataset", + commit_message="Upload key_mappings.yaml", +) + +huggingface_bridge.push_dataset_infos_to_hub(repo_id, infos) +huggingface_bridge.push_problem_definition_to_hub(repo_id, "task_1", pb_def) + +print("making hf datasets and pushing to the hub") + +dict_of_hf_datasets = {} +for split_name in split_names: + dict_of_hf_datasets[split_name] = make_hf_dataset( + flat_tree_list[split_name], hf_features + ) + +dset_dict = DatasetDict(dict_of_hf_datasets) +# huggingface_bridge.push_dataset_dict_to_hub(repo_id, dset_dict) + +# ------------------------------------------------------------------------------------------------------------------- +# SOME TESTS BELOW + +# ds = dset_dict["train_500"] + +# arrow_table = ds.data # this is a pyarrow.Table +# arrow_table = arrow_table.select(["Base_2_2/Zone/ZoneBC/Bottom/PointList", "Base_2_2/Zone/PointData/sig11"]) + +# list_array = arrow_table["Base_2_2/Zone/PointData/sig11"][0] # pyarrow.ListArray +# print("arrow?", type(list_array)) +# print("list?", type(ds[0]["Base_2_2/Zone/PointData/sig11"])) +# values = list_array.values # contiguous buffer + +# print() + +# try: +# np_values = values.to_numpy(zero_copy_only=True) # true zero-copy NumPy +# print("zero copy retrieval OK!") +# except: +# print("zero copy retrieval not OK!") + +# print() + +# flat_tree0 = huggingface_bridge.reconstruct_flat_tree_from_hf_sample(ds[0], dtypes) +# unflatten_tree0 = unflatten_cgns_tree(flat_tree0, dtypes, cgns_types) + +# show_cgns_tree(unflatten_tree0) + +# print("trees identical?:", compare_cgns_trees(plaid_dataset[0].features.data[0], unflatten_tree0)) + +# from plaid import Sample +# from plaid.containers.features import SampleFeatures +# sample = Sample(features=SampleFeatures({0.:unflatten_tree0})) + +# print(sample.get_field("U1")) +# print(sample.get_scalar("p1")) diff --git a/tests/bridges/temp2.py b/tests/bridges/temp2.py new file mode 100644 index 00000000..8baf3dd7 --- /dev/null +++ b/tests/bridges/temp2.py @@ -0,0 +1,244 @@ +from time import time + +import yaml +from huggingface_hub import hf_hub_download + +from plaid.bridges import huggingface_bridge +from plaid.utils.base import get_mem +from plaid.utils.cgns_helper import ( + compare_cgns_trees, + flatten_cgns_tree, + flatten_cgns_tree_optree, + show_cgns_tree, + unflatten_cgns_tree, + unflatten_cgns_tree_optree, + flatten_cgns_tree_optree_dict, + unflatten_cgns_tree_optree_dict +) + +from tqdm import tqdm + +from plaid import Dataset, ProblemDefinition, Sample +from plaid.containers.features import SampleFeatures + +import pickle +import base64 + +def deserialize_treedef(serialized_str): + # Decode base64, then unpickle + data_bytes = base64.b64decode(serialized_str.encode("utf-8")) + return pickle.loads(data_bytes) + +print("Initializations:") + +hf_dataset_old = huggingface_bridge.load_hf_dataset_from_hub( + "PLAID-datasets/Tensile2d", split="all_samples" +) + +repo_id = "fabiencasenave/Tensile2d_test3" + + +train_id = range(500) + + +print() +print("Experience 1: zero copy columnar retrieval") +print() + +yaml_path = 
hf_hub_download( + repo_id="fabiencasenave/Tensile2d_test2", filename="key_mappings.yaml", repo_type="dataset" +) +with open(yaml_path, "r", encoding="utf-8") as f: + key_mappings = yaml.safe_load(f) + +fn = key_mappings["features_names"] +fn["P"] = "Global/P" + +fn = {k:"CGNSTree/"+v for k,v in fn.items()} +fnn = list(fn.keys()) + +start = time() +hf_dataset_new = huggingface_bridge.load_hf_dataset_from_hub( + repo_id, split="train_500" +) +end = time() +print("Time to instanciate cached HF dataset =", end - start) + +cols = hf_dataset_new.column_names + +print("Initial RAM usage:", get_mem(), "MB") +start = time() +all_data = {} +for i in range(len(hf_dataset_new)): + for n in fnn: + all_data[(i, n)] = hf_dataset_new.data[fn[n]][i].values.to_numpy( + zero_copy_only=True + ) +end = time() +print("Time to initiate numpy objects for all the data =", end - start) +print("RAM usage after loop:", get_mem(), "MB") + +print() + +print("Experience 2: plaid dataset generation from HF dataset: old vs new") +print() + +start = time() +plaid_dataset = huggingface_bridge.huggingface_dataset_to_plaid( + hf_dataset_old, ids=train_id, processes_number=1, verbose=True +) +end = time() +print("binary blob conversion plaid dataset generation =", end - start) + +tree = plaid_dataset[0].features.data[0] + + +# print(treedef) +# print(type(treedef)) +# start = time() +# for _ in range(1000): +# unflat = unflatten_cgns_tree_optree(leaves, treedef, cgns_types_dict) +# end = time() +# print("1000 unflatten_cgns_tree_optree duration =", end - start) + + +# leaves, treedef, cgns_types_dict = flatten_cgns_tree_optree(tree) +# unflat = unflatten_cgns_tree_optree(leaves, treedef, cgns_types_dict) + + +treedef, data_dict, cgns_types = flatten_cgns_tree_optree_dict(tree) +unflat = unflatten_cgns_tree_optree_dict(treedef, data_dict, cgns_types) + +start = time() +for _ in range(1000): + unflat = unflatten_cgns_tree_optree_dict(treedef, data_dict, cgns_types) +end = time() +print("1000 unflatten_cgns_tree_optree duration =", end - start) + +# show_cgns_tree(tree) +# print("--------------") +# show_cgns_tree(unflat) + +print("first sample CGNS trees identical?:", compare_cgns_trees(tree, unflat),) + +flat, dtypes, cgns_types_ = flatten_cgns_tree(tree) +start = time() +for _ in range(1000): + unflat = unflatten_cgns_tree(flat, dtypes, cgns_types_) +end = time() +print("1000 unflatten_cgns_tree duration =", end - start) + + + +hf_dataset_new.set_format("numpy") + + +from concurrent.futures import ProcessPoolExecutor +from time import time + +def treat(idx): + sample = hf_dataset_new[idx] # pull row inside worker + treedef = deserialize_treedef(str(sample.pop("treedef"))) + unflat = unflatten_cgns_tree_optree_dict(treedef, sample, cgns_types) + return Sample(features=SampleFeatures({0.0: unflat})) + +description = "Building Samples" + +start = time() +with ProcessPoolExecutor(max_workers=12) as executor: + # submit tasks for all indices + futures = [executor.submit(treat, i) for i in range(len(hf_dataset_new))] + + # iterate results with progress bar + list_of_samples = [] + for f in tqdm(futures, desc=description): + list_of_samples.append(f.result()) +plaid_dataset_new = Dataset(samples=list_of_samples) +end = time() + +print("Parallel time:", end - start) + + +description = "Converting Hugging Face dataset to plaid" + +sample_list = [] +t1=t2=t3=0 + +start = time() +# convert once for all samples +# all_columns = {col: hf_dataset_new[col].to_numpy(zero_copy_only=False) for col in hf_dataset_new.column_names} +tic = time() 
+# hf_dataset_new.set_format("numpy") +t0 = time()-tic +for idx in tqdm(range(len(hf_dataset_new)), desc=description): + tic = time() + # data_dict = huggingface_bridge.reconstruct_flat_tree_from_hf_sample2(hf_dataset_new, idx) + # data_dict = hf_dataset_new[idx]#huggingface_bridge.reconstruct_flat_tree_from_hf_sample2(hf_dataset_new[0]) + # data_dict = {key:hf_dataset_new.data[key][idx].values.to_numpy() for key in hf_dataset_new.data.keys()} + # data_dict = {key: hf_dataset_new.data[key][idx] for key in hf_dataset_new.column_names} + # data_dict = {col: hf_dataset_new[idx][col] for col in hf_dataset_new.column_names} + data_dict = hf_dataset_new[idx] + t1 += time()-tic + tic = time() + treedef = deserialize_treedef(str(data_dict.pop("treedef"))) + t2 += time()-tic + tic = time() + unflat = unflatten_cgns_tree_optree_dict(treedef, data_dict, cgns_types) + t3 += time()-tic + sample_list.append(Sample(features=SampleFeatures({0.0: unflat}))) +plaid_dataset_new = Dataset(samples=sample_list) +end = time() +print("indiv times =", t0, t1, t2, t3) +print("tree deflatenning plaid dataset generation =", end - start) + +# show_cgns_tree(unflat) + + +# print("tree deflatenning plaid dataset generation =", end - start) +# data_dict = huggingface_bridge.reconstruct_flat_tree_from_hf_sample2(hf_dataset_new[0]) +# treedef = deserialize_treedef(data_dict.pop('treedef')) +# unflat = unflatten_cgns_tree_optree_dict(treedef, data_dict, cgns_types) + +# show_cgns_tree(tree) +# print("--------------") +# show_cgns_tree(unflat) + +# print("first sample CGNS trees identical?:", compare_cgns_trees(tree, unflat),) + + +# unflat = unflatten_cgns_tree_optree(leaves, treedef, data_dict, cgns_types_dict) +# print(flat_tree) +1./0. + +start = time() +plaid_dataset_new = huggingface_bridge.huggingface_dataset_to_plaid_new2( + hf_dataset_new, dtypes, cgns_types, processes_number=1 +) +end = time() + +print("tree deflatenning plaid dataset generation =", end - start) + +print( + "first sample CGNS trees identical?:", + compare_cgns_trees( + plaid_dataset[0].features.data[0], plaid_dataset_new[0].features.data[0] + ), +) + +1.0 / 0.0 + +print() +print("Experience 3: new HF dataset streaming retrieval time") +print() + +start = time() +hf_dataset_test = huggingface_bridge.load_hf_dataset_from_hub( + repo_id, split="train_500", streaming=True +) +hf_dataset_col = hf_dataset_test.select_columns(list(fn.values())) +print("Streaming hf dataset new") +for sample in hf_dataset_col: + for n in fnn: + sample[fn[n]] +end = time() +print("Duration streaming retrieval =", end - start) diff --git a/tests/bridges/test_huggingface_bridge.py b/tests/bridges/test_huggingface_bridge.py index cbd98075..a905007b 100644 --- a/tests/bridges/test_huggingface_bridge.py +++ b/tests/bridges/test_huggingface_bridge.py @@ -8,6 +8,8 @@ # %% Imports import pickle +import shutil +from pathlib import Path from typing import Callable import pytest @@ -19,6 +21,11 @@ from plaid.problem_definition import ProblemDefinition +@pytest.fixture() +def current_directory(): + return Path(__file__).absolute().parent + + # %% Fixtures @pytest.fixture() def dataset(samples, infos) -> Dataset: @@ -39,35 +46,46 @@ def problem_definition() -> ProblemDefinition: @pytest.fixture() def generator(dataset) -> Callable: - def generator(): - for id in range(len(dataset)): + def generator_(): + for sample in dataset: yield { - "sample": pickle.dumps(dataset[id]), + "sample": pickle.dumps(sample), } - return generator + return generator_ + + +@pytest.fixture() +def 
generator_split(dataset, problem_definition) -> Callable: + generators = {} + for split_name, ids in problem_definition.get_split().items(): + + def generator_(ids=ids): + for id in ids: + yield {"sample": pickle.dumps(dataset[id])} + + generators[split_name] = generator_ + return generators @pytest.fixture() -def hf_dataset(generator, infos, problem_definition) -> Dataset: - hf_dataset = huggingface_bridge.plaid_generator_to_huggingface( - generator, infos, problem_definition - ) +def hf_dataset(generator) -> Dataset: + hf_dataset = huggingface_bridge.plaid_generator_to_huggingface(generator) return hf_dataset class Test_Huggingface_Bridge: def assert_hf_dataset(self, hfds): - assert hfds.description["legal"] == {"owner": "PLAID2", "license": "BSD-3"} - assert hfds.description["task"] == "regression" - assert hfds.description["in_scalars_names"][0] == "feature_name_1" - assert hfds.description["in_scalars_names"][1] == "feature_name_2" + # assert hfds.description["legal"] == {"owner": "PLAID2", "license": "BSD-3"} + # assert hfds.description["task"] == "regression" + # assert hfds.description["in_scalars_names"][0] == "feature_name_1" + # assert hfds.description["in_scalars_names"][1] == "feature_name_2" self.assert_sample(to_plaid_sample(hfds[0])) - def assert_plaid_dataset(self, ds, pbdef): - assert ds.get_infos()["legal"] == {"owner": "PLAID2", "license": "BSD-3"} - assert pbdef.get_input_scalars_names()[0] == "feature_name_1" - assert pbdef.get_input_scalars_names()[1] == "feature_name_2" + def assert_plaid_dataset(self, ds): + # assert ds.get_infos()["legal"] == {"owner": "PLAID2", "license": "BSD-3"} + # assert pbdef.get_input_scalars_names()[0] == "feature_name_1" + # assert pbdef.get_input_scalars_names()[1] == "feature_name_2" self.assert_sample(ds[0]) def assert_sample(self, sample): @@ -76,78 +94,46 @@ def assert_sample(self, sample): assert "test_field_same_size" in sample.get_field_names() assert sample.get_field("test_field_same_size").shape[0] == 17 - def test_to_plaid_sample(self, generator, infos, problem_definition): - hfds = huggingface_bridge.plaid_generator_to_huggingface( - generator, infos, problem_definition - ) + def test_to_plaid_sample(self, generator): + hfds = huggingface_bridge.plaid_generator_to_huggingface(generator) to_plaid_sample(hfds[0]) def test_to_plaid_sample_fallback_build_succeeds(self, dataset): sample = dataset[0] - bad_sample = { + old_hf_sample = { "path": getattr(sample, "path", None), - "scalars": sample.scalars.data, - "meshes": sample.meshes.data, - "mesh_base_name": sample.meshes._mesh_base_name, - "mesh_zone_name": sample.meshes._mesh_zone_name, - "links": sample.meshes._links, - "paths": sample.meshes._paths, - "time_series": getattr(sample, "time_series", None), + "scalars": {sn: sample.get_scalar(sn) for sn in sample.get_scalar_names()}, + "meshes": sample.features.data, } - bad_hf_sample = {"sample": pickle.dumps(bad_sample)} - plaid_sample = to_plaid_sample(bad_hf_sample) + old_hf_sample = {"sample": pickle.dumps(old_hf_sample)} + plaid_sample = to_plaid_sample(old_hf_sample) assert isinstance(plaid_sample, Sample) - def test_to_plaid_sample_missing_key_raises_keyerror(self, dataset): - sample = dataset[0] - bad_sample = { - "path": getattr(sample, "path", None), - "meshes": sample.meshes.data, - "mesh_base_name": sample.meshes._mesh_base_name, - "mesh_zone_name": sample.meshes._mesh_zone_name, - "links": sample.meshes._links, - "paths": sample.meshes._paths, - "time_series": getattr(sample, "time_series", None), - } - bad_hf_sample 
= {"sample": pickle.dumps(bad_sample)} - with pytest.raises(KeyError): - to_plaid_sample(bad_hf_sample) - - def test_plaid_dataset_to_huggingface(self, dataset, problem_definition): - hfds = huggingface_bridge.plaid_dataset_to_huggingface( - dataset, problem_definition, split="train" - ) - hfds = huggingface_bridge.plaid_dataset_to_huggingface( - dataset, problem_definition - ) + def test_plaid_dataset_to_huggingface(self, dataset): + hfds = huggingface_bridge.plaid_dataset_to_huggingface(dataset) + hfds = huggingface_bridge.plaid_dataset_to_huggingface(dataset, ids=[0, 1]) self.assert_hf_dataset(hfds) def test_plaid_dataset_to_huggingface_datasetdict( self, dataset, problem_definition ): huggingface_bridge.plaid_dataset_to_huggingface_datasetdict( - dataset, problem_definition, main_splits=["train", "test"] + dataset, main_splits=problem_definition.get_split() ) - def test_plaid_generator_to_huggingface(self, generator, infos, problem_definition): - hfds = huggingface_bridge.plaid_generator_to_huggingface( - generator, infos, problem_definition, split="train" - ) + def test_plaid_generator_to_huggingface(self, generator): + hfds = huggingface_bridge.plaid_generator_to_huggingface(generator) hfds = huggingface_bridge.plaid_generator_to_huggingface( - generator, infos, problem_definition + generator, processes_number=2 ) self.assert_hf_dataset(hfds) - def test_plaid_generator_to_huggingface_datasetdict( - self, generator, infos, problem_definition - ): - huggingface_bridge.plaid_generator_to_huggingface_datasetdict( - generator, infos, problem_definition, main_splits=["train", "test"] - ) + def test_plaid_generator_to_huggingface_datasetdict(self, generator_split): + huggingface_bridge.plaid_generator_to_huggingface_datasetdict(generator_split) def test_huggingface_dataset_to_plaid(self, hf_dataset): - ds, pbdef = huggingface_bridge.huggingface_dataset_to_plaid(hf_dataset) - self.assert_plaid_dataset(ds, pbdef) + ds = huggingface_bridge.huggingface_dataset_to_plaid(hf_dataset) + self.assert_plaid_dataset(ds) def test_huggingface_dataset_to_plaid_with_ids(self, hf_dataset): huggingface_bridge.huggingface_dataset_to_plaid(hf_dataset, ids=[0, 1]) @@ -175,6 +161,37 @@ def test_huggingface_dataset_to_plaid_error_processes_number_2(self, hf_dataset) hf_dataset, ids=[0], processes_number=2 ) + # ---------------------------------------------------------------- + def test_save_load_to_disk( + self, current_directory, generator_split, infos, problem_definition + ): + hf_dataset_dict = huggingface_bridge.plaid_generator_to_huggingface_datasetdict( + generator_split + ) + test_dir = Path(current_directory) / Path("test") + huggingface_bridge.save_dataset_dict_to_disk(test_dir, hf_dataset_dict) + huggingface_bridge.save_dataset_infos_to_disk(test_dir, infos) + huggingface_bridge.save_problem_definition_to_disk( + test_dir, "task_1", problem_definition + ) + huggingface_bridge.load_dataset_dict_from_to_disk(test_dir) + huggingface_bridge.load_dataset_infos_from_disk(test_dir) + huggingface_bridge.load_problem_definition_from_disk(test_dir, "task_1") + shutil.rmtree(test_dir) + + # ---------------------------------------------------------------- + # deprecated functions + + def test_huggingface_description_to_problem_definition(self, hf_dataset): + huggingface_bridge.huggingface_description_to_problem_definition( + hf_dataset.description + ) + + def test_huggingface_description_to_infos(self, infos): + hf_description = {} + hf_description.update(infos) + 
huggingface_bridge.huggingface_description_to_infos(hf_description) + def test_create_string_for_huggingface_dataset_card(self, hf_dataset): huggingface_bridge.create_string_for_huggingface_dataset_card( description=hf_dataset.description, diff --git a/tests/conftest.py b/tests/conftest.py index 24aae525..06f006f5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -31,11 +31,6 @@ def generate_samples(nb: int, zone_name: str, base_name: str) -> list[Sample]: sample.init_zone(np.array([0, 0, 0]), zone_name=zone_name, base_name=base_name) sample.add_scalar("test_scalar", float(i)) sample.add_scalar("test_scalar_2", float(i**2)) - sample.add_time_series( - "test_time_series_1", - np.arange(11, dtype=float), - float(i**3) * np.arange(11, dtype=float), - ) sample.add_field( name="test_field_same_size", field=float(i**4) * np.ones(17), @@ -149,7 +144,7 @@ def tree(nodes, triangles, vertex_field, cell_center_field, nodal_tags): def sample_with_tree(tree: CGNSTree) -> Sample: """Generate a Sample objects with a tree.""" sample = Sample() - sample.meshes.add_tree(tree) + sample.features.add_tree(tree) return sample @@ -217,11 +212,6 @@ def heterogeneous_dataset(dataset_with_samples_with_tree): sample_with_scalar.add_scalar("scalar", 1.0) dataset.add_sample(sample_with_scalar) sample_with_ts = Sample() - sample_with_ts.add_time_series( - "test_time_series_1", - np.arange(11, dtype=float), - np.arange(11, dtype=float), - ) dataset.add_sample(sample_with_ts) return dataset diff --git a/tests/containers/dataset/samples/sample_000000000/scalars.csv b/tests/containers/dataset_old/samples/sample_000000000/scalars.csv similarity index 100% rename from tests/containers/dataset/samples/sample_000000000/scalars.csv rename to tests/containers/dataset_old/samples/sample_000000000/scalars.csv diff --git a/tests/containers/dataset/samples/sample_000000001/scalars.csv b/tests/containers/dataset_old/samples/sample_000000001/scalars.csv similarity index 100% rename from tests/containers/dataset/samples/sample_000000001/scalars.csv rename to tests/containers/dataset_old/samples/sample_000000001/scalars.csv diff --git a/tests/containers/dataset_old/samples/sample_000000001/time_series_val.csv b/tests/containers/dataset_old/samples/sample_000000001/time_series_val.csv new file mode 100644 index 00000000..5754faff --- /dev/null +++ b/tests/containers/dataset_old/samples/sample_000000001/time_series_val.csv @@ -0,0 +1,14 @@ +t,val +0.000000000000000000e+00,2.776754545341392766e-01 +1.000000000000000000e+00,2.911961664974219155e-01 +2.000000000000000000e+00,3.040768329216420041e-01 +3.000000000000000000e+00,3.164767210697628785e-01 +4.000000000000000000e+00,3.285723116164224744e-01 +5.000000000000000000e+00,3.403277244091458265e-01 +6.000000000000000000e+00,3.516274940824523076e-01 +7.000000000000000000e+00,3.622111933783603854e-01 +8.000000000000000000e+00,3.714542064914365116e-01 +9.000000000000000000e+00,3.808252645956607951e-01 +1.000000000000000000e+01,3.904890606428199074e-01 +1.100000000000000000e+01,4.002799273706676986e-01 +1.200000000000000000e+01,4.096288782505667081e-01 \ No newline at end of file diff --git a/tests/containers/dataset/samples/sample_000000002/scalars.csv b/tests/containers/dataset_old/samples/sample_000000002/scalars.csv similarity index 100% rename from tests/containers/dataset/samples/sample_000000002/scalars.csv rename to tests/containers/dataset_old/samples/sample_000000002/scalars.csv diff --git a/tests/containers/dataset_old/samples/sample_000067392 
b/tests/containers/dataset_old/samples/sample_000067392 new file mode 100644 index 00000000..1675e202 --- /dev/null +++ b/tests/containers/dataset_old/samples/sample_000067392 @@ -0,0 +1 @@ +This file is used for the test: test__init__file_provided in test_sample.py \ No newline at end of file diff --git a/tests/containers/test_dataset.py b/tests/containers/test_dataset.py index bc83e519..93ee436e 100644 --- a/tests/containers/test_dataset.py +++ b/tests/containers/test_dataset.py @@ -8,10 +8,12 @@ # %% Imports import copy +import shutil from pathlib import Path import numpy as np import pytest +from packaging.version import Version import plaid from plaid.containers.dataset import Dataset @@ -31,26 +33,23 @@ def current_directory(): def compare_two_samples(sample_1: Sample, sample_2: Sample): - assert set(sample_1.meshes.get_all_mesh_times()) == set( - sample_2.meshes.get_all_mesh_times() + assert set(sample_1.features.get_all_mesh_times()) == set( + sample_2.features.get_all_mesh_times() ) assert set(sample_1.get_scalar_names()) == set(sample_2.get_scalar_names()) assert set(sample_1.get_field_names()) == set(sample_2.get_field_names()) - assert set(sample_1.get_time_series_names()) == set( - sample_2.get_time_series_names() - ) assert np.array_equal(sample_1.get_nodes(), sample_2.get_nodes()) - assert set(sample_1.meshes.get_base_names()) == set( - sample_2.meshes.get_base_names() + assert set(sample_1.features.get_base_names()) == set( + sample_2.features.get_base_names() ) - for base_name in sample_1.meshes.get_base_names(): - assert set(sample_1.meshes.get_zone_names(base_name)) == set( - sample_2.meshes.get_zone_names(base_name) + for base_name in sample_1.features.get_base_names(): + assert set(sample_1.features.get_zone_names(base_name)) == set( + sample_2.features.get_zone_names(base_name) ) - for zone_name in sample_1.meshes.get_zone_names(base_name): - assert sample_1.meshes.get_zone_type( + for zone_name in sample_1.features.get_zone_names(base_name): + assert sample_1.features.get_zone_type( zone_name, base_name - ) == sample_2.meshes.get_zone_type(zone_name, base_name) + ) == sample_2.features.get_zone_type(zone_name, base_name) # %% Tests @@ -124,14 +123,6 @@ def test_get_all_features_identifiers_by_type( ) == 2 ) - assert ( - len( - dataset_with_samples.get_all_features_identifiers_by_type( - feature_type="times_series" - ) - ) - == 0 - ) assert ( len( dataset_with_samples.get_all_features_identifiers_by_type( @@ -343,16 +334,6 @@ def test_get_scalar_names_same_ids(self, dataset_with_samples): dataset_with_samples.get_scalar_names() dataset_with_samples.get_scalar_names([0, 0]) - def test_get_time_series_names(self, dataset_with_samples, nb_samples): - dataset_with_samples.get_time_series_names() - dataset_with_samples.get_time_series_names( - np.random.randint(2, nb_samples, size=2) - ) - - def test_get_time_series_names_same_ids(self, dataset_with_samples): - dataset_with_samples.get_time_series_names() - dataset_with_samples.get_time_series_names([0, 0]) - def test_get_field_names(self, dataset_with_samples, nb_samples): dataset_with_samples.get_field_names() dataset_with_samples.get_field_names(np.random.randint(2, nb_samples, size=2)) @@ -451,10 +432,6 @@ def test_get_feature_from_string_identifier( ): dataset_with_samples.get_feature_from_string_identifier("scalar::test_scalar") - dataset_with_samples.get_feature_from_string_identifier( - "time_series::test_time_series_1" - ) - dataset_with_samples.get_feature_from_string_identifier( "field::test_field_same_size" ) 
@@ -488,9 +465,6 @@ def test_get_feature_from_identifier( dataset_with_samples.get_feature_from_identifier( {"type": "scalar", "name": "test_scalar"} ) - dataset_with_samples.get_feature_from_identifier( - {"type": "time_series", "name": "test_time_series_1"} - ) dataset_with_samples.get_feature_from_identifier( {"type": "field", "name": "test_field_same_size"} @@ -596,19 +570,6 @@ def test_update_features_from_identifier( in_place=False, ) - dataset_with_samples.update_features_from_identifier( - feature_identifiers=FeatureIdentifier( - { - "type": "time_series", - "name": "test_time_series_1", - } - ), - features={ - ind: (np.array([0, 1]), np.array([3.14, 3.15])) for ind in indices - }, - in_place=False, - ) - indices = dataset_with_samples_with_tree.get_sample_ids() before = dataset_with_samples_with_tree[0].get_field( name="test_node_field_1", @@ -682,11 +643,6 @@ def test_extract_dataset_from_identifier( {"type": "scalar", "name": "test_scalar"} ), ) - dataset_with_samples.extract_dataset_from_identifier( - feature_identifiers=FeatureIdentifier( - {"type": "time_series", "name": "test_time_series_1"} - ), - ) dataset_with_samples_with_tree.extract_dataset_from_identifier( feature_identifiers=FeatureIdentifier( @@ -712,13 +668,12 @@ def test_extract_dataset_from_identifier( feature_identifiers=[ FeatureIdentifier({"type": "scalar", "name": "test_scalar"}), FeatureIdentifier({"type": "nodes"}), - ], - keep_cgns=True, + ] ) for smp in new_dset: - assert smp.meshes.get_base_names() == ["Base_2_2"] - assert smp.meshes.get_zone_names() == ["Zone"] - assert smp.meshes.get_field_names() == [] + assert smp.features.get_base_names() == ["Base_2_2"] + assert smp.features.get_zone_names() == ["Zone"] + assert smp.features.get_field_names() == [] def test_get_tabular_from_homogeneous_identifiers( self, @@ -904,7 +859,7 @@ def test_set_infos(self, dataset, infos): ) def test_get_infos(self, dataset): - assert dataset.get_infos() == {} + assert dataset.get_infos()["plaid"]["version"] == Version(plaid.__version__) def test_print_infos(self, dataset, infos): dataset.set_infos(infos) @@ -925,9 +880,7 @@ def test_merge_dataset_with_bad_type(self, dataset_with_samples): def test_merge_features(self, dataset_with_samples, other_dataset_with_samples): feat_id = dataset_with_samples.get_all_features_identifiers() - feat_id = [ - fid for fid in feat_id if fid["type"] not in ["scalar", "time_series"] - ] + feat_id = [fid for fid in feat_id if fid["type"] not in ["scalar"]] dataset_1 = dataset_with_samples.extract_dataset_from_identifier(feat_id) feat_id = other_dataset_with_samples.get_all_features_identifiers() feat_id = [fid for fid in feat_id if fid["type"] not in ["field", "node"]] @@ -1072,6 +1025,10 @@ def test_load_from_dir(self, dataset_with_samples, tmp_path): loaded_dataset = Dataset.load_from_dir(dname) assert len(loaded_dataset) == len(dataset_with_samples) + def test_load_from_dir_old(self, current_directory): + loaded_dataset = Dataset.load_from_dir(current_directory / "dataset_old") + assert len(loaded_dataset) == 3 + # -------------------------------------------------------------------------# def test_add_to_dir_creates_and_saves(self, empty_dataset, sample, tmp_path): # Should create the directory and save the sample @@ -1120,6 +1077,7 @@ def test__add_to_dir__both_path_and_save_dir( save_dir = current_directory / "my_dataset_dir" with pytest.raises(ValueError): empty_dataset.add_to_dir(sample, path=save_dir, save_dir=save_dir) + shutil.rmtree(Path(save_dir)) # 
-------------------------------------------------------------------------# def test__save_to_dir_(self, dataset_with_samples, tmp_path): @@ -1135,7 +1093,9 @@ def test__load_from_dir_(self, dataset_with_samples, infos, tmp_path): new_dataset = Dataset() new_dataset._load_from_dir_(savedir) assert len(new_dataset) == len(dataset_with_samples) - assert new_dataset.get_infos() == infos + new_infos = new_dataset.get_infos() + del new_infos["plaid"] + assert new_infos == infos for sample_1, sample_2 in zip(dataset_with_samples, new_dataset): compare_two_samples(sample_1, sample_2) diff --git a/tests/containers/test_sample.py b/tests/containers/test_sample.py index 2dfbb142..670ef7cb 100644 --- a/tests/containers/test_sample.py +++ b/tests/containers/test_sample.py @@ -16,16 +16,15 @@ from Muscat.Bridges.CGNSBridge import MeshToCGNS from Muscat.MeshTools import MeshCreationTools as MCT -from plaid.containers.sample import ( - Sample, - _check_names, -) +from plaid.containers.sample import Sample from plaid.containers.utils import ( + _check_names, _read_index, _read_index_array, _read_index_range, ) from plaid.types.feature_types import FeatureIdentifier +from plaid.utils.cgns_helper import show_cgns_tree # %% Fixtures @@ -56,14 +55,6 @@ def sample_with_scalar(sample): return sample -@pytest.fixture() -def sample_with_time_series(sample): - sample.add_time_series( - "test_time_series_1", np.arange(111, dtype=float), np.random.randn(111) - ) - return sample - - @pytest.fixture() def nodes3d(): return np.array( @@ -77,17 +68,6 @@ def nodes3d(): ) -@pytest.fixture() -def sample_with_linked_tree(tree, tmp_path): - sample_with_linked_tree = Sample() - sample_with_linked_tree.meshes.add_tree(tree) - path_linked_sample = tmp_path / "test_dir" / "meshes/mesh_000000000.cgns" - sample_with_linked_tree.link_tree( - path_linked_sample, sample_with_linked_tree, linked_time=0.0, time=1.0 - ) - return sample_with_linked_tree - - @pytest.fixture() def tree3d(nodes3d, triangles, vertex_field, cell_center_field): Mesh = MCT.CreateMeshOfTriangles(nodes3d, triangles) @@ -100,43 +80,35 @@ def tree3d(nodes3d, triangles, vertex_field, cell_center_field): @pytest.fixture() def sample_with_tree3d(sample, tree3d): - sample.meshes.add_tree(tree3d) + sample.features.add_tree(tree3d) return sample @pytest.fixture() -def sample_with_tree_and_scalar_and_time_series( +def sample_with_tree_and_scalar( sample_with_tree: Sample, ): sample_with_tree.add_scalar("r", np.random.randn()) sample_with_tree.add_scalar("test_scalar_1", np.random.randn()) - sample_with_tree.add_time_series( - "test_time_series_1", np.arange(111, dtype=float), np.random.randn(111) - ) return sample_with_tree @pytest.fixture() -def full_sample(sample_with_tree_and_scalar_and_time_series: Sample, tree3d): - sample_with_tree_and_scalar_and_time_series.add_scalar("r", np.random.randn()) - sample_with_tree_and_scalar_and_time_series.add_scalar( - "test_scalar_1", np.random.randn() - ) - sample_with_tree_and_scalar_and_time_series.add_time_series( - "test_time_series_1", np.arange(111, dtype=float), np.random.randn(111) - ) - sample_with_tree_and_scalar_and_time_series.add_field( +def full_sample(sample_with_tree_and_scalar: Sample, tree3d): + sample_with_tree_and_scalar.add_scalar("r", np.random.randn()) + sample_with_tree_and_scalar.add_scalar("test_scalar_1", np.random.randn()) + sample_with_tree_and_scalar.add_field( name="test_field_1", field=np.random.randn(5, 3), location="CellCenter" ) - sample_with_tree_and_scalar_and_time_series.init_zone( + 
sample_with_tree_and_scalar.init_zone( zone_shape=np.array([5, 3]), zone_name="test_field_1" ) - sample_with_tree_and_scalar_and_time_series.init_base( + sample_with_tree_and_scalar.init_base( topological_dim=2, physical_dim=3, base_name="test_base_1" ) - sample_with_tree_and_scalar_and_time_series.meshes.init_tree(time=1.0) - sample_with_tree_and_scalar_and_time_series.meshes.add_tree(tree=tree3d) - return sample_with_tree_and_scalar_and_time_series + sample_with_tree_and_scalar.features.init_tree(time=1.0) + sample_with_tree_and_scalar.features.add_tree(tree=tree3d) + return sample_with_tree_and_scalar # %% Test @@ -182,18 +154,9 @@ def test___init__(self, current_directory): sample_already_filled_1 = Sample(path=sample_path_1) sample_already_filled_2 = Sample(path=sample_path_2) sample_already_filled_3 = Sample(path=sample_path_3) - assert ( - sample_already_filled_1.meshes is not None - and sample_already_filled_1.scalars is not None - ) - assert ( - sample_already_filled_2.meshes is not None - and sample_already_filled_2.scalars is not None - ) - assert ( - sample_already_filled_3.meshes is not None - and sample_already_filled_3.scalars is not None - ) + assert sample_already_filled_1.features + assert sample_already_filled_2.features + assert sample_already_filled_3.features def test__init__unknown_directory(self, current_directory): sample_path = current_directory / "dataset" / "samples" / "sample_000000298" @@ -218,8 +181,8 @@ def test__init__path(self, current_directory): # with pytest.raises(ValueError): # Sample(path=sample_path, directory_path=sample_path) - def test_copy(self, sample_with_tree_and_scalar_and_time_series): - sample_with_tree_and_scalar_and_time_series.copy() + def test_copy(self, sample_with_tree_and_scalar): + sample_with_tree_and_scalar.copy() # -------------------------------------------------------------------------# def test_set_default_base(self, sample: Sample, topological_dim, physical_dim): @@ -227,19 +190,19 @@ def test_set_default_base(self, sample: Sample, topological_dim, physical_dim): sample.set_default_base(f"Base_{topological_dim}_{physical_dim}", 0.5) # check dims getters - assert sample.meshes.get_topological_dim() == topological_dim - assert sample.meshes.get_physical_dim() == physical_dim + assert sample.features.get_topological_dim() == topological_dim + assert sample.features.get_physical_dim() == physical_dim assert ( - sample.meshes.get_base_assignment() + sample.features.get_base_assignment() == f"Base_{topological_dim}_{physical_dim}" ) - assert sample.meshes.get_time_assignment() == 0.5 - assert sample.meshes.get_base_assignment("test") == "test" + assert sample.features.get_time_assignment() == 0.5 + assert sample.features.get_base_assignment("test") == "test" sample.set_default_base(f"Base_{topological_dim}_{physical_dim}") # already set sample.set_default_base(None) # will not assign to None assert ( - sample.meshes.get_base_assignment() + sample.features.get_base_assignment() == f"Base_{topological_dim}_{physical_dim}" ) with pytest.raises(ValueError): @@ -257,16 +220,16 @@ def test_set_default_zone_with_default_base( sample.init_base(topological_dim, physical_dim, base_name, time=0.5) sample.set_default_base(base_name) # No zone provided - assert sample.meshes.get_zone() is None + assert sample.features.get_zone() is None sample.init_zone(zone_shape, CGK.Structured_s, zone_name, base_name=base_name) # Look for the only zone in the default base - assert sample.meshes.get_zone() is not None + assert sample.features.get_zone() is 
not None sample.init_zone(zone_shape, CGK.Structured_s, zone_name, base_name=base_name) # There is more than one zone in this base with pytest.raises(KeyError): - sample.meshes.get_zone() + sample.features.get_zone() def test_set_default_zone( self, @@ -282,24 +245,24 @@ def test_set_default_zone( sample.set_default_zone_base(zone_name, base_name, 0.5) # check dims getters - assert sample.meshes.get_topological_dim() == topological_dim - assert sample.meshes.get_physical_dim() == physical_dim - assert sample.meshes.get_base_assignment() == base_name - assert sample.meshes.get_time_assignment() == 0.5 + assert sample.features.get_topological_dim() == topological_dim + assert sample.features.get_physical_dim() == physical_dim + assert sample.features.get_base_assignment() == base_name + assert sample.features.get_time_assignment() == 0.5 sample.set_default_base(base_name) # already set sample.set_default_base(None) # will not assign to None - assert sample.meshes.get_base_assignment() == base_name + assert sample.features.get_base_assignment() == base_name with pytest.raises(ValueError): sample.set_default_base("Unknown base name") - assert sample.meshes.get_zone_assignment() == zone_name - assert sample.meshes.get_time_assignment() == 0.5 + assert sample.features.get_zone_assignment() == zone_name + assert sample.features.get_time_assignment() == 0.5 - assert sample.meshes.get_zone() is not None + assert sample.features.get_zone() is not None sample.set_default_zone_base(zone_name, base_name) sample.set_default_zone_base(None, base_name) # will not assign to None - assert sample.meshes.get_zone_assignment() == zone_name + assert sample.features.get_zone_assignment() == zone_name with pytest.raises(ValueError): sample.set_default_zone_base("Unknown zone name", base_name) @@ -307,100 +270,73 @@ def test_set_default_time(self, sample: Sample, topological_dim, physical_dim): sample.init_base(topological_dim, physical_dim, time=0.5) sample.init_base(topological_dim, physical_dim, "OK_name", time=1.5) - assert sample.meshes.get_time_assignment() == 0.5 + assert sample.features.get_time_assignment() == 0.5 sample.set_default_time(1.5) - assert sample.meshes.get_time_assignment() == 1.5, "here" + assert sample.features.get_time_assignment() == 1.5, "here" sample.set_default_time(1.5) # already set sample.set_default_time(None) # will not assign to None - assert sample.meshes.get_time_assignment() == 1.5 + assert sample.features.get_time_assignment() == 1.5 with pytest.raises(ValueError): sample.set_default_time(2.5) # -------------------------------------------------------------------------# - def test_show_tree(self, sample_with_tree_and_scalar_and_time_series): - sample_with_tree_and_scalar_and_time_series.show_tree() + def test_show_tree(self, sample_with_tree_and_scalar): + sample_with_tree_and_scalar.show_tree() def test_init_tree(self, sample: Sample): - sample.meshes.init_tree() - sample.meshes.init_tree(0.5) + sample.features.init_tree() + sample.features.init_tree(0.5) def test_get_mesh_empty(self, sample: Sample): sample.get_mesh() + sample.features.get_mesh() - def test_get_mesh(self, sample_with_tree_and_scalar_and_time_series): - sample_with_tree_and_scalar_and_time_series.get_mesh() - - def test_get_mesh_without_links(self, sample_with_linked_tree): - sample_with_linked_tree.get_mesh(time=1.0, apply_links=False) - - def test_get_mesh_with_links_in_memory(self, sample_with_linked_tree): - sample_with_linked_tree.get_mesh(time=1.0, apply_links=True, in_memory=True) - - def 
test_get_mesh_with_links(self, sample_with_linked_tree, tmp_path): - sample_with_linked_tree.save(tmp_path / "test_dir") - sample_with_linked_tree.get_mesh(time=1.0, apply_links=True) + def test_get_mesh(self, sample_with_tree_and_scalar): + sample_with_tree_and_scalar.get_mesh() def test_set_meshes_empty(self, sample, tree): - sample.meshes.set_meshes({0.0: tree}) + sample.features.set_meshes({0.0: tree}) def test_set_meshes(self, sample_with_tree: Sample, tree): with pytest.raises(KeyError): - sample_with_tree.meshes.set_meshes({0.0: tree}) + sample_with_tree.features.set_meshes({0.0: tree}) def test_add_tree_empty(self, sample_with_tree: Sample): with pytest.raises(ValueError): - sample_with_tree.meshes.add_tree([]) + sample_with_tree.features.add_tree([]) def test_add_tree(self, sample: Sample, tree): - sample.meshes.add_tree(tree) - sample.meshes.add_tree(tree) - sample.meshes.add_tree(tree, time=0.2) + sample.features.add_tree(tree) + sample.features.add_tree(tree) + sample.features.add_tree(tree, time=0.2) def test_del_tree(self, sample, tree): - sample.meshes.add_tree(tree) - sample.meshes.add_tree(tree, time=0.2) - - assert isinstance(sample.meshes.del_tree(0.2), list) - assert list(sample.meshes.data.keys()) == [0.0] - assert list(sample.meshes._links.keys()) == [0.0] - assert list(sample.meshes._paths.keys()) == [0.0] - - assert isinstance(sample.meshes.del_tree(0.0), list) - assert list(sample.meshes.data.keys()) == [] - assert list(sample.meshes._links.keys()) == [] - assert list(sample.meshes._paths.keys()) == [] - - def test_link_tree(self, sample_with_linked_tree): - link_checks = [ - "/Base_2_2/Zone/Elements_Selections", - "/Base_2_2/Zone/Points_Selections", - "/Base_2_2/Zone/Points_Selections/tag", - "/Base_2_2/Zone/Elements_TRI_3", - "/Base_2_2/Zone/GridCoordinates", - "/Base_2_2/Zone/ZoneBC", - ] - for link in sample_with_linked_tree.meshes._links[1]: - assert link[1] == "mesh_000000000.cgns" - assert link[2] == link[3] - assert link[2] in link_checks + sample.features.add_tree(tree) + sample.features.add_tree(tree, time=0.2) + + assert isinstance(sample.features.del_tree(0.2), list) + assert list(sample.features.data.keys()) == [0.0] + + assert isinstance(sample.features.del_tree(0.0), list) + assert list(sample.features.data.keys()) == [] def test_on_error_del_tree(self, sample, tree): with pytest.raises(KeyError): - sample.meshes.del_tree(0.0) + sample.features.del_tree(0.0) - sample.meshes.add_tree(tree) - sample.meshes.add_tree(tree, time=0.2) + sample.features.add_tree(tree) + sample.features.add_tree(tree, time=0.2) with pytest.raises(KeyError): - sample.meshes.del_tree(0.7) + sample.features.del_tree(0.7) # -------------------------------------------------------------------------# def test_init_base(self, sample: Sample, base_name, topological_dim, physical_dim): sample.init_base(topological_dim, physical_dim, base_name) # check dims getters - assert sample.meshes.get_topological_dim(base_name) == topological_dim - assert sample.meshes.get_physical_dim(base_name) == physical_dim + assert sample.features.get_topological_dim(base_name) == topological_dim + assert sample.features.get_physical_dim(base_name) == physical_dim def test_del_base_existing_base( self, sample: Sample, base_name, topological_dim, physical_dim @@ -410,40 +346,40 @@ def test_del_base_existing_base( sample.init_base(topological_dim, physical_dim, second_base_name) # Delete first base - updated_cgns_tree = sample.meshes.del_base(base_name, 0.0) + updated_cgns_tree = 
sample.features.del_base(base_name, 0.0) assert updated_cgns_tree is not None and isinstance(updated_cgns_tree, list) # Testing the resulting tree new_sample = Sample() - new_sample.meshes.add_tree(updated_cgns_tree, 0.1) - assert new_sample.meshes.get_topological_dim() == topological_dim - assert new_sample.meshes.get_physical_dim() == physical_dim - assert new_sample.meshes.get_base_names() == [second_base_name] + new_sample.features.add_tree(updated_cgns_tree, 0.1) + assert new_sample.features.get_topological_dim() == topological_dim + assert new_sample.features.get_physical_dim() == physical_dim + assert new_sample.features.get_base_names() == [second_base_name] # Add 2 bases and delete one base at time 0.2 sample.init_base(topological_dim, physical_dim, "tree", 0.2) sample.init_base(topological_dim, physical_dim, base_name, 0.2) - updated_cgns_tree = sample.meshes.del_base("tree", 0.2) - assert sample.meshes.get_base("tree", 0.2) is None - assert sample.meshes.get_base(base_name, 0.2) is not None - assert sample.meshes.get_base(second_base_name) is not None + updated_cgns_tree = sample.features.del_base("tree", 0.2) + assert sample.features.get_base("tree", 0.2) is None + assert sample.features.get_base(base_name, 0.2) is not None + assert sample.features.get_base(second_base_name) is not None assert updated_cgns_tree is not None and isinstance(updated_cgns_tree, list) # Testing the resulting from time 0.2 new_sample = Sample() - new_sample.meshes.add_tree(updated_cgns_tree) - assert new_sample.meshes.get_topological_dim() == topological_dim - assert new_sample.meshes.get_physical_dim() == physical_dim - assert new_sample.meshes.get_base_names() == [base_name] + new_sample.features.add_tree(updated_cgns_tree) + assert new_sample.features.get_topological_dim() == topological_dim + assert new_sample.features.get_physical_dim() == physical_dim + assert new_sample.features.get_base_names() == [base_name] # Deleting the last base at time 0.0 - updated_cgns_tree = sample.meshes.del_base(second_base_name, 0.0) - assert sample.meshes.get_base(second_base_name) is None + updated_cgns_tree = sample.features.del_base(second_base_name, 0.0) + assert sample.features.get_base(second_base_name) is None assert updated_cgns_tree is not None and isinstance(updated_cgns_tree, list) # Deleting the last base at time 0.2 - updated_cgns_tree = sample.meshes.del_base(base_name, 0.2) - assert sample.meshes.get_base(base_name) is None + updated_cgns_tree = sample.features.del_base(base_name, 0.2) + assert sample.features.get_base(base_name) is None assert updated_cgns_tree is not None and isinstance(updated_cgns_tree, list) def test_del_base_nonexistent_base_nonexistent_time( @@ -451,13 +387,13 @@ ): sample.init_base(topological_dim, physical_dim, base_name, time=1.0) with pytest.raises(KeyError): - sample.meshes.del_base(base_name, time=2.0) + sample.features.del_base(base_name, time=2.0) with pytest.raises(KeyError): - sample.meshes.del_base("unknown", time=1.0) + sample.features.del_base("unknown", time=1.0) def test_del_base_no_cgns_tree(self, sample): with pytest.raises(KeyError): - sample.meshes.del_base("unknwon", 0.0) + sample.features.del_base("unknown", 0.0) def test_init_base_no_base_name( self, sample: Sample, topological_dim, physical_dim ): @@ -466,50 +402,52 @@ def test_init_base_no_base_name( # check dims getters assert ( - sample.meshes.get_topological_dim(f"Base_{topological_dim}_{physical_dim}") + sample.features.get_topological_dim( +
f"Base_{topological_dim}_{physical_dim}" + ) == topological_dim ) assert ( - sample.meshes.get_physical_dim(f"Base_{topological_dim}_{physical_dim}") + sample.features.get_physical_dim(f"Base_{topological_dim}_{physical_dim}") == physical_dim ) # check setting default base sample.set_default_base(f"Base_{topological_dim}_{physical_dim}") - assert sample.meshes.get_topological_dim() == topological_dim - assert sample.meshes.get_physical_dim() == physical_dim + assert sample.features.get_topological_dim() == topological_dim + assert sample.features.get_physical_dim() == physical_dim def test_get_base_names(self, sample: Sample): - assert sample.meshes.get_base_names() == [] + assert sample.features.get_base_names() == [] sample.init_base(3, 3, "base_name_1") sample.init_base(3, 3, "base_name_2") - assert sample.meshes.get_base_names() == ["base_name_1", "base_name_2"] - assert sample.meshes.get_base_names(full_path=True) == [ + assert sample.features.get_base_names() == ["base_name_1", "base_name_2"] + assert sample.features.get_base_names(full_path=True) == [ "/base_name_1", "/base_name_2", ] # check dims getters - assert sample.meshes.get_topological_dim("base_name_1") == 3 - assert sample.meshes.get_physical_dim("base_name_1") == 3 - assert sample.meshes.get_topological_dim("base_name_2") == 3 - assert sample.meshes.get_physical_dim("base_name_2") == 3 + assert sample.features.get_topological_dim("base_name_1") == 3 + assert sample.features.get_physical_dim("base_name_1") == 3 + assert sample.features.get_topological_dim("base_name_2") == 3 + assert sample.features.get_physical_dim("base_name_2") == 3 def test_get_base(self, sample: Sample, base_name): - sample.meshes.init_tree() - assert sample.meshes.get_base() is None + sample.features.init_tree() + assert sample.features.get_base() is None sample.init_base(3, 3, base_name) - assert sample.meshes.get_base(base_name) is not None - assert sample.meshes.get_base() is not None + assert sample.features.get_base(base_name) is not None + assert sample.features.get_base() is not None sample.init_base(3, 3, "other_base_name") - assert sample.meshes.get_base(base_name) is not None - assert sample.meshes.get_base(time=1.0) is None + assert sample.features.get_base(base_name) is not None + assert sample.features.get_base(time=1.0) is None with pytest.raises(KeyError): - sample.meshes.get_base() + sample.features.get_base() # check dims getters - assert sample.meshes.get_topological_dim(base_name) == 3 - assert sample.meshes.get_physical_dim(base_name) == 3 - assert sample.meshes.get_topological_dim("other_base_name") == 3 - assert sample.meshes.get_physical_dim("other_base_name") == 3 + assert sample.features.get_topological_dim(base_name) == 3 + assert sample.features.get_physical_dim(base_name) == 3 + assert sample.features.get_topological_dim("other_base_name") == 3 + assert sample.features.get_physical_dim("other_base_name") == 3 # -------------------------------------------------------------------------# def test_init_zone(self, sample: Sample, base_name, zone_name, zone_shape): @@ -519,8 +457,8 @@ def test_init_zone(self, sample: Sample, base_name, zone_name, zone_shape): sample.init_zone(zone_shape, CGK.Structured_s, zone_name, base_name=base_name) sample.init_zone(zone_shape, CGK.Unstructured_s, zone_name, base_name=base_name) # check dims getters - assert sample.meshes.get_topological_dim(base_name) == 3 - assert sample.meshes.get_physical_dim(base_name) == 3 + assert sample.features.get_topological_dim(base_name) == 3 + assert 
sample.features.get_physical_dim(base_name) == 3 def test_init_zone_defaults_names(self, sample: Sample, zone_shape): sample.init_base(3, 3) @@ -539,13 +477,13 @@ def test_del_zone_existing_zone( ) # Delete first zone - updated_cgns_tree = sample.meshes.del_zone(zone_name, base_name, 0.0) + updated_cgns_tree = sample.features.del_zone(zone_name, base_name, 0.0) assert updated_cgns_tree is not None and isinstance(updated_cgns_tree, list) # Testing the resulting tree new_sample = Sample() - new_sample.meshes.add_tree(updated_cgns_tree, 0.1) - assert new_sample.meshes.get_zone_names() == [second_zone_name] + new_sample.features.add_tree(updated_cgns_tree, 0.1) + assert new_sample.features.get_zone_names() == [second_zone_name] # Add 2 zones and delete one zone at time 0.2 sample.init_base(topological_dim, physical_dim, base_name, 0.2) @@ -556,25 +494,25 @@ def test_del_zone_existing_zone( zone_shape, CGK.Unstructured_s, "test", base_name=base_name, time=0.2 ) - updated_cgns_tree = sample.meshes.del_zone("test", base_name, 0.2) - assert sample.meshes.get_zone("tree", base_name, 0.2) is None - assert sample.meshes.get_zone(zone_name, base_name, 0.2) is not None - assert sample.meshes.get_zone(second_zone_name, base_name) is not None + updated_cgns_tree = sample.features.del_zone("test", base_name, 0.2) + assert sample.features.get_zone("tree", base_name, 0.2) is None + assert sample.features.get_zone(zone_name, base_name, 0.2) is not None + assert sample.features.get_zone(second_zone_name, base_name) is not None assert updated_cgns_tree is not None and isinstance(updated_cgns_tree, list) # Testing the resulting from time 0.2 new_sample = Sample() - new_sample.meshes.add_tree(updated_cgns_tree) - assert new_sample.meshes.get_zone_names(base_name) == [zone_name] + new_sample.features.add_tree(updated_cgns_tree) + assert new_sample.features.get_zone_names(base_name) == [zone_name] # Deleting the last zone at time 0.0 - updated_cgns_tree = sample.meshes.del_zone(second_zone_name, base_name, 0.0) - assert sample.meshes.get_zone(second_zone_name, base_name) is None + updated_cgns_tree = sample.features.del_zone(second_zone_name, base_name, 0.0) + assert sample.features.get_zone(second_zone_name, base_name) is None assert updated_cgns_tree is not None and isinstance(updated_cgns_tree, list) # Deleting the last zone at time 0.2 - updated_cgns_tree = sample.meshes.del_zone(zone_name, base_name, 0.2) - assert sample.meshes.get_zone(zone_name, base_name) is None + updated_cgns_tree = sample.features.del_zone(zone_name, base_name, 0.2) + assert sample.features.get_zone(zone_name, base_name) is None assert updated_cgns_tree is not None and isinstance(updated_cgns_tree, list) def test_del_zone_nonexistent_zone_nonexistent_time( @@ -586,14 +524,14 @@ def test_del_zone_nonexistent_zone_nonexistent_time( zone_shape, CGK.Structured_s, zone_name, base_name=base_name, time=1.0 ) with pytest.raises(KeyError): - sample.meshes.del_zone(zone_name, base_name, 2.0) + sample.features.del_zone(zone_name, base_name, 2.0) with pytest.raises(KeyError): - sample.meshes.del_zone("unknown", base_name, 1.0) + sample.features.del_zone("unknown", base_name, 1.0) def test_del_zone_no_cgns_tree(self, sample: Sample): sample.init_base(2, 3, "only_base") with pytest.raises(KeyError): - sample.meshes.del_zone("unknwon", "only_base", 0.0) + sample.features.del_zone("unknown", "only_base", 0.0) def test_has_zone(self, sample, base_name, zone_name): sample.init_base(3, 3, base_name) @@ -601,10 +539,10 @@ def test_has_zone(self, sample,
base_name, zone_name): np.random.randint(0, 10, size=3), zone_name=zone_name, base_name=base_name ) sample.show_tree() - assert sample.meshes.has_zone(zone_name, base_name) - assert not sample.meshes.has_zone("not_present_zone_name", base_name) - assert not sample.meshes.has_zone(zone_name, "not_present_base_name") - assert not sample.meshes.has_zone( + assert sample.features.has_zone(zone_name, base_name) + assert not sample.features.has_zone("not_present_zone_name", base_name) + assert not sample.features.has_zone(zone_name, "not_present_base_name") + assert not sample.features.has_zone( "not_present_zone_name", "not_present_base_name" ) @@ -620,49 +558,49 @@ def test_get_zone_names(self, sample: Sample, base_name): zone_name="zone_name_2", base_name=base_name, ) - assert sample.meshes.get_zone_names(base_name) == [ + assert sample.features.get_zone_names(base_name) == [ "zone_name_1", "zone_name_2", ] - assert sorted(sample.meshes.get_zone_names(base_name, unique=True)) == sorted( + assert sorted(sample.features.get_zone_names(base_name, unique=True)) == sorted( ["zone_name_1", "zone_name_2"] ) - assert sample.meshes.get_zone_names(base_name, full_path=True) == [ + assert sample.features.get_zone_names(base_name, full_path=True) == [ f"{base_name}/zone_name_1", f"{base_name}/zone_name_2", ] def test_get_zone_type(self, sample: Sample, zone_name, base_name): with pytest.raises(KeyError): - sample.meshes.get_zone_type(zone_name, base_name) - sample.meshes.init_tree() + sample.features.get_zone_type(zone_name, base_name) + sample.features.init_tree() with pytest.raises(KeyError): - sample.meshes.get_zone_type(zone_name, base_name) + sample.features.get_zone_type(zone_name, base_name) sample.init_base(3, 3, base_name) with pytest.raises(KeyError): - sample.meshes.get_zone_type(zone_name, base_name) + sample.features.get_zone_type(zone_name, base_name) sample.init_zone( np.random.randint(0, 10, size=3), zone_name=zone_name, base_name=base_name ) - assert sample.meshes.get_zone_type(zone_name, base_name) == CGK.Unstructured_s + assert sample.features.get_zone_type(zone_name, base_name) == CGK.Unstructured_s def test_get_zone(self, sample: Sample, zone_name, base_name): - assert sample.meshes.get_zone(zone_name, base_name) is None + assert sample.features.get_zone(zone_name, base_name) is None sample.init_base(3, 3, base_name) - assert sample.meshes.get_zone(zone_name, base_name) is None + assert sample.features.get_zone(zone_name, base_name) is None sample.init_zone( np.random.randint(0, 10, size=3), zone_name=zone_name, base_name=base_name ) - assert sample.meshes.get_zone() is not None - assert sample.meshes.get_zone(zone_name, base_name) is not None + assert sample.features.get_zone() is not None + assert sample.features.get_zone(zone_name, base_name) is not None sample.init_zone( np.random.randint(0, 10, size=3), zone_name="other_zone_name", base_name=base_name, ) - assert sample.meshes.get_zone(zone_name, base_name) is not None + assert sample.features.get_zone(zone_name, base_name) is not None with pytest.raises(KeyError): - assert sample.meshes.get_zone() is not None + assert sample.features.get_zone() is not None # -------------------------------------------------------------------------# def test_get_scalar_names(self, sample: Sample): @@ -712,67 +650,12 @@ def test__add_feature(self, sample_with_scalar): feature=[3.1415], ) - # -------------------------------------------------------------------------# - def test_get_time_series_names_empty(self, sample: Sample): - assert 
sample.get_time_series_names() == [] - - def test_get_time_series_names(self, sample_with_time_series): - assert sample_with_time_series.get_time_series_names() == ["test_time_series_1"] - - def test_get_time_series_empty(self, sample): - assert sample.get_time_series("missing_time_series_name") is None - - def test_get_time_series(self, sample_with_time_series): - assert ( - sample_with_time_series.get_time_series("missing_time_series_name") is None - ) - assert sample_with_time_series.get_time_series("test_time_series_1") is not None - - def test_add_time_series_empty(self, sample_with_time_series): - pass - - def test_add_time_series(self, sample_with_time_series): - sample_with_time_series.add_time_series( - "test_time_series_2", np.arange(111, dtype=float), np.random.randn(111) - ) - - def test_del_time_series_unknown_scalar(self, sample_with_time_series): - with pytest.raises(KeyError): - sample_with_time_series.del_time_series("non_existent_scalar") - - def test_del_time_series_no_scalar(self): - sample = Sample() - with pytest.raises(KeyError): - sample.del_time_series("non_existent_scalar") - - def test_del_time_series(self, sample_with_time_series): - assert len(sample_with_time_series.get_time_series_names()) == 1 - - sample_with_time_series.add_time_series( - "test_time_series_2", np.arange(222, dtype=float), np.random.randn(222) - ) - assert len(sample_with_time_series.get_time_series_names()) == 2 - - time_series = sample_with_time_series.del_time_series("test_time_series_1") - assert len(sample_with_time_series.get_time_series_names()) == 1 - assert time_series is not None - assert isinstance(time_series, tuple) - assert isinstance(time_series[0], np.ndarray) - assert isinstance(time_series[1], np.ndarray) - - time_series = sample_with_time_series.del_time_series("test_time_series_2") - assert len(sample_with_time_series.get_time_series_names()) == 0 - assert time_series is not None - assert isinstance(time_series, tuple) - assert isinstance(time_series[0], np.ndarray) - assert isinstance(time_series[1], np.ndarray) - # -------------------------------------------------------------------------# def test_get_nodal_tags_empty(self, sample): - assert sample.meshes.get_nodal_tags() == {} + assert sample.features.get_nodal_tags() == {} def test_get_nodal_tags(self, sample_with_tree, nodal_tags): - assert np.all(sample_with_tree.meshes.get_nodal_tags()["tag"] == nodal_tags) + assert np.all(sample_with_tree.features.get_nodal_tags()["tag"] == nodal_tags) # -------------------------------------------------------------------------# def test_get_nodes_empty(self, sample): @@ -795,13 +678,13 @@ def test_set_nodes(self, sample, nodes, zone_name, base_name): # -------------------------------------------------------------------------# def test_get_elements_empty(self, sample: Sample): - assert sample.meshes.get_elements() == {} + assert sample.features.get_elements() == {} def test_get_elements(self, sample_with_tree: Sample, triangles): - assert list(sample_with_tree.meshes.get_elements().keys()) == ["TRI_3"] + assert list(sample_with_tree.features.get_elements().keys()) == ["TRI_3"] print(f"{triangles=}") - print(f"{sample_with_tree.meshes.get_elements()=}") - assert np.all(sample_with_tree.meshes.get_elements()["TRI_3"] == triangles) + print(f"{sample_with_tree.features.get_elements()=}") + assert np.all(sample_with_tree.features.get_elements()["TRI_3"] == triangles) # -------------------------------------------------------------------------# def test_get_field_names(self, sample: 
Sample): @@ -958,7 +841,7 @@ def test_del_field_in_zone(self, zone_name, base_name, cell_center_field): # Testing new tree on field 'test_elem_field_2' new_sample = Sample() - new_sample.meshes.add_tree(new_tree) + new_sample.features.add_tree(new_tree) assert ( new_sample.get_field( @@ -986,7 +869,7 @@ def test_del_field_in_zone(self, zone_name, base_name, cell_center_field): # Testing new tree on field 'test_elem_field_1' new_sample = Sample() - new_sample.meshes.add_tree(new_tree) + new_sample.features.add_tree(new_tree) assert ( new_sample.get_field( @@ -1006,64 +889,50 @@ def test_del_all_fields(self, sample_with_tree): sample_with_tree.del_all_fields() # -------------------------------------------------------------------------# - def test_get_feature_from_string_identifier( - self, sample_with_tree_and_scalar_and_time_series - ): - sample_with_tree_and_scalar_and_time_series.get_feature_from_string_identifier( + def test_get_feature_from_string_identifier(self, sample_with_tree_and_scalar): + sample_with_tree_and_scalar.get_feature_from_string_identifier( "scalar::test_scalar_1" ) - sample_with_tree_and_scalar_and_time_series.get_feature_from_string_identifier( - "time_series::test_time_series_1" - ) - - sample_with_tree_and_scalar_and_time_series.get_feature_from_string_identifier( + sample_with_tree_and_scalar.get_feature_from_string_identifier( "field::test_node_field_1" ) - sample_with_tree_and_scalar_and_time_series.get_feature_from_string_identifier( + sample_with_tree_and_scalar.get_feature_from_string_identifier( "field::test_node_field_1///Base_2_2" ) - sample_with_tree_and_scalar_and_time_series.get_feature_from_string_identifier( + sample_with_tree_and_scalar.get_feature_from_string_identifier( "field::test_node_field_1//Zone/Base_2_2" ) - sample_with_tree_and_scalar_and_time_series.get_feature_from_string_identifier( + sample_with_tree_and_scalar.get_feature_from_string_identifier( "field::test_node_field_1/Vertex/Zone/Base_2_2" ) - sample_with_tree_and_scalar_and_time_series.get_feature_from_string_identifier( + sample_with_tree_and_scalar.get_feature_from_string_identifier( "field::test_node_field_1/Vertex/Zone/Base_2_2/0" ) - sample_with_tree_and_scalar_and_time_series.get_feature_from_string_identifier( - "nodes::" - ) - sample_with_tree_and_scalar_and_time_series.get_feature_from_string_identifier( + sample_with_tree_and_scalar.get_feature_from_string_identifier("nodes::") + sample_with_tree_and_scalar.get_feature_from_string_identifier( "nodes::/Base_2_2" ) - sample_with_tree_and_scalar_and_time_series.get_feature_from_string_identifier( + sample_with_tree_and_scalar.get_feature_from_string_identifier( "nodes::Zone/Base_2_2" ) - sample_with_tree_and_scalar_and_time_series.get_feature_from_string_identifier( + sample_with_tree_and_scalar.get_feature_from_string_identifier( "nodes::Zone/Base_2_2/0" ) - def test_get_feature_from_identifier( - self, sample_with_tree_and_scalar_and_time_series - ): - sample_with_tree_and_scalar_and_time_series.get_feature_from_identifier( + def test_get_feature_from_identifier(self, sample_with_tree_and_scalar): + sample_with_tree_and_scalar.get_feature_from_identifier( {"type": "scalar", "name": "test_scalar_1"} ) - sample_with_tree_and_scalar_and_time_series.get_feature_from_identifier( - {"type": "time_series", "name": "test_time_series_1"} - ) - - sample_with_tree_and_scalar_and_time_series.get_feature_from_identifier( + sample_with_tree_and_scalar.get_feature_from_identifier( {"type": "field", "name": "test_node_field_1"} ) - 
sample_with_tree_and_scalar_and_time_series.get_feature_from_identifier( + sample_with_tree_and_scalar.get_feature_from_identifier( {"type": "field", "name": "test_node_field_1", "base_name": "Base_2_2"} ) - sample_with_tree_and_scalar_and_time_series.get_feature_from_identifier( + sample_with_tree_and_scalar.get_feature_from_identifier( { "type": "field", "name": "test_node_field_1", @@ -1071,7 +940,7 @@ def test_get_feature_from_identifier( "zone_name": "Zone", } ) - sample_with_tree_and_scalar_and_time_series.get_feature_from_identifier( + sample_with_tree_and_scalar.get_feature_from_identifier( { "type": "field", "name": "test_node_field_1", @@ -1080,7 +949,7 @@ def test_get_feature_from_identifier( "location": "Vertex", } ) - sample_with_tree_and_scalar_and_time_series.get_feature_from_identifier( + sample_with_tree_and_scalar.get_feature_from_identifier( { "type": "field", "name": "test_node_field_1", @@ -1090,10 +959,10 @@ def test_get_feature_from_identifier( "time": 0.0, } ) - sample_with_tree_and_scalar_and_time_series.get_feature_from_identifier( + sample_with_tree_and_scalar.get_feature_from_identifier( {"type": "field", "name": "test_node_field_1", "time": 0.0} ) - sample_with_tree_and_scalar_and_time_series.get_feature_from_identifier( + sample_with_tree_and_scalar.get_feature_from_identifier( { "type": "field", "name": "test_node_field_1", @@ -1101,7 +970,7 @@ def test_get_feature_from_identifier( "time": 0.0, } ) - sample_with_tree_and_scalar_and_time_series.get_feature_from_identifier( + sample_with_tree_and_scalar.get_feature_from_identifier( { "type": "field", "name": "test_node_field_1", @@ -1111,39 +980,34 @@ def test_get_feature_from_identifier( } ) - sample_with_tree_and_scalar_and_time_series.get_feature_from_identifier( - {"type": "nodes"} - ) - sample_with_tree_and_scalar_and_time_series.get_feature_from_identifier( + sample_with_tree_and_scalar.get_feature_from_identifier({"type": "nodes"}) + sample_with_tree_and_scalar.get_feature_from_identifier( {"type": "nodes", "base_name": "Base_2_2"} ) - sample_with_tree_and_scalar_and_time_series.get_feature_from_identifier( + sample_with_tree_and_scalar.get_feature_from_identifier( {"type": "nodes", "base_name": "Base_2_2", "zone_name": "Zone"} ) - sample_with_tree_and_scalar_and_time_series.get_feature_from_identifier( + sample_with_tree_and_scalar.get_feature_from_identifier( {"type": "nodes", "base_name": "Base_2_2", "zone_name": "Zone", "time": 0.0} ) - sample_with_tree_and_scalar_and_time_series.get_feature_from_identifier( + sample_with_tree_and_scalar.get_feature_from_identifier( {"type": "nodes", "zone_name": "Zone"} ) - sample_with_tree_and_scalar_and_time_series.get_feature_from_identifier( + sample_with_tree_and_scalar.get_feature_from_identifier( {"type": "nodes", "time": 0.0} ) - def test_get_features_from_identifiers( - self, sample_with_tree_and_scalar_and_time_series - ): - sample_with_tree_and_scalar_and_time_series.get_features_from_identifiers( + def test_get_features_from_identifiers(self, sample_with_tree_and_scalar): + sample_with_tree_and_scalar.get_features_from_identifiers( [{"type": "scalar", "name": "test_scalar_1"}] ) - sample_with_tree_and_scalar_and_time_series.get_features_from_identifiers( + sample_with_tree_and_scalar.get_features_from_identifiers( [ {"type": "scalar", "name": "test_scalar_1"}, - {"type": "time_series", "name": "test_time_series_1"}, ] ) - sample_with_tree_and_scalar_and_time_series.get_features_from_identifiers( + 
sample_with_tree_and_scalar.get_features_from_identifiers( [ { "type": "field", @@ -1158,68 +1022,37 @@ def test_get_features_from_identifiers( ] ) - def test_update_features_from_identifier_scalar( - self, sample_with_tree_and_scalar_and_time_series: Sample - ): - before = sample_with_tree_and_scalar_and_time_series.get_scalar("test_scalar_1") - sample_ = ( - sample_with_tree_and_scalar_and_time_series.update_features_from_identifier( - feature_identifiers=FeatureIdentifier( - {"type": "scalar", "name": "test_scalar_1"} - ), - features=3.1415, - in_place=False, - ) + def test_update_features_from_identifier(self, sample_with_tree_and_scalar): + before = sample_with_tree_and_scalar.get_scalar("test_scalar_1") + sample_ = sample_with_tree_and_scalar.update_features_from_identifier( + feature_identifiers={"type": "scalar", "name": "test_scalar_1"}, + features=3.141592, + in_place=False, ) after = sample_.get_scalar("test_scalar_1") + show_cgns_tree(sample_.features.data[0]) assert after != before - def test_update_features_from_identifier_time_series( - self, sample_with_tree_and_scalar_and_time_series: Sample - ): - before = sample_with_tree_and_scalar_and_time_series.get_time_series( - "test_time_series_1" - ) - sample_ = ( - sample_with_tree_and_scalar_and_time_series.update_features_from_identifier( - feature_identifiers=FeatureIdentifier( - { - "type": "time_series", - "name": "test_time_series_1", - } - ), - features=(np.array([0, 1]), np.array([3.14, 3.15])), - in_place=False, - ) - ) - after = sample_.get_time_series("test_time_series_1") - assert len(after[0]) != len(before[0]) - - def test_update_features_from_identifier_field( - self, sample_with_tree_and_scalar_and_time_series: Sample - ): - before = sample_with_tree_and_scalar_and_time_series.get_field( + before = sample_with_tree_and_scalar.get_field( name="test_node_field_1", zone_name="Zone", base_name="Base_2_2", location="Vertex", time=0.0, ) - sample_ = ( - sample_with_tree_and_scalar_and_time_series.update_features_from_identifier( - feature_identifiers=FeatureIdentifier( - { - "type": "field", - "name": "test_node_field_1", - "base_name": "Base_2_2", - "zone_name": "Zone", - "location": "Vertex", - "time": 0.0, - } - ), - features=np.random.rand(*before.shape), - in_place=False, - ) + sample_ = sample_with_tree_and_scalar.update_features_from_identifier( + feature_identifiers=FeatureIdentifier( + { + "type": "field", + "name": "test_node_field_1", + "base_name": "Base_2_2", + "zone_name": "Zone", + "location": "Vertex", + "time": 0.0, + } + ), + features=np.random.rand(*before.shape), + in_place=False, ) after = sample_.get_field( name="test_node_field_1", @@ -1230,145 +1063,97 @@ def test_update_features_from_identifier_field( ) assert np.any(~np.isclose(after, before)) - def test_update_features_from_identifier_nodes( - self, sample_with_tree_and_scalar_and_time_series: Sample - ): - before = sample_with_tree_and_scalar_and_time_series.get_nodes( + before = sample_with_tree_and_scalar.get_nodes( zone_name="Zone", base_name="Base_2_2", time=0.0 ) - sample_ = ( - sample_with_tree_and_scalar_and_time_series.update_features_from_identifier( - feature_identifiers=FeatureIdentifier( - { - "type": "nodes", - "base_name": "Base_2_2", - "zone_name": "Zone", - "time": 0.0, - } - ), - features=np.random.rand(*before.shape), - in_place=False, - ) + sample_ = sample_with_tree_and_scalar.update_features_from_identifier( + feature_identifiers=FeatureIdentifier( + { + "type": "nodes", + "base_name": "Base_2_2", + "zone_name": "Zone", 
+ "time": 0.0, + } + ), + features=np.random.rand(*before.shape), + in_place=False, ) after = sample_.get_nodes(zone_name="Zone", base_name="Base_2_2", time=0.0) assert np.any(~np.isclose(after, before)) - def test_update_features_from_identifier_field_and_nodes( - self, sample_with_tree_and_scalar_and_time_series: Sample - ): - before_1 = sample_with_tree_and_scalar_and_time_series.get_field( - "test_node_field_1" - ) - before_2 = sample_with_tree_and_scalar_and_time_series.get_nodes() - sample_ = ( - sample_with_tree_and_scalar_and_time_series.update_features_from_identifier( - feature_identifiers=[ - FeatureIdentifier({"type": "field", "name": "test_node_field_1"}), - FeatureIdentifier({"type": "nodes"}), - ], - features=[ - np.random.rand(*before_1.shape), - np.random.rand(*before_2.shape), - ], - in_place=False, - ) + before_1 = sample_with_tree_and_scalar.get_field("test_node_field_1") + before_2 = sample_with_tree_and_scalar.get_nodes() + sample_ = sample_with_tree_and_scalar.update_features_from_identifier( + feature_identifiers=[ + {"type": "field", "name": "test_node_field_1"}, + {"type": "nodes"}, + ], + features=[ + np.random.rand(*before_1.shape), + np.random.rand(*before_2.shape), + ], + in_place=False, ) after_1 = sample_.get_field("test_node_field_1") after_2 = sample_.get_nodes() assert np.any(~np.isclose(after_1, before_1)) assert np.any(~np.isclose(after_2, before_2)) - sample_ = ( - sample_with_tree_and_scalar_and_time_series.update_features_from_identifier( - feature_identifiers=[ - FeatureIdentifier({"type": "field", "name": "test_node_field_1"}) - ], - features=[np.random.rand(*before_1.shape)], - in_place=True, - ) - ) - ref_1 = sample_with_tree_and_scalar_and_time_series.get_field( - "test_node_field_1" + sample_ = sample_with_tree_and_scalar.update_features_from_identifier( + feature_identifiers=[{"type": "field", "name": "test_node_field_1"}], + features=[np.random.rand(*before_1.shape)], + in_place=True, ) + ref_1 = sample_with_tree_and_scalar.get_field("test_node_field_1") ref_2 = sample_.get_field("test_node_field_1") assert np.any(np.isclose(ref_1, ref_2)) - def test_extract_sample_from_identifier( - self, sample_with_tree_and_scalar_and_time_series - ): - sample_: Sample = ( - sample_with_tree_and_scalar_and_time_series.extract_sample_from_identifier( - feature_identifiers={"type": "scalar", "name": "test_scalar_1"}, - ) + def test_extract_sample_from_identifier(self, sample_with_tree_and_scalar): + sample_: Sample = sample_with_tree_and_scalar.extract_sample_from_identifier( + feature_identifiers={"type": "scalar", "name": "test_scalar_1"}, ) assert sample_.get_scalar_names() == ["test_scalar_1"] - assert len(sample_.get_time_series_names()) == 0 - assert len(sample_.get_field_names()) == 0 - - sample_: Sample = ( - sample_with_tree_and_scalar_and_time_series.extract_sample_from_identifier( - feature_identifiers={ - "type": "time_series", - "name": "test_time_series_1", - }, - ) - ) - assert len(sample_.get_scalar_names()) == 0 - assert sample_.get_time_series_names() == ["test_time_series_1"] assert len(sample_.get_field_names()) == 0 - sample_: Sample = ( - sample_with_tree_and_scalar_and_time_series.extract_sample_from_identifier( - feature_identifiers={ - "type": "field", - "name": "test_node_field_1", - "base_name": "Base_2_2", - "zone_name": "Zone", - "location": "Vertex", - "time": 0.0, - }, - ) + sample_: Sample = sample_with_tree_and_scalar.extract_sample_from_identifier( + feature_identifiers={ + "type": "field", + "name": "test_node_field_1", + 
"base_name": "Base_2_2", + "zone_name": "Zone", + "location": "Vertex", + "time": 0.0, + }, ) + show_cgns_tree(sample_with_tree_and_scalar.features.data[0]) assert len(sample_.get_scalar_names()) == 0 - assert len(sample_.get_time_series_names()) == 0 assert sample_.get_field_names() == ["test_node_field_1"] - sample_: Sample = ( - sample_with_tree_and_scalar_and_time_series.extract_sample_from_identifier( - feature_identifiers={ - "type": "nodes", - "base_name": "Base_2_2", - "zone_name": "Zone", - "time": 0.0, - }, - ) + sample_: Sample = sample_with_tree_and_scalar.extract_sample_from_identifier( + feature_identifiers={ + "type": "nodes", + "base_name": "Base_2_2", + "zone_name": "Zone", + "time": 0.0, + }, ) assert len(sample_.get_scalar_names()) == 0 - assert len(sample_.get_time_series_names()) == 0 assert len(sample_.get_field_names()) == 0 - sample_: Sample = ( - sample_with_tree_and_scalar_and_time_series.extract_sample_from_identifier( - feature_identifiers=[ - {"type": "field", "name": "test_node_field_1"}, - {"type": "nodes"}, - ], - ) + sample_: Sample = sample_with_tree_and_scalar.extract_sample_from_identifier( + feature_identifiers=[ + {"type": "field", "name": "test_node_field_1"}, + {"type": "nodes"}, + ], ) assert len(sample_.get_scalar_names()) == 0 - assert len(sample_.get_time_series_names()) == 0 assert sample_.get_field_names() == ["test_node_field_1"] - def test_get_all_features_identifiers( - self, sample_with_tree_and_scalar_and_time_series - ): - feat_ids = ( - sample_with_tree_and_scalar_and_time_series.get_all_features_identifiers() - ) - assert len(feat_ids) == 10 + def test_get_all_features_identifiers(self, sample_with_tree_and_scalar): + feat_ids = sample_with_tree_and_scalar.get_all_features_identifiers() + assert len(feat_ids) == 9 assert {"type": "scalar", "name": "r"} in feat_ids assert {"type": "scalar", "name": "test_scalar_1"} in feat_ids - assert {"type": "time_series", "name": "test_time_series_1"} in feat_ids assert { "type": "nodes", "base_name": "Base_2_2", @@ -1424,22 +1209,15 @@ def test_get_all_features_identifiers( "time": 0.0, } in feat_ids - def test_get_all_features_identifiers_by_type( - self, sample_with_tree_and_scalar_and_time_series - ): - feat_ids = sample_with_tree_and_scalar_and_time_series.get_all_features_identifiers_by_type( + def test_get_all_features_identifiers_by_type(self, sample_with_tree_and_scalar): + feat_ids = sample_with_tree_and_scalar.get_all_features_identifiers_by_type( "scalar" ) assert len(feat_ids) == 2 assert {"type": "scalar", "name": "r"} in feat_ids assert {"type": "scalar", "name": "test_scalar_1"} in feat_ids - feat_ids = sample_with_tree_and_scalar_and_time_series.get_all_features_identifiers_by_type( - "time_series" - ) - assert {"type": "time_series", "name": "test_time_series_1"} in feat_ids - - feat_ids = sample_with_tree_and_scalar_and_time_series.get_all_features_identifiers_by_type( + feat_ids = sample_with_tree_and_scalar.get_all_features_identifiers_by_type( "nodes" ) assert { @@ -1449,7 +1227,7 @@ def test_get_all_features_identifiers_by_type( "time": 0.0, } in feat_ids - feat_ids = sample_with_tree_and_scalar_and_time_series.get_all_features_identifiers_by_type( + feat_ids = sample_with_tree_and_scalar.get_all_features_identifiers_by_type( "field" ) assert len(feat_ids) == 6 @@ -1462,24 +1240,28 @@ def test_get_all_features_identifiers_by_type( "time": 0.0, } in feat_ids - def test_merge_features( - self, - sample_with_tree_and_scalar_and_time_series: Sample, - sample_with_tree: Sample, - ): 
- feat_id = ( - sample_with_tree_and_scalar_and_time_series.get_all_features_identifiers() - ) - feat_id = [ - fid for fid in feat_id if fid["type"] not in ["scalar", "time_series"] - ] - sample_1 = ( - sample_with_tree_and_scalar_and_time_series.extract_sample_from_identifier( - feat_id - ) + def test_merge_features(self, sample_with_tree_and_scalar, sample_with_tree): + feat_id = sample_with_tree_and_scalar.get_all_features_identifiers() + feat_id = [fid for fid in feat_id if fid["type"] not in ["scalar"]] + sample_1 = sample_with_tree_and_scalar.extract_sample_from_identifier(feat_id) + feat_id = sample_with_tree.get_all_features_identifiers() + feat_id = [fid for fid in feat_id if fid["type"] not in ["field"]] + sample_2 = sample_with_tree.extract_sample_from_identifier(feat_id) + sample_merge_1 = sample_1.merge_features(sample_2, in_place=False) + sample_merge_2 = sample_2.merge_features(sample_1, in_place=False) + assert ( + sample_merge_1.get_all_features_identifiers() + == sample_merge_2.get_all_features_identifiers() ) + sample_2.merge_features(sample_1, in_place=True) + sample_1.merge_features(sample_2, in_place=True) + + def test_merge_features2(self, sample_with_tree_and_scalar, sample_with_tree): + feat_id = sample_with_tree_and_scalar.get_all_features_identifiers() + feat_id = [fid for fid in feat_id if fid["type"] not in ["scalar"]] + sample_1 = sample_with_tree_and_scalar.extract_sample_from_identifier(feat_id) feat_id = sample_with_tree.get_all_features_identifiers() - feat_id = [fid for fid in feat_id if fid["type"] not in ["field", "node"]] + feat_id = [fid for fid in feat_id if fid["type"] not in ["field", "nodes"]] sample_2 = sample_with_tree.extract_sample_from_identifier(feat_id) sample_merge_1 = sample_1.merge_features(sample_2, in_place=False) sample_merge_2 = sample_2.merge_features(sample_1, in_place=False) @@ -1491,32 +1273,30 @@ def test_merge_features( sample_1.merge_features(sample_2, in_place=True) # -------------------------------------------------------------------------# - def test_save(self, sample_with_tree_and_scalar_and_time_series, tmp_path): + def test_save(self, sample_with_tree_and_scalar, tmp_path): save_dir = tmp_path / "test_dir" - sample_with_tree_and_scalar_and_time_series.save(save_dir) + sample_with_tree_and_scalar.save(save_dir) assert save_dir.is_dir() with pytest.raises(ValueError): - sample_with_tree_and_scalar_and_time_series.save(save_dir) - sample_with_tree_and_scalar_and_time_series.save(save_dir, overwrite=True) + sample_with_tree_and_scalar.save(save_dir) + sample_with_tree_and_scalar.save(save_dir, overwrite=True) - def test_load_from_saved_file( - self, sample_with_tree_and_scalar_and_time_series, tmp_path - ): + def test_load_from_saved_file(self, sample_with_tree_and_scalar, tmp_path): save_dir = tmp_path / "test_dir" - sample_with_tree_and_scalar_and_time_series.save(save_dir) + sample_with_tree_and_scalar.save(save_dir) new_sample = Sample() new_sample.load(save_dir) assert CGU.checkSameTree( - sample_with_tree_and_scalar_and_time_series.get_mesh(), + sample_with_tree_and_scalar.get_mesh(), new_sample.get_mesh(), ) - def test_load_from_dir(self, sample_with_tree_and_scalar_and_time_series, tmp_path): + def test_load_from_dir(self, sample_with_tree_and_scalar, tmp_path): save_dir = tmp_path / "test_dir" - sample_with_tree_and_scalar_and_time_series.save(save_dir) + sample_with_tree_and_scalar.save(save_dir) new_sample = Sample.load_from_dir(save_dir) assert CGU.checkSameTree( - 
@@ -1530,10 +1310,8 @@ def test___repr__with_scalar(self, sample_with_scalar):
         print(sample_with_scalar)
 
     def test___repr__with_tree(self, sample_with_tree):
         print(sample_with_tree)
 
-    def test___repr__with_tree_and_scalar(
-        self, sample_with_tree_and_scalar_and_time_series
-    ):
-        print(sample_with_tree_and_scalar_and_time_series)
+    def test___repr__with_tree_and_scalar(self, sample_with_tree_and_scalar):
+        print(sample_with_tree_and_scalar)
 
     def test___repr__full_sample(self, full_sample):
         print(full_sample)
@@ -1549,10 +1327,8 @@ def test_summarize_with_scalar(self, sample_with_scalar):
     def test_summarize_with_tree(self, sample_with_tree):
         print(sample_with_tree.summarize())
 
-    def test_summarize_with_tree_and_scalar(
-        self, sample_with_tree_and_scalar_and_time_series
-    ):
-        print(sample_with_tree_and_scalar_and_time_series.summarize())
+    def test_summarize_with_tree_and_scalar(self, sample_with_tree_and_scalar):
+        print(sample_with_tree_and_scalar.summarize())
 
     def test_check_completeness_empty(self, sample):
         print(sample.check_completeness())
@@ -1563,7 +1339,5 @@ def test_check_completeness_with_scalar(self, sample_with_scalar):
     def test_check_completeness_with_tree(self, sample_with_tree):
         print(sample_with_tree.check_completeness())
 
-    def test_check_completeness_with_tree_and_scalar(
-        self, sample_with_tree_and_scalar_and_time_series
-    ):
-        print(sample_with_tree_and_scalar_and_time_series.check_completeness())
+    def test_check_completeness_with_tree_and_scalar(self, sample_with_tree_and_scalar):
+        print(sample_with_tree_and_scalar.check_completeness())
diff --git a/tests/pipelines/conftest.py b/tests/pipelines/conftest.py
index f0504e4d..4a8da522 100644
--- a/tests/pipelines/conftest.py
+++ b/tests/pipelines/conftest.py
@@ -49,11 +49,6 @@ def dataset_with_samples_scalar2_feat_ids(dataset_with_samples):
     return [dataset_with_samples.get_all_features_identifiers_by_type("scalar")[1]]
 
 
-@pytest.fixture()
-def dataset_with_samples_time_series_feat_ids(dataset_with_samples):
-    return dataset_with_samples.get_all_features_identifiers_by_type("time_series")
-
-
 @pytest.fixture()
 def dataset_with_samples_with_tree_field_feat_ids(dataset_with_samples_with_tree):
     return dataset_with_samples_with_tree.get_all_features_identifiers_by_type("field")
diff --git a/tests/pipelines/test_sklearn_block_wrappers.py b/tests/pipelines/test_sklearn_block_wrappers.py
index 467573f0..cb0bb512 100644
--- a/tests/pipelines/test_sklearn_block_wrappers.py
+++ b/tests/pipelines/test_sklearn_block_wrappers.py
@@ -12,7 +12,6 @@ def test_get_2Darray_from_homogeneous_identifiers(
     dataset_with_samples,
     dataset_with_samples_scalar1_feat_ids,
     dataset_with_samples_scalar2_feat_ids,
-    dataset_with_samples_time_series_feat_ids,
 ):
     # dataset_with_samples.get_all_features_identifiers()
     X = get_2Darray_from_homogeneous_identifiers(
@@ -26,12 +25,6 @@ def test_get_2Darray_from_homogeneous_identifiers(
     X = get_2Darray_from_homogeneous_identifiers(dataset_with_samples, feat_ids)
     assert X.shape == (4, 2)
 
-    dataset_with_samples_time_series_feat_ids
-    # not working yet for time series
-    # X = get_2Darray_from_homogeneous_identifiers(
-    #     dataset_with_samples, dataset_with_samples_time_series_feat_ids
-    # )
-
     field_same_size_feat_id = {
         "type": "field",
         "name": "test_field_same_size",
diff --git a/tests/post/create_datasets.py b/tests/post/create_datasets.py
new file mode 100644
index 00000000..b9d23247
--- /dev/null
+++ b/tests/post/create_datasets.py
@@ -0,0 +1,76 @@
+import numpy as np
+
+from plaid import Dataset, ProblemDefinition, Sample
+from plaid.types import FeatureIdentifier
+from plaid.utils.split import split_dataset
+
+ins = []
+outs = []
+for i in range(30):
+    ins.append(np.random.rand())
+    outs.append(np.random.rand())
+
+samples = []
+for i in range(30):
+    sample = Sample()
+    sample.add_scalar("feature_1", ins[i])
+    sample.add_scalar("feature_2", outs[i])
+    samples.append(sample)
+
+dataset = Dataset(samples=samples)
+dataset._save_to_dir_(path="dataset_ref", verbose=True)
+
+
+samples = []
+for i in range(30):
+    sample = Sample()
+    sample.add_scalar("feature_1", 1.00001 * ins[i])
+    sample.add_scalar("feature_2", 1.00001 * outs[i])
+    samples.append(sample)
+
+dataset = Dataset(samples=samples)
+dataset._save_to_dir_(path="dataset_near_pred", verbose=True)
+
+
+samples = []
+for i in range(30):
+    sample = Sample()
+    sample.add_scalar("feature_1", 0.5 * ins[i])
+    sample.add_scalar("feature_2", 0.5 * outs[i])
+    samples.append(sample)
+
+dataset = Dataset(samples=samples)
+dataset._save_to_dir_(path="dataset_pred", verbose=True)
+
+
+print("dataset =", dataset)
+print(dataset[0].get_scalar("feature_1"))
+
+
+pb_def = ProblemDefinition()
+
+scalar_1_feat_id = FeatureIdentifier({"type": "scalar", "name": "feature_1"})
+scalar_2_feat_id = FeatureIdentifier({"type": "scalar", "name": "feature_2"})
+
+pb_def.add_in_feature_identifier(scalar_1_feat_id)
+pb_def.add_out_feature_identifier(scalar_2_feat_id)
+
+pb_def.add_input_scalar_name("feature_1")
+pb_def.add_output_scalar_name("feature_2")
+
+pb_def.set_task("regression")
+
+options = {
+    "shuffle": False,
+    "split_sizes": {
+        "train": 20,
+        "test": 10,
+    },
+}
+
+split = split_dataset(dataset, options)
+print(f"{split = }")
+
+pb_def.set_split(split)
+
+pb_def._save_to_dir_("problem_definition")
diff --git a/tests/post/dataset_near_pred/infos.yaml b/tests/post/dataset_near_pred/infos.yaml
deleted file mode 100644
index 718bd9ae..00000000
--- a/tests/post/dataset_near_pred/infos.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-legal:
-  owner: Owner
-  license: Licence
-data_production:
-  type: Test
-  physics: test data for mmgp library
-  simulator: test
diff --git a/tests/post/dataset_near_pred/samples/sample_000000000/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000000/meshes/mesh_000000000.cgns
new file mode 100644
index 00000000..a3e0178d
Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000000/meshes/mesh_000000000.cgns differ
diff --git a/tests/post/dataset_near_pred/samples/sample_000000000/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000000/scalars.csv
deleted file mode 100644
index 7b4472e3..00000000
--- a/tests/post/dataset_near_pred/samples/sample_000000000/scalars.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-scalar_1,scalar_2
--2.316260024588145117e-01,1.316934606688710785e+00
diff --git a/tests/post/dataset_near_pred/samples/sample_000000001/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000001/meshes/mesh_000000000.cgns
new file mode 100644
index 00000000..201f1513
Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000001/meshes/mesh_000000000.cgns differ
diff --git a/tests/post/dataset_near_pred/samples/sample_000000001/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000001/scalars.csv
deleted file mode 100644
index 9dc9d4b0..00000000
--- a/tests/post/dataset_near_pred/samples/sample_000000001/scalars.csv
+++ /dev/null
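The `create_datasets.py` script above regenerates `dataset_ref`, `dataset_near_pred`, `dataset_pred` and `problem_definition` in the new CGNS-backed layout; the binary mesh files and deleted per-sample CSVs that follow are its output. The updated bisect tests at the end of this patch read those artifacts back roughly as follows (a sketch only: the `plot_bisect` import path is an assumption, and paths are relative to `tests/post`):

```python
from plaid import Dataset, ProblemDefinition

# Hypothetical import path for this sketch; the tests import plot_bisect
# from the package's post-processing module.
from plaid.post.bisect import plot_bisect

ref = Dataset("dataset_ref")        # reference scalars
pred = Dataset("dataset_pred")      # perturbed "predictions" (0.5 x the reference values)
problem = ProblemDefinition("problem_definition")

# Compares reference vs. predicted values of the output scalar "feature_2"
# and writes bisect_plot.png into the working directory.
plot_bisect(ref, pred, problem, "feature_2", "bisect_plot")
```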
@@ -1,2 +0,0 @@ -scalar_1,scalar_2 --4.071113148416793392e-01,5.551457705224029571e-01 diff --git a/tests/post/dataset_near_pred/samples/sample_000000002/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000002/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..c7bf05b9 Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000002/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000002/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000002/scalars.csv deleted file mode 100644 index fc0cb3ab..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000002/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --1.316642848757348272e+00,-3.864278874197880498e+00 diff --git a/tests/post/dataset_near_pred/samples/sample_000000003/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000003/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..a31294cc Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000003/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000003/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000003/scalars.csv deleted file mode 100644 index c84fde0a..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000003/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --1.295794980483114589e+00,-1.312492279713304555e+00 diff --git a/tests/post/dataset_near_pred/samples/sample_000000004/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000004/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..201f8495 Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000004/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000004/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000004/scalars.csv deleted file mode 100644 index 4862f636..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000004/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --8.166529015229165855e-01,-1.126563017087640439e+00 diff --git a/tests/post/dataset_near_pred/samples/sample_000000005/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000005/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..d72a0f16 Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000005/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000005/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000005/scalars.csv deleted file mode 100644 index 9beafcca..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000005/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --1.157951362493477410e+00,6.703811300084938729e-01 diff --git a/tests/post/dataset_near_pred/samples/sample_000000006/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000006/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..e8350d97 Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000006/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000006/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000006/scalars.csv deleted file mode 100644 index 4f3eb99f..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000006/scalars.csv +++ /dev/null 
@@ -1,2 +0,0 @@ -scalar_1,scalar_2 --7.560567276227481148e-01,-8.130708857345105756e-01 diff --git a/tests/post/dataset_near_pred/samples/sample_000000007/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000007/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..c0b93dae Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000007/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000007/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000007/scalars.csv deleted file mode 100644 index 7f1d62b0..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000007/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -1.437041582795101080e+00,-8.807308582456169210e-01 diff --git a/tests/post/dataset_near_pred/samples/sample_000000008/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000008/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..8718e9bc Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000008/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000008/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000008/scalars.csv deleted file mode 100644 index 985dbd2f..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000008/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --9.544603194035470306e-01,-4.056076599154200824e-01 diff --git a/tests/post/dataset_near_pred/samples/sample_000000009/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000009/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..a506842a Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000009/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000009/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000009/scalars.csv deleted file mode 100644 index 90115a07..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000009/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --3.002057834501626818e-02,-9.579921106507679474e-01 diff --git a/tests/post/dataset_near_pred/samples/sample_000000010/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000010/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..e9674b20 Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000010/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000010/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000010/scalars.csv deleted file mode 100644 index 928be241..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000010/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --1.131881327720172248e+00,-6.258177776287130456e-01 diff --git a/tests/post/dataset_near_pred/samples/sample_000000011/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000011/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..38502af0 Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000011/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000011/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000011/scalars.csv deleted file mode 100644 index 48d3efcb..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000011/scalars.csv +++ /dev/null 
@@ -1,2 +0,0 @@ -scalar_1,scalar_2 --6.477163597027486841e-02,1.596843506332501050e+00 diff --git a/tests/post/dataset_near_pred/samples/sample_000000012/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000012/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..d2e634b7 Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000012/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000012/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000012/scalars.csv deleted file mode 100644 index 1c2a0a1d..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000012/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -1.562778626767657109e+00,-0.784788233684134079e-01 diff --git a/tests/post/dataset_near_pred/samples/sample_000000013/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000013/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..64a78cc7 Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000013/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000013/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000013/scalars.csv deleted file mode 100644 index 12bb02ea..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000013/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -2.845897295694204687e-01,-1.115780792326891513e+00 diff --git a/tests/post/dataset_near_pred/samples/sample_000000014/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000014/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..8f8d8f60 Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000014/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000014/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000014/scalars.csv deleted file mode 100644 index 9dc2352d..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000014/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --1.218830153792483939e+00,0.749679891578292423e+00 diff --git a/tests/post/dataset_near_pred/samples/sample_000000015/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000015/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..dd4b32a8 Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000015/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000015/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000015/scalars.csv deleted file mode 100644 index d5e4286d..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000015/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -5.162087708753002602e-01,-7.859990261696361014e-01 diff --git a/tests/post/dataset_near_pred/samples/sample_000000016/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000016/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..3e135b69 Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000016/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000016/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000016/scalars.csv deleted file mode 100644 index b21d792c..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000016/scalars.csv +++ /dev/null @@ 
-1,2 +0,0 @@ -scalar_1,scalar_2 --1.407240968085693211e+00,1.724049456495134258e+00 diff --git a/tests/post/dataset_near_pred/samples/sample_000000017/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000017/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..32f8c089 Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000017/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000017/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000017/scalars.csv deleted file mode 100644 index 738c8998..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000017/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --4.586821353951379687e-01,-1.876626540572654722e+00 diff --git a/tests/post/dataset_near_pred/samples/sample_000000018/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000018/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..e3609271 Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000018/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000018/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000018/scalars.csv deleted file mode 100644 index 67a48236..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000018/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -2.568487380150065724e-01,3.553127427434972785e-01 diff --git a/tests/post/dataset_near_pred/samples/sample_000000019/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000019/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..fe0d4e2c Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000019/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000019/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000019/scalars.csv deleted file mode 100644 index d1432684..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000019/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -2.958720326323473482e-01,3.042862624421485696e-01 diff --git a/tests/post/dataset_near_pred/samples/sample_000000020/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000020/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..d0294231 Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000020/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000020/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000020/scalars.csv deleted file mode 100644 index 0cb83947..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000020/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --3.258809707245132747e-01,1.140135423673749537e+00 diff --git a/tests/post/dataset_near_pred/samples/sample_000000021/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000021/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..5d321236 Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000021/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000021/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000021/scalars.csv deleted file mode 100644 index 99c418ba..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000021/scalars.csv +++ /dev/null @@ -1,2 
+0,0 @@ -scalar_1,scalar_2 -3.555862093163790760e-01,6.301211567245460499e-01 diff --git a/tests/post/dataset_near_pred/samples/sample_000000022/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000022/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..4a012a84 Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000022/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000022/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000022/scalars.csv deleted file mode 100644 index 3ff86b58..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000022/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -5.713344679030850637e-01,1.614987997856202637e-02 diff --git a/tests/post/dataset_near_pred/samples/sample_000000023/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000023/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..16ee8777 Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000023/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000023/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000023/scalars.csv deleted file mode 100644 index 0dcf0ee3..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000023/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --7.289314954826756621e-01,2.751246751356392162e-01 diff --git a/tests/post/dataset_near_pred/samples/sample_000000024/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000024/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..5f648ce8 Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000024/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000024/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000024/scalars.csv deleted file mode 100644 index 19a303b6..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000024/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --5.741370030447030537e-01,-1.716974544661193136e-01 diff --git a/tests/post/dataset_near_pred/samples/sample_000000025/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000025/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..3d47a597 Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000025/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000025/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000025/scalars.csv deleted file mode 100644 index a254abe1..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000025/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -1.614057572322172396e+00,7.818416541432641242e-01 diff --git a/tests/post/dataset_near_pred/samples/sample_000000026/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000026/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..e4c736fa Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000026/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000026/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000026/scalars.csv deleted file mode 100644 index 1c47cf78..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000026/scalars.csv +++ /dev/null @@ -1,2 +0,0 
@@ -scalar_1,scalar_2 -6.430507187035567895e-01,4.198359150220022893e-01 diff --git a/tests/post/dataset_near_pred/samples/sample_000000027/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000027/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..7ad39c14 Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000027/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000027/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000027/scalars.csv deleted file mode 100644 index 1274e044..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000027/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -8.661067523164192039e-01,5.367780239393214989e-01 diff --git a/tests/post/dataset_near_pred/samples/sample_000000028/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000028/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..c02db3d6 Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000028/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000028/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000028/scalars.csv deleted file mode 100644 index 3ef1ba1d..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000028/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -8.474135547725786255e-01,-5.416433228195641765e-02 diff --git a/tests/post/dataset_near_pred/samples/sample_000000029/meshes/mesh_000000000.cgns b/tests/post/dataset_near_pred/samples/sample_000000029/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..82b217ea Binary files /dev/null and b/tests/post/dataset_near_pred/samples/sample_000000029/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_near_pred/samples/sample_000000029/scalars.csv b/tests/post/dataset_near_pred/samples/sample_000000029/scalars.csv deleted file mode 100644 index ab8fa4f5..00000000 --- a/tests/post/dataset_near_pred/samples/sample_000000029/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -1.221581660172051276e+00,-6.608558643195046605e-01 diff --git a/tests/post/dataset_pred/infos.yaml b/tests/post/dataset_pred/infos.yaml deleted file mode 100644 index 718bd9ae..00000000 --- a/tests/post/dataset_pred/infos.yaml +++ /dev/null @@ -1,7 +0,0 @@ -legal: - owner: Owner - license: Licence -data_production: - type: Test - physics: test data for mmgp library - simulator: test diff --git a/tests/post/dataset_pred/samples/sample_000000000/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000000/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..c4c19ff8 Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000000/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000000/scalars.csv b/tests/post/dataset_pred/samples/sample_000000000/scalars.csv deleted file mode 100644 index dc6759f5..00000000 --- a/tests/post/dataset_pred/samples/sample_000000000/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -5.805719746243623858e-01,2.565635576869315315e+00 diff --git a/tests/post/dataset_pred/samples/sample_000000001/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000001/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..bd25ce55 Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000001/meshes/mesh_000000000.cgns differ diff --git 
a/tests/post/dataset_pred/samples/sample_000000001/scalars.csv b/tests/post/dataset_pred/samples/sample_000000001/scalars.csv deleted file mode 100644 index 26c12af8..00000000 --- a/tests/post/dataset_pred/samples/sample_000000001/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -8.358426242760235159e-01,1.579042230752238929e+00 diff --git a/tests/post/dataset_pred/samples/sample_000000002/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000002/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..4af4a2fa Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000002/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000002/scalars.csv b/tests/post/dataset_pred/samples/sample_000000002/scalars.csv deleted file mode 100644 index 29139fd1..00000000 --- a/tests/post/dataset_pred/samples/sample_000000002/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --7.493813441091061733e-01,-9.968272217461510154e-01 diff --git a/tests/post/dataset_pred/samples/sample_000000003/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000003/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..88473870 Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000003/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000003/scalars.csv b/tests/post/dataset_pred/samples/sample_000000003/scalars.csv deleted file mode 100644 index 7d5fa272..00000000 --- a/tests/post/dataset_pred/samples/sample_000000003/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --2.103933387402803223e+00,-2.085784973534075437e+00 diff --git a/tests/post/dataset_pred/samples/sample_000000004/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000004/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..8dd0926d Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000004/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000004/scalars.csv b/tests/post/dataset_pred/samples/sample_000000004/scalars.csv deleted file mode 100644 index e3caf43a..00000000 --- a/tests/post/dataset_pred/samples/sample_000000004/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -7.554264503358688598e-01,-7.830507270321841462e-01 diff --git a/tests/post/dataset_pred/samples/sample_000000005/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000005/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..21392d4a Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000005/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000005/scalars.csv b/tests/post/dataset_pred/samples/sample_000000005/scalars.csv deleted file mode 100644 index bea69454..00000000 --- a/tests/post/dataset_pred/samples/sample_000000005/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --3.425284945872824061e-01,4.125932837620399640e-01 diff --git a/tests/post/dataset_pred/samples/sample_000000006/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000006/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..abceafdc Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000006/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000006/scalars.csv b/tests/post/dataset_pred/samples/sample_000000006/scalars.csv deleted file mode 100644 
index ba25f80e..00000000 --- a/tests/post/dataset_pred/samples/sample_000000006/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -1.203061185332759742e+00,-2.787055676526754011e-01 diff --git a/tests/post/dataset_pred/samples/sample_000000007/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000007/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..6dfbd9fe Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000007/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000007/scalars.csv b/tests/post/dataset_pred/samples/sample_000000007/scalars.csv deleted file mode 100644 index 80ffe5f5..00000000 --- a/tests/post/dataset_pred/samples/sample_000000007/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -5.643626378346326966e-01,-2.550733088135708274e-04 diff --git a/tests/post/dataset_pred/samples/sample_000000008/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000008/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..02362c8f Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000008/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000008/scalars.csv b/tests/post/dataset_pred/samples/sample_000000008/scalars.csv deleted file mode 100644 index 8dd55b77..00000000 --- a/tests/post/dataset_pred/samples/sample_000000008/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --8.762757967540465986e-02,-1.222274925447992766e+00 diff --git a/tests/post/dataset_pred/samples/sample_000000009/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000009/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..2533df52 Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000009/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000009/scalars.csv b/tests/post/dataset_pred/samples/sample_000000009/scalars.csv deleted file mode 100644 index 3b894011..00000000 --- a/tests/post/dataset_pred/samples/sample_000000009/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -1.563362988613945515e+00,-2.440171776534782566e+00 diff --git a/tests/post/dataset_pred/samples/sample_000000010/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000010/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..e21ccea6 Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000010/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000010/scalars.csv b/tests/post/dataset_pred/samples/sample_000000010/scalars.csv deleted file mode 100644 index 949268f2..00000000 --- a/tests/post/dataset_pred/samples/sample_000000010/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --6.346119430922391169e-01,6.108211834382098332e-02 diff --git a/tests/post/dataset_pred/samples/sample_000000011/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000011/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..30b6df70 Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000011/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000011/scalars.csv b/tests/post/dataset_pred/samples/sample_000000011/scalars.csv deleted file mode 100644 index 57309cd6..00000000 --- a/tests/post/dataset_pred/samples/sample_000000011/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 
--8.036336607452404523e-02,1.560152424381089586e+00 diff --git a/tests/post/dataset_pred/samples/sample_000000012/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000012/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..51e5720f Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000012/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000012/scalars.csv b/tests/post/dataset_pred/samples/sample_000000012/scalars.csv deleted file mode 100644 index 103fd924..00000000 --- a/tests/post/dataset_pred/samples/sample_000000012/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --1.717441024544600570e+00,3.306123968082468334e-01 diff --git a/tests/post/dataset_pred/samples/sample_000000013/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000013/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..069d7278 Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000013/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000013/scalars.csv b/tests/post/dataset_pred/samples/sample_000000013/scalars.csv deleted file mode 100644 index 3344e6a4..00000000 --- a/tests/post/dataset_pred/samples/sample_000000013/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --1.399020667363656545e-01,-3.197167630055578469e-01 diff --git a/tests/post/dataset_pred/samples/sample_000000014/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000014/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..e817f3c2 Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000014/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000014/scalars.csv b/tests/post/dataset_pred/samples/sample_000000014/scalars.csv deleted file mode 100644 index 91a40b2f..00000000 --- a/tests/post/dataset_pred/samples/sample_000000014/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -2.205166074019398348e+00,-1.281056928365714720e+00 diff --git a/tests/post/dataset_pred/samples/sample_000000015/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000015/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..4dfaf706 Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000015/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000015/scalars.csv b/tests/post/dataset_pred/samples/sample_000000015/scalars.csv deleted file mode 100644 index b35561e6..00000000 --- a/tests/post/dataset_pred/samples/sample_000000015/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -5.238379450074331922e-01,6.781749793188753817e-01 diff --git a/tests/post/dataset_pred/samples/sample_000000016/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000016/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..a7fef0f2 Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000016/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000016/scalars.csv b/tests/post/dataset_pred/samples/sample_000000016/scalars.csv deleted file mode 100644 index c9f1bc8b..00000000 --- a/tests/post/dataset_pred/samples/sample_000000016/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --1.048053035768788499e+00,-1.061649971595848285e+00 diff --git a/tests/post/dataset_pred/samples/sample_000000017/meshes/mesh_000000000.cgns 
b/tests/post/dataset_pred/samples/sample_000000017/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..d56c52d8 Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000017/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000017/scalars.csv b/tests/post/dataset_pred/samples/sample_000000017/scalars.csv deleted file mode 100644 index db9e27d2..00000000 --- a/tests/post/dataset_pred/samples/sample_000000017/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --1.053688450055755066e+00,6.175402269063643423e-01 diff --git a/tests/post/dataset_pred/samples/sample_000000018/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000018/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..d8f3c89e Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000018/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000018/scalars.csv b/tests/post/dataset_pred/samples/sample_000000018/scalars.csv deleted file mode 100644 index 5f2b289d..00000000 --- a/tests/post/dataset_pred/samples/sample_000000018/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -1.499844143376949601e+00,-6.000631683926870713e-02 diff --git a/tests/post/dataset_pred/samples/sample_000000019/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000019/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..85b148d5 Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000019/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000019/scalars.csv b/tests/post/dataset_pred/samples/sample_000000019/scalars.csv deleted file mode 100644 index 72f2281f..00000000 --- a/tests/post/dataset_pred/samples/sample_000000019/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -1.383313141548469405e+00,1.314034658860104010e-01 diff --git a/tests/post/dataset_pred/samples/sample_000000020/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000020/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..de965da3 Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000020/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000020/scalars.csv b/tests/post/dataset_pred/samples/sample_000000020/scalars.csv deleted file mode 100644 index 0df83de3..00000000 --- a/tests/post/dataset_pred/samples/sample_000000020/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --4.929134425074230275e-02,8.814783159785007927e-01 diff --git a/tests/post/dataset_pred/samples/sample_000000021/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000021/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..b79c86c3 Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000021/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000021/scalars.csv b/tests/post/dataset_pred/samples/sample_000000021/scalars.csv deleted file mode 100644 index 0c0da6b8..00000000 --- a/tests/post/dataset_pred/samples/sample_000000021/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --4.009745929095950290e-01,-6.411538144469796086e-01 diff --git a/tests/post/dataset_pred/samples/sample_000000022/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000022/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..e9f49c01 Binary files /dev/null and 
b/tests/post/dataset_pred/samples/sample_000000022/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000022/scalars.csv b/tests/post/dataset_pred/samples/sample_000000022/scalars.csv deleted file mode 100644 index ce4a88e6..00000000 --- a/tests/post/dataset_pred/samples/sample_000000022/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --1.586312061185160438e+00,1.041299071013290112e+00 diff --git a/tests/post/dataset_pred/samples/sample_000000023/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000023/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..2f7e61f6 Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000023/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000023/scalars.csv b/tests/post/dataset_pred/samples/sample_000000023/scalars.csv deleted file mode 100644 index 209fbd22..00000000 --- a/tests/post/dataset_pred/samples/sample_000000023/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --7.231193945694065484e-01,-1.656576229228291508e-01 diff --git a/tests/post/dataset_pred/samples/sample_000000024/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000024/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..21f6e515 Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000024/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000024/scalars.csv b/tests/post/dataset_pred/samples/sample_000000024/scalars.csv deleted file mode 100644 index 93182020..00000000 --- a/tests/post/dataset_pred/samples/sample_000000024/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --9.399942218749092471e-02,1.058916033780072929e+00 diff --git a/tests/post/dataset_pred/samples/sample_000000025/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000025/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..c00fbdaa Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000025/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000025/scalars.csv b/tests/post/dataset_pred/samples/sample_000000025/scalars.csv deleted file mode 100644 index 9dba8e85..00000000 --- a/tests/post/dataset_pred/samples/sample_000000025/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -8.467801922208331167e-01,2.933151221874954334e-02 diff --git a/tests/post/dataset_pred/samples/sample_000000026/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000026/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..6f027b7b Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000026/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000026/scalars.csv b/tests/post/dataset_pred/samples/sample_000000026/scalars.csv deleted file mode 100644 index 26dc2d35..00000000 --- a/tests/post/dataset_pred/samples/sample_000000026/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --3.939904458614237881e-01,1.540825912981907964e+00 diff --git a/tests/post/dataset_pred/samples/sample_000000027/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000027/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..699d8c34 Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000027/meshes/mesh_000000000.cgns differ diff --git 
a/tests/post/dataset_pred/samples/sample_000000027/scalars.csv b/tests/post/dataset_pred/samples/sample_000000027/scalars.csv deleted file mode 100644 index eaadd81b..00000000 --- a/tests/post/dataset_pred/samples/sample_000000027/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --8.430253026412983797e-01,9.837541251275897514e-01 diff --git a/tests/post/dataset_pred/samples/sample_000000028/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000028/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..7ca89479 Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000028/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000028/scalars.csv b/tests/post/dataset_pred/samples/sample_000000028/scalars.csv deleted file mode 100644 index 72f944f1..00000000 --- a/tests/post/dataset_pred/samples/sample_000000028/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --4.815357920158462590e-02,2.146968091363854469e-01 diff --git a/tests/post/dataset_pred/samples/sample_000000029/meshes/mesh_000000000.cgns b/tests/post/dataset_pred/samples/sample_000000029/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..1502aae8 Binary files /dev/null and b/tests/post/dataset_pred/samples/sample_000000029/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_pred/samples/sample_000000029/scalars.csv b/tests/post/dataset_pred/samples/sample_000000029/scalars.csv deleted file mode 100644 index 766cbedd..00000000 --- a/tests/post/dataset_pred/samples/sample_000000029/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -3.931671005553655229e-01,-5.111990795473254634e-01 diff --git a/tests/post/dataset_ref/infos.yaml b/tests/post/dataset_ref/infos.yaml deleted file mode 100644 index 718bd9ae..00000000 --- a/tests/post/dataset_ref/infos.yaml +++ /dev/null @@ -1,7 +0,0 @@ -legal: - owner: Owner - license: Licence -data_production: - type: Test - physics: test data for mmgp library - simulator: test diff --git a/tests/post/dataset_ref/samples/sample_000000000/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000000/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..f0788fd8 Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000000/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_ref/samples/sample_000000000/scalars.csv b/tests/post/dataset_ref/samples/sample_000000000/scalars.csv deleted file mode 100644 index 65350b2a..00000000 --- a/tests/post/dataset_ref/samples/sample_000000000/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --2.316260024588145117e-01,1.216934606688710785e+00 diff --git a/tests/post/dataset_ref/samples/sample_000000001/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000001/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..72e50d9d Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000001/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_ref/samples/sample_000000001/scalars.csv b/tests/post/dataset_ref/samples/sample_000000001/scalars.csv deleted file mode 100644 index 6e25ffce..00000000 --- a/tests/post/dataset_ref/samples/sample_000000001/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --4.071113148416793392e-01,5.151457705224029571e-01 diff --git a/tests/post/dataset_ref/samples/sample_000000002/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000002/meshes/mesh_000000000.cgns new 
file mode 100644 index 00000000..d154680c Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000002/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_ref/samples/sample_000000002/scalars.csv b/tests/post/dataset_ref/samples/sample_000000002/scalars.csv deleted file mode 100644 index f598e46c..00000000 --- a/tests/post/dataset_ref/samples/sample_000000002/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --1.316642848757348272e+00,-3.964278874197880498e+00 diff --git a/tests/post/dataset_ref/samples/sample_000000003/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000003/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..b8d65f47 Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000003/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_ref/samples/sample_000000003/scalars.csv b/tests/post/dataset_ref/samples/sample_000000003/scalars.csv deleted file mode 100644 index dca0019d..00000000 --- a/tests/post/dataset_ref/samples/sample_000000003/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --1.295794980483114589e+00,-1.112492279713304555e+00 diff --git a/tests/post/dataset_ref/samples/sample_000000004/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000004/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..b0eb7b89 Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000004/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_ref/samples/sample_000000004/scalars.csv b/tests/post/dataset_ref/samples/sample_000000004/scalars.csv deleted file mode 100644 index 522b0a0b..00000000 --- a/tests/post/dataset_ref/samples/sample_000000004/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --8.166529015229165855e-01,-1.426563017087640439e+00 diff --git a/tests/post/dataset_ref/samples/sample_000000005/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000005/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..1b223546 Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000005/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_ref/samples/sample_000000005/scalars.csv b/tests/post/dataset_ref/samples/sample_000000005/scalars.csv deleted file mode 100644 index 97981eb7..00000000 --- a/tests/post/dataset_ref/samples/sample_000000005/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --1.157951362493477410e+00,6.803811300084938729e-01 diff --git a/tests/post/dataset_ref/samples/sample_000000006/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000006/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..365ebe90 Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000006/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_ref/samples/sample_000000006/scalars.csv b/tests/post/dataset_ref/samples/sample_000000006/scalars.csv deleted file mode 100644 index bd2f45a0..00000000 --- a/tests/post/dataset_ref/samples/sample_000000006/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --7.560567276227481148e-01,-8.030708857345105756e-01 diff --git a/tests/post/dataset_ref/samples/sample_000000007/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000007/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..9abfad9b Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000007/meshes/mesh_000000000.cgns differ diff --git 
a/tests/post/dataset_ref/samples/sample_000000007/scalars.csv b/tests/post/dataset_ref/samples/sample_000000007/scalars.csv deleted file mode 100644 index 7f1d62b0..00000000 --- a/tests/post/dataset_ref/samples/sample_000000007/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 -1.437041582795101080e+00,-8.807308582456169210e-01 diff --git a/tests/post/dataset_ref/samples/sample_000000008/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000008/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..7dde2f6b Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000008/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_ref/samples/sample_000000008/scalars.csv b/tests/post/dataset_ref/samples/sample_000000008/scalars.csv deleted file mode 100644 index 985dbd2f..00000000 --- a/tests/post/dataset_ref/samples/sample_000000008/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --9.544603194035470306e-01,-4.056076599154200824e-01 diff --git a/tests/post/dataset_ref/samples/sample_000000009/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000009/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..94e12af8 Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000009/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_ref/samples/sample_000000009/scalars.csv b/tests/post/dataset_ref/samples/sample_000000009/scalars.csv deleted file mode 100644 index 90115a07..00000000 --- a/tests/post/dataset_ref/samples/sample_000000009/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --3.002057834501626818e-02,-9.579921106507679474e-01 diff --git a/tests/post/dataset_ref/samples/sample_000000010/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000010/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..adaee143 Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000010/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_ref/samples/sample_000000010/scalars.csv b/tests/post/dataset_ref/samples/sample_000000010/scalars.csv deleted file mode 100644 index 928be241..00000000 --- a/tests/post/dataset_ref/samples/sample_000000010/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --1.131881327720172248e+00,-6.258177776287130456e-01 diff --git a/tests/post/dataset_ref/samples/sample_000000011/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000011/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..2db89434 Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000011/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_ref/samples/sample_000000011/scalars.csv b/tests/post/dataset_ref/samples/sample_000000011/scalars.csv deleted file mode 100644 index 34ac1deb..00000000 --- a/tests/post/dataset_ref/samples/sample_000000011/scalars.csv +++ /dev/null @@ -1,2 +0,0 @@ -scalar_1,scalar_2 --6.477163597027486841e-02,1.096843506332501050e+00 diff --git a/tests/post/dataset_ref/samples/sample_000000012/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000012/meshes/mesh_000000000.cgns new file mode 100644 index 00000000..785d1a2b Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000012/meshes/mesh_000000000.cgns differ diff --git a/tests/post/dataset_ref/samples/sample_000000012/scalars.csv b/tests/post/dataset_ref/samples/sample_000000012/scalars.csv deleted file mode 100644 index bc774b3a..00000000 --- 
a/tests/post/dataset_ref/samples/sample_000000012/scalars.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-scalar_1,scalar_2
-1.562778626767657109e+00,-1.184788233684134079e-01
diff --git a/tests/post/dataset_ref/samples/sample_000000013/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000013/meshes/mesh_000000000.cgns
new file mode 100644
index 00000000..5fd8afef
Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000013/meshes/mesh_000000000.cgns differ
diff --git a/tests/post/dataset_ref/samples/sample_000000013/scalars.csv b/tests/post/dataset_ref/samples/sample_000000013/scalars.csv
deleted file mode 100644
index 9d6e29e7..00000000
--- a/tests/post/dataset_ref/samples/sample_000000013/scalars.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-scalar_1,scalar_2
-2.845897295694204687e-01,-1.215780792326891513e+00
diff --git a/tests/post/dataset_ref/samples/sample_000000014/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000014/meshes/mesh_000000000.cgns
new file mode 100644
index 00000000..6b6aaca4
Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000014/meshes/mesh_000000000.cgns differ
diff --git a/tests/post/dataset_ref/samples/sample_000000014/scalars.csv b/tests/post/dataset_ref/samples/sample_000000014/scalars.csv
deleted file mode 100644
index 9f53220d..00000000
--- a/tests/post/dataset_ref/samples/sample_000000014/scalars.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-scalar_1,scalar_2
--1.218830153792483939e+00,1.149679891578292423e+00
diff --git a/tests/post/dataset_ref/samples/sample_000000015/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000015/meshes/mesh_000000000.cgns
new file mode 100644
index 00000000..33a7f88f
Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000015/meshes/mesh_000000000.cgns differ
diff --git a/tests/post/dataset_ref/samples/sample_000000015/scalars.csv b/tests/post/dataset_ref/samples/sample_000000015/scalars.csv
deleted file mode 100644
index 62a62ac7..00000000
--- a/tests/post/dataset_ref/samples/sample_000000015/scalars.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-scalar_1,scalar_2
-5.162087708753002602e-01,-7.559990261696361014e-01
diff --git a/tests/post/dataset_ref/samples/sample_000000016/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000016/meshes/mesh_000000000.cgns
new file mode 100644
index 00000000..8587bda3
Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000016/meshes/mesh_000000000.cgns differ
diff --git a/tests/post/dataset_ref/samples/sample_000000016/scalars.csv b/tests/post/dataset_ref/samples/sample_000000016/scalars.csv
deleted file mode 100644
index dc7f5204..00000000
--- a/tests/post/dataset_ref/samples/sample_000000016/scalars.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-scalar_1,scalar_2
--1.407240968085693211e+00,1.424049456495134258e+00
diff --git a/tests/post/dataset_ref/samples/sample_000000017/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000017/meshes/mesh_000000000.cgns
new file mode 100644
index 00000000..d1ec568e
Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000017/meshes/mesh_000000000.cgns differ
diff --git a/tests/post/dataset_ref/samples/sample_000000017/scalars.csv b/tests/post/dataset_ref/samples/sample_000000017/scalars.csv
deleted file mode 100644
index 2772ada4..00000000
--- a/tests/post/dataset_ref/samples/sample_000000017/scalars.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-scalar_1,scalar_2
--4.586821353951379687e-01,-1.576626540572654722e+00
diff --git a/tests/post/dataset_ref/samples/sample_000000018/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000018/meshes/mesh_000000000.cgns
new file mode 100644
index 00000000..870be40c
Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000018/meshes/mesh_000000000.cgns differ
diff --git a/tests/post/dataset_ref/samples/sample_000000018/scalars.csv b/tests/post/dataset_ref/samples/sample_000000018/scalars.csv
deleted file mode 100644
index 67a48236..00000000
--- a/tests/post/dataset_ref/samples/sample_000000018/scalars.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-scalar_1,scalar_2
-2.568487380150065724e-01,3.553127427434972785e-01
diff --git a/tests/post/dataset_ref/samples/sample_000000019/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000019/meshes/mesh_000000000.cgns
new file mode 100644
index 00000000..421e091f
Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000019/meshes/mesh_000000000.cgns differ
diff --git a/tests/post/dataset_ref/samples/sample_000000019/scalars.csv b/tests/post/dataset_ref/samples/sample_000000019/scalars.csv
deleted file mode 100644
index d1432684..00000000
--- a/tests/post/dataset_ref/samples/sample_000000019/scalars.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-scalar_1,scalar_2
-2.958720326323473482e-01,3.042862624421485696e-01
diff --git a/tests/post/dataset_ref/samples/sample_000000020/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000020/meshes/mesh_000000000.cgns
new file mode 100644
index 00000000..e11b25ec
Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000020/meshes/mesh_000000000.cgns differ
diff --git a/tests/post/dataset_ref/samples/sample_000000020/scalars.csv b/tests/post/dataset_ref/samples/sample_000000020/scalars.csv
deleted file mode 100644
index 0cb83947..00000000
--- a/tests/post/dataset_ref/samples/sample_000000020/scalars.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-scalar_1,scalar_2
--3.258809707245132747e-01,1.140135423673749537e+00
diff --git a/tests/post/dataset_ref/samples/sample_000000021/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000021/meshes/mesh_000000000.cgns
new file mode 100644
index 00000000..97545488
Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000021/meshes/mesh_000000000.cgns differ
diff --git a/tests/post/dataset_ref/samples/sample_000000021/scalars.csv b/tests/post/dataset_ref/samples/sample_000000021/scalars.csv
deleted file mode 100644
index 99c418ba..00000000
--- a/tests/post/dataset_ref/samples/sample_000000021/scalars.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-scalar_1,scalar_2
-3.555862093163790760e-01,6.301211567245460499e-01
diff --git a/tests/post/dataset_ref/samples/sample_000000022/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000022/meshes/mesh_000000000.cgns
new file mode 100644
index 00000000..f2934e6b
Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000022/meshes/mesh_000000000.cgns differ
diff --git a/tests/post/dataset_ref/samples/sample_000000022/scalars.csv b/tests/post/dataset_ref/samples/sample_000000022/scalars.csv
deleted file mode 100644
index 6a6786b8..00000000
--- a/tests/post/dataset_ref/samples/sample_000000022/scalars.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-scalar_1,scalar_2
-5.713344679030850637e-01,1.714987997856202637e-02
diff --git a/tests/post/dataset_ref/samples/sample_000000023/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000023/meshes/mesh_000000000.cgns
new file mode 100644
index 00000000..28b2cf62
Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000023/meshes/mesh_000000000.cgns differ
diff --git a/tests/post/dataset_ref/samples/sample_000000023/scalars.csv b/tests/post/dataset_ref/samples/sample_000000023/scalars.csv
deleted file mode 100644
index 7ae383bc..00000000
--- a/tests/post/dataset_ref/samples/sample_000000023/scalars.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-scalar_1,scalar_2
--7.289314954826756621e-01,2.851246751356392162e-01
diff --git a/tests/post/dataset_ref/samples/sample_000000024/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000024/meshes/mesh_000000000.cgns
new file mode 100644
index 00000000..43a6c038
Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000024/meshes/mesh_000000000.cgns differ
diff --git a/tests/post/dataset_ref/samples/sample_000000024/scalars.csv b/tests/post/dataset_ref/samples/sample_000000024/scalars.csv
deleted file mode 100644
index bfdaeafb..00000000
--- a/tests/post/dataset_ref/samples/sample_000000024/scalars.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-scalar_1,scalar_2
--5.741370030447030537e-01,-1.766974544661193136e-01
diff --git a/tests/post/dataset_ref/samples/sample_000000025/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000025/meshes/mesh_000000000.cgns
new file mode 100644
index 00000000..ce7fed79
Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000025/meshes/mesh_000000000.cgns differ
diff --git a/tests/post/dataset_ref/samples/sample_000000025/scalars.csv b/tests/post/dataset_ref/samples/sample_000000025/scalars.csv
deleted file mode 100644
index a66ff399..00000000
--- a/tests/post/dataset_ref/samples/sample_000000025/scalars.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-scalar_1,scalar_2
-1.614057572322172396e+00,7.868416541432641242e-01
diff --git a/tests/post/dataset_ref/samples/sample_000000026/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000026/meshes/mesh_000000000.cgns
new file mode 100644
index 00000000..d8b6e8f8
Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000026/meshes/mesh_000000000.cgns differ
diff --git a/tests/post/dataset_ref/samples/sample_000000026/scalars.csv b/tests/post/dataset_ref/samples/sample_000000026/scalars.csv
deleted file mode 100644
index ba7bf198..00000000
--- a/tests/post/dataset_ref/samples/sample_000000026/scalars.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-scalar_1,scalar_2
-6.430507187035567895e-01,4.158359150220022893e-01
diff --git a/tests/post/dataset_ref/samples/sample_000000027/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000027/meshes/mesh_000000000.cgns
new file mode 100644
index 00000000..e48425e2
Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000027/meshes/mesh_000000000.cgns differ
diff --git a/tests/post/dataset_ref/samples/sample_000000027/scalars.csv b/tests/post/dataset_ref/samples/sample_000000027/scalars.csv
deleted file mode 100644
index 776a62ee..00000000
--- a/tests/post/dataset_ref/samples/sample_000000027/scalars.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-scalar_1,scalar_2
-8.661067523164192039e-01,5.327780239393214989e-01
diff --git a/tests/post/dataset_ref/samples/sample_000000028/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000028/meshes/mesh_000000000.cgns
new file mode 100644
index 00000000..32d40bae
Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000028/meshes/mesh_000000000.cgns differ
diff --git a/tests/post/dataset_ref/samples/sample_000000028/scalars.csv b/tests/post/dataset_ref/samples/sample_000000028/scalars.csv
deleted file mode 100644
index 887377e5..00000000
--- a/tests/post/dataset_ref/samples/sample_000000028/scalars.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-scalar_1,scalar_2
-8.474135547725786255e-01,-5.411433228195641765e-02
diff --git a/tests/post/dataset_ref/samples/sample_000000029/meshes/mesh_000000000.cgns b/tests/post/dataset_ref/samples/sample_000000029/meshes/mesh_000000000.cgns
new file mode 100644
index 00000000..5823e0fc
Binary files /dev/null and b/tests/post/dataset_ref/samples/sample_000000029/meshes/mesh_000000000.cgns differ
diff --git a/tests/post/dataset_ref/samples/sample_000000029/scalars.csv b/tests/post/dataset_ref/samples/sample_000000029/scalars.csv
deleted file mode 100644
index 2fd6c190..00000000
--- a/tests/post/dataset_ref/samples/sample_000000029/scalars.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-scalar_1,scalar_2
-1.221581660172051276e+00,-6.613558643195046605e-01
diff --git a/tests/post/problem_definition/problem_infos.yaml b/tests/post/problem_definition/problem_infos.yaml
index 2bd951e9..470b12fb 100644
--- a/tests/post/problem_definition/problem_infos.yaml
+++ b/tests/post/problem_definition/problem_infos.yaml
@@ -1,19 +1,17 @@
 task: regression
 input_features:
-  - name: feature_1
-    type: scalar
+- type: scalar
+  name: feature_1
 output_features:
-  - name: feature_2
-    type: scalar
+- type: scalar
+  name: feature_2
 input_scalars:
-- scalar_1
+- feature_1
 output_scalars:
-- scalar_2
+- feature_2
 input_fields: []
-output_fields:
-- field_1
+output_fields: []
 input_timeseries: []
 output_timeseries: []
-input_meshes:
-- mesh
+input_meshes: []
 output_meshes: []
diff --git a/tests/post/problem_definition/split.csv b/tests/post/problem_definition/split.csv
deleted file mode 100644
index 9359b5fe..00000000
--- a/tests/post/problem_definition/split.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-train,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
-test,20,21,22,23,24,25,26,27,28,29
diff --git a/tests/post/problem_definition/split.json b/tests/post/problem_definition/split.json
new file mode 100644
index 00000000..ac64f861
--- /dev/null
+++ b/tests/post/problem_definition/split.json
@@ -0,0 +1 @@
+{"train": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], "test": [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]}
\ No newline at end of file
diff --git a/tests/post/test_bisect.py b/tests/post/test_bisect.py
index a2712615..764b0a8b 100644
--- a/tests/post/test_bisect.py
+++ b/tests/post/test_bisect.py
@@ -30,7 +30,9 @@ def test_bisect_with_paths(self, current_directory, working_directory):
         ref_path = current_directory / "dataset_ref"
         pred_path = current_directory / "dataset_pred"
         problem_path = current_directory / "problem_definition"
-        plot_bisect(ref_path, pred_path, problem_path, "scalar_2", "differ_bisect_plot")
+        plot_bisect(
+            ref_path, pred_path, problem_path, "feature_2", "differ_bisect_plot"
+        )
         shutil.move(
             working_directory / "differ_bisect_plot.png",
             current_directory / "differ_bisect_plot.png",
@@ -40,7 +42,7 @@ def test_bisect_with_objects(self, current_directory, working_directory):
         ref_path = Dataset(current_directory / "dataset_pred")
         pred_path = Dataset(current_directory / "dataset_pred")
         problem_path = ProblemDefinition(current_directory / "problem_definition")
-        plot_bisect(ref_path, pred_path, problem_path, "scalar_2", "equal_bisect_plot")
+        plot_bisect(ref_path, pred_path, problem_path, "feature_2", "equal_bisect_plot")
         shutil.move(
             working_directory / "equal_bisect_plot.png",
             current_directory / "equal_bisect_plot.png",
diff --git a/tests/post/test_metrics.py b/tests/post/test_metrics.py
index 4cb185e7..9deb1c39 100644
--- a/tests/post/test_metrics.py
+++ b/tests/post/test_metrics.py
@@ -61,31 +61,31 @@ def test_compute_RMSE_data(self, current_directory):
         path = current_directory / "first_metrics.yaml"
         with path.open("r") as file:
             contenu_yaml = yaml.load(file, Loader=yaml.FullLoader)
-        assert contenu_yaml["rRMSE for scalars"]["train"]["scalar_2"] < 0.2
-        assert contenu_yaml["rRMSE for scalars"]["test"]["scalar_2"] < 0.2
-        assert contenu_yaml["RMSE for scalars"]["train"]["scalar_2"] < 0.2
-        assert contenu_yaml["RMSE for scalars"]["test"]["scalar_2"] < 0.2
-        assert contenu_yaml["R2 for scalars"]["train"]["scalar_2"] > 0.8
-        assert contenu_yaml["R2 for scalars"]["test"]["scalar_2"] > 0.8
+        assert contenu_yaml["rRMSE for scalars"]["train"]["feature_2"] < 0.2
+        assert contenu_yaml["rRMSE for scalars"]["test"]["feature_2"] < 0.2
+        assert contenu_yaml["RMSE for scalars"]["train"]["feature_2"] < 0.2
+        assert contenu_yaml["RMSE for scalars"]["test"]["feature_2"] < 0.2
+        assert contenu_yaml["R2 for scalars"]["train"]["feature_2"] > 0.8
+        assert contenu_yaml["R2 for scalars"]["test"]["feature_2"] > 0.8
 
     def test_compute_rRMSE_data(self, current_directory):
         path = current_directory / "second_metrics.yaml"
         with path.open("r") as file:
             contenu_yaml = yaml.load(file, Loader=yaml.FullLoader)
-        assert contenu_yaml["rRMSE for scalars"]["train"]["scalar_2"] > 0.75
-        assert contenu_yaml["rRMSE for scalars"]["test"]["scalar_2"] > 0.75
-        assert contenu_yaml["RMSE for scalars"]["train"]["scalar_2"] > 0.75
-        assert contenu_yaml["RMSE for scalars"]["test"]["scalar_2"] > 0.75
-        assert contenu_yaml["R2 for scalars"]["train"]["scalar_2"] < 0.0
-        assert contenu_yaml["R2 for scalars"]["test"]["scalar_2"] < 0.0
+        assert contenu_yaml["rRMSE for scalars"]["train"]["feature_2"] > 0.25
+        assert contenu_yaml["rRMSE for scalars"]["test"]["feature_2"] > 0.25
+        assert contenu_yaml["RMSE for scalars"]["train"]["feature_2"] > 0.25
+        assert contenu_yaml["RMSE for scalars"]["test"]["feature_2"] > 0.25
+        assert contenu_yaml["R2 for scalars"]["train"]["feature_2"] < 0.0
+        assert contenu_yaml["R2 for scalars"]["test"]["feature_2"] < 0.0
 
     def test_compute_R2_data(self, current_directory):
         path = current_directory / "third_metrics.yaml"
         with path.open("r") as file:
             contenu_yaml = yaml.load(file, Loader=yaml.FullLoader)
-        assert contenu_yaml["rRMSE for scalars"]["train"]["scalar_2"] == 0.0
-        assert contenu_yaml["rRMSE for scalars"]["test"]["scalar_2"] == 0.0
-        assert contenu_yaml["RMSE for scalars"]["train"]["scalar_2"] == 0.0
-        assert contenu_yaml["RMSE for scalars"]["test"]["scalar_2"] == 0.0
-        assert contenu_yaml["R2 for scalars"]["train"]["scalar_2"] == 1.0
-        assert contenu_yaml["R2 for scalars"]["test"]["scalar_2"] == 1.0
+        assert contenu_yaml["rRMSE for scalars"]["train"]["feature_2"] == 0.0
+        assert contenu_yaml["rRMSE for scalars"]["test"]["feature_2"] == 0.0
+        assert contenu_yaml["RMSE for scalars"]["train"]["feature_2"] == 0.0
+        assert contenu_yaml["RMSE for scalars"]["test"]["feature_2"] == 0.0
+        assert contenu_yaml["R2 for scalars"]["train"]["feature_2"] == 1.0
+        assert contenu_yaml["R2 for scalars"]["test"]["feature_2"] == 1.0
diff --git a/tests/test_problem_definition.py b/tests/test_problem_definition.py
index 137bd419..8153c714 100644
--- a/tests/test_problem_definition.py
+++ b/tests/test_problem_definition.py
@@ -24,6 +24,51 @@ def problem_definition() -> ProblemDefinition:
     return ProblemDefinition()
 
 
+@pytest.fixture()
+def problem_definition_full(problem_definition: ProblemDefinition) -> ProblemDefinition:
+    problem_definition.set_task("regression")
+
+    feature_identifier = FeatureIdentifier({"type": "scalar", "name": "feature"})
+    predict_feature_identifier = FeatureIdentifier(
+        {"type": "scalar", "name": "predict_feature"}
+    )
+    test_feature_identifier = FeatureIdentifier(
+        {"type": "scalar", "name": "test_feature"}
+    )
+    problem_definition.add_in_features_identifiers(
+        [predict_feature_identifier, test_feature_identifier]
+    )
+    problem_definition.add_in_feature_identifier(feature_identifier)
+    problem_definition.add_out_features_identifiers(
+        [predict_feature_identifier, test_feature_identifier]
+    )
+    problem_definition.add_out_feature_identifier(feature_identifier)
+
+    problem_definition.add_input_scalars_names(["scalar", "test_scalar"])
+    problem_definition.add_input_scalar_name("predict_scalar")
+    problem_definition.add_output_scalars_names(["scalar", "test_scalar"])
+    problem_definition.add_output_scalar_name("predict_scalar")
+
+    problem_definition.add_input_fields_names(["field", "test_field"])
+    problem_definition.add_input_field_name("predict_field")
+    problem_definition.add_output_fields_names(["field", "test_field"])
+    problem_definition.add_output_field_name("predict_field")
+
+    problem_definition.add_input_timeseries_names(["timeseries", "test_timeseries"])
+    problem_definition.add_input_timeseries_name("predict_timeseries")
+    problem_definition.add_output_timeseries_names(["timeseries", "test_timeseries"])
+    problem_definition.add_output_timeseries_name("predict_timeseries")
+
+    problem_definition.add_input_meshes_names(["mesh", "test_mesh"])
+    problem_definition.add_input_mesh_name("predict_mesh")
+    problem_definition.add_output_meshes_names(["mesh", "test_mesh"])
+    problem_definition.add_output_mesh_name("predict_mesh")
+
+    new_split = {"train": [0, 1, 2], "test": [3, 4]}
+    problem_definition.set_split(new_split)
+    return problem_definition
+
+
 @pytest.fixture()
 def current_directory() -> Path:
     return Path(__file__).absolute().parent
@@ -458,63 +503,21 @@ def test_set_split(self, problem_definition):
         print(problem_definition)
 
     # -------------------------------------------------------------------------#
-    def test_save(self, problem_definition, current_directory):
-        problem_definition.set_task("regression")
-
-        feature_identifier = FeatureIdentifier({"type": "scalar", "name": "feature"})
-        predict_feature_identifier = FeatureIdentifier(
-            {"type": "scalar", "name": "predict_feature"}
-        )
-        test_feature_identifier = FeatureIdentifier(
-            {"type": "scalar", "name": "test_feature"}
-        )
-        problem_definition.add_in_features_identifiers(
-            [predict_feature_identifier, test_feature_identifier]
-        )
-        problem_definition.add_in_feature_identifier(feature_identifier)
-        problem_definition.add_out_features_identifiers(
-            [predict_feature_identifier, test_feature_identifier]
-        )
-        problem_definition.add_out_feature_identifier(feature_identifier)
-
-        problem_definition.add_input_scalars_names(["scalar", "test_scalar"])
-        problem_definition.add_input_scalar_name("predict_scalar")
-        problem_definition.add_output_scalars_names(["scalar", "test_scalar"])
-        problem_definition.add_output_scalar_name("predict_scalar")
-
-        problem_definition.add_input_fields_names(["field", "test_field"])
-        problem_definition.add_input_field_name("predict_field")
-        problem_definition.add_output_fields_names(["field", "test_field"])
-        problem_definition.add_output_field_name("predict_field")
-
-        problem_definition.add_input_timeseries_names(["timeseries", "test_timeseries"])
-        problem_definition.add_input_timeseries_name("predict_timeseries")
-        problem_definition.add_output_timeseries_names(
-            ["timeseries", "test_timeseries"]
-        )
-        problem_definition.add_output_timeseries_name("predict_timeseries")
-
-        problem_definition.add_input_meshes_names(["mesh", "test_mesh"])
-        problem_definition.add_input_mesh_name("predict_mesh")
-        problem_definition.add_output_meshes_names(["mesh", "test_mesh"])
-        problem_definition.add_output_mesh_name("predict_mesh")
-
-        new_split = {"train": [0, 1, 2], "test": [3, 4]}
-        problem_definition.set_split(new_split)
-
-        problem_definition._save_to_dir_(current_directory / "problem_definition")
-
-    def test__save_to_dir_(self, problem_definition, tmp_path):
-        problem_definition._save_to_dir_(tmp_path / "problem_definition")
+    def test__save_to_dir_(
+        self, problem_definition_full: ProblemDefinition, tmp_path: Path
+    ):
+        problem_definition_full._save_to_dir_(tmp_path / "problem_definition")
 
     def test_load_path_object(self, current_directory):
-        from pathlib import Path
-
         my_dir = Path(current_directory)
         ProblemDefinition(my_dir / "problem_definition")
 
-    def test_load(self, current_directory):
-        d_path = current_directory / "problem_definition"
+    def test___init___path(
+        self, problem_definition_full: ProblemDefinition, tmp_path: Path
+    ):
+        d_path = tmp_path / "problem_definition"
+        problem_definition_full._save_to_dir_(d_path)
+        #
         problem = ProblemDefinition(d_path)
         assert problem.get_task() == "regression"
         assert set(problem.get_input_scalars_names()) == set(
@@ -526,6 +529,12 @@ def test_load(self, current_directory):
         all_split = problem.get_split()
         assert all_split["train"] == [0, 1, 2] and all_split["test"] == [3, 4]
 
+    def test__load_from_dir_(
+        self, problem_definition_full: ProblemDefinition, tmp_path: Path
+    ):
+        d_path = tmp_path / "problem_definition"
+        problem_definition_full._save_to_dir_(d_path)
+        #
         problem = ProblemDefinition()
         problem._load_from_dir_(d_path)
         assert problem.get_task() == "regression"
@@ -538,6 +547,10 @@ def test_load(self, current_directory):
         all_split = problem.get_split()
         assert all_split["train"] == [0, 1, 2] and all_split["test"] == [3, 4]
 
+    def test_load(self, problem_definition_full: ProblemDefinition, tmp_path: Path):
+        d_path = tmp_path / "problem_definition"
+        problem_definition_full._save_to_dir_(d_path)
+        #
         problem = ProblemDefinition.load(d_path)
         assert problem.get_task() == "regression"
         assert set(problem.get_input_scalars_names()) == set(
diff --git a/tests/utils/test_stats.py b/tests/utils/test_stats.py
index f7f6577a..806756cd 100644
--- a/tests/utils/test_stats.py
+++ b/tests/utils/test_stats.py
@@ -68,7 +68,7 @@ def sample_with_scalar(np_samples_3):
 def sample_with_field(np_samples_6):
     s = Sample()
     # 1. Initialize the CGNS tree
-    s.meshes.init_tree()
+    s.features.init_tree()
     # 2. Create a base and a zone
     s.init_base(topological_dim=3, physical_dim=3)
     s.init_zone(zone_shape=np.array([np_samples_6.shape[0], 0, 0]))
@@ -89,46 +89,6 @@ def field_data_of_different_size():
     return np.random.randn(51)
 
 
-@pytest.fixture()
-def time_series_data():
-    # 10 time steps, 1 feature
-    times = np.linspace(0, 1, 10)
-    values = np.random.randn(10)
-    return times, values
-
-
-@pytest.fixture()
-def time_series_data_of_different_size():
-    # 5 time steps, 1 feature
-    times = np.linspace(0, 1, 5)
-    values = np.random.randn(5)
-    return times, values
-
-
-@pytest.fixture()
-def sample_with_time_series(time_series_data, field_data):
-    s = Sample()
-    times, values = time_series_data
-    s.add_time_series("ts1", time_sequence=times, values=values)
-    s.init_base(1, 1)
-    s.init_zone(np.array([0, 0, 0]))
-    s.add_field(name="field1", field=field_data)
-    return s
-
-
-@pytest.fixture()
-def sample_with_time_series_of_different_size(
-    time_series_data_of_different_size, field_data_of_different_size
-):
-    s = Sample()
-    times, values = time_series_data_of_different_size
-    s.add_time_series("ts1", time_sequence=times, values=values)
-    s.init_base(1, 1)
-    s.init_zone(np.array([0, 0, 0]))
-    s.add_field(name="field1", field=field_data_of_different_size)
-    return s
-
-
 # %% Functions
 
 
@@ -247,13 +207,8 @@ def test_get_stats(self, stats, samples):
 
         sample: Sample = samples[0]
         feature_names = sample.get_scalar_names()
-        feature_names.extend(
-            item
-            for ts_name in sample.get_time_series_names()
-            for item in (f"time_series/{ts_name}", f"timestamps/{ts_name}")
-        )
-        for base_name in sample.meshes.get_base_names():
-            for zone_name in sample.meshes.get_zone_names(base_name=base_name):
+        for base_name in sample.features.get_base_names():
+            for zone_name in sample.features.get_zone_names(base_name=base_name):
                 for location in CGNS_FIELD_LOCATIONS:
                     for field_name in sample.get_field_names(
                         location=location, zone_name=zone_name, base_name=base_name
@@ -297,184 +252,38 @@ def test_clear_statistics(self, stats, samples):
         stats.clear_statistics()
         assert len(stats.get_available_statistics()) == 0
 
-    def test_add_samples_time_series_case_1(self, sample_with_time_series: Sample):
-        # 1st case: adding time series with same sizes with 2 calls to add_samples
+    def test_merge_stats_with_same_sizes(self, sample_with_field):
         stats1 = Stats()
-        stats1.add_samples([sample_with_time_series])
-        stats1.add_samples([sample_with_time_series])
-        keys = stats1.get_available_statistics()
-
-        assert "Base_1_1/Zone/Vertex/field1" in keys
-        stat_field = stats1._stats["Base_1_1/Zone/Vertex/field1"]
-        assert stat_field.n_samples == 2
-        assert stat_field.n_points == 202
-        stats_dict = stat_field.get_stats()
-        check_stats_dict(stats_dict)
-        assert stats_dict["mean"].shape == (1, 101)
-
-        assert "time_series/ts1" in keys
-        assert "timestamps/ts1" in keys
-        stat = stats1._stats["time_series/ts1"]
-        assert stat.n_samples == 2
-        assert stat.n_points == 20
-        stats_dict = stat.get_stats()
-        check_stats_dict(stats_dict)
-        assert stats_dict["mean"].shape == (1, 10)
-
-    def test_add_samples_time_series_case_2(
-        self, sample_with_time_series, sample_with_time_series_of_different_size
-    ):
-        # 2nd case: adding time series with different sizes with 2 calls to add_samples
         stats2 = Stats()
-        stats2.add_samples([sample_with_time_series])
-        stats2.add_samples([sample_with_time_series_of_different_size])
-        keys = stats2.get_available_statistics()
-
-        assert "Base_1_1/Zone/Vertex/field1" in keys
-        stat_field = stats2._stats["Base_1_1/Zone/Vertex/field1"]
-        assert stat_field.n_samples == 2
-        assert stat_field.n_points == 152
-        stats_dict = stat_field.get_stats()
-        check_stats_dict(stats_dict)
-        assert stats_dict["mean"].shape == (1, 1)
-
-        assert "time_series/ts1" in keys
-        assert "timestamps/ts1" in keys
-        stat = stats2._stats["time_series/ts1"]
-        assert stat.n_samples == 2
-        assert stat.n_points == 15
-        stats_dict = stat.get_stats()
-        check_stats_dict(stats_dict)
-        assert stats_dict["mean"].shape == (1, 1)
-
-    def test_add_samples_time_series_case_3(self, sample_with_time_series):
-        # 3rd case: adding time series with same sizes in a single call to add_samples, in empty stats
-        stats3 = Stats()
-        stats3.add_samples([sample_with_time_series, sample_with_time_series])
-        keys = stats3.get_available_statistics()
-
-        assert "Base_1_1/Zone/Vertex/field1" in keys
-        stat_field = stats3._stats["Base_1_1/Zone/Vertex/field1"]
-        assert stat_field.n_samples == 2
-        assert stat_field.n_points == 202
-        stats_dict = stat_field.get_stats()
-        check_stats_dict(stats_dict)
-        assert stats_dict["mean"].shape == (1, 101)
-
-        assert "time_series/ts1" in keys
-        assert "timestamps/ts1" in keys
-        stat = stats3._stats["time_series/ts1"]
-        assert stat.n_samples == 2
-        assert stat.n_points == 20
-        stats_dict = stat.get_stats()
-        check_stats_dict(stats_dict)
-        assert stats_dict["mean"].shape == (1, 10)
-
-    def test_add_samples_time_series_case_4(
-        self, sample_with_time_series, sample_with_time_series_of_different_size
-    ):
-        # 4th case: adding time series with different sizes in a single call to add_samples, in empty stats
-        stats4 = Stats()
-        stats4.add_samples(
-            [sample_with_time_series, sample_with_time_series_of_different_size]
-        )
-        keys = stats4.get_available_statistics()
-
-        assert "Base_1_1/Zone/Vertex/field1" in keys
-        stat_field = stats4._stats["Base_1_1/Zone/Vertex/field1"]
-        assert stat_field.n_samples == 2
-        assert stat_field.n_points == 152
-        stats_dict = stat_field.get_stats()
-        check_stats_dict(stats_dict)
-        assert stats_dict["mean"].shape == (1, 1)
-
-        assert "time_series/ts1" in keys
-        assert "timestamps/ts1" in keys
-        stat = stats4._stats["time_series/ts1"]
-        assert stat.n_samples == 2
-        assert stat.n_points == 15
-        stats_dict = stat.get_stats()
-        check_stats_dict(stats_dict)
-        assert stats_dict["mean"].shape == (1, 1)
-
-    def test_add_samples_time_series_case_5(
-        self, sample_with_time_series, sample_with_time_series_of_different_size
-    ):
-        # 5th case: adding time series with different sizes in a single call to add_samples, in non-empty stats
-        stats5 = Stats()
-        stats5.add_samples([sample_with_time_series])
-        stats5.add_samples(
-            [sample_with_time_series, sample_with_time_series_of_different_size]
-        )
-        keys = stats5.get_available_statistics()
-
-        assert "Base_1_1/Zone/Vertex/field1" in keys
-        stat_field = stats5._stats["Base_1_1/Zone/Vertex/field1"]
-        assert stat_field.n_samples == 3
-        assert stat_field.n_points == 253
-        stats_dict = stat_field.get_stats()
-        check_stats_dict(stats_dict)
-        assert stats_dict["mean"].shape == (1, 1)
-
-        assert "time_series/ts1" in keys
-        assert "timestamps/ts1" in keys
-        stat = stats5._stats["time_series/ts1"]
-        assert stat.n_samples == 3
-        assert stat.n_points == 25
-        stats_dict = stat.get_stats()
-        check_stats_dict(stats_dict)
-        assert stats_dict["mean"].shape == (1, 1)
-
-    def test_merge_stats_with_same_sizes(self, sample_with_time_series):
-        stats1 = Stats()
-        stats2 = Stats()
-        stats1.add_samples([sample_with_time_series])
-        stats2.add_samples([sample_with_time_series])
+        stats1.add_samples([sample_with_field])
+        stats2.add_samples([sample_with_field])
         stats1.merge_stats(stats2)
         keys = stats1.get_available_statistics()
-        assert "Base_1_1/Zone/Vertex/field1" in keys
+        assert "Base_3_3/Zone/Vertex/bar" in keys
 
-        stat_field = stats1._stats["Base_1_1/Zone/Vertex/field1"]
+        stat_field = stats1._stats["Base_3_3/Zone/Vertex/bar"]
         assert stat_field.n_samples == 2
-        assert stat_field.n_points == 202
-        assert stat_field.n_features == 101
+        assert stat_field.n_points == 100
+        assert stat_field.n_features == 50
         stats_dict = stat_field.get_stats()
         check_stats_dict(stats_dict)
-        assert stats_dict["mean"].shape == (1, 101)
-
-        assert "time_series/ts1" in keys
-        assert "timestamps/ts1" in keys
-        stat = stats1._stats["time_series/ts1"]
-        assert stat.n_samples == 2
-        assert stat.n_points == 20
-        assert stat.n_features == 10
-        stats_dict = stat.get_stats()
-        check_stats_dict(stats_dict)
-        assert stats_dict["mean"].shape == (1, 10)
+        assert stats_dict["mean"].shape == (1, 50)
 
-    def test_merge_stats_with_different_sizes(
-        self, sample_with_time_series, sample_with_time_series_of_different_size
+    def test_merge_stats_with_different_features(
+        self, sample_with_scalar, sample_with_field
     ):
         stats1 = Stats()
         stats2 = Stats()
-        stats1.add_samples([sample_with_time_series])
-        stats2.add_samples([sample_with_time_series_of_different_size])
+        stats1.add_samples([sample_with_scalar])
+        stats2.add_samples([sample_with_field])
         stats1.merge_stats(stats2)
         keys = stats1.get_available_statistics()
-        assert "Base_1_1/Zone/Vertex/field1" in keys
+        assert "foo" in keys
 
-        stat_field = stats1._stats["Base_1_1/Zone/Vertex/field1"]
-        assert stat_field.n_samples == 2
-        assert stat_field.n_points == 152
+        stat_field = stats1._stats["foo"]
+        assert stat_field.n_samples == 1
+        assert stat_field.n_points == 1
+        assert stat_field.n_features == 1
         stats_dict = stat_field.get_stats()
         check_stats_dict(stats_dict)
         assert stats_dict["mean"].shape == (1, 1)
-
-        assert "time_series/ts1" in keys
-        assert "timestamps/ts1" in keys
-        stat = stats1._stats["time_series/ts1"]
-        assert stat.n_samples == 2
-        assert stat.n_points == 15
-        stats_dict = stat.get_stats()
-        check_stats_dict(stats_dict)
-        assert stats_dict["mean"].shape == (1, 1)