In [1]:
from plaid_bridges.dataloaders import HeterogeneousPlaidDataLoader, HomogeneousPlaidDataLoader

from datasets import load_dataset, load_from_disk
from plaid.containers.sample import Sample
import pickle
from plaid.bridges.huggingface_bridge import (
    huggingface_dataset_to_plaid,
    huggingface_description_to_problem_definition,
)

  from .autonotebook import tqdm as notebook_tqdm
[2025-08-19 09:37:43,970:INFO:config.py:<module>(54)]:PyTorch version 2.8.0 available.


## Case with homogeneous samples

In [None]:
# Download only the first 10 entries of the "all_samples" split of VKI-LS59.
hf_dataset = load_dataset("PLAID-datasets/VKI-LS59", split="all_samples[:10]")

# Rebuild the first sample from its pickled payload.
# NOTE(review): unpickling runs arbitrary code, so only do this on datasets you trust.
# `sample` is not referenced again in the cells shown in this notebook.
first_sample_payload = hf_dataset[0]["sample"]
sample = Sample.model_validate(pickle.loads(first_sample_payload))

# The problem definition is derived from the dataset's description metadata.
hf_description = hf_dataset.info.description
pb_def = huggingface_description_to_problem_definition(hf_description)

# Keep the first 10 ids of the "train" split.
train_ids = pb_def.get_split("train")
ids = train_ids[:10]

# processes_number=5: presumably the number of worker processes used for the conversion.
dataset, _ = huggingface_dataset_to_plaid(hf_dataset, ids=ids, processes_number=5)

Converting huggingface dataset to plaid dataset...


100%|██████████| 10/10 [00:07<00:00,  1.31it/s]


In [3]:
print(dataset)

# Feature identifiers are dict-like records; list those of the first sample.
all_feat_ids = dataset[0].get_all_features_identifiers()

# Classify on the "type" entry explicitly. Testing `"scalar" in f.values()`
# would also match an identifier whose name (or any other value) happens to be
# "scalar" or "field", and silently put it in the wrong list.
scalar_features = [f for f in all_feat_ids if f.get("type") == "scalar"]
field_features = [f for f in all_feat_ids if f.get("type") == "field"]

# Inputs: first scalar and first field. Outputs: second field and second scalar.
in_feature_identifiers = [scalar_features[0], field_features[0]]
out_feature_identifiers = [field_features[1], scalar_features[1]]

print(in_feature_identifiers)
print(out_feature_identifiers)

Dataset(10 samples, 8 scalars, 0 time_series, 8 fields)
[{'type': 'scalar', 'name': np.str_('Pr')}, {'type': 'field', 'name': 'rou', 'base_name': 'Base_2_2', 'zone_name': 'Zone', 'location': 'Vertex', 'time': np.float64(0.0)}]
[{'type': 'field', 'name': 'mach', 'base_name': 'Base_2_2', 'zone_name': 'Zone', 'location': 'Vertex', 'time': np.float64(0.0)}, {'type': 'scalar', 'name': np.str_('Q')}]


In [4]:
# Loader over the converted dataset: batches of 2, shuffled, restricted to the
# selected input/output features. With shuffle=True the samples in the batch
# printed below presumably differ from run to run.
loader = HomogeneousPlaidDataLoader(
    dataset,
    batch_size=2,
    shuffle=True,
    in_feature_identifiers=in_feature_identifiers,
    out_feature_identifiers=out_feature_identifiers,
)

def _make_hashable(feat_dict: dict):
    return tuple(sorted(feat_dict.items()))

# Draw a single batch; batch[0] is indexed with an input-feature key and
# batch[1] with an output-feature key.
batch = next(iter(loader))

in_scalar_key = _make_hashable(in_feature_identifiers[0])
print("scalars =", batch[0][in_scalar_key])

out_field_key = _make_hashable(out_feature_identifiers[0])
out_field_batch = batch[1][out_field_key]
print("fields =", out_field_batch, " | >>>> tensor:", out_field_batch.shape)

batch_in_features.keys() = list[dict_keys([(('name', np.str_('Pr')), ('type', 'scalar')), (('base_name', 'Base_2_2'), ('location', 'Vertex'), ('name', 'rou'), ('time', np.float64(0.0)), ('type', 'field'), ('zone_name', 'Zone'))])]
batch_out_features.keys() = list[dict_keys([(('base_name', 'Base_2_2'), ('location', 'Vertex'), ('name', 'mach'), ('time', np.float64(0.0)), ('type', 'field'), ('zone_name', 'Zone')), (('name', np.str_('Q')), ('type', 'scalar'))])]
scalars = tensor([0.9636, 0.9548], dtype=torch.float64)
fields = tensor([[0.3655, 0.3655, 0.3656,  ..., 0.9624, 0.9623, 0.9622],
        [0.3794, 0.3794, 0.3795,  ..., 0.9610, 0.9610, 0.9610]],
       dtype=torch.float64)  | >>>> tensor: torch.Size([2, 36421])


## Case with heterogeneous samples

In [None]:
# Download only the first 10 entries of the "all_samples" split of tensile2d.
hf_dataset = load_dataset("PLAID-datasets/tensile2d", split="all_samples[:10]")

# Rebuild the first sample from its pickled payload.
# NOTE(review): unpickling runs arbitrary code, so only do this on datasets you trust.
# `sample` is not referenced again in the cells shown in this notebook.
first_sample_payload = hf_dataset[0]["sample"]
sample = Sample.model_validate(pickle.loads(first_sample_payload))

# The problem definition is derived from the dataset's description metadata.
hf_description = hf_dataset.info.description
pb_def = huggingface_description_to_problem_definition(hf_description)

# Keep the first 10 ids of the "train_500" split.
train_ids = pb_def.get_split("train_500")
ids = train_ids[:10]

# processes_number=5: presumably the number of worker processes used for the conversion.
dataset, _ = huggingface_dataset_to_plaid(hf_dataset, ids=ids, processes_number=5)

Converting huggingface dataset to plaid dataset...


100%|██████████| 10/10 [00:07<00:00,  1.36it/s]


In [6]:
print(dataset)

# Feature identifiers are dict-like records; list those of the first sample.
all_feat_ids = dataset[0].get_all_features_identifiers()

# Classify on the "type" entry explicitly. Testing `"scalar" in f.values()`
# would also match an identifier whose name (or any other value) happens to be
# "scalar" or "field", and silently put it in the wrong list.
scalar_features = [f for f in all_feat_ids if f.get("type") == "scalar"]
field_features = [f for f in all_feat_ids if f.get("type") == "field"]

# Inputs: first scalar and first field. Outputs: second field and second scalar.
in_feature_identifiers = [scalar_features[0], field_features[0]]
out_feature_identifiers = [field_features[1], scalar_features[1]]

print(in_feature_identifiers)
print(out_feature_identifiers)

Dataset(10 samples, 10 scalars, 0 time_series, 6 fields)
[{'type': 'scalar', 'name': np.str_('P')}, {'type': 'field', 'name': 'U2', 'base_name': 'Base_2_2', 'zone_name': 'Zone', 'location': 'Vertex', 'time': np.float64(0.0)}]
[{'type': 'field', 'name': 'sig12', 'base_name': 'Base_2_2', 'zone_name': 'Zone', 'location': 'Vertex', 'time': np.float64(0.0)}, {'type': 'scalar', 'name': np.str_('max_U2_top')}]


In [7]:
# Loader over the converted dataset: batches of 2, shuffled, restricted to the
# selected input/output features. With shuffle=True the samples in the batch
# printed below presumably differ from run to run.
loader = HeterogeneousPlaidDataLoader(
    dataset,
    batch_size=2,
    shuffle=True,
    in_feature_identifiers=in_feature_identifiers,
    out_feature_identifiers=out_feature_identifiers,
)

def _make_hashable(feat_dict: dict):
    return tuple(sorted(feat_dict.items()))

# Draw a single batch; batch[0] is indexed with an input-feature key and
# batch[1] with an output-feature key.
batch = next(iter(loader))

in_scalar_key = _make_hashable(in_feature_identifiers[0])
print("scalars =", batch[0][in_scalar_key])

out_field_key = _make_hashable(out_feature_identifiers[0])
out_field_batch = batch[1][out_field_key]
print("fields =", out_field_batch, " | >>>> list:", type(out_field_batch))

scalars = [np.float64(-41.99), np.float64(-47.79)]
fields = [array([ 3.89645225e-04,  3.59882951e-01, -3.01928401e-01, ...,
       -2.27506590e+00,  1.90825710e+01, -2.96575565e+01], shape=(6171,)), array([ 1.12205986e-02,  2.51928777e-01, -3.96669924e-01, ...,
       -3.65844202e+00,  1.14552109e+02,  2.13745365e+01], shape=(6184,))]  | >>>> list: <class 'list'>
