# Read ZeMA dataset and preprocess data

In [1]:
import json

import h5py
import numpy as np
from h5py import Dataset, File, Group
from numpy import ndarray

In [2]:
def print_attrs(h5py_dataset_or_group):
    """Print all attributes of an HDF5 object, decoding each value from JSON.

    For every key in ``h5py_dataset_or_group.attrs`` the key is printed on its
    own line, followed by the decoded value indented by three spaces. A value
    that decodes to a dict is printed as one ``subkey  : subval`` line per
    entry; any other decoded value is printed on a single line.

    Parameters
    ----------
    h5py_dataset_or_group
        Any object exposing a mapping-like ``attrs`` whose values are JSON
        documents accepted by ``json.loads``.

    Raises
    ------
    Whatever ``json.loads`` raises for a value that is not valid JSON; the
    key of the offending attribute has already been printed at that point.
    """
    attributes = h5py_dataset_or_group.attrs
    for attr_name in attributes:
        print(attr_name)
        decoded = json.loads(attributes[attr_name])
        if not isinstance(decoded, dict):
            # Scalars, strings and lists are shown as-is on a single line.
            print(f"   {decoded}")
            continue
        for entry_name, entry_value in decoded.items():
            print(f"   {entry_name}  : {entry_value}")

In [3]:
# Show the metadata attributes attached to the root of the HDF5 file.
with h5py.File("axis11_2kHz_ZeMA_PTB_SI.h5", mode="r") as h5f:
    print_attrs(h5f)

Experiment
   date  : 2021-03-29/2021-04-15
   DUT  : Festo ESBF cylinder
   identifier  : axis11
   label  : Electromechanical cylinder no. 11
Person
   dc:author  : ['Tanja Dorst', 'Maximilian Gruber', 'Anupam Prasad Vedurmudi']
   e-mail  : ['t.dorst@zema.de', 'maximilian.gruber@ptb.de', 'anupam.vedurmudi@ptb.de']
   affiliation  : ['ZeMA gGmbH', 'Physikalisch-Technische Bundesanstalt', 'Physikalisch-Technische Bundesanstalt']
Project
   fullTitle  : Metrology for the Factory of the Future
   acronym  : Met4FoF
   websiteLink  : www.met4fof.eu
   fundingSource  : European Commission (EC)
   fundingAdministrator  : EURAMET
   funding programme  : EMPIR
   fundingNumber  : 17IND12
   acknowledgementText  : This work has received funding within the project 17IND12 Met4FoF from the EMPIR program co-financed by the Participating States and from the European Union's Horizon 2020 research and innovation program. The authors want to thank Clifford Brown, Daniel Hutzschenreuter, Holger Israe

In [4]:
# Inspect a single uncertainty dataset of the "PTB_SUU" group and list the
# members of that group.
with h5py.File("axis11_2kHz_ZeMA_PTB_SI.h5", mode="r") as h5f:
    ptb_sensor_set = h5f["PTB_SUU"]
    my_uncertainty = ptb_sensor_set["MPU_9250"]["Acceleration"][
        "qudt:standardUncertainty"
    ]
    # The dataset's name is its path inside the file, so it contains the key.
    print("qudt:standardUncertainty" in my_uncertainty.name)
    print_attrs(my_uncertainty)
    print(my_uncertainty)
    print(list(ptb_sensor_set))

True
si:label
   ['X acceleration uncertainty', 'Y acceleration uncertainty', 'Z acceleration uncertainty']
<HDF5 dataset "qudt:standardUncertainty": shape (3, 1000, 4766), type "<f8">
['BMA_280', 'MPU_9250']


In [5]:
def extract_data(n_samples: int, verbose: bool = False) -> ndarray:
    """Extract the leading uncertainty values of every ``ZeMA_DAQ`` sensor.

    Opens ``axis11_2kHz_ZeMA_PTB_SI.h5`` read-only, walks every quantity of the
    first top-level group whose name contains ``"ZeMA_DAQ"`` and, for each
    dataset whose HDF5 path contains ``"qudt:standardUncertainty"``, takes the
    first ``n_samples`` entries along the first of its two trailing axes at
    index 0 of the last axis. Three-dimensional datasets contribute one column
    per entry of their leading axis, all other datasets contribute a single
    column.

    Parameters
    ----------
    n_samples
        Number of leading values taken from every sensor; also the number of
        rows of the result.
    verbose
        If True, print the visited sensor set, quantities and datasets.

    Returns
    -------
    ndarray
        Array of shape ``(n_samples, number_of_extracted_columns)``.

    Raises
    ------
    KeyError
        If the file has no top-level group whose name contains ``"ZeMA_DAQ"``.

    Notes
    -----
    NOTE(review): if ``n_samples`` exceeds the length of the sliced axis, the
    slice is silently shortened and appending the column presumably fails with
    a shape mismatch -- confirm whether that case needs explicit handling.
    """
    extracted_data = np.empty((n_samples, 0))
    # Honour ``n_samples`` in the selection as well: a fixed ``0:5`` slice only
    # matches the ``(n_samples, 0)`` accumulator when n_samples == 5.
    indices = np.s_[0:n_samples, 0]
    daq_identifier = "ZeMA_DAQ"
    with h5py.File("axis11_2kHz_ZeMA_PTB_SI.h5", "r") as h5f:
        sensor_set = conditional_first_level_element(h5f, daq_identifier)
        if sensor_set is None:
            raise KeyError(
                f"No top-level group containing '{daq_identifier}' found in file."
            )
        if verbose:
            print(
                f"\nShow data for sensor set {daq_identifier}:\n{'-'*(26 + len(daq_identifier))}"
            )
        # Navigate through the group that was actually matched instead of
        # re-resolving it by exact name on every access.
        for quantity in sensor_set:
            quantity_group = sensor_set[quantity]
            if verbose:
                print(
                    f"\n    Show data for quantity {quantity}:\n    {'-'*(24 + len(quantity))}"
                )
            for dataset_name in quantity_group:
                dataset = quantity_group[dataset_name]
                if verbose:
                    print(f"    {dataset}")
                if "qudt:standardUncertainty" not in dataset.name:
                    continue
                if len(dataset.shape) == 3:
                    # Index the dataset directly so only the requested values
                    # are read, rather than loading a full 2-D slab per sensor.
                    for sensor_index in range(dataset.shape[0]):
                        extracted_data = append_to_extraction(
                            extracted_data,
                            extract_sample_from_dataset(
                                dataset, (sensor_index, *indices)
                            ),
                        )
                else:
                    extracted_data = append_to_extraction(
                        extracted_data,
                        extract_sample_from_dataset(dataset, indices),
                    )
    return extracted_data


def conditional_first_level_element(hdf5_file: File, identifier: str) -> Group:
    """Return the first top-level member whose name contains ``identifier``.

    Parameters
    ----------
    hdf5_file
        Mapping-like container whose iteration yields the member names.
    identifier
        Substring looked for in every member name, in iteration order.

    Returns
    -------
    The member stored under the first matching name, or ``None`` when no name
    contains ``identifier``.
    """
    matching_name = next(
        (name for name in hdf5_file if identifier in name),
        None,
    )
    if matching_name is None:
        return None
    return hdf5_file[matching_name]


def hdf5_part(hdf5_file: File, keys: tuple[str, ...]) -> Group | Dataset:
    part = hdf5_file
    for key in keys:
        part = part[key]
    return part


def extract_sample_from_dataset(
    data_set: Dataset, ns_samples: tuple[int | slice]
) -> ndarray[float]:
    return np.expand_dims(np.array(data_set[ns_samples]), 1)


def append_to_extraction(append_to: ndarray, appendix: ndarray) -> ndarray:
    """Return a new array with ``appendix`` joined to ``append_to`` along axis 1.

    Parameters
    ----------
    append_to
        Array that forms the leading columns of the result.
    appendix
        Array whose columns are placed after those of ``append_to``; it must
        match ``append_to`` in every dimension except axis 1.

    Returns
    -------
    A newly allocated array; neither input is modified.
    """
    joined = np.concatenate((append_to, appendix), axis=1)
    return joined

In [6]:
# Extract five values per sensor, then show the resulting matrix and its shape.
uncertainties = extract_data(n_samples=5, verbose=True)
print(uncertainties, uncertainties.shape, sep="\n")


Show data for sensor set ZeMA_DAQ:
----------------------------------

    Show data for quantity Acceleration:
    ------------------------------------
    <HDF5 dataset "qudt:standardUncertainty": shape (3, 2000, 4766), type "<f8">
    <HDF5 dataset "qudt:value": shape (3, 2000, 4766), type "<f8">

    Show data for quantity Active_Current:
    --------------------------------------
    <HDF5 dataset "qudt:standardUncertainty": shape (2000, 4766), type "<f8">
    <HDF5 dataset "qudt:value": shape (2000, 4766), type "<f8">

    Show data for quantity Force:
    -----------------------------
    <HDF5 dataset "qudt:standardUncertainty": shape (2000, 4766), type "<f8">
    <HDF5 dataset "qudt:value": shape (2000, 4766), type "<f8">

    Show data for quantity Motor_Current:
    -------------------------------------
    <HDF5 dataset "qudt:standardUncertainty": shape (3, 2000, 4766), type "<f8">
    <HDF5 dataset "qudt:value": shape (3, 2000, 4766), type "<f8">

    Show data for quanti