<h1 align="center">Inserting H5 File</h1>


## Imports

In [3]:
!pip install h5py 

Defaulting to user installation because normal site-packages is not writeable


In [47]:
import actipy
import h5py
import numpy as np
from kielmat.utils.kielmat_dataclass import KielMATRecording
from kielmat.utils.file_io import get_unit_from_type
import pandas as pd
from pathlib import Path

## Editing our data


In [28]:
# Path to the HDF5 recording that is passed to import_mobilityLab further down.
file_path = '20220218-131956-P_009_TM_16.h5' # INSERT THE FILE HERE: replace with the path to your own .h5 file

In [29]:
# Helpers for inspecting the root attributes of the HDF5 file and adding the needed ones.
def list_attributes(file_path):
    """Print each attribute stored on the root group of an HDF5 file.

    Parameters:
        file_path (str): Path to the HDF5 file; it is opened read-only.

    Returns:
        None. One "name: value" line is printed per attribute, after a header line.
    """
    with h5py.File(file_path, 'r') as h5_file:
        attr_names = list(h5_file.attrs.keys())
        print("Attributes of the root group:")
        for name in attr_names:
            raw_value = h5_file.attrs[name]
            # A bytes value is shown as text; every other type is printed as-is.
            shown = raw_value.decode('utf-8') if isinstance(raw_value, bytes) else raw_value
            print(f"{name}: {shown}")

def add_or_edit_attribute(file_path, attribute_name, attribute_value):
    """Create or overwrite a single attribute on the root group of an HDF5 file.

    Parameters:
        file_path (str): Path to the HDF5 file; it is opened in append mode ('a').
        attribute_name (str): Name of the root-group attribute to set.
        attribute_value: Value to store under that name.

    Returns:
        None. A confirmation line is printed once the attribute has been assigned.
    """
    with h5py.File(file_path, 'a') as h5_file:
        root_attrs = h5_file.attrs
        root_attrs[attribute_name] = attribute_value
        print(f"Added/Updated attribute '{attribute_name}' with value: {attribute_value}")

def main(file_path='20220218-131956-P_009_TM_16.h5'):
    """Print the root attributes of an HDF5 file, write two label lists, and print them again.

    Parameters:
        file_path (str): Path to the HDF5 file to update. The default is the
            recording this notebook was written against, so calling main()
            with no argument behaves as before.

    Returns:
        None. The 'CaseIdList' and 'MonitorLabelList' root attributes of the
        file are overwritten as a side effect.
    """
    # List existing attributes
    print("Initial attributes:")
    list_attributes(file_path)

    # Define new attribute values
    # NOTE(review): import_mobilityLab (defined further down in this notebook) opens
    # hfile[case_id] for every CaseIdList entry and checks tracked points against
    # MonitorLabelList, so these placeholder values presumably have to be replaced
    # by the real group names / sensor labels of the recording. Confirm.
    case_id_list = ["Subject 1", "Experiment Name"]
    monitor_label_list = [
        'Accelerometer_X', 'Accelerometer_Y', 'Accelerometer_Z',
        'Gyroscope_X', 'Gyroscope_Y', 'Gyroscope_Z',
        'Magnetometer_X', 'Magnetometer_Y', 'Magnetometer_Z',
        'Temperature'
    ]

    # Add or edit attributes
    add_or_edit_attribute(file_path, 'CaseIdList', case_id_list)
    add_or_edit_attribute(file_path, 'MonitorLabelList', monitor_label_list)

    # Verify changes
    print("\nUpdated attributes:")
    list_attributes(file_path)

if __name__ == "__main__":
    # Pass the notebook-level path so the file chosen in the "INSERT THE FILE HERE"
    # cell is the one that gets edited, instead of a second hard-coded copy.
    main(file_path)


Initial attributes:
Attributes of the root group:
CaseIdList: ['Subject 1' 'Experiment Name']
FileFormatVersion: 5
MonitorLabelList: ['Accelerometer_X' 'Accelerometer_Y' 'Accelerometer_Z' 'Gyroscope_X'
 'Gyroscope_Y' 'Gyroscope_Z' 'Magnetometer_X' 'Magnetometer_Y'
 'Magnetometer_Z' 'Temperature']
Added/Updated attribute 'CaseIdList' with value: ['Subject 1', 'Experiment Name']
Added/Updated attribute 'MonitorLabelList' with value: ['Accelerometer_X', 'Accelerometer_Y', 'Accelerometer_Z', 'Gyroscope_X', 'Gyroscope_Y', 'Gyroscope_Z', 'Magnetometer_X', 'Magnetometer_Y', 'Magnetometer_Z', 'Temperature']

Updated attributes:
Attributes of the root group:
CaseIdList: ['Subject 1' 'Experiment Name']
FileFormatVersion: 5
MonitorLabelList: ['Accelerometer_X' 'Accelerometer_Y' 'Accelerometer_Z' 'Gyroscope_X'
 'Gyroscope_Y' 'Gyroscope_Z' 'Magnetometer_X' 'Magnetometer_Y'
 'Magnetometer_Z' 'Temperature']


## Function from KielMAT


In [38]:
def import_mobilityLab(file_name: str | Path, tracked_points: str | list[str]) -> tuple[pd.DataFrame, pd.DataFrame]:
    if isinstance(file_name, str):
        file_name = Path(file_name)

    if isinstance(tracked_points, str):
        tracked_points = [tracked_points]

    try:
        with h5py.File(file_name, "r") as hfile:
            # Print available attributes for debugging
            print("Available attributes:")
            for attr in hfile.attrs:
                print(f"{attr}: {hfile.attrs[attr]}")
            
            # Get monitor labels and case IDs
            monitor_labels = hfile.attrs.get("MonitorLabelList", [])
            case_ids = hfile.attrs.get("CaseIdList", [])
            
            if not monitor_labels:
                raise KeyError("MonitorLabelList attribute is missing or empty.")
            if not case_ids:
                raise KeyError("CaseIdList attribute is missing or empty.")
            
            # Convert arrays to lists if necessary
            monitor_labels = monitor_labels.tolist() if isinstance(monitor_labels, np.ndarray) else monitor_labels
            case_ids = case_ids.tolist() if isinstance(case_ids, np.ndarray) else case_ids

            # Track invalid tracked points
            invalid_tracked_points = [tp for tp in tracked_points if tp not in monitor_labels]

            if invalid_tracked_points:
                raise ValueError(f"The following tracked points do not exist in monitor labels: {invalid_tracked_points}")

            # Initialize dictionaries to store channels and data frames
            channels_dict = {
                "name": [],
                "component": [],
                "type": [],
                "tracked_point": [],
                "units": [],
                "sampling_frequency": [],
            }

            # Create dictionary to store data
            data_dict = {}

            # Iterate over each sensor
            for idx_sensor, (monitor_label, case_id) in enumerate(zip(monitor_labels, case_ids)):
                if monitor_label not in tracked_points:
                    continue
                
                sample_rate = hfile[case_id].attrs.get("SampleRate", None)
                if sample_rate is None:
                    raise KeyError(f"SampleRate attribute is missing for case ID: {case_id}")

                # Get raw data
                rawAcc = hfile[case_id]["Calibrated"]["Accelerometers"][:]
                rawGyro = hfile[case_id]["Calibrated"]["Gyroscopes"][:]
                rawMagn = hfile[case_id]["Calibrated"]["Magnetometers"][:]

                # Populate data_dict
                data_dict[f"{monitor_label}"] = pd.DataFrame({
                    f"{monitor_label}_ACCEL_x": rawAcc[:, 0],
                    f"{monitor_label}_ACCEL_y": rawAcc[:, 1],
                    f"{monitor_label}_ACCEL_z": rawAcc[:, 2],
                    f"{monitor_label}_GYRO_x": rawGyro[:, 0],
                    f"{monitor_label}_GYRO_y": rawGyro[:, 1],
                    f"{monitor_label}_GYRO_z": rawGyro[:, 2],
                    f"{monitor_label}_MAGN_x": rawMagn[:, 0],
                    f"{monitor_label}_MAGN_y": rawMagn[:, 1],
                    f"{monitor_label}_MAGN_z": rawMagn[:, 2],
                })

                # Extend lists in channels_dict
                channels_dict["name"].extend([
                    f"{monitor_label}_ACCEL_x",
                    f"{monitor_label}_ACCEL_y",
                    f"{monitor_label}_ACCEL_z",
                    f"{monitor_label}_GYRO_x",
                    f"{monitor_label}_GYRO_y",
                    f"{monitor_label}_GYRO_z",
                    f"{monitor_label}_MAGN_x",
                    f"{monitor_label}_MAGN_y",
                    f"{monitor_label}_MAGN_z",
                ])

                channels_dict["component"].extend(["x", "y", "z"] * 3)
                channels_dict["type"].extend([
                    "ACCEL", "ACCEL", "ACCEL",
                    "GYRO", "GYRO", "GYRO",
                    "MAGN", "MAGN", "MAGN"
                ])
                channels_dict["tracked_point"].extend([monitor_label] * 9)
                channels_dict["units"].extend([
                    "m/s^2", "m/s^2", "m/s^2",
                    "rad/s", "rad/s", "rad/s",
                    "µT", "µT", "µT"
                ])
                channels_dict["sampling_frequency"].extend([sample_rate] * 9)

    except KeyError as e:
        print(f"KeyError: {e}")
        return pd.DataFrame(), pd.DataFrame()
    except ValueError as e:
        print(f"ValueError: {e}")
        return pd.DataFrame(), pd.DataFrame()
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return pd.DataFrame(), pd.DataFrame()

    # Concatenate data frames from data_dict
    data = pd.concat(list(data_dict.values()), axis=1)

    # Create DataFrame from channels_dict
    channels = pd.DataFrame(channels_dict)

    return data, channels


## Importing Our Data


In [46]:
# Monitor label of the sensor to load; import_mobilityLab rejects labels that are
# not listed in the file's MonitorLabelList attribute.
tracked_point = "lowerBack"
# Returns the signal table and the matching channel-description table
# (both are empty DataFrames if the import reported an error).
data,channels = import_mobilityLab(file_path, tracked_point)
# Last expression of the cell, so the notebook displays the signal table.
data

Available attributes:
CaseIdList: ['Subject 1' 'Experiment Name']
FileFormatVersion: 5
MonitorLabelList: ['Accelerometer_X' 'Accelerometer_Y' 'Accelerometer_Z' 'Gyroscope_X'
 'Gyroscope_Y' 'Gyroscope_Z' 'Magnetometer_X' 'Magnetometer_Y'
 'Magnetometer_Z' 'Temperature']
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()


In [1]:
def read_and_clean_tsv(file_path, new_headers, skiprows=2):
    """
    Load a tab-separated file and relabel its columns.

    After `skiprows` leading lines are dropped, the second remaining line is
    taken as the header row (`header=1`), so the line before it is discarded
    as well. The header read from the file is then replaced by `new_headers`.

    Parameters:
        file_path (str): The path to the TSV file.
        new_headers (list): Column names to assign, one per column in the file.
        skiprows (int): Number of lines to skip at the start of the file (default is 2).

    Returns:
        pd.DataFrame: The data rows of the file with `new_headers` as column labels.
    """
    read_options = {"sep": '\t', "skiprows": skiprows, "header": 1}
    raw_frame = pd.read_csv(file_path, **read_options)

    # Swap the header found in the file for the caller's labels.
    return raw_frame.set_axis(new_headers, axis="columns")

# Example usage
# Requires pandas to be imported as `pd` (done in the imports cell), so that cell
# must be run first; otherwise read_and_clean_tsv fails with a NameError.
# NOTE(review): this rebinds the notebook-level `file_path`, which an earlier cell
# sets to the HDF5 recording passed to import_mobilityLab. Re-running that import
# cell after this one would hand it the TSV path; consider a distinct name.
file_path = 'IMUS1.tsv'
# Column labels to assign; there must be one entry per column in the file.
new_headers = [
    'Time', 'Accelerometer_X', 'Accelerometer_Y', 'Accelerometer_Z',
    'Gyroscope_X', 'Gyroscope_Y', 'Gyroscope_Z',
    'Magnetometer_X', 'Magnetometer_Y', 'Magnetometer_Z',
    'Barometer', 'Orientation_S', 'Orientation_X', 'Orientation_Y', 'Orientation_Z'
]

df = read_and_clean_tsv(file_path, new_headers)

# Display the first five rows of the DataFrame
df.head(5)

NameError: name 'pd' is not defined