## e2eET Skeleton Based HGR Using Data-Level Fusion

In [6]:
import pickle
import numpy as np
from pathlib import Path
from scipy import ndimage, io
from sklearn.model_selection import train_test_split

---

In [7]:
def _resize_gestures(in_gest_seqs, target_length=250):
    """Bring every gesture sequence to a common length by interpolation.

    Each sequence is handled column by column: column ``k`` (``sequence.T[k]``)
    is passed to ``scipy.ndimage.zoom`` with a zoom factor of
    ``target_length / len(sequence)`` and ``mode="reflect"``. The resampled
    columns are put back together in the sequence's original row/column
    orientation, and all resampled sequences are stacked into one array.
    """

    def _rescale(sequence):
        resampled_columns = [
            ndimage.zoom(sequence.T[col], target_length / len(sequence), mode="reflect")
            for col in range(np.size(sequence, 1))
        ]
        # columns were collected as rows; transpose back to the input layout
        return np.array(resampled_columns).T

    return np.array([_rescale(sequence) for sequence in in_gest_seqs])

In [8]:
def load_mat_gestures(type, resize_length, root, cleanup=True, verbose=False):
    """
    Get the 3D pose gestures sequences, and their associated labels.

    Every ``*.mat`` file found (recursively) below ``root`` whose file name
    contains "datafile" -- compared case-insensitively, so both ``DataFile1.mat``
    and ``Datafile1.mat`` are picked up on case-sensitive file systems -- is
    read with ``scipy.io.loadmat``. Files are processed in sorted path order so
    that the order of the returned samples is reproducible across platforms.

    Each file is expected to provide the keys ``"Anotations"`` (1-based,
    inclusive [start, stop] frame pairs), ``"labels"`` (one label per pair) and
    ``"skeleton"`` (one entry per frame). Every annotated segment becomes one
    gesture: its frames are stacked and flattened to one row per frame.

    Parameters:
        type: must be "3d".
        resize_length: if truthy, all gestures are resampled to this many
            frames via ``_resize_gestures`` and returned as one array;
            otherwise a list of per-gesture arrays is returned.
        root: dataset folder (``str`` or ``pathlib.Path``); its string form
            must contain "LMDHG".
        cleanup: if true, segments labelled CISEAUX, POINTER_PROLONGE or REPOS
            are dropped (and counted in the printed ``skipped_classes``).
        verbose: if true, print each file path and each extracted gesture.

    Output:  a tuple of (gestures, labels, details).
        ``details`` entries look like "<label>-<file stem>_<start>_<stop>",
        with the 1-based frame indices taken from the annotations.

    Raises:
        AssertionError: wrong ``root`` / ``type`` (see above).
        FileNotFoundError: no matching data file exists below ``root``.
        KeyError: a file contains a label missing from the translation table.
    """

    # _____
    root = str(root)  # `"LMDHG" in root` would raise TypeError for a Path
    assert "LMDHG" in root, "Check that the correct dataset folder is provided!"
    assert type == "3d", "LMDHG only contains 3D data!"

    # raw label -> list of names; only the last entry (English name) is used as output label
    translation = {
        "ATTRAPER_MAIN_LEVEE": ["AttraperMainLevee", "CatchWithTwoHands"],
        "ATTRAPER": ["Attraper", "Catch"],
        "C": ["C", "DrawC"],
        "DEFILER_DOIGT": ["DefilerDoigt", "Scroll"],
        "LIGNE": ["Ligne", "DrawLine"],
        "PIVOTER": ["Pivoter", "Rotate"],
        "POINTER_MAIN_LEVEE": ["PointerMainLevee", "PointToWithTwoHands"],
        "POINTER_PROLONGE": ["PointerMainLevee", "PointToWithTwoHands"],  # [NOTE] unknown class; skip
        "POINTER": ["Pointer", "PointTo"],
        "SECOUER_BAS": ["SecouerBas", "ShakeDown"],
        "SECOUER_POING_LEVE": ["SecouerPoingLeve", "ShakeWithTwoHands"],
        "SECOUER": ["Secouer", "Shake"],
        "TRANCHER": ["Trancher", "Slice"],
        "CISEAUX": ["Trancher", "Slice"],  # [NOTE] unknown class; skip
        "ZOOM": ["Zoom", "Zoom"],
        "REPOS": ["Repos", "Resting"],  # [NOTE] filler class; skip
    }

    # The keys double as the set of classes dropped when `cleanup` is on.
    skipped_classes = {"CISEAUX": 0, "POINTER_PROLONGE": 0, "REPOS": 0}

    # Match the file name case-insensitively: a literal "*Datafile*.mat" glob
    # finds nothing on case-sensitive file systems when files are "DataFileN.mat".
    filenames = sorted(
        path for path in Path(root).rglob("*.mat") if "datafile" in path.name.lower()
    )
    if not filenames:
        raise FileNotFoundError(f"No '*datafile*.mat' files found under <{root}>")

    # _____
    gestures = []
    labels = []
    details = []

    for mat_path in filenames:
        f_data = io.loadmat(mat_path)
        if verbose:
            print(f"f= {mat_path!r}")

        f_annotations = f_data["Anotations"] - 1  # subtract 1 to switch from MATLAB to python indexing
        f_labels = [entry[0] for entry in f_data["labels"][:, 0]]
        f_skeletons = [entry[0] for entry in f_data["skeleton"]]

        for (_stt_idx, _stp_idx), _lbl in zip(f_annotations, f_labels):
            if cleanup and _lbl in skipped_classes:
                skipped_classes[_lbl] += 1
                continue

            _lbl = translation[_lbl][-1]
            # report the original 1-based frame indices in the details string
            _deets = f"{_lbl}-{mat_path.stem}_{_stt_idx+1}_{_stp_idx+1}"
            # the stop index is inclusive, hence the +1 in the slice
            _gesture = np.array(f_skeletons[_stt_idx:_stp_idx+1])
            # flatten everything but the frame axis: one row per frame
            _gesture = _gesture.reshape(_gesture.shape[0], -1)

            gestures.append(_gesture)
            labels.append(_lbl)
            details.append(_deets)
            if verbose:
                print(f"{_gesture.shape=} | {_deets=}")

    if resize_length:
        gestures = _resize_gestures(gestures, target_length=resize_length)
    print(f"{len(gestures)=} | {len(labels)=} | {len(details)=}")
    print(f"{skipped_classes=}")
    assert len(gestures) == len(labels) == len(details)
    return gestures, labels, details

# load_mat_gestures(type="3d", resize_length=None, root="../../LMDHG", verbose=True)

In [9]:
def _write_data(data, filepath):
    """Pickle ``data`` to ``filepath``, opened in binary write mode.

    Note: data is a dict with keys 'X_train', ...
    """

    with open(filepath, "wb") as sink:
        pickle.dump(data, sink)

In [10]:
def load_pckl_data(filepath):
    """
    Returns hand gesture sequences (X) and their associated labels (Y).

    The file at ``filepath`` is unpickled and must contain a dict with the keys
    'X_train', 'X_valid', 'train_labels', 'valid_labels', 'train_details' and
    'valid_details'.

    Output: the tuple
        (X_train, X_valid, train_labels, valid_labels, train_details, valid_details).

    Raises:
        KeyError: one of the keys listed above is missing from the file.
    """

    # NOTE(security): unpickling can execute arbitrary code -- only load files
    # that were produced by a trusted source.
    # The context manager closes the file even when unpickling fails.
    with open(filepath, "rb") as file:
        # change 'latin1' to 'utf8' if the data does not load
        data = pickle.load(file, encoding="latin1")

    return (
        data["X_train"], data["X_valid"],
        data["train_labels"], data["valid_labels"],
        data["train_details"], data["valid_details"],
    )

In [13]:
def _get_df(i_deets, subset):
    """Tell whether the data file a gesture came from belongs to ``subset``.

    ``i_deets`` is a details string such as "Catch-DataFile12_1_30": the text
    after the last "-" and before the first following "_" is the data file's
    stem. The "DataFile" prefix is removed case-insensitively (so "Datafile12"
    and "datafile12" work as well) and the remainder is parsed as the file
    number.

    Returns True when that number lies in 1..35 for ``subset == "train"`` or
    in 36..50 for ``subset == "valid"``, otherwise False.

    Raises:
        AssertionError: ``subset`` is neither "train" nor "valid".
        ValueError: no integer file number could be parsed from ``i_deets``.
    """
    assert subset in ["train", "valid"]
    dfs = range(1, 36) if (subset == "train") else range(36, 51)
    stem = i_deets.split("-")[-1].split("_")[0]
    # lower-casing first makes the prefix removal independent of letter case
    df = int(stem.lower().replace("datafile", ""))
    return df in dfs

def create_LMDHG_paper_split(type, root, resize_length=None, seed=17711, save_path=None):
    """Create the LMDHG train/validation split by data file and pickle it.

    Gestures are loaded with ``load_mat_gestures``; each sample is assigned to
    the training or validation subset by ``_get_df`` based on its details
    string. The result is written with ``_write_data`` as a dict with the keys
    'X_train', 'X_valid', 'train_labels', 'valid_labels', 'train_details' and
    'valid_details'.

    Parameters:
        type: "2d" or "3d" (``load_mat_gestures`` itself only accepts "3d").
        root: dataset folder, forwarded to ``load_mat_gestures``.
        resize_length: forwarded to ``load_mat_gestures``. When it is left at
            ``None`` the gestures come back as a list of per-gesture arrays;
            'X_train' / 'X_valid' are then lists as well. Otherwise they are
            arrays.
        seed: currently unused by this function; kept so the signature stays
            compatible with existing callers.
        save_path: output directory; defaults to "../datasets". The file name
            is "LMDHG_<type>_dictPaperSplit_l<resize_length>_s<n samples>.pckl".

    Returns None; progress messages are printed.
    """
    assert type in ["2d", "3d"], "Data type has to be specified ['2d' / '3d']"

    def _select(items, idxs):
        # A plain list cannot be indexed with a list of positions, which is
        # what `gestures` is whenever no resizing was requested.
        if isinstance(items, np.ndarray):
            return items[idxs]
        return [items[i] for i in idxs]

    def _shape(items):
        # Lists (un-resized gestures) have no .shape; report their length instead.
        return items.shape if isinstance(items, np.ndarray) else (len(items),)

    # load the dataset gesture sequences from file(s)
    gestures, labels, details = load_mat_gestures(type, resize_length, root)
    print(">>> <gestures, labels, details> loaded successfully!")

    # split into train and validation subsets
    idxs_valid = [i for i, i_deets in enumerate(details) if _get_df(i_deets, "valid")]
    idxs_train = [i for i, i_deets in enumerate(details) if _get_df(i_deets, "train")]

    X_train, X_valid = _select(gestures, idxs_train), _select(gestures, idxs_valid)
    train_labels = _select(labels, idxs_train)
    valid_labels = _select(labels, idxs_valid)
    train_details = _select(details, idxs_train)
    valid_details = _select(details, idxs_valid)
    print(f">>> {type} training ({_shape(X_train)}) and validation ({_shape(X_valid)}) data created.")

    # save the test-train data to disk
    if save_path is None:
        save_path = "../datasets"
    save_path = f"{save_path}/LMDHG_{type}_dictPaperSplit_l{resize_length}_s{len(gestures)}.pckl"

    data = {
        "X_train": X_train, "X_valid": X_valid,
        "train_labels": train_labels, "valid_labels": valid_labels,
        "train_details": train_details, "valid_details": valid_details,
    }
    _write_data(data, filepath=save_path)
    print(f">>> LMDHG Paper train-valid data written to <{save_path}> successfully!")

---

In [9]:
# Build the paper split from the 3D data under ../datasets/LMDHG with resize_length=750;
# save_path is not given, so the pickle is written to the function's default location.
create_LMDHG_paper_split(type="3d", root="../datasets/LMDHG", resize_length=750)

len(gestures)=609 | len(labels)=609 | len(details)=609
skipped_classes={'CISEAUX': 22, 'POINTER_PROLONGE': 23, 'REPOS': 579}
>>> <gestures, labels, details> loaded successfully!
>>> 3d training ((415, 750, 138)) and validation ((194, 750, 138)) data created.
>>> LMDHG Paper train-valid data written to <../datasets/LMDHG_3d_dictLPS_l750_s609.pckl> successfully!
