# Pipeline and configuration analysis

This notebook loads the "raw" datasets and offers a simple GUI to play around with the configuration and the pipeline stages, and to plot the result for single samples.

The prerequisite is that slki and its dependencies are installed.

## Setup

In [None]:
%pip install -qqq varname==0.13.5

Note: you may need to restart the kernel to use updated packages.


In [2]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [3]:
from collections.abc import Callable
from typing import Any

from _defaults import (
    SEED,
    datasets_labels,
    init_notebook,
    load_datasets_chunk_lengths,
    load_signal,
)
import numpy as np
from tabulate import tabulate

from slki.config import Config

In [4]:
# Run the shared notebook initialisation imported from `_defaults`
# (what exactly it sets up is not visible in this notebook).
init_notebook()

## Load raw datasets

Raw data means that the preprocessing steps are not applied yet.
The starting point is the state after loading the data from the HDF5 file, dropping non-valid data, and saving the result as pickle files.

In [5]:
import _defaults

In [6]:
# Switch `_defaults` to raw-data mode and unset its resample size and normalisation.
# NOTE(review): presumably the loader functions imported from `_defaults` read these
# module attributes at call time, so they must be set before any data is loaded — confirm.
_defaults.raw_data = True
_defaults.resample_size = None
_defaults.normalize = None

Analyse the data. To be precise: determine how many signals are available without loading the signals themselves, to save memory.

In [7]:
# Chunk lengths per dataset; summing the entries of one dataset gives its number of signals.
# NOTE(review): judging by the log output only the meta data files are read here — confirm.
dataset_lengths = load_datasets_chunk_lengths()

[[1;32mINFO[0m] [37m[slki.data.import][data_import.py:537][0m  7 meta data files found.
[[1;32mINFO[0m] [37m[slki.data.import][data_import.py:594][0m  Loading meta data from '/home/lab/slki/Dataset/preprecessed/v4/fernverkehr-points-kionix-sh-z-dt_1-meta.pkl'...
[[1;32mINFO[0m] [37m[slki.data.import][data_import.py:594][0m  Loading meta data from '/home/lab/slki/Dataset/preprecessed/v4/fernverkehr-points-kionix-sh-z-dt_2-meta.pkl'...
[[1;32mINFO[0m] [37m[slki.data.import][data_import.py:594][0m  Loading meta data from '/home/lab/slki/Dataset/preprecessed/v4/fernverkehr-points-kionix-sh-z-dt_3-meta.pkl'...
[[1;32mINFO[0m] [37m[slki.data.import][data_import.py:594][0m  Loading meta data from '/home/lab/slki/Dataset/preprecessed/v4/fernverkehr-points-kionix-sh-z-dt_4-meta.pkl'...
[[1;32mINFO[0m] [37m[slki.data.import][data_import.py:594][0m  Loading meta data from '/home/lab/slki/Dataset/preprecessed/v4/fernverkehr-points-kionix-sh-z-dt_5-meta.pkl'...
[[1;32mINFO

In [8]:
# One row per dataset: its label and the total number of signals (sum of its chunk lengths).
print(
    tabulate(
        [
            (label, sum(chunk_lengths))
            for label, chunk_lengths in zip(datasets_labels, dataset_lengths, strict=False)
        ],
        headers=["Dataset", "Length"],
    )
)

Dataset         Length
------------  --------
Fernverkehr       5572
Güterzüge         1587
Regioverkehr      4248


## Create GUI Application

### Imports and Definitions

In [9]:
from IPython.display import display
import ipywidgets as widgets
import matplotlib.pyplot as plt
from tslearn.preprocessing import TimeSeriesResampler

In [10]:
from slki.data import SensorDataContainer, SensorDataItem
from slki.preprocessing import (
    Absolute,
    Boost,
    Denoise,
    DetectSignal,
    DoubleIntegrate,
    Normalize,
    Outlier,
    Resample,
    Smooth,
    Stage,
)
from slki.utils.peak import detect_and_cluster_and_plot_peaks

In [11]:
# Selectable pipeline stages: display name -> stage class.
# The insertion order is significant: the stage dropdowns identify a stage by its position
# in this dict, and the default pipeline in `create_stages_widget` is given as positions.
stages: dict[str, type[Stage]] = {
    "Denoise": Denoise,
    "Detect Signal": DetectSignal,
    "Resample": Resample,
    "Boost": Boost,
    "Absolute": Absolute,
    "Outlier": Outlier,
    "Smooth": Smooth,
    "Normalize": Normalize,
    "Double Integrate": DoubleIntegrate,
}

### Define preprocessing and plotting methods

In [12]:
def plot(sample: np.ndarray):
    """Draw ``sample`` as a line plot on a new figure.

    The y-axis is fixed to ``[-1.1, 1.1]``.

    Returns:
        The created ``(figure, axes)`` pair.
    """
    figure, axes = plt.subplots()
    axes.plot(sample)
    axes.set_ylim(-1.1, 1.1)
    figure.tight_layout()
    return figure, axes

In [13]:
def update_gui_elements_and_load_signals(
    resample_stage_included: bool,
) -> tuple[np.ndarray, dict[str, Any], int, int, int]:
    """Synchronise the sample/resample widgets with the current selection and load the signal.

    The sample index widget is limited to the last index of the selected dataset, and the
    resample slider is limited to ``Config.RESAMPLE_SIZE`` (if the pipeline contains the
    resample stage) or to the length of the loaded signal (otherwise). A slider that sat at
    its maximum keeps following the maximum; otherwise its value is only clamped.

    Args:
        resample_stage_included: Whether the resample stage is part of the selected pipeline.

    Returns:
        ``(signal, meta, dataset_idx, signal_idx, resample_size)``.
    """
    # selected dataset
    selected_dataset = dataset_widget.value  # type: ignore[reportUndefinedVariable]

    # limit the sample index to the last valid index of that dataset
    last_idx = sum(dataset_lengths[selected_dataset]) - 1
    sample_idx_max_widget.value = f"max: {last_idx}"  # type: ignore[reportUndefinedVariable]
    sample_idx_widget.max = last_idx  # type: ignore[reportUndefinedVariable]
    clamped_idx = min(sample_idx_widget.value, last_idx)  # type: ignore[reportUndefinedVariable]
    sample_idx_widget.value = clamped_idx  # type: ignore[reportUndefinedVariable]

    # signal and meta data of the selected sample
    signal, meta = load_signal(selected_dataset, clamped_idx, dataset_lengths)

    # upper bound of the post-processing resample size
    if resample_stage_included:
        upper_bound = Config.RESAMPLE_SIZE
    else:
        upper_bound = len(signal)

    # evaluate "slider sits at its maximum" before the maximum itself is changed
    follows_max = resample_widget.max == resample_widget.value  # type: ignore[reportUndefinedVariable]
    resample_widget.max = upper_bound  # type: ignore[reportUndefinedVariable]
    if follows_max:
        target_size = upper_bound
    else:
        # the value is read after the new maximum has been applied
        target_size = min(resample_widget.value, upper_bound)  # type: ignore[reportUndefinedVariable]
    resample_widget.value = target_size  # type: ignore[reportUndefinedVariable]

    return signal, meta, selected_dataset, clamped_idx, target_size

In [14]:
# Output area that captures what `handle_interaction` prints and plots;
# `show_gui` displays it after the GUI.
out = widgets.Output()

In [15]:
from copy import copy

from slki.utils.debug import ensure_deterministic

In [16]:
@out.capture()
def handle_interaction(*args, manually_triggered: bool = False, **kwargs):
    """Run the selected pipeline stages on the selected signal and plot the result.

    The function is used as widget observer / click callback as well as called directly;
    positional and additional keyword arguments are accepted and ignored.

    Args:
        manually_triggered: ``True`` for explicit calls (e.g. the "Load" button). Calls that
            are not manually triggered are ignored unless the "Auto load" checkbox is ticked.
    """
    if not manually_triggered and not auto_load_widget.value:  # type: ignore[reportUndefinedVariable]
        return

    # set random seed
    ensure_deterministic(SEED)

    # clear output
    out.clear_output()

    # show progress widget
    progress_widget.layout.visibility = "visible"  # type: ignore[reportUndefinedVariable]

    try:
        # get selected stages; a dropdown without selection has the value `None`
        stage_classes = list(stages.values())
        selected_stages: list[type[Stage]] = [
            stage_classes[stage_widget.value]
            for stage_widget in stages_widget.children[0].children  # type: ignore[reportUndefinedVariable]
            if stage_widget.value is not None
        ]

        # load data and get the (post-processing) resample size
        signal, meta, _, _, resample_size = update_gui_elements_and_load_signals(Resample in selected_stages)
        # work on copies so that the loaded signal and meta data are not modified by the stages
        signal, meta = signal.copy(), copy(meta)

        # run all selected stages
        data = SensorDataContainer([SensorDataItem(signal, meta)], resampled=False)
        for stage_cls in selected_stages:
            stage_cls(data).run_default()
            if len(data) < 1:
                print("No data available after processing")
                return
        processed = data.get_sensor_data_items()[0]
        signal, meta = processed.data, processed.meta

        # post processing: resample
        if len(signal) != resample_size:
            # resample signal
            signal = TimeSeriesResampler(resample_size).fit_transform(signal).flatten()
            # normalize resampled signal
            data = SensorDataContainer([SensorDataItem(signal, meta)], resampled=False)
            Normalize(data).run_default()
            signal = data.get_sensor_data_items()[0].data

        # clear output (drops everything the stages wrote so far)
        out.clear_output()

        # plot sample
        _, ax = plot(signal)

        # post processing: peak detection
        if peak_detection_widget.value:  # type: ignore[reportUndefinedVariable]
            detect_and_cluster_and_plot_peaks(signal, meta["sample_length"], ax, print_console=True)

        # show plot
        plt.show()
    finally:
        # hide progress widget on every exit path, including early returns and exceptions;
        # otherwise a failing stage would leave the progress indicator visible forever
        progress_widget.layout.visibility = "hidden"  # type: ignore[reportUndefinedVariable]

### Create GUI elements

#### Stages widget elements

In [17]:
def create_stages_widget(observe_handler, default: bool = True):
    """Build the stage selection: a column of stage dropdowns plus add/remove buttons.

    Args:
        observe_handler: Callback registered on the ``value`` of every dropdown. It is also
            called without arguments after a stage has been removed.
        default: If true, start with the default pipeline (positions 0, 1, 2 and 7 of
            ``stages``: Denoise, Detect Signal, Resample, Normalize); otherwise start with a
            single dropdown without selection.

    Returns:
        A ``VBox`` whose first child is the ``VBox`` of dropdowns and whose second child is
        the row with the two buttons.
    """

    def new_dropdown(selected=None):
        # options are (label, position) pairs, i.e. a dropdown's value is the stage position
        options = [(label, position) for position, label in enumerate(stages)]
        dropdown = widgets.Dropdown(options=options, value=selected)
        dropdown.observe(observe_handler, names="value")
        return dropdown

    def new_button(description: str, button_style: str, icon: str, handler):
        button = widgets.Button(description=description, disabled=False, button_style=button_style, icon=icon)
        button.on_click(handler)
        return button

    def on_add(*_):
        # a new dropdown has no selection yet, so the handler is not triggered here
        dropdown_box.children = (*dropdown_box.children, new_dropdown())

    def on_remove(*_):
        dropdown_box.children = dropdown_box.children[:-1]
        observe_handler()

    if default:
        initial_dropdowns = [new_dropdown(position) for position in (0, 1, 2, 7)]
    else:
        initial_dropdowns = [new_dropdown()]

    dropdown_box = widgets.VBox(initial_dropdowns)
    add_button = new_button("add stage", "success", "plus", on_add)
    remove_button = new_button("remove stage", "danger", "minus", on_remove)
    return widgets.VBox([dropdown_box, widgets.HBox([add_button, remove_button])])

#### Configuration widget elements

In [18]:
from utils import nameof

In [19]:
# Registry of all config widgets created by `w`, keyed by the target expression string
# the widget writes to (e.g. "Config.SMOOTHER_KWARGS['alpha']").
config_widgets_dict: dict[str, widgets.Widget] = {}

In [20]:
def l(desc: str) -> widgets.Label:  # noqa: E743
    """Return a ``Label`` widget that shows ``desc``."""
    label = widgets.Label(value=desc)
    return label

In [21]:
def w(
    var: Any, cls: type, *, target: Any = None, on_changed: Callable[[Any, Any], Any] | None = None, **kwargs
) -> widgets.Widget:
    """Create an interactive config widget with direct feedback.

    Whenever the widget value changes, the new value is assigned to ``target`` and
    ``handle_interaction`` is called.

    Args:
        var: Initial value of the widget.
        cls: Widget class; it is instantiated as ``cls(value=var, **kwargs)``.
        target: Assignment target as source text (e.g. ``"Config.SMOOTHER_KWARGS['alpha']"``).
            If it is not a non-empty string, the target is determined with ``nameof`` from
            ``target`` (if given) or ``var`` — presumably the source text of the expression
            passed at the call site; see ``utils.nameof``.
        on_changed: Optional hook called as ``on_changed(new, old)``; its return value is the
            value that gets assigned.
        **kwargs: Additional keyword arguments for ``cls``.

    Returns:
        The created widget; it is also registered in ``config_widgets_dict`` under ``target``.
    """
    if not target or not isinstance(target, str):
        target = nameof(target if target is not None else var, frame=2, vars_only=False)

    def widget_handler(*args, **kwargs):
        change = args[0]
        new = change["new"]
        if on_changed:
            new = on_changed(new, change["old"])
        # update target variable
        # NOTE(review): the assignment is executed as generated source code via `exec`;
        # `target` must therefore only ever come from trusted code in this notebook.
        if isinstance(new, str):
            # `repr` produces a valid string literal even if the value contains quotes or
            # backslashes (hand-written quotes around the raw value would break or inject code)
            exec(f"{target} = {new!r}")
        else:
            exec(f"{target} = {new}")
        # handle interaction
        handle_interaction()

    widget = cls(value=var, **kwargs)
    widget.observe(widget_handler, names="value")
    config_widgets_dict[target] = widget
    return widget

Create all configuration widget elements:

In [22]:
# Titles and contents of the configuration tabs; the per-stage cells append to both lists
# in lockstep (one title per content widget).
config_tab_titles: list[str] = []
config_tab_contents: list[widgets.Widget] = []
# Shared two-column grid layout (first column 250px wide, second column automatic).
layout = widgets.Layout(grid_template_columns="250px auto")

In [23]:
# Denoise
# ...

In [24]:
# Detect Signal
# Each label is followed by the widget that edits the corresponding `Config` attribute.
# NOTE(review): no explicit `target` is passed to `w`, so the assignment target is derived
# from the argument expression via `nameof` — presumably the `Config.<NAME>` arguments must
# stay direct attribute expressions at the call site; confirm before refactoring.
config_tab_titles.append("Detect Signal")
config_tab_contents.append(
    widgets.GridBox(
        [
            l("Window size"),
            w(Config.WINDOW_SIZE, widgets.IntText),
            l("Variance threshold"),
            w(Config.VAR_THRESHOLD, widgets.FloatText),
            l("Remove unrecognized samples"),
            w(Config.REMOVE_UNRECOGNIZED_SAMPLES, widgets.Checkbox, indent=False),
        ],
        layout=layout,
    )
)

In [25]:
# Resample
# NOTE(review): `min=10` is forwarded to `widgets.IntText`; presumably only
# `BoundedIntText` has a `min` trait, so this lower bound may not be enforced — verify.
config_tab_titles.append("Resample")
config_tab_contents.append(
    widgets.GridBox(
        [
            l("Resample size"),
            w(Config.RESAMPLE_SIZE, widgets.IntText, min=10),
        ],
        layout=layout,
    )
)

In [26]:
# Boost
# ...

In [27]:
# Absolute
# ...

In [28]:
# Outlier
# ...

In [29]:
# Smooth
def on_smooth_type_changed(new: Any, old: Any) -> Any:
    def set_disabled(window_length: bool, window_type: bool, alpha: bool):
        config_widgets_dict["Config.SMOOTHER_KWARGS['window_length']"].disabled = window_length
        config_widgets_dict["Config.SMOOTHER_KWARGS['window_type']"].disabled = window_type
        config_widgets_dict["Config.SMOOTHER_KWARGS['alpha']"].disabled = alpha

    _new = str(new).lower()
    if _new == "conv":
        set_disabled(False, False, True)
    elif _new == "exp":
        set_disabled(False, True, False)
    else:
        # unknown type
        set_disabled(True, True, True)
    return new


# The smoother options live in the `Config.SMOOTHER_KWARGS` dict, so their assignment
# targets are passed explicitly as strings; these strings are also the keys under which
# `on_smooth_type_changed` looks the widgets up in `config_widgets_dict`.
# The `.get(...)` fallbacks only determine the initial widget value; the dict itself is
# written when a widget value changes.
config_tab_titles.append("Smooth")
config_tab_contents.append(
    widgets.GridBox(
        [
            l("Smooth type"),
            w(
                Config.SMOOTH_TYPE,
                widgets.Dropdown,
                options=[("Convolution Smoother", "conv"), ("Exponential Smoother", "exp")],
                on_changed=on_smooth_type_changed,
            ),
            l("Window length"),
            # NOTE(review): `min=1` is forwarded to `widgets.IntText`; presumably only
            # `BoundedIntText` has a `min` trait, so this bound may not be enforced — verify.
            w(
                Config.SMOOTHER_KWARGS.get("window_length", 10),
                widgets.IntText,
                target="Config.SMOOTHER_KWARGS['window_length']",
                min=1,
            ),
            l("Window type"),
            w(
                Config.SMOOTHER_KWARGS.get("window_type", "hanning"),
                widgets.Dropdown,
                target="Config.SMOOTHER_KWARGS['window_type']",
                options=[(x.title(), x) for x in ("ones", "hanning", "hamming", "bartlett", "blackman")],
            ),
            l("Alpha"),
            w(
                Config.SMOOTHER_KWARGS.get("alpha", 0.5),
                widgets.FloatText,
                target="Config.SMOOTHER_KWARGS['alpha']",
            ),
        ],
        layout=layout,
    )
)

# Apply the enabled/disabled state for the initially configured smooth type.
# As the last expression of the cell, the returned value is also shown as cell output.
on_smooth_type_changed(Config.SMOOTH_TYPE, None)

'conv'

In [30]:
# Normalize
# Dropdown with the selectable normalisation types; the initial selection is `Config.NORM_TYPE`.
# NOTE(review): the option strings presumably mirror the values accepted by the `Normalize`
# stage — keep both in sync.
config_tab_titles.append("Normalize")
config_tab_contents.append(
    widgets.GridBox(
        [
            l("Normalize"),
            w(
                Config.NORM_TYPE,
                widgets.Dropdown,
                options=["mean_var", "zero_one", "mone_one", "mone_one_zero_fix", "l1", "l2", "max"],
            ),
        ],
        layout=layout,
    )
)

In [31]:
# Double Integrate
# The two cutoff frequencies are edited by separate widgets for element 0 and element 1 of
# `Config.CUTOFF_FREQ_IN_HZ`.
# NOTE(review): without an explicit `target` the widgets presumably assign to
# `Config.CUTOFF_FREQ_IN_HZ[0]` / `[1]`; that only works if the value supports item
# assignment (e.g. a list, not a tuple) — confirm.
config_tab_titles.append("Double Integrate")
config_tab_contents.append(
    widgets.GridBox(
        [
            l("Cutoff frequency in Hz (low, high)"),
            widgets.HBox(
                [
                    w(Config.CUTOFF_FREQ_IN_HZ[0], widgets.FloatText),
                    w(Config.CUTOFF_FREQ_IN_HZ[1], widgets.FloatText),
                ]
            ),
            l("Tap in seconds"),
            w(Config.TAP_S, widgets.FloatText),
            l("One side tapper flipped"),
            w(Config.ONE_SIDE_TAPER_FLIPPED, widgets.Checkbox, indent=False),
            l("Filter order"),
            w(Config.F_ORDER, widgets.IntText),
            l("Convert meter to millimeter"),
            w(Config.CONVERT_M_TO_MM, widgets.Checkbox, indent=False),
        ],
        layout=layout,
    )
)

Create a Tab widget which contains all configurations in one place:

In [32]:
# Tab container with one tab per configurable stage, built from the collected
# tab contents and their titles.
config_widgets = widgets.Tab(children=config_tab_contents, titles=config_tab_titles)

### Assemble the entire GUI

Load the first signal, since that is the signal shown after the application is started.

In [33]:
# Load the first signal of the first dataset up front.
# NOTE(review): `signal` and `meta` are not read by any later cell shown in this notebook
# (`handle_interaction` loads the selected signal itself) — presumably this call only
# warms up `load_signal`; confirm.
signal, meta = load_signal(0, 0, dataset_lengths)

[[1;32mINFO[0m] [37m[slki.data.import][data_import.py:532][0m  1 data files and 1 meta data files found.
[[1;32mINFO[0m] [37m[slki.data.import][data_import.py:576][0m  Loading data from '/home/lab/slki/Dataset/preprecessed/v4/fernverkehr-points-kionix-sh-z-dt_1.pkl'...
[[1;32mINFO[0m] [37m[slki.data.import][data_import.py:594][0m  Loading meta data from '/home/lab/slki/Dataset/preprecessed/v4/fernverkehr-points-kionix-sh-z-dt_1-meta.pkl'...


Define the rest of the widgets:

In [34]:
# Dataset selector: one toggle button per dataset label; the widget value is the dataset index.
dataset_widget = widgets.ToggleButtons(options=list(zip(datasets_labels, range(len(datasets_labels)), strict=False)))
dataset_widget.observe(handle_interaction, names="value")
# Sample index within the selected dataset; the initial bounds are those of the first dataset
# and are updated by `update_gui_elements_and_load_signals`.
sample_idx_widget = widgets.BoundedIntText(value=0, min=0, max=sum(dataset_lengths[0]) - 1, step=1)
sample_idx_widget.observe(handle_interaction, names="value")
sample_idx_max_widget = widgets.Label(value=f"max: {sum(dataset_lengths[0]) - 1}")
# While "Auto load" is unticked, widget changes are ignored by `handle_interaction`
# until the "Load" button triggers it manually.
auto_load_widget = widgets.Checkbox(value=False, indent=False, description="Auto load")
load_widget = widgets.Button(description="Load")
load_widget.on_click(lambda _: handle_interaction(manually_triggered=True))
stages_widget = create_stages_widget(handle_interaction)
# Post-processing resample size.
# `update_gui_elements_and_load_signals` treats "value == max" as "follow the maximum";
# whether the slider starts in that state depends on `Config.RESAMPLE_SIZE` being 1000.
resample_widget = widgets.IntSlider(
    value=1000, min=100, max=Config.RESAMPLE_SIZE, step=100, readout=True, readout_format="d"
)
resample_widget.observe(handle_interaction, names="value")
peak_detection_widget = widgets.Checkbox(value=True, indent=False)
peak_detection_widget.observe(handle_interaction, names="value")
# Progress indicator, hidden by default; `handle_interaction` shows it while it is working.
progress_widget = widgets.HTML(value="<progress />")
progress_widget.layout.visibility = "hidden"

Build the GUI with all widgets:

In [35]:
# Single-column grid that stacks the GUI sections: signal selection, stages, configuration,
# post-processing and the load controls. Every accordion starts expanded (`selected_index=0`).
grid_widget = widgets.GridBox(
    [
        # section "Signal": dataset and sample selection
        widgets.Accordion(
            [
                widgets.GridBox(
                    [
                        widgets.Label(value="Dataset:"),
                        dataset_widget,
                        widgets.Label(value="Sample:"),
                        widgets.HBox([sample_idx_widget, sample_idx_max_widget]),
                    ],
                    layout=widgets.Layout(grid_template_columns="100px auto"),
                ),
            ],
            titles=["Signal"],
            selected_index=0,
        ),
        # sections "Stages" and "Configuration"
        widgets.Accordion([stages_widget], titles=["Stages"], selected_index=0),
        widgets.Accordion([config_widgets], titles=["Configuration"], selected_index=0),
        # section "Postprocessing": resample size and peak detection
        widgets.Accordion(
            [
                widgets.GridBox(
                    [
                        widgets.Label(value="Resample:"),
                        resample_widget,
                        widgets.Label(value="Peaks detection:"),
                        peak_detection_widget,
                    ],
                    layout=widgets.Layout(grid_template_columns="100px auto"),
                ),
            ],
            titles=["Postprocessing"],
            selected_index=0,
        ),
        # load controls with the progress indicator below them
        widgets.VBox(
            [
                widgets.HBox([load_widget, auto_load_widget]),
                progress_widget,
            ]
        ),
    ],
    layout=widgets.Layout(grid_template_columns="auto", grid_gap="10px"),
)

Define the main function of the application:

In [36]:
def show_gui() -> None:
    """Display the GUI followed by the output area and trigger the first load and plot."""
    display(grid_widget, out)
    # explicit trigger, because "Auto load" is unticked initially
    handle_interaction(manually_triggered=True)

## Application

In [37]:
# Render the GUI and plot the initially selected sample.
show_gui()

GridBox(children=(Accordion(children=(GridBox(children=(Label(value='Dataset:'), ToggleButtons(options=(('Fern…

Output()