In [82]:
import os
import plotly.graph_objs as go
from plotly.offline import iplot
from pathlib import Path
import numpy as np
import pandas as pd

In [83]:
# set autoreload to reload all external modules automatically (otherwise changes to those modules won't take effect in the notebook)
%load_ext autoreload
%autoreload 2
from make_folder_dataset import MakeFolderDataset

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [84]:
#load environment variables from .env file in repo root
%load_ext dotenv
%dotenv

#DATASET_REPO_ROOT_PATH=<absolute-path-to-dataset-repo-root-folder>
# fail early with a clear message if the variable is not set:
# Path(None) would otherwise raise a TypeError that does not mention the missing variable
_dataset_repo_root = os.environ.get("DATASET_REPO_ROOT_PATH")
if _dataset_repo_root is None:
    raise RuntimeError(
        "environment variable DATASET_REPO_ROOT_PATH is not set, "
        "define it in the .env file in the repo root")
dataset_repo_root_path = Path(_dataset_repo_root)

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [85]:
# set True / False to enable / disable data set persisting
save_dataset = False
# names of the instance folders (below rawData) to load; every instance folder is loaded if None
specific_instances = None
specific_plot_target = ["etau_J0"] # plots everything if None, nothing if [], specific targets if [<list-of-targets>]
# suffix that is inserted right before ".npy" in the file names of the saved feature / label matrices
x_y_data_suffix = ""

In [86]:
# load dataset instances
# every sub-folder of <DATASET_REPO_ROOT_PATH>/rawData is one instance, except the folder "_ignore"
# and - if specific_instances is set - every folder whose name is not listed there
raw_data_path = dataset_repo_root_path / "rawData"
paths: list[Path] = sorted(
    (
        folder
        for folder in raw_data_path.iterdir()
        if (specific_instances is None or folder.name in specific_instances)
        and folder.is_dir()
        and folder.name != "_ignore"
    ),
    key=lambda folder: folder.name,
)

# instances are created in alphabetical order of their folder names
instances: list[MakeFolderDataset] = []
for folder in paths:
    dataset = MakeFolderDataset(folder.absolute())
    dataset.extract_robot_data()
    dataset.get_labels_all()
    instances.append(dataset)

print(f"found {len(instances)} instances")

TypeError: 'MakeFolderDataset' object is not subscriptable

In [None]:
# add contact labels from true_label dataframe to robot-data dataframe for all instances
for inst in instances:
    # drop a has_contact column left over from a previous run of this cell, otherwise renaming
    # DATA0 would create a 2nd column with the same name and the cell would fail when it is re-run
    robot_data = inst.df.drop(columns=["has_contact"], errors="ignore")

    # as-of merge on time: each robot-data row gets the label of the closest preceding true_label row,
    # but only if that row is at most 0.02 (in units of the time column) away
    inst.df = pd.merge_asof(
        left=robot_data,
        right=inst.true_label[["time", "DATA0"]],
        on="time",
        tolerance=0.02,
    ).rename(columns={"DATA0": "has_contact"})

    # rows without a matching label are treated as "no contact"
    inst.df["has_contact"] = inst.df["has_contact"].fillna(0)

    # copy the label of the 2nd row to the 1st row if the 2nd row has contact
    # NOTE(review): presumably the 1st row has no preceding label within the tolerance - confirm
    if len(inst.df) > 1 and inst.df.loc[1, 'has_contact'] == 1:
        inst.df.loc[0, 'has_contact'] = 1

In [None]:
# clean up faulty (true_label) sensor data
# IMPORTANT: assume that the first measurement (time-window with has_contact = 1) is correct and can be used as a reference point to clean the remaining instance
# thus it must manually be verified that the first measurement of each instance is indeed correct

def get_contact_duration(df, time):
    """Return duration, start time and end time of the contact time-window around `time`.

    The time-window is delimited by the closest rows with has_contact = 0: it starts one row
    after the last such row before `time` and ends one row before the first such row after
    `time`. If there is no such row on one side, the time-window extends to the first / last
    row of `df` on that side.

    Neighbouring rows are addressed by adding / subtracting 1 to / from the row label, so `df`
    must have a consecutive integer index (e.g. the default RangeIndex).

    Args:
        df: data frame with the columns 'time' and 'has_contact'.
        time: a time within the contact time-window of interest.

    Returns:
        Tuple (duration, start_time, end_time) with duration = end_time - start_time.
    """
    no_contact = (df['has_contact'] == 0).to_numpy()

    # last row without contact before the specified time, the time-window starts right after it
    rows_before = df.index[(df['time'] < time).to_numpy() & no_contact]
    # no such row means the contact is already present in the first row
    start_time_index = rows_before[-1] + 1 if len(rows_before) > 0 else df.index[0]
    start_time = df.loc[start_time_index, 'time']

    # first row without contact after the specified time, the time-window ends right before it
    rows_after = df.index[(df['time'] > time).to_numpy() & no_contact]
    # no such row means the contact lasts until the last row
    end_time_index = rows_after[0] - 1 if len(rows_after) > 0 else df.index[-1]
    end_time = df.loc[end_time_index, 'time']

    return (end_time - start_time), start_time, end_time


def get_next_contact_time(df, excl_from_time):
    """Return time and row label of the first contact row after `excl_from_time`.

    Only rows with has_contact = 1 are considered whose time is greater than both
    `excl_from_time` and the time of the first row with has_contact = 0. The latter excludes
    a contact time-window that is cut off at the start of the measurement.

    Args:
        df: data frame with the columns 'time' and 'has_contact'.
        excl_from_time: exclusive lower bound for the time of the returned row.

    Returns:
        Tuple (time, row_label) of the first matching row, or (None, None) if there is no
        matching row (this includes a data frame without any has_contact = 0 row).
    """
    no_contact_times = df.loc[df['has_contact'] == 0, 'time']
    if no_contact_times.empty:
        # without a no-contact row every contact counts as cut off at the start
        return None, None

    # time of contact time-window must always be greater than time of 1st no-contact
    first_no_contact_time = no_contact_times.iloc[0]
    is_candidate = (
        (df['time'] > first_no_contact_time)
        & (df['time'] > excl_from_time)
        & (df['has_contact'] == 1)
    )
    contact_times = df.loc[is_candidate, 'time']
    if contact_times.empty:
        return None, None
    return contact_times.iloc[0], contact_times.index[0]


for inst in instances:
    # keep the uncleaned labels in a separate column
    inst.df['has_contact_original'] = inst.df.loc[:, 'has_contact']

    # calculate duration of 1st contact time-window
    # 1st contact time-window starts at 1st row with has_contact = 1 after inst.start_from_time, where a previous row with has_contact = 0 exists
    contact_time, _ = get_next_contact_time(inst.df, inst.start_from_time)
    if contact_time is None:
        # there is no 1st contact time-window that could serve as reference (passing None on to
        # get_contact_duration would fail on the time comparison), so every contact label counts as
        # "before the 1st actual contact time-window" and is cleared
        print(
            f"no contact time-window found for instance {os.path.basename(os.path.normpath(inst.path))}, clearing all contact labels")
        inst.df.loc[:, "has_contact"] = 0
        continue
    reference_duration, reference_start_time, reference_end_time = get_contact_duration(
        inst.df, contact_time)

    # set has_contact to 0 for all rows before 1st actual contact time-window
    inst.df.loc[inst.df["time"] < contact_time, "has_contact"] = 0

    # set has_contact to 0 for all contact time-windows with duration outside of [<lower-bound-multiplier>*reference_duration, <upper-bound-multiplier>*reference_duration]
    # -> remove faulty time-windows
    # multiplier values can be set manually in meta.json, defaults are 0.85 and 1.2
    reference_duration_multiplier_lower = inst.reference_duration_multiplier_lower if inst.reference_duration_multiplier_lower is not None else 0.85
    reference_duration_multiplier_upper = inst.reference_duration_multiplier_upper if inst.reference_duration_multiplier_upper is not None else 1.2
    # the bounds do not change while iterating over the time-windows of one instance
    min_contact_duration = reference_duration_multiplier_lower * reference_duration
    max_contact_duration = reference_duration_multiplier_upper * reference_duration

    # walk through the remaining contact time-windows, each search continues after the end of the previous one
    last_contact_end_time = reference_end_time
    while True:
        contact_time, _ = get_next_contact_time(inst.df, last_contact_end_time)
        if contact_time is None:
            break
        contact_duration, contact_start_time, contact_end_time = get_contact_duration(
            inst.df, contact_time)
        if not (min_contact_duration <= contact_duration <= max_contact_duration):
            inst.df.loc[(inst.df['time'] >= contact_start_time) & (
                inst.df['time'] <= contact_end_time), 'has_contact'] = 0
        last_contact_end_time = contact_end_time

In [None]:
# column names of the three signal groups, each with the suffixes 0..6
target_position = [f'e{n}' for n in range(7)]
target_velocity = [f'de{n}' for n in range(7)]
target_torques = [f'etau_J{n}' for n in range(7)]
target = target_torques + target_position + target_velocity

# None -> plot every target, otherwise plot exactly the configured targets (none for an empty list)
plot_target = specific_plot_target if specific_plot_target is not None else target
for column in plot_target:
    for inst in instances:
        frame = inst.df
        signal = frame[column]
        lowest = signal.min()
        amplitude = signal.max() - lowest

        # stretch the 0 / 1 labels to the value range of the signal,
        # otherwise signal and labels are not properly visible in the same plot
        frame['has_contact_scaled'] = frame['has_contact'] * amplitude + lowest
        frame['has_contact_original_scaled'] = frame['has_contact_original'] * amplitude + lowest

        # interactive plotly figure with the uncleaned labels, the signal and the cleaned labels
        traces = [
            go.Scatter(x=frame['time'], y=frame['has_contact_original_scaled'],
                       name='has contact original'),
            go.Scatter(x=frame['time'], y=frame[column],
                       mode='lines', name='robot data'),
            go.Scatter(x=frame['time'], y=frame['has_contact_scaled'],
                       name='has contact'),
        ]
        instance_name = os.path.basename(os.path.normpath(inst.path))
        figure_layout = go.Layout(
            title=f'{column} (instance {instance_name})',
            xaxis={'title': 'time(sec)'},
            yaxis={'title': 'Y-axis'},
        )
        iplot(go.Figure(data=traces, layout=figure_layout))

In [None]:
def save_data(X, y, X_filename, y_filename):
    """Save a feature matrix and its labels as .npy files in the processedData folder.

    The files are written to <dataset_repo_root_path>/processedData, which is created
    if it does not exist yet. Existing files with the same names are overwritten.

    Args:
        X: feature matrix, passed to np.save as is.
        y: labels, passed to np.save as is.
        X_filename: file name for the feature matrix.
        y_filename: file name for the labels.
    """
    processed_data_path = dataset_repo_root_path / "processedData"
    # np.save does not create missing folders
    processed_data_path.mkdir(parents=True, exist_ok=True)
    X_path = processed_data_path / X_filename
    y_path = processed_data_path / y_filename
    print(
        f"saving dataset as: {str(X_path.absolute())} / {str(y_path.absolute())}")
    np.save(str(X_path.absolute()), X)
    np.save(str(y_path.absolute()), y)


X_single_on_contact, y_single_on_contact = [], []
X_single_left_offset, y_single_left_offset = [], []
X_sliding_left_offset, y_sliding_left_offset = [], []

# 40 data points with robot data publish frequency of 200Hz -> 200ms time-windows
window_size = 40
# -20 data points offset on the left of a time window with freq. of 200Hz -> include data up to 100ms before contact for sliding windows
window_left_offset = -20
# sliding windows are moved to the right by 4 rows (= 20ms) each step
window_slide_step = 4
# sliding windows start at most 20 rows (-> 100ms) after initial contact to avoid including data too late after initial contact
window_max_offset = 20

for inst in instances:
    # NOTE: the row label returned by get_next_contact_time is used as row position below,
    # which requires inst.df to have the default RangeIndex
    row_count = len(inst.df)
    contact_end_time = -1
    while True:
        contact_time, contact_time_index = get_next_contact_time(inst.df, contact_end_time)
        if contact_time is None:
            break

        _, _, contact_end_time = get_contact_duration(inst.df, contact_time)

        # append contact time-windows to feature matrices
        # append to feature matrix where time-windows start exactly at time of contact, with fixed window size
        # a time-window that is cut off by the end of the data is skipped, it would have fewer rows than
        # the other time-windows and could not be stacked into one matrix
        if contact_time_index + window_size <= row_count:
            X_single_on_contact.append(
                inst.df.iloc[contact_time_index:contact_time_index+window_size][target].to_numpy())
            y_single_on_contact.append(inst.contact_type)

        left_offset_start = contact_time_index + window_left_offset
        if left_offset_start < 0:
            # not enough rows before the contact for the left offset
            continue

        if left_offset_start + window_size <= row_count:
            # append to feature matrix where time-windows start left (by defined offset) of time of contact, with fixed window size
            X_single_left_offset.append(
                inst.df.iloc[left_offset_start:left_offset_start+window_size][target].to_numpy())
            y_single_left_offset.append(inst.contact_type)

        # append to sliding window feature matrix: start left (by offset) of time of contact, fixed window size
        # move sliding window to the right each step and append until end of contact time-window is reached by right side of sliding window,
        # until the maximum offset is exceeded or until the end of the data is reached
        window_current_offset = window_left_offset
        while window_current_offset <= window_max_offset:
            window_start = contact_time_index + window_current_offset
            window_stop = window_start + window_size
            # the row right after the window must exist and must not lie after the end of the contact time-window
            if window_stop >= row_count or inst.df.iloc[window_stop]['time'] > contact_end_time:
                break
            X_sliding_left_offset.append(
                inst.df.iloc[window_start:window_stop][target].to_numpy())
            y_sliding_left_offset.append(inst.contact_type)
            window_current_offset += window_slide_step

X_single_on_contact = np.array(X_single_on_contact)
print("shape of single on contact feature / label matrices: ")
print(np.shape(X_single_on_contact))
print(np.shape(y_single_on_contact))
print()

X_single_left_offset = np.array(X_single_left_offset)
print("shape of single left offset from contact feature / label matrices: ")
print(np.shape(X_single_left_offset))
print(np.shape(y_single_left_offset))
print()

X_sliding_left_offset = np.array(X_sliding_left_offset)
print("shape of sliding window w/ left offset from contact feature / label matrices: ")
print(np.shape(X_sliding_left_offset))
print(np.shape(y_sliding_left_offset))
print()

if save_dataset:
    save_data(X_single_on_contact, y_single_on_contact, f"x_single_on_contact{x_y_data_suffix}.npy", f"y_single_on_contact{x_y_data_suffix}.npy")
    save_data(X_single_left_offset, y_single_left_offset, f"x_single_left_offset{x_y_data_suffix}.npy", f"y_single_left_offset{x_y_data_suffix}.npy")
    save_data(X_sliding_left_offset, y_sliding_left_offset, f"x_sliding_left_offset{x_y_data_suffix}.npy", f"y_sliding_left_offset{x_y_data_suffix}.npy")

shape of single on contact feature / label matrices: 
(48, 40, 21)
(48,)

shape of single left offset from contact feature / label matrices: 
(48, 40, 21)
(48,)

shape of sliding window w/ left offset from contact feature / label matrices: 
(528, 40, 21)
(528,)

