In [33]:
import os
import plotly.graph_objs as go
from plotly.offline import iplot
from pathlib import Path
import numpy as np
import pandas as pd

In [34]:
# Enable IPython's autoreload extension in mode 2, so that external modules are reloaded
# automatically (otherwise changes to those modules won't take effect in the notebook
# until the kernel is restarted).
%load_ext autoreload
%autoreload 2
# project-local helper; each MakeFolderDataset wraps one raw-data instance folder (see the loading cell)
from make_folder_dataset import MakeFolderDataset

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [35]:
# set True / False to enable / disable persisting the generated data set (x_data.npy / y_data.npy) to disk
save_dataset = True
# names of the raw-data instance folders to load; set to None to load every instance folder
specific_instances = ["a4_soft", "a4_hard", "a4_plasticbottle"]

In [36]:
# load dataset instances
# root_path is the dataset base folder on the local machine; raw instance folders are
# expected under <root_path>/rawData/<instance-name>/ (adjust root_path when running elsewhere)
instances = []
root_path = Path('/home/mindlab/humanObjectDetectionDataset')
#root_path = Path("C:\\Users\\juhe9\\repos\\MasterProject\\humanObjectDetectionDataset\\")
raw_data_path = root_path / "rawData"

# Path.iterdir() yields entries in arbitrary order; sort them so that the order of
# `instances` (and therefore the row order of the generated data set) is reproducible
for p in sorted(raw_data_path.iterdir()):
    # only folders are instances; "_ignore" is reserved for excluded data
    if not p.is_dir() or p.name == "_ignore":
        continue
    # optional whitelist of instance names (None = load everything)
    if specific_instances is not None and p.name not in specific_instances:
        continue
    instance = MakeFolderDataset(p.absolute())
    instance.extract_robot_data()
    instance.get_labels_all()
    instances.append(instance)

print(f"found {len(instances)} instances")

found 3 instances


In [37]:
# add contact labels from true_label dataframe to robot-data dataframe for all instances
for inst in instances:
    # rename the label column up front instead of renaming in place after the merge
    contact_labels = inst.true_label[["time", "DATA0"]].rename(columns={"DATA0": "has_contact"})
    # drop a has_contact column left over from a previous run of this cell, otherwise
    # re-running it would create a duplicate "has_contact" column
    robot_data = inst.df.drop(columns=["has_contact"], errors="ignore")

    # attach to each robot-data row the most recent label that is at most 0.02 s old
    # (merge_asof requires both frames to be sorted by "time")
    inst.df = pd.merge_asof(left=robot_data, right=contact_labels, on="time", tolerance=0.02)
    # rows without a matching label are treated as "no contact"
    inst.df["has_contact"] = inst.df["has_contact"].fillna(0)

    # if the 2nd row is in contact, mark the 1st row as in contact too
    # NOTE(review): presumably the 1st robot-data row can precede the 1st label and would
    # otherwise stay 0 -- confirm
    if len(inst.df) > 1 and inst.df.loc[1, 'has_contact'] == 1:
        inst.df.loc[0, 'has_contact'] = 1

In [38]:
# clean up faulty (true_label) sensor data
# IMPORTANT: assume that the first measurement (time-window with has_contact = 1) is correct and can be used as a reference point to clean the remaining instance
# thus it must manually be verified that the first measurement of each instance is indeed correct

def get_contact_duration(df, time):
    """Return duration, start and end time of the contact time-window containing `time`.

    The window is delimited by the last row with ``has_contact == 0`` before `time`
    and the first row with ``has_contact == 0`` after `time`. If no such row exists
    on one side, the window extends to the first / last row of `df` on that side.

    Neighbouring rows are addressed as "index label +/- 1", so `df` must have
    consecutive integer index labels (e.g. the default RangeIndex).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``time`` and a ``has_contact`` column.
    time : float
        A point in time inside the contact time-window of interest.

    Returns
    -------
    tuple
        ``(end_time - start_time, start_time, end_time)``
    """
    is_no_contact = df['has_contact'] == 0
    # index labels of all no-contact rows before / after the given time
    # (built from `df` itself -- not from any notebook-global dataframe)
    rows_before = df.index[((df['time'] < time) & is_no_contact).to_numpy()]
    rows_after = df.index[((df['time'] > time) & is_no_contact).to_numpy()]

    # the window starts right after the last preceding no-contact row ...
    start_time_index = rows_before[-1] + 1 if len(rows_before) > 0 else df.index[0]
    # ... and ends right before the next no-contact row (or at the end of the data
    # if there is no row with has_contact = 0 after the specified time)
    end_time_index = rows_after[0] - 1 if len(rows_after) > 0 else df.index[-1]

    start_time = df.loc[start_time_index, 'time']
    end_time = df.loc[end_time_index, 'time']
    return (end_time - start_time), start_time, end_time


def get_next_contact_time(df, excl_from_time):
    """Find the first contact row that lies after `excl_from_time`.

    Only rows after the first no-contact row (``has_contact == 0``) are considered,
    which excludes a cut-off contact time-window at the start of a measurement.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``time`` and a ``has_contact`` column.
    excl_from_time : float
        Rows with ``time <= excl_from_time`` are ignored.

    Returns
    -------
    tuple
        ``(time, index_label)`` of the first matching row, or ``(None, None)`` if
        there is no such row (including the case that `df` contains no no-contact
        row at all).
    """
    no_contact_times = df.loc[df['has_contact'] == 0, 'time']
    if no_contact_times.empty:
        # without any no-contact row there is no complete contact time-window
        return None, None
    first_no_contact_time = no_contact_times.iloc[0]

    candidates = df[(df['time'] > first_no_contact_time)
                    & (df['time'] > excl_from_time)
                    & (df['has_contact'] == 1)]
    if candidates.empty:
        return None, None
    return candidates['time'].iloc[0], candidates.index[0]


# a contact time-window is kept only if its duration lies within
# [MIN_DURATION_FACTOR, MAX_DURATION_FACTOR] * duration of the reference (1st) window
MIN_DURATION_FACTOR = 0.85
MAX_DURATION_FACTOR = 1.5

for inst in instances:
    # keep the unmodified labels in 'has_contact_original'; when this cell is re-run,
    # restore them first so that the clean-up always starts from the original labels
    if 'has_contact_original' in inst.df.columns:
        inst.df['has_contact'] = inst.df['has_contact_original'].copy()
    else:
        inst.df['has_contact_original'] = inst.df['has_contact'].copy()

    # calculate duration of 1st contact time-window
    # 1st contact time-window starts at 1st row with has_contact = 1, where a previous row with has_contact = 0 exists
    contact_time, _ = get_next_contact_time(inst.df, inst.start_from_time)
    if contact_time is None:
        # without a reference window the remaining windows cannot be validated
        raise ValueError(
            f"no contact time-window found after start_from_time for instance {inst.path}")
    reference_duration, reference_start_time, reference_end_time = get_contact_duration(
        inst.df, contact_time)

    # set has_contact to 0 for all rows before 1st actual contact time-window
    inst.df.loc[inst.df["time"] < contact_time, "has_contact"] = 0

    # set has_contact to 0 for all contact time-windows whose duration is outside of the
    # accepted range -> remove faulty contact time-windows
    # (as later only data during contact (has_contact = 1) will be used)
    min_duration = MIN_DURATION_FACTOR * reference_duration
    max_duration = MAX_DURATION_FACTOR * reference_duration
    last_contact_end_time = reference_end_time
    while True:
        contact_time, _ = get_next_contact_time(inst.df, last_contact_end_time)
        if contact_time is None:
            break
        contact_duration, contact_start_time, contact_end_time = get_contact_duration(
            inst.df, contact_time)
        if not (min_duration <= contact_duration <= max_duration):
            inst.df.loc[(inst.df['time'] >= contact_start_time) & (
                inst.df['time'] <= contact_end_time), 'has_contact'] = 0
        # continue the search behind the window that was just inspected
        last_contact_end_time = contact_end_time

In [39]:
# column groups of the robot-data dataframe (7 columns each)
target_position = [f'e{joint}' for joint in range(7)]
target_velocity = [f'de{joint}' for joint in range(7)]
target_torques = [f'etau_J{joint}' for joint in range(7)]
# torque columns followed by position columns
target = target_torques + target_position

# robot-data column that is plotted together with the contact labels
plotted_column = "etau_J0"

for inst in instances:
    signal = inst.df[plotted_column]
    signal_min = signal.min()
    signal_range = signal.max() - signal_min

    # stretch the 0/1 labels over the value range of the signal, otherwise measurement
    # and label are not properly visible in the same plot
    inst.df['has_contact_scaled'] = inst.df['has_contact'] * signal_range + signal_min
    inst.df['has_contact_original_scaled'] = (
        inst.df['has_contact_original'] * signal_range + signal_min)

    # interactive plotly figure: original labels, robot signal, cleaned labels
    instance_name = os.path.basename(os.path.normpath(inst.path))
    figure = go.Figure(
        data=[
            go.Scatter(x=inst.df['time'], y=inst.df['has_contact_original_scaled'],
                       name='has contact original'),
            go.Scatter(x=inst.df['time'], y=inst.df[plotted_column],
                       mode='lines', name='robot data'),
            go.Scatter(x=inst.df['time'], y=inst.df['has_contact_scaled'],
                       name='has contact'),
        ],
        layout=go.Layout(
            title=f'{plotted_column} (instance {instance_name})',
            xaxis=dict(title='time(sec)'),
            yaxis=dict(title='Y-axis'),
        ),
    )
    iplot(figure)

In [40]:
# ToDo: improve code duplication situation
# (can't fully integrate this in the above loop though, because some contact time-windows are removed there, thus need to iterate twice)

X, y = [], []
# number of consecutive robot-data rows taken per contact time-window
# NOTE(review): the original comment stated "40 data points at 40Hz -> 200ms"; 40 samples at
# 40 Hz would span 1 s (200 ms would need 200 Hz) -- confirm the actual publish frequency
window_size = 40

for inst in instances:
    last_contact_end_time = -1
    while True:
        contact_time, contact_time_index = get_next_contact_time(
            inst.df, last_contact_end_time)
        if contact_time is None:
            break

        # to equalize the time-window size, every window consists of window_size rows taken
        # from the first row of a contact time-window
        # (contact_time_index is an index label used as a position here, which is valid for
        # the default RangeIndex of inst.df)
        window = inst.df.iloc[contact_time_index:contact_time_index +
                              window_size][target].to_numpy()
        if len(window) == window_size:
            # append contact time-window to feature matrix
            X.append(window)
            y.append(inst.contact_type)
        else:
            # a window cut off by the end of the recording would make X ragged
            print(f"skipping truncated time-window at t={contact_time} (instance {inst.path})")

        # continue the search behind the current contact time-window
        _, _, contact_end_time = get_contact_duration(inst.df, contact_time)
        last_contact_end_time = contact_end_time

X = np.array(X)
print(np.shape(X))
print(np.shape(y))

if save_dataset:
    processed_data_path = root_path / "processedData"
    # np.save does not create missing folders
    processed_data_path.mkdir(parents=True, exist_ok=True)
    x_data_path = processed_data_path / "x_data.npy"
    y_data_path = processed_data_path / "y_data.npy"
    np.save(str(x_data_path.absolute()), X)
    np.save(str(y_data_path.absolute()), y)

(39, 40, 7)
(39,)
