In [None]:
import os
from pathlib import Path
import pandas as pd
import plotly.graph_objs as go
from plotly.offline import iplot
import numpy as np

from make_folder_dataset import MakeFolderDataset

# Reload all imported modules before each cell execution (autoreload mode 2),
# so edits to project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Load environment variables from the .env file in the repo root.
%load_ext dotenv
%dotenv

# The dataset location is configured through the environment (e.g. the .env file):
# DATASET_REPO_ROOT_PATH=<absolute-path-to-dataset-repo-root-folder>
_dataset_repo_root = os.environ.get("DATASET_REPO_ROOT_PATH")
if not _dataset_repo_root:
    # Fail with an actionable message instead of the opaque TypeError that
    # Path(None) raises when the variable is missing.
    raise RuntimeError(
        "Environment variable DATASET_REPO_ROOT_PATH is not set. "
        "Set it to the absolute path of the dataset repo root folder "
        "(for example in the .env file) and re-run this cell."
    )
dataset_repo_root_path = Path(_dataset_repo_root)
test_data_path = dataset_repo_root_path / "testData"

# Contact type name -> integer class id that predictions are compared against.
labels_map = {'hard':0,'pvc_tube':1, 'soft':2}

# Length (in seconds) of the window after each contact onset that is evaluated.
evaluation_period_after_contact_sec = 0.2
# Number of consecutive prediction rows evaluated from each contact onset.
evaluation_predictions_after_contact = 10


In [None]:
def get_next_contact_time(df, excl_from_time):
    """Find the first contact row that lies after ``excl_from_time``.

    Contact rows (``has_contact == 1``) at or before the first no-contact row
    are never returned, so a contact that is already active when the frame
    starts is skipped.

    Args:
        df: DataFrame with ``time`` and ``has_contact`` columns.
        excl_from_time: Only rows with ``time`` strictly greater than this
            value are considered.

    Returns:
        ``(time, index_label)`` of the first matching row, or ``(None, None)``
        if there is none. A frame without any no-contact row also yields
        ``(None, None)`` instead of raising ``IndexError``.
    """
    no_contact_rows = df[df['has_contact'] == 0]
    if len(no_contact_rows) == 0:
        # Without a no-contact row there is no contact onset inside the frame.
        return None, None
    first_no_contact_time = no_contact_rows['time'].iloc[0]

    candidates = df[(df['time'] > first_no_contact_time)
                    & (df['time'] > excl_from_time)
                    & (df['has_contact'] == 1)]
    if len(candidates) == 0:
        return None, None
    return candidates['time'].iloc[0], candidates.index[0]

def get_contact_duration(df, time):
    """Return duration, start time and end time of the contact around ``time``.

    The contact starts at the row following the last no-contact row before
    ``time`` and ends at the row preceding the first no-contact row after
    ``time``. If no no-contact row exists on one side, the contact is taken to
    extend to the first (respectively last) row of the frame.

    The neighbouring rows are addressed as ``label + 1`` / ``label - 1``, so
    the frame is expected to have a consecutive integer index.

    Args:
        df: DataFrame with ``time`` and ``has_contact`` columns.
        time: A point in time inside the contact of interest.

    Returns:
        Tuple ``(end_time - start_time, start_time, end_time)``.
    """
    no_contact = df['has_contact'] == 0

    earlier_no_contact = df[(df['time'] < time) & no_contact].index
    if len(earlier_no_contact) > 0:
        start_time_index = earlier_no_contact[-1] + 1
    else:
        # No no-contact row before `time`: the contact is already active at
        # the start of the frame (previously this raised IndexError).
        start_time_index = df.index[0]
    start_time = df.loc[start_time_index, 'time']

    later_no_contact = df[(df['time'] > time) & no_contact].index
    if len(later_no_contact) > 0:
        end_time_index = later_no_contact[0] - 1
    else:
        # No no-contact row after `time`: the contact lasts until the last row.
        end_time_index = df.index[-1]
    end_time = df.loc[end_time_index, 'time']

    return (end_time - start_time), start_time, end_time

def _count_predictions(model_results, positions, true_label):
    """Count correct and total class predictions among the selected rows.

    Args:
        model_results: DataFrame with a ``contact_class_prediction`` column.
        positions: Positional row indices to evaluate (may be empty).
        true_label: Class id that counts as a correct prediction.

    Returns:
        ``(num_true, num_predicted)``; rows predicted as ``-1`` are left out
        of ``num_predicted``.
    """
    # Explicit integer dtype: an empty or float index array must not reach .iloc.
    selected = model_results.iloc[np.asarray(positions, dtype=int)]
    counts = selected['contact_class_prediction'].value_counts()
    # .get() keeps a missing class at 0 without affecting any other lookup.
    num_true = int(counts.get(true_label, 0))
    num_predicted = int(counts[counts.index != -1].sum())
    return num_true, num_predicted


def evaluation(path, inst: MakeFolderDataset):
    """Print classification statistics for one instance and plot its results.

    Every contact in ``inst.model_results`` is evaluated in two ways:

    * all rows from contact onset up to ``evaluation_period_after_contact_sec``
      seconds later, and
    * the first ``evaluation_predictions_after_contact`` rows from contact
      onset (clipped at the end of the frame).

    A prediction is correct when it equals ``labels_map[inst.contact_type]``.
    Predictions equal to ``-1`` are excluded from the totals (presumably "no
    class predicted" -- confirm against the model output format).

    Row labels are used as positions for ``.iloc``, so ``inst.model_results``
    is expected to carry the default ``0..n-1`` index.

    Args:
        path: Folder of the instance; only ``path.name`` is used, for output.
        inst: Dataset instance providing ``model_results``, ``true_label`` and
            ``contact_type``.
    """
    results = inst.model_results
    n_rows = len(results)

    time_window_positions = []
    first_n_positions = []
    last_contact_end_time = -1
    while True:
        contact_time, contact_index = get_next_contact_time(results, last_contact_end_time)
        if contact_time is None:
            break
        _, _, last_contact_end_time = get_contact_duration(results, contact_time)

        window_end = contact_time + evaluation_period_after_contact_sec
        in_window = (results['time'] >= contact_time) & (results['time'] <= window_end)
        time_window_positions.extend(results[in_window].index)

        # Clip at the end of the frame: a contact that starts within the last
        # few rows would otherwise make .iloc raise an out-of-bounds IndexError.
        stop = min(contact_index + evaluation_predictions_after_contact, n_rows)
        first_n_positions.extend(range(contact_index, stop))

    true_label = labels_map[inst.contact_type]
    num_true_time, num_predicted_time = _count_predictions(
        results, time_window_positions, true_label)
    num_true_nof_predictions, num_predicted_nof_predictions = _count_predictions(
        results, first_n_positions, true_label)

    print(f"instance: {path.name} (target class: {inst.contact_type})\n")

    print(f"evaluated model results up to {evaluation_period_after_contact_sec}sec after first contact time")
    print("correctly classified predictions: ", num_true_time)
    print("total predictions:", num_predicted_time)
    if num_predicted_time > 0:
        accuracy_text = str((num_true_time/num_predicted_time)*100) + "%"
    else:
        # Nothing to divide by: no class prediction fell into any window.
        accuracy_text = "n/a (no class predictions in the evaluated window)"
    print("accuracy: ", accuracy_text + "\n")

    print(f"evaluated first {evaluation_predictions_after_contact} predictions per contact")
    print("correctly classified predictions: ", num_true_nof_predictions)
    print("total predictions:", num_predicted_nof_predictions)

    # Contact label, per-row prediction correctness and raw predicted class over time.
    trace1 = go.Scatter(x=inst.true_label['time'], y=inst.true_label['DATA0'], name='contact')
    trace2 = go.Scatter(x=inst.model_results['time'], y=inst.model_results['correctly_classified'], name='prediction correctness')
    trace3 = go.Scatter(x=inst.model_results['time'], y=inst.model_results['contact_class_prediction'], name='prediction',mode="markers")
    data = [trace1, trace3, trace2]
    layout = go.Layout(title=f'(instance {path.name})',
                    xaxis=dict(title='time(sec)'),
                    yaxis=dict(title='Y-axis'))
    fig = go.Figure(data=data, layout=layout)
    iplot(fig)


In [None]:
# Collect one (folder, dataset) pair per recording folder below the test data root.
instances: list[tuple[Path, MakeFolderDataset]] = []
for p in test_data_path.iterdir():
    # Skip plain files and the folder named "_ignore".
    if not p.is_dir() or p.name == "_ignore":
        continue

    instance = MakeFolderDataset(p.absolute())
    instance.extract_robot_data()
    instance.get_labels_all()

    model_results_raw = pd.read_csv(str((p / "model_result.csv").absolute()))
    # Timestamp of each prediction relative to the start of the recording.
    # NOTE(review): Time_nsec is added without any scaling -- confirm that the
    # column already holds seconds rather than nanoseconds.
    model_results_raw['time'] = (
        model_results_raw['Time_sec'] + model_results_raw['Time_nsec'] - instance.init_time
    )

    # Attach the contact label valid at each prediction time (matches further
    # than 0.02 apart in `time` are left empty) and treat unmatched rows as
    # no contact.
    model_results_labelled = pd.merge_asof(
        left=model_results_raw,
        right=instance.true_label[["time", "DATA0"]],
        on="time",
        tolerance=0.02,
    ).rename(columns={"DATA0": "has_contact"})
    model_results_labelled["has_contact"] = model_results_labelled["has_contact"].fillna(0)

    # 1 where the predicted class equals the instance's target class, else 0.
    model_results_labelled['correctly_classified'] = (
        model_results_labelled['contact_class_prediction'] == labels_map[instance.contact_type]
    ).astype(int)

    instance.model_results = model_results_labelled
    instances.append((p, instance))

# Evaluate the instances in alphabetical order of their folder names.
instances.sort(key=lambda entry: entry[0].name)
for inst in instances:
    evaluation(*inst)
