# Inhouse data evaluation

Evaluate the RTT and Tango measurements of our robot moving along given trajectories in BC329, at EPFL. See *experiments/robot_test* folder.

In [None]:
import math
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

import os

%matplotlib notebook
%reload_ext autoreload
%autoreload 2

np.set_printoptions(precision=2)

In [None]:
# Uncomment to list everything that is available in the experiment folder:
#print(os.listdir('experiments/robot_test/'))

# Recordings grouped as "in model" (the only group analyzed so far, see TODO below).
in_model = [
    'circle2_double.csv',
    'circle3_triple.csv',
    'clover.csv',
    'eight2_double.csv',
    'rounds.csv',
    'straight1.csv',
    'straight2.csv',
    'straight3.csv',
    'straight4.csv',
    'straight5.csv',
    'straight6.csv',
    'triangle_double.csv',
]

# Recordings grouped as "out of model"; not analyzed yet.
out_of_model = [
    'pentagone_double.csv',
    'walking.csv',
    'stopping.csv',
    'walking_circle1.csv',
    'walking_circle2.csv',
    'walking_circle3.csv',
]

# TODO: currently we have analyzed only in_model.
# Switch the group processed by all following cells here.
names = in_model
#names = out_of_model

# Default dataset (first entry of the active group) and the anchor table.
datafile = '{}{}'.format('experiments/robot_test/', names[0])
anchorsfile = 'experiments/anchors.csv'

from global_variables import TANGO_SYSTEM_ID


In [None]:
# Load the anchor table from `anchorsfile`; `anchors_df` is reused by most cells below.
from evaluate_dataset import read_anchors_df
anchors_df = read_anchors_df(anchorsfile)
# Show the first rows as a quick sanity check.
anchors_df.head()

# Distance filtering

## Resampling

In [None]:
# Safety guard: this cell overwrites the '*_resampled.pkl' files of every dataset in `names`.
raise ValueError('Comment this out if you really want to regenerate results.')

from evaluate_dataset import resample, read_dataset

for name in names:
    datafile = 'experiments/robot_test/' + name
    # splitext removes only the file extension, so a dot elsewhere in the path is harmless.
    datafile_name = os.path.splitext(datafile)[0]
    resample_name = datafile_name + '_resampled.pkl'
    print('Creating new results: {}'.format(resample_name))

    data_df = read_dataset(datafile, anchors_df)
    tmax = data_df.timestamp.max()
    tmin = data_df.timestamp.min()

    # Both systems are resampled on the same time grid (t_delta=0.5), but with
    # different window sizes: 1.0 for 'Range' and 0.1 for 'GT'.
    resampled_df_rtt = resample(data_df, t_range=[tmin, tmax], t_delta=0.5, t_window=1.0, system_id='Range')
    resampled_df_tango = resample(data_df, t_range=[tmin, tmax], t_delta=0.5, t_window=0.1, system_id='GT')

    # Merge the two systems again and restore a time-ordered table before saving.
    resampled_df = pd.concat((resampled_df_rtt, resampled_df_tango), ignore_index=True)
    resampled_df.sort_values(["timestamp", "anchor_name"], inplace=True)
    resampled_df.to_pickle(resample_name)
    print('Saved as', resample_name)

## Find calibration vs. trajectory times

In [None]:
%matplotlib notebook
from evaluate_dataset import read_correct_dataset

# hard-coded time indices of datasets where automatic detection did not work. 

# We can do below either with the raw dat or with resampled data. 
use_raw = False # set True if you want to use raw data. Probably better to use resampled 


simplified = dict(
    resampled = { # [start_idx, end_idx] for each trajectory bit.
    'experiments/robot_test/circle3_triple.csv':[[67,155],[160, 250],[299,386]],
    'experiments/robot_test/clover.csv':[[37, 382]], 
    'experiments/robot_test/eight2_double.csv': [[40, 412]]
    }, 
    raw = {
    'experiments/robot_test/circle3_triple.csv':[[67,155],[160, 250],[299,386]],
    'experiments/robot_test/clover.csv':[[37, 382]], 
    'experiments/robot_test/eight2_double.csv': [[40, 412]]
    }
)


for datafile in sorted(simplified.keys()):
    print("not remaking unnecessary plots."); continue
    # below was used to find above hardcoded values. 
    data_df = read_correct_dataset(datafile, anchors_df, use_raw)
    
    tango_df = data_df.loc[data_df.system_id=="GT"]
    tango_df.loc[:, "length"] = get_length(tango_df)
    
    fig = plt.figure()
    plt.plot(tango_df.index, tango_df.length)
    plt.title(datafile)
    plt.ylim([0, 0.1])

In [None]:
raise ValueError('Comment this out if you really want to regenerate results.')

%matplotlib inline
import json

from evaluate_dataset import find_start_times, find_end_times
from evaluate_dataset import get_length, find_calibration_data


for name in names:
    
    # calibration_data will store the start and end times of the calibration dataset.
    # for example, calibration_data[10] = [t1, t2] means that for the trajectory
    # starting at index 10, the times between t1 and t2 are valid calibration times. 
    calibration_data = {
         'calibration':[],
         'trajectory':[]
    }
    
    datafile = 'experiments/robot_test/' + name
    datafile_root = datafile.split('.')[0]
    data_df = read_correct_dataset(datafile, anchors_df, use_raw)

    tango_df = data_df.loc[data_df.system_id=="GT"]
    tango_df.loc[:, "length"] = get_length(tango_df, plot=False)
    print(len(tango_df.length))
    
    plt.figure()
    plt.plot(tango_df.timestamp, tango_df.length)
    plt.title(datafile_root)
    
    
    simplified_picked = simplified['raw'] if use_raw else simplified['resampled']

    if datafile in simplified_picked.keys():
        print('using simplified calculation for', datafile_root)
        
        times = tango_df.timestamp.values
        
        tuples = simplified_picked[datafile] # [start_move, end_move]
        start_indices = sorted([t[0] for t in tuples])
        end_indices = sorted([t[1] for t in tuples])
        
        calibration_data['calibration'].append([times[0], times[start_indices[0]]])
        calibration_data['trajectory'].append([times[start_indices[0]], times[end_indices[0]]])
        for s, e1, e2 in zip(start_indices[1:], end_indices[:-1], end_indices[1:]):
            calibration_data['calibration'].append([times[e1], times[s]])
            calibration_data['trajectory'].append([times[s], times[e2]])
    else:
        start_times, start_indices = find_start_times(tango_df, plot=False)
        end_times, end_indices = find_end_times(tango_df, plot=False)
        calibration_data = find_calibration_data(tango_df, start_times, start_indices)
        calibration_data['trajectory'][-1][1] = end_times[-1]
        
    print(calibration_data)
    for idx, [start_time, end_time] in enumerate(calibration_data['calibration']):
        plt.plot([start_time, start_time], [tango_df.length.min(), tango_df.length.max()], 
                 color='green')
        plt.plot([end_time, end_time], [tango_df.length.min(), tango_df.length.max()], 
                 color='orange')
    for idx, [__, end_time] in enumerate(calibration_data['trajectory']):
        plt.plot([end_time, end_time], [tango_df.length.min(), tango_df.length.max()], 
                 color='red')
        
    timesfile = datafile_root + '_times.json'
    with open(timesfile, 'w+') as file:
        valid_data = calibration_data
        json.dump(valid_data, file)
    print('saved as ', timesfile)

## Calibrate distances

In [None]:
# Safety guard: this cell overwrites the '*_calibrated.pkl' files of every dataset in `names`.
raise ValueError('Comment this out if you really want to regenerate results.')
import json
from evaluate_dataset import apply_calibrate

for name in names:
    datafile = 'experiments/robot_test/' + name
    datafile_root = datafile.split('.')[0]
    data_df = read_correct_dataset(datafile, anchors_df, use_raw)

    # Time windows written by the "Find calibration vs. trajectory times" cell.
    timesfile = datafile_root + '_times.json'
    with open(timesfile, 'r') as file:
        calibration_data = json.load(file)

    ########### Calibrate using all 'Range' measurements of this dataset
    print('calibrating {}....'.format(datafile_root))
    rtt_df = data_df[data_df.system_id=='Range']

    # calculate offsets (measured distance minus ground-truth distance) per anchor
    calib_dict = dict()
    for anchor_name, df in rtt_df.groupby('anchor_name'):
        errors = df.distance.values - df.distance_gt.values
        calib_dict[anchor_name] = dict(all_mean=np.mean(errors), all_median=np.median(errors))

    # calibrate distances
    data_df.loc[:, 'distance_mean_all'] = data_df.apply(lambda row: apply_calibrate(row, calib_dict, 'all_mean'), axis=1)
    data_df.loc[:, 'distance_median_all'] = data_df.apply(lambda row: apply_calibrate(row, calib_dict, 'all_median'), axis=1)

    ########### Calibrate each trajectory bit j using only its own calibration window
    for j, (start_calib, end_calib) in enumerate(calibration_data['calibration']):
        (start_traj, end_traj) = calibration_data['trajectory'][j]

        # calculate offsets from the measurements inside the calibration window
        calib_dict = dict()
        for anchor_name, df in rtt_df.groupby('anchor_name'):
            df = df[(df.timestamp >= start_calib) & (df.timestamp <= end_calib)]
            errors = df.distance.values - df.distance_gt.values
            calib_dict[anchor_name] = dict(mean=np.mean(errors), median=np.median(errors))

        # calibrate distances
        data_df.loc[:, 'distance_mean_{}'.format(j)] = data_df.apply(lambda row: apply_calibrate(row, calib_dict, 'mean'), axis=1)
        data_df.loc[:, 'distance_median_{}'.format(j)] = data_df.apply(lambda row: apply_calibrate(row, calib_dict, 'median'), axis=1)

        # Save only the rows that belong to trajectory bit j.
        reduced_df = data_df[(data_df.timestamp >= start_traj) & (data_df.timestamp <= end_traj)]
        reduced_name = '{}_{}_calibrated.pkl'.format(datafile_root, j)
        reduced_df.to_pickle(reduced_name)
        print('saved as', reduced_name)

    # Save the full dataset with all calibration columns.
    calibrate_name = datafile_root + '_calibrated.pkl'
    data_df.to_pickle(calibrate_name)
    print('saved as', calibrate_name)

## Calibrate using static positions

In [None]:
# TODO use calib1-calib5 datasets for calibration. Not sure if we want to do this, 
# but it could be a more realistic calibration variant compared to the others.

# Plot results

### Run the cell below first. Then either inspect individual plots, or jump down to regenerate all plots.

In [None]:
%matplotlib inline

from plotting_tools import make_dirs_safe, read_plot_df

# NOTE(review): the first assignment is immediately overwritten by the second,
# so only 'circle3_triple_2' takes effect.
name = names[0]
name = 'circle3_triple_2'

# Folder under which the batch-plotting cell further down stores its figures.
out_dir = 'experiments/robot_test/plots/'
make_dirs_safe(out_dir)
print('will save under', out_dir)

# data_df and plot_df are consumed by the individual plotting cells that follow.
data_df, plot_df = read_plot_df(name)
plot_df.head()

In [None]:
data_df.head()

In [None]:
from plotting_tools import plot_cdfs

# NOTE(review): `filename` is only printed here and is not passed to plot_cdfs,
# whereas the batch cell further down passes a file name as second argument.
# Presumably intentional (interactive inspection only) -- confirm.
filename = '{}{}_cdfs.png'.format(out_dir, name.split('.')[0])
print(filename)
plot_cdfs(plot_df)

In [None]:
from plotting_tools import plot_times

# Interactive inspection only: no file name is passed (the batch cell below passes one).
plot_times(plot_df)

In [None]:
from plotting_tools import plot_rssis
        
# NOTE(review): plot_rssis is called only in this cell; the batch cell below does not call it.
plot_rssis(plot_df)

In [None]:
from plotting_tools import plot_tango_components

# Uses data_df (not plot_df); no file name is passed (the batch cell below passes one).
plot_tango_components(data_df)

In [None]:
from plotting_tools import plot_tango_2d
    
# An empty string is passed as third argument here; the batch cell below passes
# the output path via the `filename` keyword instead.
plot_tango_2d(data_df, anchors_df, '')

## (can directly jump here) Plot all and save

In [None]:
# Batch export: for every dataset, draw the four plot kinds and save each of them
# under both naming orders ('<kind>_<name>.png' first, then '<name>_<kind>.png').
for name in names:
    data_df, plot_df = read_plot_df(name)
    pure_name = name.split('.')[0]
    skeleton = out_dir + '{}_{}.png'

    for kind_first in (True, False):
        # Resolve the four output paths for the current naming order.
        targets = {}
        for kind in ('cdf', 'time', 'tango_components', 'tango_2d'):
            parts = (kind, pure_name) if kind_first else (pure_name, kind)
            targets[kind] = skeleton.format(*parts)

        plot_cdfs(plot_df, targets['cdf'])
        plot_times(plot_df, targets['time'])
        plot_tango_components(data_df, targets['tango_components'])
        plot_tango_2d(data_df, anchors_df=anchors_df, filename=targets['tango_2d'])

# (in beta-stage) Position prediction

In [None]:
from trajectory_creator import get_trajectory
from evaluate_dataset import get_length, find_start_times, find_end_times

# TODO read the correct trajectory
name = 'circle2_double.csv'
datafile = 'experiments/robot_test/'  # folder only; a later cell appends a file name to it
data_df, plot_df = read_plot_df(name)
# .copy() so that adding the "length" column below does not write into a slice of data_df.
tango_df = data_df[data_df.system_id=='GT'].copy()

# `simplified` is keyed by 'raw'/'resampled' first and by the full dataset path
# second, so look up the full path in the selected sub-table.
# NOTE(review): assumes `use_raw` matches the data returned by read_plot_df -- confirm.
simplified_picked = simplified['raw'] if use_raw else simplified['resampled']
datafile_path = datafile + name

print(datafile_path, simplified_picked)
if datafile_path in simplified_picked:
    print('using simplified.')
    tuples = simplified_picked[datafile_path]
    start_indices = [t[0] for t in tuples]
    end_indices = [t[1] for t in tuples]
    print(start_indices, end_indices)
else:
    start_times, start_indices = find_start_times(tango_df, plot=False)
    end_times, end_indices = find_end_times(tango_df, plot=False)

print(name)
trajectory = get_trajectory(name)

df_predicted = pd.DataFrame(index=range(len(tango_df)), columns=["px", "py", "pz", "timestamp", "model_timestamp"])
# Assign by position (.values): tango_df keeps the row labels of data_df, so a
# label-aligned assignment would not match the 0..N-1 index of df_predicted.
df_predicted.loc[:, "timestamp"] = tango_df.timestamp.values
df_predicted.reset_index(inplace=True, drop=True)

tango_df.loc[:, "length"] = get_length(tango_df)

previous_end = 0
for i, i0 in enumerate(start_indices):
    iN = end_indices[i]

    indices = np.arange(i0, iN)

    # TODO we are relying on tango-lengths here instead of constant velocity.
    # should be changed.

    lengths = tango_df.length.values[indices]

    # Travelled distance along the trajectory at each sample of this bit.
    distances = np.cumsum(lengths)
    times, _, _ = trajectory.get_times_from_distances(arbitrary_distances=distances, plot=False)

    basis = trajectory.get_basis(times=times)
    points = trajectory.get_sampling_points(basis=basis)

    plt.figure()
    trajectory.plot(basis=basis)
    plt.scatter(points[0, :], points[1, :])
    plt.axis('equal')

    # .loc slices are label-based and inclusive, hence iN-1 for iN-i0 rows.
    df_predicted.loc[i0:iN-1, ["px", "py"]] = points.T
    df_predicted.loc[i0:iN-1, "model_timestamp"] = times

    # fill also the stationary points:
    # before the start with the first point, after the end with the last point.
    df_predicted.loc[previous_end:i0, ["px", "py"]] = points.T[0]
    # Open-ended slice: a label slice ending at -1 selects nothing on a 0..N-1 index.
    df_predicted.loc[iN:, ["px", "py"]] = points.T[-1]
    previous_end = iN
df_predicted.loc[:, "pz"] = 0.0

In [None]:
# Scatter the predicted (px, py) positions, colored by timestamp.
plt.figure()
sns.scatterplot(data=df_predicted, x='px', y='py', hue='timestamp', 
               linewidth=0.0, alpha=0.1)
# Show the first predicted rows below the figure.
df_predicted.head()

### Rotation and translation example

In [None]:
from evaluate_dataset import match_reference

# Stack the x/y coordinates as 2 x N arrays: predicted positions and Tango positions.
predicted_xy = np.array([df_predicted.px.values, df_predicted.py.values], dtype=float)
tango_xy = np.array([tango_df.px.values, tango_df.py.values], dtype=float)

# Keep only the columns for which a prediction exists (no NaN in the predicted point).
has_prediction = ~np.isnan(predicted_xy).any(axis=0)
R = tango_xy[:, has_prediction]
p = predicted_xy[:, has_prediction]

rotated, _ = match_reference(R, p)

# Overlay the Tango points, the raw predictions and the matched predictions.
for xy, label in ((R, "Tango"), (p, "expected"), (rotated, "expected to Tango")):
    plt.plot(xy[0], xy[1], "o", label=label, alpha=0.3)
plt.axis('equal')
plt.legend()
plt.show()

# Additional processing 

## This used to be useful. Could probably be deleted at some point.

In [None]:
from evaluate_dataset import add_median_raw, add_median_raw_rolling
from evaluate_dataset import read_dataset
import time

# Use the explicit path: `datafile` is rebound by many other cells, so building the
# path from it only works when the notebook is run in one particular order.
data_df = read_dataset('experiments/robot_test/circle2_double.csv', anchors_df)

# Our (centered) way
# perf_counter is the clock intended for measuring short durations.
t1 = time.perf_counter()
data_df = add_median_raw(data_df)
print("took {:.2f} seconds.".format(time.perf_counter() - t1))

# Alternative (non-centered) way.
t1 = time.perf_counter()
data_df = add_median_raw_rolling(data_df)
print("took {:.2f} seconds.".format(time.perf_counter() - t1))

In [None]:
%matplotlib inline 

from evaluate_dataset import add_gt_raw, apply_distance_gt
#data_df = add_gt_raw(data_df)
data_df.loc[:, "distance_gt"] = data_df.apply(lambda row: apply_distance_gt(row, anchors_df), axis=1)
fg = sns.FacetGrid(data_df, col='anchor_name', col_wrap=4, col_order=sorted(data_df.anchor_name.unique()))
fg.map(plt.plot, 'timestamp', 'distance')
fg.map(plt.plot, 'timestamp', 'distance_gt')

def plot_distances(data_df):
    """Plot measured and ground-truth distances over time, one panel per Range anchor.

    The anchors are taken from the notebook-level ``anchors_df`` (rows with
    ``system_id == 'Range'``). In each panel the dotted line is ``distance_gt``
    and the solid line is ``distance``; both share the anchor's color.

    :param data_df: dataframe with the columns ``anchor_id``, ``timestamp``,
        ``distance`` and ``distance_gt``.
    """
    import itertools

    rtt_ids = anchors_df[anchors_df.system_id=='Range'].anchor_id.unique()
    if len(rtt_ids) == 0:
        # Nothing to plot; plt.subplots would reject zero columns.
        return

    colors = itertools.cycle(plt.get_cmap('tab10').colors)
    # squeeze=False always returns a 2D array of axes, also for a single anchor.
    fig, axs = plt.subplots(1, len(rtt_ids), sharey=True, squeeze=False)
    fig.set_size_inches(15, 5)
    for ax, anchor_id in zip(axs[0], rtt_ids):
        color = next(colors)
        data = data_df[data_df.anchor_id==anchor_id]
        # Take the name itself; the .loc selection alone would be a Series.
        anchor_name = anchors_df.loc[anchors_df.anchor_id==anchor_id, 'anchor_name'].iloc[0]
        ax.plot(data.timestamp, data.distance_gt, linestyle=':', label=anchor_name, color=color)
        ax.plot(data.timestamp, data.distance, linestyle='-', color=color)
        ax.set_ylim(0, 15)
        # Show the label; without a legend the label above is never displayed.
        ax.legend()
    
# Draw the per-anchor distance panels for the currently loaded dataset.
plot_distances(data_df)
plt.show()

In [None]:
from evaluate_dataset import convert_room_to_robot

import os
folder = 'experiments/robot_test/'
file_list = os.listdir(folder)
plt.figure()

# End points of the reference lines and their common start point, one column per
# point, converted with convert_room_to_robot before plotting.
end_points_lines_room = np.array([
    [5.046, 5.615, 0.0],
    [4.869, 4.231, 0.0],
    [5.870, 1.830, 0.0],
    [4.254, 1.567, 0.0],
    [2.924, 1.867, 0.0],
    [1.198, 1.416, 0.0]]).T
start_point_room = np.array([1.034, 5.410, 0.0]).reshape((3, 1))
end_points_lines = convert_room_to_robot(end_points_lines_room)
start_point = convert_room_to_robot(start_point_room)
# Flatten so that x and y are scalars; mixing length-1 arrays with scalars in the
# coordinate lists passed to plt.plot below gives ragged input.
# NOTE(review): assumes convert_room_to_robot keeps the single-point layout -- confirm.
start_xy = np.asarray(start_point).ravel()[:2]

# A dedicated loop variable keeps the notebook-wide `datafile` untouched.
for csv_name in sorted(file_list):
    if not csv_name.endswith('.csv'):
        continue
    # Keep 'straight*' and 'calib*' recordings, but skip names starting with 'calibratio'.
    if not csv_name.startswith(('straight', 'calib')) or csv_name.startswith('calibratio'):
        continue
    data_df = pd.read_csv(folder + csv_name)
    #data_df.loc[:, 'seconds'] = (data_df.timestamp.values - data_df.timestamp.min()) / 1000. # in seconds
    data = data_df[data_df.system_id==TANGO_SYSTEM_ID]
    sns.scatterplot(data=data, x='px', y='py', label=csv_name, linewidth=0.0)

# Reference geometry: the start point and one black line to each end point.
plt.scatter(start_xy[0], start_xy[1], color='black', s=10.0)
for end_point in end_points_lines.T:
    print(end_point)
    plt.scatter(*end_point[:2], color='black', s=5.0)
    plt.plot([start_xy[0], end_point[0]], [start_xy[1], end_point[1]], color='black')

plt.axis('equal')
plt.legend(loc='lower left', bbox_to_anchor=[0.8, 0])
#plt.legend()
#fill_ground_truth(data_df, current_gt=position)