# Public dataset evaluation

In this notebook we test and evaluate publicly available datasets (see the *datasets/* folder).

In [None]:
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import pandas as pd
from scipy.io import loadmat
import seaborn as sns

%matplotlib inline
%reload_ext autoreload
%autoreload 2

from data_utils import *

# Global matplotlib settings: default figure size and font type 42 for the
# PDF and PostScript backends.
matplotlib.rcParams.update({
    'figure.figsize': (7, 3),
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})

# Fix the random seed so that a failing run can be reproduced.
# (This only holds when the cells are run in order.)
np.random.seed(1)

This notebook contains functionality to evaluate the proposed algorithms on real datasets. To simply reproduce the plots from the paper you can directly run GenerateAllFigures. 

# Preparation

### 1. Download .mat files and save them in folder ./datasets/ (can simply run download_datasets.sh)

WiFi: http://www.robesafe.es/repository/UAHWiFiDataset/

Lawnmower: https://github.com/gtrll/gpslam/raw/master/matlab/data/

See datasets/README.md for file description.  

### 2. Choose dataset and range

In [None]:
from trajectory_creator import get_trajectory

anchor_names = None  # use all anchors by default.

# Choose the dataset by uncommenting exactly one of the following lines.
#filename = 'datasets/uah1.mat' # fingers
#filename = 'datasets/Plaza1.mat'; # zig zag. 
filename = 'datasets/Plaza2.mat' # triangle

verbose = False
traj = get_trajectory(filename)
# Dataset name = file name without folder and extension,
# e.g. 'datasets/Plaza2.mat' -> 'Plaza2'.
dataname = filename.split('/')[-1].split('.')[0]

# Per-dataset settings:
#   t_window, min_time, max_time: passed to prepare_dataset.
#   eps: passed to the build-up algorithm.
#   xlim, ylim: axis limits used for the trajectory plots.
if dataname == 'uah1':
    t_window = 1.0
    eps = 2.0
    xlim = 0, 50
    ylim = -20, 20

    min_time = 0
    max_time = 1000
elif dataname == 'Plaza1':
    t_window = 0.5
    eps = 0.5
    xlim = -50, 10
    ylim = -20, 75

    # choose one (only the uncommented pair is used):
    #min_time, max_time = 0, 200  # first big circle
    #min_time, max_time = 510, 600  # first loop
    min_time, max_time = 0, 1000  # first few loops.
elif dataname == 'Plaza2':
    t_window = 0.1
    eps = 0.2
    xlim = -80, 10
    ylim = -20, 75

    min_time = 45.1
    period = 101 - 45
    print('period:', period)
    num_loops = 2
    max_time = min_time + num_loops * period
    traj.period = period

    #anchor_names = ['Range {}'.format(i) for i in range(1, 4)]
else:
    # Fail early: without this, the settings above would be undefined (or
    # silently left over from a previous run) for an unknown dataset.
    raise ValueError(
        'No settings defined for dataset {!r}; expected one of '
        'uah1, Plaza1, Plaza2.'.format(dataname))
# Read the raw .mat file; the resulting dict is handed to prepare_dataset.
try:
    result_dict = loadmat(filename)
except FileNotFoundError as error:
    # Re-raise with a hint on how to obtain the data. The message has to be
    # built from `filename`; no variable named `dataset` exists.
    raise FileNotFoundError(
        'Could not find {}. Did you run the script download_datasets?'.format(filename)
    ) from error
except Exception:
    print('Unknown reading error with {}. Check if the file looks ok.'.format(filename))
    # Bare raise keeps the original exception and traceback untouched.
    raise
print('Successfully read {}'.format(filename))

### 3. Prepare dataset

In [None]:
from data_utils import prepare_dataset

# Build the measurement table and the anchor table from the raw .mat content,
# for the chosen time range and time window.
time_range = [min_time, max_time]
full_df, anchors_df = prepare_dataset(result_dict, range_system_id, gt_system_id,
                                      time_range, t_window)

# Left panel: py over px, right panel: px over timestamp (both coloured by
# timestamp). The anchors are added to the left panel in red.
fig, axs = plt.subplots(nrows=1, ncols=2)
for panel, x_column, y_column in zip(axs, ['px', 'timestamp'], ['py', 'px']):
    sns.scatterplot(data=full_df, x=x_column, y=y_column, hue='timestamp',
                    linewidth=0.0, ax=panel)
sns.scatterplot(data=anchors_df, x='px', y='py', linewidth=0.0,  ax=axs[0], color='red')

### 4. (optional) plot distance measurements

In [None]:
# Plot only the rows that belong to the range measurement system.
fig, axs = plt.subplots(1, 2)
range_df = full_df.loc[full_df.system_id == range_system_id].copy()
sns.scatterplot(data=range_df, x='px', y='py', hue='timestamp', linewidth=0.0, ax=axs[0])
sns.scatterplot(data=anchors_df, x='px', y='py', linewidth=0.0,  ax=axs[0], color='red')
for a_id, px, py in zip(anchors_df.anchor_id, anchors_df.px, anchors_df.py):
    print(a_id, px, py)
    # The label is passed positionally: the keyword was renamed from `s` to
    # `text` in matplotlib 3.3, so the positional form works in all versions.
    axs[0].annotate('a{}'.format(a_id), xy=(px+2,py+2), color='red')
# NOTE(review): presumably meant to hide the automatic hue legend -- confirm.
axs[0].legend('')
sns.scatterplot(data=range_df, x='timestamp', y='px', hue='timestamp', linewidth=0.0, ax=axs[1])

plot_distance_times(full_df)

In [None]:
# Distance errors of the full dataset on a 5 x 2 inch figure.
fig, ax = plt.subplots(figsize=(5, 2))
ax = plot_distance_errors(full_df, ax=ax)
#savefig(fig, 'results/accuracy.pdf')

In [None]:
# One scatter plot per anchor; the marker size encodes the distance error
# (measured distance minus ground-truth distance).

# Use a dedicated name for the anchors plotted here, so that this optional
# cell does not overwrite the global `anchor_names` selection (None means
# "use all anchors") that the reconstruction reads later.
plot_anchor_names = sorted(range_df.anchor_name.unique())

# squeeze=False always yields a 2D array of axes, so that the loop below also
# works when there is only a single anchor.
fig, axs = plt.subplots(1, len(plot_anchor_names), sharey=True, squeeze=False)
axs = axs.flatten()
fig.set_size_inches(15, 4)

# Plotting copy of the anchors with the column used as hue. It is built once,
# outside of the loop, and leaves anchors_df itself unchanged.
anchors_plot_df = anchors_df.assign(**{'anchor name': anchors_df.anchor_name.values})

for ax, anchor_name in zip(axs, plot_anchor_names):
    plot_df = range_df.loc[range_df.anchor_name==anchor_name].copy()
    plot_df.loc[:, 'distance error'] = plot_df.distance.values - plot_df.distance_gt.values
    plot_df.loc[:, 'anchor name'] = plot_df.anchor_name.values
    sns.scatterplot(data=plot_df, x='px', y='py', hue='anchor name', size='distance error', 
                    hue_order=plot_anchor_names, linewidth=0.0, alpha=0.8, ax=ax, legend=False)
    sns.scatterplot(data=anchors_plot_df, x='px', y='py', hue='anchor name',
                    hue_order=plot_anchor_names, linewidth=0.0, legend=False, ax=ax)
    ax.axis('equal')
    ax.set_title(anchor_name)
fig.suptitle('Scatter plots with size proportional to distance error.')

### 5. (optional) Filter measurements

In [None]:
# Keep only the rows whose measured distance does not exceed the threshold.
max_distance = 50
filtered_df = full_df.loc[full_df.distance <= max_distance]

plot_distance_times(filtered_df)
ax = plot_distance_errors(filtered_df)

# Reconstruction

### 1. Prepare

In [None]:
from evaluate_dataset import compute_distance_matrix

# Data and distance column used for the reconstruction; uncomment the
# alternatives to use the filtered data or the ground-truth distances.
chosen_df = full_df
#chosen_df = filtered_df
chosen_distance = 'distance'
#chosen_distance = 'distance_gt'

## Construct anchors. 
if anchor_names is not None:
    # Restrict the anchor table to the selected anchors.
    anchors_df = anchors_df.loc[anchors_df.anchor_name.isin(anchor_names)]
    anchors = get_coordinates(anchors_df, anchor_names)
else:
    # All anchors: one column per anchor, rows are px, py, pz.
    coordinate_columns = ['px', 'py', 'pz']
    anchors = anchors_df.loc[:, coordinate_columns].values.astype(np.float32).T

## Construct times.
is_range_row = chosen_df.system_id == range_system_id
range_df = chosen_df[is_range_row]
times = range_df.timestamp.unique()

## Construct D.
D, times = compute_distance_matrix(chosen_df, anchors_df, anchor_names, times, chosen_distance)
measured = D > 0
if np.sum(measured) > D.shape[0]:
    # More positive entries than rows: count the rows with more than one.
    n_multiple = np.sum(np.sum(measured, axis=1)>1)
    print('Warning: multiple measurements for times:{}/{}!'.format(
          n_multiple, D.shape[0]))

## Construct ground truth.
ground_truth_pos = get_ground_truth(chosen_df, times)

### 2. Global algorithm

In [None]:
def test_hypothesis(D, dim, K):
    import hypothesis as h
    mask = (D)
    p = np.sort(np.sum(mask, axis=0))[::-1]
    assert h.limit_condition(list(p), dim+1, K)
    
def add_measurement(result_df, method=''):
    """Store the current experiment state as a new row of result_df.

    The row is labelled with the global ``counter_df``, which is incremented
    afterwards. Apart from ``method``, all values are read from the globals
    ``n_complexity``, ``n_measurements``, ``n_it``, ``mae`` and ``mse``.

    :param result_df: DataFrame that receives the new row (modified in place).
    :param method: name of the method stored in the 'method' column.
    """
    global counter_df
    row = {
        'n_complexity': n_complexity,
        'n_measurements': n_measurements,
        'method': method,
        'n_it': n_it,
        'mae': mae,
        'mse': mse,
    }
    result_df.loc[counter_df] = row
    counter_df = counter_df + 1
    
# Experiment: for every complexity K and every number of measurements N,
# draw `total_n_it` random subsets of the rows of D, run all methods on each
# subset and store the errors (mae, mse) in result_df.
#
# NOTE: the loop variables n_complexity, n_measurements and n_it, as well as
# mae and mse, are read as globals by add_measurement. Do not rename them
# without adapting that function.
from other_algorithms import pointwise_srls, apply_algorithm, error_measure
from plotting_tools import plot_complexities, add_scalebar
from fit_curve import fit_trajectory

print('available', D.shape[0])
list_complexities = [3, 5, 11, 19]
list_measurements = [40, 100, 200, 300, 400, 499]
methods = ['ours-weighted', 'ours']
methods += ['lm-ellipse', 'lm-ours']
methods += ['srls', 'rls']
# File to which the results are written after each (K, N) combination.
fname = 'results/algorithms_sunday.pkl'

# Number of random subsets drawn per (K, N) combination.
total_n_it = 20

# Keep only the first two rows of the anchor array (px and py when the
# anchors were built from the anchors_df columns px, py, pz).
anchors = anchors[:2, :]
# Ground-truth px, py of all rows whose timestamp is contained in `times`.
# NOTE(review): the indexing below assumes one row per entry of `times`, in
# the same order as the rows of D -- confirm.
points_gt = full_df.loc[full_df.timestamp.isin(times), ['px', 'py']].values.astype(np.float32)

# One panel per (N, K) combination: rows are N, columns are K.
fig, axs = plt.subplots(len(list_measurements), len(list_complexities), sharex=True, sharey=True)
#fig_size = [5, 1.2]
fig_size = [5, 1.2 * len(list_measurements)]

# Empty frame with the result columns; rows are appended by add_measurement.
# NOTE(review): the original comment mentioned a "complicated initialization"
# to get correct dtypes; the columns of this empty frame carry no numeric
# dtype, and the next cell converts them with pd.to_numeric.
result_df = pd.DataFrame(columns=['n_it','n_complexity','n_measurements','mae','mse','method'])
# Row label of the next result; incremented by add_measurement.
counter_df = 0

verbose = True

for j, n_complexity in enumerate(list_complexities):
    if verbose:
        print(f'K={n_complexity}')
    traj.set_n_complexity(n_complexity)
    
    axs[0, j].set_title(f'K={n_complexity}')
    
    for i, n_measurements in enumerate(list_measurements):
        axs[i, 0].set_ylabel(f'N={n_measurements}')
        if verbose:
            print(f'n_measurements={n_measurements}')
        
        for n_it in range(total_n_it):
            # Random subset of rows of D (without replacement), in increasing
            # order. Raises if n_measurements exceeds D.shape[0].
            indices = sorted(np.random.choice(D.shape[0], n_measurements, replace=False))
            D_small = D[indices, :]
            
            # test hypothesis
            test_hypothesis(D_small, traj.dim, traj.n_complexity)

            times_small = np.array(times)[indices]
            basis_small = traj.get_basis(times=times_small)
            points_small = points_gt[indices, :]

            results = {}
            for method in methods: 
                C_hat, p_hat, lat_idx = apply_algorithm(traj, D_small, times_small, 
                                                   anchors, method=method)
                results[method] = (C_hat, p_hat)
                # Error of the trajectory defined by the estimated
                # coefficients, evaluated at the chosen times.
                traj.set_coeffs(coeffs=C_hat)
                p_fitted = traj.get_sampling_points(times=times_small).T
                mae = error_measure(p_fitted, points_small, 'mae')
                mse = error_measure(p_fitted, points_small, 'mse')
                add_measurement(result_df, method=method)
                
                # do raw version if applicable
                # (error of p_hat itself against the ground truth at lat_idx)
                if method in ['rls', 'srls']:
                    points_lat = points_small[lat_idx]
                    mae = error_measure(p_hat, points_lat, 'mae')
                    mse = error_measure(p_hat, points_lat, 'mse')
                    add_measurement(result_df, method=method + ' raw')
            # fit ground truth to chosen points.
            coeffs = fit_trajectory(points_small.T, times=times_small, traj=traj)
            traj_gt = traj.copy()
            traj_gt.set_coeffs(coeffs=coeffs)
            points_fitted = traj_gt.get_sampling_points(times=times_small).T
            
            mse = error_measure(points_fitted, points_small, 'mse')
            mae = error_measure(points_fitted, points_small, 'mae')
            add_measurement(result_df, 'gt')
            
        # Save the intermediate results after each (K, N) combination.
        result_df.to_pickle(fname)
        print('saved as', fname)
            
        # Only the results of the last of the total_n_it subsets are plotted.
        ax = axs[i, j]
        ax = plot_complexities(traj, times_small, results, points_fitted, ax)
fig.set_size_inches(*fig_size) 
add_scalebar(axs[0, 0], 20, loc='lower left')
[ax.set_xlim(*xlim) for ax in axs.flatten()]
[ax.set_ylim(*ylim) for ax in axs.flatten()]
result_df.head()

In [None]:
def _to_numeric_if_possible(column):
    """Return `column` converted to a numeric dtype, or unchanged if that fails.

    Replaces the deprecated ``pd.to_numeric(..., errors='ignore')``.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column

# Convert all numerical columns to a numeric dtype, leave non-numeric columns
# (e.g. the method names) untouched.
fname = 'results/algorithms_sunday.pkl'
# The pickle is written by the experiment cell above; only unpickle trusted files.
result_df = pd.read_pickle(fname)
result_df = result_df.apply(_to_numeric_if_possible)
# Use the full option name: the short pattern 'precision' matches more than
# one option in pandas >= 1.4 and then raises an OptionError.
pd.set_option('display.precision', 2)
print_table = result_df[result_df.n_measurements.isin([40, 300, 499])]
# Mean and standard deviation of the MAE for each method (rows) and each
# (n_measurements, n_complexity) combination (columns).
pd.pivot_table(print_table, values='mae', index=['method'], columns=['n_measurements', 'n_complexity'], 
               aggfunc=['mean', 'std'])

In [None]:
from plotting_backup import plot_subsample_old

# Reconstruction for a fixed complexity and decreasing numbers of measurements.
n_complexity = 5
traj.set_n_complexity(n_complexity)

min_number = n_complexity*(traj.dim + 2) - 1 
print(min_number, D.shape[0])
#n_measurements_list = np.arange(D.shape[0], min_number, step=-100)
#n_measurements_list = [19, 20, 30, 40, 50, 100, 200, 300, 499]
n_measurements_list = [200, 60, 30, 19]

fig, axs = plot_subsample_old(traj, D, times, anchors, full_df, n_measurements_list)
# Same axis limits on every panel; fig_size is defined in the experiment cell.
for panel in axs:
    panel.set_xlim(*xlim)
for panel in axs:
    panel.set_ylim(*ylim)
fig.set_size_inches(*fig_size)

# Compare with other algorithms

This part is in beta stage. We compare against other algorithms such as Levenberg-Marquardt optimization. 

In [None]:
import time
from other_algorithms import init_lm 
from fit_curve import fit_trajectory

# Compare the weighted trajectory recovery, pointwise SRLS and several
# differently initialized Levenberg-Marquardt runs, one figure per number
# of measurements.
#
# NOTE(review): trajectory_recovery and least_squares_lm are not imported in
# this cell (presumably provided by `from data_utils import *` -- confirm);
# pointwise_srls is imported in an earlier cell.

# Reference: trajectory fitted to all ground-truth positions (px, py) of full_df.
traj_gt = get_trajectory(filename)
traj_gt.set_n_complexity(n_complexity)
R = np.c_[full_df.px.values.flatten(), full_df.py.values.flatten()].T
coeffs_gt = fit_trajectory(R, full_df.timestamp.values, traj_gt)

n_measurements_list = [20, 30, 50, 200][::-1]
methods = ['weighted', 'srls', 'lm-real', 'lm-noise', 'lm-ellipse']

for n_measurements in n_measurements_list:
    # NOTE(review): `coeffs` is not used anywhere in this cell.
    coeffs = np.empty([traj.dim, n_complexity, 0])
    
    fig, ax = plt.subplots()

    # Re-seed for every figure, so that the drawn subset depends only on
    # n_measurements and not on the previously drawn subsets.
    np.random.seed(1)
    indices = np.random.choice(D.shape[0], n_measurements, replace=False)

    D_small = D[indices, :]
    times_small = np.array(times)[indices]

    # t1 / t2 measure the run time of each method; they are only used by the
    # commented-out print at the end of this loop.
    for k, method in enumerate(methods): 
        if method == 'weighted':
            basis = traj.get_basis(times=times_small)

            t1 = time.time()
            Chat = trajectory_recovery(D_small, anchors[:2, :], basis, weighted=True)
            t2 = time.time()

            traj.set_coeffs(coeffs=Chat)
            traj.plot_pretty(times=times, color=f'C{k}', ax=ax, label=method)
        elif method == 'srls':
            # Pointwise estimates; called with the full D and the chosen indices.
            t1 = time.time()
            points, __ = pointwise_srls(D, anchors, traj, indices)
            t2 = time.time()

            # Label only the first point, so that the legend has a single entry.
            label = 'SRLS'
            for x in points:
                ax.scatter(*x, color=f'C{k}', label=label)
                label=None
        elif 'lm' in method:
            basis = traj.get_basis(times=times_small)

            # Initial coefficients derived from the fitted ground truth; plotted
            # as dotted line.
            coeffs_init = init_lm(coeffs_gt, method=method, sigma=1.0)
            traj.set_coeffs(coeffs=coeffs_init)
            traj.plot_pretty(times=times, color=f'C{k}', ax=ax, label=method+' init', linestyle=':')

            x0 = coeffs_init.reshape((-1,))
            #print(x0)
            t1 = time.time()
            Crand = least_squares_lm(D_small, anchors, basis, x0, verbose=False)
            t2 = time.time()

            traj.set_coeffs(coeffs=Crand)
            traj.plot_pretty(times=times, color=f'C{k}', ax=ax, label=method)
        #print(f'{method} took {t2-t1:.2f} seconds.')
        
    traj_gt.set_coeffs(coeffs=coeffs_gt)
    traj_gt.plot_pretty(times=times, color='black', ax=ax, label='ground truth fitted', linestyle=':')

    ax.plot(full_df.px, full_df.py, color='black', label='ground truth')
    ax.set_xlabel('x [m]')
    ax.set_title('N={}'.format(n_measurements))

    ax.set_xlim(*xlim)
    ax.set_ylim(*ylim)

# Outside of the loop: the legend is only added to the last figure.
legend = ax.legend(loc='lower right', ncol=3, facecolor='white', framealpha=1)

In [None]:
# Effect of the initialization (parameter sigma of init_lm) on the
# Levenberg-Marquardt result, one panel per sigma.
n_measurements = 200
sigmas = [0.1, 1, 2, 5, 10]
n_panels = len(sigmas)
fig, axs = plt.subplots(1, n_panels, sharex=True, sharey=True)
fig.set_size_inches(3*n_panels, 5)

np.random.seed(1)

method = 'noise'
k = 1
for ax, sigma in zip(axs, sigmas):
    #print(f'sigma {sigma}')
    coeffs = np.empty([traj.dim, n_complexity, 0])

    # Random subset of the measurements.
    indices = np.random.choice(D.shape[0], n_measurements, replace=False)
    D_small, times_small = D[indices, :], np.array(times)[indices]
    basis = traj.get_basis(times=times_small)

    # Initialization, followed by the LM result and our estimate.
    coeffs_init = init_lm(coeffs_gt, method=method, sigma=sigma)
    x0 = coeffs_init.reshape((-1,))
    #Crand = least_squares_lm(D_small, anchors, basis, x0, cost='squared')
    #Crand = least_squares_lm(D_small, anchors, basis, x0, cost='squared', jacobian=True)
    Crand = least_squares_lm(D_small, anchors, basis, x0, cost='simple')
    Chat = trajectory_recovery(D_small, anchors, basis, weighted=True)

    # plotting: (coefficients, color, label, linestyle) of each curve, in
    # drawing order.
    ax.set_title(f'sigma={sigma}')
    curves = [
        (coeffs_init, f'C{k}', method+' init', '-'),
        (Crand, f'C{k+1}', method+' res', ':'),
        (Chat, f'C{k+2}', 'ours', '-'),
        (coeffs_gt, 'black', 'ground truth fitted', ':'),
    ]
    for curve_coeffs, curve_color, curve_label, curve_style in curves:
        traj.set_coeffs(coeffs=curve_coeffs)
        traj.plot_pretty(times=times, color=curve_color, ax=ax, label=curve_label,
                         linestyle=curve_style)
    ax.plot(full_df.px, full_df.py, color='black', label='ground truth')

# The following calls act on the last panel only.
ax.set_xlabel('x [m]')

ax.set_xlim(*xlim)
ax.set_ylim(*ylim)

axs[0].set_ylabel('y [m]')
legend = ax.legend(loc='lower right', ncol=3, facecolor='white', framealpha=1)

# Iterative algorithms

In this section, we apply our trajectory estimation iteratively: either using a constant time window (Averaging algorithm) or building up and refining the trajectory as we go (Build up algorithm).

In [None]:
# Settings of the iterative algorithms, per dataset:
# (n_complexity_it, model_it, period_it, t_window_it)
iterative_presets = {
    'uah1': (2, 'polynomial', 10, 80),
    'Plaza1': (3, 'full_bandlimited', 40, 20),
    'Plaza2': (3, 'bandlimited', 40, 40),
}

# Default period; for an unknown dataset only this value is set.
period_it = 10
if dataname in iterative_presets:
    n_complexity_it, model_it, period_it, t_window_it = iterative_presets[dataname]

In [None]:
# Copy of the trajectory that uses the model settings of the iterative
# algorithms, and its basis at all measurement times.
traj_it = traj.copy()
traj_it.set_n_complexity(n_complexity_it)
for attribute, value in (('model', model_it), ('period', period_it)):
    setattr(traj_it, attribute, value)
basis = traj_it.get_basis(times=times)

model_summary = 'Using trajectory model: \n model={}, K={}, period={}'
print(model_summary.format(traj_it.model, traj_it.n_complexity, traj_it.period))

### 1. Averaging algorithm

In [None]:
from iterative_algorithms import averaging_algorithm
# Ground-truth coordinates, drawn in black on top of both plots.
gt_x, gt_y = ground_truth_pos.px, ground_truth_pos.py

print('averaging with time window', t_window_it)
C_list, t_list = averaging_algorithm(D, anchors[:2, :], basis, times, t_window=t_window_it)

# Individual estimates.
ax1 = plot_individual(C_list, t_list, traj_it)
ax1.plot(gt_x, gt_y, color='black')

# Smoothed points.
result_df = get_smooth_points(C_list, t_list, traj_it)
ax2 = plot_smooth(result_df)
ax2.plot(gt_x, gt_y, color='black')

[[axis.set_xlim(*xlim), axis.set_ylim(*ylim)] for axis in (ax1, ax2)]

### 2. Build up algorithm

In [None]:
from iterative_algorithms import build_up_algorithm

# Ground-truth coordinates, drawn in black on top of both plots.
gt_x, gt_y = ground_truth_pos.px, ground_truth_pos.py

C_list, t_list = build_up_algorithm(D, anchors[:2, :], basis, times, eps=eps, verbose=False)

# Individual estimates, plotted with a copy of the trajectory.
ax1 = plot_individual(C_list, t_list, traj_it.copy())
ax1.plot(gt_x, gt_y, color='black')

# Smoothed points.
result_df = get_smooth_points(C_list, t_list, traj_it)
ax2 = plot_smooth(result_df)
ax2.plot(gt_x, gt_y, color='black')

[[axis.set_xlim(*xlim), axis.set_ylim(*ylim)] for axis in (ax1, ax2)]