In [None]:
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns

%matplotlib notebook
%reload_ext autoreload
%autoreload 2

from public_data_utils import *

# Default figure size, given as (width, height).
plt.rcParams['figure.figsize'] = (7, 3)
# Font type 42 for both the PDF and the PS backend
# (TrueType according to the matplotlib rcParams documentation).
matplotlib.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

# Fix the random seed so that a run can be reproduced when something goes wrong
# (this only holds when the cells are run in order).
np.random.seed(1)

# Public dataset evaluation

This notebook contains functionality to evaluate the proposed algorithms on real datasets. To simply reproduce the plots from the paper you can directly run GenerateAllFigures. 

# Preparation

### 1. Download .mat files and save them in folder ./datasets/ (can simply run download_datasets.sh)

WiFi: http://www.robesafe.es/repository/UAHWiFiDataset/

Lawnmower: https://panda.frc.ri.cmu.edu/projects/emergencyresponse/RangeData/download.html

See datasets/README.md for file description.  

### 2. Choose dataset and range

Note that currently only Plaza1 and Plaza2 are fully functional. The other datasets are kept here for development purposes.

In [None]:
# Dataset to evaluate: keep exactly one of the assignments below uncommented.
#filename = 'datasets/uah1.mat'      # fingers
filename = 'datasets/Plaza1.mat'     # zig zag.
#filename = 'datasets/Plaza2.mat'    #
#filename = 'datasets/Gesling1.mat'  # not working
#filename = 'datasets/Gesling2.mat'  # not working
#filename = 'datasets/Gesling3.mat'  #

# read_dataset returns three objects for the chosen file; the following cells use
# them as the measurement frame, the anchor frame and the trajectory object.
original_df, anchors_df, traj = read_dataset(filename, verbose=True)

# Axis limits used later when plotting reconstructions of this dataset.
xlim, ylim = get_plotting_params(filename)
print(xlim, ylim)

In [None]:
%matplotlib inline
from generate_results import calibrate

calibrate(original_df)

fig, ax = plt.subplots()
for anchor_id in original_df.anchor_id.unique():
    if anchor_id == 'GT':
        continue
    gt_df = original_df[original_df.anchor_id==anchor_id]
    fig, axs = plt.subplots(1, 2)
    fig.set_size_inches(5, 3)
    axs[0].scatter(gt_df.px, gt_df.py, s=1.0)
    
    axs[1].scatter(gt_df.distance_gt, gt_df.distance, s=1.0, label='raw')
    axs[1].scatter(gt_df.distance_gt, gt_df.distance_calib, s=1.0, label='calibrated')
    axs[1].scatter(gt_df.distance_gt, gt_df.distance_gt, s=1.0, label='ideal')
    fig.suptitle(f"anchor id {anchor_id}")
    ax.plot(gt_df.timestamp, gt_df.distance, label=anchor_id)
    axs[1].legend()    
    
    axs[0].set_xlabel('x [m]')
    axs[0].set_ylabel('y [m]')
    axs[1].set_xlabel('real d [m]')
    axs[1].set_ylabel('measured d [m]')
    
ax.set_xlabel('timestamp')
ax.set_ylabel('distance')
ax.legend()

### 3. Prepare dataset

In [None]:
%matplotlib inline
#%matplotlib notebook
from generate_results_polynomial import TIME_RANGES
from math import ceil, floor

if 'Plaza1' in filename:
    plot_df = original_df[(original_df.timestamp > 300) & (original_df.timestamp < 1400)]
    time_ranges = TIME_RANGES
    print(original_df.timestamp.min(), original_df.timestamp.max())
    print(time_ranges)

    fig, ax = plt.subplots()
    sns.scatterplot(data=plot_df, x='timestamp', y='px', color='red', linewidth=0.0, ax=ax, s=1.0)
    sns.scatterplot(data=plot_df, x='timestamp', y='py', color='green', linewidth=0.0, ax=ax, s=1.0)

    side = np.sqrt(len((time_ranges)))
    fig, axs2 = plt.subplots(ceil(side), floor(side), sharex=False, sharey=True)
    fig.suptitle('piecewise linear coordinates over time', y=0.95)
    fig.set_size_inches(1.0*np.array(axs2.shape))
    axs2 = axs2.reshape((-1,))
    for ax2, time_range in zip(axs2, time_ranges):
        plot_df = original_df[(original_df.timestamp > time_range[0]) & (original_df.timestamp < time_range[1])]
        ax2.scatter(plot_df.timestamp, plot_df.px, color='red', s=1.0, label='x')
        ax2.scatter(plot_df.timestamp, plot_df.py, color='green', s=1.0, label='y')

        ax.scatter(plot_df.timestamp, plot_df.px, color='black', s=1.0)
        ax.scatter(plot_df.timestamp, plot_df.py, color='black', s=1.0)

    ax2.legend(loc='lower left', bbox_to_anchor=[1.0, 0.0])
    mask = np.array([False] * len(original_df))
    for time_range in time_ranges:
        mask = mask | ((original_df.timestamp > time_range[0]) & (original_df.timestamp < time_range[1])).values
    full_df = original_df[mask]
else:
    full_df = original_df
    print('using all measurements for', filename)

### 4. (optional) plot distance measurements

In [None]:
%matplotlib inline
# Visualise the distance measurements of the prepared subset full_df.
# NOTE(review): plot_distance_times is not defined in this notebook; presumably it
# comes from the star import of public_data_utils -- confirm.
plot_distance_times(full_df)

### 5. (optional) plot distance distributions

In [None]:
# Five stacked panels: the helper plot on top, then four histograms that show
# the distance error under different transformations.
fig, axs = plt.subplots(5, 1, sharex=False)
fig.set_size_inches(5, 10)
ax = plot_distance_errors(full_df, ax=axs[0])
#savefig(fig, 'results/accuracy.pdf')

# Measured and ground-truth distances as float32 arrays (shared by panels 2-4).
measured = full_df.distance.values.astype(np.float32)
truth = full_df.distance_gt.values.astype(np.float32)

# (y-label, error values) per histogram, in panel order.
error_variants = [
    ('(d - d_gt)', full_df.distance - full_df.distance_gt),
    # 1e-3 is added to the denominator before dividing by the ground truth.
    ('1/d(d**2 - d_gt**2)', (measured**2 - truth**2) / (truth + 1e-3)),
    ('(d**2 - d_gt**2)', measured**2 - truth**2),
    ('(d - d_gt)**2', (measured - truth)**2),
]
for panel, (ylabel, distance_error) in zip(axs[1:], error_variants):
    panel.set_ylabel(ylabel)
    panel.hist(distance_error, bins=30)

### 6. (optional) plot distance error spatially

In [None]:
# One panel per anchor: measurement positions with marker size given by the
# distance error, plus the anchor positions.
range_df = full_df.loc[full_df.system_id=='Range']

anchor_names = sorted(range_df.anchor_name.unique())
print(anchor_names)

# Prepare the anchor frame once instead of once per anchor. As before, this
# modifies the notebook-level anchors_df: it gains an 'anchor name' column (used
# as hue below) and its columns are downcast to float wherever they are numeric.
anchors_df.loc[:, 'anchor name'] = anchors_df.anchor_name.values
anchors_df = anchors_df.apply(pd.to_numeric, downcast='float', errors='ignore', axis=0)

# squeeze=False keeps axs a 2D array, so a dataset with a single anchor works too.
fig, axs = plt.subplots(1, len(anchor_names), sharey=True, squeeze=False)
fig.set_size_inches(15, 4)
for ax, anchor_name in zip(axs[0], anchor_names):
    # Work on a copy so the new columns do not touch range_df.
    plot_df = range_df.loc[range_df.anchor_name==anchor_name].copy()
    plot_df.loc[:, 'distance error'] = plot_df.distance.values - plot_df.distance_gt.values
    plot_df.loc[:, 'anchor name'] = plot_df.anchor_name.values
    sns.scatterplot(data=plot_df, x='px', y='py', hue='anchor name', size='distance error',
                    hue_order=anchor_names, linewidth=0.0, alpha=0.8, ax=ax, legend=False)
    sns.scatterplot(data=anchors_df, x='px', y='py', hue='anchor name',
                    linewidth=0.0, legend=False, ax=ax)
    ax.set_title(anchor_name)

# Draw the anchors once more on the last panel, this time with a legend that is
# placed to the right of the figure.
g = sns.scatterplot(data=anchors_df, x='px', y='py', hue='anchor name',
                    linewidth=0.0, legend='full', ax=ax)
g.legend(loc='center left', bbox_to_anchor=(1.25, 0.5), ncol=1)
fig.suptitle('Scatter plots with size proportional to distance error.')

# Reconstruction Algorithm

## bandlimited table

In [None]:
from table_tools import *
def format_here(number):
    """Format a number for the result tables.

    Values strictly greater than 10000 are written in scientific notation with
    two decimals; all other values are written in fixed-point notation with one
    decimal.

    :param number: the value to format.
    :return: the formatted string.
    """
    template = '{:.2e}' if number > 10000 else '{:.1f}'
    return template.format(number)

# Input results and output table; use the commented pair for the calibrated results.
fname = 'results/bandlimited_tuesday.pkl'
outname = 'results/table_bandlimited.tex'

#fname = 'results/bandlimited_tuesday_calib.pkl'
#outname = 'results/table_bandlimited_calib.tex'

# convert all numerical columns to float, ignore non-numeric.
result_df = pd.read_pickle(fname).apply(pd.to_numeric, errors='ignore')

# Rows shown in the table: complexity of at least 5 and one of the listed
# numbers of measurements.
#print_table = result_df[result_df.n_measurements.isin([40, 100, 200, 300, 499])]
rows_kept = (
    (result_df.n_complexity >= 5)
    & (result_df.n_measurements >= 100)
    & result_df.n_measurements.isin([100, 300, 499])
)
print_table = result_df[rows_kept]

# Methods to include in the table.
methods = [
    'gt', 'srls raw', 'srls', 'rls raw', 'rls', 'lm-ellipse',
    'lm-ours-weighted', 'ours', 'ours-weighted',
]
#pretty_print_table(print_table, methods=methods, value='cost_rls')
# Only the first return value (the styled table) is displayed here.
styler, __ = pretty_print_table(print_table, methods=methods, value='mse')
styler

In [None]:
# Write the bandlimited table to the LaTeX file outname.
methods = [
    'gt', 'srls raw', 'srls', 'rls raw', 'rls', 'lm-ellipse',
    'lm-ours-weighted', 'ours', 'ours-weighted',
]
# Only the second return value of pretty_print_table is passed on to latex_print.
__, pt = pretty_print_table(print_table, methods=methods, value='mse')
latex_print(
    pt,
    methods,
    outname,
    float_format=format_here,
)

## Some bandlimited sanity checks

In [None]:
# plot error vs n measurements and n complexity
# Only results with mae < 100 and more than 100 measurements are shown.
plot_df = result_df[result_df.mae < 100]
plot_df = plot_df[plot_df.n_measurements > 100]

# One column of panels per complexity, one colour per method.
fg = sns.FacetGrid(data=plot_df, col='n_complexity', hue='method', legend_out=True)
fg.map(plt.semilogy, 'n_measurements', 'mae', linestyle='', marker='.', alpha=0.5)
# Use the grid's own legend: legend_out=True only takes effect through
# add_legend(), whereas plt.legend() would label just the current axes.
fg.add_legend();

In [None]:
# understand why N=100, K=19 is so bad...
# Select exactly that configuration and show the MSE of every iteration, per method.
is_bad_case = (result_df.n_measurements == 100) & (result_df.n_complexity == 19)
df = result_df.loc[is_bad_case, :]

fig, ax = plt.subplots()
for method, df_m in df.groupby('method'):
    ax.scatter(df_m.n_it, df_m.mse, label=method)
# Logarithmic y axis for the MSE values.
ax.set_yscale('log')
ax.legend(loc='upper right')

## polynomial table

In [None]:
# Input results and output table; use the commented pair for the calibrated results.
fname = 'results/polynomial_tuesday.pkl'
outname = 'results/table_polynomial.tex'

#fname = 'results/polynomial_tuesday_calib.pkl'
#outname = 'results/table_polynomial_calib.tex'

# convert all numerical columns to float, ignore non-numeric.
result_df = pd.read_pickle(fname).apply(pd.to_numeric, errors='ignore')

# Only these numbers of measurements appear in the table.
shown_measurements = [10, 20, 30, 50]
print_table = result_df[result_df.n_measurements.isin(shown_measurements)]

# Methods to include in the table.
methods = [
    'gt', 'srls raw', 'srls', 'rls raw', 'rls', 'lm-line',
    'lm-ours-weighted', 'ours', 'ours-weighted',
]
# Only the first return value (the styled table) is displayed here.
styler, __ = pretty_print_table(print_table, methods=methods, value='mse')
styler

In [None]:
# Write the polynomial table to the LaTeX file outname; only the second return
# value of pretty_print_table is passed on to latex_print.
__, pt = pretty_print_table(print_table, methods=methods, value='mse')
latex_print(
    pt,
    methods,
    outname,
    index_names=False,
    index=False,
    float_format=format_here,
)

# Sandbox (space to try out stuff)

### Example reconstructions

In [None]:
from evaluate_dataset import compute_distance_matrix, compute_anchors

# Distance column to reconstruct from; 'distance_gt' is the ground-truth alternative.
chosen_distance = 'distance'
#chosen_distance = 'distance_gt'
anchor_names = None

## Construct anchors.
anchors = compute_anchors(anchors_df, anchor_names)
print(anchors.shape)

## Construct times.
# NOTE(review): range_system_id is not defined in this notebook; presumably it
# comes from the star import of public_data_utils -- confirm.
is_range_row = full_df.system_id == range_system_id
times = full_df[is_range_row].timestamp.unique()

## Construct D.
# compute_distance_matrix also returns times, which replaces the array built above.
D, times = compute_distance_matrix(full_df, anchors_df, anchor_names, times, chosen_distance)
print(D.shape)

# More positive entries than rows means that at least one row holds several
# measurements; report how many rows are affected.
has_measurement = D > 0
n_rows = D.shape[0]
if np.sum(has_measurement) > n_rows:
    n_multiple = np.sum(np.sum(has_measurement, axis=1) > 1)
    print('Warning: multiple measurements for times:{}/{}!'.format(n_multiple, n_rows))

## Construct ground truth.
points_gt = get_ground_truth(full_df, times)

In [None]:
# Example reconstruction: estimate the trajectory from the distance matrix D with
# one method and compare it with a trajectory fitted directly to the ground truth.
from other_algorithms import apply_algorithm
from fit_curve import fit_trajectory
print(D.shape)
fig, ax = plt.subplots()
# Ground-truth positions as reference points.
ax.scatter(points_gt.px, points_gt.py, s=10)

# Complexity of the trajectory model used for both fits below.
traj.set_n_complexity(3)

# Reconstruction method; swap the comments to try the other one.
#method = 'ours-weighted'
method = 'lm-line'
# Only the first return value (the coefficients) is used.
coeffs, __, __ = apply_algorithm(traj, D, times, anchors, method=method)
traj.set_coeffs(coeffs=coeffs)
traj.plot_pretty(times=times, ax=ax, color='red', label='fitted')

traj.print()
# Fit the same trajectory model to the ground-truth points; traj is overwritten
# with these coefficients, so it must be plotted after the estimate above.
coeffs = fit_trajectory(points_gt.T, times, traj)
traj.set_coeffs(coeffs=coeffs)
traj.plot_pretty(times=times, ax=ax, color='green', label='best fit')
# Dataset-specific axis limits obtained from get_plotting_params.
ax.set_xlim(*xlim)
ax.set_ylim(*ylim)
ax.legend()

### Piecewise linear reconstructions: correlation between cost and reconstruction error

In [None]:
# Results to analyse; swap the comments to use the calibrated results instead.
# (Previously both files were read and the first frame was discarded immediately.)
#result_df = pd.read_pickle('results/polynomial_tuesday_calib.pkl')
result_df = pd.read_pickle('results/polynomial_tuesday.pkl')

# Error measure that is compared against the RLS cost.
chosen_measure = 'mse'

# One figure per number of measurements N, one column of panels per complexity K.
# Top row: chosen measure per result row; bottom row: chosen measure vs. RLS cost.
for N, df_N in result_df.groupby('n_measurements'):
    Ks = df_N.n_complexity.unique()
    fig, axs = plt.subplots(2, len(Ks), squeeze=False, sharey=True)
    fig.suptitle(f'N={N}')
    for i, (K, df_K) in enumerate(df_N.groupby('n_complexity')):
        ax1, ax2 = axs[:, i]
        for method, df_method in df_K.groupby('method'):
            ax1.plot(df_method[chosen_measure].values, label=method)
            ax2.scatter(df_method['cost_rls'].values, df_method[chosen_measure], label=method)
        ax2.set_xscale('log')
        ax2.set_yscale('log')
        # Axis limits are derived from all methods of this (N, K) group, capped at
        # 200 and 2000; using df_method here would only look at the last method.
        ylim_chosen = min(200, df_K[chosen_measure].max())
        xlim_rls = min(2000, df_K['cost_rls'].max())
        ax1.set_ylim([1, ylim_chosen])
        ax2.set_ylabel(chosen_measure.upper())
        ax2.set_xlim([1, xlim_rls])
        ax2.legend(loc='lower left', bbox_to_anchor=[1, 0])