In [None]:
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns

%matplotlib notebook
%reload_ext autoreload
%autoreload 2

from public_data_utils import *

# Global plotting defaults: default figure size, and font type 42 for
# fonts embedded in pdf / ps output.
matplotlib.rcParams.update({
    'figure.figsize': (7, 3),
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})

# Fix the global numpy seed so that a run that went wrong can be reproduced
# (this only holds when the cells are executed in order).
np.random.seed(1)

# Public dataset evaluation

This notebook contains functionality to evaluate the proposed algorithms on real datasets. If you only want to reproduce the plots from the paper, you can run GenerateAllFigures directly instead.

# Preparation

### 1. Download .mat files and save them in folder ./datasets/ (can simply run download_datasets.sh)

WiFi: http://www.robesafe.es/repository/UAHWiFiDataset/

Lawnmower: https://panda.frc.ri.cmu.edu/projects/emergencyresponse/RangeData/download.html

See datasets/README.md for file description.  

### 2. Choose dataset and range

In [None]:
# Dataset to evaluate. To use another one, replace the active assignment
# by one of the commented-out alternatives listed below it.
filename = 'datasets/Plaza1.mat'  # zig zag.
#filename = 'datasets/uah1.mat' # fingers
#filename = 'datasets/Plaza2.mat' # triangle
#filename = 'datasets/Gesling1.mat' # not working
#filename = 'datasets/Gesling2.mat' # not working
#filename = 'datasets/Gesling3.mat' #

# read_dataset returns three objects that the rest of the notebook refers
# to as original_df, anchors_df and traj.
original_df, anchors_df, traj = read_dataset(filename, verbose=True)

# Axis limits used when plotting reconstructions for this dataset.
xlim, ylim = get_plotting_params(filename)
print(xlim, ylim)

In [None]:
%matplotlib inline
from generate_results import calibrate

calibrate(original_df)

fig, ax = plt.subplots()
for anchor_id in original_df.anchor_id.unique():
    if anchor_id == 'GT':
        continue
    gt_df = original_df[original_df.anchor_id==anchor_id]
    fig, axs = plt.subplots(1, 2)
    fig.set_size_inches(5, 3)
    axs[0].scatter(gt_df.px, gt_df.py, s=1.0)
    
    axs[1].scatter(gt_df.distance_gt, gt_df.distance, s=1.0, label='raw')
    axs[1].scatter(gt_df.distance_gt, gt_df.distance_calib, s=1.0, label='calibrated')
    axs[1].scatter(gt_df.distance_gt, gt_df.distance_gt, s=1.0, label='ideal')
    fig.suptitle(anchor_id)
    ax.plot(gt_df.timestamp, gt_df.distance, label=anchor_id)
    axs[1].legend()    
ax.legend()

### 3. Prepare dataset

In [None]:
%matplotlib inline
#%matplotlib notebook
from generate_results_polynomial import TIME_RANGES
from math import ceil, floor

if 'Plaza1' in filename:
    plot_df = original_df[(original_df.timestamp > 300) & (original_df.timestamp < 1400)]
    time_ranges = TIME_RANGES
    print(original_df.timestamp.min(), original_df.timestamp.max())
    print(time_ranges)

    fig, axs = plt.subplots(1, 2)
    sns.scatterplot(data=plot_df, x='px', y='py', hue='timestamp', linewidth=0.0, ax=axs[0])
    sns.scatterplot(data=plot_df, x='timestamp', y='px', color='red', linewidth=0.0, ax=axs[1], s=1.0)
    sns.scatterplot(data=plot_df, x='timestamp', y='py', color='green', linewidth=0.0, ax=axs[1], s=1.0)
    sns.scatterplot(data=anchors_df, x='px', y='py', linewidth=0.0,  ax=axs[0], color='red')

    side = np.sqrt(len((time_ranges)))
    fig, axs2 = plt.subplots(ceil(side), floor(side), sharex=False, sharey=True)
    fig.set_size_inches(1.0*np.array(axs2.shape))
    axs2 = axs2.reshape((-1,))
    for ax, time_range in zip(axs2, time_ranges):
        plot_df = original_df[(original_df.timestamp > time_range[0]) & (original_df.timestamp < time_range[1])]
        ax.scatter(plot_df.timestamp, plot_df.px, color='red', s=1.0)
        ax.scatter(plot_df.timestamp, plot_df.py, color='green', s=1.0)

        axs[1].scatter(plot_df.timestamp, plot_df.px, color='black', s=1.0)
        axs[1].scatter(plot_df.timestamp, plot_df.py, color='black', s=1.0)

    mask = np.array([False] * len(original_df))
    for time_range in time_ranges:
        mask = mask | ((original_df.timestamp > time_range[0]) & (original_df.timestamp < time_range[1])).values
    full_df = original_df[mask]
else:
    full_df = original_df
    print('using all measurements for', filename)

### 4. (optional) plot distance measurements

In [None]:
%matplotlib inline
fig, axs = plt.subplots(1, 2)
range_df = full_df.loc[full_df.system_id == range_system_id].copy()
sns.scatterplot(data=range_df, x='px', y='py', hue='timestamp', linewidth=0.0, ax=axs[0])
sns.scatterplot(data=anchors_df, x='px', y='py', linewidth=0.0,  ax=axs[0], color='red')
for a_id, px, py in zip(anchors_df.anchor_id, anchors_df.px, anchors_df.py):
    print(a_id, px, py)
    axs[0].annotate(s='a{}'.format(a_id), xy=(px+2,py+2), color='red')
axs[0].legend('')
sns.scatterplot(data=range_df, x='timestamp', y='px', hue='timestamp', linewidth=0.0, ax=axs[1])

plot_distance_times(full_df)

In [None]:
# Five stacked panels: the output of plot_distance_errors on top, followed by
# histograms of four different distance-error measures.
fig, axs = plt.subplots(5, 1, sharex=False)
fig.set_size_inches(5, 10)
ax = plot_distance_errors(full_df, ax=axs[0])
#savefig(fig, 'results/accuracy.pdf')

# Plain difference, computed directly on the pandas columns.
distance_error = full_df.distance - full_df.distance_gt
axs[1].set_ylabel('(d - d_gt)')
axs[1].hist(distance_error, bins=30)

# The remaining measures are computed on float32 copies of the two columns.
dist_measured = full_df.distance.values.astype(np.float32)
dist_true = full_df.distance_gt.values.astype(np.float32)

error_panels = (
    # The small offset in the denominator avoids a division by zero.
    (axs[2], '1/d(d**2 - d_gt**2)', (dist_measured**2 - dist_true**2)/(dist_true + 1e-3)),
    (axs[3], '(d**2 - d_gt**2)', dist_measured**2 - dist_true**2),
    (axs[4], '(d - d_gt)**2', (dist_measured - dist_true)**2),
)
for panel, label, distance_error in error_panels:
    panel.set_ylabel(label)
    _ = panel.hist(distance_error, bins=30)

In [None]:
# One panel per anchor: measurement positions with marker size given by the
# distance error, plus the anchor positions.
anchor_names = sorted(range_df.anchor_name.unique())
print(anchor_names)

# Prepare the anchor table once, before the loop (this does not depend on the
# loop variable): add the 'anchor name' column used as hue and convert the
# columns that can be parsed as numbers to (downcast) floats.
anchors_df.loc[:, 'anchor name'] = anchors_df.anchor_name.values
anchors_df = anchors_df.apply(pd.to_numeric, downcast='float', errors='ignore', axis=0)

# squeeze=False + ravel always yields a 1-D array of axes; without it a
# single anchor would return a bare Axes object, which cannot be zipped.
fig, axs = plt.subplots(1, len(anchor_names), sharey=True, squeeze=False)
axs = axs.ravel()
fig.set_size_inches(15, 4)
for ax, anchor_name in zip(axs, anchor_names):
    plot_df = range_df.loc[range_df.anchor_name==anchor_name].copy()
    plot_df.loc[:, 'distance error'] = plot_df.distance.values - plot_df.distance_gt.values
    plot_df.loc[:, 'anchor name'] = plot_df.anchor_name.values
    sns.scatterplot(data=plot_df, x='px', y='py', hue='anchor name', size='distance error',
                    hue_order=anchor_names, linewidth=0.0, alpha=0.8, ax=ax, legend=False)
    sns.scatterplot(data=anchors_df, x='px', y='py', hue='anchor name',
                    linewidth=0.0, legend=False, ax=ax)
    ax.axis('equal')
    ax.set_title(anchor_name)
# Draw the anchors once more on the last panel, this time with a legend that
# is placed to the right of it.
g = sns.scatterplot(data=anchors_df, x='px', y='py', hue='anchor name',
                    linewidth=0.0, legend='full', ax=ax)
g.legend(loc='center left', bbox_to_anchor=(1.25, 0.5), ncol=1)
fig.suptitle('Scatter plots with size proportional to distance error.')

# Global Algorithms

### Plot and print results from generate_results.py

In [None]:
def pretty_print_table(print_table, methods=None, value='mse'):
    """
    Pivot a results table into mean and std of `value` per method and (N, K).

    :param print_table: DataFrame with the columns 'method', 'n_measurements',
        'n_complexity' and `value`. It is not modified.
    :param methods: optional list of method names; if given, the rows of the
        pivot table are reindexed to exactly this list, in this order.
    :param value: name of the column to aggregate.

    :return: (styler, pt), where pt is the pivot table (index: method, columns:
        aggregation function, N, K) and styler is its Styler with
        highlight_both applied to every column.

    Side effect: sets the global pandas options display.precision (2) and
    display.max_columns (100).
    """
    # Fully qualified option names: the short forms 'precision' and
    # 'max_columns' match several options in recent pandas versions and are
    # rejected as ambiguous. The options are set before the Styler is
    # created so that they are already in effect when it is constructed.
    pd.set_option('display.precision', 2)
    pd.set_option('display.max_columns', 100)

    # Rename on a copy; renaming in place would alter the caller's DataFrame.
    renamed = print_table.rename(columns={'n_measurements': 'N',
                                          'n_complexity': 'K'})
    pt = pd.pivot_table(renamed, values=value, index='method', columns=['N', 'K'],
                        aggfunc=['mean', 'std'])
    if methods is not None:
        pt = pt.reindex(methods)
    #styler = pt.style.apply(highlight_min, axis=0)
    styler = pt.style.apply(highlight_both, axis=0)
    return styler, pt

def highlight_min(data, exclude=[0], color='red', index=0):
    """
    Return CSS attributes that highlight the (index+1)-th smallest entry.

    :param data: one-dimensional data, e.g. the Series that Styler.apply
        passes with axis=0 or axis=1.
    :param exclude: positions (not labels) of the entries to exclude when
        calculating the min.
    :param color: background color of the highlighted entry.
    :param index: set to 0 for smallest, 1 for second smallest, etc.

    :return: list with one string per entry: the background-color attribute
        for the selected entry and '' for all others. A list of empty strings
        is returned if the selected value occurs more than once in data, or
        if there are not enough non-excluded entries. None is returned for
        data that is not one-dimensional.
    """
    attr = 'background-color: {}'.format(color)

    if data.ndim != 1:
        return None

    # Select the candidates by position on the underlying array. Indexing the
    # Series itself with a list of integers is label-based whenever the index
    # holds integers, and would then pick the wrong rows or raise a KeyError.
    values = np.asarray(data)
    candidates = sorted(values[i] for i in range(len(values)) if i not in exclude)
    if index >= len(candidates):
        # Fewer than index+1 candidates: nothing to highlight.
        return [''] * len(data)

    is_min = data == candidates[index]
    if sum(is_min) > 1:
        # Ambiguous (tie): do not highlight anything.
        return [''] * len(data)
    return [attr if v else '' for v in is_min]
    
def highlight_both(data, exclude=[0]):
    """
    Highlight the smallest entry in red and the second smallest in orange.

    :param data: one-dimensional data, passed on to highlight_min.
    :param exclude: positions to exclude, passed on to highlight_min.

    :return: list with one CSS attribute string per entry, obtained by
        concatenating the two per-entry results of highlight_min.
    """
    smallest = highlight_min(data, exclude=exclude, color='red', index=0)
    runner_up = highlight_min(data, exclude=exclude, color='orange', index=1)
    combined = []
    for first, second in zip(smallest, runner_up):
        combined.append(first + second)
    return combined
    
def latex_print(pt, methods, fname='', **kwargs):
    """
    Print the 'mean' part of a pivot table as LaTeX and optionally save it.

    In each column, the smallest and the second smallest mean (ignoring the
    first row) are wrapped in a cellcolor command using the firstcolor and
    secondcolor macros respectively. The marking is done by plain string
    replacement of the values rounded to two decimals, so every occurrence of
    such a value in the LaTeX string (up to 20 per value) is coloured.

    :param pt: pivot table as returned by pretty_print_table; its columns are
        expected to have the three levels (aggregation function, N, K). It is
        not modified.
    :param methods: method keys, one per row of pt and in the same order. They
        are translated to display names with METHOD_DICT; keys that are not
        found are labelled 'unknown'.
    :param fname: if not empty, the LaTeX string is also written to this file.
    :param kwargs: passed on to DataFrame.to_latex.
    """
    from generate_results import METHOD_DICT

    # Relabel the rows on a copy, so that the caller's table keeps its index.
    pt = pt.copy()
    pt.index = [METHOD_DICT.get(m, 'unknown') for m in methods]

    # Column-wise sorted means without the first row: row 0 holds the
    # smallest, row 1 the second smallest value of each column.
    sorted_means = np.sort(pt['mean'].values[1:, :], axis=0)
    min_vals = sorted_means[0, :].round(4)
    second_vals = sorted_means[1, :].round(4)
    print(min_vals.shape)
    print(min_vals)
    print(second_vals.shape)

    # One group of K centered columns per value of N, separated by bars.
    N_levels = len(pt.columns.levels[1])
    K_levels = len(pt.columns.levels[2])
    column_format = 'l|' + ('c' * K_levels + '|') * N_levels

    latex = pt['mean'].to_latex(
        column_format=column_format,
        multicolumn_format='c',
        **kwargs)
    for min_val in min_vals.round(2):
        string = "\\cellcolor{{\\firstcolor}} {}".format(min_val)
        latex = latex.replace(str(min_val), string, 20)

    for min_val in second_vals.round(2):
        string = "\\cellcolor{{\\secondcolor}} {}".format(min_val)
        latex = latex.replace(str(min_val), string, 20)

    # Cosmetic fixes of the header cells, and a rule above the SRLS row.
    latex = latex.replace('K &','\\multicolumn{1}{r|}{K} &' )
    latex = latex.replace('N &','\\multicolumn{1}{r|}{N} &' )
    latex = latex.replace('SRLS  ','\\midrule SRLS ' )

    print(latex)
    if fname != '':
        with open(fname, 'w') as f:
            f.write(latex)
        print('wrote as', fname)

## bandlimited table

In [None]:
# Results file to tabulate and path of the LaTeX table written by the next
# cell. Swap in the commented-out pair to use the calibrated results.
fname = 'results/bandlimited_tuesday.pkl'
outname = 'results/table_bandlimited.tex'

#fname = 'results/bandlimited_tuesday_calib.pkl'
#outname = 'results/table_bandlimited_calib.tex'

# convert all numerical columns to float, ignore non-numeric.
result_df = pd.read_pickle(fname).apply(pd.to_numeric, errors='ignore')

# Rows shown in the table: K >= 5 and N >= 100, restricted to three values of N.
#print_table = result_df[result_df.n_measurements.isin([40, 100, 200, 300, 499])]
print_table = (
    result_df
    .loc[lambda d: (d.n_complexity >= 5) & (d.n_measurements >= 100)]
    .loc[lambda d: d.n_measurements.isin([100, 300, 499])]
)

# Row order of the table.
methods = [
    'gt', 'srls raw', 'srls', 'rls raw', 'rls', 'lm-ellipse',
    'lm-ours-weighted', 'ours', 'ours-weighted',
]
#pretty_print_table(print_table, methods=methods, value='cost_rls')
styler, __ = pretty_print_table(print_table, methods=methods, value='mse')
styler

In [None]:
# Row order of the exported table.
methods = [
    'gt', 'srls raw', 'srls', 'rls raw', 'rls', 'lm-ellipse',
    'lm-ours-weighted', 'ours', 'ours-weighted',
]
# Only the pivot table (second return value) is needed for the LaTeX export.
pt = pretty_print_table(print_table, methods=methods, value='mse')[1]
latex_print(pt, methods, fname=outname)

In [None]:
# Keep the rows with mae below 100 and more than 100 measurements.
plot_df = result_df[(result_df.mae < 100) & (result_df.n_measurements > 100)]

# One panel per n_complexity, one colour per method, logarithmic y axis.
fg = sns.FacetGrid(data=plot_df, col='n_complexity', hue='method', legend_out=True)
fg.map(plt.semilogy, 'n_measurements', 'mae', linestyle='', marker='.', alpha=0.5)
legend = plt.gca().get_legend()
plt.legend()

In [None]:
# understand why N=100, K=19 is so bad...
df = result_df[(result_df.n_measurements == 100) & (result_df.n_complexity == 19)]

# mse against n_it for this setting, one colour per method.
fig, ax = plt.subplots()
for method, df_m in df.groupby('method'):
    ax.scatter(x=df_m.n_it, y=df_m.mse, label=method)
ax.set_yscale('log')
ax.legend(loc='upper right')

## polynomial table

In [None]:
# Results file to tabulate and path of the LaTeX table written by the next cell.
fname = 'results/polynomial_tuesday.pkl'
outname = 'results/table_polynomial.tex'

# convert all numerical columns to float, ignore non-numeric.
result_df = pd.read_pickle(fname).apply(pd.to_numeric, errors='ignore')

# Only three values of N are shown in the table.
print_table = result_df[result_df.n_measurements.isin([10, 30, 50])]

# Row order of the table.
methods = [
    'gt', 'srls raw', 'srls', 'rls raw', 'rls', 'lm-line',
    'lm-ours-weighted', 'ours', 'ours-weighted',
]
styler, __ = pretty_print_table(print_table, methods=methods, value='mse')
styler

In [None]:
# Only the pivot table (second return value) is needed for the LaTeX export;
# index_names and index are forwarded to DataFrame.to_latex.
pt = pretty_print_table(print_table, methods=methods, value='mse')[1]
latex_print(pt, methods, fname=outname, index_names=False, index=False)

# Example reconstructions

Scratch space for trying out individual reconstructions.

In [None]:
from evaluate_dataset import compute_distance_matrix, compute_anchors

# Column of full_df that is used as distance measurement.
chosen_distance = 'distance'
#chosen_distance = 'distance_gt'
anchor_names = None

## Construct anchors.
anchors = compute_anchors(anchors_df, anchor_names)
print(anchors.shape)

## Construct times.
# Unique timestamps of the rows whose system_id equals range_system_id.
times = full_df.loc[full_df.system_id == range_system_id, 'timestamp'].unique()

## Construct D.
D, times = compute_distance_matrix(full_df, anchors_df, anchor_names, times, chosen_distance)
print(D.shape)

# More positive entries than rows means that at least one row of D holds
# more than one measurement; report how many rows are affected.
has_measurement = D > 0
if np.sum(has_measurement) > D.shape[0]:
    n_rows_multiple = np.sum(np.sum(has_measurement, axis=1)>1)
    print('Warning: multiple measurements for times:{}/{}!'.format(
          n_rows_multiple, D.shape[0]))

## Construct ground truth.
points_gt = get_ground_truth(full_df, times)

In [None]:
from other_algorithms import apply_algorithm
from fit_curve import fit_trajectory
print(D.shape)

# Ground-truth positions as reference.
fig, ax = plt.subplots()
ax.scatter(points_gt.px, points_gt.py, s=10)

traj.set_n_complexity(2)

# Trajectory estimated from the distance measurements with the chosen method.
#method = 'ours-weighted'
method = 'lm-line'
coeffs, __, __ = apply_algorithm(traj, D, times, anchors, method=method)
traj.set_coeffs(coeffs=coeffs)
traj.plot_pretty(times=times, ax=ax, color='red', label='fitted')

traj.print()

# For comparison: the same trajectory model fitted directly to the
# ground-truth positions.
coeffs = fit_trajectory(points_gt.T, times, traj)
traj.set_coeffs(coeffs=coeffs)
traj.plot_pretty(times=times, ax=ax, color='green', label='best fit')

ax.set_xlim(*xlim)
ax.set_ylim(*ylim)
ax.legend()

# Plot piecewise linear reconstructions

In [None]:
# Results to plot. Swap in the commented-out line to use the calibrated
# results instead (previously both files were read and the first one was
# discarded immediately).
#result_df = pd.read_pickle('results/polynomial_monday_calib.pkl')
result_df = pd.read_pickle('results/polynomial_monday.pkl')

chosen_measure = 'mse'

# One figure per N with one column of panels per K: the top panel shows the
# chosen measure of every method, the bottom panel the chosen measure
# against cost_rls.
for N, df_N in result_df.groupby('n_measurements'):
    Ks = df_N.n_complexity.unique()
    fig, axs = plt.subplots(2, len(Ks), squeeze=False, sharey=True)
    fig.suptitle(f'N={N}')
    # NOTE: df_K keeps its last value after the loops; the next cell reads it.
    for i, (K, df_K) in enumerate(df_N.groupby('n_complexity')):
        ax1, ax2 = axs[:, i]
        for method, df_method in df_K.groupby('method'):
            ax1.plot(df_method[chosen_measure].values, label=method)
            ax2.scatter(df_method['cost_rls'].values, df_method[chosen_measure], label=method)
        ax2.set_xscale('log')
        ax2.set_yscale('log')
        # Axis limits are capped at 200 / 2000 and derived from all methods of
        # this (N, K) group, not only from the method that happened to be
        # iterated last.
        ylim_chosen = min(200, df_K[chosen_measure].max())
        xlim_rls = min(2000, df_K['cost_rls'].max())
        ax1.set_ylim([1, ylim_chosen])
        ax2.set_ylabel(str.upper(chosen_measure))
        ax2.set_xlim([1, xlim_rls])
        ax2.legend(loc='lower left', bbox_to_anchor=[1, 0])

In [None]:
from generate_results_polynomial import TIME_RANGES
from trajectory_creator import get_trajectory

traj = get_trajectory('Plaza1')

# Index into TIME_RANGES; the same number selects the result rows via n_it.
time_range = 2
# NOTE: df_K is left over from the loops of the previous cell (last N, last K),
# so that cell has to be run first.
df = df_K[df_K.n_it == time_range]
fig, ax = plt.subplots()

# The time window and plotting times do not depend on the method. They are
# computed before the loop so that t_range also exists for the ground-truth
# selection below when df contains no method at all.
t_range = TIME_RANGES[time_range]
times = np.linspace(*t_range, 10)

# Plot the reconstruction of every method, using the coefficients stored in
# the 'plotting' column of its first row.
traj_plot = traj.copy()
for method in df.method.unique():
    df_method = df.loc[df.method==method]
    coeffs, points = df_method.iloc[0].loc['plotting']
    traj_plot.set_coeffs(coeffs=coeffs)
    traj_plot.plot_pretty(times=times, label=method, ax=ax)

# Positions of all rows of full_df inside the (open) time window.
points_gt = full_df.loc[(full_df.timestamp < t_range[1]) & (full_df.timestamp > t_range[0]), ['px', 'py']].values
ax.scatter(*points_gt.T, color='black', label='gt', s=1)
ax.set_xlim(*xlim)
ax.set_ylim(*ylim)
ax.legend(loc='best')

In [None]:
# Results file to tabulate; swap in the commented-out line to use the
# calibrated results instead.
#fname = 'results/polynomial_tuesday_calib.pkl'
fname = 'results/polynomial_tuesday.pkl'

# convert all numerical columns to float, ignore non-numeric.
result_df = pd.read_pickle(fname).apply(pd.to_numeric, errors='ignore')

# All rows are tabulated here (no filtering on N or K).
print_table = result_df
methods = [
    'gt', 'srls raw', 'srls', 'rls raw', 'rls',
    'lm-line', 'lm-ours-weighted',
    'ours', 'ours-weighted',
]
st, __ = pretty_print_table(print_table, methods=methods, value='mse')
st