In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import glob
import numpy as np
import seaborn as sns
import pandas as pd

from collections import OrderedDict

In [2]:
def pretty_dataset_name(dataset_name):
    if dataset_name == 'eth':
        return 'ETH - Univ'
    elif dataset_name == 'hotel':
        return 'ETH - Hotel'
    elif dataset_name == 'univ':
        return 'UCY - Univ'
    elif dataset_name == 'zara1':
        return 'UCY - Zara 1'
    elif dataset_name == 'zara2':
        return 'UCY - Zara 2'
    else:
        return dataset_name

# Displacement Error Analyses

In [3]:
errors_df = pd.concat([pd.read_csv(f) for f in glob.glob('plots/data/*_errors.csv')], ignore_index=True)

In [4]:
errors_df.head()

Unnamed: 0,data_precondition,dataset,method,run,node,sample,error_type,error_value
0,prev,zara2,our_full,0,Pedestrian/5,0,mse,0.590357
1,prev,zara2,our_full,0,Pedestrian/5,1,mse,1.021359
2,prev,zara2,our_full,0,Pedestrian/5,2,mse,0.626
3,prev,zara2,our_full,0,Pedestrian/5,3,mse,0.842233
4,prev,zara2,our_full,0,Pedestrian/5,4,mse,1.286998


In [5]:
dataset_names = ['eth', 'hotel', 'univ', 'zara1', 'zara2']

In [6]:
sgan_err_df = errors_df[(errors_df['data_precondition'] == 'prev') & (errors_df['method'] == 'sgan')]
our_ml_err_df = errors_df[(errors_df['data_precondition'] == 'prev') & (errors_df['method'] == 'our_most_likely')]
our_full_err_df = errors_df[(errors_df['data_precondition'] == 'prev') & (errors_df['method'] == 'our_full')]

In [7]:
sgan_err_df.head()

Unnamed: 0,data_precondition,dataset,method,run,node,sample,error_type,error_value
37564000,prev,hotel,sgan,0,Pedestrian/9,0,mse,0.054958
37564001,prev,hotel,sgan,0,Pedestrian/9,1,mse,0.053381
37564002,prev,hotel,sgan,0,Pedestrian/9,2,mse,0.090009
37564003,prev,hotel,sgan,0,Pedestrian/9,3,mse,0.020534
37564004,prev,hotel,sgan,0,Pedestrian/9,4,mse,0.054487


In [8]:
sgan_err_df.dtypes

data_precondition     object
dataset               object
method                object
run                    int64
node                  object
sample                 int64
error_type            object
error_value          float64
dtype: object

In [9]:
random_subsamples = np.random.choice(2000, size=100, replace=False).astype(int).tolist()

for dataset_name in dataset_names:
    print(dataset_name)
    curr_sgan_df = sgan_err_df[sgan_err_df['dataset'] == dataset_name]
    curr_our_ml_df = our_ml_err_df[our_ml_err_df['dataset'] == dataset_name]
    curr_our_full_df = our_full_err_df[our_full_err_df['dataset'] == dataset_name]

    subsamp_sgan_df = curr_sgan_df[curr_sgan_df['sample'].isin(random_subsamples)]
    subsamp_our_ml_df = curr_our_ml_df[curr_our_ml_df['sample'].isin(random_subsamples)]
    subsamp_our_full_df = curr_our_full_df[curr_our_full_df['sample'].isin(random_subsamples)]        
    
    sgan_sample_errs_df = subsamp_sgan_df.groupby(['run', 'sample', 'error_type'])['error_value'].agg(['sum', 'count']).reset_index()
    sgan_best_sample_errs_df = sgan_sample_errs_df.iloc[sgan_sample_errs_df.groupby(["run", "error_type"])['sum'].idxmin()]
    described_sgan_errs = sgan_best_sample_errs_df.groupby(['error_type']).sum().reset_index()
    described_sgan_errs['best_of_100_mean_error'] = described_sgan_errs['sum'] / described_sgan_errs['count']
    
    our_ml_sample_errs_df = subsamp_our_ml_df.groupby(['run', 'sample', 'error_type'])['error_value'].agg(['sum', 'count']).reset_index()
    our_ml_best_sample_errs_df = our_ml_sample_errs_df.iloc[our_ml_sample_errs_df.groupby(["run", "error_type"])['sum'].idxmin()]
    described_our_ml_errs = our_ml_best_sample_errs_df.groupby(['error_type']).sum().reset_index()
    described_our_ml_errs['best_of_100_mean_error'] = described_our_ml_errs['sum'] / described_our_ml_errs['count']

    our_full_sample_errs_df = subsamp_our_full_df.groupby(['run', 'sample', 'error_type'])['error_value'].agg(['sum', 'count']).reset_index()
    our_full_best_sample_errs_df = our_full_sample_errs_df.iloc[our_full_sample_errs_df.groupby(["run", "error_type"])['sum'].idxmin()]
    described_our_full_errs = our_full_best_sample_errs_df.groupby(['error_type']).sum().reset_index()
    described_our_full_errs['best_of_100_mean_error'] = described_our_full_errs['sum'] / described_our_full_errs['count']
    
    print('-- SGAN --')
    print(described_sgan_errs)
    
    print('-- OUR ML --')
    print(described_our_ml_errs)
    
    print('-- OUR FULL --')
    print(described_our_full_errs)
    
    print()

eth
-- SGAN --
  error_type   run  sample         sum  count  best_of_100_mean_error
0        fse  2211   62218  330.756034    303                1.091604
1        mse  2211   60360  187.259471    303                0.618018
-- OUR ML --
  error_type   run  sample         sum  count  best_of_100_mean_error
0        fse  2211   59183  237.275175    303                0.783086
1        mse  2211   65033  121.070910    303                0.399574
-- OUR FULL --
  error_type   run  sample         sum  count  best_of_100_mean_error
0        fse  2211   58188  213.465223    303                0.704506
1        mse  2211   65980  111.925879    303                0.369392

hotel
-- SGAN --
  error_type   run  sample         sum  count  best_of_100_mean_error
0        fse  3486   79891  286.875943    346                0.829121
1        mse  3486   78395  137.182538    346                0.396481
-- OUR ML --
  error_type   run  sample         sum  count  best_of_100_mean_error
0        fse  34