# Processing Simulation Data
This notebook is to be used as a local tool to process simulation data that is generated after running ```run_sim/sh``` in the repository. This allows all simulation information to be aggregated nicely and averaged. PANDAS is the primary tool for processing the data.

In [5]:
#imports
import pandas as pd
import numpy as np
import scipy as sp
import matplotlib
import matplotlib.pyplot as plt
import math
from IPython.display import display
from matplotlib.colors import LogNorm
from matplotlib import cm
import os

%matplotlib inline

In [45]:
#get the data files
f_mean = []
f_mes = []
f_ei = []
path= '/home/vpreston/Documents/IPP/informative-path-planning/experiments'
for root, dirs, files in os.walk(path):
    for name in files:
        if 'metric' in name and 'mean' in root:
            f_mean.append(root + "/" + name)
        elif 'metric' in name and 'exp_improve' in root:
            f_ei.append(root + "/" + name)
        elif 'metric' in name and 'mes' in root:
            f_mes.append(root + "/" + name)

In [46]:
# variables for making dataframes
l = ['time', 'info_gain','aqu_fun', 'MSE', 'hotspot_error','max_loc_error', 'max_val_error', 
                    'simple_regret', 'sample_regret_loc', 'sample_regret_val', 'regret', 'info_regret',
                    'current_highest_obs', 'current_highest_obs_loc_x', 'current_highest_obs_loc_y',
                    'robot_loc_x', 'robot_loc_y', 'robot_loc_a', 'star_obs_0', 'star_obs_loc_x_0',
                    'star_obs_loc_y_0', 'star_obs_1', 'star_obs_loc_x_1', 'star_obs_loc_y_1']

In [67]:
#All UCB data
d = f_mean[0]
mean_data = pd.read_table(d, delimiter = " ", header=None)
mean_data = mean_data.T
mean_data.columns = l

for m in f_mean[1:]:
    temp_data = pd.read_table(m, delimiter = " ", header=None)
    temp_data = temp_data.T
    temp_data.columns = l
    mean_data = mean_data.append(temp_data)

mean_data.head(5)
print len(mean_data)

3325


In [68]:
#All MES data
d = f_mes[0]
mes_data = pd.read_table(d, delimiter = " ", header=None)
mes_data = mes_data.T
mes_data.columns = l

for m in f_mes[1:]:
    temp_data = pd.read_table(m, delimiter = " ", header=None)
    temp_data = temp_data.T
    temp_data.columns = l
    mes_data = mes_data.append(temp_data)

mes_data.head(5)
print len(mes_data)

3325


In [49]:
#All EI data
d = f_ei[0]
ei_data = pd.read_table(d, delimiter = " ", header=None)
ei_data = ei_data.T
ei_data.columns = l

for m in f_mes[1:]:
    temp_data = pd.read_table(m, delimiter = " ", header=None)
    temp_data = temp_data.T
    temp_data.columns = l
    ei_data = ei_data.append(temp_data)

ei_data.head(5)
print len(e)

Unnamed: 0,time,info_gain,aqu_fun,MSE,hotspot_error,max_loc_error,max_val_error,simple_regret,sample_regret_loc,sample_regret_val,...,current_highest_obs_loc_y,robot_loc_x,robot_loc_y,robot_loc_a,star_obs_0,star_obs_loc_x_0,star_obs_loc_y_0,star_obs_1,star_obs_loc_x_1,star_obs_loc_y_1
0,0.0,15.55043,1735.107652,111.295905,92.145292,7.443229,26.184901,1.222649,0.0,0.0,...,0.0,5.0,5.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1,1.0,387.784443,2745.989648,86.153723,98.274882,0.128332,2.122014,2.294012,1.222649,12.087247,...,5.0,3.94593,3.93279,3.933185,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
2,2.0,745.228514,3688.259715,74.718365,83.418925,0.128332,2.067234,2.785448,1.758331,15.511727,...,5.0,4.769625,2.679184,5.293712,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
3,3.0,1102.875664,4588.955687,76.671806,79.483562,0.128332,2.026948,2.788028,2.100703,19.895468,...,5.0,6.258166,2.494131,6.159501,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
4,4.0,1435.296924,5473.308973,67.215036,91.143136,0.128332,2.01126,2.735009,2.272534,21.924105,...,5.0,7.363737,3.507892,0.742105,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


## Getting Averages at the End of Time
First, let's go ahead and get a snapshot of the last frame for all of these simulations

In [66]:
mean_end = mean_data[mean_data.time == 174]
mes_end = mes_data[mes_data.time == 174]
ei_end = ei_data[ei_data.time == 174]

for e in l:
    print '-------------'
    print str(e)
    print 'MEAN:    ' + str(mean_end[e].mean())
    print 'MES :    ' + str(mes_end[e].mean())
    print 'EI  :    ' + str(ei_end[e].mean())

print ei_end

-------------
time
MEAN:    174.0
MES :    174.0
EI  :    174.0
-------------
info_gain
MEAN:    22802.0843046
MES :    21712.5414852
EI  :    21375.8853657
-------------
aqu_fun
MEAN:    348528.664553
MES :    455.220694362
EI  :    3315.80827459
-------------
MSE
MEAN:    0.544991048711
MES :    0.701390341014
EI  :    0.904887010668
-------------
hotspot_error
MEAN:    0.733459822195
MES :    1.15451642702
EI  :    1.49670382631
-------------
max_loc_error
MEAN:    1.49897613682
MES :    1.26491043611
EI  :    1.26491043611
-------------
max_val_error
MEAN:    0.723198105172
MES :    0.725726122815
EI  :    0.718010978184
-------------
simple_regret
MEAN:    3.37217637778
MES :    1.15679357902
EI  :    1.25253951782
-------------
sample_regret_loc
MEAN:    3.95198933469
MES :    3.09904567349
EI  :    3.16076842316
-------------
sample_regret_val
MEAN:    17.9552478189
MES :    17.861398184
EI  :    17.7038335187
-------------
regret
MEAN:    3998.72651575
MES :    3870.4933253
EI 