## Evaluation of Accuracy 
To measure drift without ground truth coordinates, we calculate RMSE between the distance travelled as outputted by the GPS coordinates and the actual distance we let our robot run. 

In [8]:
import glob
import os
import re

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from math import sqrt

def distance(lat1, lon1, lat2, lon2) -> float:
    """Calculates distance in feet between two pairs 
    of coordinates using Vicenty's algorithm and 
    assuming Earth is spherical. 
    """

    #convert latitiude and longitude to spherical coords
    lat1, lon1 = np.deg2rad(lat1), np.deg2rad(lon1)
    lat2, lon2 = np.deg2rad(lat2), np.deg2rad(lon2)
    
    # average radius of earth in feet
    r = 20890565.9449 
    
    p1 = 0.5*np.pi-lat1
    p2 = 0.5*np.pi-lat2
    a = np.sin(p1)*np.sin(p2)*np.cos(lon1-lon2)+np.cos(p1)*np.cos(p2)
    
    return r * np.arccos(a)

def rmse(expected, actual) -> float:    
    """ Calculates root mean squared error between the
    calcualted distances of GPS outputted coordinates
    and distance of track for each batch.
    """
    total_preds = len(expected)
    sum_error = 0.0
    
    for i in range(total_preds):
        squared_error = (actual[i]-expected[i])**2
        sum_error += squared_error
        
    return sqrt(sum_error/float(total_preds))

def error(expected, actual) -> float:
    """ Calculates error between expected distance
    and actual distance as indicated by GPS outputs
    for a single run.
    """
    total_preds = 1
    squared_error = (actual[0]-expected[0])**2
    
    return sqrt(squared_error)

def calc_distances_batch(path) -> pd.DataFrame():
    """Compiles a dataframe with batch name, run name, start
    position coordinates, end positionn coordinates, length 
    of track, calculated distance, and rmse of the batch.
    """
    
    ground_truth = int(path.split("../data/gps_data/", 1)[1].replace("ft",""))
    files = glob.glob(path + "/*.csv")
    df_dist = pd.DataFrame(columns = ['batch', 'run','start_lat_lon','finish_lat_lon','expected_dist','actual_dist','rmse'])
    
    for file in files:
        df = pd.read_csv(file)
        start = df.iloc[0]
        finish = df.iloc[-1]
        calc_dist = distance(start.lat, start.lon, finish.lat, finish.lon)
        name = re.search('(?:[^/](?!\/))+(?=_cleaned.csv)', file)
        batch = re.search('(?:\/gps_data\/)(.*(?=\/))', file)
        df_dist = df_dist.append({'batch': batch.group(1),'run' : name.group(), 'start_lat_lon': (start.lat, start.lon), 
                                  'finish_lat_lon': (finish.lat, finish.lon), 'expected_dist' : ground_truth, 
                                  'actual_dist' : calc_dist}, 
                                 ignore_index = True)
    df_dist.rmse = rmse(df_dist.expected_dist, df_dist.actual_dist)
    
    return df_dist

def calc_distances_run(path) -> pd.DataFrame():
    """Compiles a dataframe with batch name, run name, start
    position coordinates, end positionn coordinates, length 
    of track, calculated distance, and rmse of each file.
    """
    
    path = "../data/gps_data"
    all_raw_folders = glob.glob(os.path.join(path, '*'))
    df_all_runs = pd.DataFrame(columns = ['batch', 'run','start_lat_lon','finish_lat_lon','expected_dist','actual_dist','error'])


    for folder in all_raw_folders:
        ground_truth = int(folder.split("../data/gps_data/", 1)[1].replace("ft",""))
        files = glob.glob(folder + "/*.csv")
        for file in files:
            df = pd.read_csv(file)
            start = df.iloc[0]
            finish = df.iloc[-1]
            calc_dist = distance(start.lat, start.lon, finish.lat, finish.lon)
            name = re.search('(?:[^/](?!\/))+(?=_cleaned.csv)', file)
            batch = re.search('(?:\/gps_data\/)(.*(?=\/))', file)
            row = pd.DataFrame({'batch': batch.group(1),'run' : name.group(), 'start_lat_lon': [(start.lat, start.lon)], 
                                      'finish_lat_lon': [(finish.lat, finish.lon)], 'expected_dist' : ground_truth, 
                                      'actual_dist' : calc_dist, 'error': "temp"}, index=[0])

            row.error = error(row.expected_dist, row.actual_dist)
            df_all_runs = df_all_runs.append(row)
        
    return df_all_runs

def make_rmse_table(path):
    df = pd.DataFrame(columns = ['ground_truth', 'rmse'])
    all_batches = glob.glob(os.path.join(path, '*'))
    
    for batch in all_batches:
        batch_df = calc_distances_batch(batch)
        df = df.append({'ground_truth' : batch_df.batch.iloc[0], 'rmse' : batch_df.rmse.iloc[0]}, ignore_index=True)
    df = df.set_index('ground_truth')
    df = df.sort_index(ascending=True)
    return df

### Error of Each Run

In [18]:
path = "../data/gps_data/"
calc_distances_run(path)

Unnamed: 0,batch,run,start_lat_lon,finish_lat_lon,expected_dist,actual_dist,error
0,0ft,20-11-2020_12-24,"(32.869536, -117.22043899999998)","(32.869533000000004, -117.220459)",0,6.218087,6.218087
0,0ft,20-11-2020_12-21,"(32.869538, -117.22046599999999)","(32.86956, -117.22048000000001)",0,9.097022,9.097022
0,0ft,20-11-2020_12-22,"(32.869549, -117.22048500000001)","(32.869559, -117.220477)",0,4.402358,4.402358
0,0ft,20-11-2020_12-19,"(32.869572, -117.22049299999999)","(32.869535, -117.220455)",0,17.817315,17.817315
0,0ft,20-11-2020_12-23,"(32.869559, -117.220487)","(32.869536, -117.220448)",0,14.59433,14.59433
0,20ft,08-12-2020_15-06-42,"(32.869436, -117.220828)","(32.869448999999996, -117.22075900000002)",20,21.656736,1.656736
0,20ft,08-12-2020_15-08-13,"(32.869468, -117.22081899999999)","(32.869465999999996, -117.22076299999999)",20,17.166372,2.833628
0,20ft,08-12-2020_15-07-28,"(32.869457000000004, -117.220826)","(32.869465999999996, -117.220751)",20,23.201302,3.201302
0,20ft,08-12-2020_15-09-01,"(32.869458, -117.220825)","(32.869454, -117.22076000000001)",20,19.956815,0.043185
0,20ft,08-12-2020_15-09-40,"(32.869453, -117.220825)","(32.869465999999996, -117.220751)",20,23.149035,3.149035


### RMSE of Each Batch

In [20]:
path = "../data/gps_data/"
make_rmse_table(path)

Unnamed: 0_level_0,rmse
ground_truth,Unnamed: 1_level_1
0ft,11.586651
10ft,1.841198
20ft,2.487603
50ft,3.884429
5ft,2.73306


### Detailed information of calculating RMSE of 5ft batch

In [17]:
path = "../data/gps_data/0ft"
calc_distances_batch(path)

Unnamed: 0,batch,run,start_lat_lon,finish_lat_lon,expected_dist,actual_dist,rmse
0,0ft,20-11-2020_12-24,"(32.869536, -117.22043899999998)","(32.869533000000004, -117.220459)",0,6.218087,11.586651
1,0ft,20-11-2020_12-21,"(32.869538, -117.22046599999999)","(32.86956, -117.22048000000001)",0,9.097022,11.586651
2,0ft,20-11-2020_12-22,"(32.869549, -117.22048500000001)","(32.869559, -117.220477)",0,4.402358,11.586651
3,0ft,20-11-2020_12-19,"(32.869572, -117.22049299999999)","(32.869535, -117.220455)",0,17.817315,11.586651
4,0ft,20-11-2020_12-23,"(32.869559, -117.220487)","(32.869536, -117.220448)",0,14.59433,11.586651


### Detailed information of calculating RMSE of 5ft batch

In [15]:
path = "../data/gps_data/5ft"
calc_distances_batch(path)

Unnamed: 0,batch,run,start_lat_lon,finish_lat_lon,expected_dist,actual_dist,rmse
0,5ft,08-12-2020_14-27-40,"(32.869504, -117.220803)","(32.869496000000005, -117.220789)",5,5.190303,2.73306
1,5ft,08-12-2020_14-23,"(32.869514, -117.220803)","(32.869488, -117.220786)",5,10.810455,2.73306
2,5ft,08-12-2020_15-01-09,"(32.869475, -117.220805)","(32.869482, -117.220799)",5,3.143912,2.73306
3,5ft,08-12-2020_14-25,"(32.869496000000005, -117.22079699999999)","(32.869505, -117.220785)",5,4.941634,2.73306
4,5ft,08-12-2020_14-24,"(32.869484, -117.220793)","(32.869487, -117.220776)",5,5.319389,2.73306


### Detailed information of calculating RMSE of 10ft batch

In [11]:
path = "../data/gps_data/10ft"
calc_distances_batch(path)

Unnamed: 0,batch,run,start_lat_lon,finish_lat_lon,expected_dist,actual_dist,rmse
0,10ft,08-12-2020_15-05-54,"(32.869433, -117.220828)","(32.869437, -117.220805)",10,7.193512,1.841198
1,10ft,08-12-2020_15-05-22,"(32.869428000000006, -117.22083500000001)","(32.869434000000005, -117.220801)",10,10.638778,1.841198
2,10ft,08-12-2020_15-04-47,"(32.869456, -117.22081499999999)","(32.869436, -117.2208)",10,8.626824,1.841198


### Detailed information of calculating RMSE of 20ft batch

In [12]:
path = "../data/gps_data/20ft"
calc_distances_batch(path)

Unnamed: 0,batch,run,start_lat_lon,finish_lat_lon,expected_dist,actual_dist,rmse
0,20ft,08-12-2020_15-06-42,"(32.869436, -117.220828)","(32.869448999999996, -117.22075900000002)",20,21.656736,2.487603
1,20ft,08-12-2020_15-08-13,"(32.869468, -117.22081899999999)","(32.869465999999996, -117.22076299999999)",20,17.166372,2.487603
2,20ft,08-12-2020_15-07-28,"(32.869457000000004, -117.220826)","(32.869465999999996, -117.220751)",20,23.201302,2.487603
3,20ft,08-12-2020_15-09-01,"(32.869458, -117.220825)","(32.869454, -117.22076000000001)",20,19.956815,2.487603
4,20ft,08-12-2020_15-09-40,"(32.869453, -117.220825)","(32.869465999999996, -117.220751)",20,23.149035,2.487603


### Detailed information of calculating RMSE of 50ft batch

In [13]:
path = "../data/gps_data/50ft"
calc_distances_batch(path)

Unnamed: 0,batch,run,start_lat_lon,finish_lat_lon,expected_dist,actual_dist,rmse
0,50ft,08-12-2020_15-15-05,"(32.869474, -117.22081200000001)","(32.869456, -117.220678)",50,41.55625,3.884429
1,50ft,08-12-2020_15-16-19,"(32.869440999999995, -117.22083700000002)","(32.86945, -117.220671)",50,50.941003,3.884429
2,50ft,08-12-2020_15-18-49,"(32.869440999999995, -117.220828)","(32.869445, -117.22067)",50,48.40692,3.884429
3,50ft,08-12-2020_15-17-30,"(32.869464, -117.220823)","(32.869454, -117.22065900000001)",50,50.354581,3.884429
4,50ft,08-12-2020_15-13-49,"(32.869433, -117.220825)","(32.869446, -117.220665)",50,49.226744,3.884429
