### Set up some basic stuff

In [1]:
import logging
# Lower the root logger's threshold to DEBUG so that debug-level records
# are not filtered out by the root logger's level.
logging.getLogger().setLevel(logging.DEBUG)

In [2]:
import folium
import folium.features as fof
import folium.utilities as ful
import branca.element as bre
import json
import geojson as gj
import arrow

import shapely.geometry as shpg
import pandas as pd
import geopandas as gpd

In [3]:
def lonlat_swap(lon_lat):
    """Return a new list with the elements of ``lon_lat`` in reverse order.

    The notebook uses this to turn ``[lon, lat]`` coordinate pairs into the
    ``[lat, lon]`` order it hands to the folium calls.
    """
    swapped = list(lon_lat)
    swapped.reverse()
    return swapped

In [4]:
def get_row_count(n_maps, cols):
    """Return how many grid rows are needed to place ``n_maps`` maps.

    :param n_maps: total number of maps to lay out
    :param cols: number of maps per row
    :return: ``n_maps / cols`` rounded up, as an ``int``
    """
    # Integer arithmetic on purpose: under Python 3 the `/` operator is true
    # division, which produced fractional row counts such as 2.25 for 5 maps
    # in 4 columns.
    rows, leftover = divmod(n_maps, cols)
    if leftover != 0:
        rows += 1
    return rows

In [5]:
def get_one_marker(loc, disp_color):
    """Build the folium layer for one GeoJSON-like location entry.

    :param loc: mapping with a ``geometry`` entry (``type`` and
        ``coordinates``) and a ``properties`` entry carrying ``name``
    :param disp_color: color handed to the folium constructors
    :return: a ``folium.Marker`` for ``Point`` geometries, a
        ``folium.PolyLine`` over the single ring of a ``Polygon`` geometry,
        and ``None`` for any other geometry type
    """
    geometry = loc["geometry"]
    kind = geometry["type"]

    if kind == "Point":
        marker_latlng = lonlat_swap(geometry["coordinates"])
        return folium.Marker(
            marker_latlng,
            icon=folium.Icon(color=disp_color),
            popup="%s" % loc["properties"]["name"],
        )

    if kind == "Polygon":
        rings = geometry["coordinates"]
        # Only the first ring is drawn, so polygons with more rings are rejected.
        assert len(rings) == 1,\
            "Only simple polygons supported!"
        outline_latlng = [lonlat_swap(vertex) for vertex in rings[0]]
        return folium.PolyLine(
            outline_latlng,
            color=disp_color,
            fill=disp_color,
            popup="%s" % loc["properties"]["name"],
        )

    # Other geometry types are not handled; the caller receives None.
    return None

In [6]:
def get_marker(loc, disp_color):
    """Return a list of folium layers for ``loc``.

    :param loc: either a list of location entries or a single entry; a single
        entry is wrapped into a one-element list after printing a notice
    :param disp_color: color forwarded to ``get_one_marker``
    :return: list with one ``get_one_marker`` result per entry
    """
    if type(loc) is not list:
        print("Found single entry, is this expected?")
        loc = [loc]
    return [get_one_marker(entry, disp_color) for entry in loc]

### Read the data

In [7]:
# Read both JSON documents through context managers so the file handles are
# closed as soon as parsing finishes instead of being left to the garbage
# collector.
with open("final_sfbayarea_filled_reroutes/train_bus_ebike_mtv_ucb.filled.reroute.json") as spec_fp:
    spec_to_validate = json.load(spec_fp)
with open("sensing_regimes.all.specs.json") as sensing_fp:
    sensing_configs = json.load(sensing_fp)

### Validating the time range

In [8]:
# Parse the spec's start/end timestamps once, show them, then check that their
# YYYY-MM-DD rendering agrees with the formatted dates stored in the spec.
start_arrow = arrow.get(spec_to_validate["start_ts"])
end_arrow = arrow.get(spec_to_validate["end_ts"])
print("Experiment runs from %s -> %s" % (start_arrow, end_arrow))

start_fmt_time_to_validate = start_arrow.format("YYYY-MM-DD")
end_fmt_time_to_validate = end_arrow.format("YYYY-MM-DD")

expected_start_fmt_date = spec_to_validate["start_fmt_date"]
if start_fmt_time_to_validate != expected_start_fmt_date:
    print("VALIDATION FAILED, got start %s, expected %s" % (start_fmt_time_to_validate, expected_start_fmt_date))

expected_end_fmt_date = spec_to_validate["end_fmt_date"]
if end_fmt_time_to_validate != expected_end_fmt_date:
    print("VALIDATION FAILED, got end %s, expected %s" % (end_fmt_time_to_validate, expected_end_fmt_date))

Experiment runs from 2019-07-16T07:00:00+00:00 -> 2020-04-30T07:00:00+00:00


### Validating calibration trips

In [9]:
def get_map_for_calibration_test(trip):
    """Return a folium map showing the start and end of a calibration test.

    :param trip: calibration test mapping with ``start_loc``, ``end_loc`` and
        ``id`` entries; each location, when present, carries ``coordinates``
        and ``name``
    :return: a ``folium.Map``; it is left empty when either location is
        ``None``, otherwise it holds a green start marker, a red end marker
        and a line joining them, and is fitted to the two points
    """
    curr_map = folium.Map()
    if trip["start_loc"] is not None and trip["end_loc"] is not None:
        start_loc, end_loc = trip["start_loc"], trip["end_loc"]
        start_latlng = lonlat_swap(start_loc["coordinates"])
        end_latlng = lonlat_swap(end_loc["coordinates"])

        start_pin = folium.Marker(start_latlng, icon=folium.Icon(color="green"),
                                  popup="Start: %s" % start_loc["name"])
        start_pin.add_to(curr_map)
        end_pin = folium.Marker(end_latlng, icon=folium.Icon(color="red"),
                                popup="End: %s" % end_loc["name"])
        end_pin.add_to(curr_map)

        connector = folium.PolyLine([start_latlng, end_latlng], popup=trip["id"])
        connector.add_to(curr_map)
        curr_map.fit_bounds([start_latlng, end_latlng])
    return curr_map

In [10]:
calibration_tests = spec_to_validate["calibration_tests"]
rows = get_row_count(len(calibration_tests), 4)
# NOTE(review): the (rows, 4) tuple is passed as Figure's first positional
# argument -- confirm that this is the parameter it was meant for.
calibration_maps = bre.Figure((rows,4))
for i, t in enumerate(calibration_tests):
    # The sensing config embedded in each test must equal the reference config
    # registered under the same id in sensing_configs.
    if t["config"]["sensing_config"] != sensing_configs[t["config"]["id"]]["sensing_config"]:
        # The message previously had no placeholder; with a dict on the right
        # of `%` Python silently discards it, so the failing test was never
        # named. Fall back to the whole test if it has no "id".
        print("Mismatch in config for test %s" % (t.get("id", t),))
    curr_map = get_map_for_calibration_test(t)
    # Subplot positions are 1-based.
    calibration_maps.add_subplot(rows, 4, i+1).add_child(curr_map)
calibration_maps

### Validating evaluation trips

In [11]:
def add_waypoint_markers(waypoint_coords, curr_map):
    """Add one numbered marker per waypoint to ``curr_map``.

    :param waypoint_coords: mapping whose ``geometry.coordinates`` entry is
        iterated; each coordinate is reversed with ``lonlat_swap``
    :param curr_map: folium map that receives the markers
    """
    for seq_no, lonlat in enumerate(waypoint_coords["geometry"]["coordinates"]):
        waypoint_latlng = lonlat_swap(lonlat)
        # The popup shows the waypoint's position in the coordinate list.
        label = "%d" % seq_no
        waypoint_pin = folium.map.Marker(
            waypoint_latlng,
            popup=label,
            icon=fof.DivIcon(class_name='leaflet-div-icon'),
        )
        waypoint_pin.add_to(curr_map)

def get_map_for_travel_leg(trip):
    """Return a folium map for a travel leg and all of its routes.

    :param trip: leg mapping with ``start_loc`` and ``end_loc`` (iterables of
        location entries), ``route_coords`` (iterable of route features),
        ``mode`` and ``name``
    :return: a ``folium.Map`` with green start layers, red end layers and, for
        every route, a polyline plus one circle marker per coordinate; the
        map bounds are re-fitted after each route, so the last one wins
    """
    curr_map = folium.Map()

    for start_entry in trip["start_loc"]:
        get_one_marker(start_entry, "green").add_to(curr_map)
    for end_entry in trip["end_loc"]:
        get_one_marker(end_entry, "red").add_to(curr_map)

    # A leg may carry several routes (reroutes); draw each of them.
    for reroute in trip["route_coords"]:
        route_lonlat = reroute["geometry"]["coordinates"]
        print("Found %d coordinates for the route" % (len(route_lonlat)))

        route_latlng = list(map(lonlat_swap, route_lonlat))
        route_line = folium.PolyLine(route_latlng, popup="%s: %s" % (trip["mode"], trip["name"]))
        route_line.add_to(curr_map)

        # Mark every vertex so individual coordinates can be inspected by index.
        for idx, vertex in enumerate(route_latlng):
            vertex_dot = folium.CircleMarker(vertex, radius=5, popup="%d: %s" % (idx, vertex))
            vertex_dot.add_to(curr_map)

        curr_map.fit_bounds(ful.get_bounds(route_latlng))

    return curr_map

In [12]:
def get_map_for_shim_leg(trip):
    """Return a folium map showing the locations of a shim leg.

    :param trip: leg mapping whose ``loc`` entry is an iterable of location
        entries
    :return: a ``folium.Map`` with one purple layer per location; the map is
        re-fitted to each layer's bounds in turn, so the last one wins
    """
    curr_map = folium.Map()
    for shim_entry in trip["loc"]:
        shim_layer = get_one_marker(shim_entry, "purple")
        shim_layer.add_to(curr_map)
        layer_bounds = shim_layer.get_bounds()
        curr_map.fit_bounds(layer_bounds)
    return curr_map

In [13]:
evaluation_trips = spec_to_validate["evaluation_trips"]

# One map per leg, in trip/leg order: TRAVEL legs get the route view, every
# other leg type gets the shim view.
map_list = []
for t in evaluation_trips:
    for l in t["legs"]:
        build_map = get_map_for_travel_leg if l["type"] == "TRAVEL" else get_map_for_shim_leg
        curr_map = build_map(l)
        map_list.append(curr_map)

# Lay the maps out two per row; the figure ratio grows with the row count.
rows = get_row_count(len(map_list), 2)
figure_ratio = "{}%".format((rows/2) * 100)
evaluation_maps = bre.Figure(ratio=figure_ratio)
for slot, curr_map in enumerate(map_list, start=1):
    evaluation_maps.add_subplot(rows, 2, slot).add_child(curr_map)
evaluation_maps

Found 89 coordinates for the route
Found 426 coordinates for the route
Found 626 coordinates for the route
Found 8 coordinates for the route
Found 9 coordinates for the route
Found 85 coordinates for the route
Found 67 coordinates for the route
Found 87 coordinates for the route
Found 78 coordinates for the route
Found 28 coordinates for the route
Found 42 coordinates for the route
Found 273 coordinates for the route
Found 266 coordinates for the route
Found 441 coordinates for the route
Found 408 coordinates for the route
Found 25 coordinates for the route
Found 29 coordinates for the route
Found 120 coordinates for the route
Found 701 coordinates for the route
Found 89 coordinates for the route


### Validating start and end polygons

In [14]:
def check_start_end_contains(leg):
    """Assert that each route of ``leg`` starts and ends inside its polygons.

    For every entry of ``leg["route_coords"]`` the route coordinates are
    converted to points. The ``start_loc`` / ``end_loc`` entries whose
    validity window lies inside the route's validity window are selected, and
    for each selected geometry the function asserts that

    * at least one route point is contained in it,
    * the first point (start geometries) or last point (end geometries) is
      contained in it, and
    * the contained points have consecutive indices.

    The contained points are printed for inspection.

    :param leg: leg mapping with ``route_coords``, ``start_loc`` and
        ``end_loc`` entries
    :raises AssertionError: when any of the checks above fails
    """
    for rc in leg["route_coords"]:
        points = gpd.GeoSeries([shpg.Point(p) for p in rc["geometry"]["coordinates"]])

        route_start_ts = rc["properties"]["valid_start_ts"]
        route_end_ts = rc["properties"]["valid_end_ts"]

        # keep only start_locs with start date >= route start date and end date <= route end date
        start_locs = [shpg.shape(l["geometry"]) for l in leg["start_loc"]
                      if l["properties"]["valid_start_ts"] >= route_start_ts
                      and l["properties"]["valid_end_ts"] <= route_end_ts]

        # keep only end_locs with start date >= route start date and end date <= route end date
        end_locs = [shpg.shape(l["geometry"]) for l in leg["end_loc"]
                    if l["properties"]["valid_start_ts"] >= route_start_ts
                    and l["properties"]["valid_end_ts"] <= route_end_ts]

        for sl in start_locs:
            start_contains = points.apply(lambda p: sl.contains(p))
            print(points[start_contains])

            # some of the points are within the start polygon
            assert start_contains.any(), leg

            # the first point is within the start polygon
            assert start_contains.iloc[0], points.head()

            # points within polygons are contiguous: consecutive contained
            # indices differ by exactly 1 (NaN when only one point matches)
            max_index_diff_start = pd.Series(start_contains[start_contains == True].index).diff().max()
            # The message reuses the value computed above. It previously
            # referenced `end_contains`, which is undefined (or stale from an
            # earlier route) at this point.
            assert pd.isnull(max_index_diff_start) or max_index_diff_start == 1, \
                "Max diff in index = %s for points %s" % (max_index_diff_start, points.head())

        for el in end_locs:
            end_contains = points.apply(lambda p: el.contains(p))
            print(points[end_contains])

            # some of the points are within the end polygon
            assert end_contains.any(), leg

            # the last point is within the end polygon
            assert end_contains.iloc[-1], points.tail()

            # points within polygons are contiguous
            max_index_diff_end = pd.Series(end_contains[end_contains == True].index).diff().max()
            # Reuse the computed value instead of rebuilding it through a
            # GeoSeries of integer indices.
            assert pd.isnull(max_index_diff_end) or max_index_diff_end == 1, \
                "Max diff in index = %s for points %s" % (max_index_diff_end, points.tail())

In [15]:
# Leg ids placed in this list are excluded from the containment check; it is
# empty here, so every TRAVEL leg is checked.
invalid_legs = []
for t in evaluation_trips:
    for l in t["legs"]:
        if l["type"] != "TRAVEL" or l["id"] in invalid_legs:
            continue
        print("Checking leg %s, %s" % (t["id"], l["id"]))
        check_start_end_contains(l)

Checking leg mtv_to_berkeley_sf_bart, walk_to_caltrain
0    POINT (-122.08337 37.39025)
1    POINT (-122.08338 37.39022)
dtype: geometry
83    POINT (-122.07657 37.39452)
84    POINT (-122.07652 37.39450)
85    POINT (-122.07649 37.39454)
86    POINT (-122.07601 37.39435)
87    POINT (-122.07604 37.39430)
88    POINT (-122.07573 37.39418)
dtype: geometry
Checking leg mtv_to_berkeley_sf_bart, commuter_rail_aboveground
0    POINT (-122.07630 37.39460)
1    POINT (-122.07667 37.39474)
dtype: geometry
425    POINT (-122.38759 37.60071)
dtype: geometry
Checking leg mtv_to_berkeley_sf_bart, subway_underground
0    POINT (-122.38606 37.59948)
dtype: geometry
624    POINT (-122.26803 37.86947)
625    POINT (-122.26829 37.87122)
dtype: geometry
Checking leg mtv_to_berkeley_sf_bart, walk_to_bus
6    POINT (-122.26771 37.87105)
7    POINT (-122.26771 37.87110)
dtype: geometry
8    POINT (-122.26825 37.87084)
dtype: geometry
Checking leg mtv_to_berkeley_sf_bart, city_bus_short
0    POINT (-122.268

### Validating sensing settings

In [16]:
# For every phone OS, the ids of the listed sensing configs must match the
# "compare" list entry by entry.
for ss in spec_to_validate["sensing_settings"]:
    for phoneOS, compare_map in ss.items():
        compare_list = compare_map["compare"]
        config_ids = [ssc["id"] for ssc in compare_map["sensing_configs"]]

        # A length difference is itself a validation failure. Previously a
        # shorter compare list crashed with IndexError and a longer one went
        # unnoticed.
        if len(config_ids) != len(compare_list):
            print("Mismatch in number of sensing configurations for %s: %d configs, %d compare entries"
                  % (phoneOS, len(config_ids), len(compare_list)))

        for config_id, expected_id in zip(config_ids, compare_list):
            if config_id != expected_id:
                print("Mismatch in sensing configurations for %s" % ss)

### Validating routes for no duplicate coordinates

In [17]:
# Two coordinates count as duplicates when both components differ by less
# than this amount (the comparison below is on the absolute difference).
REL_TOL = 1e-5

for t in evaluation_trips:
    for l in t["legs"]:
        if l["type"] != "TRAVEL":
            continue
        for rc in l["route_coords"]:
            props = rc["properties"]
            print("Checking leg %s, %s between dates %s, %s" % (t["id"], l["id"], props["valid_start_fmt_date"], props["valid_end_fmt_date"]))

            # Compare every pair (i < j) of coordinates of this route.
            coords = rc["geometry"]["coordinates"]
            n_coords = len(coords)
            for i, c1 in enumerate(coords):
                for j in range(i + 1, n_coords):
                    c2 = coords[j]
                    first_close = abs(c2[0] - c1[0]) < REL_TOL
                    if first_close and abs(c2[1] - c1[1]) < REL_TOL:
                        print(f"Duplicates {c1}, {c2} found @ indices {i}, {j}")

Checking leg mtv_to_berkeley_sf_bart, walk_to_caltrain between dates 2019-07-16, 2020-04-30
Duplicates [-122.0807, 37.3899], [-122.08069, 37.3899] found @ indices 31, 32
Duplicates [-122.07678, 37.39287], [-122.07678, 37.39288] found @ indices 68, 69
Checking leg mtv_to_berkeley_sf_bart, commuter_rail_aboveground between dates 2019-07-16, 2020-04-30
Duplicates [-122.13362, 37.42409], [-122.13362, 37.42409] found @ indices 43, 44
Duplicates [-122.13367, 37.42413], [-122.13367, 37.42413] found @ indices 45, 46
Duplicates [-122.13978, 37.42798], [-122.13978, 37.42798] found @ indices 56, 57
Duplicates [-122.14017, 37.42822], [-122.14017, 37.42822] found @ indices 58, 59
Duplicates [-122.15601, 37.43821], [-122.15601, 37.43821] found @ indices 68, 69
Duplicates [-122.15631, 37.4384], [-122.15631, 37.4384] found @ indices 71, 72
Duplicates [-122.16987, 37.44695], [-122.16987, 37.44695] found @ indices 96, 97
Duplicates [-122.17075, 37.4475], [-122.17075, 37.4475] found @ indices 98, 99
Dupl

Duplicates [-122.28991, 37.80283], [-122.28991, 37.80283] found @ indices 433, 434
Duplicates [-122.27996, 37.79956], [-122.27996, 37.79956] found @ indices 444, 445
Duplicates [-122.27975, 37.79946], [-122.27975, 37.79946] found @ indices 446, 447
Duplicates [-122.27886, 37.79898], [-122.27886, 37.79898] found @ indices 449, 450
Duplicates [-122.27855, 37.79882], [-122.27855, 37.79882] found @ indices 452, 453
Duplicates [-122.27683, 37.79848], [-122.27683, 37.79848] found @ indices 462, 463
Duplicates [-122.27406, 37.7996], [-122.27406, 37.7996] found @ indices 476, 477
Duplicates [-122.27291, 37.80147], [-122.27291, 37.80147] found @ indices 478, 479
Duplicates [-122.27028, 37.81304], [-122.27028, 37.81304] found @ indices 495, 496
Duplicates [-122.27082, 37.81532], [-122.27082, 37.81532] found @ indices 508, 509
Duplicates [-122.27034, 37.81702], [-122.27034, 37.81702] found @ indices 517, 518
Duplicates [-122.26983, 37.81865], [-122.26983, 37.81865] found @ indices 523, 524
Duplic

Duplicates [-122.27179, 37.84721], [-122.27179, 37.8472] found @ indices 195, 196
Duplicates [-122.27179, 37.84721], [-122.27179, 37.84721] found @ indices 195, 197
Duplicates [-122.27179, 37.8472], [-122.27179, 37.84721] found @ indices 196, 197
Checking leg berkeley_to_mtv_SF_express_bus, ebike_bikeshare_urban_long between dates 2019-09-01, 2020-04-30
Duplicates [-122.27179, 37.84721], [-122.27179, 37.8472] found @ indices 188, 189
Duplicates [-122.27179, 37.84721], [-122.27179, 37.84721] found @ indices 188, 190
Duplicates [-122.27179, 37.8472], [-122.27179, 37.84721] found @ indices 189, 190
Checking leg berkeley_to_mtv_SF_express_bus, express_bus between dates 2019-07-16, 2019-08-30
Duplicates [-122.27939, 37.83118], [-122.27939, 37.83118] found @ indices 1, 2
Duplicates [-122.27983, 37.8311], [-122.27983, 37.8311] found @ indices 5, 6
Duplicates [-122.27996, 37.83107], [-122.27996, 37.83107] found @ indices 7, 8
Duplicates [-122.28027, 37.83102], [-122.28027, 37.83102] found @ in

Duplicates [-122.3063, 37.54815], [-122.3063, 37.54815] found @ indices 423, 424
Duplicates [-122.30625, 37.54809], [-122.30625, 37.54809] found @ indices 425, 426
Duplicates [-122.29713, 37.53757], [-122.29713, 37.53757] found @ indices 439, 440
Duplicates [-122.29689, 37.5373], [-122.29689, 37.5373] found @ indices 441, 442
Duplicates [-122.28913, 37.53085], [-122.28913, 37.53085] found @ indices 454, 455
Duplicates [-122.28846, 37.5304], [-122.28846, 37.5304] found @ indices 456, 457
Duplicates [-122.27868, 37.52323], [-122.27868, 37.52323] found @ indices 468, 469
Duplicates [-122.27657, 37.52141], [-122.27657, 37.52141] found @ indices 472, 473
Duplicates [-122.27621, 37.52111], [-122.27621, 37.52111] found @ indices 475, 476
Duplicates [-122.27593, 37.52087], [-122.27593, 37.52087] found @ indices 477, 478
Duplicates [-122.27532, 37.52037], [-122.27532, 37.52037] found @ indices 482, 483
Duplicates [-122.2718, 37.51754], [-122.2718, 37.51754] found @ indices 488, 489
Duplicates [

### Validating overlapping time ranges

In [18]:
def check_overlaps(x):
    """Assert that the validity windows of the entries in ``x`` do not overlap.

    :param x: iterable of mappings whose ``properties`` entry holds
        ``valid_start_ts`` and ``valid_end_ts``
    :raises AssertionError: when, after ordering the windows by start
        timestamp, a window ends later than the next one starts
    """
    ranges = [(entry["properties"]["valid_start_ts"], entry["properties"]["valid_end_ts"])
              for entry in x]
    ranges.sort(key=lambda window: window[0])

    # Walk adjacent windows: each must end no later than its successor starts.
    for earlier, later in zip(ranges, ranges[1:]):
        ts1, ts2 = earlier[1], later[0]
        assert ts1 <= ts2, f"Overlapping timestamps: {ts1}, {ts2}"


# Location keys a leg may carry, paired with the banner printed before the
# overlap check: shim legs use "loc", travel legs use "start_loc"/"end_loc".
loc_checks = (
    ("loc", "\tChecking shim locs..."),
    ("start_loc", "\tChecking start locs..."),
    ("end_loc", "\tChecking end locs..."),
)

for t in evaluation_trips:
    for l in t["legs"]:
        print("Checking leg %s, %s" % (t["id"], l["id"]))

        # check whichever location lists this leg has
        for loc_key, banner in loc_checks:
            if loc_key in l:
                print(banner)
                check_overlaps(l[loc_key])

        # check trajectories
        if l["type"] == "TRAVEL":
            print("\tChecking trajectories...")
            check_overlaps(l["route_coords"])

Checking leg mtv_to_berkeley_sf_bart, walk_to_caltrain
	Checking start locs...
	Checking end locs...
	Checking trajectories...
Checking leg mtv_to_berkeley_sf_bart, wait_for_commuter_rail_aboveground
	Checking shim locs...
Checking leg mtv_to_berkeley_sf_bart, commuter_rail_aboveground
	Checking start locs...
	Checking end locs...
	Checking trajectories...
Checking leg mtv_to_berkeley_sf_bart, tt_commuter_rail_aboveground_subway_underground
	Checking shim locs...
Checking leg mtv_to_berkeley_sf_bart, wait_for_subway_underground
	Checking shim locs...
Checking leg mtv_to_berkeley_sf_bart, subway_underground
	Checking start locs...
	Checking end locs...
	Checking trajectories...
Checking leg mtv_to_berkeley_sf_bart, walk_to_bus
	Checking start locs...
	Checking end locs...
	Checking trajectories...
Checking leg mtv_to_berkeley_sf_bart, wait_for_city_bus_short_0
	Checking shim locs...
Checking leg mtv_to_berkeley_sf_bart, wait_for_city_bus_short_1
	Checking shim locs...
Checking leg mtv_t