In [1]:
# --- Global variables

# Coordinate Reference System, set up front in the dict format used by this notebook.
crs_degree = {'init': 'epsg:4326'} # EPSG:4326, i.e. GCS_WGS_1984 (what the GPS uses)

# --- Paths

# Root path of Fremont Dropbox
import os
import sys
import demand_util

# Automatically reload imported modules whenever their source changes
%load_ext autoreload
%autoreload 2

# Add the directory two levels up to sys.path so that the fremontdropbox module can be imported
module_path = os.path.abspath(os.path.join('../..'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
from fremontdropbox import get_dropbox_location
# Root path of the Dropbox business account
dbx = get_dropbox_location()

# Temporary! Location of the folder where the restructuring is currently happening
data_path = dbx + '/Private Structured data collection'

In [2]:
import geopandas as gpd
from shapely import geometry
# array analysis
import numpy as np
import pandas as pd
import sklearn.cluster as skc
from scipy.spatial import ConvexHull
import matplotlib.pyplot as plt

# geo spacial data analysis
import geopandas as gpd
from shapely import wkt
from keplergl import KeplerGl
import fiona

# assorted parsing and modeling tools
import os
import math
import csv
from pytz import utc
from shutil import copyfile, copytree
from shapely.ops import nearest_points, unary_union
from shapely.geometry import Point, LineString, Polygon, MultiPoint

import requests
import random
import polyline

from pathlib import Path


# importing all the Kepler.gl configurations
import ast

In [3]:
# Folder holding the SFCTA OD demand data (raw inputs)
sfcta_folder = os.path.join(data_path, "Data processing", "Raw", "Demand", "OD demand", "SFCTA demand data")

# Aimsun sections shapefile, loaded as a GeoDataFrame
sections_path = os.path.join(data_path, "Aimsun", "Inputs", "sections.shp")
sections_shp = gpd.read_file(sections_path)

In [4]:
# Boundary of the project area as (lng, lat) vertices.
# The ring is explicitly closed: the last vertex repeats the first one.
project_delimitation = [
    (-121.94277062699996, 37.55273259000006),
    (-121.94099807399999, 37.554268507000074),
    (-121.91790942699998, 37.549823434000075),
    (-121.89348666299998, 37.52770136500004),
    (-121.90056572499998, 37.52292299800007),
    (-121.90817571699995, 37.52416183400004),
    (-121.91252749099999, 37.51845069500007),
    (-121.91349347899995, 37.513972023000065),
    (-121.90855417099999, 37.503837324000074),
    (-121.91358547299996, 37.50097863000008),
    (-121.90798018999999, 37.49080413200005),
    (-121.91894942199997, 37.48791568200005),
    (-121.92029048799998, 37.488706567000065),
    (-121.93070953799997, 37.48509600500006),
    (-121.93254686299997, 37.48864173700008),
    (-121.94079404499996, 37.50416395900004),
    (-121.94569804899999, 37.51332606200003),
    (-121.94918207899997, 37.520371545000046),
    (-121.95305006999996, 37.52804520800004),
    (-121.953966735, 37.53272020000003),
    (-121.95428756799998, 37.53817435800005),
    (-121.95506236799997, 37.54107322100003),
    (-121.95676186899999, 37.54656695700004),
    (-121.95529950799994, 37.54980786700003),
    (-121.95261192399994, 37.550479763000055),
    (-121.94988481799999, 37.55277211300006),
    (-121.94613010599994, 37.55466923100005),
    (-121.94277062699996, 37.55273259000006),
]

In [5]:
def get_path_by_here_api(start, end, stop_on_error=False):
    """
    Use the Here routing API to get a smooth car path from start to end.

    @param start:           origin, an object exposing x (lng) and y (lat) such as a shapely Point
    @param end:             destination, same convention as start
    @param stop_on_error:   if True, raise when the request fails or no route is found;
                            if False (default), return None in those cases
    @return:                list of (lng, lat) tuples beginning with start and ending with end,
                            or None when no path could be obtained
    @raise RuntimeError:    no route was returned and stop_on_error is True
    @raise Exception:       whatever response.raise_for_status() raises when the HTTP request
                            failed and stop_on_error is True
    """
    here_url = 'https://route.ls.hereapi.com/routing/7.2/calculateroute.json?'
    # Read the key from the environment so that it never has to be committed with the notebook;
    # fall back to the placeholder that was hard-coded here before.
    api_key = os.environ.get('HERE_API_KEY', 'PRIVATE')

    # convention on .shp files and kepler are lng, lat
    # Here API convention is lat, lng
    start_pos = 'geo!{},{}'.format(start.y, start.x)
    end_pos = 'geo!{},{}'.format(end.y, end.x)
    params = {'apiKey': api_key,
              'mode': 'fastest;car;traffic:disabled',
              'representation': 'display',
              'waypoint0': start_pos,
              'waypoint1': end_pos}

    response = requests.get(here_url, params=params)
    if not response.ok:
        if stop_on_error:
            response.raise_for_status()
        # Here API throws an error on some bodies of water, keep going if stop on error is false
        return None

    route = response.json()['response']['route']
    if not route:
        if stop_on_error:
            raise RuntimeError('no routes found for start={}, destination={}'.format(start, end))
        return None

    # the path always starts at the requested origin and ends at the requested destination
    points = [(start.x, start.y)]
    for maneuver in route[0]['leg'][0]['maneuver']:
        for shape_point in maneuver['shape']:
            lat_lng = shape_point.split(',')
            # keep .shp file convention, lng, lat
            points.append((float(lat_lng[1]), float(lat_lng[0])))
    points.append((end.x, end.y))
    return points

In [6]:
def create_external_taz(dir_taz, sections_df, output_dir=None, testing=True, sample_size=500):
    """
    3 steps to create external TAZs
    1. Create an external demand delimitation:
    - load SFCTA data as points (one point = one origin or one destination)
    - get the convex hull of the points
    - use the convex hull (+ buffer) as the external demand delimitation
    2. Create external centroids:
    - select roads with no fnode and capacity above 800 from sections_df
    - create a point at the end of every selected road
    - drop the roads listed in roads_to_remove (identified visually)
    3. Create external TAZs:
    - create a mesh of points inside the external demand delimitation and outside the internal
      demand delimitation (module-level project_delimitation)
    - for every mesh point:
        query the path from the mesh point to the center of the project area (Here API)
        find the external centroid closest to the point where the path crosses the project delimitation
        keep the association only if that centroid is closer than distance_to_centroid_threshold

    The associations are written to mesh_point_to_centroid.csv with the columns
    origin_mesh_point, destination, closest_external_centroid, distance_to_centroid, path.
    render_taz_from_csv reads a CSV with these columns to draw the TAZs.

    @param dir_taz:         folder containing prefix_fremont_legs.csv where prefix=ending, internal and starting
    @param sections_df:     geo pandas data frame of the aimsun sections
    @param output_dir:      folder where mesh_point_to_centroid.csv is written; when None the file
                            name is used as-is (relative to the current working directory)
    @param testing:         if True (default), route only a random sample of the mesh points
    @param sample_size:     maximum number of mesh points sampled when testing is True
    @return:                None, the result is the CSV file
    """
    # 1. Create a external demand delimitation:
    # load the 3 csv files
    ending_csv = pd.read_csv(os.path.join(dir_taz, "ending_fremont_legs.csv"))
    internal_csv = pd.read_csv(os.path.join(dir_taz, "internal_fremont_legs.csv"))
    starting_csv = pd.read_csv(os.path.join(dir_taz, "starting_fremont_legs.csv"))

    # get the points from the csv's (start and end points)
    def get_points(csv_df):
        all_points = []
        for node_type in ['start', 'end']:
            all_points.extend(zip(csv_df[node_type + '_node_lng'], csv_df[node_type + '_node_lat']))
        return all_points

    points = []
    points.extend(get_points(ending_csv))
    points.extend(get_points(internal_csv))
    points.extend(get_points(starting_csv))
    points = np.array(points)

    # get convex hull of points
    hull = ConvexHull(points)
    hull_points = points[hull.vertices, :]

    # add buffer to convex hull
    def normalize(point):
        norm = np.linalg.norm(point)
        return point / norm if norm > 0 else point

    # for each hull vertex, push outwards along the sum of the unit vectors
    # pointing away from its two neighbors
    buffer_directions = []
    n_hull = len(hull_points)
    for i in range(n_hull):
        point = hull_points[i]
        left_arrow = normalize(point - hull_points[(i - 1) % n_hull])
        right_arrow = normalize(point - hull_points[(i + 1) % n_hull])
        buffer_directions.append(normalize(left_arrow + right_arrow))
    buffer_directions = np.array(buffer_directions)

    # calculate the new (expanded) hull points with buffer
    buffer_coefficient = .05
    expanded_hull_points = hull_points + buffer_coefficient * buffer_directions

    # 2. create external centroids:
    # select roads with no fnode and capacity above 800 from sections_df
    sections_df = sections_df[pd.isnull(sections_df['fnode']) & (sections_df['capacity'] > 800)]
    sections_df = sections_df[['eid', 'geometry']]

    # filter out roads that are visually erroneous -> a road not entering the project area (Fremont)
    # sections_df.to_csv('selected_roads.csv')   # roads to obtained visually
    roads_to_remove = [56744, 30676, 35572, 56534]
    sections_df = sections_df.astype({'eid': 'int32'})
    sections_df = sections_df[~sections_df['eid'].isin(roads_to_remove)]

    # create external centroid nodes -> create a point at the terminal end of these roads
    # that is, for each road find the end of the road that is closer to the external delimitation (convex hull)
    external_centroid_nodes = []
    internal_centroid_nodes = []  # need later to compute center point of project area
    circle = np.concatenate((expanded_hull_points, expanded_hull_points[0][None, :]), axis=0)
    external_delimitation = LineString(circle)
    for road in sections_df['geometry']:
        start_point = Point(road.coords[0])
        end_point = Point(road.coords[-1])

        if external_delimitation.distance(start_point) < external_delimitation.distance(end_point):
            # start is external centroid
            external_centroid_nodes.append(start_point)
            internal_centroid_nodes.append(end_point)
        else:
            # end is external centroid
            external_centroid_nodes.append(end_point)
            internal_centroid_nodes.append(start_point)

    # 3. create external TAZs:
    # create mesh of points
    mesh_density = 0.001  # grid spacing; original note: 0.001 creates 2 million points
    x_min, x_max = np.min(expanded_hull_points[:, 0]), np.max(expanded_hull_points[:, 0])
    y_min, y_max = np.min(expanded_hull_points[:, 1]), np.max(expanded_hull_points[:, 1])
    x, y = np.meshgrid(np.arange(x_min, x_max, mesh_density), np.arange(y_min, y_max, mesh_density))
    mesh_points = list(zip(x.ravel(), y.ravel()))
    print('created {} mesh points'.format(len(mesh_points)))
    x = y = points = None  # free up memory

    # keep those inside external delimitation and outside project delimitation
    external_delimitation_poly = Polygon(expanded_hull_points)
    project_delimitation_poly = Polygon(project_delimitation)
    external_minus_project = external_delimitation_poly.difference(project_delimitation_poly)
    # bottleneck (iterating over points and using contains method is slow)
    # Points are built one by one instead of iterating a MultiPoint, which is not
    # iterable in Shapely 2.x.
    mesh_points = [p for p in map(Point, mesh_points) if external_minus_project.contains(p)]
    print('kept {} mesh points'.format(len(mesh_points)))

    # compute center of project area
    internal_centroid_nodes = np.array([(p.x, p.y) for p in internal_centroid_nodes])
    project_center = np.mean(internal_centroid_nodes, axis=0)
    project_center = Point(project_center[0], project_center[1])

    # for each mesh point find closest external centroid to its query path
    project_delimitation_line = LineString(project_delimitation + [project_delimitation[0]])

    info_point_to_center = []  # desired result

    # for testing sample mesh points at random and run them
    if testing:
        # never ask for more points than are available (random.sample would raise ValueError)
        mesh_points = random.sample(mesh_points, min(sample_size, len(mesh_points)))

    distance_to_centroid_threshold = 0.005
    for point in mesh_points:
        path = get_path_by_here_api(point, project_center, stop_on_error=False)
        if not path:
            # not path found, ie. start is body of water hence no car path to destination
            # from sample testing, Google API takes this into account but not Here API
            continue  # next mesh point

        # find intersection (point) of path and project delimitation
        path = LineString(path)
        intersect_point = project_delimitation_line.intersection(path)
        if not isinstance(intersect_point, Point):
            # usually API error, ie. start is body of water, path includes a segment that jumps from water
            # to fremont intersecting project delimitation multiple times
            continue  # next mesh point

        # find closest centroid to intersection point (first one wins on ties)
        closest_centroid = min(external_centroid_nodes, key=intersect_point.distance, default=None)
        if closest_centroid is None:
            continue  # no external centroid available
        min_distance = intersect_point.distance(closest_centroid)

        if min_distance < distance_to_centroid_threshold:
            info_point_to_center.append([point, project_center, closest_centroid, min_distance, path])

    def to_csv(file_name, header, lines):
        def add_quotes(val):
            # quote any field containing a comma (e.g. the WKT of a path)
            return "\"" + str(val) + "\"" if ',' in str(val) else str(val)

        # the context manager guarantees the file is flushed and closed
        with open(file_name, 'w') as out_file:
            out_file.write(header + '\n')
            for line in lines:
                out_file.write(','.join(map(add_quotes, line)) + '\n')

    # write results to csv
    mesh_points_to_centroid_file_path = 'mesh_point_to_centroid.csv'
    if output_dir:
        mesh_points_to_centroid_file_path = os.path.join(output_dir, mesh_points_to_centroid_file_path)
    to_csv(mesh_points_to_centroid_file_path,
           'origin_mesh_point,destination,closest_external_centroid,distance_to_centroid,path',
           info_point_to_center)
    return
    

In [7]:
def render_taz_from_csv(csv_file, output_dir=None):
    """
    Build one external TAZ polygon per external centroid and save them on a Kepler map.

    The mesh points associated with the same closest_external_centroid are grouped and their
    convex hull becomes the TAZ polygon. Centroids whose mesh points do not span an area
    (fewer than three points, or all collinear) are reported and skipped.

    @param csv_file:    CSV with at least the columns origin_mesh_point and closest_external_centroid,
                        both holding point strings readable by convert_point_to_coord
    @param output_dir:  folder where external_taz.html is saved; when None the file name is used
                        as-is (relative to the current working directory)
    @return:            GeoDataFrame with the columns taz_name and geometry, one row per TAZ
    """
    project_delimitation_line = LineString(project_delimitation + [project_delimitation[0]])

    info_points_df = pd.read_csv(csv_file)
    # one node per CSV row, so a centroid shared by several mesh points appears several times
    external_centroid_nodes = [
        Point(coord[0], coord[1])
        for coord in info_points_df['closest_external_centroid'].map(convert_point_to_coord)
    ]

    kepler_map = KeplerGl(height=600)
    kepler_map.add_data(data=gpd.GeoDataFrame({'geometry': external_centroid_nodes}, crs='epsg:4326'), name='external_centroids')
    kepler_map.add_data(data=gpd.GeoDataFrame({'geometry': [project_delimitation_line]}, crs='epsg:4326'), name='project_delimitation')

    boundary_list = []
    taz_name_list = []
    for centroid in info_points_df['closest_external_centroid'].unique():
        taz_df = info_points_df.loc[info_points_df['closest_external_centroid'] == centroid]
        taz_points = [tuple(convert_point_to_coord(p)) for p in taz_df['origin_mesh_point']]

        # shapely's convex hull degrades to a Point / LineString instead of raising
        # when the points do not span an area
        taz_poly = MultiPoint(taz_points).convex_hull
        if taz_poly.geom_type != 'Polygon':
            print('skipping centroid {}: {} mesh point(s) do not span an area'.format(centroid, len(taz_points)))
            continue

        # TAZs are numbered in the order they are emitted
        taz_name = 'External TAZ' + str(len(taz_name_list))
        kepler_map.add_data(data=gpd.GeoDataFrame({'geometry': [taz_poly]}, crs='epsg:4326'), name=taz_name)
        taz_name_list.append(taz_name)
        boundary_list.append(taz_poly)

    # same CRS as every layer added to the map above
    taz_gpd = gpd.GeoDataFrame({'taz_name': taz_name_list, 'geometry': boundary_list}, crs='epsg:4326')

    file_path = 'external_taz.html'
    if output_dir:
        file_path = os.path.join(output_dir, file_path)
    kepler_map.save_to_html(file_name=file_path)
    return taz_gpd

In [8]:
def convert_point_to_coord(string):
    """
    Parse a 2D point string such as 'POINT (-121.9 37.5)' into [lon, lat].

    The two numbers are read from between the parentheses, so the spacing around the
    parentheses and between the numbers does not matter ('POINT(-121.9 37.5)' works too).

    @param string:      text holding two whitespace-separated numbers inside parentheses
    @return:            [lon, lat] as a list of two floats
    @raise ValueError:  the text has no parenthesised pair of numbers
    """
    text = str(string)
    opening = text.find('(')
    closing = text.rfind(')')
    if opening == -1 or closing < opening:
        raise ValueError('not a point string: {!r}'.format(string))

    # split() with no argument tolerates any amount of whitespace
    values = text[opening + 1:closing].split()
    if len(values) != 2:
        raise ValueError('expected two coordinates in {!r}'.format(string))
    return [float(values[0]), float(values[1])]

In [9]:
# Render the external TAZs from the mesh point CSV stored in the HereAPI Kepler maps folder;
# the resulting map is saved in that same folder.
output_dir = os.path.join(data_path, 'Data processing', 'Kepler maps', 'HereAPI')
mesh_points_to_centroid_file_path = 'mesh_point_to_centroid.csv'
taz_gpd = render_taz_from_csv(
    os.path.join(output_dir, mesh_points_to_centroid_file_path),
    output_dir=output_dir,
)



User Guide: https://docs.kepler.gl/docs/keplergl-jupyter
Map saved to /Users/trevorwu/Dropbox/Private Structured data collection/Data processing/Kepler maps/HereAPI/external_taz.html!
