In [None]:
import math
from heapq import heapify, heappush, heappop

import numpy as np
import pandas as pd

In [None]:
# Raw inputs: streets carry a dash-separated `node_ids` string, nodes carry
# `id`, `latitude` and `longitude`.
street_data = pd.read_csv('Street_Data.csv')
node_data = pd.read_csv('Node_Data.csv')

# Filled in by the edge-building cell further down.
edge_data = None
# Has to be a real set: later cells call node_set.add(...), and `{}` would
# create an empty dict, which has no such method.
node_set = set()

In [None]:
# Determines the degree of every node (how many street segments touch it) so
# later cells can tell intersections apart from nodes that can be deleted.
node_degrees = {}

node_data['id'] = node_data['id'].apply(int)
node_data['latitude'] = node_data['latitude'].apply(float)
node_data['longitude'] = node_data['longitude'].apply(float)

for cell in node_data['id']:
    node_degrees[cell] = 0

for index, row in street_data.iterrows():
    list_of_nodes = [int(node) for node in row['node_ids'].split('-')]
    if len(list_of_nodes) < 2:
        continue  # a single node forms no segment
    last_position = len(list_of_nodes) - 1
    for position, node in enumerate(list_of_nodes):
        if node not in node_degrees:
            continue  # the street references a node that is not in node_data
        # An interior node touches two segments of this street (the one
        # leading into it and the one leaving it); an end node touches one.
        # Merely counting occurrences would give the crossing of two through
        # streets a value of 2 and hide it from a `> 2` intersection test.
        if 0 < position < last_position:
            node_degrees[node] += 2
        else:
            node_degrees[node] += 1

In [None]:
# scans through all nodes and keeps only relevant ones, adds every relevant segment to edge_data

# Maps node id -> [latitude, longitude]. The two columns are selected by name
# so that any other column in node_data cannot shift the coordinates out of
# positions 0 and 1.
node_hashmap = node_data.set_index('id')[['latitude', 'longitude']].T.to_dict('list')

# A node counts as "straight through" when the angle at it is within
# THRESHOLD radians of CENTER (a half turn, i.e. no change of direction).
THRESHOLD = math.pi / 6
CENTER = math.pi

def get_angle(curr_coords, prev_coords, next_coords):
    """Return the angle at ``curr_coords`` between the previous and next point.

    Each argument is an indexable pair whose first two entries are used as the
    point's coordinates. The result is in radians in ``[0, 2*pi)`` and is
    measured from the direction towards ``prev_coords`` to the direction
    towards ``next_coords``; ``pi`` means the three points lie on a straight
    line with ``curr_coords`` in the middle.

    The arguments are never modified. Callers pass in the coordinate lists
    stored in the node lookup table, so shifting them in place would corrupt
    the coordinates of those nodes for every later use.
    """
    prev_dx = prev_coords[0] - curr_coords[0]
    prev_dy = prev_coords[1] - curr_coords[1]
    next_dx = next_coords[0] - curr_coords[0]
    next_dy = next_coords[1] - curr_coords[1]
    prev_angle = math.atan2(prev_dy, prev_dx)
    next_angle = math.atan2(next_dy, next_dx)
    return (next_angle - prev_angle) % (2 * math.pi)

# Collapse every street into edges between the nodes worth keeping: its two
# end nodes, every intersection, and every node where the road turns.
edge_list = {'name': [], 'start_id': [], 'end_id': [], 'highway': []}

# Start from an empty set so that re-running this cell does not accumulate
# ids from a previous run.
node_set = set()

for index, row in street_data.iterrows():
    list_of_nodes = [int(node) for node in row['node_ids'].split('-')]
    # Nodes without coordinates can neither be tested for a turn nor located
    # later, so they are left out of the street.
    list_of_nodes = [node for node in list_of_nodes if node in node_hashmap]
    if len(list_of_nodes) < 2:
        continue  # nothing to connect

    nodes_to_keep = [list_of_nodes[0]]
    last = 0  # position of the most recently kept node
    for i in range(1, len(list_of_nodes) - 1):
        curr_node = list_of_nodes[i]
        if node_degrees.get(curr_node, 0) <= 2:  # not an intersection
            # The angle is measured against the last *kept* node, so a long
            # gentle curve still produces a kept node once it has bent enough.
            # get_angle receives copies so the coordinates stored in
            # node_hashmap stay untouched whatever it does with its arguments.
            angle = get_angle(
                list(node_hashmap[curr_node]),
                list(node_hashmap[list_of_nodes[last]]),
                list(node_hashmap[list_of_nodes[i + 1]]))
            if abs(angle - CENTER) <= THRESHOLD:
                continue  # road runs (nearly) straight through: drop the node
        nodes_to_keep.append(curr_node)
        last = i
    nodes_to_keep.append(list_of_nodes[-1])

    # One edge per pair of consecutive kept nodes.
    for start_id, end_id in zip(nodes_to_keep, nodes_to_keep[1:]):
        edge_list['name'].append(row['name'])
        edge_list['highway'].append(row['highway'])
        edge_list['start_id'].append(start_id)
        edge_list['end_id'].append(end_id)
    node_set.update(nodes_to_keep)

edge_data = pd.DataFrame(edge_list)
# .copy() gives an independent frame, so adding columns to it later does not
# raise SettingWithCopyWarning.
node_data = node_data[node_data['id'].isin(node_set)].copy()

In [None]:
# Adds one empty column per street-segment feature; every edge starts out
# with None in each of them.

nan = [None] * len(edge_data)

features = [
    'crime_count', 'tree_count', 'light_count', 'business_count', 'signal_count',
    'pavement_width', 'street_type',
    'crime_ratio', 'tree_ratio', 'light_ratio', 'business_ratio', 'signal_ratio',
    'region',
]

for feature in features:
    edge_data[feature] = nan

In [None]:
# creates adjacency list for nodes: node id -> set of the indices (in
# edge_data) of the edges that start or end at that node
node_adj = {node: set() for node in node_set}

# A single pass over the edges fills both end nodes of each edge, instead of
# filtering the whole edge table once per node.
for edge_index, start_id, end_id in zip(edge_data.index, edge_data['start_id'], edge_data['end_id']):
    node_adj.setdefault(start_id, set()).add(edge_index)
    node_adj.setdefault(end_id, set()).add(edge_index)

In [None]:
# utility functions to get distances

def get_distance_btwn_points(x1, y1, x2, y2):
    """Return the straight-line (Euclidean) distance between (x1, y1) and (x2, y2)."""
    return math.hypot(x1 - x2, y1 - y2)

# finds distance between (x3, y3) to line defined by (x1, y1) and (x2, y2)
def get_distance_btwn_point_and_line(x1, y1, x2, y2, x3, y3):
    """Return the perpendicular distance from (x3, y3) to the infinite line
    through (x1, y1) and (x2, y2).

    When the two line points coincide there is no line to measure against, so
    the distance from (x3, y3) to that single point is returned instead of
    dividing by zero.
    """
    length = math.hypot(x2 - x1, y2 - y1)
    if length == 0:
        return math.hypot(x3 - x1, y3 - y1)
    # |cross product| is the area of the parallelogram spanned by the line
    # direction and the vector to the point; dividing by the base length
    # leaves its height, i.e. the distance.
    cross = (x2 - x1) * (y1 - y3) - (y2 - y1) * (x1 - x3)
    return abs(cross) / length

In [None]:
# gets street segment indices for a latitude and longitude
#
# Streets are treated as straight segments on a flat earth. Every call
# measures the distance to every edge (vectorised with numpy); regions could
# narrow the search in the future.
# REQUIRES the end nodes of the edges to have coordinates in node_data.

def _edge_distances(latitude, longitude):
    """Return a Series, indexed like edge_data, holding the distance from the
    point (latitude, longitude) to every edge.

    The distance is measured to the segment between the edge's two end nodes,
    not to the infinite line through them; otherwise a far-away street that
    merely points at the location would count as touching it. An edge with
    only one locatable end node is measured to that node, and an edge with
    none gets NaN.
    """
    # Look coordinates up by node id: node_data's own row labels are
    # unrelated to the ids stored in edge_data.
    coords = node_data.drop_duplicates(subset='id').set_index('id')[['latitude', 'longitude']]
    starts = coords.reindex(edge_data['start_id']).to_numpy(dtype=float)
    ends = coords.reindex(edge_data['end_id']).to_numpy(dtype=float)

    # A missing end node collapses the edge onto its other end node.
    no_start = np.isnan(starts).any(axis=1)
    no_end = np.isnan(ends).any(axis=1)
    starts = np.where(no_start[:, None], ends, starts)
    ends = np.where(no_end[:, None], starts, ends)

    point = np.array([latitude, longitude], dtype=float)
    segment = ends - starts
    length_sq = (segment ** 2).sum(axis=1)
    # Zero-length edges would divide by zero; any divisor works for them
    # because their projection numerator is 0 anyway.
    safe_length_sq = np.where(length_sq > 0, length_sq, 1.0)
    # Position of the closest point along each segment: 0 = start, 1 = end.
    t = np.clip(((point - starts) * segment).sum(axis=1) / safe_length_sq, 0.0, 1.0)
    nearest = starts + t[:, None] * segment
    distances = np.hypot(nearest[:, 0] - point[0], nearest[:, 1] - point[1])
    return pd.Series(distances, index=edge_data.index)


# gets the street segment closest to the latitude and longitude of a given point
def get_block(latitude, longitude):
    """Return the edge_data index of the edge closest to the point, or -1
    when no edge has a measurable distance."""
    distances = _edge_distances(latitude, longitude).dropna()
    if distances.empty:
        return -1
    return distances.idxmin()


# gets the k closest segments to the given point
def get_closest_blocks(latitude, longitude, k):
    """Return the edge_data indices of the k edges closest to the point,
    nearest first. Fewer than k indices are returned when fewer edges have a
    measurable distance."""
    distances = _edge_distances(latitude, longitude).dropna()
    return distances.nsmallest(k).index.tolist()

In [None]:
# increments the value of parameter at the k street segments closest to location
def update_street_data(latitude, longitude, parameter, k = 1):
    """Add 1 to column ``parameter`` of the ``k`` edges nearest to the point.

    A cell that is still empty (None/NaN) is treated as 0. Nothing happens
    when no edge could be found for the location.
    """
    if k == 1:
        nearest = get_block(latitude, longitude)
        # get_block signals "no edge found" with -1
        blocks = [] if nearest == -1 else [nearest]
    else:
        blocks = get_closest_blocks(latitude, longitude, k) or []
    for block in blocks:
        current = edge_data.at[block, parameter]
        edge_data.at[block, parameter] = 1 if pd.isna(current) else current + 1
                
def update_street_data_coords(coords, parameter, k = 1):
    """Call update_street_data with the point given as one indexable pair:
    coords[0] is passed as the latitude and coords[1] as the longitude."""
    latitude = coords[0]
    longitude = coords[1]
    update_street_data(latitude, longitude, parameter, k)

In [None]:
# adds crime data
import re

crime = pd.read_csv('crimes.csv')
# .copy() makes the one-column frame independent of the full table, so
# assigning to its column later does not raise SettingWithCopyWarning.
crime = crime[['Block_Location']].copy()
# Captures whatever sits between the first '(' and the last ')'. Written as a
# raw string so that '\(' is not read as an (invalid) string escape.
pattern = r'\((.*)\)'

def extract_coords(given_string, split, lat_first = True):
    """Parse the two numbers found between parentheses in ``given_string``.

    ``split`` is the separator between the two numbers. With ``lat_first``
    the numbers are returned in the order they appear; otherwise they are
    swapped. Raises ValueError when the string holds no parenthesised part.
    """
    match = re.search(pattern, given_string)
    if match is None:
        raise ValueError(f'no parenthesised coordinates in {given_string!r}')
    # Split only the text inside the parentheses; splitting the whole string
    # would leave the '(' / ')' and any prefix attached to the numbers.
    coords = match.group(1).split(split)
    if lat_first:
        return float(coords[0]), float(coords[1])
    return float(coords[1]), float(coords[0])

# Replace every raw location string by the pair of numbers parsed from it.
crime['Block_Location'] = crime['Block_Location'].apply(extract_coords, split=', ', lat_first=True)

crime.head(10)

# Count each crime on the 3 street segments closest to it.
crime['Block_Location'].apply(update_street_data_coords, parameter='crime_count', k=3)

In [None]:
# adds tree data
def create_coords(latitude, longitude):
    """Bundle the two values into a single (latitude, longitude) tuple."""
    pair = (latitude, longitude)
    return pair

trees = pd.read_csv('City_Trees.csv')
# .copy() makes the two-column frame independent of the full table, so adding
# a column below does not raise SettingWithCopyWarning.
trees = trees[['Latitude', 'Longitude']].copy()

# DataFrame.apply would hand create_coords one whole column at a time (and
# fail for lack of a second argument), so the two columns are paired up row
# by row instead.
trees['coordinates'] = list(map(create_coords, trees['Latitude'], trees['Longitude']))

trees.head(10)

# Count each tree on the single street segment closest to it.
trees['coordinates'].apply(update_street_data_coords, args=('tree_count', 1))

In [None]:
# adds light data
streetLights = pd.read_csv('streetLights.csv')
# .copy() makes the one-column frame independent of the full table, so
# overwriting its column below does not raise SettingWithCopyWarning.
streetLights = streetLights[['the_geom']].copy()

# The two numbers in the geometry string are separated by a space and are
# swapped on extraction (lat_first=False).
streetLights['the_geom'] = streetLights['the_geom'].apply(extract_coords, args = (' ', False))

streetLights.head(10)

# Count each light on the single street segment closest to it.
streetLights['the_geom'].apply(update_street_data_coords, args=('light_count', 1))

In [None]:
# adds business data

In [None]:
# adds park data

In [None]:
# converts DataFrame into csv files

# creates dash separated list of edge indices adjacent to a node
def create_string_from_set(node):
    """Return the edge indices stored for ``node`` in node_adj as one
    dash-separated string, in ascending order.

    A node with no entry or no adjacent edges yields an empty string.
    """
    adj = node_adj.get(node, ())
    # str.join only accepts strings, and sorting keeps the output stable
    # across runs because set iteration order is not guaranteed.
    return '-'.join(str(edge) for edge in sorted(adj))

# assign() returns a new frame, so this works without a warning even when
# node_data is a filtered slice of the table that was loaded.
node_data = node_data.assign(adjacencies=node_data['id'].apply(create_string_from_set))

# writes to files
# NOTE(review): Node_Data.csv is also the file this notebook reads at the top,
# so this replaces the input with the reduced node list — confirm that is
# intended. The row index is left out so the file does not gain an extra
# unnamed column each time it is written and read back.
node_data.to_csv('Node_Data.csv', index=False)
# The edge index is written out because the adjacency strings refer to edges
# by that index.
edge_data.to_csv('Edge_Data.csv')