In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the street-segment (edge) table and the node table from CSV files in the
# current working directory; the first CSV column becomes the DataFrame index.
edge_data = pd.read_csv('Edge_Data.csv', index_col = [0])
node_data = pd.read_csv('Node_Data.csv', index_col = [0])

In [3]:
# Normalise the key and coordinate columns to fixed-width numeric dtypes.
# `astype` converts a whole column in one vectorised step instead of calling
# int()/float() once per row. 'int64' is spelled out because the node ids are
# larger than the 32-bit integer range.
node_data = node_data.astype({'id': 'int64', 'latitude': 'float64', 'longitude': 'float64'})

edge_data = edge_data.astype({'start_id': 'int64', 'end_id': 'int64'})

In [4]:
# Lookup tables derived from the node table.
node_set = set(node_data['id'].tolist())  # every distinct node id

# node id -> row label of that node in node_data
node_id_to_index = {
    int(node_id): row_label
    for row_label, node_id in zip(node_data.index, node_data['id'])
}

# node id -> set of edge indices parsed from the '-'-separated 'adjacencies' string
node_adj = {
    int(node_id): {int(edge_id) for edge_id in adjacency_text.split('-')}
    for node_id, adjacency_text in zip(node_data['id'], node_data['adjacencies'])
}

display(node_data)
display(edge_data)

Unnamed: 0,id,latitude,longitude,adjacencies,region
1,26819598,37.904009,-122.313040,27898-23787-16950-27015,21
2,30364622,37.841569,-122.297114,269,1
6,33947072,37.863725,-122.244567,3-4,9
7,33947074,37.863798,-122.244472,5-93-6,9
13,33947087,37.863233,-122.242479,8-160-7,9
...,...,...,...,...,...
52761,8889247858,37.883829,-122.259885,27144-27145,13
52767,8889247864,37.884155,-122.259602,7063-27142-27143,18
52768,8889247865,37.884172,-122.259599,27143,18
52769,8889247866,37.884352,-122.258878,27139-27141,18


Unnamed: 0,name,start_id,end_id,highway,crime_count,tree_count,light_count,business_count,signal_count,pavement_width,...,tree_ratio,light_ratio,business_ratio,signal_ratio,region,distance,start_lat,start_lon,end_lat,end_lon
0,Stonewall-Panoramic Trail,35718720,2790624066,track,0,0,0,,,,...,,,,,9,53.109536,37.862638,-122.244025,37.862768,-122.244107
1,Stonewall-Panoramic Trail,2790624066,2535392487,track,0,0,0,,,,...,,,,,9,66.085009,37.862768,-122.244107,37.862927,-122.244216
2,Stonewall-Panoramic Trail,2535392487,2790624087,track,0,0,0,,,,...,,,,,9,282.724290,37.862927,-122.244216,37.863661,-122.244528
3,Stonewall-Panoramic Trail,2790624087,33947072,track,0,0,0,,,,...,,,,,9,25.694295,37.863661,-122.244528,37.863725,-122.244567
4,Stonewall-Panoramic Trail,33947072,2532688215,track,0,0,0,,,,...,,,,,9,13.444345,37.863725,-122.244567,37.863761,-122.244562
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27898,Eastshore Freeway,240513184,26819598,motorway,0,0,0,,,,...,,,,,21,1394.851044,37.907367,-122.315344,37.904009,-122.313040
27899,,8642719052,35833153,motorway_link,0,0,0,,,,...,,,,,11,895.874707,37.868197,-122.303880,37.870537,-122.304812
27900,,8676709718,8676709719,service,0,2,0,,,,...,,,,,7,105.410139,37.863530,-122.293156,37.863599,-122.292801
27901,,8756728707,550509285,steps,0,0,0,,,,...,,,,,21,61.729878,37.897309,-122.303131,37.897419,-122.303294


In [5]:
# finds the SQUARED distance from c = (x3, y3) to the line segment with
# endpoints a = (x1, y1) and b = (x2, y2)
def get_distance_btwn_point_and_line(x1, y1, x2, y2, x3, y3):
    """Return the squared Euclidean distance from point c to segment ab.

    The value is squared (no square root is taken), which is sufficient for
    ranking candidates by closeness.

    Args:
        x1, y1: coordinates of segment endpoint a.
        x2, y2: coordinates of segment endpoint b.
        x3, y3: coordinates of the query point c.

    Returns:
        A number, never None:
        * |ac|^2 when c projects before a, or when a == b (degenerate segment);
        * |bc|^2 when c projects beyond b;
        * otherwise the squared perpendicular distance from c to the line ab.
    """
    abx, aby = x2 - x1, y2 - y1
    acx, acy = x3 - x1, y3 - y1
    ac_len_sq = acx * acx + acy * acy
    ab_len_sq = abx * abx + aby * aby

    # Zero-length segment: the perpendicular formula would compute 0 / 0 (NaN),
    # so fall back to the plain point-to-point distance.
    if ab_len_sq == 0:
        return ac_len_sq

    # ab . ac < 0  ->  the projection of c falls before a, so a is the closest point.
    if abx * acx + aby * acy < 0:
        return ac_len_sq

    # ab . bc > 0 (equivalently ba . bc < 0)  ->  the projection falls beyond b.
    bcx, bcy = x3 - x2, y3 - y2
    if abx * bcx + aby * bcy > 0:
        return bcx * bcx + bcy * bcy

    # The projection lies on the segment: (|ab x ac| / |ab|)^2.
    # The 2-D cross product is written out by hand rather than calling np.cross.
    cross_product = abx * acy - aby * acx
    return cross_product * cross_product / ab_len_sq

In [6]:
# Precompute, for every edge, its index label and the coordinates of both
# endpoint nodes, so the nearest-segment searches below can iterate plain lists.
index_list = edge_data.index.tolist()
start_ids = edge_data['start_id'].tolist()
end_ids = edge_data['end_id'].tolist()

# Translate node ids into node_data row labels.
start_indices = [node_id_to_index[node_id] for node_id in start_ids]
end_indices = [node_id_to_index[node_id] for node_id in end_ids]

# Coordinates of each edge's start node ...
start_latitudes = [node_data.at[row_label, 'latitude'] for row_label in start_indices]
start_longitudes = [node_data.at[row_label, 'longitude'] for row_label in start_indices]
# ... and of its end node.
end_latitudes = [node_data.at[row_label, 'latitude'] for row_label in end_indices]
end_longitudes = [node_data.at[row_label, 'longitude'] for row_label in end_indices]

# gets the street segment closest to the latitude and longitude of a given point
# current implementation will assume streets are straight lines and the earth is flat
# also current implementation goes through all edges which is slow, implement regions in the future
# REQUIRES intersections to have coordinates
def get_block(latitude, longitude):
    """Return the edge_data index label of the segment nearest to a point.

    Scans every edge linearly and compares the values returned by
    get_distance_btwn_point_and_line.

    Args:
        latitude: latitude of the query point.
        longitude: longitude of the query point.

    Returns:
        The index label of the closest edge (the first one encountered on
        ties), or -1 if no edge produced a usable distance.
    """
    min_distance = float('inf')
    min_street_index = -1
    edges = zip(index_list, start_latitudes, start_longitudes, end_latitudes, end_longitudes)
    for index, start_latitude, start_longitude, end_latitude, end_longitude in edges:
        current_distance = get_distance_btwn_point_and_line(
            start_latitude, start_longitude, end_latitude, end_longitude, latitude, longitude)
        # The distance helper may report "no distance" as None; comparing None
        # with a float raises TypeError, so such edges are skipped.
        if current_distance is None:
            continue
        if current_distance < min_distance:
            min_distance = current_distance
            min_street_index = index
    return min_street_index

# gets the k closest segments to the given point
# REQUIRES intersections to have coordinates
def get_closest_blocks(latitude, longitude, k):
    """Return the edge_data index labels of the k segments nearest to a point.

    Args:
        latitude: latitude of the query point.
        longitude: longitude of the query point.
        k: number of segments wanted.

    Returns:
        A list of at most k index labels ordered from nearest to farthest
        (ties broken by index label). Fewer than k labels are returned when
        fewer edges have a usable distance; k <= 0 gives an empty list.
    """
    pq = []
    edges = zip(index_list, start_latitudes, start_longitudes, end_latitudes, end_longitudes)
    for index, start_latitude, start_longitude, end_latitude, end_longitude in edges:
        distance = get_distance_btwn_point_and_line(
            start_latitude, start_longitude, end_latitude, end_longitude, latitude, longitude)
        # None cannot be ordered against floats and NaN (distance != distance)
        # makes the sort order meaningless, so drop both before sorting.
        if distance is None or distance != distance:
            continue
        pq.append((distance, index))
    pq.sort()
    # Slicing (instead of indexing 0..k-1) tolerates k larger than the number
    # of candidates; max() keeps a negative k from slicing from the end.
    return [index for _, index in pq[:max(k, 0)]]

In [7]:
# helpers for per-segment counters: reset a column to zero, or increment the value of parameter at the k street segments closest to a location
def set_zero(parameter):
    """Set column `parameter` of edge_data to 0 in every row (creating it if absent)."""
    edge_data[parameter] = [0] * len(edge_data)

def update_street_data(latitude, longitude, parameter, k = 1):
    """Add 1 to column `parameter` for the k segments nearest to a point.

    Args:
        latitude: latitude of the event being counted.
        longitude: longitude of the event being counted.
        parameter: name of the edge_data column to increment.
        k: number of nearest segments to credit. With k == 1 the single
           nearest segment comes from get_block; otherwise the segments come
           from get_closest_blocks.

    Side effects:
        Mutates the module-level edge_data frame in place.
    """
    if k == 1:
        blocks = [get_block(latitude, longitude)]
    else:
        # `or []` keeps the original tolerance for an empty/falsy result.
        blocks = get_closest_blocks(latitude, longitude, k) or []

    for block in blocks:
        # pandas stores missing numbers as NaN, which an `is None` test never
        # matches (and NaN + 1 stays NaN); pd.isna covers both None and NaN.
        if pd.isna(edge_data.at[block, parameter]):
            edge_data.at[block, parameter] = 0
        edge_data.at[block, parameter] += 1
                
def update_street_data_coords(coords, parameter, k = 1):
    """Add 1 to column `parameter` for the k segments nearest to `coords`.

    Convenience wrapper for callers that hold the location as one sequence
    (for example when applying over a column of coordinate tuples).

    Args:
        coords: indexable pair; coords[0] is passed as the latitude and
                coords[1] as the longitude.
        parameter: name of the edge_data column to increment.
        k: number of nearest segments to credit.
    """
    # Delegate so both entry points share one implementation, including its
    # handling of empty cells.
    update_street_data(coords[0], coords[1], parameter, k)

In [8]:
import re

# Load the crime table and keep only its 'Block_Location' column.
crime = pd.read_csv('crimes.csv')
crime = crime[['Block_Location']]
# Regex capturing the text between an opening '(' and the last ')' that follows it.
# Written as a raw string: in a normal literal '\(' is an invalid escape
# sequence, which Python warns about; the compiled pattern is unchanged.
pattern = r'\((.*)\)'

def extract_coords(given_string, split, lat_first = True):
    """Parse the parenthesised coordinate pair found in `given_string`.

    Args:
        given_string: text searched with the module-level `pattern`.
        split: separator between the two numbers inside the parentheses.
        lat_first: if True the two numbers are returned in the order they
                   appear; if False they are swapped.

    Returns:
        A 2-tuple of floats.
    """
    inside_parens = re.search(pattern, given_string).group(1)
    pieces = inside_parens.split(split)
    order = (0, 1) if lat_first else (1, 0)
    return tuple(float(pieces[position]) for position in order)

# Replace each raw location string with the 2-tuple of floats parsed from it
# (separator ', ', numbers kept in the order they appear).
parsed_locations = crime['Block_Location'].apply(extract_coords, args = (', ', True))
crime['Block_Location'] = parsed_locations

# Start the tally from zero so the counts reflect only this pass.
set_zero('crime_count')

# Credit every crime to the 10 street segments nearest to it.
for crime_coords in crime['Block_Location']:
    update_street_data_coords(crime_coords, 'crime_count', 10)

display(edge_data)

Unnamed: 0,name,start_id,end_id,highway,crime_count,tree_count,light_count,business_count,signal_count,pavement_width,...,tree_ratio,light_ratio,business_ratio,signal_ratio,region,distance,start_lat,start_lon,end_lat,end_lon
0,Stonewall-Panoramic Trail,35718720,2790624066,track,0,0,0,,,,...,,,,,9,53.109536,37.862638,-122.244025,37.862768,-122.244107
1,Stonewall-Panoramic Trail,2790624066,2535392487,track,0,0,0,,,,...,,,,,9,66.085009,37.862768,-122.244107,37.862927,-122.244216
2,Stonewall-Panoramic Trail,2535392487,2790624087,track,0,0,0,,,,...,,,,,9,282.724290,37.862927,-122.244216,37.863661,-122.244528
3,Stonewall-Panoramic Trail,2790624087,33947072,track,0,0,0,,,,...,,,,,9,25.694295,37.863661,-122.244528,37.863725,-122.244567
4,Stonewall-Panoramic Trail,33947072,2532688215,track,0,0,0,,,,...,,,,,9,13.444345,37.863725,-122.244567,37.863761,-122.244562
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27898,Eastshore Freeway,240513184,26819598,motorway,0,0,0,,,,...,,,,,21,1394.851044,37.907367,-122.315344,37.904009,-122.313040
27899,,8642719052,35833153,motorway_link,2,0,0,,,,...,,,,,11,895.874707,37.868197,-122.303880,37.870537,-122.304812
27900,,8676709718,8676709719,service,0,2,0,,,,...,,,,,7,105.410139,37.863530,-122.293156,37.863599,-122.292801
27901,,8756728707,550509285,steps,0,0,0,,,,...,,,,,21,61.729878,37.897309,-122.303131,37.897419,-122.303294


In [9]:
# node_data.to_csv('Node_Data.csv')
# Persist the updated edge table.
# NOTE(review): this writes to the same 'Edge_Data.csv' path that the notebook
# reads at the top, so the input file is replaced by the updated table.
edge_data.to_csv('Edge_Data.csv')