In [None]:
import re
from heapq import heapify, heappush, heappop

import numpy as np
import pandas as pd

In [None]:
# TODO: find complete street data with latitude and longitude
street_data_raw = pd.read_csv('Street_Centerlines.csv')
# Keep only the rows whose CITYR column equals 'Berkeley'. The comparison has to be
# applied to the column (a boolean mask); `street_data['CITYR' == 'Berkeley']` would
# evaluate to `street_data[False]` and raise a KeyError.
# .copy() makes the filtered frame independent of the raw one so that the columns
# added in later cells do not trigger SettingWithCopyWarning.
street_data = street_data_raw[street_data_raw['CITYR'] == 'Berkeley'].copy()
intersection_data = None # TODO: get intersection data

In [None]:
undefined = [-1 for b in range(len(street_data))]
nan = [None for b in range(len(street_data))]
features = ['crime_count', 
            'tree_count', 
            'light_count', 
            'business_count', 
            'signal_count', 
            'pavement_width', 
            'street_type', 
            'crime_ratio', 
            'tree_ratio', 
            'light_ratio', 
            'business_ratio', 
            'signal_ratio', 
            'start_point', 
            'end_point',
            'region']
for feature in features:
    # The update_street_data* helpers increment a column with `+= 1`, which fails on
    # None. The *_count columns are presumably the ones being incremented, so they
    # start at 0; every other feature keeps the None "not yet known" placeholder.
    street_data[feature] = 0 if feature.endswith('_count') else nan

# intersection_data is still the None placeholder until the intersection data set is
# loaded; only add the coordinate columns once there is a frame to add them to.
if intersection_data is not None:
    intersection_data['latitude'] = [None for b in range(len(intersection_data))]
    intersection_data['longitude'] = [None for b in range(len(intersection_data))]

In [None]:
# data will be split into regions to make querying faster
# relevant regions to a point will include the region the point is in along with the 8 surrounding regions
# not used for now, but may use later

def get_north_region(region):
    """Stub: going by its name, meant to give the region north of `region`.

    Not implemented yet; `region` is ignored and None is returned.
    """
    return None

def get_south_region(region):
    """Stub: going by its name, meant to give the region south of `region`.

    Not implemented yet; `region` is ignored and None is returned.
    """
    return None

def get_east_region(region):
    """Stub: going by its name, meant to give the region east of `region`.

    Not implemented yet; `region` is ignored and None is returned.
    """
    return None

def get_west_region(region):
    """Stub: going by its name, meant to give the region west of `region`.

    Not implemented yet; `region` is ignored and None is returned.
    """
    return None

def get_northwest_region(region):
    """Stub: going by its name, meant to give the region north-west of `region`.

    Not implemented yet; `region` is ignored and None is returned.
    """
    return None

def get_northeast_region(region):
    """Stub: going by its name, meant to give the region north-east of `region`.

    Not implemented yet; `region` is ignored and None is returned.
    """
    return None

def get_southwest_region(region):
    """Stub: going by its name, meant to give the region south-west of `region`.

    Not implemented yet; `region` is ignored and None is returned.
    """
    return None

def get_southeast_region(region):
    """Stub: going by its name, meant to give the region south-east of `region`.

    Not implemented yet; `region` is ignored and None is returned.
    """
    return None

In [None]:
# gets the numerical prefix (address) of a string
def get_number(address):
    """Return the leading run of decimal digits of `address` as an int.

    `address` is passed through str() first, so non-string values are accepted.
    Returns 0 when the text does not start with a digit (this includes the empty
    string); get_block treats 0 as "no street number".
    """
    num = 0
    for ch in str(address):
        # isdecimal() rather than isdigit(): characters such as superscript digits
        # pass isdigit() but are rejected by int(), so they must end the prefix.
        if not ch.isdecimal():
            break
        num = num * 10 + int(ch)
    return num

In [None]:
# gets the street name
def get_street(address):
    """Return the upper-cased street name of `address` with no number, suffix or spaces.

    Steps: drop the leading house number (if any) together with the single
    character that follows it, upper-case the rest, remove street-type suffixes
    (ST, AVE, ...), remove the word BLOCK, then remove all spaces.

    An address without a leading number is kept whole instead of losing its first
    characters. Suffixes are only removed when they are complete words preceded
    by a space, so " ST" is not cut out of a name such as "SAN STEFANO".
    """
    address = str(address)
    suffixes = ['ST', 'BLVD', 'RD', 'AVE', 'PL', 'WAY', 'DR', 'WALK', 'LN', 'OVRPAS', 'CIR']
    house_number = re.match(r'[0-9]+', address)
    # Measure the digit prefix on the text itself so leading zeros are counted;
    # the +1 also skips the separator that follows the number.
    adr = address[house_number.end() + 1:] if house_number else address
    adr = adr.upper()
    # \b requires the suffix to end at a word boundary (whole-word match only).
    adr = re.sub(r' (?:' + '|'.join(suffixes) + r')\b', '', adr)
    adr = adr.replace('BLOCK', '')
    adr = adr.replace(' ', '')
    return adr

In [None]:
def get_distance_btwn_points(x1, y1, x2, y2):
    """Return the straight-line (Euclidean) distance between (x1, y1) and (x2, y2).

    Uses numpy, which this notebook imports as `np`; the bare `sqrt` used
    previously was never imported. The result is a plain Python float.
    """
    return float(np.hypot(x1 - x2, y1 - y2))

# finds distance between (x3, y3) to line defined by (x1, y1) and (x2, y2)
def get_distance_btwn_point_and_line(x1, y1, x2, y2, x3, y3):
    """Return the perpendicular distance from (x3, y3) to the line through
    (x1, y1) and (x2, y2).

    The line is treated as infinite: a point lying on the extension of the
    segment beyond either endpoint has distance 0.
    NOTE(review): callers use this for street segments; confirm whether a
    clamped point-to-segment distance is wanted instead.

    When both defining points coincide there is no line, so the distance to
    that single point is returned instead of dividing by zero.
    """
    direction = np.array([x2 - x1, y2 - y1], dtype=float)
    offset = np.array([x3 - x1, y3 - y1], dtype=float)
    length = np.linalg.norm(direction)
    if length == 0:
        return float(np.linalg.norm(offset))
    # Scalar 2-D cross product written out by hand: np.cross on 2-element
    # vectors is deprecated as of NumPy 2.0.
    cross = direction[0] * offset[1] - direction[1] * offset[0]
    return float(abs(cross) / length)

In [None]:
# converts a value to int, or None when it is missing / not numeric
def _to_int(value):
    try:
        return int(value)
    except (TypeError, ValueError):
        return None

# gets the street segments corresponding to an address
# TODO: some features may not have a street number/name, figure out implementation
def get_block(address):
    """Return the index labels of the street_data rows that contain `address`.

    A row matches when its 'name' equals get_street(address) and the street
    number from get_number(address) lies between the row's 'start_point' and
    'end_point' (inclusive, in either order). Rows whose bounds cannot be read
    as integers are skipped.
    NOTE(review): here 'start_point'/'end_point' are read as address numbers,
    while the coordinate lookups in this notebook use them as intersection ids;
    confirm which meaning is intended.

    Returns an empty list when the street number is 0 or nothing matches.
    """
    index = []
    number = get_number(address)
    if number == 0:
        return index
    street = get_street(address)
    for edge_segment in street_data.index[street_data['name'] == street].tolist():
        start_point = _to_int(street_data.at[edge_segment, 'start_point'])
        end_point = _to_int(street_data.at[edge_segment, 'end_point'])
        if start_point is None or end_point is None:
            continue
        if min(start_point, end_point) <= number <= max(start_point, end_point):
            index.append(edge_segment)
    return index

# looks up the (latitude, longitude) stored for an intersection
def _get_intersection_coordinates(intersection):
    """Return (latitude, longitude) from intersection_data for `intersection`,
    or None when the id or either coordinate is missing (None/NaN)."""
    if pd.isna(intersection):
        return None
    latitude = intersection_data.at[intersection, 'latitude']
    longitude = intersection_data.at[intersection, 'longitude']
    if pd.isna(latitude) or pd.isna(longitude):
        return None
    return latitude, longitude

# distance from a point to a single street_data row
def _get_distance_to_block(row, latitude, longitude):
    """Return the distance from (latitude, longitude) to the segment in `row`.

    With both endpoints known the point-to-line distance is used; with only one
    endpoint known, the distance to that endpoint; with none, None is returned.
    """
    start = _get_intersection_coordinates(row['start_point'])
    end = _get_intersection_coordinates(row['end_point'])
    if start is None and end is None:
        return None
    if end is None:
        return get_distance_btwn_points(start[0], start[1], latitude, longitude)
    if start is None:
        return get_distance_btwn_points(end[0], end[1], latitude, longitude)
    return get_distance_btwn_point_and_line(
        start[0], start[1], end[0], end[1], latitude, longitude)

# gets the street segment closest to the latitude and longitude of a given point
# current implementation will assume streets are straight lines and the earth is flat
# also current implementation goes through all edges which is slow, implement regions in the future
# REQUIRES intersections to have coordinates
def get_block_from_coordinates(latitude, longitude):
    """Return the street_data index label of the segment nearest to the point.

    Every row of street_data is scanned. Returns -1 when no row has a usable
    endpoint, so callers must check for -1 before using the result as a label.
    """
    min_distance = float('inf')
    min_street_index = -1
    for index, row in street_data.iterrows():
        current_distance = _get_distance_to_block(row, latitude, longitude)
        if current_distance is not None and current_distance < min_distance:
            min_distance = current_distance
            min_street_index = index
    return min_street_index

# gets the k closest segments to the given point
# REQUIRES intersections to have coordinates
def get_closest_blocks_from_coordinates(latitude, longitude, k):
    """Return the index labels of up to `k` segments nearest to the point,
    ordered nearest first.

    Fewer than `k` labels are returned when fewer segments have a usable
    endpoint; an empty list is returned for k <= 0.
    """
    if k <= 0:
        return []
    # Distances are stored negated so pq[0] is always the furthest of the
    # candidates kept so far; the heap never holds more than k entries.
    pq = []
    for index, row in street_data.iterrows():
        current_distance = _get_distance_to_block(row, latitude, longitude)
        if current_distance is None:
            continue
        if len(pq) < k:
            heappush(pq, (-current_distance, index))
        elif pq[0][0] < -current_distance:
            heappop(pq)
            heappush(pq, (-current_distance, index))
    # Drain only after every row has been seen. Popping yields furthest first,
    # so reverse to hand back the nearest segment first.
    closest = []
    while pq:
        closest.append(heappop(pq)[1])
    closest.reverse()
    return closest

In [None]:
# if street data is two way, simply match intersection coordinates with two closest segment endpoints
# construct graph with desired endpoints, extra weighting not needed
# if prefer not turning, do something similar to original idea with gadgets
# EASIEST SOLUTION IF WE HAVE DATA

# if street data is one way, must use gadgets to match segments together
# need to hope users don't notice things about crossing streets (might not be a big deal anyways)
# still requires locations of intersections
# STILL OK SOLUTION

# if no intersection data, we're screwed
# some n^2 algo required to loop through edges to see where they intersect (assuming straight lines, kinda bad)
# might be billions of segments, would take 10^18 computations
# could optimize by regions but that's still a constant factor
# only have to do this once though, then we can use intersection data and continue with one way street idea
# requires street names with segments
# WORST CASE, WE CURRENTLY AREN'T EVEN CAPABLE OF THIS

# currently, we could continue the address lookup
# several flaws: duplicate addresses, streets like 3rd street, don't know anything about intersections
# hard to map back to google maps
# we currently have a working implementation (Anshul's code), even though it has the flaws mentioned above
# CURRENT IMPLEMENTATION (not finished)

In [None]:
def get_edge_index(address):
    """Return a list of (start_point, end_point) pairs, one for each street
    segment that get_block finds for `address`.

    Cells are read with `.at[row, column]`; plain `street_data[row, column]`
    would be treated as a column lookup with a tuple key and raise KeyError.
    """
    return [(street_data.at[ind, 'start_point'], street_data.at[ind, 'end_point'])
            for ind in get_block(address)]

In [None]:
# increments the value of parameter at the street segment containing address
def update_street_data(address, parameter):
    """Add 1 to column `parameter` for every segment get_block returns for `address`.

    A cell that is still empty (None/NaN) is counted as 0, so the first
    increment stores 1 instead of failing on `None + 1`.
    Does nothing when no segment matches.
    """
    for block in get_block(address) or []:
        current = street_data.at[block, parameter]
        street_data.at[block, parameter] = (0 if pd.isna(current) else current) + 1
            
# increments the value of parameter at the k street segments closest to location
def update_street_data_with_coordinates(latitude, longitude, parameter, k = 1):
    """Add 1 to column `parameter` for the `k` segments nearest to the point.

    k == 1 uses get_block_from_coordinates; any other k uses
    get_closest_blocks_from_coordinates. A cell that is still empty (None/NaN)
    is counted as 0. Does nothing when no segment is found.
    """
    if k == 1:
        nearest = get_block_from_coordinates(latitude, longitude)
        # -1 is the "nothing found" sentinel, not a row label.
        blocks = [] if nearest == -1 else [nearest]
    else:
        blocks = get_closest_blocks_from_coordinates(latitude, longitude, k) or []
    for block in blocks:
        current = street_data.at[block, parameter]
        street_data.at[block, parameter] = (0 if pd.isna(current) else current) + 1

In [None]:
# fills intersections with data, DEPENDS ON DATA OF STREET LAYOUT
def populate_intersections():
    """Stub: meant to fill the intersection table once street-layout data exists.

    Not implemented yet; does nothing and returns None.
    """
    return None

In [None]:
# sets start index and end index for each street segment to intersection indices, DEPENDS ON STREET LAYOUT
def set_intersection_indices():
    """Stub: meant to set each segment's start/end index to intersection indices.

    Not implemented yet; does nothing and returns None.
    """
    return None