In [None]:
import os

import numpy as np
import pandas as pd

import bokeh.layouts
from bokeh.io import output_file, show
from bokeh.layouts import widgetbox
from bokeh.models import (
    GMapPlot, GMapOptions, ColumnDataSource, Circle, DataRange1d, PanTool, WheelZoomTool, BoxSelectTool
)
from bokeh.models.widgets import Button, RadioButtonGroup, Select, Slider, CheckboxButtonGroup

In [None]:
# Read april_split.csv, using its first column as the row index,
# then preview the first rows.
april = pd.read_csv(
    "april_split.csv",
    sep=',',
    index_col=0,
)
april.head()

In [None]:
# Select columns by position with `.iloc`; `DataFrame.icol` was deprecated and
# later removed from pandas. Positions count from the first column after the
# index (the frame was loaded with index_col=0) -- presumably 1 is the
# latitude column and 2 the longitude column; verify against april.head().
lat = np.asarray(april.iloc[:, 1])
lon = np.asarray(april.iloc[:, 2])
print("length of latitude: %s" % len(lat))
print("length of longitude: %s" % len(lon))

In [None]:
map_options = GMapOptions(lat=40.7690, lng=-73.9549, map_type="roadmap", zoom=10)

plot = GMapPlot(
    x_range=DataRange1d(), y_range=DataRange1d(), map_options=map_options
)
plot.title.text = "New York City"

# For GMaps to function, Google requires you obtain and enable an API key:
#
#     https://developers.google.com/maps/documentation/javascript/get-api-key
#
# The key is a credential, so it is read from the GOOGLE_API_KEY environment
# variable instead of being committed with the notebook.
google_api_key = os.environ.get("GOOGLE_API_KEY")
if not google_api_key:
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable to a Google Maps "
        "JavaScript API key before running this cell."
    )
plot.api_key = google_api_key

# One translucent circle per (lon, lat) pair.
source = ColumnDataSource(
    data=dict(
        lat=lat,
        lon=lon,
    )
)

circle = Circle(x="lon", y="lat", size=3, fill_color="blue", fill_alpha=0.1, line_color=None)
plot.add_glyph(source, circle)

plot.add_tools(PanTool(), WheelZoomTool(), BoxSelectTool())

# Widgets shown next to the map. No callbacks are attached in this cell, so
# they do not filter the plotted points.
checkbox_button_group = CheckboxButtonGroup(
        labels=["Uber", "Taxi"], active=[0, 1])
slider = Slider(start=0, end=23, step=1, title="Hour")
select1 = Select(title="Month:", value="Month", options=["April", "May", "June", "July", "August", "September"])
select2 = Select(title="Date:", value="Date", options=["1", "2", "3", "4", "5", "6", "7"])

show(bokeh.layouts.row(plot, checkbox_button_group, widgetbox(slider, select1, select2, width=300)))

In [None]:
import json

def read_json(filename):

    """
    Read a GeoJSON file and collect one boundary ring per neighborhood.

    Parameter: filename, path of a GeoJSON file whose top-level 'features'
               list holds entries with properties['NTAName'] and
               geometry['coordinates'].

    Return: result, a dictionary containing
            key: neighborhood name (the 'NTAName' property),
            value: the first ring of the feature's geometry, i.e. a list of
                   coordinate positions exactly as stored in the file.

    Note: only the first ring of the first polygon is kept. Any further
          rings or polygons of a feature are ignored, and a later feature
          with the same NTAName replaces an earlier one.
    """

    # JSON text is UTF-8; do not depend on the platform default encoding.
    with open(filename, encoding="utf-8") as f:
        data = json.load(f)

    result = {}
    for feature in data['features']:

        # NTAName = Neighborhood Tabulation Area Name
        neighborhood = feature['properties']['NTAName']
        coordinates = feature['geometry']['coordinates'][0]

        # Two nesting depths are supported:
        #   2 layers below [0]: a ring, whose items are positions (numbers inside)
        #   3 layers below [0]: a polygon, whose items are rings (positions inside)
        # Probe the depth itself rather than the length of a position, so
        # positions that carry a third value are not mistaken for a ring.
        if isinstance(coordinates[0][0], (list, tuple)):
            coordinates = coordinates[0]

        result[neighborhood] = coordinates

    return result

In [None]:
# Neighborhood name -> boundary coordinates, parsed from query.geojson.
result = read_json(filename="query.geojson")

In [None]:
# result = {"Borough Park" : [[lon1, lat1], [lon2, lat2], ...]}

"""
polygons = 
{
"Borough Park" : {Lat : [], Lon : []}
"East Flushing" : {Lat : [], Lon : []}
"Auburndale" : {Lat : [], Lon : []}
.
.
.
"Elmhurst" : {Lat : [], Lon : []}
}
"""

def process_coordinates(result):

    """
    Split every neighborhood's boundary into parallel latitude and
    longitude lists.

    Parameter: result, a dictionary mapping a neighborhood name to a list of
               coordinates; element 0 of each coordinate is read as the
               longitude and element 1 as the latitude.

    Return: polygons, a dictionary mapping each neighborhood name to
            {"Lat": [...], "Lon": [...]}, with both lists in the same order
            as the input coordinates.
    """

    return {
        name: {
            "Lat": [point[1] for point in boundary],
            "Lon": [point[0] for point in boundary],
        }
        for name, boundary in result.items()
    }

In [None]:
polygons = process_coordinates(result)
# Echoing `polygons` itself would print every vertex of every neighborhood;
# show the vertex count of the first few neighborhoods instead.
{name: len(coords["Lat"]) for name, coords in list(polygons.items())[:5]}

In [None]:
def point_inside_polygon(x, y, poly):
    """
    Determine whether the point (x, y) lies inside a polygon by counting how
    many polygon edges a horizontal ray starting at the point crosses: an
    odd number of crossings means the point is inside.

    Parameter: x, longitude of the point
               y, latitude of the point
               poly, a sequence of vertices; element 0 of each vertex is
                     used as x and element 1 as y, any further elements are
                     ignored. The last vertex is joined back to the first.

    Return: a boolean, whether (x, y) is inside poly. An empty poly contains
            no point, so the result is False. Points lying exactly on an
            edge may be reported either way.
    """
    n = len(poly)
    if n == 0:
        return False

    inside = False
    # Begin with the closing edge (last vertex -> first vertex), then walk
    # the remaining edges in order.
    p1x, p1y = poly[-1][0], poly[-1][1]
    for i in range(n):
        p2x, p2y = poly[i][0], poly[i][1]
        # The edge can only be crossed if y lies in its half-open vertical
        # span; this also guarantees p1y != p2y, so the division is safe.
        if min(p1y, p2y) < y <= max(p1y, p2y) and x <= max(p1x, p2x):
            xinters = (y - p1y) * (p2x - p1x) / (p2y - p1y) + p1x
            if p1x == p2x or x <= xinters:
                inside = not inside
        p1x, p1y = p2x, p2y
    return inside

In [None]:
import csv
import pickle


# Columns of the raw CSV rows read below:
# Lat = row[2]
# Lon = row[3]

# One label per data row of april_split.csv: the first neighborhood whose
# boundary contains the row's point, or 'NA' when none does.
list_neighborhood = []
with open("april_split.csv", 'r', newline='') as csv_input:
    reader = csv.reader(csv_input)
    # skip the headers
    next(reader, None)
    for row in reader:
        # Deliberately not named `lat` / `lon`: those names hold the coordinate
        # arrays plotted by the map cell and must not be overwritten here.
        row_lat = float(row[2])
        row_lon = float(row[3])

        for neighborhood, boundary in result.items():
            if point_inside_polygon(row_lon, row_lat, boundary):
                list_neighborhood.append(neighborhood)
                break
        else:
            # the loop ended without a break: no boundary contains the point
            list_neighborhood.append('NA')

# Cache the labels on disk; the context manager closes (and flushes) the file.
with open('neighborhood_column', 'wb') as pickle_output:
    pickle.dump(list_neighborhood, pickle_output)

In [None]:
# Reload the cached labels. Unpickling can execute arbitrary code, so only
# load a 'neighborhood_column' file that this notebook wrote itself.
with open('neighborhood_column', 'rb') as pickle_input:
    neighborhood_column = pickle.load(pickle_input)
len(neighborhood_column)

In [None]:
# Preview the first labels only; the full list has one entry per CSV row.
neighborhood_column[:10]

In [None]:
# Read april_split.csv again into `df` (first column as the row index) so the
# neighborhood labels can be attached to it, then preview the first rows.
df = pd.read_csv(
    "april_split.csv",
    sep=',',
    index_col=0,
)
df.head()

In [None]:
# Attach one neighborhood label per row, then keep only the rows whose label
# is not the 'NA' placeholder.
neighborhoods = np.asarray(neighborhood_column)
df['neighborhood'] = neighborhoods
has_neighborhood = df['neighborhood'] != 'NA'
df = df[has_neighborhood]

In [None]:
# Preview the first five rows of the labelled frame.
df.head(n=5)

In [None]:
# Write the labelled data frame out as april_neighborhood.csv.
df.to_csv(path_or_buf="april_neighborhood.csv")