In [1]:
import pandas as pd
import numpy as np

import bokeh.layouts

from bokeh.io import output_file, show
from bokeh.layouts import widgetbox
from bokeh.models.widgets import Button, RadioButtonGroup, Select, Slider, CheckboxButtonGroup

from bokeh.models import (
  GMapPlot, GMapOptions, ColumnDataSource, Circle, DataRange1d, PanTool, WheelZoomTool, BoxSelectTool
)

In [2]:
april = pd.read_csv("april_split.csv", sep=',', index_col=0)
april.head()

Unnamed: 0,Date/Time,Lat,Lon,year,month,day,dayofweek,hour,minute,time
0,4/1/14 0:11,40.769,-73.9549,2014,4,1,1,0,11,00:11:00
1,4/1/14 0:17,40.7267,-74.0345,2014,4,1,1,0,17,00:17:00
2,4/1/14 0:21,40.7316,-73.9873,2014,4,1,1,0,21,00:21:00
3,4/1/14 0:28,40.7588,-73.9776,2014,4,1,1,0,28,00:28:00
4,4/1/14 0:33,40.7594,-73.9722,2014,4,1,1,0,33,00:33:00


In [3]:
lat = april.icol(1)
lon = april.icol(2)
lat = np.asarray(lat)
lon = np.asarray(lon)
print("length of latitude: %s" % len(lat))
print("length of longitude: %s" % len(lon))

length of latitude: 564516
length of longitude: 564516


  if __name__ == '__main__':
  from ipykernel import kernelapp as app


In [4]:
map_options = GMapOptions(lat=40.7690, lng=-73.9549, map_type="roadmap", zoom=10)

plot = GMapPlot(
    x_range=DataRange1d(), y_range=DataRange1d(), map_options=map_options
)
plot.title.text = "New York City"

# For GMaps to function, Google requires you obtain and enable an API key:
#
#     https://developers.google.com/maps/documentation/javascript/get-api-key
#
# Replace the value below with your personal API key:
plot.api_key = "AIzaSyApd7Vz-EuCAupgT1wE-vo4PMq2hzBrTW0"

source = ColumnDataSource(
    data=dict(
        lat=lat,
        lon=lon,
    )
)

circle = Circle(x="lon", y="lat", size=3, fill_color="blue", fill_alpha=0.1, line_color=None)
plot.add_glyph(source, circle)

plot.add_tools(PanTool(), WheelZoomTool(), BoxSelectTool())

# create some widgets
slider = Slider(start=0, end=23, step=1, title="Hour")
select = Select(title="Month:", value="Month", options=["April", "May", "June", "July", "August", "September"])

# create some widgets
checkbox_button_group = CheckboxButtonGroup(
        labels=["Uber", "Taxi"], active=[0, 1])
slider = Slider(start=0, end=23, step=1, title="Hour")
select1 = Select(title="Month:", value="Month", options=["April", "May", "June", "July", "August", "September"])
select2 = Select(title="Date:", value="Date", options=["1", "2", "3", "4", "5", "6", "7"])



show(bokeh.layouts.row(plot, checkbox_button_group, widgetbox(slider, select1, select2, width=300)))

#show(plot)

In [6]:
import json

def read_json(filename):

    """
    A function read the json file and process the Neighborhoods and Coordinates

    Parameter: filename, a json file)

    Return: result, a dictionary contains
            key: Neighborhood,
            value: a list of coordinate corresponding to the that neighborhood
    """

    with open(filename) as f:
        data = json.load(f)

    result = {}
    for feature in data['features']:
        
        # NATName = Neighborhood Tabulation Area Name
        
        neighborhood = feature['properties']['NTAName']
        coordinates = feature['geometry']['coordinates'][0] # 3 layers or 2 layers
        
        if len(coordinates[0]) > 2:
            # 3 layers
            coordinates = coordinates[0]

        result[neighborhood] = coordinates

    return result

In [8]:
result = read_json("query.geojson")

{'Airport': [[-73.7471229036745, 40.6371705262911],
  [-73.7470679157732, 40.6371625003178],
  [-73.747066217744, 40.6371692498533],
  [-73.7470297242845, 40.63716392323],
  [-73.747007645476, 40.6371607006469],
  [-73.7469952729197, 40.6371588952696],
  [-73.7470012123916, 40.6371352783408],
  [-73.7470123849139, 40.6371369569381],
  [-73.747039252118, 40.6371409926868],
  [-73.7470697095698, 40.6371455694044],
  [-73.7470681935305, 40.6371514168603],
  [-73.7471218962802, 40.6371594916062],
  [-73.7471376747951, 40.6370663108666],
  [-73.747159177203, 40.6370687740877],
  [-73.7471506060137, 40.6371340819214],
  [-73.7471557196371, 40.6371346136789],
  [-73.7471315602496, 40.6372251114383],
  [-73.7471053872719, 40.6372195098347],
  [-73.7471229036745, 40.6371705262911]],
 'Allerton-Pelham Gardens': [[-73.853637796689, 40.8733088923496],
  [-73.8535776943645, 40.8732843136881],
  [-73.8526589276062, 40.873000480519],
  [-73.8517112813765, 40.8727057515993],
  [-73.851086042224, 40.87

In [9]:
# result = {"Borough Park" : [[lat1, lon1], [lat2, lon2], ...]}

"""
polygons = 
{
"Borough Park" : {Lat : [], Lon : []}
"East Flushing" : {Lat : [], Lon : []}
"Auburndale" : {Lat : [], Lon : []}
.
.
.
"Elmhurst" : {Lat : [], Lon : []}
}
"""

def process_coordinates(result):
    
    """
    A function read the dictionary contains
    
    key: neighborhood
    value: list of coordinates (latitude, longitude)
    
    and reconstruct a new dictionary contains
    key: neighborhood
    value: a dictionary contains a list of latitudes and a list of longitudes.
    
    Parameter: result dictionary, contains neighborhoods and list of coordinates

    Return: polygon dictionary, contains neighborhoods 
            and a list of latitudes and a list of longitudes
    """

    polygons = {}
    
    #for neighborhood, coordinates in result.items():
    for neighborhood in result.keys():
               
        coordinates = result[neighborhood]
        
        lat_list = []
        lon_list = []
        
        for coordinate in coordinates:
            lat_list.append(coordinate[1])
            lon_list.append(coordinate[0])
        polygons[neighborhood] = {}
        polygons[neighborhood]["Lat"] = lat_list
        polygons[neighborhood]["Lon"] = lon_list

    return polygons

In [10]:
polygons = process_coordinates(result)
polygons

{'Airport': {'Lat': [40.6371705262911,
   40.6371625003178,
   40.6371692498533,
   40.63716392323,
   40.6371607006469,
   40.6371588952696,
   40.6371352783408,
   40.6371369569381,
   40.6371409926868,
   40.6371455694044,
   40.6371514168603,
   40.6371594916062,
   40.6370663108666,
   40.6370687740877,
   40.6371340819214,
   40.6371346136789,
   40.6372251114383,
   40.6372195098347,
   40.6371705262911],
  'Lon': [-73.7471229036745,
   -73.7470679157732,
   -73.747066217744,
   -73.7470297242845,
   -73.747007645476,
   -73.7469952729197,
   -73.7470012123916,
   -73.7470123849139,
   -73.747039252118,
   -73.7470697095698,
   -73.7470681935305,
   -73.7471218962802,
   -73.7471376747951,
   -73.747159177203,
   -73.7471506060137,
   -73.7471557196371,
   -73.7471315602496,
   -73.7471053872719,
   -73.7471229036745]},
 'Allerton-Pelham Gardens': {'Lat': [40.8733088923496,
   40.8732843136881,
   40.873000480519,
   40.8727057515993,
   40.8725010845028,
   40.8724045855179,
  

In [11]:
def point_inside_polygon(x, y, poly):
    """
    A function determines if a given pair of (lon, lat) is inside a 
    given polygon or not
    
    Parameter: x, longitude
               y, latitude
               polygon, a list of (x, y) pairs.
    
    Return: a boolean, whether a (x, y) inside the poly or not.
    """
    n = len(poly)
    inside =False
    p1x,p1y = poly[0]
    for i in range(n+1):
        p2x,p2y = poly[i % n]
        if y > min(p1y,p2y):
            if y <= max(p1y,p2y):
                if x <= max(p1x,p2x):
                    if p1y != p2y:
                        xinters = (y-p1y)*(p2x-p1x)/(p2y-p1y)+p1x
                    if p1x == p2x or x <= xinters:
                        inside = not inside
        p1x,p1y = p2x,p2y
    return inside

In [40]:
import csv
import pickle


# Lat = row[2]
# Lon = row[3]

list_neighborhood = []
found = False
with open("april_split.csv", 'r') as csv_input:
    reader = csv.reader(csv_input)
    # skip the headers
    next(reader, None)
    for row in reader:
        lat = float(row[2])
        lon = float(row[3])

        for neighborhood in result.keys():
            if point_inside_polygon(lon, lat, result[neighborhood]):
                list_neighborhood.append(neighborhood)
                found = True
                break
        if found == False:
            list_neighborhood.append('NA')
        found = False

pickle.dump(list_neighborhood, open('neighborhood_column', 'wb'))

In [41]:
neighborhood_column = pickle.load(open('neighborhood_column', 'rb'))
len(neighborhood_column)

564516

In [42]:
neighborhood_column

['NA',
 'NA',
 'East Village',
 'Midtown-Midtown South',
 'Turtle Bay-East Midtown',
 'NA',
 'Chinatown',
 'Midtown-Midtown South',
 'Hudson Yards-Chelsea-Flatiron-Union Square',
 'Midtown-Midtown South',
 'East Village',
 'Turtle Bay-East Midtown',
 'East Village',
 'NA',
 'Astoria',
 'NA',
 'Midtown-Midtown South',
 'NA',
 'Hudson Yards-Chelsea-Flatiron-Union Square',
 'NA',
 'Midtown-Midtown South',
 'Hudson Yards-Chelsea-Flatiron-Union Square',
 'Prospect Lefferts Gardens-Wingate',
 'East Village',
 'NA',
 'NA',
 'NA',
 'NA',
 'SoHo-TriBeCa-Civic Center-Little Italy',
 'West Village',
 'SoHo-TriBeCa-Civic Center-Little Italy',
 'Upper West Side',
 'Lincoln Square',
 'Clinton',
 'NA',
 'Hudson Yards-Chelsea-Flatiron-Union Square',
 'Lincoln Square',
 'NA',
 'NA',
 'Chinatown',
 'NA',
 'Hudson Yards-Chelsea-Flatiron-Union Square',
 'Midtown-Midtown South',
 'NA',
 'NA',
 'NA',
 'SoHo-TriBeCa-Civic Center-Little Italy',
 'NA',
 'Midtown-Midtown South',
 'Upper East Side-Carnegie Hill'

In [44]:
df = pd.read_csv("april_split.csv", sep=',', index_col=0)
df.head()

Unnamed: 0,Date/Time,Lat,Lon,year,month,day,dayofweek,hour,minute,time
0,4/1/14 0:11,40.769,-73.9549,2014,4,1,1,0,11,00:11:00
1,4/1/14 0:17,40.7267,-74.0345,2014,4,1,1,0,17,00:17:00
2,4/1/14 0:21,40.7316,-73.9873,2014,4,1,1,0,21,00:21:00
3,4/1/14 0:28,40.7588,-73.9776,2014,4,1,1,0,28,00:28:00
4,4/1/14 0:33,40.7594,-73.9722,2014,4,1,1,0,33,00:33:00


In [45]:
neighborhoods = np.asarray(neighborhood_column)
df['neighborhood'] = neighborhoods
df = df[df.neighborhood != 'NA']    # remove all the 'NA' rows

In [48]:
df.head()

Unnamed: 0,Date/Time,Lat,Lon,year,month,day,dayofweek,hour,minute,time,neighborhood
2,4/1/14 0:21,40.7316,-73.9873,2014,4,1,1,0,21,00:21:00,East Village
3,4/1/14 0:28,40.7588,-73.9776,2014,4,1,1,0,28,00:28:00,Midtown-Midtown South
4,4/1/14 0:33,40.7594,-73.9722,2014,4,1,1,0,33,00:33:00,Turtle Bay-East Midtown
6,4/1/14 0:39,40.7223,-73.9887,2014,4,1,1,0,39,00:39:00,Chinatown
7,4/1/14 0:45,40.762,-73.979,2014,4,1,1,0,45,00:45:00,Midtown-Midtown South


In [49]:
# converts the data frame to csv file
df.to_csv("april_neighborhood.csv")