In [9]:
import pandas as pd
import numpy as np
import csv

In [10]:
import json

def read_json(filename):
    """
    Read a GeoJSON file and map each neighborhood to one ring of coordinates.

    For every entry in the file's top-level 'features' list, the neighborhood
    name is taken from properties['NTAName'] and the coordinates from
    geometry['coordinates'].  Two nesting depths are handled:

      * 2 layers below the first element (Polygon-style):
        coordinates[0] is already a list of points.
      * 3 layers below the first element (MultiPolygon-style):
        coordinates[0][0] is the list of points.

    Only that first ring is kept; any further rings or polygons of the same
    feature are discarded.  If two features share a name, the later one
    overwrites the earlier one.

    Parameter: filename, path to a GeoJSON file

    Return: result, a dictionary with
            key: neighborhood name,
            value: a list of points, each stored exactly as found in the file
                   (longitude first, then latitude, per GeoJSON)
    """

    # GeoJSON is specified as UTF-8, so do not depend on the platform default.
    with open(filename, encoding="utf-8") as f:
        data = json.load(f)

    result = {}
    for feature in data['features']:

        # NTAName = Neighborhood Tabulation Area Name
        neighborhood = feature['properties']['NTAName']
        coordinates = feature['geometry']['coordinates'][0]

        # Decide the depth from the structure, not from a length: a point may
        # hold 2 or 3 numbers (optional altitude), so "len(...) > 2" cannot
        # tell a 3-number point from a ring.  If the first element's first
        # element is itself a list, we are still one level above the points.
        if (coordinates and coordinates[0]
                and isinstance(coordinates[0][0], (list, tuple))):
            coordinates = coordinates[0]

        result[neighborhood] = coordinates

    return result

In [42]:
# TODO: the file name "query.geojson" is hard-coded here and still needs to be changed.
# `result` is the dictionary returned by read_json: neighborhood name -> list of coordinates.
result = read_json("query.geojson")

{'Airport': [[-73.7471229036745, 40.6371705262911],
  [-73.7470679157732, 40.6371625003178],
  [-73.747066217744, 40.6371692498533],
  [-73.7470297242845, 40.63716392323],
  [-73.747007645476, 40.6371607006469],
  [-73.7469952729197, 40.6371588952696],
  [-73.7470012123916, 40.6371352783408],
  [-73.7470123849139, 40.6371369569381],
  [-73.747039252118, 40.6371409926868],
  [-73.7470697095698, 40.6371455694044],
  [-73.7470681935305, 40.6371514168603],
  [-73.7471218962802, 40.6371594916062],
  [-73.7471376747951, 40.6370663108666],
  [-73.747159177203, 40.6370687740877],
  [-73.7471506060137, 40.6371340819214],
  [-73.7471557196371, 40.6371346136789],
  [-73.7471315602496, 40.6372251114383],
  [-73.7471053872719, 40.6372195098347],
  [-73.7471229036745, 40.6371705262911]],
 'Allerton-Pelham Gardens': [[-73.853637796689, 40.8733088923496],
  [-73.8535776943645, 40.8732843136881],
  [-73.8526589276062, 40.873000480519],
  [-73.8517112813765, 40.8727057515993],
  [-73.851086042224, 40.87

In [41]:
# Export the neighborhood names (the keys of `result`) as a one-column CSV.
n = list(result.keys())
d = {'neighborhood': n}
neigh = pd.DataFrame(data=d)
# index=False leaves the DataFrame's row index out of the written file.
neigh.to_csv("neighborhood_column.csv", index=False)

In [4]:
# result = {"Borough Park" : [[lon1, lat1], [lon2, lat2], ...]}

"""
polygons = 
{
"Borough Park" : {Lat : [], Lon : []}
"East Flushing" : {Lat : [], Lon : []}
"Auburndale" : {Lat : [], Lon : []}
.
.
.
"Elmhurst" : {Lat : [], Lon : []}
}
"""

def process_coordinates(result):
    """
    Split every neighborhood's point list into parallel latitude/longitude lists.

    Parameter: result, a dictionary with
               key: neighborhood
               value: a list of points; element [0] of each point is read as
                      the longitude and element [1] as the latitude

    Return: polygons, a dictionary with
            key: neighborhood
            value: {"Lat": [latitudes...], "Lon": [longitudes...]},
                   both lists in the same order as the input points
    """

    polygons = {}

    for name, ring in result.items():
        # Walk the points once, pulling latitude ([1]) before longitude ([0]).
        lat_lon_pairs = [(point[1], point[0]) for point in ring]

        polygons[name] = {
            "Lat": [lat for lat, _ in lat_lon_pairs],
            "Lon": [lon for _, lon in lat_lon_pairs],
        }

    return polygons

In [5]:
# Reshape `result` into {neighborhood: {"Lat": [...], "Lon": [...]}}.
polygons = process_coordinates(result)

In [25]:
from check_points import point_inside_polygon


def find_neighborhood(result, csv_file, lat_col=4, lon_col=3):
    """
    Label every row of a CSV file with the neighborhood its point falls in.

    Each data row's latitude/longitude is tested against the polygons in
    `result` (in dictionary order) with point_inside_polygon; the first
    polygon that matches gives the row its neighborhood.  Rows that match no
    polygon are dropped.  The remaining rows, with an added 'neighborhood'
    column, are written to a new CSV named "final_<original file name>" in
    the same directory as the input file.

    Parameter: result, a dictionary with key: neighborhood,
                       value: the polygon passed as third argument to
                       point_inside_polygon (as built by read_json)
               csv_file, path of the input csv file; its first line is
                         treated as a header and skipped
               lat_col, 0-based position of the latitude in a raw csv row
                        (default 4: taxi layout; the Uber layout uses 3)
               lon_col, 0-based position of the longitude in a raw csv row
                        (default 3: taxi layout; the Uber layout uses 2)

    Return: None; the result is the csv file written to disk.

    Raises: IndexError / ValueError if a row is too short or the selected
            fields are not numbers.
    """
    import os  # local import: only needed to build the output path

    neighborhoods_list = []
    # newline='' is what the csv module asks for, so that newlines embedded
    # in quoted fields are parsed as part of the field.
    with open(csv_file, 'r', newline='') as csv_input:
        reader = csv.reader(csv_input)
        # skip the headers
        next(reader, None)
        for row in reader:
            lat = float(row[lat_col])
            lon = float(row[lon_col])

            for neighborhood, polygon in result.items():
                if point_inside_polygon(lon, lat, polygon):
                    neighborhoods_list.append(neighborhood)
                    break
            else:
                # the loop ended without a break: no polygon contains the point
                neighborhoods_list.append('NA')

    # The file is read a second time with pandas; the first column becomes the
    # index, and the labels collected above are attached row by row.
    df = pd.read_csv(csv_file, sep=',', index_col=0)
    neighborhoods = np.asarray(neighborhoods_list)
    df['neighborhood'] = neighborhoods

    # remove all the 'NA' rows
    df = df[df.neighborhood != 'NA']

    # Prefix only the file name, not the whole path, so that an input such as
    # "data/trips.csv" is written to "data/final_trips.csv".
    directory, base_name = os.path.split(csv_file)
    df.to_csv(os.path.join(directory, "final_" + base_name))

In [26]:
# Label the rows of the sample CSV with their neighborhood; rows that matched a
# neighborhood are written to "final_split_taxi_06_2014_sample.csv".
find_neighborhood(result, "split_taxi_06_2014_sample.csv")