## Load libraries

In [95]:
import pandas as pd
import numpy as np

import geopandas, fiona
import pyproj    
import shapely
import shapely.ops as ops
from shapely.geometry import Point
from functools import partial

## Load data

In [62]:
# Business listings, one CSV per category. The three frames are loaded in a
# single unpacking so the file list reads as one unit.
restaurants_nyc, shopping_nyc, nightlife_nyc = (
    pd.read_csv(csv_path)
    for csv_path in (
        "restaurants_nyc.csv",
        "shopping_nyc.csv",
        "nightlife_nyc.csv",
    )
)

In [96]:
# Neighbourhood boundaries: name, neighbourhood group and polygon geometry.
neighbourhoods_geo = geopandas.read_file("neighbourhoods.geojson")

In [98]:
# Browse the neighbourhoods alphabetically by name (display only; the frame
# itself is not reordered).
neighbourhoods_geo.sort_values("neighbourhood")

Unnamed: 0,neighbourhood,neighbourhood_group,geometry
1,Allerton,Bronx,"(POLYGON ((-73.848597 40.87167, -73.845823 40...."
7,Arden Heights,Staten Island,"(POLYGON ((-74.169827 40.561078, -74.169822999..."
8,Arrochar,Staten Island,"(POLYGON ((-74.06077999999999 40.593188, -74.0..."
9,Arverne,Queens,"(POLYGON ((-73.789535 40.599972, -73.789541 40..."
15,Astoria,Queens,"(POLYGON ((-73.90160299999999 40.76777, -73.90..."
16,Bath Beach,Brooklyn,"(POLYGON ((-73.99381 40.60195, -73.99961999999..."
18,Battery Park City,Manhattan,"(POLYGON ((-74.01375400000001 40.71369, -74.01..."
10,Bay Ridge,Brooklyn,"(POLYGON ((-74.014752 40.633967, -74.014753 40..."
19,Bay Terrace,Queens,"(POLYGON ((-73.770652 40.782078, -73.775599 40..."
20,"Bay Terrace, Staten Island",Staten Island,"(POLYGON ((-74.132614 40.563453, -74.13311 40...."


In [64]:
# Peek at the first rows to check which columns were loaded.
neighbourhoods_geo.head()

Unnamed: 0,neighbourhood,neighbourhood_group,geometry
0,Bayswater,Queens,"(POLYGON ((-73.76670799999999 40.614911, -73.7..."
1,Allerton,Bronx,"(POLYGON ((-73.848597 40.87167, -73.845823 40...."
2,City Island,Bronx,"(POLYGON ((-73.782822 40.843919, -73.782572 40..."
3,Ditmars Steinway,Queens,"(POLYGON ((-73.90160299999999 40.76777, -73.90..."
4,Ozone Park,Queens,"(POLYGON ((-73.83754399999999 40.691364, -73.8..."


In [65]:
# Inspect the first record in full (one value per column).
neighbourhoods_geo.iloc[0]

neighbourhood                                                  Bayswater
neighbourhood_group                                               Queens
geometry               (POLYGON ((-73.76670799999999 40.614911, -73.7...
Name: 0, dtype: object

## Neighborhood Area

In [73]:
# Approximate the ground area of every neighbourhood.
#
# Each geometry is reprojected from lon/lat (EPSG:4326) into an Albers
# equal-area conic whose two standard parallels are the geometry's own
# southern and northern bounds, so the planar `.area` of the result is a
# true surface area. PROJ spells those parameters `lat_1` / `lat_2`; the
# earlier `lat1` / `lat2` spellings are not PROJ parameters, so the standard
# parallels were never actually applied.
wgs84 = pyproj.Proj(init='EPSG:4326')  # loop-invariant source projection

polygon_list = list(neighbourhoods_geo["geometry"])
area_list = []
for polygon in polygon_list:
    # shapely bounds are (minx, miny, maxx, maxy) -> indices 1 and 3 are latitudes.
    south_lat, north_lat = polygon.bounds[1], polygon.bounds[3]
    geom_area = ops.transform(
        partial(
            pyproj.transform,
            wgs84,
            pyproj.Proj(
                proj='aea',
                lat_1=south_lat,
                lat_2=north_lat)),
        polygon)
    # Projected coordinates are in metres (PROJ default), so m^2 -> km^2.
    area_list.append(geom_area.area / 1000000)

In [76]:
# Attach the per-neighbourhood areas. `area_list` was built by iterating the
# frame's own geometry column, so it is already in row order. Values are the
# projected area divided by 1,000,000 (presumably km^2 — assumes the
# projection's units are metres).
neighbourhoods_geo["area"] = area_list

## Neighborhood Centroid 

In [86]:
# Centroid of every neighbourhood geometry, split into separate x (longitude)
# and y (latitude) lists that line up with `polygon_list`.
centroids = [shape.centroid for shape in polygon_list]
centroid_longitude = [center.x for center in centroids]
centroid_latitude = [center.y for center in centroids]

In [87]:
# Store the centroid coordinates as plain numeric columns alongside the
# geometry (lists are in the same row order as the frame).
neighbourhoods_geo["centroid_longitude"] = centroid_longitude
neighbourhoods_geo["centroid_latitude"] = centroid_latitude

In [88]:
# Check that the area and centroid columns were added.
neighbourhoods_geo.head()

Unnamed: 0,neighbourhood,neighbourhood_group,geometry,area,centroid_longitude,centroid_latitude
0,Bayswater,Queens,"(POLYGON ((-73.76670799999999 40.614911, -73.7...",0.235399,-73.769503,40.6174
1,Allerton,Bronx,"(POLYGON ((-73.848597 40.87167, -73.845823 40....",2.42687,-73.859984,40.864727
2,City Island,Bronx,"(POLYGON ((-73.782822 40.843919, -73.782572 40...",1.114446,-73.786654,40.846932
3,Ditmars Steinway,Queens,"(POLYGON ((-73.90160299999999 40.76777, -73.90...",5.456081,-73.906003,40.777804
4,Ozone Park,Queens,"(POLYGON ((-73.83754399999999 40.691364, -73.8...",4.835648,-73.846364,40.679083


In [94]:
# Export the enriched neighbourhood table through the OGR "CSV" driver.
# NOTE(review): by default that driver discards feature geometry unless a
# GEOMETRY=AS_WKT layer option is passed — confirm that dropping the polygons
# from the CSV is intended.
neighbourhoods_geo.to_file("neighbourhoods_nyc.csv", driver="CSV")

## Neighborhood Mapping

In [56]:
def append_neighborhood(df, geo_df):
    """Tag each business with the neighbourhood polygon that contains it.

    Parameters
    ----------
    df : pandas.DataFrame
        Business records with ``longitude`` and ``latitude`` columns.
    geo_df : geopandas.GeoDataFrame
        Neighbourhood shapes in a ``geometry`` column. The first two columns
        (by position) supply the neighbourhood name and the neighbourhood
        group, respectively.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``neighborhood`` and ``neighborhood_group`` columns
        added, restricted to the rows whose point fell inside some polygon.
        ``df`` itself is not modified.

    Notes
    -----
    Matching is a brute-force scan: every point is tested against the
    polygons in order and the first containing polygon wins.
    """
    # Pull everything from the *argument* (not a notebook global) once, up
    # front, so the inner loop only does list indexing.
    polygons = list(geo_df["geometry"])
    names = list(geo_df.iloc[:, 0])
    groups = list(geo_df.iloc[:, 1])

    neighborhood_list = []
    neighborhood_group_list = []
    for i, (lon, lat) in enumerate(zip(df["longitude"], df["latitude"])):
        print(i, end='\r', flush=True)  # lightweight progress indicator
        point = Point(lon, lat)
        for j, polygon in enumerate(polygons):
            if point.within(polygon):
                neighborhood_list.append(names[j])
                neighborhood_group_list.append(groups[j])
                break
        else:
            # No polygon matched (also covers an empty polygon list), so the
            # result lists always stay aligned with the rows of `df`.
            neighborhood_list.append(None)
            neighborhood_group_list.append(None)

    # `assign` returns a copy, leaving the caller's frame untouched.
    tagged = df.assign(
        neighborhood=neighborhood_list,
        neighborhood_group=neighborhood_group_list,
    )
    return tagged[tagged["neighborhood"].notna()]

In [57]:
# Tag every business table with its neighbourhood; rows that fall inside no
# neighbourhood polygon are dropped by `append_neighborhood`. Each name is
# rebound to the filtered result, one statement at a time, so a failure in a
# later call keeps the earlier (slow) results.
restaurants_nyc = append_neighborhood(restaurants_nyc, neighbourhoods_geo)
shopping_nyc = append_neighborhood(shopping_nyc, neighbourhoods_geo)
nightlife_nyc = append_neighborhood(nightlife_nyc, neighbourhoods_geo)

53033

In [59]:
# Persist the neighbourhood-tagged tables back to their CSV files, without
# writing the DataFrame index as a column.
for tagged_frame, csv_path in (
    (restaurants_nyc, "restaurants_nyc.csv"),
    (shopping_nyc, "shopping_nyc.csv"),
    (nightlife_nyc, "nightlife_nyc.csv"),
):
    tagged_frame.to_csv(csv_path, index=False)