In [20]:
import networkx as nx
import osmnx as ox

In [99]:
import pandas as pd
import numpy as np

In [51]:
# Preview the first five museum rows.
# NOTE(review): in file order `mu` is only loaded by a read_csv cell further
# down, and the output shown here already carries the " NYC" suffix, so this
# cell appears to have been run after those cells — confirm under Restart & Run All.
mu.head(n=5)

Unnamed: 0,name,BOROUGH,address
0,American Folk Art Museum,MANHATTAN,45 W. 53rd St. NYC
1,American Museum of Natural History,MANHATTAN,Central Park West NYC
2,Asia Society and Museum,MANHATTAN,725 Park Ave. NYC
3,The Bronx Museum of the Arts,BRONX,1040 Grand Concourse NYC
4,Bronx Zoo,BRONX,Bronx River Parkway and Fordham Road NYC


In [100]:
# packages
import requests
import json
import time

# geocoder class
class Geocoder:
    """Geocode free-text addresses through the Nominatim search API.

    Each call to :meth:`run` appends one record per input address to
    ``self.results``, in input order, so the results can be lined up
    positionally with the addresses that produced them.

    Every record is a dict with two keys:

    * ``'address'``     - the JSON-encoded label of the first match, or the raw
      payload (``None`` when nothing could be fetched) when there was no
      usable match.
    * ``'coordinates'`` - the first match's coordinates flattened into a single
      comma-separated string, or ``np.nan`` when there was no usable match.
    """

    # base url
    base_url = 'https://nominatim.openstreetmap.org/search'

    # seconds to wait for the server before giving up on a single request
    timeout = 10

    def __init__(self):
        # results
        # Kept per instance: a class-level list would be shared by every
        # Geocoder, so a second geocoder would start with (and return) the
        # records collected by the first one.
        self.results = []

    def fetch(self, address):
        """Send one search request for ``address``.

        Returns the ``requests`` response when the server answers with HTTP
        200, otherwise ``None`` (non-200 status or a transport-level failure).
        """
        # headers
        # NOTE(review): this is a browser User-Agent string; Nominatim's usage
        # policy presumably expects one that identifies the application — confirm.
        headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
        }

        # string query parameters
        params = {
            'q': address,
            'format': 'geocodejson'
        }

        # make HTTP GET request to Nominatim API
        try:
            res = requests.get(url=self.base_url, params=params, headers=headers, timeout=self.timeout)
        except requests.RequestException as exc:
            # A single unreachable request must not abort the whole batch.
            print('HTTP GET request for address %r failed: %s' % (address, exc))
            return None
        print('HTTP GET request to URL: %s | Status code: %s' % (res.url, res.status_code))

        if res.status_code == 200:
            return res
        return None

    def parse(self, res):
        """Append one record for the decoded payload ``res`` to ``self.results``.

        ``res`` is expected to be the decoded geocodejson document (a dict with
        a ``'features'`` list). When the first feature cannot be read - no
        match, unexpected shape, or ``res`` is ``None`` - a placeholder record
        holding ``res`` itself and ``np.nan`` coordinates is appended instead,
        so that results stay aligned with the input addresses.
        """
        try:
            feature = res['features'][0]
            # json.dumps keeps the historical output format: the label is
            # stored as a JSON string literal (including its double quotes).
            label = json.dumps(feature['properties']['geocoding']['label'], indent=2)
            # Flatten the JSON array into "<first>,  <second>" (no brackets).
            coordinates = json.dumps(feature['geometry']['coordinates'], indent=2).replace('\n', '').replace('[', '').replace(']', '').strip()
        except (KeyError, IndexError, TypeError):
            # KeyError: expected key missing; IndexError: empty 'features';
            # TypeError: payload is None / not subscriptable / not serialisable.
            self.results.append({
                'address': res,
                'coordinates': np.nan
            })
            return

        # retrieved data
        self.results.append({
            'address': label,
            'coordinates': coordinates
        })

    def store_results(self):
        """Return the list of records collected so far (nothing is written to disk)."""
        return self.results

    def run(self, addresses, delay=2):
        """Geocode every entry of ``addresses`` and collect the records.

        Parameters
        ----------
        addresses : pandas.Series, numpy.ndarray, iterable of str, or str
            Addresses to look up. Objects exposing ``tolist()`` are converted
            with it; a single string is treated as one address.
        delay : float, default 2
            Seconds to sleep after each request; ``0`` disables the pause.

        Returns
        -------
        None
            Records are available through :meth:`store_results`.
        """
        # convert addresses to list
        if isinstance(addresses, str):
            addresses = [addresses]
        elif hasattr(addresses, 'tolist'):
            addresses = addresses.tolist()
        else:
            addresses = list(addresses)

        # loop over addresses
        for address in addresses:
            response = self.fetch(address)

            # fetch() returns None on failure; calling .json() on it would raise.
            payload = None
            if response is not None:
                try:
                    payload = response.json()
                except ValueError:
                    # Body was not valid JSON; treat it like a failed lookup.
                    payload = None

            # Always record something so results stay aligned with the input.
            self.parse(payload)

            # respect Nominatim crawling policies
            if delay:
                time.sleep(delay)

        # store results
        self.store_results()


## Museums

In [48]:
# Load the museum list; the first CSV column becomes the row index.
mu = pd.read_csv(filepath_or_buffer="museums.csv", index_col=0)

In [49]:
# Preview the first five rows of the freshly loaded museum table.
mu.head(n=5)

Unnamed: 0,name,BOROUGH,address
0,American Folk Art Museum,MANHATTAN,45 W. 53rd St. (at Fifth Ave.)
1,American Museum of Natural History,MANHATTAN,Central Park West (at W. 79th St.)
2,Asia Society and Museum,MANHATTAN,725 Park Ave. (at E. 70th St.)
3,The Bronx Museum of the Arts,BRONX,1040 Grand Concourse
4,Bronx Zoo,BRONX,Bronx River Parkway and Fordham Road


In [50]:
## remove text inside parentheses/brackets and append " NYC" to each address
import re
# The pattern is a raw string: in a plain literal "\(" and "\[" are invalid
# escape sequences, which newer Python versions warn about.
# NOTE(review): this cell overwrites mu["address"], so running it twice
# appends " NYC" twice — reload `mu` before re-running.
mu["address"] = mu["address"].apply(lambda x: re.sub(r"[\(\[].*?[\)\]]", "", x)) + " NYC"

In [None]:
## geocode the museum addresses
geocoder = Geocoder()
geocoder.run(mu["address"])
# list of {'address', 'coordinates'} records, one per museum address
df = geocoder.store_results()

In [65]:
# Turn the list of geocoder records into a DataFrame.
mu_df = pd.DataFrame(data=df)

In [71]:
# Split the "lon, lat" string at the first comma into two columns.
# `n` is passed by keyword: the positional form is deprecated in pandas 1.x
# and rejected by pandas 2.x.
# NOTE(review): the assignment aligns on index labels, so this assumes `mu`
# is indexed 0..N-1 like `mu_df` — confirm against museums.csv.
mu[['LONGITUDE', 'LATITUDE']] = mu_df['coordinates'].str.split(',', n=1, expand=True)

In [73]:
# Keep only the museums for which a longitude is present.
has_longitude = mu['LONGITUDE'].notna()
mu = mu.loc[has_longitude]
mu

Unnamed: 0,name,BOROUGH,address,LONGITUDE,LATITUDE
0,American Folk Art Museum,MANHATTAN,45 W. 53rd St. NYC,-74.0191358556701,40.64779588659794
1,American Museum of Natural History,MANHATTAN,Central Park West NYC,-73.9656374,40.7903594
2,Asia Society and Museum,MANHATTAN,725 Park Ave. NYC,-73.9644037,40.7698862
3,The Bronx Museum of the Arts,BRONX,1040 Grand Concourse NYC,-73.9199471,40.8310911
5,Brooklyn Botanic Garden,BROOKLYN,1000 Washington Ave. NYC,-73.96212479221636,40.6675495
6,Brooklyn Children's Museum,BROOKLYN,145 Brooklyn Ave. NYC,-73.94404613465176,40.67439655
7,Brooklyn Museum,BROOKLYN,200 Eastern Pkwy. NYC,-73.9455797,40.6696058
8,Children's Museum of Manhattan,MANHATTAN,212 W. 83rd St. NYC,-73.97726088274193,40.7858779
10,"Cooper-Hewitt, National Design Museum",MANHATTAN,2 E. 91st St. NYC,-73.95769902873573,40.7842843
11,El Museo del Barrio,MANHATTAN,1230 Fifth Ave. NYC,-74.00175317725922,40.651623


In [74]:
#mu.to_csv("museums_with_geo.csv", header=True, index=False)

## Restaurants

In [101]:
# Load the restaurant table.
rest = pd.read_csv(filepath_or_buffer="restaurants.csv")

In [102]:
# Preview the first five restaurant rows.
rest.head(n=5)

Unnamed: 0,CAMIS,DBA,BORO,BUILDING,STREET,ZIPCODE,PHONE,CUISINE DESCRIPTION,RATING_RANDOM
0,40511702,NOTARO RESTAURANT,MANHATTAN,635,SECOND AVENUE,10016.0,2126863400,Italian,4.0
1,40511702,NOTARO RESTAURANT,MANHATTAN,635,SECOND AVENUE,10016.0,2126863400,Italian,2.0
2,50046354,VITE BAR,QUEENS,2507,BROADWAY,11106.0,3478134702,Italian,5.0
3,50061389,TACK'S CHINESE TAKE OUT,STATEN ISLAND,11C,HOLDEN BLVD,10314.0,7189839854,Chinese,3.0
4,41516263,NO QUARTER,BROOKLYN,8015,5 AVENUE,11209.0,7187019180,American,5.0


In [103]:
# Build the free-text query "<BUILDING> <STREET> <BORO>" for the geocoder.
# Columns are selected by name rather than by position: integer lookups such
# as x[3] on a label-indexed row are deprecated in pandas and silently pick
# the wrong field if the column order ever changes.
# str() is applied element-wise so missing values still become the text
# "nan", which the later "nan nan nan" filter relies on.
rest["address"] = (
    rest["BUILDING"].map(str) + " " + rest["STREET"].map(str) + " " + rest["BORO"].map(str)
)
rest.head()

Unnamed: 0,CAMIS,DBA,BORO,BUILDING,STREET,ZIPCODE,PHONE,CUISINE DESCRIPTION,RATING_RANDOM,address
0,40511702,NOTARO RESTAURANT,MANHATTAN,635,SECOND AVENUE,10016.0,2126863400,Italian,4.0,635 SECOND AVENUE MANHATTAN
1,40511702,NOTARO RESTAURANT,MANHATTAN,635,SECOND AVENUE,10016.0,2126863400,Italian,2.0,635 SECOND AVENUE MANHATTAN
2,50046354,VITE BAR,QUEENS,2507,BROADWAY,11106.0,3478134702,Italian,5.0,2507 BROADWAY QUEENS
3,50061389,TACK'S CHINESE TAKE OUT,STATEN ISLAND,11C,HOLDEN BLVD,10314.0,7189839854,Chinese,3.0,11C HOLDEN BLVD STATEN ISLAND
4,41516263,NO QUARTER,BROOKLYN,8015,5 AVENUE,11209.0,7187019180,American,5.0,8015 5 AVENUE BROOKLYN


In [196]:
# Draw a reproducible random sample of 100 restaurants (fixed seed).
rest_100 = rest.sample(random_state=1, n=100)

In [113]:
## convert reasturants 
geocoder = Geocoder()
geocoder.run(rest_100.address)
df_rest = geocoder.store_results()

HTTP GET request to URL: https://nominatim.openstreetmap.org/search?q=1674+BROADWAY+MANHATTAN&format=geocodejson | Status code: 200
HTTP GET request to URL: https://nominatim.openstreetmap.org/search?q=431+WEST+++16+STREET+MANHATTAN&format=geocodejson | Status code: 200
HTTP GET request to URL: https://nominatim.openstreetmap.org/search?q=86+SOUTH+PORTLAND+AVENUE+BROOKLYN&format=geocodejson | Status code: 200
HTTP GET request to URL: https://nominatim.openstreetmap.org/search?q=0+JFK+AIRPORT+QUEENS&format=geocodejson | Status code: 200
HTTP GET request to URL: https://nominatim.openstreetmap.org/search?q=220+CONOVER+STREET+BROOKLYN&format=geocodejson | Status code: 200
HTTP GET request to URL: https://nominatim.openstreetmap.org/search?q=1232+AVENUE+U+BROOKLYN&format=geocodejson | Status code: 200
HTTP GET request to URL: https://nominatim.openstreetmap.org/search?q=3415+14+AVENUE+BROOKLYN&format=geocodejson | Status code: 200
HTTP GET request to URL: https://nominatim.openstreetmap.or

HTTP GET request to URL: https://nominatim.openstreetmap.org/search?q=7911+ROOSEVELT+AVENUE+QUEENS&format=geocodejson | Status code: 200
HTTP GET request to URL: https://nominatim.openstreetmap.org/search?q=2505+EMMONS+AVENUE+BROOKLYN&format=geocodejson | Status code: 200
HTTP GET request to URL: https://nominatim.openstreetmap.org/search?q=330+HICKS+STREET+BROOKLYN&format=geocodejson | Status code: 200
HTTP GET request to URL: https://nominatim.openstreetmap.org/search?q=605+AVENUE+Z+BROOKLYN&format=geocodejson | Status code: 200
HTTP GET request to URL: https://nominatim.openstreetmap.org/search?q=4128+MAIN+ST+QUEENS&format=geocodejson | Status code: 200
HTTP GET request to URL: https://nominatim.openstreetmap.org/search?q=882+DEKALB+AVE+BROOKLYN&format=geocodejson | Status code: 200
HTTP GET request to URL: https://nominatim.openstreetmap.org/search?q=1456+COLLEGE+POINT+BOULEVARD+QUEENS&format=geocodejson | Status code: 200
HTTP GET request to URL: https://nominatim.openstreetmap.or

In [197]:
# Turn the list of geocoder records into a DataFrame.
df_rest_100 = pd.DataFrame(data=df_rest)

In [198]:
# Preview the first five geocoding records.
df_rest_100.head(n=5)

Unnamed: 0,address,coordinates
0,"""1674, Broadway, Manhattan Community Board 5, ...","-73.9829442, 40.76279"
1,"""Manhattan Street, Allegheny West Center, Pitt...","-80.0251194, 40.4504069"
2,"""86, South Portland Avenue, Fort Greene, Brook...","-73.9747054, 40.6868993"
3,"{'type': 'FeatureCollection', 'geocoding': {'v...",
4,"""220, Conover Street, Red Hook, Brooklyn, King...","-74.0153935, 40.6772452"


In [199]:
# Split the "lon, lat" string at the first comma into two columns (0 and 1).
# `n` is passed by keyword: the positional form is deprecated in pandas 1.x
# and rejected by pandas 2.x.
tmp = df_rest_100['coordinates'].str.split(',', n=1, expand=True)

In [200]:
# Preview the first five split coordinate pairs.
tmp.head(n=5)

Unnamed: 0,0,1
0,-73.9829442,40.76279
1,-80.0251194,40.4504069
2,-73.9747054,40.6868993
3,,
4,-74.0153935,40.6772452


In [205]:
# Copy the two split columns onto the sample by position (raw arrays, so the
# differing index labels of `tmp` and `rest_100` are ignored).
longitude_values = tmp.iloc[:, 0].to_numpy()
latitude_values = tmp.iloc[:, 1].to_numpy()
rest_100['LONGITUDE'] = longitude_values
rest_100['LATITUDE'] = latitude_values

In [206]:
# Preview the first five sampled restaurants with their coordinates.
rest_100.head(n=5)

Unnamed: 0,CAMIS,DBA,BORO,BUILDING,STREET,ZIPCODE,PHONE,CUISINE DESCRIPTION,RATING_RANDOM,address,LONGITUDE,LATITUDE
137150,40534810,CROWN DELI,MANHATTAN,1674,BROADWAY,10019.0,2129568410,American,1.0,1674 BROADWAY MANHATTAN,-73.9829442,40.76279
90577,41250710,HIGHLINE BALLROOM,MANHATTAN,431,WEST 16 STREET,10011.0,2124145994,American,3.0,431 WEST 16 STREET MANHATTAN,-80.0251194,40.4504069
254234,41595712,PEQUENA,BROOKLYN,86,SOUTH PORTLAND AVENUE,11217.0,7186430000,Mexican,3.0,86 SOUTH PORTLAND AVENUE BROOKLYN,-73.9747054,40.6868993
301147,41335952,EUROPAN CAFE(M11)/EUROCREPE (M4A),QUEENS,0,JFK AIRPORT,11430.0,7186561500,CafÃ©/Coffee/Tea,5.0,0 JFK AIRPORT QUEENS,,
20371,41389061,BOTANICA,BROOKLYN,220,CONOVER STREET,11231.0,3472250148,American,1.0,220 CONOVER STREET BROOKLYN,-74.0153935,40.6772452


In [207]:
# Keep rows that have a longitude and whose address is not the all-missing
# placeholder text "nan nan nan".
has_longitude = rest_100['LONGITUDE'].notna()
has_real_address = rest_100['address'] != "nan nan nan"
rest_100 = rest_100[has_longitude & has_real_address]

rest_100.head()

Unnamed: 0,CAMIS,DBA,BORO,BUILDING,STREET,ZIPCODE,PHONE,CUISINE DESCRIPTION,RATING_RANDOM,address,LONGITUDE,LATITUDE
137150,40534810,CROWN DELI,MANHATTAN,1674,BROADWAY,10019.0,2129568410,American,1.0,1674 BROADWAY MANHATTAN,-73.9829442,40.76279
90577,41250710,HIGHLINE BALLROOM,MANHATTAN,431,WEST 16 STREET,10011.0,2124145994,American,3.0,431 WEST 16 STREET MANHATTAN,-80.0251194,40.4504069
254234,41595712,PEQUENA,BROOKLYN,86,SOUTH PORTLAND AVENUE,11217.0,7186430000,Mexican,3.0,86 SOUTH PORTLAND AVENUE BROOKLYN,-73.9747054,40.6868993
20371,41389061,BOTANICA,BROOKLYN,220,CONOVER STREET,11231.0,3472250148,American,1.0,220 CONOVER STREET BROOKLYN,-74.0153935,40.6772452
32377,50014889,Shing Wong Restaurant,BROOKLYN,1232,AVENUE U,11229.0,9178072394,Chinese,4.0,1232 AVENUE U BROOKLYN,-73.95892341550304,40.5983044


In [209]:
#rest_100.to_csv("restaurants100_with_geo.csv", header=True, index=False)