In [1]:
import numpy as np 
import pandas as pd 
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import json 

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests 
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes  lab
#!pip install folium
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-1.22.0               |     pyh9f0ad1d_0          63 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          97 KB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.50-py_0
  geopy              conda-forge/noarch::geopy-1.22.0-pyh9f0ad1d_0



Downloading and Extracting Packages
geopy-1.22.0         | 63 KB     | ##################################### | 100% 
geographiclib-1.50   | 34 KB     | ###############################

In [2]:
# Download the New York neighborhoods dataset to a local JSON file (-q: no progress output).
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset   
# Parse the downloaded file into a Python object.
with open('newyork_data.json') as json_data:
    ny_data = json.load(json_data)
# Keep only the list stored under 'features'; the next cell reads one neighborhood
# record (properties + geometry) from each entry.
neighborhoods_data = ny_data['features']

In [3]:
# Flatten the per-neighborhood records into one table with a row per neighborhood.
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']

# Collect plain records first and build the DataFrame once: growing a frame row by row
# copies it on every iteration, and DataFrame.append no longer exists in pandas >= 2.0.
neighborhood_records = [
    {
        'Borough': data['properties']['borough'],
        'Neighborhood': data['properties']['name'],
        # coordinates are stored as [longitude, latitude]
        'Latitude': data['geometry']['coordinates'][1],
        'Longitude': data['geometry']['coordinates'][0],
    }
    for data in neighborhoods_data
]

# Passing `columns` fixes the column order and keeps the frame well-formed even when
# there are no records.
neighborhoods = pd.DataFrame(neighborhood_records, columns=column_names)

In [4]:
#neighborhoods

In [5]:
neighborhoods['Borough'].unique()

array(['Bronx', 'Manhattan', 'Brooklyn', 'Queens', 'Staten Island'],
      dtype=object)

# Bronx

In [6]:
# Keep only the Bronx rows (fresh 0..n-1 index) and geocode the borough itself so the
# map further down can be centred on it.
address = 'Bronx, NY'
bronx_data = neighborhoods.loc[neighborhoods['Borough'] == 'Bronx'].reset_index(drop=True)

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude, longitude = location.latitude, location.longitude
print('The geograpical coordinate of Bronx are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Bronx are 40.8466508, -73.8785937.


In [7]:
import os
from getpass import getpass


def _foursquare_setting(env_name, prompt):
    """Return a Foursquare setting from the environment, prompting once if it is unset."""
    value = os.environ.get(env_name)
    if not value:
        value = getpass(prompt)
        # remember the answer for later cells running in this kernel process
        os.environ[env_name] = value
    return value


# Credentials must not be committed with the notebook: they are read from the
# FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET environment variables (or prompted for).
# Any key pair that was previously hard-coded here should be treated as exposed and regenerated.
CLIENT_ID = _foursquare_setting('FOURSQUARE_CLIENT_ID', 'Foursquare client ID: ') # your Foursquare ID
CLIENT_SECRET = _foursquare_setting('FOURSQUARE_CLIENT_SECRET', 'Foursquare client secret: ') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

LIMIT = 100  # sent as the `limit` query parameter by getNearbyVenues
radius = 700  # NOTE: the visible getNearbyVenues calls rely on that function's own default instead

In [8]:
def get_category_type(row):
    """Return the name of the first category attached to a venue record.

    Parameters
    ----------
    row : mapping-like
        Looked up with ``row['categories']``; when that key is missing,
        ``row['venue.categories']`` is used instead.

    Returns
    -------
    The ``'name'`` of the first category entry, or ``None`` when the list is empty.

    Raises
    ------
    KeyError
        If neither key is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and is allowed to propagate instead of being hidden.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [9]:
def getNearbyVenues(names, latitudes, longitudes, radius=700):
    """Collect venues around each location from the Foursquare ``venues/explore`` endpoint.

    Uses the module-level ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION`` and ``LIMIT``.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Iterated together; one API request is made per (name, lat, lng) triple.
    radius : number, default 700
        Sent as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude', 'Venue Category'. The frame is empty
        (same columns) when nothing was found. 'Venue Category' is ``None`` for a
        venue that carries no category.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    column_names = ['Neighborhood',
                    'Neighborhood Latitude',
                    'Neighborhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests build and encode the query string
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,  # do not hang the notebook on a stalled connection
        )
        # surface API failures here rather than as a KeyError further down
        response.raise_for_status()

        groups = response.json()['response'].get('groups') or []
        items = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # naming the columns at construction keeps an empty result well-formed
    return pd.DataFrame(venue_rows, columns=column_names)

In [10]:
# Fetch the venues around every Bronx neighborhood (one API request per row;
# getNearbyVenues prints each neighborhood name as it goes).
bronx_venues = getNearbyVenues(
    names=bronx_data['Neighborhood'],
    latitudes=bronx_data['Latitude'],
    longitudes=bronx_data['Longitude'],
)

Wakefield
Co-op City
Eastchester
Fieldston
Riverdale
Kingsbridge
Woodlawn
Norwood
Williamsbridge
Baychester
Pelham Parkway
City Island
Bedford Park
University Heights
Morris Heights
Fordham
East Tremont
West Farms
High  Bridge
Melrose
Mott Haven
Port Morris
Longwood
Hunts Point
Morrisania
Soundview
Clason Point
Throgs Neck
Country Club
Parkchester
Westchester Square
Van Nest
Morris Park
Belmont
Spuyten Duyvil
North Riverdale
Pelham Bay
Schuylerville
Edgewater Park
Castle Hill
Olinville
Pelham Gardens
Concourse
Unionport
Edenwald
Claremont Village
Concourse Village
Mount Eden
Mount Hope
Bronxdale
Allerton
Kingsbridge Heights


In [11]:
bronx_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Allerton,36,36,36,36,36,36
Baychester,43,43,43,43,43,43
Bedford Park,51,51,51,51,51,51
Belmont,90,90,90,90,90,90
Bronxdale,66,66,66,66,66,66
Castle Hill,11,11,11,11,11,11
City Island,36,36,36,36,36,36
Claremont Village,22,22,22,22,22,22
Clason Point,14,14,14,14,14,14
Co-op City,37,37,37,37,37,37


In [12]:
# One-hot encode the venue categories: one indicator column per category, one row per venue.
bronx_onehot = pd.get_dummies(bronx_venues[['Venue Category']], prefix="", prefix_sep="")

# A venue category named exactly 'Neighborhood' would collide with the key column added
# below (the category would be overwritten and the column order scrambled), so keep it
# under a distinct name.
if 'Neighborhood' in bronx_onehot.columns:
    bronx_onehot = bronx_onehot.rename(columns={'Neighborhood': 'Neighborhood (Venue Category)'})

# Add the neighborhood name as the first column.
bronx_onehot.insert(0, 'Neighborhood', bronx_venues['Neighborhood'])

In [13]:
# Average the one-hot indicators per neighborhood: each value is the share of that
# neighborhood's venues that fall in the category. reset_index() turns 'Neighborhood'
# back into the first column.
bronx_grouped = bronx_onehot.groupby('Neighborhood').mean().reset_index() #frequency of venues

In [14]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``, skipping its first entry.

    The first position of ``row`` (the neighborhood name in this notebook) is ignored;
    the remaining entries are ordered from largest to smallest value and the index
    labels of the first ``num_top_venues`` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [15]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues: '1st', '2nd', '3rd', then 'Nth'
columns = ['Neighborhood']
for ind in range(num_top_venues):
    # choose the suffix explicitly instead of relying on a bare `except:` to catch the
    # out-of-range lookup (which would also have hidden any unrelated error)
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# create a new dataframe with one row per Bronx neighborhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = bronx_grouped['Neighborhood']

# fill every row with that neighborhood's most frequent venue categories
for ind in range(bronx_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(bronx_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Allerton,Donut Shop,Bus Station,Pizza Place,Sandwich Place,Bar
1,Baychester,Donut Shop,Department Store,Supermarket,Bus Station,Discount Store
2,Bedford Park,Deli / Bodega,Pizza Place,Diner,Sandwich Place,Chinese Restaurant
3,Belmont,Italian Restaurant,Pizza Place,Bakery,Deli / Bodega,Café
4,Bronxdale,Pizza Place,Italian Restaurant,Sandwich Place,Bank,Ice Cream Shop
5,Castle Hill,Pizza Place,Market,Bus Station,Latin American Restaurant,Bank
6,City Island,Harbor / Marina,Seafood Restaurant,Grocery Store,Boat or Ferry,Thrift / Vintage Store
7,Claremont Village,Pizza Place,Liquor Store,Food,Bus Station,Bakery
8,Clason Point,Park,Bus Stop,Pool,River,South American Restaurant
9,Co-op City,Bus Station,Fried Chicken Joint,Fast Food Restaurant,Mattress Store,Accessories Store


In [16]:
# Hand-picked subset of the ranking table, selected by row label.
# NOTE(review): these labels are positions in the grouped table and will point at
# different neighborhoods if the fetched venue data changes -- confirm before re-running.
df_bronx= neighborhoods_venues_sorted.loc[[2, 19, 20, 34, 39, 41, 43]]

In [17]:
df_bronx

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
2,Bedford Park,Deli / Bodega,Pizza Place,Diner,Sandwich Place,Chinese Restaurant
19,High Bridge,Pizza Place,Pharmacy,Supermarket,Latin American Restaurant,Ice Cream Shop
20,Hunts Point,Waste Facility,Deli / Bodega,Paper / Office Supplies Store,Restaurant,Juice Bar
34,Parkchester,Pizza Place,Chinese Restaurant,Supermarket,Bus Station,American Restaurant
39,Riverdale,Bank,Pizza Place,Park,Health & Beauty Service,Japanese Restaurant
41,Soundview,Chinese Restaurant,Deli / Bodega,Fast Food Restaurant,Clothing Store,Fried Chicken Joint
43,Throgs Neck,Italian Restaurant,Pizza Place,Coffee Shop,Chinese Restaurant,Pub


In [18]:
# Coordinates of hand-picked neighborhoods, selected by their row label in bronx_data.
# NOTE(review): presumably the same neighborhoods as df_bronx; the two index lists are
# maintained by hand, so verify they still match.
bronx_data_new = bronx_data.loc[[12, 18, 23, 29, 4, 25, 27]]

In [19]:
bronx_data_new

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
12,Bronx,Bedford Park,40.870185,-73.885512
18,Bronx,High Bridge,40.836623,-73.926102
23,Bronx,Hunts Point,40.80973,-73.883315
29,Bronx,Parkchester,40.837938,-73.856003
4,Bronx,Riverdale,40.890834,-73.912585
25,Bronx,Soundview,40.821012,-73.865746
27,Bronx,Throgs Neck,40.815109,-73.81635


In [20]:
# create map of the Bronx using the geocoded latitude and longitude values
map_bronx = folium.Map(location=[latitude, longitude], zoom_start=11)

# add one marker per selected neighborhood. Coordinates and labels are all taken from
# bronx_data_new: zipping coordinates from the full bronx_data with names from the
# subset would pin each label to an unrelated neighborhood's location.
for lat, lng, label in zip(bronx_data_new['Latitude'], bronx_data_new['Longitude'], bronx_data_new['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_bronx)

map_bronx

# Manhattan

In [21]:
# Keep only the Manhattan rows (fresh 0..n-1 index) and geocode the borough itself so
# the map further down can be centred on it.
address = 'Manhattan, NY'
manhattan_data = neighborhoods.loc[neighborhoods['Borough'] == 'Manhattan'].reset_index(drop=True)

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude, longitude = location.latitude, location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Manhattan are 40.7896239, -73.9598939.


In [22]:
import os
from getpass import getpass


def _foursquare_setting(env_name, prompt):
    """Return a Foursquare setting from the environment, prompting once if it is unset."""
    value = os.environ.get(env_name)
    if not value:
        value = getpass(prompt)
        # remember the answer for later cells running in this kernel process
        os.environ[env_name] = value
    return value


# Repeats the Foursquare configuration for this section. Credentials must not be
# committed with the notebook: they are read from the FOURSQUARE_CLIENT_ID /
# FOURSQUARE_CLIENT_SECRET environment variables (or prompted for). Any key pair that
# was previously hard-coded here should be treated as exposed and regenerated.
CLIENT_ID = _foursquare_setting('FOURSQUARE_CLIENT_ID', 'Foursquare client ID: ') # your Foursquare ID
CLIENT_SECRET = _foursquare_setting('FOURSQUARE_CLIENT_SECRET', 'Foursquare client secret: ') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

LIMIT = 100  # sent as the `limit` query parameter by getNearbyVenues
radius = 700  # NOTE: the visible getNearbyVenues calls rely on that function's own default instead

In [23]:
def get_category_type(row):
    """Return the name of the first category attached to a venue record.

    Parameters
    ----------
    row : mapping-like
        Looked up with ``row['categories']``; when that key is missing,
        ``row['venue.categories']`` is used instead.

    Returns
    -------
    The ``'name'`` of the first category entry, or ``None`` when the list is empty.

    Raises
    ------
    KeyError
        If neither key is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and is allowed to propagate instead of being hidden.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [24]:
# Fetch the venues around every Manhattan neighborhood (one API request per row;
# getNearbyVenues prints each neighborhood name as it goes).
manhattan_venues = getNearbyVenues(
    names=manhattan_data['Neighborhood'],
    latitudes=manhattan_data['Latitude'],
    longitudes=manhattan_data['Longitude'],
)

Marble Hill
Chinatown
Washington Heights
Inwood
Hamilton Heights
Manhattanville
Central Harlem
East Harlem
Upper East Side
Yorkville
Lenox Hill
Roosevelt Island
Upper West Side
Lincoln Square
Clinton
Midtown
Murray Hill
Chelsea
Greenwich Village
East Village
Lower East Side
Tribeca
Little Italy
Soho
West Village
Manhattan Valley
Morningside Heights
Gramercy
Battery Park City
Financial District
Carnegie Hill
Noho
Civic Center
Midtown South
Sutton Place
Turtle Bay
Tudor City
Stuyvesant Town
Flatiron
Hudson Yards


In [25]:
manhattan_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Battery Park City,100,100,100,100,100,100
Carnegie Hill,100,100,100,100,100,100
Central Harlem,71,71,71,71,71,71
Chelsea,100,100,100,100,100,100
Chinatown,100,100,100,100,100,100
Civic Center,100,100,100,100,100,100
Clinton,100,100,100,100,100,100
East Harlem,88,88,88,88,88,88
East Village,100,100,100,100,100,100
Financial District,100,100,100,100,100,100


In [26]:
# One-hot encode the venue categories: one indicator column per category, one row per venue.
manhattan_onehot = pd.get_dummies(manhattan_venues[['Venue Category']], prefix="", prefix_sep="")

# A venue category named exactly 'Neighborhood' would collide with the key column added
# below (the category would be overwritten and the column order scrambled), so keep it
# under a distinct name.
if 'Neighborhood' in manhattan_onehot.columns:
    manhattan_onehot = manhattan_onehot.rename(columns={'Neighborhood': 'Neighborhood (Venue Category)'})

# Add the neighborhood name as the first column.
manhattan_onehot.insert(0, 'Neighborhood', manhattan_venues['Neighborhood'])

In [27]:
# Average the one-hot indicators per neighborhood: each value is the share of that
# neighborhood's venues that fall in the category. reset_index() turns 'Neighborhood'
# back into the first column.
manhattan_grouped = manhattan_onehot.groupby('Neighborhood').mean().reset_index() #frequency of venues

In [28]:
manhattan_grouped.shape

(40, 336)

In [29]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``, skipping its first entry.

    The first position of ``row`` (the neighborhood name in this notebook) is ignored;
    the remaining entries are ordered from largest to smallest value and the index
    labels of the first ``num_top_venues`` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues: '1st', '2nd', '3rd', then 'Nth'
columns = ['Neighborhood']
for ind in range(num_top_venues):
    # choose the suffix explicitly instead of relying on a bare `except:` to catch the
    # out-of-range lookup (which would also have hidden any unrelated error)
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# create a new dataframe with one row per Manhattan neighborhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = manhattan_grouped['Neighborhood']

# fill every row with that neighborhood's most frequent venue categories
for ind in range(manhattan_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(manhattan_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Battery Park City,Park,Coffee Shop,Hotel,Memorial Site,Gym
1,Carnegie Hill,Café,Coffee Shop,Gym,Gym / Fitness Center,Bakery
2,Central Harlem,Bar,African Restaurant,Southern / Soul Food Restaurant,Jazz Club,Caribbean Restaurant
3,Chelsea,Art Gallery,Coffee Shop,Ice Cream Shop,Bakery,Nightclub
4,Chinatown,Chinese Restaurant,Bakery,Optical Shop,Bar,Ice Cream Shop
5,Civic Center,Coffee Shop,Hotel,French Restaurant,Spa,Gym / Fitness Center
6,Clinton,Italian Restaurant,Gym / Fitness Center,Wine Shop,Coffee Shop,Gym
7,East Harlem,Mexican Restaurant,Bakery,Thai Restaurant,Deli / Bodega,Café
8,East Village,Pizza Place,Cocktail Bar,Bar,Ice Cream Shop,Juice Bar
9,Financial District,Coffee Shop,Pizza Place,Memorial Site,Cocktail Bar,Juice Bar


In [30]:
# Hand-picked subset of the ranking table, selected by row label.
# NOTE(review): these labels are positions in the grouped table and will point at
# different neighborhoods if the fetched venue data changes -- confirm before re-running.
df_manhattan = neighborhoods_venues_sorted.loc[[2, 4, 7, 12, 13, 16, 19, 20, 21, 24, 25, 26, 33, 36]]

In [31]:
df_manhattan

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
2,Central Harlem,Bar,African Restaurant,Southern / Soul Food Restaurant,Jazz Club,Caribbean Restaurant
4,Chinatown,Chinese Restaurant,Bakery,Optical Shop,Bar,Ice Cream Shop
7,East Harlem,Mexican Restaurant,Bakery,Thai Restaurant,Deli / Bodega,Café
12,Greenwich Village,Italian Restaurant,Sandwich Place,Coffee Shop,Café,Gym
13,Hamilton Heights,Coffee Shop,Deli / Bodega,Bar,Mexican Restaurant,Café
16,Lenox Hill,Sushi Restaurant,Italian Restaurant,Gym / Fitness Center,Café,Coffee Shop
19,Lower East Side,Cocktail Bar,Italian Restaurant,Café,Chinese Restaurant,Mexican Restaurant
20,Manhattan Valley,Coffee Shop,Park,Chinese Restaurant,Pizza Place,Grocery Store
21,Manhattanville,Coffee Shop,Seafood Restaurant,Deli / Bodega,Park,Chinese Restaurant
24,Midtown South,Korean Restaurant,Hotel,Gym / Fitness Center,American Restaurant,Japanese Restaurant


In [32]:
# Coordinates of hand-picked neighborhoods, selected by their row label in manhattan_data.
# NOTE(review): presumably the same neighborhoods as df_manhattan; the two index lists
# are maintained by hand, so verify they still match.
manhattan_data_new = manhattan_data.loc[[6, 1, 7, 18, 4, 10, 20, 25, 5, 33, 26, 16, 36, 12]]

In [33]:
manhattan_data_new

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
6,Manhattan,Central Harlem,40.815976,-73.943211
1,Manhattan,Chinatown,40.715618,-73.994279
7,Manhattan,East Harlem,40.792249,-73.944182
18,Manhattan,Greenwich Village,40.726933,-73.999914
4,Manhattan,Hamilton Heights,40.823604,-73.949688
10,Manhattan,Lenox Hill,40.768113,-73.95886
20,Manhattan,Lower East Side,40.717807,-73.98089
25,Manhattan,Manhattan Valley,40.797307,-73.964286
5,Manhattan,Manhattanville,40.816934,-73.957385
33,Manhattan,Midtown South,40.74851,-73.988713


In [34]:
# create map of Manhattan using the geocoded latitude and longitude values
map_manhattan = folium.Map(location=[latitude, longitude], zoom_start=11)

# add one marker per selected neighborhood. Coordinates and labels are all taken from
# manhattan_data_new: zipping coordinates from the full manhattan_data with names from
# the subset would pin each label to an unrelated neighborhood's location.
for lat, lng, label in zip(manhattan_data_new['Latitude'], manhattan_data_new['Longitude'], manhattan_data_new['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_manhattan)

map_manhattan

# Queens

In [35]:
# Keep only the Queens rows (fresh 0..n-1 index) and geocode the borough itself so the
# map further down can be centred on it.
address = 'Queens, NY'
queens_data = neighborhoods.loc[neighborhoods['Borough'] == 'Queens'].reset_index(drop=True)

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude, longitude = location.latitude, location.longitude
print('The geograpical coordinate of Queens are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Queens are 40.7498243, -73.7976337.


In [36]:
import os
from getpass import getpass


def _foursquare_setting(env_name, prompt):
    """Return a Foursquare setting from the environment, prompting once if it is unset."""
    value = os.environ.get(env_name)
    if not value:
        value = getpass(prompt)
        # remember the answer for later cells running in this kernel process
        os.environ[env_name] = value
    return value


# Repeats the Foursquare configuration for this section. Credentials must not be
# committed with the notebook: they are read from the FOURSQUARE_CLIENT_ID /
# FOURSQUARE_CLIENT_SECRET environment variables (or prompted for). Any key pair that
# was previously hard-coded here should be treated as exposed and regenerated.
CLIENT_ID = _foursquare_setting('FOURSQUARE_CLIENT_ID', 'Foursquare client ID: ') # your Foursquare ID
CLIENT_SECRET = _foursquare_setting('FOURSQUARE_CLIENT_SECRET', 'Foursquare client secret: ') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

LIMIT = 100  # sent as the `limit` query parameter by getNearbyVenues
radius = 700  # NOTE: the visible getNearbyVenues calls rely on that function's own default instead

In [37]:
def get_category_type(row):
    """Return the name of the first category attached to a venue record.

    Parameters
    ----------
    row : mapping-like
        Looked up with ``row['categories']``; when that key is missing,
        ``row['venue.categories']`` is used instead.

    Returns
    -------
    The ``'name'`` of the first category entry, or ``None`` when the list is empty.

    Raises
    ------
    KeyError
        If neither key is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and is allowed to propagate instead of being hidden.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [39]:
# Fetch the venues around every Queens neighborhood (one API request per row;
# getNearbyVenues prints each neighborhood name as it goes).
queens_venues = getNearbyVenues(
    names=queens_data['Neighborhood'],
    latitudes=queens_data['Latitude'],
    longitudes=queens_data['Longitude'],
)

Astoria
Woodside
Jackson Heights
Elmhurst
Howard Beach
Corona
Forest Hills
Kew Gardens
Richmond Hill
Flushing
Long Island City
Sunnyside
East Elmhurst
Maspeth
Ridgewood
Glendale
Rego Park
Woodhaven
Ozone Park
South Ozone Park
College Point
Whitestone
Bayside
Auburndale
Little Neck
Douglaston
Glen Oaks
Bellerose
Kew Gardens Hills
Fresh Meadows
Briarwood
Jamaica Center
Oakland Gardens
Queens Village
Hollis
South Jamaica
St. Albans
Rochdale
Springfield Gardens
Cambria Heights
Rosedale
Far Rockaway
Broad Channel
Breezy Point
Steinway
Beechhurst
Bay Terrace
Edgemere
Arverne
Rockaway Beach
Neponsit
Murray Hill
Floral Park
Holliswood
Jamaica Estates
Queensboro Hill
Hillcrest
Ravenswood
Lindenwood
Laurelton
Lefrak City
Belle Harbor
Rockaway Park
Somerville
Brookville
Bellaire
North Corona
Forest Hills Gardens
Jamaica Hills
Utopia
Pomonok
Astoria Heights
Hunters Point
Sunnyside Gardens
Blissville
Roxbury
Middle Village
Malba
Hammels
Bayswater
Queensbridge


In [40]:
queens_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Arverne,26,26,26,26,26,26
Astoria,100,100,100,100,100,100
Astoria Heights,28,28,28,28,28,28
Auburndale,55,55,55,55,55,55
Bay Terrace,42,42,42,42,42,42
Bayside,86,86,86,86,86,86
Bayswater,7,7,7,7,7,7
Beechhurst,27,27,27,27,27,27
Bellaire,28,28,28,28,28,28
Belle Harbor,23,23,23,23,23,23


In [41]:
# One-hot encode the venue categories: one indicator column per category, one row per venue.
queens_onehot = pd.get_dummies(queens_venues[['Venue Category']], prefix="", prefix_sep="")

# A venue category named exactly 'Neighborhood' would collide with the key column added
# below (the category would be overwritten and the column order scrambled), so keep it
# under a distinct name.
if 'Neighborhood' in queens_onehot.columns:
    queens_onehot = queens_onehot.rename(columns={'Neighborhood': 'Neighborhood (Venue Category)'})

# Add the neighborhood name as the first column.
queens_onehot.insert(0, 'Neighborhood', queens_venues['Neighborhood'])

In [42]:
# Average the one-hot indicators per neighborhood: each value is the share of that
# neighborhood's venues that fall in the category. reset_index() turns 'Neighborhood'
# back into the first column.
queens_grouped = queens_onehot.groupby('Neighborhood').mean().reset_index() #frequency of venues

In [43]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``, skipping its first entry.

    The first position of ``row`` (the neighborhood name in this notebook) is ignored;
    the remaining entries are ordered from largest to smallest value and the index
    labels of the first ``num_top_venues`` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues: '1st', '2nd', '3rd', then 'Nth'
columns = ['Neighborhood']
for ind in range(num_top_venues):
    # choose the suffix explicitly instead of relying on a bare `except:` to catch the
    # out-of-range lookup (which would also have hidden any unrelated error)
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# create a new dataframe with one row per Queens neighborhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = queens_grouped['Neighborhood']

# fill every row with that neighborhood's most frequent venue categories
for ind in range(queens_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(queens_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Arverne,Surf Spot,Metro Station,Donut Shop,Sandwich Place,Bus Stop
1,Astoria,Bar,Greek Restaurant,Coffee Shop,Middle Eastern Restaurant,Bakery
2,Astoria Heights,Rental Car Location,Bus Station,Pizza Place,Chinese Restaurant,Italian Restaurant
3,Auburndale,Korean Restaurant,Pizza Place,Cosmetics Shop,Pharmacy,Deli / Bodega
4,Bay Terrace,Clothing Store,Women's Store,Donut Shop,Shoe Store,Cosmetics Shop
5,Bayside,Bar,Pizza Place,Sushi Restaurant,Indian Restaurant,American Restaurant
6,Bayswater,Playground,Construction & Landscaping,Athletics & Sports,Park,Men's Store
7,Beechhurst,Gym / Fitness Center,Chinese Restaurant,Pizza Place,Shopping Mall,Supermarket
8,Bellaire,Pizza Place,Convenience Store,Deli / Bodega,Intersection,Fast Food Restaurant
9,Belle Harbor,Beach,Pub,Deli / Bodega,Spa,Bagel Shop


In [44]:
# Hand-picked subset of the ranking table, selected by row label.
# NOTE(review): these labels are positions in the grouped table and will point at
# different neighborhoods if the fetched venue data changes -- confirm before re-running.
df_queens = neighborhoods_venues_sorted.loc[[3,5,8,17,19,21,22,23,24,25,28,29,32,41,45,46,49,50,51,54,56,57,58,62,72,74,76,80]]

In [None]:
df_queens

In [45]:
# Coordinates of hand-picked neighborhoods, selected by their row label in queens_data.
# NOTE(review): presumably the same neighborhoods as df_queens; the two index lists are
# maintained by hand, so verify they still match.
queens_data_new = queens_data.loc[[23, 22, 65, 20, 25, 47, 3, 41, 9, 52, 29, 26, 56, 7, 58, 24, 13, 76, 51, 32, 70, 33, 55,
                                  8, 38, 44, 73, 1]]

In [46]:
# create map of Queens using the geocoded latitude and longitude values
map_queens = folium.Map(location=[latitude, longitude], zoom_start=11)

# add one marker per selected neighborhood. Coordinates and labels are all taken from
# queens_data_new: zipping coordinates from the full queens_data with names from the
# subset would pin each label to an unrelated neighborhood's location.
for lat, lng, label in zip(queens_data_new['Latitude'], queens_data_new['Longitude'], queens_data_new['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_queens)

map_queens

# Brooklyn

In [47]:
# Keep only the Brooklyn rows (fresh 0..n-1 index) and geocode the borough itself so
# the map further down can be centred on it.
address = 'Brooklyn, NY'
brooklyn_data = neighborhoods.loc[neighborhoods['Borough'] == 'Brooklyn'].reset_index(drop=True)

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude, longitude = location.latitude, location.longitude
print('The geograpical coordinate of Brooklyn are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Brooklyn are 40.6501038, -73.9495823.


In [48]:
import os
from getpass import getpass


def _foursquare_setting(env_name, prompt):
    """Return a Foursquare setting from the environment, prompting once if it is unset."""
    value = os.environ.get(env_name)
    if not value:
        value = getpass(prompt)
        # remember the answer for later cells running in this kernel process
        os.environ[env_name] = value
    return value


# Repeats the Foursquare configuration for this section. Credentials must not be
# committed with the notebook: they are read from the FOURSQUARE_CLIENT_ID /
# FOURSQUARE_CLIENT_SECRET environment variables (or prompted for). Any key pair that
# was previously hard-coded here should be treated as exposed and regenerated.
CLIENT_ID = _foursquare_setting('FOURSQUARE_CLIENT_ID', 'Foursquare client ID: ') # your Foursquare ID
CLIENT_SECRET = _foursquare_setting('FOURSQUARE_CLIENT_SECRET', 'Foursquare client secret: ') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

LIMIT = 100  # sent as the `limit` query parameter by getNearbyVenues
radius = 700  # NOTE: the visible getNearbyVenues calls rely on that function's own default instead

In [49]:
def get_category_type(row):
    """Return the name of the first category attached to a venue record.

    Parameters
    ----------
    row : mapping-like
        Looked up with ``row['categories']``; when that key is missing,
        ``row['venue.categories']`` is used instead.

    Returns
    -------
    The ``'name'`` of the first category entry, or ``None`` when the list is empty.

    Raises
    ------
    KeyError
        If neither key is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and is allowed to propagate instead of being hidden.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [50]:
# Fetch the venues around every Brooklyn neighborhood (one API request per row;
# getNearbyVenues prints each neighborhood name as it goes).
brooklyn_venues = getNearbyVenues(
    names=brooklyn_data['Neighborhood'],
    latitudes=brooklyn_data['Latitude'],
    longitudes=brooklyn_data['Longitude'],
)

Bay Ridge
Bensonhurst
Sunset Park
Greenpoint
Gravesend
Brighton Beach
Sheepshead Bay
Manhattan Terrace
Flatbush
Crown Heights
East Flatbush
Kensington
Windsor Terrace
Prospect Heights
Brownsville
Williamsburg
Bushwick
Bedford Stuyvesant
Brooklyn Heights
Cobble Hill
Carroll Gardens
Red Hook
Gowanus
Fort Greene
Park Slope
Cypress Hills
East New York
Starrett City
Canarsie
Flatlands
Mill Island
Manhattan Beach
Coney Island
Bath Beach
Borough Park
Dyker Heights
Gerritsen Beach
Marine Park
Clinton Hill
Sea Gate
Downtown
Boerum Hill
Prospect Lefferts Gardens
Ocean Hill
City Line
Bergen Beach
Midwood
Prospect Park South
Georgetown
East Williamsburg
North Side
South Side
Ocean Parkway
Fort Hamilton
Ditmas Park
Wingate
Rugby
Remsen Village
New Lots
Paerdegat Basin
Mill Basin
Fulton Ferry
Vinegar Hill
Weeksville
Broadway Junction
Dumbo
Homecrest
Highland Park
Madison
Erasmus


In [51]:
brooklyn_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bath Beach,74,74,74,74,74,74
Bay Ridge,100,100,100,100,100,100
Bedford Stuyvesant,70,70,70,70,70,70
Bensonhurst,40,40,40,40,40,40
Bergen Beach,7,7,7,7,7,7
Boerum Hill,100,100,100,100,100,100
Borough Park,29,29,29,29,29,29
Brighton Beach,62,62,62,62,62,62
Broadway Junction,24,24,24,24,24,24
Brooklyn Heights,100,100,100,100,100,100


In [52]:
# One-hot encode the venue categories: one indicator column per category, one row per venue.
brooklyn_onehot = pd.get_dummies(brooklyn_venues[['Venue Category']], prefix="", prefix_sep="")

# A venue category named exactly 'Neighborhood' would collide with the key column added
# below (the category would be overwritten and the column order scrambled), so keep it
# under a distinct name.
if 'Neighborhood' in brooklyn_onehot.columns:
    brooklyn_onehot = brooklyn_onehot.rename(columns={'Neighborhood': 'Neighborhood (Venue Category)'})

# Add the neighborhood name as the first column.
brooklyn_onehot.insert(0, 'Neighborhood', brooklyn_venues['Neighborhood'])

In [53]:
# Average the one-hot indicators per neighborhood: each value is the share of that
# neighborhood's venues that fall in the category. reset_index() turns 'Neighborhood'
# back into the first column.
brooklyn_grouped = brooklyn_onehot.groupby('Neighborhood').mean().reset_index() #frequency of venues

In [54]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``, skipping its first entry.

    The first position of ``row`` (the neighborhood name in this notebook) is ignored;
    the remaining entries are ordered from largest to smallest value and the index
    labels of the first ``num_top_venues`` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues: '1st', '2nd', '3rd', then 'Nth'
columns = ['Neighborhood']
for ind in range(num_top_venues):
    # choose the suffix explicitly instead of relying on a bare `except:` to catch the
    # out-of-range lookup (which would also have hidden any unrelated error)
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# create a new dataframe with one row per Brooklyn neighborhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = brooklyn_grouped['Neighborhood']

# fill every row with that neighborhood's most frequent venue categories
for ind in range(brooklyn_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(brooklyn_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Bath Beach,Pizza Place,Bank,Bakery,Cantonese Restaurant,Chinese Restaurant
1,Bay Ridge,Pizza Place,Spa,Italian Restaurant,Cosmetics Shop,Greek Restaurant
2,Bedford Stuyvesant,Pizza Place,Coffee Shop,Deli / Bodega,Chinese Restaurant,Café
3,Bensonhurst,Chinese Restaurant,Italian Restaurant,Donut Shop,Bakery,Bagel Shop
4,Bergen Beach,Harbor / Marina,Baseball Field,Gym,Playground,Donut Shop
5,Boerum Hill,Bar,Coffee Shop,Arts & Crafts Store,Bakery,Grocery Store
6,Borough Park,Bank,Deli / Bodega,Pizza Place,Fast Food Restaurant,Bakery
7,Brighton Beach,Beach,Restaurant,Bakery,Eastern European Restaurant,Russian Restaurant
8,Broadway Junction,Latin American Restaurant,Diner,Discount Store,Donut Shop,Sandwich Place
9,Brooklyn Heights,Park,Coffee Shop,Wine Shop,Italian Restaurant,Pizza Place


In [55]:
# Hand-picked subset of the ranking table, selected by row label.
# NOTE(review): these labels are positions in the grouped table and will point at
# different neighborhoods if the fetched venue data changes -- confirm before re-running.
df_brooklyn = neighborhoods_venues_sorted.loc[[0,2,3,10,15,27,28,29,31,36,40,41,44,45,46,52]]

In [None]:
df_brooklyn

In [56]:
# Coordinates of hand-picked neighborhoods, selected by their row label in brooklyn_data.
# NOTE(review): presumably the same neighborhoods as df_brooklyn; the two index lists
# are maintained by hand, so verify they still match.
brooklyn_data_new = brooklyn_data.loc[[33, 17, 1, 14, 38, 69, 8, 29, 53, 4, 11, 68, 37, 46, 60, 59]]

In [57]:
# create map of Brooklyn using the geocoded latitude and longitude values
map_brooklyn = folium.Map(location=[latitude, longitude], zoom_start=11)

# add one marker per selected neighborhood. Coordinates and labels are all taken from
# brooklyn_data_new: zipping coordinates from the full brooklyn_data with names from
# the subset would pin each label to an unrelated neighborhood's location.
for lat, lng, label in zip(brooklyn_data_new['Latitude'], brooklyn_data_new['Longitude'], brooklyn_data_new['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_brooklyn)

map_brooklyn

# Staten Island

In [58]:
# Keep only the Staten Island rows (fresh 0..n-1 index) and geocode the borough itself
# so the map further down can be centred on it.
address = 'Staten Island, NY'
staten_data = neighborhoods.loc[neighborhoods['Borough'] == 'Staten Island'].reset_index(drop=True)

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude, longitude = location.latitude, location.longitude
print('The geograpical coordinate of Staten Island are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Staten Island are 40.5834557, -74.1496048.


In [59]:
import os
from getpass import getpass


def _foursquare_setting(env_name, prompt):
    """Return a Foursquare setting from the environment, prompting once if it is unset."""
    value = os.environ.get(env_name)
    if not value:
        value = getpass(prompt)
        # remember the answer for later cells running in this kernel process
        os.environ[env_name] = value
    return value


# Repeats the Foursquare configuration for this section. Credentials must not be
# committed with the notebook: they are read from the FOURSQUARE_CLIENT_ID /
# FOURSQUARE_CLIENT_SECRET environment variables (or prompted for). Any key pair that
# was previously hard-coded here should be treated as exposed and regenerated.
CLIENT_ID = _foursquare_setting('FOURSQUARE_CLIENT_ID', 'Foursquare client ID: ') # your Foursquare ID
CLIENT_SECRET = _foursquare_setting('FOURSQUARE_CLIENT_SECRET', 'Foursquare client secret: ') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

LIMIT = 100  # sent as the `limit` query parameter by getNearbyVenues
radius = 700  # NOTE: the visible getNearbyVenues calls rely on that function's own default instead

In [60]:
# Fetch the venues around every Staten Island neighborhood (one API request per row;
# getNearbyVenues prints each neighborhood name as it goes).
staten_venues = getNearbyVenues(
    names=staten_data['Neighborhood'],
    latitudes=staten_data['Latitude'],
    longitudes=staten_data['Longitude'],
)

St. George
New Brighton
Stapleton
Rosebank
West Brighton
Grymes Hill
Todt Hill
South Beach
Port Richmond
Mariner's Harbor
Port Ivory
Castleton Corners
New Springville
Travis
New Dorp
Oakwood
Great Kills
Eltingville
Annadale
Woodrow
Tottenville
Tompkinsville
Silver Lake
Sunnyside
Park Hill
Westerleigh
Graniteville
Arlington
Arrochar
Grasmere
Old Town
Dongan Hills
Midland Beach
Grant City
New Dorp Beach
Bay Terrace
Huguenot
Pleasant Plains
Butler Manor
Charleston
Rossville
Arden Heights
Greenridge
Heartland Village
Chelsea
Bloomfield
Bulls Head
Richmond Town
Shore Acres
Clifton
Concord
Emerson Hill
Randall Manor
Howland Hook
Elm Park
Manor Heights
Willowbrook
Sandy Ground
Egbertville
Prince's Bay
Lighthouse Hill
Richmond Valley
Fox Hills


In [61]:
staten_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Annadale,12,12,12,12,12,12
Arden Heights,6,6,6,6,6,6
Arlington,9,9,9,9,9,9
Arrochar,31,31,31,31,31,31
Bay Terrace,12,12,12,12,12,12
Bloomfield,10,10,10,10,10,10
Bulls Head,48,48,48,48,48,48
Butler Manor,4,4,4,4,4,4
Castleton Corners,35,35,35,35,35,35
Charleston,37,37,37,37,37,37


In [62]:
# One-hot encode the venue categories: one indicator column per category, one row per venue.
staten_onehot = pd.get_dummies(staten_venues[['Venue Category']], prefix="", prefix_sep="")

# A venue category named exactly 'Neighborhood' would collide with the key column added
# below (the category would be overwritten and the column order scrambled), so keep it
# under a distinct name.
if 'Neighborhood' in staten_onehot.columns:
    staten_onehot = staten_onehot.rename(columns={'Neighborhood': 'Neighborhood (Venue Category)'})

# Add the neighborhood name as the first column.
staten_onehot.insert(0, 'Neighborhood', staten_venues['Neighborhood'])

In [63]:
# Average the one-hot indicators per neighborhood: each value is the share of that
# neighborhood's venues that fall in the category. reset_index() turns 'Neighborhood'
# back into the first column.
staten_grouped = staten_onehot.groupby('Neighborhood').mean().reset_index() #frequency of venues

In [64]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``, skipping its first entry.

    The first position of ``row`` (the neighborhood name in this notebook) is ignored;
    the remaining entries are ordered from largest to smallest value and the index
    labels of the first ``num_top_venues`` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues: '1st', '2nd', '3rd', then 'Nth'
columns = ['Neighborhood']
for ind in range(num_top_venues):
    # choose the suffix explicitly instead of relying on a bare `except:` to catch the
    # out-of-range lookup (which would also have hidden any unrelated error)
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# create a new dataframe with one row per Staten Island neighborhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = staten_grouped['Neighborhood']

# fill every row with that neighborhood's most frequent venue categories
for ind in range(staten_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(staten_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Annadale,Pizza Place,Restaurant,Diner,Train Station,Sushi Restaurant
1,Arden Heights,Pizza Place,Pharmacy,Food,Home Service,Coffee Shop
2,Arlington,Deli / Bodega,Boat or Ferry,Bus Stop,American Restaurant,Supermarket
3,Arrochar,Bus Stop,Pizza Place,Beach,Italian Restaurant,Deli / Bodega
4,Bay Terrace,Supermarket,Donut Shop,Insurance Office,Plaza,Sushi Restaurant
5,Bloomfield,Baseball Field,Department Store,Recreation Center,Doctor's Office,Burger Joint
6,Bulls Head,Bus Stop,Diner,Sushi Restaurant,Grocery Store,Café
7,Butler Manor,Pool,Baseball Field,Convenience Store,Yoga Studio,French Restaurant
8,Castleton Corners,Pizza Place,Bank,Chinese Restaurant,Diner,Ice Cream Shop
9,Charleston,Big Box Store,Cosmetics Shop,Diner,Breakfast Spot,Burger Joint


In [65]:
# Hand-picked subset of the ranking table, selected by row label.
# NOTE(review): these labels are positions in the grouped table and will point at
# different neighborhoods if the fetched venue data changes -- confirm before re-running.
df_staten = neighborhoods_venues_sorted.loc[[13,16,20,21,22,29,31,33,42,43,60,61]]

In [None]:
df_staten

In [66]:
# Coordinates of hand-picked neighborhoods, selected by their row label in staten_data.
# NOTE(review): presumably the same neighborhoods as df_staten; the two index lists are
# maintained by hand, so verify they still match.
staten_data_new = staten_data.loc[[31, 17, 33, 29, 16, 55, 32, 14, 59, 52, 25, 56]]

In [67]:
# create map of Staten Island using the geocoded latitude and longitude values
map_staten = folium.Map(location=[latitude, longitude], zoom_start=11)

# add one marker per selected neighborhood. Coordinates and labels are all taken from
# staten_data_new: zipping coordinates from the full staten_data with names from the
# subset would pin each label to an unrelated neighborhood's location.
for lat, lng, label in zip(staten_data_new['Latitude'], staten_data_new['Longitude'], staten_data_new['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_staten)

map_staten