In [2]:
pip install geopy

Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install folium

Note: you may need to restart the kernel to use updated packages.


# Deciding a suitable location to open an Mexican restaurant in New York City

In [4]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


# 1. Download and Explore Dataset

In [5]:
with open('nyu_2451_34572-geojson.json') as json_data:
    newyork_data = json.load(json_data)

In [6]:
neighbourhoods_data = newyork_data['features']
neighbourhoods_data[0]

{'type': 'Feature',
 'id': 'nyu_2451_34572.1',
 'geometry': {'type': 'Point',
  'coordinates': [-73.84720052054902, 40.89470517661]},
 'geometry_name': 'geom',
 'properties': {'name': 'Wakefield',
  'stacked': 1,
  'annoline1': 'Wakefield',
  'annoline2': None,
  'annoline3': None,
  'annoangle': 0.0,
  'borough': 'Bronx',
  'bbox': [-73.84720052054902,
   40.89470517661,
   -73.84720052054902,
   40.89470517661]}}

In [8]:
# Define the dataframe columns :
column_names = ['Borough', 'Neighbourhood', 'Latitude', 'Longitude'] 

# Instantiate the dataframe :
neighbourhoods = pd.DataFrame(columns=column_names)

In [9]:
for data in neighbourhoods_data:
    borough = neighbourhood_name = data['properties']['borough'] 
    neighbourhood_name = data['properties']['name']
        
    neighbourhood_latlon = data['geometry']['coordinates']
    neighbourhood_lat = neighbourhood_latlon[1]
    neighbourhood_lon = neighbourhood_latlon[0]
    
    neighbourhoods = neighbourhoods.append({'Borough': borough,
                                          'Neighbourhood': neighbourhood_name,
                                          'Latitude': neighbourhood_lat,
                                          'Longitude': neighbourhood_lon}, ignore_index=True)

In [12]:
neighbourhoods.head()

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [13]:
neighbourhoods.shape

(306, 4)

In [14]:
neighbourhoods.describe

<bound method NDFrame.describe of            Borough              Neighbourhood   Latitude  Longitude
0            Bronx                  Wakefield  40.894705 -73.847201
1            Bronx                 Co-op City  40.874294 -73.829939
2            Bronx                Eastchester  40.887556 -73.827806
3            Bronx                  Fieldston  40.895437 -73.905643
4            Bronx                  Riverdale  40.890834 -73.912585
5            Bronx                Kingsbridge  40.881687 -73.902818
6        Manhattan                Marble Hill  40.876551 -73.910660
7            Bronx                   Woodlawn  40.898273 -73.867315
8            Bronx                    Norwood  40.877224 -73.879391
9            Bronx             Williamsbridge  40.881039 -73.857446
10           Bronx                 Baychester  40.866858 -73.835798
11           Bronx             Pelham Parkway  40.857413 -73.854756
12           Bronx                City Island  40.847247 -73.786488
13           B

In [15]:
neighbourhoods.tail()

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
301,Manhattan,Hudson Yards,40.756658,-74.000111
302,Queens,Hammels,40.587338,-73.80553
303,Queens,Bayswater,40.611322,-73.765968
304,Queens,Queensbridge,40.756091,-73.945631
305,Staten Island,Fox Hills,40.617311,-74.08174


In [16]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighbourhoods['Borough'].unique()),
        neighbourhoods.shape[0]
    )
)

The dataframe has 5 boroughs and 306 neighborhoods.


# Use Geopy Library to get the Latitude and Longitude values of New York City.

In [17]:
address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


# Create a map of New York with neighborhoods superimposed on top.

In [18]:
# Create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add markers to map
for lat, lng, borough, neighbourhood in zip(neighbourhoods['Latitude'], neighbourhoods['Longitude'], neighbourhoods['Borough'], neighbourhoods['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

In [19]:
neighbourhoods['Borough'].unique()

array(['Bronx', 'Manhattan', 'Brooklyn', 'Queens', 'Staten Island'],
      dtype=object)

In [21]:
queens_data = neighbourhoods[neighbourhoods['Borough'] == 'Queens'].reset_index(drop=True)
queens_data.head()

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,Queens,Astoria,40.768509,-73.915654
1,Queens,Woodside,40.746349,-73.901842
2,Queens,Jackson Heights,40.751981,-73.882821
3,Queens,Elmhurst,40.744049,-73.881656
4,Queens,Howard Beach,40.654225,-73.838138


In [22]:
queens_data.shape

(81, 4)

In [23]:
queens_data.describe

<bound method NDFrame.describe of    Borough         Neighbourhood   Latitude  Longitude
0   Queens               Astoria  40.768509 -73.915654
1   Queens              Woodside  40.746349 -73.901842
2   Queens       Jackson Heights  40.751981 -73.882821
3   Queens              Elmhurst  40.744049 -73.881656
4   Queens          Howard Beach  40.654225 -73.838138
5   Queens                Corona  40.742382 -73.856825
6   Queens          Forest Hills  40.725264 -73.844475
7   Queens           Kew Gardens  40.705179 -73.829819
8   Queens         Richmond Hill  40.697947 -73.831833
9   Queens              Flushing  40.764454 -73.831773
10  Queens      Long Island City  40.750217 -73.939202
11  Queens             Sunnyside  40.740176 -73.926916
12  Queens         East Elmhurst  40.764073 -73.867041
13  Queens               Maspeth  40.725427 -73.896217
14  Queens             Ridgewood  40.708323 -73.901435
15  Queens              Glendale  40.702762 -73.870742
16  Queens             Rego Par

In [24]:
queens_data.tail()

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
76,Queens,Middle Village,40.716415,-73.881143
77,Queens,Malba,40.790602,-73.826678
78,Queens,Hammels,40.587338,-73.80553
79,Queens,Bayswater,40.611322,-73.765968
80,Queens,Queensbridge,40.756091,-73.945631


# Defining Foursquare Credentials and Version

In [25]:
CLIENT_ID = 'GAQ35M5NL2BLLLGQO555NX3N3SIVOJT4ZYBSS4BN0UQJKJL4' 
CLIENT_SECRET = 'AVCHOI25G212RZPLJZKHHCHMRR4QRPJAQFYOG0W0PBKHRMOQ' 
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: GAQ35M5NL2BLLLGQO555NX3N3SIVOJT4ZYBSS4BN0UQJKJL4
CLIENT_SECRET:AVCHOI25G212RZPLJZKHHCHMRR4QRPJAQFYOG0W0PBKHRMOQ


# 2. Explore Neighbourhoods in New York

In [26]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name'],
            v['venue']['categories'][0]['id'],
            v['venue']['id']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category',
                  'Venue Category Id',
                  'Venue Id']
    
    return(nearby_venues)

# Code to run the above function on each neighborhood and create a new dataframe called newyork_venues.

In [27]:
# Type your answer here

newyork_venues = getNearbyVenues(names=queens_data['Neighbourhood'],
                                   latitudes=queens_data['Latitude'],
                                   longitudes=queens_data['Longitude']
                                  )
newyork_venues.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Venue Category Id,Venue Id
0,Astoria,40.768509,-73.915654,Favela Grill,40.767348,-73.917897,Brazilian Restaurant,4bf58dd8d48988d16b941735,4bdf502a89ca76b062b75d5e
1,Astoria,40.768509,-73.915654,Orange Blossom,40.769856,-73.917012,Gourmet Shop,4bf58dd8d48988d1f5941735,52c580e8498eddd52d925dd9
2,Astoria,40.768509,-73.915654,Titan Foods Inc.,40.769198,-73.919253,Gourmet Shop,4bf58dd8d48988d1f5941735,4a9c0105f964a520b03520e3
3,Astoria,40.768509,-73.915654,CrossFit Queens,40.769404,-73.918977,Gym,4bf58dd8d48988d176941735,4c94d26d58d4b60c40fc2b29
4,Astoria,40.768509,-73.915654,Off The Hook,40.7672,-73.918104,Seafood Restaurant,4bf58dd8d48988d1ce941735,514f9fd5e4b023ae1edd4a68


In [28]:
print(newyork_venues.shape)

(2132, 9)


In [29]:
newyork_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Venue Category Id,Venue Id
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Arverne,21,21,21,21,21,21,21,21
Astoria,100,100,100,100,100,100,100,100
Astoria Heights,12,12,12,12,12,12,12,12
Auburndale,20,20,20,20,20,20,20,20
Bay Terrace,38,38,38,38,38,38,38,38
Bayside,79,79,79,79,79,79,79,79
Bayswater,2,2,2,2,2,2,2,2
Beechhurst,15,15,15,15,15,15,15,15
Bellaire,12,12,12,12,12,12,12,12
Belle Harbor,19,19,19,19,19,19,19,19


In [30]:
print('There are {} uniques categories.'.format(len(newyork_venues['Venue Category'].unique())))

There are 271 uniques categories.


# Filtering Out The Mexican Restaurants

In [31]:
mexican_restaurant_category = '4bf58dd8d48988d1c1941735'  # obtained from Foursquare API

In [32]:
mexican = newyork_venues[newyork_venues['Venue Category Id'] == mexican_restaurant_category]

In [33]:
mexican.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Venue Category Id,Venue Id
16,Astoria,40.768509,-73.915654,El Cafetal,40.770153,-73.918214,Mexican Restaurant,4bf58dd8d48988d1c1941735,4adc66a6f964a520122c21e3
22,Astoria,40.768509,-73.915654,Viva Viva,40.765441,-73.918768,Mexican Restaurant,4bf58dd8d48988d1c1941735,5dd0a68e832e520008369b09
205,Jackson Heights,40.751981,-73.882821,Tacos Mi Mexico Lindo,40.747677,-73.882399,Mexican Restaurant,4bf58dd8d48988d1c1941735,4b91ca34f964a5209fd733e3
224,Jackson Heights,40.751981,-73.882821,Taco Veloz,40.748037,-73.880308,Mexican Restaurant,4bf58dd8d48988d1c1941735,51e248ef498e6768a842bff1
272,Elmhurst,40.744049,-73.881656,Tacos Mi Mexico Lindo,40.747677,-73.882399,Mexican Restaurant,4bf58dd8d48988d1c1941735,4b91ca34f964a5209fd733e3


In [34]:
mexican.shape

(37, 9)

In [35]:
mexican.describe

<bound method NDFrame.describe of              Neighbourhood  Neighbourhood Latitude  Neighbourhood Longitude  \
16                 Astoria               40.768509               -73.915654   
22                 Astoria               40.768509               -73.915654   
205        Jackson Heights               40.751981               -73.882821   
224        Jackson Heights               40.751981               -73.882821   
272               Elmhurst               40.744049               -73.881656   
275               Elmhurst               40.744049               -73.881656   
279               Elmhurst               40.744049               -73.881656   
282               Elmhurst               40.744049               -73.881656   
285               Elmhurst               40.744049               -73.881656   
302           Howard Beach               40.654225               -73.838138   
332                 Corona               40.742382               -73.856825   
336               

In [36]:
mexican.tail()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Venue Category Id,Venue Id
1826,Forest Hills Gardens,40.714611,-73.841022,Happy Fresh Tortilla Grill,40.718962,-73.841342,Mexican Restaurant,4bf58dd8d48988d1c1941735,4ba3f7a9f964a520847238e3
1891,Hunters Point,40.743414,-73.953868,Casa Enrique,40.743374,-73.954339,Mexican Restaurant,4bf58dd8d48988d1c1941735,4f62afe7e4b09b9cd8b354f6
2008,Sunnyside Gardens,40.745652,-73.918193,Chihuahua Mexican Restaurant & Cantina,40.742529,-73.917889,Mexican Restaurant,4bf58dd8d48988d1c1941735,56218d22498ef80198e4bee7
2009,Sunnyside Gardens,40.745652,-73.918193,El Rey Del Taco,40.744449,-73.915299,Mexican Restaurant,4bf58dd8d48988d1c1941735,4de9baa545dd3993a879cd99
2030,Sunnyside Gardens,40.745652,-73.918193,Arriba Arriba,40.743944,-73.923589,Mexican Restaurant,4bf58dd8d48988d1c1941735,4b023ca0f964a5203e4822e3


In [51]:
# Create map of Mexican restaurants using latitude and longitude values
map_mexican = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add markers to map
for lat, lng, Venue, neighbourhood in zip(mexican['Venue Latitude'], mexican['Venue Longitude'], mexican['Venue'],mexican['Neighbourhood']):
    label = '{}, {}'.format(Venue, neighbourhood)
    label = folium.Popup(label, parse_html=True)
    folium.Marker(
        [lat, lng],
        popup=label,
        icon=folium.Icon(color='green',icon='info-sign')
        ).add_to(map_mexican) 
    
map_mexican

# 3. Analyzing Each Neighborhood

In [38]:
# One hot encoding
newyork_onehot = pd.get_dummies(mexican[['Venue']], prefix="", prefix_sep="")

# Add neighborhood column back to dataframe
newyork_onehot['Neighbourhood'] = mexican['Neighbourhood'] 

# Move neighborhood column to the first column
fixed_columns = [newyork_onehot.columns[-1]] + list(newyork_onehot.columns[:-1])
newyork_onehot = newyork_onehot[fixed_columns]

newyork_onehot

Unnamed: 0,Neighbourhood,Arriba Arriba,Blue Tijuana Restaurant,Casa Enrique,Chihuahua Mexican Restaurant & Cantina,Chipotle Mexican Grill,Cienega Las Tlayudas de Oaxaca Mexican Cuisine,Cinco De Mayo,Don Nico's,El Cafetal,El Rey Del Taco,Emilio's Mexican Grill,Fres' c tortilla Tex Mex Express,Fresco Tortilla,Genesis Mexican Authentic Cuisine,Happy Fresh Tortilla Grill,Juquila Mexican Cuisine,La Esquina Del Camaron Mexicano,Los Tres Potrillos,Mexico & El Salvador Restaurant,Mi Cocina,Mi Nuevo Rancho,Miso Taco,Moho Mexican Grill,Pico Mexican Restaurant,Spanglish NYC,Taco King,Taco Veloz,Tacos Mi Mexico Lindo,Taquerias Kermes,Tequila Sunrise,Viva Viva,Waheyo
16,Astoria,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22,Astoria,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
205,Jackson Heights,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
224,Jackson Heights,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
272,Elmhurst,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
275,Elmhurst,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
279,Elmhurst,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
282,Elmhurst,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
285,Elmhurst,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
302,Howard Beach,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [39]:
newyork_grouped = newyork_onehot.groupby('Neighbourhood').mean().reset_index()
newyork_grouped

Unnamed: 0,Neighbourhood,Arriba Arriba,Blue Tijuana Restaurant,Casa Enrique,Chihuahua Mexican Restaurant & Cantina,Chipotle Mexican Grill,Cienega Las Tlayudas de Oaxaca Mexican Cuisine,Cinco De Mayo,Don Nico's,El Cafetal,El Rey Del Taco,Emilio's Mexican Grill,Fres' c tortilla Tex Mex Express,Fresco Tortilla,Genesis Mexican Authentic Cuisine,Happy Fresh Tortilla Grill,Juquila Mexican Cuisine,La Esquina Del Camaron Mexicano,Los Tres Potrillos,Mexico & El Salvador Restaurant,Mi Cocina,Mi Nuevo Rancho,Miso Taco,Moho Mexican Grill,Pico Mexican Restaurant,Spanglish NYC,Taco King,Taco Veloz,Tacos Mi Mexico Lindo,Taquerias Kermes,Tequila Sunrise,Viva Viva,Waheyo
0,Astoria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0
1,Bayside,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0
2,Belle Harbor,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Cambria Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,College Point,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Corona,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Elmhurst,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.2,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0
7,Forest Hills Gardens,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Glen Oaks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Howard Beach,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# 4. Cluster Neighborhoods

In [40]:
# Set number of clusters
kclusters = 5

newyork_grouped_clustering = newyork_grouped.drop('Neighbourhood', 1)

# Run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(newyork_grouped_clustering)

# Check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([3, 3, 4, 2, 3, 3, 3, 3, 3, 1], dtype=int32)

In [41]:
# add clustering labels
newyork_grouped.insert(0, 'Cluster Labels', kmeans.labels_)

newyork_merged = neighbourhoods

# merge newyork_grouped with newyork_data to add latitude/longitude for each neighborhood
newyork_merged = neighbourhoods.join(newyork_grouped.set_index('Neighbourhood'), on='Neighbourhood')

newyork_merged.head() # check the last columns!

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,Arriba Arriba,Blue Tijuana Restaurant,Casa Enrique,Chihuahua Mexican Restaurant & Cantina,Chipotle Mexican Grill,Cienega Las Tlayudas de Oaxaca Mexican Cuisine,Cinco De Mayo,Don Nico's,El Cafetal,El Rey Del Taco,Emilio's Mexican Grill,Fres' c tortilla Tex Mex Express,Fresco Tortilla,Genesis Mexican Authentic Cuisine,Happy Fresh Tortilla Grill,Juquila Mexican Cuisine,La Esquina Del Camaron Mexicano,Los Tres Potrillos,Mexico & El Salvador Restaurant,Mi Cocina,Mi Nuevo Rancho,Miso Taco,Moho Mexican Grill,Pico Mexican Restaurant,Spanglish NYC,Taco King,Taco Veloz,Tacos Mi Mexico Lindo,Taquerias Kermes,Tequila Sunrise,Viva Viva,Waheyo
0,Bronx,Wakefield,40.894705,-73.847201,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,Bronx,Co-op City,40.874294,-73.829939,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,Bronx,Eastchester,40.887556,-73.827806,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,Bronx,Fieldston,40.895437,-73.905643,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,Bronx,Riverdale,40.890834,-73.912585,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [42]:
newyork_merged = newyork_merged.dropna()
newyork_merged.tail()

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,Arriba Arriba,Blue Tijuana Restaurant,Casa Enrique,Chihuahua Mexican Restaurant & Cantina,Chipotle Mexican Grill,Cienega Las Tlayudas de Oaxaca Mexican Cuisine,Cinco De Mayo,Don Nico's,El Cafetal,El Rey Del Taco,Emilio's Mexican Grill,Fres' c tortilla Tex Mex Express,Fresco Tortilla,Genesis Mexican Authentic Cuisine,Happy Fresh Tortilla Grill,Juquila Mexican Cuisine,La Esquina Del Camaron Mexicano,Los Tres Potrillos,Mexico & El Salvador Restaurant,Mi Cocina,Mi Nuevo Rancho,Miso Taco,Moho Mexican Grill,Pico Mexican Restaurant,Spanglish NYC,Taco King,Taco Veloz,Tacos Mi Mexico Lindo,Taquerias Kermes,Tequila Sunrise,Viva Viva,Waheyo
195,Queens,North Corona,40.754071,-73.857518,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
196,Queens,Forest Hills Gardens,40.714611,-73.841022,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
220,Staten Island,Sunnyside,40.61276,-74.097126,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
272,Queens,Hunters Point,40.743414,-73.953868,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
277,Queens,Sunnyside Gardens,40.745652,-73.918193,3.0,0.333333,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [43]:
newyork_merged['Cluster Labels'] = newyork_merged['Cluster Labels'].astype(int)

In [44]:
# Create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# Set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(newyork_merged['Latitude'], newyork_merged['Longitude'], newyork_merged['Neighbourhood'], newyork_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# 5. Examinining Clusters

# Cluster 1

In [45]:
newyork_merged.loc[newyork_merged['Cluster Labels'] == 0, newyork_merged.columns[[1] + list(range(5, newyork_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Arriba Arriba,Blue Tijuana Restaurant,Casa Enrique,Chihuahua Mexican Restaurant & Cantina,Chipotle Mexican Grill,Cienega Las Tlayudas de Oaxaca Mexican Cuisine,Cinco De Mayo,Don Nico's,El Cafetal,El Rey Del Taco,Emilio's Mexican Grill,Fres' c tortilla Tex Mex Express,Fresco Tortilla,Genesis Mexican Authentic Cuisine,Happy Fresh Tortilla Grill,Juquila Mexican Cuisine,La Esquina Del Camaron Mexicano,Los Tres Potrillos,Mexico & El Salvador Restaurant,Mi Cocina,Mi Nuevo Rancho,Miso Taco,Moho Mexican Grill,Pico Mexican Restaurant,Spanglish NYC,Taco King,Taco Veloz,Tacos Mi Mexico Lindo,Taquerias Kermes,Tequila Sunrise,Viva Viva,Waheyo
272,Hunters Point,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Cluster 2

In [46]:
newyork_merged.loc[newyork_merged['Cluster Labels'] == 1, newyork_merged.columns[[1] + list(range(5, newyork_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Arriba Arriba,Blue Tijuana Restaurant,Casa Enrique,Chihuahua Mexican Restaurant & Cantina,Chipotle Mexican Grill,Cienega Las Tlayudas de Oaxaca Mexican Cuisine,Cinco De Mayo,Don Nico's,El Cafetal,El Rey Del Taco,Emilio's Mexican Grill,Fres' c tortilla Tex Mex Express,Fresco Tortilla,Genesis Mexican Authentic Cuisine,Happy Fresh Tortilla Grill,Juquila Mexican Cuisine,La Esquina Del Camaron Mexicano,Los Tres Potrillos,Mexico & El Salvador Restaurant,Mi Cocina,Mi Nuevo Rancho,Miso Taco,Moho Mexican Grill,Pico Mexican Restaurant,Spanglish NYC,Taco King,Taco Veloz,Tacos Mi Mexico Lindo,Taquerias Kermes,Tequila Sunrise,Viva Viva,Waheyo
133,Howard Beach,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
139,Long Island City,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5
160,Jamaica Center,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
189,Lefrak City,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Cluster 3

In [47]:
newyork_merged.loc[newyork_merged['Cluster Labels'] == 2, newyork_merged.columns[[1] + list(range(5, newyork_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Arriba Arriba,Blue Tijuana Restaurant,Casa Enrique,Chihuahua Mexican Restaurant & Cantina,Chipotle Mexican Grill,Cienega Las Tlayudas de Oaxaca Mexican Cuisine,Cinco De Mayo,Don Nico's,El Cafetal,El Rey Del Taco,Emilio's Mexican Grill,Fres' c tortilla Tex Mex Express,Fresco Tortilla,Genesis Mexican Authentic Cuisine,Happy Fresh Tortilla Grill,Juquila Mexican Cuisine,La Esquina Del Camaron Mexicano,Los Tres Potrillos,Mexico & El Salvador Restaurant,Mi Cocina,Mi Nuevo Rancho,Miso Taco,Moho Mexican Grill,Pico Mexican Restaurant,Spanglish NYC,Taco King,Taco Veloz,Tacos Mi Mexico Lindo,Taquerias Kermes,Tequila Sunrise,Viva Viva,Waheyo
168,Cambria Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Cluster 4

In [49]:
newyork_merged.loc[newyork_merged['Cluster Labels'] == 3, newyork_merged.columns[[1] + list(range(5, newyork_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Arriba Arriba,Blue Tijuana Restaurant,Casa Enrique,Chihuahua Mexican Restaurant & Cantina,Chipotle Mexican Grill,Cienega Las Tlayudas de Oaxaca Mexican Cuisine,Cinco De Mayo,Don Nico's,El Cafetal,El Rey Del Taco,Emilio's Mexican Grill,Fres' c tortilla Tex Mex Express,Fresco Tortilla,Genesis Mexican Authentic Cuisine,Happy Fresh Tortilla Grill,Juquila Mexican Cuisine,La Esquina Del Camaron Mexicano,Los Tres Potrillos,Mexico & El Salvador Restaurant,Mi Cocina,Mi Nuevo Rancho,Miso Taco,Moho Mexican Grill,Pico Mexican Restaurant,Spanglish NYC,Taco King,Taco Veloz,Tacos Mi Mexico Lindo,Taquerias Kermes,Tequila Sunrise,Viva Viva,Waheyo
129,Astoria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0
131,Jackson Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.5,0.0,0.0,0.0,0.0
132,Elmhurst,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.2,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0
134,Corona,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
137,Richmond Hill,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
140,Sunnyside,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
143,Ridgewood,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
146,Woodhaven,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
149,College Point,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
151,Bayside,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0


# Cluster 5

In [50]:
newyork_merged.loc[newyork_merged['Cluster Labels'] == 4, newyork_merged.columns[[1] + list(range(5, newyork_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Arriba Arriba,Blue Tijuana Restaurant,Casa Enrique,Chihuahua Mexican Restaurant & Cantina,Chipotle Mexican Grill,Cienega Las Tlayudas de Oaxaca Mexican Cuisine,Cinco De Mayo,Don Nico's,El Cafetal,El Rey Del Taco,Emilio's Mexican Grill,Fres' c tortilla Tex Mex Express,Fresco Tortilla,Genesis Mexican Authentic Cuisine,Happy Fresh Tortilla Grill,Juquila Mexican Cuisine,La Esquina Del Camaron Mexicano,Los Tres Potrillos,Mexico & El Salvador Restaurant,Mi Cocina,Mi Nuevo Rancho,Miso Taco,Moho Mexican Grill,Pico Mexican Restaurant,Spanglish NYC,Taco King,Taco Veloz,Tacos Mi Mexico Lindo,Taquerias Kermes,Tequila Sunrise,Viva Viva,Waheyo
190,Belle Harbor,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
