# Capstone Project - The Battle of Neighborhoods 

## Week 2 

### 1. Introduction 

This project clusters Manhattan and Brooklyn, studies the segments, and explores the similarity between them.   

### 2. Data 

#### Import packages and dependencies 

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

# All requested packages already installed.

Solving environment: done

# All requested packages already installed.

Libraries imported.


#### Load New York city data from file 

In [2]:
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)

#### Check the New York City data 

In [3]:
newyork_data 

{'bbox': [-74.2492599487305,
  40.5033187866211,
  -73.7061614990234,
  40.9105606079102],
 'crs': {'properties': {'name': 'urn:ogc:def:crs:EPSG::4326'}, 'type': 'name'},
 'features': [{'geometry': {'coordinates': [-73.84720052054902,
     40.89470517661],
    'type': 'Point'},
   'geometry_name': 'geom',
   'id': 'nyu_2451_34572.1',
   'properties': {'annoangle': 0.0,
    'annoline1': 'Wakefield',
    'annoline2': None,
    'annoline3': None,
    'bbox': [-73.84720052054902,
     40.89470517661,
     -73.84720052054902,
     40.89470517661],
    'borough': 'Bronx',
    'name': 'Wakefield',
    'stacked': 1},
   'type': 'Feature'},
  {'geometry': {'coordinates': [-73.82993910812398, 40.87429419303012],
    'type': 'Point'},
   'geometry_name': 'geom',
   'id': 'nyu_2451_34572.2',
   'properties': {'annoangle': 0.0,
    'annoline1': 'Co-op',
    'annoline2': 'City',
    'annoline3': None,
    'bbox': [-73.82993910812398,
     40.87429419303012,
     -73.82993910812398,
     40.874294193

In [4]:
addressNewyork = 'New York City, NY'

geolocatorNewyork = Nominatim(user_agent="my-application")
locationNewyork = geolocatorNewyork.geocode(addressNewyork)
latitudeNewyork = locationNewyork.latitude
longitudeNewyork = locationNewyork.longitude 
print('The geograpical coordinate of New York City are {}, {}.'.format(latitudeNewyork, longitudeNewyork))

The geograpical coordinate of New York City are 40.7308619, -73.9871558.


#### Prepare the New York city data 

In [5]:
# Neighborhoods 
neighborhoods_data = newyork_data['features'] 
# Pandas data frame 
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names) 
for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True) 
# Display data 
neighborhoods.head() 

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


#### Extract Manhattan and Brooklyn data 

In [6]:
manhattan_data = neighborhoods[neighborhoods['Borough'] == 'Manhattan'].reset_index(drop=True)
brooklyn_data = neighborhoods[neighborhoods['Borough'] == 'Brooklyn'].reset_index(drop=True)

In [7]:
manhattan_data.head() 

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688


In [8]:
brooklyn_data.head() 

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Brooklyn,Bay Ridge,40.625801,-74.030621
1,Brooklyn,Bensonhurst,40.611009,-73.99518
2,Brooklyn,Sunset Park,40.645103,-74.010316
3,Brooklyn,Greenpoint,40.730201,-73.954241
4,Brooklyn,Gravesend,40.59526,-73.973471


#### Geographical coordinates of Manhattan and Brooklyn 

In [9]:
addressManhattan = 'Manhattan, NY'
geolocatorManhattan = Nominatim(user_agent="my-application")
locationManhattan = geolocatorManhattan.geocode(addressManhattan)
latitudeManhattan = locationManhattan.latitude
longitudeManhattan = locationManhattan.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitudeManhattan, longitudeManhattan)) 

addressBrooklyn = 'Brooklyn, NY'
geolocatorBrooklyn = Nominatim(user_agent="my-application")
locationBrooklyn = geolocatorBrooklyn.geocode(addressBrooklyn)
latitudeBrooklyn = locationBrooklyn.latitude
longitudeBrooklyn = locationBrooklyn.longitude
print('The geograpical coordinate of Brooklyn are {}, {}.'.format(latitudeBrooklyn, longitudeBrooklyn)) 

The geograpical coordinate of Manhattan are 40.7900869, -73.9598295.
The geograpical coordinate of Brooklyn are 40.6501038, -73.9495823.


#### Visualize Manhattan and Brooklyn neighborhoods 

In [10]:
# create map of Manhattan and Brooklyn using latitude and longitude values
mapManhattanBrooklyn = folium.Map(location=[latitudeNewyork, longitudeNewyork], zoom_start=11)

# add markers to map
for lat, lng, label in zip(manhattan_data['Latitude'], manhattan_data['Longitude'], manhattan_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(mapManhattanBrooklyn)  
    
# add markers to map
for lat, lng, label in zip(brooklyn_data['Latitude'], brooklyn_data['Longitude'], brooklyn_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(mapManhattanBrooklyn)  
    
mapManhattanBrooklyn   

#### Foursquare Credentials and Version

In [48]:
CLIENT_ID = 'SOLOMON_ID' # my Foursquare ID hidden after run  
CLIENT_SECRET = 'SOLOMON_SECRET' # my Foursquare Secret hidden after run 
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: SOLOMON_ID
CLIENT_SECRET:SOLOMON_SECRET


### 3. Methodology 

#### Explore Neighborhoods in Manhattan and Brooklyn  

In [12]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        # print(name) 
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### 3.1 Neighborhoods in Manhattan 

#### Manhattan venues 

In [13]:
manhattan_venues = getNearbyVenues(names=manhattan_data['Neighborhood'],
                                   latitudes=manhattan_data['Latitude'],
                                   longitudes=manhattan_data['Longitude']) 
print('Manhattan venues obtained.') 
manhattan_venues.groupby('Neighborhood').count() 
print('Count.')
print('There are {} uniques categories.'.format(len(manhattan_venues['Venue Category'].unique()))) 
# one hot encoding
manhattan_onehot = pd.get_dummies(manhattan_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
manhattan_onehot['Neighborhood'] = manhattan_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [manhattan_onehot.columns[-1]] + list(manhattan_onehot.columns[:-1])
manhattan_onehot = manhattan_onehot[fixed_columns] 
print('Encoded.') 
# manhattan_onehot.head() 
manhattan_grouped = manhattan_onehot.groupby('Neighborhood').mean().reset_index()
manhattan_grouped 
print('Grouped:',manhattan_grouped.shape)  

Manhattan venues obtained.
Count.
There are 332 uniques categories.
Encoded.
Grouped: (40, 333)


In [14]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues] 

In [15]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = manhattan_grouped['Neighborhood']

for ind in np.arange(manhattan_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(manhattan_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Battery Park City,Coffee Shop,Park,Hotel,Wine Shop,Italian Restaurant,Burger Joint,Cupcake Shop,Department Store,Plaza,BBQ Joint
1,Carnegie Hill,Pizza Place,Cosmetics Shop,Coffee Shop,Café,Yoga Studio,Gym,Bar,French Restaurant,Japanese Restaurant,Bookstore
2,Central Harlem,African Restaurant,French Restaurant,Pizza Place,American Restaurant,Gym / Fitness Center,Cosmetics Shop,Chinese Restaurant,Seafood Restaurant,Bookstore,Ethiopian Restaurant
3,Chelsea,Coffee Shop,Italian Restaurant,Ice Cream Shop,Nightclub,Bakery,American Restaurant,Hotel,Seafood Restaurant,Theater,Art Gallery
4,Chinatown,Chinese Restaurant,Bubble Tea Shop,American Restaurant,Cocktail Bar,Vietnamese Restaurant,Dim Sum Restaurant,Hotpot Restaurant,Salon / Barbershop,Noodle House,Bakery
5,Civic Center,Gym / Fitness Center,Italian Restaurant,Bakery,French Restaurant,Yoga Studio,Sporting Goods Shop,Park,Coffee Shop,Cocktail Bar,Gym
6,Clinton,Theater,Coffee Shop,Gym / Fitness Center,American Restaurant,Italian Restaurant,Gym,Wine Shop,Spa,Hotel,Dog Run
7,East Harlem,Mexican Restaurant,Bakery,Latin American Restaurant,Deli / Bodega,Thai Restaurant,Beer Bar,Liquor Store,Grocery Store,Coffee Shop,Spanish Restaurant
8,East Village,Bar,Ice Cream Shop,Wine Bar,Mexican Restaurant,Cocktail Bar,Speakeasy,Ramen Restaurant,Chinese Restaurant,Pizza Place,Korean Restaurant
9,Financial District,Coffee Shop,Hotel,Wine Shop,Bar,Gym,Steakhouse,Food Truck,Italian Restaurant,Pizza Place,Falafel Restaurant


#### Cluster Neighborhoods

In [16]:
# set number of clusters
kclusters = 5

manhattan_grouped_clustering = manhattan_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(manhattan_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 
manhattan_merged = manhattan_data

# add clustering labels
manhattan_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
manhattan_merged = manhattan_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

manhattan_merged.head() # check the last columns! 

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Manhattan,Marble Hill,40.876551,-73.91066,3,Coffee Shop,Discount Store,Yoga Studio,Kids Store,Steakhouse,Supplement Shop,Shopping Mall,Shoe Store,Tennis Stadium,Seafood Restaurant
1,Manhattan,Chinatown,40.715618,-73.994279,1,Chinese Restaurant,Bubble Tea Shop,American Restaurant,Cocktail Bar,Vietnamese Restaurant,Dim Sum Restaurant,Hotpot Restaurant,Salon / Barbershop,Noodle House,Bakery
2,Manhattan,Washington Heights,40.851903,-73.9369,0,Café,Bakery,Mobile Phone Shop,Spanish Restaurant,Sandwich Place,Chinese Restaurant,Shoe Store,Grocery Store,Gym,Mexican Restaurant
3,Manhattan,Inwood,40.867684,-73.92121,0,Café,Mexican Restaurant,Pizza Place,Lounge,Deli / Bodega,Wine Bar,American Restaurant,Bakery,Restaurant,Park
4,Manhattan,Hamilton Heights,40.823604,-73.949688,0,Mexican Restaurant,Deli / Bodega,Café,Coffee Shop,Pizza Place,Yoga Studio,Chinese Restaurant,Sandwich Place,Cocktail Bar,Liquor Store


In [17]:
manhattan_merged.groupby(['Cluster Labels']).size()

Cluster Labels
0    20
1     7
2     1
3     4
4     8
dtype: int64

#### 3.2 Neighborhoods in Brooklyn  

#### Brooklyn venues 

In [18]:
brooklyn_venues = getNearbyVenues(names=brooklyn_data['Neighborhood'],
                                   latitudes=brooklyn_data['Latitude'],
                                   longitudes=brooklyn_data['Longitude']) 
print('Brooklyn venues obtained.') 
brooklyn_venues.groupby('Neighborhood').count() 
print('Count.')
print('There are {} uniques categories.'.format(len(brooklyn_venues['Venue Category'].unique()))) 
# one hot encoding
brooklyn_onehot = pd.get_dummies(brooklyn_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
brooklyn_onehot['Neighborhood'] = brooklyn_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [brooklyn_onehot.columns[-1]] + list(brooklyn_onehot.columns[:-1])
brooklyn_onehot = brooklyn_onehot[fixed_columns] 
print('Encoded.') 
# brooklyn_onehot.head() 
brooklyn_grouped = brooklyn_onehot.groupby('Neighborhood').mean().reset_index()
brooklyn_grouped 
print('Grouped:',brooklyn_grouped.shape)  

Brooklyn venues obtained.
Count.
There are 289 uniques categories.
Encoded.
Grouped: (70, 289)


In [19]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = brooklyn_grouped['Neighborhood']

for ind in np.arange(brooklyn_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(brooklyn_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bath Beach,Chinese Restaurant,Pizza Place,Pharmacy,Donut Shop,Fast Food Restaurant,Sushi Restaurant,Shoe Store,Women's Store,Mobile Phone Shop,Italian Restaurant
1,Bay Ridge,Italian Restaurant,Pizza Place,Spa,Bar,American Restaurant,Bagel Shop,Greek Restaurant,Thai Restaurant,Grocery Store,Sushi Restaurant
2,Bedford Stuyvesant,Coffee Shop,Pizza Place,Café,Bar,Bus Stop,Juice Bar,Wine Bar,Gourmet Shop,Basketball Court,Boutique
3,Bensonhurst,Chinese Restaurant,Ice Cream Shop,Pizza Place,Sushi Restaurant,Hotpot Restaurant,Supermarket,Coffee Shop,Grocery Store,Noodle House,Liquor Store
4,Bergen Beach,Harbor / Marina,Hockey Field,Baseball Field,Donut Shop,Playground,Park,Athletics & Sports,Women's Store,Filipino Restaurant,Fish & Chips Shop
5,Boerum Hill,Coffee Shop,Bar,Dance Studio,French Restaurant,Spa,Bakery,Grocery Store,Kids Store,Furniture / Home Store,Martial Arts Dojo
6,Borough Park,Pizza Place,Deli / Bodega,Bank,Fast Food Restaurant,Café,Pharmacy,American Restaurant,Coffee Shop,Hotel,Restaurant
7,Brighton Beach,Beach,Eastern European Restaurant,Restaurant,Fast Food Restaurant,Russian Restaurant,Sushi Restaurant,Gourmet Shop,Bank,Mobile Phone Shop,Pharmacy
8,Broadway Junction,Donut Shop,Diner,Dessert Shop,Burger Joint,Ice Cream Shop,Metro Station,Moving Target,Fried Chicken Joint,Dry Cleaner,Deli / Bodega
9,Brooklyn Heights,Yoga Studio,Park,Italian Restaurant,Deli / Bodega,Gym,Cosmetics Shop,Wine Shop,Pet Store,Bar,Mexican Restaurant


#### Cluster neighboors 

In [20]:
# set number of clusters
kclusters = 5

brooklyn_grouped_clustering = brooklyn_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(brooklyn_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 
brooklyn_merged = brooklyn_data

# add clustering labels
brooklyn_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
brooklyn_merged = brooklyn_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

brooklyn_merged.head() # check the last columns! 

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Brooklyn,Bay Ridge,40.625801,-74.030621,2,Italian Restaurant,Pizza Place,Spa,Bar,American Restaurant,Bagel Shop,Greek Restaurant,Thai Restaurant,Grocery Store,Sushi Restaurant
1,Brooklyn,Bensonhurst,40.611009,-73.99518,0,Chinese Restaurant,Ice Cream Shop,Pizza Place,Sushi Restaurant,Hotpot Restaurant,Supermarket,Coffee Shop,Grocery Store,Noodle House,Liquor Store
2,Brooklyn,Sunset Park,40.645103,-74.010316,0,Bank,Mexican Restaurant,Pizza Place,Latin American Restaurant,Bakery,Mobile Phone Shop,Pharmacy,Gym,Grocery Store,Donut Shop
3,Brooklyn,Greenpoint,40.730201,-73.954241,2,Bar,Pizza Place,Coffee Shop,Café,Cocktail Bar,Yoga Studio,Record Shop,Bakery,Mexican Restaurant,French Restaurant
4,Brooklyn,Gravesend,40.59526,-73.973471,0,Pizza Place,Bus Station,Bakery,Lounge,Italian Restaurant,Chinese Restaurant,Diner,Martial Arts Dojo,Breakfast Spot,Metro Station


In [21]:
brooklyn_merged.groupby(['Cluster Labels']).size()

Cluster Labels
0    35
1     2
2    18
3     1
4    14
dtype: int64

### 4. Results 

#### 4.1 Visualize 

In [22]:
# create map
map_clusters = folium.Map(location=[latitudeNewyork, longitudeNewyork], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(manhattan_merged['Latitude'], manhattan_merged['Longitude'], manhattan_merged['Neighborhood'], manhattan_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters) 

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(brooklyn_merged['Latitude'], brooklyn_merged['Longitude'], brooklyn_merged['Neighborhood'], brooklyn_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=False,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)    

map_clusters

<a id='item5'></a>

#### 4.2 Examine clusters 

#### Manhattan 

Cluster 1

In [37]:
cluster1Manhattan = manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 0, manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]] 
cluster1Manhattan 

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Washington Heights,Café,Bakery,Mobile Phone Shop,Spanish Restaurant,Sandwich Place,Chinese Restaurant,Shoe Store,Grocery Store,Gym,Mexican Restaurant
3,Inwood,Café,Mexican Restaurant,Pizza Place,Lounge,Deli / Bodega,Wine Bar,American Restaurant,Bakery,Restaurant,Park
4,Hamilton Heights,Mexican Restaurant,Deli / Bodega,Café,Coffee Shop,Pizza Place,Yoga Studio,Chinese Restaurant,Sandwich Place,Cocktail Bar,Liquor Store
5,Manhattanville,Deli / Bodega,Sushi Restaurant,Italian Restaurant,Mexican Restaurant,Seafood Restaurant,Diner,Supermarket,Burger Joint,Bus Station,Spanish Restaurant
6,Central Harlem,African Restaurant,French Restaurant,Pizza Place,American Restaurant,Gym / Fitness Center,Cosmetics Shop,Chinese Restaurant,Seafood Restaurant,Bookstore,Ethiopian Restaurant
10,Lenox Hill,Italian Restaurant,Coffee Shop,Sushi Restaurant,Gym / Fitness Center,Pizza Place,Gym,Sporting Goods Shop,Burger Joint,Art Gallery,Bakery
12,Upper West Side,Italian Restaurant,Bar,Coffee Shop,Wine Bar,Bakery,Burger Joint,Indian Restaurant,Vegetarian / Vegan Restaurant,Seafood Restaurant,Gym / Fitness Center
14,Clinton,Theater,Coffee Shop,Gym / Fitness Center,American Restaurant,Italian Restaurant,Gym,Wine Shop,Spa,Hotel,Dog Run
17,Chelsea,Coffee Shop,Italian Restaurant,Ice Cream Shop,Nightclub,Bakery,American Restaurant,Hotel,Seafood Restaurant,Theater,Art Gallery
18,Greenwich Village,Italian Restaurant,Sushi Restaurant,Clothing Store,French Restaurant,Seafood Restaurant,Indian Restaurant,Café,Chinese Restaurant,Sandwich Place,Caribbean Restaurant


Cluster 2

In [24]:
cluster2Manhattan = manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 1, manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]]
cluster2Manhattan 

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Chinatown,Chinese Restaurant,Bubble Tea Shop,American Restaurant,Cocktail Bar,Vietnamese Restaurant,Dim Sum Restaurant,Hotpot Restaurant,Salon / Barbershop,Noodle House,Bakery
8,Upper East Side,Italian Restaurant,Exhibit,Art Gallery,Coffee Shop,Bakery,Juice Bar,Hotel,French Restaurant,Boutique,Gym / Fitness Center
11,Roosevelt Island,Park,Sandwich Place,Deli / Bodega,Greek Restaurant,Liquor Store,Coffee Shop,Gym,School,Baseball Field,Residential Building (Apartment / Condo)
16,Murray Hill,Hotel,Japanese Restaurant,Bar,Spa,Coffee Shop,Salon / Barbershop,French Restaurant,Sandwich Place,Italian Restaurant,Gym
20,Lower East Side,Coffee Shop,Café,Latin American Restaurant,Ramen Restaurant,Japanese Restaurant,Sandwich Place,Cocktail Bar,Art Gallery,Shoe Store,Chinese Restaurant
26,Morningside Heights,Coffee Shop,Bookstore,American Restaurant,Food Truck,Park,Tennis Court,Deli / Bodega,Burger Joint,Sandwich Place,Outdoor Sculpture
39,Hudson Yards,Coffee Shop,Italian Restaurant,Café,Hotel,American Restaurant,Theater,Gym / Fitness Center,Restaurant,Thai Restaurant,Gym


Cluster 3

In [25]:
cluster3Manhattan = manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 2, manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]] 
cluster3Manhattan 

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
30,Carnegie Hill,Pizza Place,Cosmetics Shop,Coffee Shop,Café,Yoga Studio,Gym,Bar,French Restaurant,Japanese Restaurant,Bookstore


Cluster 4

In [26]:
cluster4Manhattan = manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 3, manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]]
cluster4Manhattan 

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Marble Hill,Coffee Shop,Discount Store,Yoga Studio,Kids Store,Steakhouse,Supplement Shop,Shopping Mall,Shoe Store,Tennis Stadium,Seafood Restaurant
9,Yorkville,Italian Restaurant,Gym,Bar,Coffee Shop,Pizza Place,Japanese Restaurant,Sushi Restaurant,Mexican Restaurant,Deli / Bodega,Spa
25,Manhattan Valley,Coffee Shop,Pizza Place,Yoga Studio,Spa,Szechuan Restaurant,Mexican Restaurant,Bar,Italian Restaurant,Deli / Bodega,Thai Restaurant
28,Battery Park City,Coffee Shop,Park,Hotel,Wine Shop,Italian Restaurant,Burger Joint,Cupcake Shop,Department Store,Plaza,BBQ Joint


Cluster 5

In [27]:
cluster5Manhattan = manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 4, manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]] 
cluster5Manhattan 

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,East Harlem,Mexican Restaurant,Bakery,Latin American Restaurant,Deli / Bodega,Thai Restaurant,Beer Bar,Liquor Store,Grocery Store,Coffee Shop,Spanish Restaurant
13,Lincoln Square,Gym / Fitness Center,Theater,Concert Hall,Italian Restaurant,Plaza,Café,French Restaurant,Park,Indie Movie Theater,Performing Arts Venue
15,Midtown,Hotel,Theater,Steakhouse,Clothing Store,Coffee Shop,American Restaurant,Bakery,Food Truck,Park,Bookstore
19,East Village,Bar,Ice Cream Shop,Wine Bar,Mexican Restaurant,Cocktail Bar,Speakeasy,Ramen Restaurant,Chinese Restaurant,Pizza Place,Korean Restaurant
21,Tribeca,Italian Restaurant,American Restaurant,Spa,Café,Park,Wine Bar,Coffee Shop,Greek Restaurant,Boutique,Gym
22,Little Italy,Bakery,Café,Yoga Studio,Bubble Tea Shop,Salon / Barbershop,Sandwich Place,Cocktail Bar,Seafood Restaurant,Ice Cream Shop,Women's Store
33,Midtown South,Korean Restaurant,Coffee Shop,Cosmetics Shop,Hotel Bar,Japanese Restaurant,Bakery,Italian Restaurant,Gym / Fitness Center,Cocktail Bar,Boutique
37,Stuyvesant Town,Bar,Playground,Park,Basketball Court,Pet Service,Cocktail Bar,Coffee Shop,Heliport,Farmers Market,Baseball Field


#### Brooklyn 

Cluster 1 

In [28]:
cluster1Brooklyn = brooklyn_merged.loc[brooklyn_merged['Cluster Labels'] == 0, brooklyn_merged.columns[[1] + list(range(5, brooklyn_merged.shape[1]))]] 
cluster1Brooklyn 

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Bensonhurst,Chinese Restaurant,Ice Cream Shop,Pizza Place,Sushi Restaurant,Hotpot Restaurant,Supermarket,Coffee Shop,Grocery Store,Noodle House,Liquor Store
2,Sunset Park,Bank,Mexican Restaurant,Pizza Place,Latin American Restaurant,Bakery,Mobile Phone Shop,Pharmacy,Gym,Grocery Store,Donut Shop
4,Gravesend,Pizza Place,Bus Station,Bakery,Lounge,Italian Restaurant,Chinese Restaurant,Diner,Martial Arts Dojo,Breakfast Spot,Metro Station
5,Brighton Beach,Beach,Eastern European Restaurant,Restaurant,Fast Food Restaurant,Russian Restaurant,Sushi Restaurant,Gourmet Shop,Bank,Mobile Phone Shop,Pharmacy
7,Manhattan Terrace,Pizza Place,Grocery Store,Donut Shop,Cosmetics Shop,Liquor Store,Chinese Restaurant,Steakhouse,Bank,Convenience Store,Bagel Shop
9,Crown Heights,Pizza Place,Café,Museum,Candy Store,Salon / Barbershop,Burger Joint,Coffee Shop,Bakery,Bagel Shop,Supermarket
10,East Flatbush,Moving Target,Chinese Restaurant,Caribbean Restaurant,Hardware Store,Park,Food,Supermarket,Pharmacy,Deli / Bodega,Department Store
11,Kensington,Grocery Store,Thai Restaurant,Sandwich Place,Pizza Place,Ice Cream Shop,Lingerie Store,Liquor Store,Supermarket,Furniture / Home Store,Spa
13,Prospect Heights,Bar,Mexican Restaurant,Thai Restaurant,Cocktail Bar,Wine Shop,American Restaurant,Gourmet Shop,Bakery,Café,Coffee Shop
15,Williamsburg,Bar,Bagel Shop,Coffee Shop,Pizza Place,Lounge,Liquor Store,Taco Place,Grocery Store,Gym,Café


Cluster 2

In [29]:
cluster2Brooklyn = brooklyn_merged.loc[brooklyn_merged['Cluster Labels'] == 1, brooklyn_merged.columns[[1] + list(range(5, brooklyn_merged.shape[1]))]] 
cluster2Brooklyn 

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Windsor Terrace,Park,Plaza,Diner,Café,Beer Store,Coffee Shop,Grocery Store,Bookstore,Salad Place,Bar
52,Ocean Parkway,Gym,Sushi Restaurant,Indian Restaurant,Bus Station,Grocery Store,Gift Shop,General Entertainment,Steakhouse,Sake Bar,Bakery


Cluster 3 

In [30]:
cluster3Brooklyn = brooklyn_merged.loc[brooklyn_merged['Cluster Labels'] == 2, brooklyn_merged.columns[[1] + list(range(5, brooklyn_merged.shape[1]))]] 
cluster3Brooklyn 

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bay Ridge,Italian Restaurant,Pizza Place,Spa,Bar,American Restaurant,Bagel Shop,Greek Restaurant,Thai Restaurant,Grocery Store,Sushi Restaurant
3,Greenpoint,Bar,Pizza Place,Coffee Shop,Café,Cocktail Bar,Yoga Studio,Record Shop,Bakery,Mexican Restaurant,French Restaurant
6,Sheepshead Bay,Dessert Shop,Turkish Restaurant,Sandwich Place,Restaurant,Yoga Studio,Café,Russian Restaurant,Creperie,Diner,Boat or Ferry
18,Brooklyn Heights,Yoga Studio,Park,Italian Restaurant,Deli / Bodega,Gym,Cosmetics Shop,Wine Shop,Pet Store,Bar,Mexican Restaurant
23,Fort Greene,Coffee Shop,Italian Restaurant,Pizza Place,Flower Shop,New American Restaurant,French Restaurant,Theater,Mexican Restaurant,Spa,Cocktail Bar
33,Bath Beach,Chinese Restaurant,Pizza Place,Pharmacy,Donut Shop,Fast Food Restaurant,Sushi Restaurant,Shoe Store,Women's Store,Mobile Phone Shop,Italian Restaurant
36,Gerritsen Beach,Bar,Ice Cream Shop,Harbor / Marina,Event Space,Department Store,Convenience Store,Baseball Field,Seafood Restaurant,Deli / Bodega,Bagel Shop
38,Clinton Hill,Pizza Place,Italian Restaurant,Yoga Studio,Indian Restaurant,Restaurant,Mexican Restaurant,Chinese Restaurant,Japanese Restaurant,Grocery Store,Thai Restaurant
39,Sea Gate,Spa,Beach,Bus Station,Optical Shop,Home Service,Food Court,Fish Market,Flower Shop,Food,Food & Drink Shop
41,Boerum Hill,Coffee Shop,Bar,Dance Studio,French Restaurant,Spa,Bakery,Grocery Store,Kids Store,Furniture / Home Store,Martial Arts Dojo


Cluster 4 

In [31]:
cluster4Brooklyn = brooklyn_merged.loc[brooklyn_merged['Cluster Labels'] == 3, brooklyn_merged.columns[[1] + list(range(5, brooklyn_merged.shape[1]))]] 
cluster4Brooklyn 

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
47,Prospect Park South,Caribbean Restaurant,Fast Food Restaurant,Pizza Place,Grocery Store,Pharmacy,Mobile Phone Shop,Donut Shop,Clothing Store,Mexican Restaurant,Latin American Restaurant


Cluste 5 

In [32]:
cluster5Brooklyn = brooklyn_merged.loc[brooklyn_merged['Cluster Labels'] == 4, brooklyn_merged.columns[[1] + list(range(5, brooklyn_merged.shape[1]))]] 
cluster5Brooklyn 

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Flatbush,Coffee Shop,Mexican Restaurant,Caribbean Restaurant,Juice Bar,Bagel Shop,Middle Eastern Restaurant,Chinese Restaurant,Lounge,Liquor Store,Sandwich Place
14,Brownsville,Park,Restaurant,Discount Store,Donut Shop,Spanish Restaurant,Moving Target,Men's Store,Burger Joint,Performing Arts Venue,Fried Chicken Joint
17,Bedford Stuyvesant,Coffee Shop,Pizza Place,Café,Bar,Bus Stop,Juice Bar,Wine Bar,Gourmet Shop,Basketball Court,Boutique
19,Cobble Hill,Pizza Place,Bar,Coffee Shop,Ice Cream Shop,Italian Restaurant,Playground,Cocktail Bar,Yoga Studio,Deli / Bodega,American Restaurant
20,Carroll Gardens,Italian Restaurant,Bakery,Cocktail Bar,Coffee Shop,Pizza Place,Gym / Fitness Center,Bar,Wine Shop,Spa,Food & Drink Shop
24,Park Slope,American Restaurant,Coffee Shop,Burger Joint,Pizza Place,Italian Restaurant,Spa,Bagel Shop,Japanese Restaurant,Pub,Falafel Restaurant
25,Cypress Hills,Fast Food Restaurant,Latin American Restaurant,Ice Cream Shop,Donut Shop,Fried Chicken Joint,Spanish Restaurant,Chinese Restaurant,Liquor Store,Caribbean Restaurant,Baseball Field
27,Starrett City,Cosmetics Shop,Pharmacy,Caribbean Restaurant,Shoe Store,Supermarket,Convenience Store,River,Shopping Mall,Gym,Bus Stop
28,Canarsie,Chinese Restaurant,Bus Line,Caribbean Restaurant,Gym,Grocery Store,Asian Restaurant,Women's Store,Food & Drink Shop,Flower Shop,Food
29,Flatlands,Pharmacy,Fast Food Restaurant,Caribbean Restaurant,Discount Store,Nightclub,Lounge,Sandwich Place,Paper / Office Supplies Store,Park,Athletics & Sports


### 5. Discussion 

#### 5.1 Venues Density and Comparison  

- Venues in Manhattan are close each other but sparse in Brooklyn  

#### 5.2 Segments Naming and Comparison 

#### Manhattan 

In [45]:
# Cluster 1 
cluster1ManhattanMerge = cluster1Manhattan.melt() 
print('Cluster 1:') 
print(cluster1ManhattanMerge['value'].value_counts()[:3])  
# Cluster 2 
cluster2ManhattanMerge = cluster2Manhattan.melt() 
print('Cluster 2:') 
print(cluster2ManhattanMerge['value'].value_counts()[:3])  
# Cluster 3 
cluster3ManhattanMerge = cluster3Manhattan.melt() 
print('Cluster 3:') 
print(cluster3ManhattanMerge['value'].value_counts()[:3]) 
# Cluster 4 
cluster4ManhattanMerge = cluster4Manhattan.melt() 
print('Cluster 4:') 
print(cluster4ManhattanMerge['value'].value_counts()[:3]) 
# Cluster 5 
cluster5ManhattanMerge = cluster5Manhattan.melt() 
print('Cluster 5:') 
print(cluster5ManhattanMerge['value'].value_counts()[:3]) 

Cluster 1:
Italian Restaurant     15
Coffee Shop            11
American Restaurant     9
Name: value, dtype: int64
Cluster 2:
Coffee Shop            6
Sandwich Place         4
American Restaurant    3
Name: value, dtype: int64
Cluster 3:
Yoga Studio    1
Café           1
Bar            1
Name: value, dtype: int64
Cluster 4:
Coffee Shop           4
Italian Restaurant    3
Yoga Studio           2
Name: value, dtype: int64
Cluster 5:
Coffee Shop    5
Park           4
Bakery         4
Name: value, dtype: int64


Manhattan Segments:    
     - Cluster 1 (20): Italian/American Restaurant Place  
     - Cluster 2 (7): Coffee/Sandwich Place 
     - Cluster 3 (1): ---   
     - Cluster 4 (4): Coffee Shop   
     - Cluster 5 (8): Park  

#### Brooklyn 

In [46]:
# Cluster 1 
cluster1BrooklynMerge = cluster1Brooklyn.melt() 
print('Cluster 1:') 
print(cluster1BrooklynMerge['value'].value_counts()[:3])  
# Cluster 2 
cluster2BrooklynMerge = cluster2Brooklyn.melt() 
print('Cluster 2:') 
print(cluster2BrooklynMerge['value'].value_counts()[:3])  
# Cluster 3 
cluster3BrooklynMerge = cluster3Brooklyn.melt() 
print('Cluster 3:') 
print(cluster3BrooklynMerge['value'].value_counts()[:3]) 
# Cluster 4 
cluster4BrooklynMerge = cluster4Brooklyn.melt() 
print('Cluster 4:') 
print(cluster4BrooklynMerge['value'].value_counts()[:3]) 
# Cluster 5 
cluster5BrooklynMerge = cluster5Brooklyn.melt() 
print('Cluster 5:') 
print(cluster5BrooklynMerge['value'].value_counts()[:3]) 

Cluster 1:
Pizza Place    21
Café           13
Coffee Shop    13
Name: value, dtype: int64
Cluster 2:
Grocery Store    2
Plaza            1
Gift Shop        1
Name: value, dtype: int64
Cluster 3:
Pizza Place           7
Italian Restaurant    6
Bar                   6
Name: value, dtype: int64
Cluster 4:
Caribbean Restaurant    1
Clothing Store          1
Mobile Phone Shop       1
Name: value, dtype: int64
Cluster 5:
Caribbean Restaurant    7
Pizza Place             7
Coffee Shop             6
Name: value, dtype: int64


Brooklyn Segments: 
    - Cluster 1 (35): Pizza Place 
    - Cluster 2 (2): Grocery Store 
    - Cluster 3 (18): Food Area 
    - Cluster 4 (1): --- 
    - Cluster 5 (14): International Food  

Manhattan and Brooklyn Segments: 

- Manhattan has mainly Italian/American restaurants but Brooklyn has more Pizza places and Caribbean restaurants   
- Manhattan has more coffee places than Brooklyn 

### 6. Conclusion 

In conclusion, 
- Venues in Manhattan are close each other but sparse in Brooklyn 
- Manhattan has mainly Italian/American restaurants but Brooklyn has more Pizza places and Caribbean restaurants   
- Manhattan has more coffee places than Brooklyn 

<hr> 
Coursera Capstone Project. Code Reference: [Lab 2](http://cocl.us/DP0701EN_Coursera_Week3_LAB2)