## Step 0: Download all the libraries needed in this project

In [2]:
import numpy as np 
import pandas as pd 
import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

import matplotlib.cm as cm
import matplotlib.colors as colors

#import k-means from clustering stage
from sklearn.cluster import KMeans


## Step 1: Let's download and explore [NYC neighbourhood geography data]("https://geo.nyu.edu/catalog/nyu_2451_34572") 

In [3]:
with open('ny-geojson.json') as json_data:
    ny_geometry = json.load(json_data)
#ny_geometry

##### It looks we the "features" data is what we wanted.

In [4]:
nbh_geometry = ny_geometry['features']
nbh_geometry[0]

{'type': 'Feature',
 'id': 'nyu_2451_34572.1',
 'geometry': {'type': 'Point',
  'coordinates': [-73.84720052054902, 40.89470517661]},
 'geometry_name': 'geom',
 'properties': {'name': 'Wakefield',
  'stacked': 1,
  'annoline1': 'Wakefield',
  'annoline2': None,
  'annoline3': None,
  'annoangle': 0.0,
  'borough': 'Bronx',
  'bbox': [-73.84720052054902,
   40.89470517661,
   -73.84720052054902,
   40.89470517661]}}

##### Transform nbh_geometry into a pandas dataframe

In [5]:
# Instantiate the dataframe
ny_nbhs = pd.DataFrame(columns=['Borough', 'Neighborhood', 'Latitude', 'Longitude'])

# Go through the data and fill the dataframe one row at a time.
for data in nbh_geometry:
    borough = data['properties']['borough'] 
    nbh_name = data['properties']['name']
        
    nbh_coordinates = data['geometry']['coordinates']
    nbh_latitude = nbh_coordinates[1]
    nbh_lontitude = nbh_coordinates[0]
    
    ny_nbhs = ny_nbhs.append({'Borough': borough,
                              'Neighborhood': nbh_name,
                              'Latitude': nbh_latitude,
                              'Longitude': nbh_lontitude}, ignore_index=True)
print(ny_nbhs.shape)
ny_nbhs.head(10)

(306, 4)


Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585
5,Bronx,Kingsbridge,40.881687,-73.902818
6,Manhattan,Marble Hill,40.876551,-73.91066
7,Bronx,Woodlawn,40.898273,-73.867315
8,Bronx,Norwood,40.877224,-73.879391
9,Bronx,Williamsbridge,40.881039,-73.857446


In [6]:
# Let's see how many boroughs and neighborhoods in our data
print(" There are {} Boroughs and {} Neighborhoods in New York city. ".format(len(ny_nbhs['Borough'].unique()), ny_nbhs.shape[0]))


 There are 5 Boroughs and 306 Neighborhoods in New York city. 


#####  Here we choose Manhattan to explore in this project as an example

In [14]:
borough='Manhattan'
Manhattan_df = ny_nbhs[ny_nbhs['Borough']==borough].reset_index(drop=True)
print("There are {} neighborhoods in borough Manhattan".format(Manhattan_df.shape[0]))
Manhattan_df.head(10)

There are 40 neighborhoods in borough Manhattan


Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688
5,Manhattan,Manhattanville,40.816934,-73.957385
6,Manhattan,Central Harlem,40.815976,-73.943211
7,Manhattan,East Harlem,40.792249,-73.944182
8,Manhattan,Upper East Side,40.775639,-73.960508
9,Manhattan,Yorkville,40.77593,-73.947118


In [6]:
borough='Manhattan'
Brgh_df = ny_nbhs[ny_nbhs['Borough']==borough].reset_index(drop=True)
print(Brgh_df.shape)
Brgh_df.drop(['Borough'], axis=1, inplace = True)
Brgh_df.head(5)

(40, 4)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Marble Hill,40.876551,-73.91066
1,Chinatown,40.715618,-73.994279
2,Washington Heights,40.851903,-73.9369
3,Inwood,40.867684,-73.92121
4,Hamilton Heights,40.823604,-73.949688


In [7]:
print(" There are {} Neighborhoods in {}. ".format(Brgh_df.shape[0],borough))

 There are 40 Neighborhoods in Manhattan. 


In [8]:
geolocator = Nominatim(user_agent="nyc_agent")
location = geolocator.geocode('Manhattan, NY')
latitude = location.latitude
longitude = location.longitude
print('The coordinate of Manhattan is {}, {}.'.format(latitude, longitude))

The coordinate of Manhattan is 40.7896239, -73.9598939.


In [9]:
# create a map of NYC
nyc_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(Brgh_df['Latitude'], Brgh_df['Longitude'], Brgh_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(nyc_map)  
    
nyc_map

## Step 2:  Let's leverage the Foursquare API to explore the selected neighborhoods and segment them.

##### Step 2.1  Set up the URL for Foursquare request

In [10]:
LIMIT = 100
radius = 500

USER_ID = 'VBJRAO5IMFVHJ15014BHKY1WXSLLA1JVQ0L0HQGMAIF4PQLH' 
USER_SECRET = 'GFNHX055SNPUJUVPMUENX3JQKZ5MGXQ5TWQKBHEROMXTR5L3' 
VERSION = '20180605' 


##### Step 2.2  Define functions to extract the category of the venue and venues

In [11]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [12]:
def get_Nbh_Venues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            USER_ID, 
            USER_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 'Venue', 'Latitude', 'Longitude', 'Category']
    
    return(nearby_venues)

##### Step 2.3  Explore selected burough

In [16]:
nbh_venues = get_Nbh_Venues(names=Brgh_df['Neighborhood'],
                            latitudes=Brgh_df['Latitude'],
                            longitudes=Brgh_df['Longitude'])

print(nbh_venues.shape)
nbh_venues.head(10)

(2968, 5)


Unnamed: 0,Neighborhood,Venue,Latitude,Longitude,Category
0,Marble Hill,Arturo's,40.874412,-73.910271,Pizza Place
1,Marble Hill,Bikram Yoga,40.876844,-73.906204,Yoga Studio
2,Marble Hill,Tibbett Diner,40.880404,-73.908937,Diner
3,Marble Hill,Starbucks,40.877531,-73.905582,Coffee Shop
4,Marble Hill,Dunkin',40.877136,-73.906666,Donut Shop
5,Marble Hill,Rite Aid,40.875467,-73.908906,Pharmacy
6,Marble Hill,TCR The Club of Riverdale,40.878628,-73.914568,Tennis Stadium
7,Marble Hill,Land & Sea Restaurant,40.877885,-73.905873,Seafood Restaurant
8,Marble Hill,Starbucks,40.873755,-73.908613,Coffee Shop
9,Marble Hill,Astral Fitness & Wellness Center,40.876705,-73.906372,Gym


In [17]:
nbh_venues.groupby('Neighborhood').count().head(10)

Unnamed: 0_level_0,Venue,Latitude,Longitude,Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Battery Park City,55,55,55,55
Carnegie Hill,87,87,87,87
Central Harlem,44,44,44,44
Chelsea,99,99,99,99
Chinatown,100,100,100,100
Civic Center,89,89,89,89
Clinton,100,100,100,100
East Harlem,44,44,44,44
East Village,100,100,100,100
Financial District,100,100,100,100


In [18]:
venues_df = nbh_venues[nbh_venues['Category'].str.contains('Restaurant')].reset_index(drop=True)
venues_df

Unnamed: 0,Neighborhood,Venue,Latitude,Longitude,Category
0,Marble Hill,Land & Sea Restaurant,40.877885,-73.905873,Seafood Restaurant
1,Marble Hill,Boston Market,40.877430,-73.905412,American Restaurant
2,Chinatown,Kiki's,40.714476,-73.992036,Greek Restaurant
3,Chinatown,The Fat Radish,40.715323,-73.991950,English Restaurant
4,Chinatown,Da Yu Hot Pot 大渝火锅,40.716735,-73.995752,Hotpot Restaurant
...,...,...,...,...,...
769,Hudson Yards,Tavola,40.755531,-73.994769,Italian Restaurant
770,Hudson Yards,Aiyara Thai,40.755056,-73.994760,Thai Restaurant
771,Hudson Yards,Via Trenta,40.753004,-74.002898,Italian Restaurant
772,Hudson Yards,Treadwell,40.759964,-73.996284,Restaurant


In [19]:
nbh_group = venues_df.groupby(['Neighborhood']).count()
nbh_group.reset_index(inplace=True)
nbh_group.sort_values(by=['Venue'], inplace=True, ascending=False)
nbh_group = nbh_group[['Neighborhood','Venue']]
print(nbh_group.shape)
nbh_group.head(10)

(40, 2)


Unnamed: 0,Neighborhood,Venue
27,Noho,41
4,Chinatown,34
8,East Village,32
18,Little Italy,32
12,Greenwich Village,31
34,Turtle Bay,30
39,Yorkville,28
38,West Village,28
26,Murray Hill,27
16,Lenox Hill,26


In [20]:
print('There are {} uniques categories.'.format(len(venues_df['Category'].unique())))

There are 69 uniques categories.


### Step 3, Analyze each neiboughhood

In [21]:
# one hot encoding
venues_onehot = pd.get_dummies(venues_df[['Category']], prefix="",prefix_sep="")
# add neighborhood column back to dataframe
venues_onehot['Neighborhood'] = venues_df['Neighborhood']
# move neighborhood column to the first column
fixed_columns = [venues_onehot.columns[-1]] + list(venues_onehot.columns[:-1])
# check intermidiate variable
print(fixed_columns)
venues_onehot = venues_onehot[fixed_columns]
venues_onehot.head(6)


['Neighborhood', 'Afghan Restaurant', 'African Restaurant', 'American Restaurant', 'Arepa Restaurant', 'Argentinian Restaurant', 'Asian Restaurant', 'Australian Restaurant', 'Austrian Restaurant', 'Brazilian Restaurant', 'Cambodian Restaurant', 'Cantonese Restaurant', 'Caribbean Restaurant', 'Caucasian Restaurant', 'Chinese Restaurant', 'Cuban Restaurant', 'Czech Restaurant', 'Dim Sum Restaurant', 'Dumpling Restaurant', 'Empanada Restaurant', 'English Restaurant', 'Ethiopian Restaurant', 'Falafel Restaurant', 'Fast Food Restaurant', 'Filipino Restaurant', 'French Restaurant', 'German Restaurant', 'Greek Restaurant', 'Hawaiian Restaurant', 'Hotpot Restaurant', 'Indian Restaurant', 'Israeli Restaurant', 'Italian Restaurant', 'Japanese Curry Restaurant', 'Japanese Restaurant', 'Jewish Restaurant', 'Korean Restaurant', 'Kosher Restaurant', 'Latin American Restaurant', 'Lebanese Restaurant', 'Malay Restaurant', 'Mediterranean Restaurant', 'Mexican Restaurant', 'Middle Eastern Restaurant', '

Unnamed: 0,Neighborhood,Afghan Restaurant,African Restaurant,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,Austrian Restaurant,Brazilian Restaurant,...,Sushi Restaurant,Swiss Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Turkish Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Marble Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Marble Hill,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Chinatown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Chinatown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Chinatown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,Chinatown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


##### Fact 1, Caculate venues count Groupby 'Neighborhood' 

In [22]:
venues_cata_group = venues_onehot.groupby('Neighborhood').sum().reset_index()
venues_cata_group.head(6)

Unnamed: 0,Neighborhood,Afghan Restaurant,African Restaurant,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,Austrian Restaurant,Brazilian Restaurant,...,Sushi Restaurant,Swiss Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Turkish Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Battery Park City,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Carnegie Hill,0,0,1,0,1,0,0,0,0,...,1,0,0,0,0,1,0,0,0,2
2,Central Harlem,0,2,2,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
3,Chelsea,0,0,2,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
4,Chinatown,0,0,3,0,0,2,1,1,0,...,0,0,0,1,0,1,0,0,1,2
5,Civic Center,0,0,2,0,0,1,1,0,0,...,3,0,0,0,0,0,0,0,1,0


In [23]:
num_top_venues = 8
indicators = ['st', 'nd', 'rd']
# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))
        
# create a new dataframe
nbhs_sorted = pd.DataFrame(columns=columns)
nbhs_sorted['Neighborhood'] = nbh_group['Neighborhood']

In [24]:
for ind in range(nbh_group.shape[0]):
    hood = nbh_group.iloc[ind,0]
    print("----"+hood+"----")
    temp = venues_cata_group[venues_cata_group['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['Venue','Num']
    temp = temp.iloc[1:]
    temp['Num'] = temp['Num'].astype(int)
    final_temp = temp.sort_values('Num', ascending=False).reset_index(drop=True).head(num_top_venues) 
    final_temp['Num'] = final_temp['Num'].astype(str)
    final_temp['Combined'] = final_temp['Venue']+'('+final_temp['Num']+')'
    final_temp['Combined'].astype(str)  
    print(final_temp)
    print('\n')

    #nbhs_venues_sorted.iloc[ind, 1:] = return_most_common_venues(venues_cata_group.iloc[ind, :], num_top_venues)
    temp=final_temp.T.reset_index()
    nbhs_sorted.iloc[ind, 0] = hood
    nbhs_sorted.iloc[ind, 1:] = list(temp.iloc[2,1:])
    
nbhs_sorted['Total'] = nbh_group['Venue']
#nbhs_sorted.set_index('Total',inplace=True)
   

----Noho----
                     Venue Num                    Combined
0       Italian Restaurant  11      Italian Restaurant(11)
1      Japanese Restaurant   4      Japanese Restaurant(4)
2       Seafood Restaurant   3       Seafood Restaurant(3)
3    Vietnamese Restaurant   2    Vietnamese Restaurant(2)
4         Asian Restaurant   2         Asian Restaurant(2)
5  New American Restaurant   2  New American Restaurant(2)
6          Thai Restaurant   2          Thai Restaurant(2)
7         Greek Restaurant   2         Greek Restaurant(2)


----Chinatown----
                   Venue Num                  Combined
0     Chinese Restaurant   4     Chinese Restaurant(4)
1    American Restaurant   3    American Restaurant(3)
2  Vietnamese Restaurant   2  Vietnamese Restaurant(2)
3       Greek Restaurant   2       Greek Restaurant(2)
4       Malay Restaurant   2       Malay Restaurant(2)
5       Asian Restaurant   2       Asian Restaurant(2)
6    Shanghai Restaurant   2    Shanghai Restaurant

----Sutton Place----
                           Venue Num                          Combined
0             Italian Restaurant   6             Italian Restaurant(6)
1      Latin American Restaurant   2      Latin American Restaurant(2)
2             Mexican Restaurant   2             Mexican Restaurant(2)
3  Vegetarian / Vegan Restaurant   2  Vegetarian / Vegan Restaurant(2)
4               Sushi Restaurant   1               Sushi Restaurant(1)
5              Indian Restaurant   1              Indian Restaurant(1)
6             Persian Restaurant   1             Persian Restaurant(1)
7              French Restaurant   1              French Restaurant(1)


----Civic Center----
                        Venue Num                       Combined
0           French Restaurant   4           French Restaurant(4)
1            Sushi Restaurant   3            Sushi Restaurant(3)
2         American Restaurant   2         American Restaurant(2)
3  Modern European Restaurant   1  Modern European Restau

In [25]:
print('The rows of nbhs_sorted is {} '.format(nbhs_sorted.shape[0]))    
nbhs_sorted.head(5)

The rows of nbhs_sorted is 40 


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,Total
27,Noho,Italian Restaurant(11),Japanese Restaurant(4),Seafood Restaurant(3),Vietnamese Restaurant(2),Asian Restaurant(2),New American Restaurant(2),Thai Restaurant(2),Greek Restaurant(2),41
4,Chinatown,Chinese Restaurant(4),American Restaurant(3),Vietnamese Restaurant(2),Greek Restaurant(2),Malay Restaurant(2),Asian Restaurant(2),Shanghai Restaurant(2),Mexican Restaurant(2),34
8,East Village,Vietnamese Restaurant(3),Japanese Restaurant(3),Seafood Restaurant(2),Italian Restaurant(2),New American Restaurant(2),Greek Restaurant(2),Ramen Restaurant(2),Filipino Restaurant(2),32
18,Little Italy,Italian Restaurant(4),Chinese Restaurant(4),Mediterranean Restaurant(3),Thai Restaurant(3),Seafood Restaurant(2),French Restaurant(2),Cuban Restaurant(2),Vietnamese Restaurant(2),32
12,Greenwich Village,Italian Restaurant(9),Chinese Restaurant(3),Sushi Restaurant(3),Mediterranean Restaurant(2),French Restaurant(1),Restaurant(1),Falafel Restaurant(1),Seafood Restaurant(1),31


### Step 4,  Cluster neighborhoods

In [26]:
venues_grouped = venues_onehot.groupby('Neighborhood').mean().reset_index()
venues_grouped.head(6)

Unnamed: 0,Neighborhood,Afghan Restaurant,African Restaurant,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,Austrian Restaurant,Brazilian Restaurant,...,Sushi Restaurant,Swiss Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Turkish Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Battery Park City,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Carnegie Hill,0.0,0.0,0.05,0.0,0.05,0.0,0.0,0.0,0.0,...,0.05,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.1
2,Central Harlem,0.0,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0
3,Chelsea,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0
4,Chinatown,0.0,0.0,0.088235,0.0,0.0,0.058824,0.029412,0.029412,0.0,...,0.0,0.0,0.0,0.029412,0.0,0.029412,0.0,0.0,0.029412,0.058824
5,Civic Center,0.0,0.0,0.095238,0.0,0.0,0.047619,0.047619,0.0,0.0,...,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0


In [27]:
# Default centroid neighborhood
nbhd_centroids = ['Greenwich Village','Chinatown','East Village','Tribeca','Midtown','Civic Center','Manhattan Valley','Roosevelt Island']
# centroid array
centroids = venues_grouped[venues_grouped.Neighborhood.isin(nbhd_centroids)]
#centroids.head(9)
centroids = centroids.drop('Neighborhood', 1)

##### Run k-means to cluster the Borough into 5 clusters.

In [28]:
# set number of clusters
kclusters = 8
nbhs_grouped_clustering = venues_grouped.drop('Neighborhood', 1)
# run k-means clustering
# set the initiate centroids to init paramater
kmeans = KMeans(init= centroids, n_clusters=kclusters,n_init=1, random_state=0).fit(nbhs_grouped_clustering)
# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]


array([3, 3, 0, 3, 0, 5, 7, 4, 4, 0])

In [29]:
kmeans.cluster_centers_[0,:]


array([0.        , 0.02857143, 0.16276261, 0.        , 0.        ,
       0.01801471, 0.01213235, 0.00588235, 0.        , 0.        ,
       0.00588235, 0.01428571, 0.        , 0.07710084, 0.0125    ,
       0.        , 0.01801471, 0.01801471, 0.        , 0.00588235,
       0.03928571, 0.04154412, 0.01176471, 0.        , 0.04107143,
       0.        , 0.01176471, 0.01176471, 0.01213235, 0.025     ,
       0.        , 0.04852941, 0.        , 0.02977941, 0.        ,
       0.00588235, 0.        , 0.        , 0.        , 0.01801471,
       0.05551471, 0.02352941, 0.        , 0.        , 0.        ,
       0.        , 0.04264706, 0.        , 0.        , 0.        ,
       0.        , 0.01176471, 0.        , 0.07783613, 0.01176471,
       0.        , 0.01428571, 0.01213235, 0.        , 0.        ,
       0.        , 0.        , 0.00588235, 0.01428571, 0.02463235,
       0.        , 0.        , 0.00588235, 0.02426471])

##### Now create a new dataframe that includes the cluster as well as the top 10 venues for each postcode neighborhood

In [30]:
nbhs_cluster = nbhs_sorted
#nbhs_venues_sorted
nbhs_cluster.insert(0, 'Cluster Label', kmeans.labels_)
nbhs_cluster.head(5)

Unnamed: 0,Cluster Label,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,Total
27,3,Noho,Italian Restaurant(11),Japanese Restaurant(4),Seafood Restaurant(3),Vietnamese Restaurant(2),Asian Restaurant(2),New American Restaurant(2),Thai Restaurant(2),Greek Restaurant(2),41
4,3,Chinatown,Chinese Restaurant(4),American Restaurant(3),Vietnamese Restaurant(2),Greek Restaurant(2),Malay Restaurant(2),Asian Restaurant(2),Shanghai Restaurant(2),Mexican Restaurant(2),34
8,0,East Village,Vietnamese Restaurant(3),Japanese Restaurant(3),Seafood Restaurant(2),Italian Restaurant(2),New American Restaurant(2),Greek Restaurant(2),Ramen Restaurant(2),Filipino Restaurant(2),32
18,3,Little Italy,Italian Restaurant(4),Chinese Restaurant(4),Mediterranean Restaurant(3),Thai Restaurant(3),Seafood Restaurant(2),French Restaurant(2),Cuban Restaurant(2),Vietnamese Restaurant(2),32
12,0,Greenwich Village,Italian Restaurant(9),Chinese Restaurant(3),Sushi Restaurant(3),Mediterranean Restaurant(2),French Restaurant(1),Restaurant(1),Falafel Restaurant(1),Seafood Restaurant(1),31


In [31]:
#let's see these two list's shape before we merge them into one dataframe
print(nbhs_cluster.shape)
print(Brgh_df.shape)

(40, 11)
(40, 3)


In [32]:
nbhs_merged = Brgh_df
nbhs_merged = nbhs_merged.join(nbhs_cluster.set_index('Neighborhood'), on='Neighborhood')
nbhs_merged = nbhs_merged[nbhs_merged['Cluster Label'].notna()]
nbhs_merged['Cluster Label']= nbhs_merged['Cluster Label'].astype(int)
nbhs_merged.sort_values('Total', ascending=False, inplace = True)

print(nbhs_merged.shape)
nbhs_merged.head(10)

(40, 13)


Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,Total
31,Noho,40.723259,-73.988434,3,Italian Restaurant(11),Japanese Restaurant(4),Seafood Restaurant(3),Vietnamese Restaurant(2),Asian Restaurant(2),New American Restaurant(2),Thai Restaurant(2),Greek Restaurant(2),41
1,Chinatown,40.715618,-73.994279,3,Chinese Restaurant(4),American Restaurant(3),Vietnamese Restaurant(2),Greek Restaurant(2),Malay Restaurant(2),Asian Restaurant(2),Shanghai Restaurant(2),Mexican Restaurant(2),34
19,East Village,40.727847,-73.982226,0,Vietnamese Restaurant(3),Japanese Restaurant(3),Seafood Restaurant(2),Italian Restaurant(2),New American Restaurant(2),Greek Restaurant(2),Ramen Restaurant(2),Filipino Restaurant(2),32
22,Little Italy,40.719324,-73.997305,3,Italian Restaurant(4),Chinese Restaurant(4),Mediterranean Restaurant(3),Thai Restaurant(3),Seafood Restaurant(2),French Restaurant(2),Cuban Restaurant(2),Vietnamese Restaurant(2),32
18,Greenwich Village,40.726933,-73.999914,0,Italian Restaurant(9),Chinese Restaurant(3),Sushi Restaurant(3),Mediterranean Restaurant(2),French Restaurant(1),Restaurant(1),Falafel Restaurant(1),Seafood Restaurant(1),31
35,Turtle Bay,40.752042,-73.967708,5,Italian Restaurant(5),Sushi Restaurant(4),French Restaurant(3),American Restaurant(2),Greek Restaurant(2),Turkish Restaurant(2),Thai Restaurant(2),Seafood Restaurant(2),30
9,Yorkville,40.77593,-73.947118,7,Italian Restaurant(7),Sushi Restaurant(4),Japanese Restaurant(3),Mexican Restaurant(3),Chinese Restaurant(2),Vietnamese Restaurant(2),Thai Restaurant(1),Asian Restaurant(1),28
24,West Village,40.734434,-74.00618,4,Italian Restaurant(6),New American Restaurant(5),American Restaurant(4),Seafood Restaurant(3),Sushi Restaurant(1),Middle Eastern Restaurant(1),Korean Restaurant(1),Japanese Restaurant(1),28
16,Murray Hill,40.748303,-73.978332,4,Sushi Restaurant(3),Chinese Restaurant(3),Jewish Restaurant(2),American Restaurant(2),Mediterranean Restaurant(2),Japanese Restaurant(2),Indian Restaurant(2),French Restaurant(1),27
33,Midtown South,40.74851,-73.988713,3,Korean Restaurant(13),Japanese Restaurant(4),New American Restaurant(2),Sushi Restaurant(2),American Restaurant(1),Lebanese Restaurant(1),Persian Restaurant(1),Cuban Restaurant(1),26


In [33]:
nbhs_merged.head(50)

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,Total
31,Noho,40.723259,-73.988434,3,Italian Restaurant(11),Japanese Restaurant(4),Seafood Restaurant(3),Vietnamese Restaurant(2),Asian Restaurant(2),New American Restaurant(2),Thai Restaurant(2),Greek Restaurant(2),41
1,Chinatown,40.715618,-73.994279,3,Chinese Restaurant(4),American Restaurant(3),Vietnamese Restaurant(2),Greek Restaurant(2),Malay Restaurant(2),Asian Restaurant(2),Shanghai Restaurant(2),Mexican Restaurant(2),34
19,East Village,40.727847,-73.982226,0,Vietnamese Restaurant(3),Japanese Restaurant(3),Seafood Restaurant(2),Italian Restaurant(2),New American Restaurant(2),Greek Restaurant(2),Ramen Restaurant(2),Filipino Restaurant(2),32
22,Little Italy,40.719324,-73.997305,3,Italian Restaurant(4),Chinese Restaurant(4),Mediterranean Restaurant(3),Thai Restaurant(3),Seafood Restaurant(2),French Restaurant(2),Cuban Restaurant(2),Vietnamese Restaurant(2),32
18,Greenwich Village,40.726933,-73.999914,0,Italian Restaurant(9),Chinese Restaurant(3),Sushi Restaurant(3),Mediterranean Restaurant(2),French Restaurant(1),Restaurant(1),Falafel Restaurant(1),Seafood Restaurant(1),31
35,Turtle Bay,40.752042,-73.967708,5,Italian Restaurant(5),Sushi Restaurant(4),French Restaurant(3),American Restaurant(2),Greek Restaurant(2),Turkish Restaurant(2),Thai Restaurant(2),Seafood Restaurant(2),30
9,Yorkville,40.77593,-73.947118,7,Italian Restaurant(7),Sushi Restaurant(4),Japanese Restaurant(3),Mexican Restaurant(3),Chinese Restaurant(2),Vietnamese Restaurant(2),Thai Restaurant(1),Asian Restaurant(1),28
24,West Village,40.734434,-74.00618,4,Italian Restaurant(6),New American Restaurant(5),American Restaurant(4),Seafood Restaurant(3),Sushi Restaurant(1),Middle Eastern Restaurant(1),Korean Restaurant(1),Japanese Restaurant(1),28
16,Murray Hill,40.748303,-73.978332,4,Sushi Restaurant(3),Chinese Restaurant(3),Jewish Restaurant(2),American Restaurant(2),Mediterranean Restaurant(2),Japanese Restaurant(2),Indian Restaurant(2),French Restaurant(1),27
33,Midtown South,40.74851,-73.988713,3,Korean Restaurant(13),Japanese Restaurant(4),New American Restaurant(2),Sushi Restaurant(2),American Restaurant(1),Lebanese Restaurant(1),Persian Restaurant(1),Cuban Restaurant(1),26


In [34]:
latitude = location.latitude
longitude = location.longitude
print('The coordinate are {}, {}.'.format(latitude, longitude))
# create map
map_clusters = folium.Map(location=[latitude,longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(nbhs_merged['Latitude'], nbhs_merged['Longitude'], nbhs_merged['Neighborhood'], nbhs_merged['Cluster Label']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

The coordinate are 40.7896239, -73.9598939.


### Step 5,  Examine Clusters

In [35]:
column_num = nbhs_merged.shape[1]

##### Cluster 1

In [36]:
nbhs_merged.loc[nbhs_merged['Cluster Label'] == 0]

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,Total
19,East Village,40.727847,-73.982226,0,Vietnamese Restaurant(3),Japanese Restaurant(3),Seafood Restaurant(2),Italian Restaurant(2),New American Restaurant(2),Greek Restaurant(2),Ramen Restaurant(2),Filipino Restaurant(2),32
18,Greenwich Village,40.726933,-73.999914,0,Italian Restaurant(9),Chinese Restaurant(3),Sushi Restaurant(3),Mediterranean Restaurant(2),French Restaurant(1),Restaurant(1),Falafel Restaurant(1),Seafood Restaurant(1),31
10,Lenox Hill,40.768113,-73.95886,0,Italian Restaurant(6),Sushi Restaurant(4),Thai Restaurant(2),Mexican Restaurant(2),French Restaurant(1),Restaurant(1),Greek Restaurant(1),Czech Restaurant(1),26
23,Soho,40.722184,-74.000657,0,Italian Restaurant(7),Mediterranean Restaurant(3),French Restaurant(2),Seafood Restaurant(1),Spanish Restaurant(1),American Restaurant(1),Australian Restaurant(1),Thai Restaurant(1),19
5,Manhattanville,40.816934,-73.957385,0,Seafood Restaurant(3),Mexican Restaurant(2),Italian Restaurant(2),Sushi Restaurant(1),Indian Restaurant(1),Falafel Restaurant(1),Dumpling Restaurant(1),Spanish Restaurant(1),17


In [37]:
nbhs_merged.loc[nbhs_merged['Cluster Label'] == 1]

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,Total
29,Financial District,40.707107,-74.010665,1,American Restaurant(3),Falafel Restaurant(3),Japanese Restaurant(2),Italian Restaurant(2),Restaurant(1),Mediterranean Restaurant(1),Mexican Restaurant(1),Seafood Restaurant(1),17


In [38]:
nbhs_merged.loc[nbhs_merged['Cluster Label'] == 2]

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,Total
13,Lincoln Square,40.773529,-73.985338,2,Italian Restaurant(4),American Restaurant(3),Mediterranean Restaurant(2),French Restaurant(2),Seafood Restaurant(1),Chinese Restaurant(1),Greek Restaurant(1),New American Restaurant(0),14


In [39]:
nbhs_merged.loc[nbhs_merged['Cluster Label'] == 3]

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,Total
31,Noho,40.723259,-73.988434,3,Italian Restaurant(11),Japanese Restaurant(4),Seafood Restaurant(3),Vietnamese Restaurant(2),Asian Restaurant(2),New American Restaurant(2),Thai Restaurant(2),Greek Restaurant(2),41
1,Chinatown,40.715618,-73.994279,3,Chinese Restaurant(4),American Restaurant(3),Vietnamese Restaurant(2),Greek Restaurant(2),Malay Restaurant(2),Asian Restaurant(2),Shanghai Restaurant(2),Mexican Restaurant(2),34
22,Little Italy,40.719324,-73.997305,3,Italian Restaurant(4),Chinese Restaurant(4),Mediterranean Restaurant(3),Thai Restaurant(3),Seafood Restaurant(2),French Restaurant(2),Cuban Restaurant(2),Vietnamese Restaurant(2),32
33,Midtown South,40.74851,-73.988713,3,Korean Restaurant(13),Japanese Restaurant(4),New American Restaurant(2),Sushi Restaurant(2),American Restaurant(1),Lebanese Restaurant(1),Persian Restaurant(1),Cuban Restaurant(1),26
8,Upper East Side,40.775639,-73.960508,3,Italian Restaurant(8),American Restaurant(3),Sushi Restaurant(2),French Restaurant(2),Chinese Restaurant(1),Mexican Restaurant(1),North Indian Restaurant(1),Latin American Restaurant(1),25
2,Washington Heights,40.851903,-73.9369,3,Chinese Restaurant(3),Latin American Restaurant(2),New American Restaurant(2),Mexican Restaurant(2),Spanish Restaurant(2),Tapas Restaurant(2),Caribbean Restaurant(1),Restaurant(1),23
32,Civic Center,40.715229,-74.005415,3,French Restaurant(4),Sushi Restaurant(3),American Restaurant(2),Modern European Restaurant(1),Falafel Restaurant(1),New American Restaurant(1),Cuban Restaurant(1),Italian Restaurant(1),21
7,East Harlem,40.792249,-73.944182,3,Mexican Restaurant(5),Thai Restaurant(3),Seafood Restaurant(1),Spanish Restaurant(1),Fast Food Restaurant(1),Latin American Restaurant(1),French Restaurant(1),Cuban Restaurant(1),16
25,Manhattan Valley,40.797307,-73.964286,3,Mexican Restaurant(2),Thai Restaurant(2),Chinese Restaurant(2),Vietnamese Restaurant(1),Caribbean Restaurant(1),Korean Restaurant(1),Italian Restaurant(1),Indian Restaurant(1),15
6,Central Harlem,40.815976,-73.943211,3,Chinese Restaurant(2),American Restaurant(2),African Restaurant(2),Seafood Restaurant(2),French Restaurant(2),Caribbean Restaurant(1),Ethiopian Restaurant(1),Tapas Restaurant(1),14


In [40]:
nbhs_merged.loc[nbhs_merged['Cluster Label'] == 4]

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,Total
24,West Village,40.734434,-74.00618,4,Italian Restaurant(6),New American Restaurant(5),American Restaurant(4),Seafood Restaurant(3),Sushi Restaurant(1),Middle Eastern Restaurant(1),Korean Restaurant(1),Japanese Restaurant(1),28
16,Murray Hill,40.748303,-73.978332,4,Sushi Restaurant(3),Chinese Restaurant(3),Jewish Restaurant(2),American Restaurant(2),Mediterranean Restaurant(2),Japanese Restaurant(2),Indian Restaurant(2),French Restaurant(1),27
36,Tudor City,40.746917,-73.971219,4,Mexican Restaurant(3),Vietnamese Restaurant(2),Thai Restaurant(2),Sushi Restaurant(2),Restaurant(2),Greek Restaurant(2),French Restaurant(1),Spanish Restaurant(1),22
34,Sutton Place,40.76028,-73.963556,4,Italian Restaurant(6),Latin American Restaurant(2),Mexican Restaurant(2),Vegetarian / Vegan Restaurant(2),Sushi Restaurant(1),Indian Restaurant(1),Persian Restaurant(1),French Restaurant(1),22
21,Tribeca,40.721522,-74.010683,4,Italian Restaurant(5),American Restaurant(3),Greek Restaurant(2),Seafood Restaurant(1),Argentinian Restaurant(1),Korean Restaurant(1),French Restaurant(1),Modern European Restaurant(1),18
17,Chelsea,40.744035,-74.003116,4,Seafood Restaurant(3),Italian Restaurant(3),American Restaurant(2),Spanish Restaurant(1),Chinese Restaurant(1),Ramen Restaurant(1),Israeli Restaurant(1),Indian Restaurant(1),18
15,Midtown,40.754691,-73.981669,4,Cuban Restaurant(3),French Restaurant(2),Mediterranean Restaurant(2),Japanese Restaurant(2),Vietnamese Restaurant(1),Italian Restaurant(1),South American Restaurant(1),Indian Restaurant(1),18
27,Gramercy,40.73721,-73.981376,4,Italian Restaurant(4),American Restaurant(2),Mexican Restaurant(2),Vietnamese Restaurant(1),Thai Restaurant(1),Restaurant(1),Sushi Restaurant(1),Moroccan Restaurant(0),12
11,Roosevelt Island,40.76216,-73.949168,4,Greek Restaurant(1),Japanese Restaurant(1),Afghan Restaurant(0),Peruvian Restaurant(0),Persian Restaurant(0),North Indian Restaurant(0),New American Restaurant(0),Moroccan Restaurant(0),2


In [41]:
nbhs_merged.loc[nbhs_merged['Cluster Label'] == 5]

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,Total
35,Turtle Bay,40.752042,-73.967708,5,Italian Restaurant(5),Sushi Restaurant(4),French Restaurant(3),American Restaurant(2),Greek Restaurant(2),Turkish Restaurant(2),Thai Restaurant(2),Seafood Restaurant(2),30
4,Hamilton Heights,40.823604,-73.949688,5,Mexican Restaurant(3),Sushi Restaurant(2),Chinese Restaurant(2),Caribbean Restaurant(2),Indian Restaurant(2),Italian Restaurant(1),Seafood Restaurant(1),Latin American Restaurant(1),17
38,Flatiron,40.739673,-73.990947,5,Italian Restaurant(4),American Restaurant(2),Vegetarian / Vegan Restaurant(2),Mediterranean Restaurant(2),Japanese Restaurant(2),Indian Restaurant(1),Fast Food Restaurant(1),New American Restaurant(1),16


In [42]:
nbhs_merged.loc[nbhs_merged['Cluster Label'] == 6]

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,Total
14,Clinton,40.759101,-73.996119,6,American Restaurant(3),Italian Restaurant(3),Thai Restaurant(2),Seafood Restaurant(1),New American Restaurant(1),Korean Restaurant(1),Mediterranean Restaurant(1),Peruvian Restaurant(1),16


In [43]:
nbhs_merged.loc[nbhs_merged['Cluster Label'] == 7]

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,Total
9,Yorkville,40.77593,-73.947118,7,Italian Restaurant(7),Sushi Restaurant(4),Japanese Restaurant(3),Mexican Restaurant(3),Chinese Restaurant(2),Vietnamese Restaurant(2),Thai Restaurant(1),Asian Restaurant(1),28
12,Upper West Side,40.787658,-73.977059,7,Italian Restaurant(4),Mediterranean Restaurant(2),Middle Eastern Restaurant(2),Indian Restaurant(2),Seafood Restaurant(2),Sushi Restaurant(1),Greek Restaurant(1),American Restaurant(1),22
30,Carnegie Hill,40.782683,-73.953256,7,Japanese Restaurant(3),Italian Restaurant(3),Vietnamese Restaurant(2),French Restaurant(2),Restaurant(1),Indian Restaurant(1),Sushi Restaurant(1),Fast Food Restaurant(1),20
3,Inwood,40.867684,-73.92121,7,Mexican Restaurant(4),Restaurant(3),American Restaurant(2),Chinese Restaurant(2),Empanada Restaurant(1),Fast Food Restaurant(1),Latin American Restaurant(1),Spanish Restaurant(1),17
