## Step 0: Download all the libraries needed in this project

In [1]:
import numpy as np 
import pandas as pd 
import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

import matplotlib.cm as cm
import matplotlib.colors as colors

#import k-means from clustering stage
from sklearn.cluster import KMeans


## Step 1: Let's download and explore [NYC neighbourhood geography data]("https://geo.nyu.edu/catalog/nyu_2451_34572") 

In [4]:
with open('ny-geojson.json') as json_data:
    ny_geometry = json.load(json_data)

# ny_geometry

##### It looks we the "features" data is what we wanted.

In [5]:
nbh_geometry = ny_geometry['features']
nbh_geometry[0]

{'type': 'Feature',
 'id': 'nyu_2451_34572.1',
 'geometry': {'type': 'Point',
  'coordinates': [-73.84720052054902, 40.89470517661]},
 'geometry_name': 'geom',
 'properties': {'name': 'Wakefield',
  'stacked': 1,
  'annoline1': 'Wakefield',
  'annoline2': None,
  'annoline3': None,
  'annoangle': 0.0,
  'borough': 'Bronx',
  'bbox': [-73.84720052054902,
   40.89470517661,
   -73.84720052054902,
   40.89470517661]}}

##### Transform nbh_geometry into a pandas dataframe

In [6]:
# Instantiate the dataframe
ny_nbhs = pd.DataFrame(columns=['Borough', 'Neighborhood', 'Latitude', 'Longitude'])

# Go through the data and fill the dataframe one row at a time.
for data in nbh_geometry:
    borough = data['properties']['borough'] 
    nbh_name = data['properties']['name']
        
    nbh_coordinates = data['geometry']['coordinates']
    nbh_latitude = nbh_coordinates[1]
    nbh_lontitude = nbh_coordinates[0]
    
    ny_nbhs = ny_nbhs.append({'Borough': borough,
                              'Neighborhood': nbh_name,
                              'Latitude': nbh_latitude,
                              'Longitude': nbh_lontitude}, ignore_index=True)
ny_nbhs.head(10)

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585
5,Bronx,Kingsbridge,40.881687,-73.902818
6,Manhattan,Marble Hill,40.876551,-73.91066
7,Bronx,Woodlawn,40.898273,-73.867315
8,Bronx,Norwood,40.877224,-73.879391
9,Bronx,Williamsbridge,40.881039,-73.857446


In [7]:
# Let's see how many boroughs and neighborhoods in our data
print(" There are {} Boroughs and {} Neighborhoods in ny_nbhs. ".format(len(ny_nbhs['Borough'].unique()), ny_nbhs.shape[0]))


 There are 5 Boroughs and 306 Neighborhoods in ny_nbhs. 


#####  Here we choose Manhattan to explore in this project as an example

In [8]:
Brgh_df = ny_nbhs[ny_nbhs['Borough']=='Manhattan'].reset_index(drop=True)
print(Brgh_df.shape)
Brgh_df.head(10)

(40, 4)


Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688
5,Manhattan,Manhattanville,40.816934,-73.957385
6,Manhattan,Central Harlem,40.815976,-73.943211
7,Manhattan,East Harlem,40.792249,-73.944182
8,Manhattan,Upper East Side,40.775639,-73.960508
9,Manhattan,Yorkville,40.77593,-73.947118


In [9]:
print(" There are {} Boroughs and {} Neighborhoods in ny_nbhs. ".format(len(Brgh_df['Borough'].unique()), Brgh_df.shape[0]))

 There are 1 Boroughs and 40 Neighborhoods in ny_nbhs. 


In [10]:
geolocator = Nominatim(user_agent="nyc_agent")
location = geolocator.geocode('Manhattan, NY')
latitude = location.latitude
longitude = location.longitude
print('The coordinate of Manhattan is {}, {}.'.format(latitude, longitude))

The coordinate of Manhattan is 40.7896239, -73.9598939.


In [11]:

# create a map of NYC
nyc_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, borough, neighborhood in zip(Brgh_df['Latitude'], Brgh_df['Longitude'], Brgh_df['Borough'], Brgh_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(nyc_map)  
    
nyc_map

## Step 2:  Let's leverage the Foursquare API to explore the selected neighborhoods and segment them.

##### Step 2.1  Set up the URL for Foursquare request

In [12]:
LIMIT = 100
radius = 500

USER_ID = 'VBJRAO5IMFVHJ15014BHKY1WXSLLA1JVQ0L0HQGMAIF4PQLH' 
USER_SECRET = 'GFNHX055SNPUJUVPMUENX3JQKZ5MGXQ5TWQKBHEROMXTR5L3' 
VERSION = '20180605' 


##### Step 2.2  Define functions to extract the category of the venue and venues

In [13]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [14]:
def get_Nbh_Venues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            USER_ID, 
            USER_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 'Venue', 'Latitude', 'Longitude', 'Category']
    
    return(nearby_venues)

##### Step 2.3  Explore selected burough

In [15]:
nbh_venues = get_Nbh_Venues(names=Brgh_df['Neighborhood'],
                            latitudes=Brgh_df['Latitude'],
                            longitudes=Brgh_df['Longitude'])

print(nbh_venues.shape)
nbh_venues.head(15)

(3297, 5)


Unnamed: 0,Neighborhood,Venue,Latitude,Longitude,Category
0,Marble Hill,Arturo's,40.874412,-73.910271,Pizza Place
1,Marble Hill,Bikram Yoga,40.876844,-73.906204,Yoga Studio
2,Marble Hill,Tibbett Diner,40.880404,-73.908937,Diner
3,Marble Hill,Dunkin',40.877136,-73.906666,Donut Shop
4,Marble Hill,Starbucks,40.877531,-73.905582,Coffee Shop
5,Marble Hill,Astral Fitness & Wellness Center,40.876705,-73.906372,Gym
6,Marble Hill,Blink Fitness,40.877271,-73.905595,Gym
7,Marble Hill,T.J. Maxx,40.877232,-73.905042,Department Store
8,Marble Hill,Land & Sea Restaurant,40.877885,-73.905873,Seafood Restaurant
9,Marble Hill,TCR The Club of Riverdale,40.878628,-73.914568,Tennis Stadium


In [16]:
nbh_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Venue,Latitude,Longitude,Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Battery Park City,89,89,89,89
Carnegie Hill,100,100,100,100
Central Harlem,43,43,43,43
Chelsea,100,100,100,100
Chinatown,100,100,100,100
Civic Center,100,100,100,100
Clinton,100,100,100,100
East Harlem,42,42,42,42
East Village,100,100,100,100
Financial District,100,100,100,100


In [17]:
nbh_venues.head(10)

Unnamed: 0,Neighborhood,Venue,Latitude,Longitude,Category
0,Marble Hill,Arturo's,40.874412,-73.910271,Pizza Place
1,Marble Hill,Bikram Yoga,40.876844,-73.906204,Yoga Studio
2,Marble Hill,Tibbett Diner,40.880404,-73.908937,Diner
3,Marble Hill,Dunkin',40.877136,-73.906666,Donut Shop
4,Marble Hill,Starbucks,40.877531,-73.905582,Coffee Shop
5,Marble Hill,Astral Fitness & Wellness Center,40.876705,-73.906372,Gym
6,Marble Hill,Blink Fitness,40.877271,-73.905595,Gym
7,Marble Hill,T.J. Maxx,40.877232,-73.905042,Department Store
8,Marble Hill,Land & Sea Restaurant,40.877885,-73.905873,Seafood Restaurant
9,Marble Hill,TCR The Club of Riverdale,40.878628,-73.914568,Tennis Stadium


In [18]:
venues_df = nbh_venues[nbh_venues['Category'].str.contains('Restaurant')].reset_index(drop=True)
venues_df

Unnamed: 0,Neighborhood,Venue,Latitude,Longitude,Category
0,Marble Hill,Land & Sea Restaurant,40.877885,-73.905873,Seafood Restaurant
1,Marble Hill,Boston Market,40.877430,-73.905412,American Restaurant
2,Chinatown,Da Yu Hot Pot 大渝火锅,40.716735,-73.995752,Hotpot Restaurant
3,Chinatown,Kiki's,40.714476,-73.992036,Greek Restaurant
4,Chinatown,Spicy Village,40.717010,-73.993530,Chinese Restaurant
...,...,...,...,...,...
939,Hudson Yards,Via Trenta,40.753004,-74.002898,Italian Restaurant
940,Hudson Yards,Thai Select,40.754867,-73.995007,Thai Restaurant
941,Hudson Yards,La Barra,40.752849,-74.001750,Tapas Restaurant
942,Hudson Yards,Treadwell,40.759964,-73.996284,Restaurant


In [19]:
#Let's see how many venues for every Category in each neighborhood
venues_group_count = venues_df.groupby(['Neighborhood','Category'])["Venue"].nunique().to_frame()
venues_group_count.sort_values(by=['Neighborhood','Venue'], inplace=True, ascending=False)
venues_group_count.head(250)

Unnamed: 0_level_0,Unnamed: 1_level_0,Venue
Neighborhood,Category,Unnamed: 2_level_1
Yorkville,Italian Restaurant,7
Yorkville,Japanese Restaurant,3
Yorkville,Mexican Restaurant,3
Yorkville,Sushi Restaurant,3
Yorkville,Vietnamese Restaurant,2
...,...,...
Manhattanville,Japanese Curry Restaurant,1
Manhattanville,Ramen Restaurant,1
Manhattanville,Spanish Restaurant,1
Manhattanville,Sushi Restaurant,1


In [20]:
print('There are {} uniques categories.'.format(len(venues_df['Category'].unique())))

There are 77 uniques categories.


### Step 3, Analyze each neiboughhood

In [21]:
# one hot encoding
venues_onehot = pd.get_dummies(venues_df[['Category']], prefix="",prefix_sep="")
# add neighborhood column back to dataframe
venues_onehot['Neighborhood'] = venues_df['Neighborhood']
# move neighborhood column to the first column
fixed_columns = [venues_onehot.columns[-1]] + list(venues_onehot.columns[:-1])
# check intermidiate variable
print(fixed_columns)
venues_onehot = venues_onehot[fixed_columns]
venues_onehot.head(10)


['Neighborhood', 'Afghan Restaurant', 'African Restaurant', 'American Restaurant', 'Arepa Restaurant', 'Argentinian Restaurant', 'Asian Restaurant', 'Australian Restaurant', 'Austrian Restaurant', 'Brazilian Restaurant', 'Cajun / Creole Restaurant', 'Cambodian Restaurant', 'Cantonese Restaurant', 'Caribbean Restaurant', 'Caucasian Restaurant', 'Chinese Restaurant', 'Cuban Restaurant', 'Czech Restaurant', 'Dim Sum Restaurant', 'Dumpling Restaurant', 'Eastern European Restaurant', 'Empanada Restaurant', 'English Restaurant', 'Ethiopian Restaurant', 'Falafel Restaurant', 'Fast Food Restaurant', 'Filipino Restaurant', 'French Restaurant', 'German Restaurant', 'Greek Restaurant', 'Hawaiian Restaurant', 'Himalayan Restaurant', 'Hotpot Restaurant', 'Indian Restaurant', 'Israeli Restaurant', 'Italian Restaurant', 'Japanese Curry Restaurant', 'Japanese Restaurant', 'Jewish Restaurant', 'Kebab Restaurant', 'Korean Restaurant', 'Kosher Restaurant', 'Latin American Restaurant', 'Lebanese Restauran

Unnamed: 0,Neighborhood,Afghan Restaurant,African Restaurant,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,Austrian Restaurant,Brazilian Restaurant,...,Swiss Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Turkish Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Vietnamese Restaurant
0,Marble Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Marble Hill,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Chinatown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Chinatown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Chinatown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,Chinatown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,Chinatown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,Chinatown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,Chinatown,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,Chinatown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
venues_grouped = venues_onehot.groupby('Neighborhood').mean().reset_index()
venues_grouped

Unnamed: 0,Neighborhood,Afghan Restaurant,African Restaurant,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,Austrian Restaurant,Brazilian Restaurant,...,Swiss Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Turkish Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Vietnamese Restaurant
0,Battery Park City,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Carnegie Hill,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.086957
2,Central Harlem,0.0,0.2,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0
3,Chelsea,0.0,0.0,0.111111,0.0,0.0,0.037037,0.0,0.0,0.0,...,0.0,0.0,0.0,0.074074,0.037037,0.0,0.0,0.037037,0.0,0.0
4,Chinatown,0.0,0.0,0.093023,0.0,0.0,0.046512,0.0,0.023256,0.0,...,0.0,0.0,0.023256,0.0,0.023256,0.0,0.0,0.023256,0.0,0.069767
5,Civic Center,0.0,0.0,0.157895,0.0,0.0,0.052632,0.052632,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Clinton,0.0,0.0,0.16,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.04,0.0,0.0
7,East Harlem,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.214286,0.0,0.0,0.0,0.0,0.0
8,East Village,0.0,0.0,0.052632,0.026316,0.026316,0.0,0.0,0.0,0.0,...,0.026316,0.0,0.026316,0.026316,0.0,0.0,0.0,0.078947,0.0,0.052632
9,Financial District,0.0,0.0,0.192308,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
num_top_venues = 6
for hood in venues_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = venues_grouped[venues_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Battery Park City----
                      venue  freq
0        Italian Restaurant  0.29
1       American Restaurant  0.14
2  Mediterranean Restaurant  0.14
3        Mexican Restaurant  0.14
4        Chinese Restaurant  0.14
5          Sushi Restaurant  0.14


----Carnegie Hill----
                   venue  freq
0    Japanese Restaurant  0.13
1      French Restaurant  0.13
2  Vietnamese Restaurant  0.09
3     Italian Restaurant  0.09
4      Indian Restaurant  0.09
5     Chinese Restaurant  0.04


----Central Harlem----
                  venue  freq
0    African Restaurant  0.20
1    Chinese Restaurant  0.13
2   American Restaurant  0.13
3    Seafood Restaurant  0.13
4     French Restaurant  0.13
5  Caribbean Restaurant  0.07


----Chelsea----
                 venue  freq
0   Italian Restaurant  0.19
1  American Restaurant  0.11
2   Seafood Restaurant  0.07
3     Tapas Restaurant  0.07
4     Sushi Restaurant  0.07
5  Japanese Restaurant  0.07


----Chinatown----
                   

5  Japanese Restaurant  0.04


----Upper West Side----
                       venue  freq
0         Italian Restaurant  0.16
1   Mediterranean Restaurant  0.08
2          Indian Restaurant  0.08
3            Thai Restaurant  0.05
4  Middle Eastern Restaurant  0.05
5                 Restaurant  0.05


----Washington Heights----
                       venue  freq
0         Chinese Restaurant  0.14
1  Latin American Restaurant  0.10
2    New American Restaurant  0.10
3         Mexican Restaurant  0.10
4         Spanish Restaurant  0.10
5           Tapas Restaurant  0.10


----West Village----
                     venue  freq
0       Italian Restaurant  0.27
1  New American Restaurant  0.18
2      American Restaurant  0.15
3      Japanese Restaurant  0.06
4       Chinese Restaurant  0.06
5        French Restaurant  0.06


----Yorkville----
                   venue  freq
0     Italian Restaurant  0.25
1       Sushi Restaurant  0.11
2     Mexican Restaurant  0.11
3    Japanese Restaurant  0.

In [24]:

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]

In [25]:
num_top_venues = 5
indicators = ['st', 'nd', 'rd']
# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))
# create a new dataframe
nbhs_venues_sorted = pd.DataFrame(columns=columns)
nbhs_venues_sorted['Neighborhood'] = venues_grouped['Neighborhood']
for ind in np.arange(venues_grouped.shape[0]):
    nbhs_venues_sorted.iloc[ind, 1:] = return_most_common_venues(venues_grouped.iloc[ind, :], num_top_venues)
    
nbhs_venues_sorted.head(10)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Battery Park City,Italian Restaurant,Sushi Restaurant,American Restaurant,Chinese Restaurant,Mediterranean Restaurant
1,Carnegie Hill,Japanese Restaurant,French Restaurant,Vietnamese Restaurant,Indian Restaurant,Italian Restaurant
2,Central Harlem,African Restaurant,French Restaurant,American Restaurant,Chinese Restaurant,Seafood Restaurant
3,Chelsea,Italian Restaurant,American Restaurant,French Restaurant,Tapas Restaurant,Japanese Restaurant
4,Chinatown,Chinese Restaurant,American Restaurant,Vietnamese Restaurant,Hotpot Restaurant,Dim Sum Restaurant
5,Civic Center,French Restaurant,American Restaurant,Falafel Restaurant,Sushi Restaurant,Cajun / Creole Restaurant
6,Clinton,Italian Restaurant,American Restaurant,Mediterranean Restaurant,New American Restaurant,Caucasian Restaurant
7,East Harlem,Mexican Restaurant,Thai Restaurant,Latin American Restaurant,Restaurant,Cuban Restaurant
8,East Village,Mexican Restaurant,Vegetarian / Vegan Restaurant,Italian Restaurant,Korean Restaurant,Chinese Restaurant
9,Financial District,American Restaurant,Italian Restaurant,Mexican Restaurant,Japanese Restaurant,Falafel Restaurant


### Step 4,  Cluster neighborhoods

##### Run k-means to cluster the Borough into 5 clusters.

In [26]:
#import k-means from clustering stage
#from sklearn.cluster import KMeans

# set number of clusters
kclusters = 7
nbhs_grouped_clustering = venues_grouped.drop('Neighborhood', 1)
# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(nbhs_grouped_clustering)
# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 3, 3, 3, 3, 3, 1, 6, 3, 1])

##### Now create a new dataframe that includes the cluster as well as the top 10 venues for each postcode neighborhood

In [27]:
nbhs_venues_sorted.insert(0, 'Cluster Label', kmeans.labels_)
nbhs_venues_sorted.head(10)

Unnamed: 0,Cluster Label,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,1,Battery Park City,Italian Restaurant,Sushi Restaurant,American Restaurant,Chinese Restaurant,Mediterranean Restaurant
1,3,Carnegie Hill,Japanese Restaurant,French Restaurant,Vietnamese Restaurant,Indian Restaurant,Italian Restaurant
2,3,Central Harlem,African Restaurant,French Restaurant,American Restaurant,Chinese Restaurant,Seafood Restaurant
3,3,Chelsea,Italian Restaurant,American Restaurant,French Restaurant,Tapas Restaurant,Japanese Restaurant
4,3,Chinatown,Chinese Restaurant,American Restaurant,Vietnamese Restaurant,Hotpot Restaurant,Dim Sum Restaurant
5,3,Civic Center,French Restaurant,American Restaurant,Falafel Restaurant,Sushi Restaurant,Cajun / Creole Restaurant
6,1,Clinton,Italian Restaurant,American Restaurant,Mediterranean Restaurant,New American Restaurant,Caucasian Restaurant
7,6,East Harlem,Mexican Restaurant,Thai Restaurant,Latin American Restaurant,Restaurant,Cuban Restaurant
8,3,East Village,Mexican Restaurant,Vegetarian / Vegan Restaurant,Italian Restaurant,Korean Restaurant,Chinese Restaurant
9,1,Financial District,American Restaurant,Italian Restaurant,Mexican Restaurant,Japanese Restaurant,Falafel Restaurant


In [28]:
print(nbhs_venues_sorted.shape)
print(Brgh_df.shape)

(40, 7)
(40, 4)


In [29]:
nbhs_merged = Brgh_df
#nbhs_merged = nbhs_merged.join(nbhs_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
nbhs_merged = nbhs_merged.join(nbhs_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
nbhs_merged = nbhs_merged[nbhs_merged['Cluster Label'].notna()]
nbhs_merged.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 40 entries, 0 to 39
Data columns (total 10 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Borough                40 non-null     object 
 1   Neighborhood           40 non-null     object 
 2   Latitude               40 non-null     float64
 3   Longitude              40 non-null     float64
 4   Cluster Label          40 non-null     int32  
 5   1st Most Common Venue  40 non-null     object 
 6   2nd Most Common Venue  40 non-null     object 
 7   3rd Most Common Venue  40 non-null     object 
 8   4th Most Common Venue  40 non-null     object 
 9   5th Most Common Venue  40 non-null     object 
dtypes: float64(2), int32(1), object(7)
memory usage: 3.3+ KB


In [31]:
nbhs_merged = nbhs_merged.astype({"Cluster Label": int}) 
nbhs_merged = nbhs_merged[,1:]
print(nbhs_merged.shape)
nbhs_merged.head(10) # check the last columns!

SyntaxError: invalid syntax (<ipython-input-31-cbe5b1e64328>, line 2)

In [32]:
latitude = location.latitude
longitude = location.longitude
print('The coordinate are {}, {}.'.format(latitude, longitude))
# create map
map_clusters = folium.Map(location=[latitude,longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(nbhs_merged['Latitude'], nbhs_merged['Longitude'], nbhs_merged['Neighborhood'], nbhs_merged['Cluster Label']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

The coordinate are 40.7896239, -73.9598939.


### Step 5,  Examine Clusters

In [33]:
column_num = nbhs_merged.shape[1]

##### Cluster 1

In [34]:
nbhs_merged.loc[nbhs_merged['Cluster Label'] == 0, nbhs_merged.columns[[1] + list(range(4, column_num))]]

Unnamed: 0,Neighborhood,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
37,Stuyvesant Town,0,German Restaurant,Vietnamese Restaurant,Empanada Restaurant,English Restaurant,Ethiopian Restaurant


In [35]:
nbhs_merged.loc[nbhs_merged['Cluster Label'] == 1, nbhs_merged.columns[[1] + list(range(4, column_num))]]

Unnamed: 0,Neighborhood,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
8,Upper East Side,1,Italian Restaurant,French Restaurant,Mexican Restaurant,American Restaurant,Sushi Restaurant
13,Lincoln Square,1,Italian Restaurant,French Restaurant,American Restaurant,Mediterranean Restaurant,Mexican Restaurant
14,Clinton,1,Italian Restaurant,American Restaurant,Mediterranean Restaurant,New American Restaurant,Caucasian Restaurant
21,Tribeca,1,American Restaurant,Italian Restaurant,Greek Restaurant,French Restaurant,Vietnamese Restaurant
23,Soho,1,Italian Restaurant,Mediterranean Restaurant,Vegetarian / Vegan Restaurant,Seafood Restaurant,French Restaurant
24,West Village,1,Italian Restaurant,New American Restaurant,American Restaurant,Japanese Restaurant,Chinese Restaurant
26,Morningside Heights,1,American Restaurant,Mexican Restaurant,Indian Restaurant,Ethiopian Restaurant,Seafood Restaurant
27,Gramercy,1,Italian Restaurant,American Restaurant,Mexican Restaurant,Thai Restaurant,Sushi Restaurant
28,Battery Park City,1,Italian Restaurant,Sushi Restaurant,American Restaurant,Chinese Restaurant,Mediterranean Restaurant
29,Financial District,1,American Restaurant,Italian Restaurant,Mexican Restaurant,Japanese Restaurant,Falafel Restaurant


In [36]:
nbhs_merged.loc[nbhs_merged['Cluster Label'] == 2, nbhs_merged.columns[[1] + list(range(4, column_num))]]

Unnamed: 0,Neighborhood,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
11,Roosevelt Island,2,Japanese Restaurant,Greek Restaurant,Japanese Curry Restaurant,French Restaurant,Empanada Restaurant


In [37]:
nbhs_merged.loc[nbhs_merged['Cluster Label'] == 3, nbhs_merged.columns[[1] + list(range(4, column_num))]]

Unnamed: 0,Neighborhood,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,Chinatown,3,Chinese Restaurant,American Restaurant,Vietnamese Restaurant,Hotpot Restaurant,Dim Sum Restaurant
4,Hamilton Heights,3,Mexican Restaurant,Caribbean Restaurant,Indian Restaurant,Chinese Restaurant,Sushi Restaurant
5,Manhattanville,3,Seafood Restaurant,Italian Restaurant,Chinese Restaurant,Mexican Restaurant,Dumpling Restaurant
6,Central Harlem,3,African Restaurant,French Restaurant,American Restaurant,Chinese Restaurant,Seafood Restaurant
9,Yorkville,3,Italian Restaurant,Sushi Restaurant,Japanese Restaurant,Mexican Restaurant,Vietnamese Restaurant
10,Lenox Hill,3,Italian Restaurant,Sushi Restaurant,Turkish Restaurant,Thai Restaurant,Mexican Restaurant
12,Upper West Side,3,Italian Restaurant,Indian Restaurant,Mediterranean Restaurant,Restaurant,Middle Eastern Restaurant
15,Midtown,3,American Restaurant,French Restaurant,Japanese Restaurant,Indian Restaurant,Sushi Restaurant
16,Murray Hill,3,American Restaurant,Japanese Restaurant,Italian Restaurant,Mediterranean Restaurant,Vietnamese Restaurant
17,Chelsea,3,Italian Restaurant,American Restaurant,French Restaurant,Tapas Restaurant,Japanese Restaurant


In [38]:
nbhs_merged.loc[nbhs_merged['Cluster Label'] == 4, nbhs_merged.columns[[1] + list(range(4, column_num))]]

Unnamed: 0,Neighborhood,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
33,Midtown South,4,Korean Restaurant,Japanese Restaurant,American Restaurant,Vegetarian / Vegan Restaurant,Italian Restaurant


In [39]:
nbhs_merged.loc[nbhs_merged['Cluster Label'] == 5, nbhs_merged.columns[[1] + list(range(4, column_num))]]

Unnamed: 0,Neighborhood,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Marble Hill,5,American Restaurant,Seafood Restaurant,Vietnamese Restaurant,German Restaurant,English Restaurant


In [40]:
nbhs_merged.loc[nbhs_merged['Cluster Label'] == 6, nbhs_merged.columns[[1] + list(range(4, column_num))]]

Unnamed: 0,Neighborhood,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
2,Washington Heights,6,Chinese Restaurant,Spanish Restaurant,Latin American Restaurant,Mexican Restaurant,New American Restaurant
3,Inwood,6,Mexican Restaurant,Restaurant,American Restaurant,Chinese Restaurant,Fast Food Restaurant
7,East Harlem,6,Mexican Restaurant,Thai Restaurant,Latin American Restaurant,Restaurant,Cuban Restaurant
36,Tudor City,6,Mexican Restaurant,Vietnamese Restaurant,Asian Restaurant,Spanish Restaurant,Sushi Restaurant
