## 1. Import Libraries

In [1]:
import pandas as pd
import numpy as np
import sys
!{sys.executable} -m pip install wikitables
from wikitables import import_tables
import json
import requests
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
!conda install -c conda-forge folium=0.5.0 --yes
import folium
from geopy.geocoders import Nominatim
print ('Everything is ready!')

Collecting wikitables
  Downloading https://files.pythonhosted.org/packages/62/bd/c356270a94851b601ee06bdd4db74d54cd8e761212916bf43a25a894baa8/wikitables-0.4.2-py2.py3-none-any.whl
Collecting mwparserfromhell>=0.4.3 (from wikitables)
  Downloading https://files.pythonhosted.org/packages/bb/87/13c195cab36757e50a7fdb22f7bd8dfa8b8d14b62b0d2fe78d4c272e10ed/mwparserfromhell-0.5.2.tar.gz (132kB)
[K    100% |████████████████████████████████| 133kB 6.5MB/s eta 0:00:01
[?25hRequirement not upgraded as not directly required: requests>=2.9.1 in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from wikitables)
Requirement not upgraded as not directly required: chardet<3.1.0,>=3.0.2 in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from requests>=2.9.1->wikitables)
Requirement not upgraded as not directly required: idna<2.7,>=2.5 in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from requests>=2.9.1->wikitables)
Requirement not upgraded as not directly required: url

## 2. Import Data

In [2]:
# We import the table
tables = import_tables('List of Manhattan neighborhoods')
neighborhoods_upt=tables[0].rows
neighborhoods_mid=tables[1].rows
neighborhoods_mid_dow=tables[2].rows
neighborhoods_dow=tables[3].rows

In [3]:
# We define the dataframe columns
column_names = ['Neighborhood_not_ready'] 

# instantiate the dataframe
neighborhoods_manh = pd.DataFrame(columns=column_names)

In [4]:
# We fill our dataframe with the data
group_neigh=(neighborhoods_upt,neighborhoods_mid,neighborhoods_mid_dow,neighborhoods_dow)
for i in group_neigh:
    for data in i:
        neighborhood_name = data['Name of the neighborhood']
        neighborhoods_manh = neighborhoods_manh.append({'Neighborhood_not_ready':neighborhood_name}, ignore_index=True)

In [5]:
neighborhoods_manh.head(5)

Unnamed: 0,Neighborhood_not_ready
0,Upper Manhattan
1,Marble Hill
2,Inwood
3,Fort George (part of Washington Heights)
4,Washington Heights


In [6]:
neighborhoods_manh.shape

(84, 1)

In [7]:
# in order to make easiest the changes in the dataframe
neighborhoods_manh= neighborhoods_manh.applymap(str)

In [8]:
#We clean the data a little bit
neighborhoods_manh['Neighborhood_not_ready'] = neighborhoods_manh['Neighborhood_not_ready'].replace(' †','',regex=True)


In [9]:
neighborhoods_manh['Neighborhood'] = neighborhoods_manh['Neighborhood_not_ready'].str.split(',').str[0]

In [10]:
neighborhoods_manh['Neighborhood'] = neighborhoods_manh['Neighborhood'] + ', Manhattan'

In [11]:
# we need the coordinates of each neighborhood
address = neighborhoods_manh['Neighborhood']

url='https://maps.googleapis.com/maps/api/geocode/json?address='
key='&key=AIzaSyBKlu0QTQZPFcAsQmAHy2Mn41E8h_xxVXA'
n=0
for i in address:
    try:
        data_loc=(requests.get(url+i+key)).json()
        latitude = data_loc['results'][0]['geometry']['location']['lat']
        longitude = data_loc['results'][0]['geometry']['location']['lng']
        neighborhoods_manh = neighborhoods_manh.append({'Latitude':latitude, 'Longitude':longitude}, ignore_index=True)
    except IndexError:
        pass 


In [12]:
df_1 = neighborhoods_manh.iloc[0:84]
df_2 = neighborhoods_manh.iloc[84:168]

In [13]:
df_1=(df_1.drop(['Latitude','Longitude','Neighborhood_not_ready'], axis=1)).reset_index(drop=True)

In [14]:
df_2=(df_2.drop(['Neighborhood_not_ready','Neighborhood'], axis=1)).reset_index(drop=True)

In [15]:
neighborhood_manhattan= pd.concat([df_1, df_2], axis=1)

In [16]:
neighborhood_manhattan

Unnamed: 0,Neighborhood,Latitude,Longitude
0,"Upper Manhattan, Manhattan",40.824048,-73.944764
1,"Marble Hill, Manhattan",40.876117,-73.910263
2,"Inwood, Manhattan",40.867714,-73.921202
3,"Fort George (part of Washington Heights), Manh...",40.859032,-73.933043
4,"Washington Heights, Manhattan",40.841708,-73.939355
5,"Hudson Heights (part of Washington Heights), M...",40.853497,-73.937452
6,"West Harlem, Manhattan",40.811550,-73.946477
7,"Hamilton Heights (part of Harlem), Manhattan",40.825960,-73.949608
8,"Manhattanville, Manhattan",40.816944,-73.955833
9,"Morningside Heights, Manhattan",40.808956,-73.962433


In [17]:
address_manh = 'Manhattan, NY'

geolocator = Nominatim()
location = geolocator.geocode(address_manh)
latitude_manh = location.latitude
longitude_manh = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude_manh, longitude_manh))

The geograpical coordinate of Manhattan are 40.7900869, -73.9598295.


In [18]:
# Let's create map of Manhattan using latitude and longitude values just to a visual check
map_manh = folium.Map(location=[latitude_manh, longitude_manh], zoom_start=10)

# add markers to map
for lat, lng, neighborhood in zip(neighborhood_manhattan['Latitude'], neighborhood_manhattan['Longitude'], neighborhood_manhattan['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_manh)  
    
map_manh

## 3. Define Foursquare Credentials and Version

In [19]:
CLIENT_ID = '5I30MDOXKVLKCQURREIZ0FSWHABE14LI4C35JQV2S5R0K3P2' # your Foursquare ID
CLIENT_SECRET = '3ST1EJE1CK5CWLTXQ2H0WZLPBHWKKFMW1ZG5N1FYJHPLI1TH' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30

## 4. Explore neighborhoods in Manhattan

In [20]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [21]:
manhattan_venues = getNearbyVenues(names=neighborhood_manhattan['Neighborhood'],
                                   latitudes=neighborhood_manhattan['Latitude'],
                                   longitudes=neighborhood_manhattan['Longitude']
                                  )

Upper Manhattan, Manhattan
Marble Hill, Manhattan
Inwood, Manhattan
Fort George (part of Washington Heights), Manhattan
Washington Heights, Manhattan
Hudson Heights (part of Washington Heights), Manhattan
West Harlem, Manhattan
Hamilton Heights (part of Harlem), Manhattan
Manhattanville, Manhattan
Morningside Heights, Manhattan
Central Harlem, Manhattan
Harlem, Manhattan
St. Nicholas Historic District , Manhattan
Astor Row (Central Harlem), Manhattan
Sugar Hill (Central Harlem), Manhattan
Marcus Garvey Park , Manhattan
Le Petit Senegal (Little Senegal), Manhattan
East Harlem (Spanish Harlem), Manhattan
Upper East Side, Manhattan
Lenox Hill, Manhattan
Carnegie Hill, Manhattan
Yorkville, Manhattan
Upper West Side, Manhattan
Manhattan Valley , Manhattan
Lincoln Square (once San Juan Hill), Manhattan
Midtown, Manhattan
Columbus Circle, Manhattan
Sutton Place, Manhattan
Rockefeller Center, Manhattan
Diamond District, Manhattan
Theater District, Manhattan
Turtle Bay, Manhattan
Midtown East, 

In [22]:
manhattan_venues.head(5)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Upper Manhattan, Manhattan",40.824048,-73.944764,Manhattanville Coffee,40.821496,-73.944595,Coffee Shop
1,"Upper Manhattan, Manhattan",40.824048,-73.944764,Branson Got Juice!,40.825525,-73.943681,Juice Bar
2,"Upper Manhattan, Manhattan",40.824048,-73.944764,Monkey Cup,40.825694,-73.947234,Coffee Shop
3,"Upper Manhattan, Manhattan",40.824048,-73.944764,Jackie Robinson Park Recreation Center,40.823965,-73.94242,Pool
4,"Upper Manhattan, Manhattan",40.824048,-73.944764,Sugar Hill Cafe,40.82433,-73.945537,Café


## 5. Let's create a map with the difference among the areas of the city

In [23]:
manhattan_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Alphabet City and Loisaida, Manhattan",30,30,30,30,30,30
"Astor Row (Central Harlem), Manhattan",30,30,30,30,30,30
"Battery Park City, Manhattan",30,30,30,30,30,30
"Bowery, Manhattan",30,30,30,30,30,30
"Brookdale, Manhattan",30,30,30,30,30,30
"Carnegie Hill, Manhattan",30,30,30,30,30,30
"Central Harlem, Manhattan",16,16,16,16,16,16
"Chelsea, Manhattan",30,30,30,30,30,30
"Chinatown, Manhattan",30,30,30,30,30,30
"Civic Center, Manhattan",30,30,30,30,30,30


In [24]:
# one hot encoding
manh_venues_onehot = pd.get_dummies(manhattan_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
manh_venues_onehot['Neighborhood'] = manhattan_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [manh_venues_onehot.columns[-1]] + list(manh_venues_onehot.columns[:-1])
manh_venues_onehot = manh_venues_onehot[fixed_columns]

manh_venues_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,African Restaurant,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Arcade,Argentinian Restaurant,Art Gallery,...,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Vietnamese Restaurant,Volleyball Court,Whisky Bar,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,"Upper Manhattan, Manhattan",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Upper Manhattan, Manhattan",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Upper Manhattan, Manhattan",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Upper Manhattan, Manhattan",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Upper Manhattan, Manhattan",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
# Let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
manh_venues_grouped = manh_venues_onehot.groupby('Neighborhood').mean().reset_index()
manh_venues_grouped

Unnamed: 0,Neighborhood,Accessories Store,African Restaurant,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Arcade,Argentinian Restaurant,Art Gallery,...,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Vietnamese Restaurant,Volleyball Court,Whisky Bar,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,"Alphabet City and Loisaida, Manhattan",0.000000,0.000000,0.033333,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.033333,0.033333,0.000000,0.000000
1,"Astor Row (Central Harlem), Manhattan",0.000000,0.033333,0.033333,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.033333,0.000000,0.033333
2,"Battery Park City, Manhattan",0.000000,0.000000,0.033333,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,"Bowery, Manhattan",0.000000,0.000000,0.066667,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,"Brookdale, Manhattan",0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.000000,0.000000,0.033333,0.000000,0.000000,0.033333,0.000000,0.000000
5,"Carnegie Hill, Manhattan",0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.033333,0.0,0.000000,0.000000,0.000000,0.000000,0.033333,0.066667,0.000000,0.033333
6,"Central Harlem, Manhattan",0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
7,"Chelsea, Manhattan",0.000000,0.000000,0.033333,0.0,0.0,0.000000,0.000000,0.000000,0.033333,...,0.033333,0.0,0.000000,0.000000,0.000000,0.000000,0.033333,0.033333,0.000000,0.000000
8,"Chinatown, Manhattan",0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.033333
9,"Civic Center, Manhattan",0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.033333,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [26]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [27]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = manh_venues_grouped['Neighborhood']

for ind in np.arange(manh_venues_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(manh_venues_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,"Alphabet City and Loisaida, Manhattan",Cocktail Bar,Coffee Shop,Italian Restaurant,Bar,Event Space
1,"Astor Row (Central Harlem), Manhattan",Southern / Soul Food Restaurant,Jazz Club,Grocery Store,Sushi Restaurant,Yoga Studio
2,"Battery Park City, Manhattan",Fountain,Cupcake Shop,Food Court,Coffee Shop,Park
3,"Bowery, Manhattan",Italian Restaurant,Japanese Restaurant,American Restaurant,Gift Shop,Pizza Place
4,"Brookdale, Manhattan",Park,Playground,Kids Store,Bike Rental / Bike Share,Bookstore
5,"Carnegie Hill, Manhattan",Café,Wine Shop,French Restaurant,Pizza Place,Italian Restaurant
6,"Central Harlem, Manhattan",Gym,Gym / Fitness Center,Chinese Restaurant,Latin American Restaurant,Athletics & Sports
7,"Chelsea, Manhattan",Café,French Restaurant,Tapas Restaurant,Theater,Indian Restaurant
8,"Chinatown, Manhattan",Chinese Restaurant,Bakery,Dim Sum Restaurant,Spa,Cocktail Bar
9,"Civic Center, Manhattan",Chinese Restaurant,Park,Dim Sum Restaurant,Japanese Restaurant,Mexican Restaurant


In [28]:
#Run k-means to cluster the neighborhood into 5 clusters
kclusters = 5

manhattan_venues_grouped_clustering = manh_venues_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(manhattan_venues_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 2, 0, 2, 0, 1, 0, 2, 3, 3], dtype=int32)

In [29]:
#manhattan_merged = neighborhood_manhattan

# add clustering labels
neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
manhattan_merged_venues = neighborhoods_venues_sorted.join(neighborhood_manhattan.set_index('Neighborhood'), on='Neighborhood')

manhattan_merged_venues.head() # check the last columns!

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,Cluster Labels,Latitude,Longitude
0,"Alphabet City and Loisaida, Manhattan",Cocktail Bar,Coffee Shop,Italian Restaurant,Bar,Event Space,1,40.725843,-73.977492
1,"Astor Row (Central Harlem), Manhattan",Southern / Soul Food Restaurant,Jazz Club,Grocery Store,Sushi Restaurant,Yoga Studio,2,40.8103,-73.9416
2,"Battery Park City, Manhattan",Fountain,Cupcake Shop,Food Court,Coffee Shop,Park,0,40.711579,-74.015844
3,"Bowery, Manhattan",Italian Restaurant,Japanese Restaurant,American Restaurant,Gift Shop,Pizza Place,2,40.725257,-73.990349
4,"Brookdale, Manhattan",Park,Playground,Kids Store,Bike Rental / Bike Share,Bookstore,0,40.717566,-74.014463


In [30]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
#s=z*1000,
markers_colors = []
for lat, lon, poi, cluster in zip(manhattan_merged_venues['Latitude'], manhattan_merged_venues['Longitude'], manhattan_merged_venues['Neighborhood'], manhattan_merged_venues['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        #s=1000,
        radius=12,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters


In [31]:
len(manhattan_merged_venues.loc[manhattan_merged_venues['Cluster Labels'] == 0, manhattan_merged_venues.columns[[1] + list(range(0, manhattan_merged_venues.shape[1]))]])

20

In [32]:
resident_1=manhattan_merged_venues.loc[manhattan_merged_venues['Cluster Labels'] == 0, manhattan_merged_venues.columns[[1] + list(range(0, manhattan_merged_venues.shape[1]))]]
resident_1

Unnamed: 0,1st Most Common Venue,Neighborhood,1st Most Common Venue.1,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,Cluster Labels,Latitude,Longitude
2,Fountain,"Battery Park City, Manhattan",Fountain,Cupcake Shop,Food Court,Coffee Shop,Park,0,40.711579,-74.015844
4,Park,"Brookdale, Manhattan",Park,Playground,Kids Store,Bike Rental / Bike Share,Bookstore,0,40.717566,-74.014463
6,Gym,"Central Harlem, Manhattan",Gym,Gym / Fitness Center,Chinese Restaurant,Latin American Restaurant,Athletics & Sports,0,40.813875,-73.93713
11,Park,"Cooperative Village, Manhattan",Park,Art Gallery,Ramen Restaurant,Bike Trail,Supermarket,0,40.714787,-73.980976
15,Coffee Shop,"East Village, Manhattan",Coffee Shop,Vegetarian / Vegan Restaurant,Ice Cream Shop,Caribbean Restaurant,Tapas Restaurant,0,40.726477,-73.981534
16,Coffee Shop,"Financial District, Manhattan",Coffee Shop,Steakhouse,Gym / Fitness Center,Pizza Place,Jewelry Store,0,40.707491,-74.011276
20,Pizza Place,"Fort George (part of Washington Heights), Manh...",Pizza Place,Chinese Restaurant,Park,Bar,Gym / Fitness Center,0,40.859032,-73.933043
27,Korean Restaurant,"Herald Square, Manhattan",Korean Restaurant,Cosmetics Shop,Lingerie Store,Clothing Store,Yoga Studio,0,40.750247,-73.98771
32,Korean Restaurant,"Koreatown, Manhattan",Korean Restaurant,Scenic Lookout,Cosmetics Shop,Hotel Bar,Restaurant,0,40.747738,-73.986894
39,Park,"Little Syria (historic), Manhattan",Park,Exhibit,Planetarium,Wine Bar,Playground,0,40.78306,-73.971249


In [33]:
len(manhattan_merged_venues.loc[manhattan_merged_venues['Cluster Labels'] == 1, manhattan_merged_venues.columns[[1] + list(range(0, manhattan_merged_venues.shape[1]))]])

26

In [34]:
outer_area_services=manhattan_merged_venues.loc[manhattan_merged_venues['Cluster Labels'] == 1, manhattan_merged_venues.columns[[1] + list(range(0, manhattan_merged_venues.shape[1]))]]
outer_area_services

Unnamed: 0,1st Most Common Venue,Neighborhood,1st Most Common Venue.1,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,Cluster Labels,Latitude,Longitude
0,Cocktail Bar,"Alphabet City and Loisaida, Manhattan",Cocktail Bar,Coffee Shop,Italian Restaurant,Bar,Event Space,1,40.725843,-73.977492
5,Café,"Carnegie Hill, Manhattan",Café,Wine Shop,French Restaurant,Pizza Place,Italian Restaurant,1,40.784465,-73.955086
14,Bakery,"East Harlem (Spanish Harlem), Manhattan",Bakery,Mexican Restaurant,Pizza Place,Café,Taco Place,1,40.79574,-73.938921
24,Bar,"Hamilton Heights (part of Harlem), Manhattan",Bar,Coffee Shop,Yoga Studio,Caribbean Restaurant,Chinese Restaurant,1,40.82596,-73.949608
26,Wine Bar,"Hell's Kitchen , Manhattan",Wine Bar,Thai Restaurant,Wine Shop,Mexican Restaurant,Cosmetics Shop,1,40.763758,-73.991818
28,Wine Shop,"Hudson Heights (part of Washington Heights), M...",Wine Shop,Park,Café,Ramen Restaurant,Chinese Restaurant,1,40.853497,-73.937452
30,Mexican Restaurant,"Inwood, Manhattan",Mexican Restaurant,Café,Bakery,Wine Bar,Restaurant,1,40.867714,-73.921202
31,Indian Restaurant,"Kips Bay, Manhattan",Indian Restaurant,Italian Restaurant,Ice Cream Shop,Gourmet Shop,Grocery Store,1,40.742329,-73.980064
33,Seafood Restaurant,"Le Petit Senegal (Little Senegal), Manhattan",Seafood Restaurant,Bakery,Restaurant,Southern / Soul Food Restaurant,Coffee Shop,1,40.804252,-73.95436
38,Café,"Little Italy, Manhattan",Café,Ice Cream Shop,Bakery,Sandwich Place,Coffee Shop,1,40.719141,-73.997327


In [35]:
len(manhattan_merged_venues.loc[manhattan_merged_venues['Cluster Labels'] == 2, manhattan_merged_venues.columns[[1] + list(range(0, manhattan_merged_venues.shape[1]))]])

32

In [36]:
main_area_services=manhattan_merged_venues.loc[manhattan_merged_venues['Cluster Labels'] == 2, manhattan_merged_venues.columns[[1] + list(range(0, manhattan_merged_venues.shape[1]))]]
main_area_services

Unnamed: 0,1st Most Common Venue,Neighborhood,1st Most Common Venue.1,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,Cluster Labels,Latitude,Longitude
1,Southern / Soul Food Restaurant,"Astor Row (Central Harlem), Manhattan",Southern / Soul Food Restaurant,Jazz Club,Grocery Store,Sushi Restaurant,Yoga Studio,2,40.8103,-73.9416
3,Italian Restaurant,"Bowery, Manhattan",Italian Restaurant,Japanese Restaurant,American Restaurant,Gift Shop,Pizza Place,2,40.725257,-73.990349
7,Café,"Chelsea, Manhattan",Café,French Restaurant,Tapas Restaurant,Theater,Indian Restaurant,2,40.7465,-74.001374
10,Jazz Club,"Columbus Circle, Manhattan",Jazz Club,French Restaurant,Bakery,Spa,Juice Bar,2,40.767875,-73.981498
12,Boutique,"Diamond District, Manhattan",Boutique,Sporting Goods Shop,Plaza,Japanese Restaurant,Ramen Restaurant,2,40.756977,-73.978788
13,Women's Store,"Downtown Manhattan, Manhattan",Women's Store,Yoga Studio,Men's Store,Art Gallery,Spanish Restaurant,2,40.721834,-73.999835
18,Cycle Studio,"Flatiron District , Manhattan",Cycle Studio,Furniture / Home Store,Gym,Mexican Restaurant,Gym / Fitness Center,2,40.740083,-73.990349
19,Gym / Fitness Center,"Flower District, Manhattan",Gym / Fitness Center,Hotel,Spa,Coffee Shop,Hotel Bar,2,40.746796,-73.991972
21,Coffee Shop,"Garment District, Manhattan",Coffee Shop,Italian Restaurant,Dance Studio,Hotel,Yoga Studio,2,40.754707,-73.991634
22,Pizza Place,"Gramercy Park, Manhattan",Pizza Place,Café,Cheese Shop,Italian Restaurant,Gym / Fitness Center,2,40.736778,-73.984472


In [64]:
south_area_services=manhattan_merged_venues.loc[manhattan_merged_venues['Cluster Labels'] == 3, manhattan_merged_venues.columns[[1] + list(range(0, manhattan_merged_venues.shape[1]))]]

In [150]:
len(south_area_services)

4

In [38]:
cultural_services=manhattan_merged_venues.loc[manhattan_merged_venues['Cluster Labels'] == 4, manhattan_merged_venues.columns[[1] + list(range(0, manhattan_merged_venues.shape[1]))]]

In [151]:
len(cultural_services)

2

## 6. Let's make a map with our search of competitiveness: Restaurants in the area

In [39]:
# Due to our duty to get the best location for our Spanish Restaurant, we filter the data frame to check the current offer
Manhattan_restaurants=manhattan_venues[manhattan_venues['Venue Category'].str.contains('Restaurant')]

In [103]:
# Just to check how many of the restaurants are Spanish...
Manhattan_restaurants_spanish=manhattan_venues[manhattan_venues['Venue Category'].str.contains('Spanish','Tapas')]
Manhattan_restaurants_spanish

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
75,"Inwood, Manhattan",40.867714,-73.921202,La Reina Del Chicharron,40.866357,-73.925012,Spanish Restaurant
127,"Washington Heights, Manhattan",40.841708,-73.939355,Galicia Restaurant,40.844152,-73.93904,Spanish Restaurant
163,"West Harlem, Manhattan",40.81155,-73.946477,Tropical Grill & Restaurant,40.810583,-73.94717,Spanish Restaurant
250,"Manhattanville, Manhattan",40.816944,-73.955833,Mofongo del Valle,40.820317,-73.954977,Spanish Restaurant
299,"Harlem, Manhattan",40.81155,-73.946477,Tropical Grill & Restaurant,40.810583,-73.94717,Spanish Restaurant
387,"Astor Row (Central Harlem), Manhattan",40.8103,-73.9416,Tropical Grill & Restaurant,40.810583,-73.94717,Spanish Restaurant
472,"Le Petit Senegal (Little Senegal), Manhattan",40.804252,-73.95436,tropical grill restaurant,40.803038,-73.952079,Spanish Restaurant
487,"East Harlem (Spanish Harlem), Manhattan",40.79574,-73.938921,El Nuevo Caridad,40.796895,-73.937876,Spanish Restaurant
516,"Upper East Side, Manhattan",40.773565,-73.956555,Boqueria,40.77151,-73.956073,Spanish Restaurant
518,"Upper East Side, Manhattan",40.773565,-73.956555,Pil Pil,40.772781,-73.955852,Spanish Restaurant


In [104]:
Manhattan_restaurants.shape

(720, 7)

In [105]:
Manhattan_restaurants_spanish.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Alphabet City and Loisaida, Manhattan",1,1,1,1,1,1
"Astor Row (Central Harlem), Manhattan",1,1,1,1,1,1
"Downtown Manhattan, Manhattan",1,1,1,1,1,1
"East Harlem (Spanish Harlem), Manhattan",1,1,1,1,1,1
"Harlem, Manhattan",1,1,1,1,1,1
"Inwood, Manhattan",1,1,1,1,1,1
"Le Petit Senegal (Little Senegal), Manhattan",1,1,1,1,1,1
"Little Italy, Manhattan",1,1,1,1,1,1
"Manhattanville, Manhattan",1,1,1,1,1,1
"Murray Hill, Manhattan",1,1,1,1,1,1


In [42]:
Manhattan_restaurants.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Alphabet City and Loisaida, Manhattan",9,9,9,9,9,9
"Astor Row (Central Harlem), Manhattan",12,12,12,12,12,12
"Battery Park City, Manhattan",1,1,1,1,1,1
"Bowery, Manhattan",12,12,12,12,12,12
"Brookdale, Manhattan",2,2,2,2,2,2
"Carnegie Hill, Manhattan",7,7,7,7,7,7
"Central Harlem, Manhattan",2,2,2,2,2,2
"Chelsea, Manhattan",12,12,12,12,12,12
"Chinatown, Manhattan",10,10,10,10,10,10
"Civic Center, Manhattan",13,13,13,13,13,13


## 7. Analyze each neighborhood depending on their restaurants

In [48]:
# one hot encoding
manh_rest_onehot = pd.get_dummies(Manhattan_restaurants[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
manh_rest_onehot['Neighborhood'] = Manhattan_restaurants['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [manh_rest_onehot.columns[-1]] + list(manh_rest_onehot.columns[:-1])
manh_rest_onehot = manh_rest_onehot[fixed_columns]

manh_rest_onehot.head()

Unnamed: 0,Neighborhood,African Restaurant,American Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,Austrian Restaurant,Brazilian Restaurant,Caribbean Restaurant,Chinese Restaurant,...,Swiss Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Tonkatsu Restaurant,Turkish Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
6,"Upper Manhattan, Manhattan",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,"Upper Manhattan, Manhattan",0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
11,"Upper Manhattan, Manhattan",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
13,"Upper Manhattan, Manhattan",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15,"Upper Manhattan, Manhattan",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [49]:
manh_rest_onehot.shape

(720, 65)

In [50]:
# Let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
manh_rest_grouped = manh_rest_onehot.groupby('Neighborhood').mean().reset_index()
manh_rest_grouped

Unnamed: 0,Neighborhood,African Restaurant,American Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,Austrian Restaurant,Brazilian Restaurant,Caribbean Restaurant,Chinese Restaurant,...,Swiss Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Tonkatsu Restaurant,Turkish Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,"Alphabet City and Loisaida, Manhattan",0.000000,0.111111,0.000000,0.000000,0.000000,0.111111,0.111111,0.000000,0.000000,...,0.0,0.000,0.000000,0.000000,0.000000,0.000,0.000000,0.0,0.000000,0.000000
1,"Astor Row (Central Harlem), Manhattan",0.083333,0.083333,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.083333,...,0.0,0.000,0.000000,0.000000,0.000000,0.000,0.000000,0.0,0.000000,0.000000
2,"Battery Park City, Manhattan",0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000,0.000000,0.000000,0.000000,0.000,0.000000,0.0,0.000000,0.000000
3,"Bowery, Manhattan",0.000000,0.166667,0.000000,0.083333,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000,0.000000,0.000000,0.083333,0.000,0.000000,0.0,0.000000,0.000000
4,"Brookdale, Manhattan",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000,0.000000,0.000000,0.000000,0.000,0.000000,0.0,0.000000,0.000000
5,"Carnegie Hill, Manhattan",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000,0.000000,0.000000,0.000000,0.000,0.000000,0.0,0.142857,0.000000
6,"Central Harlem, Manhattan",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.500000,...,0.0,0.000,0.000000,0.000000,0.000000,0.000,0.000000,0.0,0.000000,0.000000
7,"Chelsea, Manhattan",0.000000,0.083333,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.083333,...,0.0,0.000,0.000000,0.166667,0.000000,0.000,0.000000,0.0,0.083333,0.000000
8,"Chinatown, Manhattan",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.600000,...,0.0,0.000,0.100000,0.000000,0.000000,0.000,0.000000,0.0,0.000000,0.000000
9,"Civic Center, Manhattan",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.307692,...,0.0,0.000,0.000000,0.000000,0.000000,0.000,0.000000,0.0,0.076923,0.000000


In [51]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
rest_neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
rest_neighborhoods_venues_sorted['Neighborhood'] = manh_rest_grouped['Neighborhood']

for ind in np.arange(manh_rest_grouped.shape[0]):
    rest_neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(manh_rest_grouped.iloc[ind, :], num_top_venues)

rest_neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,"Alphabet City and Loisaida, Manhattan",Italian Restaurant,American Restaurant,Latin American Restaurant,Austrian Restaurant,Brazilian Restaurant
1,"Astor Row (Central Harlem), Manhattan",Southern / Soul Food Restaurant,Sushi Restaurant,Chinese Restaurant,Seafood Restaurant,Mexican Restaurant
2,"Battery Park City, Manhattan",American Restaurant,Vietnamese Restaurant,Falafel Restaurant,Japanese Restaurant,Japanese Curry Restaurant
3,"Bowery, Manhattan",Italian Restaurant,American Restaurant,Japanese Restaurant,Asian Restaurant,Thai Restaurant
4,"Brookdale, Manhattan",Seafood Restaurant,Mexican Restaurant,Ethiopian Restaurant,Japanese Curry Restaurant,Italian Restaurant
5,"Carnegie Hill, Manhattan",Italian Restaurant,French Restaurant,Vegetarian / Vegan Restaurant,Kosher Restaurant,Restaurant
6,"Central Harlem, Manhattan",Latin American Restaurant,Chinese Restaurant,Vietnamese Restaurant,Falafel Restaurant,Japanese Curry Restaurant
7,"Chelsea, Manhattan",Tapas Restaurant,French Restaurant,Italian Restaurant,American Restaurant,Vegetarian / Vegan Restaurant
8,"Chinatown, Manhattan",Chinese Restaurant,Dim Sum Restaurant,Malay Restaurant,Taiwanese Restaurant,Vietnamese Restaurant
9,"Civic Center, Manhattan",Chinese Restaurant,Dim Sum Restaurant,Mexican Restaurant,Japanese Restaurant,Peking Duck Restaurant


## 8. Cluster Restaurants

In [85]:
#Run k-means to cluster the neighborhood into 5 clusters
kclusters = 5

manhattan_rest_grouped_clustering = manh_rest_grouped.drop('Neighborhood', 1)
#manhattan_rest_grouped_clustering
# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(manhattan_rest_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([4, 1, 0, 4, 2, 4, 3, 1, 3, 3], dtype=int32)

In [75]:
# As information

manhattan_merged_restaurants=rest_neighborhoods_venues_sorted.join(neighborhood_manhattan.set_index('Neighborhood'), on='Neighborhood')
manhattan_merged_restaurants

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,Cluster Labels,Latitude,Longitude
0,"Alphabet City and Loisaida, Manhattan",Italian Restaurant,American Restaurant,Latin American Restaurant,Austrian Restaurant,Brazilian Restaurant,2,40.725843,-73.977492
1,"Astor Row (Central Harlem), Manhattan",Southern / Soul Food Restaurant,Sushi Restaurant,Chinese Restaurant,Seafood Restaurant,Mexican Restaurant,0,40.810300,-73.941600
2,"Battery Park City, Manhattan",American Restaurant,Vietnamese Restaurant,Falafel Restaurant,Japanese Restaurant,Japanese Curry Restaurant,3,40.711579,-74.015844
3,"Bowery, Manhattan",Italian Restaurant,American Restaurant,Japanese Restaurant,Asian Restaurant,Thai Restaurant,2,40.725257,-73.990349
4,"Brookdale, Manhattan",Seafood Restaurant,Mexican Restaurant,Ethiopian Restaurant,Japanese Curry Restaurant,Italian Restaurant,0,40.717566,-74.014463
5,"Carnegie Hill, Manhattan",Italian Restaurant,French Restaurant,Vegetarian / Vegan Restaurant,Kosher Restaurant,Restaurant,2,40.784465,-73.955086
6,"Central Harlem, Manhattan",Latin American Restaurant,Chinese Restaurant,Vietnamese Restaurant,Falafel Restaurant,Japanese Curry Restaurant,1,40.813875,-73.937130
7,"Chelsea, Manhattan",Tapas Restaurant,French Restaurant,Italian Restaurant,American Restaurant,Vegetarian / Vegan Restaurant,0,40.746500,-74.001374
8,"Chinatown, Manhattan",Chinese Restaurant,Dim Sum Restaurant,Malay Restaurant,Taiwanese Restaurant,Vietnamese Restaurant,1,40.715751,-73.997031
9,"Civic Center, Manhattan",Chinese Restaurant,Dim Sum Restaurant,Mexican Restaurant,Japanese Restaurant,Peking Duck Restaurant,1,40.714052,-74.002836


In [86]:
# Our clustering map will be however...
# add clustering labels
rest_neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
manhattan_merged = rest_neighborhoods_venues_sorted.join(Manhattan_restaurants.set_index('Neighborhood'), on='Neighborhood')

#manhattan_merged # check the last columns!


In [149]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
#s=z*1000,
markers_colors = []

for lat, lon, poi, cluster,spa in zip(manhattan_merged['Venue Latitude'], manhattan_merged['Venue Longitude'], manhattan_merged['Venue'], manhattan_merged['Cluster Labels'],manhattan_merged['Venue Category']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        #s=1000,
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
    
# We are going to add different markers for Spanish restaurants
for lati, lng, venue in zip(Manhattan_restaurants_spanish['Venue Latitude'], Manhattan_restaurants_spanish['Venue Longitude'], Manhattan_restaurants_spanish['Venue']):
    label = '{}'.format(venue)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lati, lng],
        radius=15,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_clusters)

       
map_clusters


## 8.1. 1st Cluster: Analysis

In [88]:
len(manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 0, manhattan_merged.columns[[1] + list(range(0, manhattan_merged.shape[1]))]])

12

In [91]:
american_restau=manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 0, manhattan_merged.columns[[1] + list(range(0, manhattan_merged.shape[1]))]]

In [135]:
pd.value_counts(american_restau.values.flatten())

American Restaurant               29
0                                 12
Vietnamese Restaurant             10
Italian Restaurant                 9
New American Restaurant            7
Greek Restaurant                   7
-73.96243270000001                 6
Morningside Heights, Manhattan     6
40.8089564                         6
Japanese Curry Restaurant          6
Seafood Restaurant                 5
Latin American Restaurant          4
Falafel Restaurant                 4
-73.9738236                        3
40.7374844                         3
Waterside Plaza, Manhattan         3
-73.9102628                        2
Marble Hill, Manhattan             2
Ethiopian Restaurant               2
Dig Inn                            2
40.8761173                         2
-73.97916732060398                 1
Boston Market                      1
Riverpark                          1
40.81009651809897                  1
40.87788463309788                  1
-73.9578685714203                  1
L

In [108]:
len(american_restau[american_restau['Venue Category'].str.contains('Spanish','Tapas')])

0

## 8.2. 2nd Cluster: Analysis

In [92]:
len(manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 1, manhattan_merged.columns[[1] + list(range(0, manhattan_merged.shape[1]))]])

262

In [137]:
main_american_restaurants=manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 1, manhattan_merged.columns[[1] + list(range(0, manhattan_merged.shape[1]))]]

In [138]:
pd.value_counts(main_american_restaurants.values.flatten())

1                                  262
Seafood Restaurant                 181
French Restaurant                  160
American Restaurant                134
Japanese Restaurant                131
Southern / Soul Food Restaurant    118
Sushi Restaurant                   117
Vegetarian / Vegan Restaurant       98
Spanish Restaurant                  84
Ramen Restaurant                    78
Italian Restaurant                  66
Caribbean Restaurant                66
African Restaurant                  53
Asian Restaurant                    47
Restaurant                          46
Mexican Restaurant                  44
New American Restaurant             43
Greek Restaurant                    40
Chinese Restaurant                  36
-73.9668408                         30
Cuban Restaurant                    30
40.7540369                          30
Tapas Restaurant                    27
-73.9464769                         26
40.8115504                          26
Indian Restaurant        

## 8.3. 3nd Cluster: Analysis

In [115]:
len(manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 2, manhattan_merged.columns[[1] + list(range(0, manhattan_merged.shape[1]))]])

88

In [112]:
mix_2=manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 2, manhattan_merged.columns[[1] + list(range(0, manhattan_merged.shape[1]))]]

In [113]:
len(mix_2[mix_2['Venue Category'].str.contains('Spanish','Tapas')])

4

In [139]:
pd.value_counts(mix_2.values.flatten())

Mexican Restaurant                         128
2                                           88
Thai Restaurant                             70
Italian Restaurant                          56
Latin American Restaurant                   44
Japanese Restaurant                         34
Mediterranean Restaurant                    31
Korean Restaurant                           27
Spanish Restaurant                          22
-73.9918181                                 22
40.7637581                                  22
Chinese Restaurant                          19
American Restaurant                         19
French Restaurant                           19
Indian Restaurant                           16
Swiss Restaurant                            12
Hawaiian Restaurant                         12
Middle Eastern Restaurant                   12
Greek Restaurant                            12
-73.93892129999999                          11
East Harlem (Spanish Harlem), Manhattan     11
Midtown West,

## 8.4. 4nd Cluster: Analysis

In [96]:
len(manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 3, manhattan_merged.columns[[1] + list(range(0, manhattan_merged.shape[1]))]])

86

In [98]:
asiatic_restau=manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 3, manhattan_merged.columns[[1] + list(range(0, manhattan_merged.shape[1]))]]

In [116]:
len(asiatic_restau[asiatic_restau['Venue Category'].str.contains('Spanish','Tapas')])

0

In [140]:
pd.value_counts(asiatic_restau.values.flatten())

Chinese Restaurant                   206
3                                     86
Dim Sum Restaurant                    68
Mexican Restaurant                    63
Peking Duck Restaurant                51
Vegetarian / Vegan Restaurant         38
Vietnamese Restaurant                 31
Filipino Restaurant                   19
-74.00047339999999                    18
40.7143585                            18
Five Points (historic), Manhattan     18
40.7107469                            17
-73.99696                             17
Two Bridges, Manhattan                17
Ramen Restaurant                      17
French Restaurant                     16
Japanese Restaurant                   15
Taiwanese Restaurant                  13
-74.00283639999999                    13
40.7140519                            13
Civic Center, Manhattan               13
-73.9842724                           12
40.715033                             12
Lower East Side, Manhattan            12
Malay Restaurant

## 8.4. 4nd Cluster: Analysis

In [99]:
len(manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 4, manhattan_merged.columns[[1] + list(range(0, manhattan_merged.shape[1]))]])

272

In [142]:
italian_american_restaurants=manhattan_merged.loc[manhattan_merged['Cluster Labels'] == 4, manhattan_merged.columns[[1] + list(range(0, manhattan_merged.shape[1]))]]

In [143]:
len(italian_american_restaurants[italian_american_restaurants['Venue Category'].str.contains('Spanish','Tapas')])

3

In [144]:
pd.value_counts(italian_american_restaurants.values.flatten())

Italian Restaurant               454
4                                272
American Restaurant              233
New American Restaurant          111
Japanese Restaurant              107
Indian Restaurant                103
French Restaurant                 85
Korean Restaurant                 68
Mediterranean Restaurant          67
Mexican Restaurant                66
Restaurant                        57
Seafood Restaurant                38
Asian Restaurant                  37
Thai Restaurant                   35
Tapas Restaurant                  34
Sushi Restaurant                  33
Falafel Restaurant                30
Latin American Restaurant         25
Australian Restaurant             24
Vegetarian / Vegan Restaurant     24
Vietnamese Restaurant             24
Cuban Restaurant                  24
Spanish Restaurant                22
Argentinian Restaurant            20
40.7735649                        19
-73.9565551                       19
Upper East Side, Manhattan        19
C