# Opening an Asian Restaurant in Amsterdam

This notebook analyzes Amsterdam's neighbourhoods using venue data retrieved from the Foursquare API, focusing on the concentration and location of Chinese, Indian and Japanese restaurants.

## Initial Setup

In [1]:
# Install folium & beautifulsoup
!pip install folium
!pip install beautifulsoup4
!pip install seaborn==0.9.0



You should consider upgrading via the 'c:\users\tan.c.8\appdata\local\continuum\anaconda3\python.exe -m pip install --upgrade pip' command.




You should consider upgrading via the 'c:\users\tan.c.8\appdata\local\continuum\anaconda3\python.exe -m pip install --upgrade pip' command.




You should consider upgrading via the 'c:\users\tan.c.8\appdata\local\continuum\anaconda3\python.exe -m pip install --upgrade pip' command.


In [2]:
import json
import os
from html.parser import HTMLParser

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from bs4 import BeautifulSoup

%matplotlib inline

## Retrieving and processing Amsterdam Neighbourhood Data

In [3]:
# load the neighbourhood table (name, latitude, longitude) and preview its first rows
amsterdam_df = pd.read_csv(filepath_or_buffer='Amsterdam_Neighbourhoods.csv')
amsterdam_df.head()

Unnamed: 0,Neighbourhoods,Latitude,Longitude
0,Admiralenbuurt,52.372734,4.856363
1,Amsteldorp,52.36054,4.90516
2,Amsterdam Oud-West,52.36539,4.87022
3,Amsterdam Oud-Zuid,52.35235,4.87788
4,Amsterdam Science Park,52.35432,4.95803


In [4]:
# [latitude, longitude] pair used as the centre point of every folium map in this notebook
amsterdam_latlong = [52.3676, 4.9041]

In [5]:
# create map of Amsterdam using latitude and longitude values
amsterdam_map = folium.Map(location=amsterdam_latlong, zoom_start=11)

# add one circle marker per neighbourhood; clicking it shows the neighbourhood name
for lat, lng, name in zip(amsterdam_df['Latitude'], amsterdam_df['Longitude'], amsterdam_df['Neighbourhoods']):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        # parse_html belongs to the Popup (it escapes the name), not to the marker itself
        popup=folium.Popup(name, parse_html=True),
        color='#2E7D32',
        fill=True,
        fill_color='#FFEB3B',
        fill_opacity=0.75,
    ).add_to(amsterdam_map)

amsterdam_map

## Retrieving Venue Data using Foursquare API

In [67]:
# define Foursquare Credentials and Version
# Credentials are read from the environment so they never have to be typed into the notebook;
# the placeholders keep the cell runnable when the variables are not set.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'XXXXXXXXXXXX') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'XXXXXXXXXXXX') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
# cell outputs are saved with the notebook, so only a mask of the secret is printed
print('CLIENT_SECRET:' + '*' * len(CLIENT_SECRET))

Your credentials:
CLIENT_ID: XXXXXXXXXXXX
CLIENT_SECRET:XXXXXXXXXXXX


In [7]:
radius = 2000
LIMIT = 100

# one tuple per venue returned for a neighbourhood:
# (neighbourhood, neighbourhood lat, neighbourhood lng, venue name, venue lat, venue lng, venue category)
venues = []

url = "https://api.foursquare.com/v2/venues/explore"

for lat, long, neighborhoods in zip(amsterdam_df['Latitude'], amsterdam_df['Longitude'], amsterdam_df['Neighbourhoods']):

    # let requests build and URL-encode the query string instead of formatting it by hand
    query = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
        'll': '{},{}'.format(lat, long),
        'radius': radius,
        'limit': LIMIT,
    }

    # make the GET request; the timeout stops one stalled call from hanging the whole loop
    response = requests.get(url, params=query, timeout=30)
    # surface HTTP failures as an HTTPError here rather than as a KeyError on the payload below
    response.raise_for_status()
    results = response.json()["response"]['groups'][0]['items']

    # return only relevant information for each nearby venue
    for venue in results:
        categories = venue['venue']['categories']
        if not categories:
            # without a category the venue cannot be used by the category analysis, so skip it
            continue
        venues.append((
            neighborhoods,
            lat,
            long,
            venue['venue']['name'],
            venue['venue']['location']['lat'],
            venue['venue']['location']['lng'],
            categories[0]['name']))

In [8]:
# convert the venues list into a new DataFrame, naming the columns at construction time
# so that an empty `venues` list still produces a correctly labelled (empty) frame
venues_df = pd.DataFrame(
    venues,
    columns=['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory'])

print(venues_df.shape)
venues_df.head()

(9445, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Admiralenbuurt,52.372734,4.856363,Radijs,52.371049,4.856756,Bistro
1,Admiralenbuurt,52.372734,4.856363,Café Cook,52.371208,4.852792,Pub
2,Admiralenbuurt,52.372734,4.856363,Rein Cityspa,52.371217,4.855969,Spa
3,Admiralenbuurt,52.372734,4.856363,Local Hero,52.374947,4.860698,Café
4,Admiralenbuurt,52.372734,4.856363,Ram's Roti West,52.374865,4.859968,Caribbean Restaurant


In [9]:
# number of non-null values in every column, per neighbourhood
venues_df.groupby("Neighborhood").count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Admiralenbuurt,100,100,100,100,100,100
Amsteldorp,100,100,100,100,100,100
Amsterdam Oud-West,100,100,100,100,100,100
Amsterdam Oud-Zuid,100,100,100,100,100,100
Amsterdam Science Park,100,100,100,100,100,100
Apollobuurt,100,100,100,100,100,100
Betondorp,71,71,71,71,71,71
Bijlmermeer,81,81,81,81,81,81
Binnenstad,100,100,100,100,100,100
Bos en Lommer,100,100,100,100,100,100


In [10]:
# count the distinct venue categories returned across all neighbourhoods
n_categories = venues_df['VenueCategory'].unique().size
print('There are {} uniques categories.'.format(n_categories))

There are 285 uniques categories.


In [11]:
# show the first 50 distinct venue categories, in order of first appearance
pd.unique(venues_df['VenueCategory'])[:50]

array(['Bistro', 'Pub', 'Spa', 'Café', 'Caribbean Restaurant',
       'Coffee Shop', 'Ramen Restaurant', 'Falafel Restaurant',
       'Tea Room', 'Dance Studio', 'Middle Eastern Restaurant', 'Park',
       'Lebanese Restaurant', 'Deli / Bodega', 'Scandinavian Restaurant',
       'Pet Café', 'Pizza Place', 'Beer Garden', 'Sandwich Place', 'Bar',
       'Italian Restaurant', 'Bakery', 'Fried Chicken Joint', 'Nightclub',
       'Yoga Studio', 'Gym', 'Wine Bar', 'Hostel', 'Burger Joint',
       'Bookstore', 'Movie Theater', 'Indonesian Restaurant', 'Hotel',
       'Thai Restaurant', 'Restaurant', 'Food Court',
       'Moroccan Restaurant', 'Grocery Store', 'Dutch Restaurant',
       'Mediterranean Restaurant', 'Turkish Restaurant', 'Ice Cream Shop',
       'Creperie', 'Cocktail Bar', 'French Restaurant',
       'Empanada Restaurant', 'Market', 'Marijuana Dispensary',
       'Garden Center', 'Korean Restaurant'], dtype=object)

In [12]:
# one indicator column per venue category, one row per venue
amsterdam_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# re-attach the neighbourhood each venue row belongs to
amsterdam_onehot['Neighbourhoods'] = venues_df['Neighborhood']

# rotate the column order so the column just appended at the end comes first
column_order = amsterdam_onehot.columns.tolist()
fixed_columns = column_order[-1:] + column_order[:-1]
amsterdam_onehot = amsterdam_onehot[fixed_columns]

print(amsterdam_onehot.shape)
amsterdam_onehot.head()

(9445, 286)


Unnamed: 0,Neighbourhoods,Accessories Store,Adult Boutique,Advertising Agency,African Restaurant,American Restaurant,Aquarium,Arcade,Argentinian Restaurant,Art Gallery,...,Warehouse Store,Water Park,Whisky Bar,Windmill,Wine Bar,Wine Shop,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,Admiralenbuurt,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Admiralenbuurt,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Admiralenbuurt,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Admiralenbuurt,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Admiralenbuurt,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [13]:
# average the one-hot rows per neighbourhood: each value becomes the share of that
# neighbourhood's venues falling in the category
category_means = amsterdam_onehot.groupby("Neighbourhoods").mean()
amsterdam_grouped = category_means.reset_index()

print(amsterdam_grouped.shape)
amsterdam_grouped

(105, 286)


Unnamed: 0,Neighbourhoods,Accessories Store,Adult Boutique,Advertising Agency,African Restaurant,American Restaurant,Aquarium,Arcade,Argentinian Restaurant,Art Gallery,...,Warehouse Store,Water Park,Whisky Bar,Windmill,Wine Bar,Wine Shop,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,Admiralenbuurt,0.00,0.00,0.00,0.00,0.000000,0.00,0.000000,0.00,0.000000,...,0.00,0.0,0.00,0.00,0.01,0.00,0.00,0.03,0.00,0.00
1,Amsteldorp,0.00,0.00,0.00,0.00,0.000000,0.00,0.000000,0.00,0.000000,...,0.00,0.0,0.00,0.00,0.02,0.01,0.01,0.02,0.01,0.00
2,Amsterdam Oud-West,0.00,0.00,0.00,0.00,0.000000,0.00,0.000000,0.00,0.000000,...,0.00,0.0,0.01,0.00,0.02,0.00,0.00,0.07,0.00,0.00
3,Amsterdam Oud-Zuid,0.00,0.00,0.00,0.00,0.000000,0.00,0.000000,0.00,0.000000,...,0.00,0.0,0.01,0.00,0.01,0.01,0.00,0.03,0.00,0.00
4,Amsterdam Science Park,0.00,0.00,0.00,0.00,0.000000,0.00,0.000000,0.00,0.000000,...,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
5,Apollobuurt,0.00,0.00,0.00,0.00,0.000000,0.00,0.000000,0.00,0.000000,...,0.00,0.0,0.00,0.00,0.00,0.01,0.00,0.01,0.00,0.00
6,Betondorp,0.00,0.00,0.00,0.00,0.014085,0.00,0.000000,0.00,0.014085,...,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
7,Bijlmermeer,0.00,0.00,0.00,0.00,0.000000,0.00,0.000000,0.00,0.000000,...,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
8,Binnenstad,0.00,0.00,0.00,0.00,0.000000,0.00,0.000000,0.00,0.000000,...,0.00,0.0,0.00,0.00,0.00,0.00,0.01,0.01,0.01,0.00
9,Bos en Lommer,0.00,0.00,0.00,0.00,0.000000,0.00,0.020000,0.00,0.000000,...,0.00,0.0,0.00,0.00,0.01,0.00,0.00,0.02,0.00,0.00


In [14]:
num_top_venues = 5

# Index by neighbourhood once, so every row is already that neighbourhood's category-frequency
# vector; this avoids re-filtering and transposing the whole frame on each iteration.
freq_by_nbrhood = amsterdam_grouped.set_index('Neighbourhoods')

for nbrhood, freqs in freq_by_nbrhood.iterrows():
    print("---- "+nbrhood+" ----")
    # build the two-column (venue, freq) frame directly instead of assigning into a slice,
    # which is what triggers pandas' SettingWithCopyWarning
    temp = (
        freqs.astype(float)
        .round(2)
        .rename_axis('venue')
        .reset_index(name='freq')
    )
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

---- Admiralenbuurt ----
                venue  freq
0         Coffee Shop  0.10
1          Restaurant  0.05
2                Café  0.04
3  Italian Restaurant  0.03
4         Yoga Studio  0.03


---- Amsteldorp ----
            venue  freq
0           Hotel  0.07
1  Breakfast Spot  0.06
2      Restaurant  0.05
3     Coffee Shop  0.05
4             Bar  0.04


---- Amsterdam Oud-West ----
                venue  freq
0         Yoga Studio  0.07
1               Hotel  0.05
2         Coffee Shop  0.05
3  Italian Restaurant  0.04
4          Art Museum  0.03


---- Amsterdam Oud-Zuid ----
                 venue  freq
0  Japanese Restaurant  0.05
1           Art Museum  0.05
2   Seafood Restaurant  0.04
3          Coffee Shop  0.04
4       Breakfast Spot  0.04


---- Amsterdam Science Park ----
                  venue  freq
0  Gym / Fitness Center  0.05
1                   Bar  0.05
2            Restaurant  0.04
3           Coffee Shop  0.04
4       Harbor / Marina  0.04


---- Apollobuurt --

                venue  freq
0         Supermarket  0.10
1               Hotel  0.08
2         Snack Place  0.06
3            Bus Stop  0.04
4  Turkish Restaurant  0.04


---- Molenwijk ----
                venue  freq
0         Supermarket  0.08
1        Soccer Field  0.06
2  Chinese Restaurant  0.04
3            Bus Stop  0.04
4                 Gym  0.04


---- Museumkwartier ----
            venue  freq
0             Bar  0.08
1           Hotel  0.06
2          Bakery  0.04
3  Ice Cream Shop  0.04
4            Café  0.04


---- NDSM ----
         venue  freq
0   Restaurant  0.14
1         Café  0.06
2          Bar  0.06
3  Coffee Shop  0.05
4    Gastropub  0.04


---- Negen Straatjes ----
            venue  freq
0           Hotel  0.10
1      Restaurant  0.05
2    Cocktail Bar  0.05
3  Breakfast Spot  0.05
4             Bar  0.05


---- Nieuw Sloten ----
                venue  freq
0         Supermarket  0.10
1  Turkish Restaurant  0.05
2           Drugstore  0.04
3         Coffee Sh

         venue  freq
0          Bar  0.08
1  Zoo Exhibit  0.05
2  Coffee Shop  0.04
3         Café  0.04
4        Hotel  0.04


---- Westelijke Tuinsteden ----
            venue  freq
0           Hotel  0.10
1      Restaurant  0.05
2    Cocktail Bar  0.05
3  Breakfast Spot  0.05
4             Bar  0.05


---- Westerpark ----
         venue  freq
0          Bar  0.09
1   Restaurant  0.09
2  Coffee Shop  0.08
3         Café  0.06
4       Bistro  0.03


---- Willemspark ----
                venue  freq
0      Breakfast Spot  0.06
1                Café  0.05
2          Art Museum  0.05
3              Bakery  0.04
4  Italian Restaurant  0.04


---- Zeeburgereiland ----
            venue  freq
0           Hotel  0.10
1      Restaurant  0.05
2    Cocktail Bar  0.05
3  Breakfast Spot  0.05
4             Bar  0.05


---- Zeeheldenbuurt ----
         venue  freq
0   Restaurant  0.09
1          Bar  0.08
2  Coffee Shop  0.06
3      Theater  0.05
4         Café  0.04


---- Zuidas ----
           

## Analyzing Most Common Venues in Each Neighbourhood

In [15]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first element of ``row`` is skipped (callers pass rows whose first
    entry is the neighbourhood name); the remaining entries are sorted in
    descending order and the index labels of the first ``num_top_venues``
    of them are returned as a NumPy array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    top_labels = ranked.index.values
    return top_labels[:num_top_venues]

In [16]:
indicators = ['st', 'nd', 'rd']
num_top_venues = 10

# create columns according to number of top venues
columns = ['Neighbourhoods']
for ind in range(num_top_venues):
    rank = ind + 1
    # 11th-13th take 'th' even though they end in 1-3; every other rank follows its last digit.
    # An explicit test replaces the former bare `except:`, which would also have hidden real errors.
    if rank % 100 in (11, 12, 13) or rank % 10 not in (1, 2, 3):
        suffix = 'th'
    else:
        suffix = indicators[rank % 10 - 1]
    columns.append('{}{} Most Common Venue'.format(rank, suffix))

# create a new dataframe
amsterdam_sorted_df = pd.DataFrame(columns=columns)
amsterdam_sorted_df['Neighbourhoods'] = amsterdam_grouped['Neighbourhoods']

# fill each row with that neighbourhood's categories, most frequent first
for ind in range(amsterdam_grouped.shape[0]):
    amsterdam_sorted_df.iloc[ind, 1:] = return_most_common_venues(amsterdam_grouped.iloc[ind, :], num_top_venues)

amsterdam_sorted_df.head()

Unnamed: 0,Neighbourhoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Admiralenbuurt,Coffee Shop,Restaurant,Café,Pizza Place,Indonesian Restaurant,Dutch Restaurant,Italian Restaurant,Caribbean Restaurant,Hotel,Bar
1,Amsteldorp,Hotel,Breakfast Spot,Coffee Shop,Restaurant,Bar,Cocktail Bar,Bakery,Italian Restaurant,Café,Steakhouse
2,Amsterdam Oud-West,Yoga Studio,Hotel,Coffee Shop,Italian Restaurant,Café,Music Venue,Art Museum,Pizza Place,Cocktail Bar,Bar
3,Amsterdam Oud-Zuid,Japanese Restaurant,Art Museum,Breakfast Spot,Deli / Bodega,Ice Cream Shop,Coffee Shop,Seafood Restaurant,Bakery,Park,Italian Restaurant
4,Amsterdam Science Park,Gym / Fitness Center,Bar,Coffee Shop,Soccer Field,Harbor / Marina,French Restaurant,Restaurant,Ice Cream Shop,Café,Bus Stop


## Analyzing Chinese, Indian and Japanese Restaurants in Amsterdam

In [17]:
# number of neighbourhoods with at least one Chinese restaurant among their venues
int((amsterdam_grouped["Chinese Restaurant"] > 0).sum())

21

In [18]:
# number of neighbourhoods with at least one Indian restaurant among their venues
int((amsterdam_grouped["Indian Restaurant"] > 0).sum())

28

In [20]:
# number of neighbourhoods with at least one Japanese restaurant among their venues
int((amsterdam_grouped["Japanese Restaurant"] > 0).sum())

29

## Cluster Analysis of Chinese Restaurants

In [25]:
from sklearn.cluster import KMeans

In [26]:
# keep only the neighbourhood name and its Chinese-restaurant frequency
chi_columns = ["Neighbourhoods", "Chinese Restaurant"]
amsterdam_chi = amsterdam_grouped[chi_columns]

In [27]:
# preview the first five rows
amsterdam_chi.head(5)

Unnamed: 0,Neighbourhoods,Chinese Restaurant
0,Admiralenbuurt,0.0
1,Amsteldorp,0.0
2,Amsterdam Oud-West,0.0
3,Amsterdam Oud-Zuid,0.0
4,Amsterdam Science Park,0.01


In [39]:
# set number of clusters
kclusters = 2

# keep only the numeric feature column; `columns=` replaces the positional axis
# argument, which pandas 2.0 no longer accepts
amsterdam_clustering_chi = amsterdam_chi.drop(columns=["Neighbourhoods"])

# run k-means clustering; n_init is pinned because its default differs between scikit-learn releases
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(amsterdam_clustering_chi)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 1, 1, 1, 1, 1, 1, 0, 1, 1])

In [40]:
# copy the per-neighbourhood frequencies and attach the cluster label k-means gave each row
amsterdam_chi_merged = amsterdam_chi.assign(**{"Cluster Labels": kmeans.labels_})

In [41]:
# the key column is already named "Neighbourhoods", so no rename is needed before previewing
amsterdam_chi_merged.head()

Unnamed: 0,Neighbourhoods,Chinese Restaurant,Cluster Labels
0,Admiralenbuurt,0.0,1
1,Amsteldorp,0.0,1
2,Amsterdam Oud-West,0.0,1
3,Amsterdam Oud-Zuid,0.0,1
4,Amsterdam Science Park,0.01,1


In [42]:
# look up each neighbourhood's latitude/longitude in amsterdam_df by name
neighbourhood_coords = amsterdam_df.set_index("Neighbourhoods")
amsterdam_chi_merged = amsterdam_chi_merged.join(neighbourhood_coords, on="Neighbourhoods")

print(amsterdam_chi_merged.shape)
amsterdam_chi_merged.head()

(105, 5)


Unnamed: 0,Neighbourhoods,Chinese Restaurant,Cluster Labels,Latitude,Longitude
0,Admiralenbuurt,0.0,1,52.372734,4.856363
1,Amsteldorp,0.0,1,52.36054,4.90516
2,Amsterdam Oud-West,0.0,1,52.36539,4.87022
3,Amsterdam Oud-Zuid,0.0,1,52.35235,4.87788
4,Amsterdam Science Park,0.01,1,52.35432,4.95803


In [43]:
# order the rows by cluster label so each cluster's neighbourhoods are listed together
print(amsterdam_chi_merged.shape)
amsterdam_chi_merged = amsterdam_chi_merged.sort_values("Cluster Labels")
amsterdam_chi_merged

(105, 5)


Unnamed: 0,Neighbourhoods,Chinese Restaurant,Cluster Labels,Latitude,Longitude
52,NDSM,0.020000,0,52.400143,4.897116
54,Nieuw Sloten,0.040000,0,52.345610,4.811920
82,Slotervaart,0.020000,0,52.352360,4.832320
36,IJburg,0.021739,0,52.365759,4.975539
67,Oud Osdorp,0.020000,0,52.356360,4.790600
66,Osdorp,0.020000,0,52.356360,4.790600
65,Oostzanerwerf,0.041667,0,52.425365,4.885640
29,Gaasperdam,0.050000,0,52.296800,4.978120
91,Tuindorp Oostzaan,0.016949,0,52.413210,4.888060
13,Bullewijk,0.031915,0,52.304868,4.950105


In [44]:
# create map
amsterdam_chi_map_clusters = folium.Map(location=amsterdam_latlong, zoom_start=11)

# One colour per cluster label. The labels are 0-based, so they index the palette directly;
# sizing the palette from the data keeps every cluster's colour distinct for any cluster count.
n_clusters = int(amsterdam_chi_merged['Cluster Labels'].max()) + 1
colors_array = cm.rainbow(np.linspace(0, 1, n_clusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(amsterdam_chi_merged['Latitude'], amsterdam_chi_merged['Longitude'], amsterdam_chi_merged['Neighbourhoods'], amsterdam_chi_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(amsterdam_chi_map_clusters)

amsterdam_chi_map_clusters

In [45]:
# neighbourhoods assigned to cluster 0
in_cluster_0 = amsterdam_chi_merged['Cluster Labels'].eq(0)
amsterdam_chi_merged[in_cluster_0]

Unnamed: 0,Neighbourhoods,Chinese Restaurant,Cluster Labels,Latitude,Longitude
52,NDSM,0.02,0,52.400143,4.897116
54,Nieuw Sloten,0.04,0,52.34561,4.81192
82,Slotervaart,0.02,0,52.35236,4.83232
36,IJburg,0.021739,0,52.365759,4.975539
67,Oud Osdorp,0.02,0,52.35636,4.7906
66,Osdorp,0.02,0,52.35636,4.7906
65,Oostzanerwerf,0.041667,0,52.425365,4.88564
29,Gaasperdam,0.05,0,52.2968,4.97812
91,Tuindorp Oostzaan,0.016949,0,52.41321,4.88806
13,Bullewijk,0.031915,0,52.304868,4.950105


In [46]:
# neighbourhoods assigned to cluster 1
in_cluster_1 = amsterdam_chi_merged['Cluster Labels'].eq(1)
amsterdam_chi_merged[in_cluster_1]

Unnamed: 0,Neighbourhoods,Chinese Restaurant,Cluster Labels,Latitude,Longitude
64,Oostpoort,0.000000,1,52.356740,4.927790
74,Prinses Irenebuurt,0.000000,1,52.341769,4.875224
57,Nieuwendammerdijk en Buiksloterdijk,0.000000,1,52.386748,4.947710
73,Plantage,0.000000,1,52.366620,4.911890
72,Overtoomse Veld,0.000000,1,52.363540,4.836800
71,Overtoombuurt,0.000000,1,52.359582,4.862307
58,Olympisch Kwartier,0.000000,1,52.369930,4.907880
70,Overhoeks,0.000000,1,52.391680,4.905780
63,Oostoever,0.010000,1,52.370000,4.830460
69,Oude Pijp,0.000000,1,52.356250,4.890570


## Cluster Analysis of Indian Restaurants

In [47]:
# keep only the neighbourhood name and its Indian-restaurant frequency
ind_columns = ["Neighbourhoods", "Indian Restaurant"]
amsterdam_ind = amsterdam_grouped[ind_columns]

In [48]:
# preview the first five rows
amsterdam_ind.head(5)

Unnamed: 0,Neighbourhoods,Indian Restaurant
0,Admiralenbuurt,0.0
1,Amsteldorp,0.0
2,Amsterdam Oud-West,0.0
3,Amsterdam Oud-Zuid,0.0
4,Amsterdam Science Park,0.0


In [49]:
# set number of clusters
kclusters = 2

# keep only the numeric feature column; `columns=` replaces the positional axis
# argument, which pandas 2.0 no longer accepts
amsterdam_clustering_ind = amsterdam_ind.drop(columns=["Neighbourhoods"])

# run k-means clustering; n_init is pinned because its default differs between scikit-learn releases
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(amsterdam_clustering_ind)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0])

In [50]:
# copy the per-neighbourhood frequencies and attach the cluster label k-means gave each row
amsterdam_ind_merged = amsterdam_ind.assign(**{"Cluster Labels": kmeans.labels_})

In [51]:
# the key column is already named "Neighbourhoods", so no rename is needed before previewing
amsterdam_ind_merged.head()

Unnamed: 0,Neighbourhoods,Indian Restaurant,Cluster Labels
0,Admiralenbuurt,0.0,0
1,Amsteldorp,0.0,0
2,Amsterdam Oud-West,0.0,0
3,Amsterdam Oud-Zuid,0.0,0
4,Amsterdam Science Park,0.0,0


In [52]:
# look up each neighbourhood's latitude/longitude in amsterdam_df by name
neighbourhood_coords = amsterdam_df.set_index("Neighbourhoods")
amsterdam_ind_merged = amsterdam_ind_merged.join(neighbourhood_coords, on="Neighbourhoods")

print(amsterdam_ind_merged.shape)
amsterdam_ind_merged.head()

(105, 5)


Unnamed: 0,Neighbourhoods,Indian Restaurant,Cluster Labels,Latitude,Longitude
0,Admiralenbuurt,0.0,0,52.372734,4.856363
1,Amsteldorp,0.0,0,52.36054,4.90516
2,Amsterdam Oud-West,0.0,0,52.36539,4.87022
3,Amsterdam Oud-Zuid,0.0,0,52.35235,4.87788
4,Amsterdam Science Park,0.0,0,52.35432,4.95803


In [53]:
# order the rows by cluster label so each cluster's neighbourhoods are listed together
print(amsterdam_ind_merged.shape)
amsterdam_ind_merged = amsterdam_ind_merged.sort_values("Cluster Labels")
amsterdam_ind_merged

(105, 5)


Unnamed: 0,Neighbourhoods,Indian Restaurant,Cluster Labels,Latitude,Longitude
0,Admiralenbuurt,0.00,0,52.372734,4.856363
72,Overtoomse Veld,0.00,0,52.363540,4.836800
71,Overtoombuurt,0.00,0,52.359582,4.862307
69,Oude Pijp,0.00,0,52.356250,4.890570
68,Oud-Oost,0.00,0,52.360130,4.925320
67,Oud Osdorp,0.00,0,52.356360,4.790600
66,Osdorp,0.00,0,52.356360,4.790600
65,Oostzanerwerf,0.00,0,52.425365,4.885640
64,Oostpoort,0.00,0,52.356740,4.927790
63,Oostoever,0.00,0,52.370000,4.830460


In [54]:
# create map
amsterdam_ind_map_clusters = folium.Map(location=amsterdam_latlong, zoom_start=11)

# One colour per cluster label. The labels are 0-based, so they index the palette directly;
# sizing the palette from the data keeps every cluster's colour distinct for any cluster count.
n_clusters = int(amsterdam_ind_merged['Cluster Labels'].max()) + 1
colors_array = cm.rainbow(np.linspace(0, 1, n_clusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(amsterdam_ind_merged['Latitude'], amsterdam_ind_merged['Longitude'], amsterdam_ind_merged['Neighbourhoods'], amsterdam_ind_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(amsterdam_ind_map_clusters)

amsterdam_ind_map_clusters

In [55]:
# neighbourhoods assigned to cluster 0
in_cluster_0 = amsterdam_ind_merged['Cluster Labels'].eq(0)
amsterdam_ind_merged[in_cluster_0]

Unnamed: 0,Neighbourhoods,Indian Restaurant,Cluster Labels,Latitude,Longitude
0,Admiralenbuurt,0.0,0,52.372734,4.856363
72,Overtoomse Veld,0.0,0,52.363540,4.836800
71,Overtoombuurt,0.0,0,52.359582,4.862307
69,Oude Pijp,0.0,0,52.356250,4.890570
68,Oud-Oost,0.0,0,52.360130,4.925320
67,Oud Osdorp,0.0,0,52.356360,4.790600
66,Osdorp,0.0,0,52.356360,4.790600
65,Oostzanerwerf,0.0,0,52.425365,4.885640
64,Oostpoort,0.0,0,52.356740,4.927790
63,Oostoever,0.0,0,52.370000,4.830460


In [56]:
# neighbourhoods assigned to cluster 1
in_cluster_1 = amsterdam_ind_merged['Cluster Labels'].eq(1)
amsterdam_ind_merged[in_cluster_1]

Unnamed: 0,Neighbourhoods,Indian Restaurant,Cluster Labels,Latitude,Longitude
102,Zeeburgereiland,0.01,1,52.36993,4.90788
8,Binnenstad,0.01,1,52.37216,4.90437
86,Steigereiland,0.01,1,52.36993,4.90788
92,Uilenburg,0.01,1,52.369367,4.90275
12,Buitenveldert,0.01,1,52.33093,4.87518
99,Westelijke Tuinsteden,0.01,1,52.36993,4.90788
15,Burgwallen Oude Zijde,0.01,1,52.37169,4.89724
22,De Wallen,0.01,1,52.37169,4.89724
17,Cruquiuseiland,0.01,1,52.36993,4.90788
48,Lastage,0.01,1,52.36922,4.90568


## Cluster Analysis of Japanese Restaurants

In [57]:
# keep only the neighbourhood name and its Japanese-restaurant frequency
jp_columns = ["Neighbourhoods", "Japanese Restaurant"]
amsterdam_jp = amsterdam_grouped[jp_columns]

In [58]:
# preview the first five rows
amsterdam_jp.head(5)

Unnamed: 0,Neighbourhoods,Japanese Restaurant
0,Admiralenbuurt,0.0
1,Amsteldorp,0.01
2,Amsterdam Oud-West,0.0
3,Amsterdam Oud-Zuid,0.05
4,Amsterdam Science Park,0.01


In [59]:
# set number of clusters
kclusters = 2

# keep only the numeric feature column; `columns=` replaces the positional axis
# argument, which pandas 2.0 no longer accepts
amsterdam_clustering_jp = amsterdam_jp.drop(columns=["Neighbourhoods"])

# run k-means clustering; n_init is pinned because its default differs between scikit-learn releases
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(amsterdam_clustering_jp)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 1, 1, 0, 1, 0, 1, 1, 1, 1])

In [60]:
# copy the per-neighbourhood frequencies and attach the cluster label k-means gave each row
amsterdam_jp_merged = amsterdam_jp.assign(**{"Cluster Labels": kmeans.labels_})

In [61]:
# the key column is already named "Neighbourhoods", so no rename is needed before previewing
amsterdam_jp_merged.head()

Unnamed: 0,Neighbourhoods,Japanese Restaurant,Cluster Labels
0,Admiralenbuurt,0.0,1
1,Amsteldorp,0.01,1
2,Amsterdam Oud-West,0.0,1
3,Amsterdam Oud-Zuid,0.05,0
4,Amsterdam Science Park,0.01,1


In [62]:
# look up each neighbourhood's latitude/longitude in amsterdam_df by name
neighbourhood_coords = amsterdam_df.set_index("Neighbourhoods")
amsterdam_jp_merged = amsterdam_jp_merged.join(neighbourhood_coords, on="Neighbourhoods")

print(amsterdam_jp_merged.shape)
amsterdam_jp_merged.head()

(105, 5)


Unnamed: 0,Neighbourhoods,Japanese Restaurant,Cluster Labels,Latitude,Longitude
0,Admiralenbuurt,0.0,1,52.372734,4.856363
1,Amsteldorp,0.01,1,52.36054,4.90516
2,Amsterdam Oud-West,0.0,1,52.36539,4.87022
3,Amsterdam Oud-Zuid,0.05,0,52.35235,4.87788
4,Amsterdam Science Park,0.01,1,52.35432,4.95803


In [63]:
# order the rows by cluster label so each cluster's neighbourhoods are listed together
print(amsterdam_jp_merged.shape)
amsterdam_jp_merged = amsterdam_jp_merged.sort_values("Cluster Labels")
amsterdam_jp_merged

(105, 5)


Unnamed: 0,Neighbourhoods,Japanese Restaurant,Cluster Labels,Latitude,Longitude
104,Zuidas,0.050000,0,52.336690,4.875920
74,Prinses Irenebuurt,0.040000,0,52.341769,4.875224
24,Duivelseiland,0.040000,0,52.353760,4.884450
23,Diamantbuurt,0.040000,0,52.351124,4.906147
85,Stadionbuurt,0.040000,0,52.349521,4.884681
21,De Pijp,0.020000,0,52.356250,4.890570
69,Oude Pijp,0.020000,0,52.356250,4.890570
12,Buitenveldert,0.020000,0,52.330930,4.875180
96,Weesperzijde,0.030000,0,52.353673,4.909461
31,Gouden Reael,0.020000,0,52.326081,4.865353


In [64]:
# create map
amsterdam_jp_map_clusters = folium.Map(location=amsterdam_latlong, zoom_start=11)

# One colour per cluster label. The labels are 0-based, so they index the palette directly;
# sizing the palette from the data keeps every cluster's colour distinct for any cluster count.
n_clusters = int(amsterdam_jp_merged['Cluster Labels'].max()) + 1
colors_array = cm.rainbow(np.linspace(0, 1, n_clusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(amsterdam_jp_merged['Latitude'], amsterdam_jp_merged['Longitude'], amsterdam_jp_merged['Neighbourhoods'], amsterdam_jp_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(amsterdam_jp_map_clusters)

amsterdam_jp_map_clusters

In [65]:
# neighbourhoods assigned to cluster 0
in_cluster_0 = amsterdam_jp_merged['Cluster Labels'].eq(0)
amsterdam_jp_merged[in_cluster_0]

Unnamed: 0,Neighbourhoods,Japanese Restaurant,Cluster Labels,Latitude,Longitude
104,Zuidas,0.05,0,52.33669,4.87592
74,Prinses Irenebuurt,0.04,0,52.341769,4.875224
24,Duivelseiland,0.04,0,52.35376,4.88445
23,Diamantbuurt,0.04,0,52.351124,4.906147
85,Stadionbuurt,0.04,0,52.349521,4.884681
21,De Pijp,0.02,0,52.35625,4.89057
69,Oude Pijp,0.02,0,52.35625,4.89057
12,Buitenveldert,0.02,0,52.33093,4.87518
96,Weesperzijde,0.03,0,52.353673,4.909461
31,Gouden Reael,0.02,0,52.326081,4.865353


In [66]:
# neighbourhoods assigned to cluster 1
in_cluster_1 = amsterdam_jp_merged['Cluster Labels'].eq(1)
amsterdam_jp_merged[in_cluster_1]

Unnamed: 0,Neighbourhoods,Japanese Restaurant,Cluster Labels,Latitude,Longitude
63,Oostoever,0.000000,1,52.370000,4.830460
77,Rivierenbuurt,0.010000,1,53.206170,6.566960
76,Rieteilanden,0.000000,1,52.369930,4.907880
75,Ransdorp,0.000000,1,52.393060,4.994260
58,Olympisch Kwartier,0.000000,1,52.369930,4.907880
73,Plantage,0.000000,1,52.366620,4.911890
72,Overtoomse Veld,0.000000,1,52.363540,4.836800
71,Overtoombuurt,0.000000,1,52.359582,4.862307
70,Overhoeks,0.000000,1,52.391680,4.905780
60,Oostelijke Eilanden,0.000000,1,52.368570,4.928600
