# Battle of the Neighborhoods in Tokyo Capstone

## Created by Jason Robson

<br />

## Table of Contents

### 1. Retrieve database and clean data. Determine Latitude and Longitude.
<br />

### 2. Log into FourSquare
<br />

### 3. Collect data on coffee shops and restaurants
<br />

### 4. Cluster data
<br />

### 5. Create map

<br />

### 1. Retrieve database and clean data. 

In [None]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import os
from sklearn.cluster import KMeans
from geopy.geocoders import Nominatim 
import matplotlib.cm as cm
import matplotlib.colors as colors
!conda install -c conda-forge lxml --yes

### Pandas is used to transform the data into a dataframe. Data is taken from Wikipedia.

In [None]:
df = pd.read_html('https://en.wikipedia.org/wiki/Special_wards_of_Tokyo#List_of_special_wards') [3]
df

### Data is cleaned

In [None]:
df.drop("Flag", axis=1, inplace=True)
df.drop("Kanji", axis=1, inplace=True)
df.rename(columns={df.columns[2]:"Population"}, inplace=True)
df = df.rename(columns={"Density(/km2)": "Density"})
df = df.rename(columns={"Area(km2)": "Area"})
df.drop([23], inplace=True)
df

### Coordinates are determined and new table is pictured

In [8]:
geolocator = Nominatim(user_agent="Tokyo_explorer")

df['Major_Districts']= df['Name'].apply(geolocator.geocode).apply(lambda x: (x.latitude, x.longitude))
df[['Latitude', 'Longitude']] = df['Major_Districts'].apply(pd.Series)

df.drop(['Major_Districts'], axis=1, inplace=True)
df

Unnamed: 0,No.,Name,Population,Density,Area,Major districts,Latitude,Longitude
0,1,Chiyoda,59441,5100,11.66,"Nagatachō, Kasumigaseki, Ōtemachi, Marunouchi,...",35.69381,139.753216
1,2,Chūō,147620,14460,10.21,"Nihonbashi, Kayabachō, Ginza, Tsukiji, Hatchōb...",35.666255,139.775565
2,3,Minato,248071,12180,20.37,"Odaiba, Shinbashi, Hamamatsuchō, Mita, Roppong...",35.643227,139.740055
3,4,Shinjuku,339211,18620,18.22,"Shinjuku, Takadanobaba, Ōkubo, Kagurazaka, Ich...",35.693763,139.703632
4,5,Bunkyō,223389,19790,11.29,"Hongō, Yayoi, Hakusan",35.71881,139.744732
5,6,Taitō,200486,19830,10.11,"Ueno, Asakusa",35.71745,139.790859
6,7,Sumida,260358,18910,13.77,"Kinshichō, Morishita, Ryōgoku",35.700429,139.805017
7,8,Kōtō,502579,12510,40.16,"Kiba, Ariake, Kameido, Tōyōchō, Monzennakachō,...",35.649154,139.81279
8,9,Shinagawa,392492,17180,22.84,"Shinagawa, Gotanda, Ōsaki, Hatanodai, Ōimachi,...",35.599252,139.73891
9,10,Meguro,280283,19110,14.67,"Meguro, Nakameguro, Jiyugaoka, Komaba, Aobadai",35.62125,139.688014


### 2. Foursquare Login

### Venues near Tokyo special wards are retrieved.

In [138]:
CLIENT_ID = 'WL5230EQST0RNTHM4RACIFYTZXHGFUIMNRYICERYSR1UZ3QE'
CLIENT_SECRET = 'RODQMYVAL5ZGX1EFOTWJPUV40XHDV42DADTVBEKSCLLHCN3N'
VERSION = '20200601'

In [148]:
def getNearbyVenues(names, latitudes, longitudes):
    radius=500
    LIMIT=100
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Ward', 
                  'Ward Latitude', 
                  'Ward Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [149]:
Tokyo_venues = getNearbyVenues(names=df['Name'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

Chiyoda
Chūō
Minato
Shinjuku
Bunkyō
Taitō
Sumida
Kōtō
Shinagawa
Meguro
Ōta
Setagaya
Shibuya
Nakano
Suginami
Toshima
Kita
Arakawa
Itabashi
Nerima
Adachi
Katsushika
Edogawa


### 3. Collect data on coffee shops and restaurants.

### Determine unique categories and count

In [150]:
Tokyo_venues_coffee = Tokyo_venues[Tokyo_venues['Venue Category'].str.contains('Coffee')].reset_index(drop=True)
Tokyo_venues_coffee.index = np.arange(1, len(Tokyo_venues_coffee )+1)

print (Tokyo_venues_coffee['Venue Category'].value_counts())

Coffee Shop    31
Name: Venue Category, dtype: int64


In [151]:
print('There are {} unique categories.'.format(len(Tokyo_venues_coffee['Venue Category'].unique())))

There are 1 unique categories.


In [152]:
Tokyo_venues_restaurants = Tokyo_venues[Tokyo_venues['Venue Category'].str.contains('Restaurant')].reset_index(drop=True)
Tokyo_venues_restaurants.index = np.arange(1, len(Tokyo_venues_restaurants )+1)

print (Tokyo_venues_restaurants['Venue Category'].value_counts())

Ramen Restaurant                 52
Japanese Restaurant              45
Chinese Restaurant               34
Sushi Restaurant                 28
Italian Restaurant               22
Soba Restaurant                  20
Restaurant                       18
Donburi Restaurant               10
Tonkatsu Restaurant               9
Indian Restaurant                 8
French Restaurant                 7
Unagi Restaurant                  6
Yoshoku Restaurant                5
Dumpling Restaurant               5
Japanese Curry Restaurant         5
Korean Restaurant                 4
Yakitori Restaurant               4
Seafood Restaurant                4
Thai Restaurant                   3
Tempura Restaurant                2
South American Restaurant         2
Szechuan Restaurant               2
Brazilian Restaurant              2
Sukiyaki Restaurant               2
Hotpot Restaurant                 1
Kaiseki Restaurant                1
Spanish Restaurant                1
Dongbei Restaurant          

In [153]:
print('There are {} unique categories.'.format(len(Tokyo_venues_restaurants['Venue Category'].unique())))

There are 45 unique categories.


### Determine frequencies of coffee shops and restaurants.

In [154]:
Tokyo_Dist_venues_Top5 = Tokyo_venues_coffee['Venue Category'].value_counts()[0:5].to_frame(name='frequency')
Tokyo_Dist_venues_Top5=Tokyo_Dist_venues_Top5.reset_index()


Tokyo_Dist_venues_Top5.rename(index=str, columns={"index": "Venue Category", "frequency": "Frequency"}, inplace=True)
Tokyo_Dist_venues_Top5

Unnamed: 0,Venue Category,Frequency
0,Coffee Shop,31


In [155]:



Tokyo_Dist_venues_Top15 = Tokyo_venues_restaurants['Venue Category'].value_counts()[0:15].to_frame(name='frequency')
Tokyo_Dist_venues_Top15=Tokyo_Dist_venues_Top15.reset_index()


Tokyo_Dist_venues_Top15.rename(index=str, columns={"index": "Venue Category", "frequency": "Frequency"}, inplace=True)
Tokyo_Dist_venues_Top15

Unnamed: 0,Venue Category,Frequency
0,Ramen Restaurant,52
1,Japanese Restaurant,45
2,Chinese Restaurant,34
3,Sushi Restaurant,28
4,Italian Restaurant,22
5,Soba Restaurant,20
6,Restaurant,18
7,Donburi Restaurant,10
8,Tonkatsu Restaurant,9
9,Indian Restaurant,8


### Tokyo coffee shops by Ward

In [156]:
Tokyo_venues_coffee.head(15)

Unnamed: 0,Ward,Ward Latitude,Ward Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
1,Chiyoda,35.69381,139.753216,Mi Cafeto Café & Brasserie,35.694187,139.758268,Coffee Shop
2,Chiyoda,35.69381,139.753216,Starbucks,35.690846,139.757197,Coffee Shop
3,Chiyoda,35.69381,139.753216,Starbucks,35.695789,139.752266,Coffee Shop
4,Chiyoda,35.69381,139.753216,EXCELSIOR CAFFÉ,35.696766,139.751164,Coffee Shop
5,Chiyoda,35.69381,139.753216,St. Marc Café (サンマルクカフェ),35.69066,139.756979,Coffee Shop
6,Chiyoda,35.69381,139.753216,Starbucks,35.695611,139.750705,Coffee Shop
7,Chiyoda,35.69381,139.753216,Ueshima Coffee House (上島珈琲店),35.690824,139.757217,Coffee Shop
8,Chūō,35.666255,139.775565,Turret COFFEE,35.668105,139.771903,Coffee Shop
9,Chūō,35.666255,139.775565,Live Coffee (ライブコーヒー),35.668185,139.773762,Coffee Shop
10,Chūō,35.666255,139.775565,Tully's Coffee,35.667762,139.774028,Coffee Shop


### Tokyo restaurants by Ward

In [157]:
Tokyo_venues_restaurants.head(15)

Unnamed: 0,Ward,Ward Latitude,Ward Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
1,Chiyoda,35.69381,139.753216,Jimbocho Kurosu (神保町 黒須),35.695539,139.754851,Ramen Restaurant
2,Chiyoda,35.69381,139.753216,Bondy (欧風カレー ボンディ),35.695544,139.757356,Japanese Curry Restaurant
3,Chiyoda,35.69381,139.753216,Sushi Masa (九段下 寿司政),35.695234,139.752227,Sushi Restaurant
4,Chiyoda,35.69381,139.753216,たいよう軒,35.696454,139.754809,Chinese Restaurant
5,Chiyoda,35.69381,139.753216,Fukumen Tomo (覆麺 智),35.696403,139.75707,Ramen Restaurant
6,Chiyoda,35.69381,139.753216,咸亨酒店,35.69601,139.75673,Chinese Restaurant
7,Chiyoda,35.69381,139.753216,Yojinbo (用心棒),35.696409,139.756696,Ramen Restaurant
8,Chiyoda,35.69381,139.753216,三希房,35.697159,139.750652,Chinese Restaurant
9,Chiyoda,35.69381,139.753216,ぴえもん,35.696472,139.75124,Italian Restaurant
10,Chiyoda,35.69381,139.753216,Mandala (マンダラ),35.695413,139.757691,Indian Restaurant


### Coffee shop count by Ward

In [159]:
Tokyo_venues_coffee_ncount = Tokyo_venues_coffee.groupby(['Ward'])['Venue Category'].apply(lambda x: x[x.str.contains('Coffee Shop')].count())
Tokyo_venues_coffee_ncount

Ward
Chiyoda       7
Chūō          4
Katsushika    2
Kita          3
Meguro        1
Minato        1
Nakano        1
Shibuya       3
Shinjuku      1
Suginami      1
Sumida        1
Taitō         6
Name: Venue Category, dtype: int64

### Restaurant count by Ward

In [213]:
Tokyo_venues_restaurants_ncount = Tokyo_venues_restaurants.groupby(['Ward'])['Venue Category'].apply(lambda x: x[x.str.contains('Restaurant')].count())
Tokyo_venues_restaurants_ncount

Ward
Adachi         3
Arakawa        9
Bunkyō         4
Chiyoda       31
Chūō          57
Edogawa        2
Itabashi       3
Katsushika     7
Kita          16
Kōtō           2
Meguro         6
Minato        11
Nakano         9
Nerima         1
Setagaya       8
Shibuya       20
Shinagawa      6
Shinjuku      26
Suginami      10
Sumida         6
Taitō         24
Toshima       18
Ōta           41
Name: Venue Category, dtype: int64

### Coffee shop array

In [214]:
Tokyo_venues_coffee_df  = Tokyo_venues_coffee_ncount.to_frame().reset_index()
Tokyo_venues_coffee_df.columns = ['Ward', 'Number of Coffee Shops']
Tokyo_venues_coffee_df.index = np.arange(1, len(Tokyo_venues_coffee_df)+1)
list_rest_no =Tokyo_venues_coffee_df['Number of Coffee Shops'].to_list()
list_dist =Tokyo_venues_coffee_df['Ward'].to_list()
print(list_rest_no)
print(list_dist)

[7, 4, 2, 3, 1, 1, 1, 3, 1, 1, 1, 6]
['Chiyoda', 'Chūō', 'Katsushika', 'Kita', 'Meguro', 'Minato', 'Nakano', 'Shibuya', 'Shinjuku', 'Suginami', 'Sumida', 'Taitō']


### Restaurant array

In [198]:
Tokyo_venues_restaurants_df  = Tokyo_venues_restaurants_ncount.to_frame().reset_index()
Tokyo_venues_restaurants_df.columns = ['Ward', 'Number of Restaurants']
Tokyo_venues_restaurants_df.index = np.arange(1, len(Tokyo_venues_restaurants_df)+1)
list_rest_no =Tokyo_venues_restaurants_df['Number of Restaurants'].to_list()
list_dist =Tokyo_venues_restaurants_df['Ward'].to_list()
print(list_rest_no)
print(list_dist)

[3, 9, 4, 31, 57, 2, 3, 7, 16, 2, 6, 11, 9, 1, 8, 20, 6, 26, 10, 6, 24, 18, 41]
['Adachi', 'Arakawa', 'Bunkyō', 'Chiyoda', 'Chūō', 'Edogawa', 'Itabashi', 'Katsushika', 'Kita', 'Kōtō', 'Meguro', 'Minato', 'Nakano', 'Nerima', 'Setagaya', 'Shibuya', 'Shinagawa', 'Shinjuku', 'Suginami', 'Sumida', 'Taitō', 'Toshima', 'Ōta']


### Tokyo coffee mean and frequency by Ward

In [215]:
Tokyo_onehot_c = pd.get_dummies(Tokyo_venues_coffee[['Venue Category']], prefix="", prefix_sep="")

Tokyo_onehot_c.insert(loc=0, column='Ward', value=Tokyo_venues_coffee["Ward"] )
Tokyo_grouped_c = Tokyo_onehot_c.groupby('Ward').mean().reset_index()
Tokyo_grouped_c.head()

Unnamed: 0,Ward,Coffee Shop
0,Chiyoda,1
1,Chūō,1
2,Katsushika,1
3,Kita,1
4,Meguro,1


In [223]:
num_top_venues_c = 5

for hood in Tokyo_grouped_c['Ward']:
    print("----"+hood+"----")
    temp = Tokyo_grouped_c[Tokyo_grouped_c['Ward'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues_c))
    print('\n')

----Chiyoda----
         venue  freq
0  Coffee Shop   1.0


----Chūō----
         venue  freq
0  Coffee Shop   1.0


----Katsushika----
         venue  freq
0  Coffee Shop   1.0


----Kita----
         venue  freq
0  Coffee Shop   1.0


----Meguro----
         venue  freq
0  Coffee Shop   1.0


----Minato----
         venue  freq
0  Coffee Shop   1.0


----Nakano----
         venue  freq
0  Coffee Shop   1.0


----Shibuya----
         venue  freq
0  Coffee Shop   1.0


----Shinjuku----
         venue  freq
0  Coffee Shop   1.0


----Suginami----
         venue  freq
0  Coffee Shop   1.0


----Sumida----
         venue  freq
0  Coffee Shop   1.0


----Taitō----
         venue  freq
0  Coffee Shop   1.0




### Tokyo restaurant mean and frequency by Ward

In [218]:
Tokyo_onehot_r = pd.get_dummies(Tokyo_venues_restaurants[['Venue Category']], prefix="", prefix_sep="")

Tokyo_onehot_r.insert(loc=0, column='Ward', value=Tokyo_venues_restaurants["Ward"] )
Tokyo_grouped_r = Tokyo_onehot_r.groupby('Ward').mean().reset_index()
Tokyo_grouped_r.head()

Unnamed: 0,Ward,Asian Restaurant,Brazilian Restaurant,Cajun / Creole Restaurant,Chinese Restaurant,Donburi Restaurant,Dongbei Restaurant,Dumpling Restaurant,Fast Food Restaurant,French Restaurant,...,Taiwanese Restaurant,Tempura Restaurant,Thai Restaurant,Tonkatsu Restaurant,Udon Restaurant,Unagi Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yakitori Restaurant,Yoshoku Restaurant
0,Adachi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Arakawa,0.0,0.0,0.0,0.222222,0.111111,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Bunkyō,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Chiyoda,0.0,0.0,0.0,0.16129,0.0,0.0,0.0,0.0,0.064516,...,0.0,0.0,0.032258,0.032258,0.0,0.0,0.0,0.0,0.0,0.032258
4,Chūō,0.0,0.0,0.0,0.0,0.035088,0.0,0.0,0.0,0.017544,...,0.0,0.035088,0.0,0.017544,0.0,0.035088,0.0,0.0,0.017544,0.017544


In [229]:
num_top_venues_r = 8

for hood in Tokyo_grouped_r['Ward']:
    print("----"+hood+"----")
    temp = Tokyo_grouped_r[Tokyo_grouped_r['Ward'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues_r))
    print('\n')

----Adachi----
                       venue  freq
0                 Restaurant  0.33
1         Italian Restaurant  0.33
2        Japanese Restaurant  0.33
3         Seafood Restaurant  0.00
4     Shabu-Shabu Restaurant  0.00
5            Soba Restaurant  0.00
6  South American Restaurant  0.00
7    South Indian Restaurant  0.00


----Arakawa----
                 venue  freq
0     Ramen Restaurant  0.22
1   Chinese Restaurant  0.22
2   Donburi Restaurant  0.11
3    Indian Restaurant  0.11
4   Italian Restaurant  0.11
5    Korean Restaurant  0.11
6  Japanese Restaurant  0.11
7     Asian Restaurant  0.00


----Bunkyō----
                    venue  freq
0     Szechuan Restaurant  0.25
1      Chinese Restaurant  0.25
2       Indian Restaurant  0.25
3     Japanese Restaurant  0.25
4        Asian Restaurant  0.00
5      Seafood Restaurant  0.00
6  Shabu-Shabu Restaurant  0.00
7         Soba Restaurant  0.00


----Chiyoda----
                       venue  freq
0           Ramen Restaurant  0.2

### Tokyo top ten venues (restaurant) by Ward

In [230]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [231]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']


columns = ['Ward']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))


neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Ward'] = Tokyo_grouped['Ward']

for ind in np.arange(Tokyo_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Tokyo_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head(23)

Unnamed: 0,Ward,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adachi,Restaurant,Japanese Restaurant,Italian Restaurant,Yoshoku Restaurant,Indian Restaurant,Kushikatsu Restaurant,Kosher Restaurant,Korean Restaurant,Kebab Restaurant,Kaiseki Restaurant
1,Arakawa,Ramen Restaurant,Chinese Restaurant,Italian Restaurant,Donburi Restaurant,Korean Restaurant,Japanese Restaurant,Indian Restaurant,Yoshoku Restaurant,Kushikatsu Restaurant,Kosher Restaurant
2,Bunkyō,Indian Restaurant,Chinese Restaurant,Japanese Restaurant,Szechuan Restaurant,Yoshoku Restaurant,Kushikatsu Restaurant,Kosher Restaurant,Korean Restaurant,Kebab Restaurant,Kaiseki Restaurant
3,Chiyoda,Ramen Restaurant,Chinese Restaurant,Japanese Curry Restaurant,Indian Restaurant,French Restaurant,Japanese Restaurant,Restaurant,Italian Restaurant,Sushi Restaurant,Soba Restaurant
4,Chūō,Sushi Restaurant,Japanese Restaurant,Soba Restaurant,Italian Restaurant,Seafood Restaurant,Unagi Restaurant,Donburi Restaurant,Tempura Restaurant,Ramen Restaurant,Yoshoku Restaurant
5,Edogawa,Ramen Restaurant,Italian Restaurant,Yoshoku Restaurant,Indian Restaurant,Kushikatsu Restaurant,Kosher Restaurant,Korean Restaurant,Kebab Restaurant,Kaiseki Restaurant,Japanese Restaurant
6,Itabashi,Chinese Restaurant,Italian Restaurant,Yoshoku Restaurant,Indian Restaurant,Kushikatsu Restaurant,Kosher Restaurant,Korean Restaurant,Kebab Restaurant,Kaiseki Restaurant,Japanese Restaurant
7,Katsushika,Donburi Restaurant,Ramen Restaurant,Korean Restaurant,Dumpling Restaurant,Japanese Restaurant,Indian Restaurant,Yoshoku Restaurant,Italian Restaurant,Kushikatsu Restaurant,Kosher Restaurant
8,Kita,Restaurant,South American Restaurant,Vegetarian / Vegan Restaurant,Cajun / Creole Restaurant,Seafood Restaurant,French Restaurant,Yoshoku Restaurant,Italian Restaurant,Korean Restaurant,Kebab Restaurant
9,Kōtō,Chinese Restaurant,Restaurant,Yoshoku Restaurant,Indian Restaurant,Kushikatsu Restaurant,Kosher Restaurant,Korean Restaurant,Kebab Restaurant,Kaiseki Restaurant,Japanese Restaurant


### 4. Cluster data

### Data is clustered into five clusters

In [233]:
kclusters = 5
Tokyo_grouped_clustering = Tokyo_grouped_r.drop('Ward', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Tokyo_grouped_clustering)
kmeans.labels_[0:10]
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
tokyo_merged = df
tokyo_merged.rename(columns={'Name':'Ward'}, inplace=True)
tokyo_merged = tokyo_merged.join(neighborhoods_venues_sorted.set_index('Ward'), on='Ward')
tokyo_merged.head() 

Unnamed: 0,No.,Ward,Population,Density,Area,Major districts,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1,Chiyoda,59441,5100,11.66,"Nagatachō, Kasumigaseki, Ōtemachi, Marunouchi,...",35.69381,139.753216,1,Ramen Restaurant,Chinese Restaurant,Japanese Curry Restaurant,Indian Restaurant,French Restaurant,Japanese Restaurant,Restaurant,Italian Restaurant,Sushi Restaurant,Soba Restaurant
1,2,Chūō,147620,14460,10.21,"Nihonbashi, Kayabachō, Ginza, Tsukiji, Hatchōb...",35.666255,139.775565,0,Sushi Restaurant,Japanese Restaurant,Soba Restaurant,Italian Restaurant,Seafood Restaurant,Unagi Restaurant,Donburi Restaurant,Tempura Restaurant,Ramen Restaurant,Yoshoku Restaurant
2,3,Minato,248071,12180,20.37,"Odaiba, Shinbashi, Hamamatsuchō, Mita, Roppong...",35.643227,139.740055,1,Soba Restaurant,Yakitori Restaurant,Kosher Restaurant,Chinese Restaurant,Kebab Restaurant,Kaiseki Restaurant,French Restaurant,Japanese Restaurant,Indian Restaurant,Italian Restaurant
3,4,Shinjuku,339211,18620,18.22,"Shinjuku, Takadanobaba, Ōkubo, Kagurazaka, Ich...",35.693763,139.703632,1,Ramen Restaurant,Chinese Restaurant,Japanese Restaurant,Italian Restaurant,Soba Restaurant,Tonkatsu Restaurant,Yoshoku Restaurant,Brazilian Restaurant,Yakitori Restaurant,Thai Restaurant
4,5,Bunkyō,223389,19790,11.29,"Hongō, Yayoi, Hakusan",35.71881,139.744732,0,Indian Restaurant,Chinese Restaurant,Japanese Restaurant,Szechuan Restaurant,Yoshoku Restaurant,Kushikatsu Restaurant,Kosher Restaurant,Korean Restaurant,Kebab Restaurant,Kaiseki Restaurant


### Cluster 0

In [234]:
tokyo_merged.loc[tokyo_merged['Cluster Labels'] == 0, tokyo_merged.columns[[1] + list(range(5, tokyo_merged.shape[1]))]]

Unnamed: 0,Ward,Major districts,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Chūō,"Nihonbashi, Kayabachō, Ginza, Tsukiji, Hatchōb...",35.666255,139.775565,0,Sushi Restaurant,Japanese Restaurant,Soba Restaurant,Italian Restaurant,Seafood Restaurant,Unagi Restaurant,Donburi Restaurant,Tempura Restaurant,Ramen Restaurant,Yoshoku Restaurant
4,Bunkyō,"Hongō, Yayoi, Hakusan",35.71881,139.744732,0,Indian Restaurant,Chinese Restaurant,Japanese Restaurant,Szechuan Restaurant,Yoshoku Restaurant,Kushikatsu Restaurant,Kosher Restaurant,Korean Restaurant,Kebab Restaurant,Kaiseki Restaurant
9,Meguro,"Meguro, Nakameguro, Jiyugaoka, Komaba, Aobadai",35.62125,139.688014,0,Chinese Restaurant,Japanese Restaurant,Sushi Restaurant,Italian Restaurant,Hotpot Restaurant,Kushikatsu Restaurant,Kosher Restaurant,Korean Restaurant,Kebab Restaurant,Kaiseki Restaurant
20,Adachi,"Ayase, Kitasenju, Takenotsuka",35.783703,139.795319,0,Restaurant,Japanese Restaurant,Italian Restaurant,Yoshoku Restaurant,Indian Restaurant,Kushikatsu Restaurant,Kosher Restaurant,Korean Restaurant,Kebab Restaurant,Kaiseki Restaurant


### Cluster 1

In [235]:
tokyo_merged.loc[tokyo_merged['Cluster Labels'] == 1, tokyo_merged.columns[[1] + list(range(5, tokyo_merged.shape[1]))]]

Unnamed: 0,Ward,Major districts,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Chiyoda,"Nagatachō, Kasumigaseki, Ōtemachi, Marunouchi,...",35.69381,139.753216,1,Ramen Restaurant,Chinese Restaurant,Japanese Curry Restaurant,Indian Restaurant,French Restaurant,Japanese Restaurant,Restaurant,Italian Restaurant,Sushi Restaurant,Soba Restaurant
2,Minato,"Odaiba, Shinbashi, Hamamatsuchō, Mita, Roppong...",35.643227,139.740055,1,Soba Restaurant,Yakitori Restaurant,Kosher Restaurant,Chinese Restaurant,Kebab Restaurant,Kaiseki Restaurant,French Restaurant,Japanese Restaurant,Indian Restaurant,Italian Restaurant
3,Shinjuku,"Shinjuku, Takadanobaba, Ōkubo, Kagurazaka, Ich...",35.693763,139.703632,1,Ramen Restaurant,Chinese Restaurant,Japanese Restaurant,Italian Restaurant,Soba Restaurant,Tonkatsu Restaurant,Yoshoku Restaurant,Brazilian Restaurant,Yakitori Restaurant,Thai Restaurant
5,Taitō,"Ueno, Asakusa",35.71745,139.790859,1,Soba Restaurant,Ramen Restaurant,Japanese Restaurant,Sushi Restaurant,Sukiyaki Restaurant,Italian Restaurant,Monjayaki Restaurant,Unagi Restaurant,Chinese Restaurant,Nabe Restaurant
6,Sumida,"Kinshichō, Morishita, Ryōgoku",35.700429,139.805017,1,Japanese Restaurant,Unagi Restaurant,Chinese Restaurant,Tonkatsu Restaurant,Ramen Restaurant,Yoshoku Restaurant,Indian Restaurant,Kosher Restaurant,Korean Restaurant,Kebab Restaurant
8,Shinagawa,"Shinagawa, Gotanda, Ōsaki, Hatanodai, Ōimachi,...",35.599252,139.73891,1,Donburi Restaurant,Restaurant,Japanese Restaurant,Soba Restaurant,Sushi Restaurant,Yoshoku Restaurant,Indian Restaurant,Kosher Restaurant,Korean Restaurant,Kebab Restaurant
10,Ōta,"Ōmori, Kamata, Haneda, Den-en-chōfu",35.561206,139.715843,1,Ramen Restaurant,Japanese Restaurant,Chinese Restaurant,Tonkatsu Restaurant,Dumpling Restaurant,Italian Restaurant,Sushi Restaurant,Yoshoku Restaurant,Japanese Curry Restaurant,Korean Restaurant
11,Setagaya,"Setagaya, Shimokitazawa, Kinuta, Karasuyama, T...",35.646096,139.65627,1,Ramen Restaurant,Restaurant,Unagi Restaurant,Japanese Restaurant,Fast Food Restaurant,Szechuan Restaurant,Yoshoku Restaurant,Indian Restaurant,Korean Restaurant,Kebab Restaurant
12,Shibuya,"Shibuya, Ebisu, Harajuku, Daikanyama, Hiroo, S...",35.664596,139.698711,1,Chinese Restaurant,Ramen Restaurant,French Restaurant,Sushi Restaurant,Japanese Restaurant,Brazilian Restaurant,Mongolian Restaurant,Donburi Restaurant,Shabu-Shabu Restaurant,South Indian Restaurant
13,Nakano,Nakano,35.718123,139.664468,1,Ramen Restaurant,Italian Restaurant,Chinese Restaurant,Donburi Restaurant,Korean Restaurant,Japanese Restaurant,Yoshoku Restaurant,Kushikatsu Restaurant,Kosher Restaurant,Kebab Restaurant


### Cluster 2

In [236]:
tokyo_merged.loc[tokyo_merged['Cluster Labels'] == 2, tokyo_merged.columns[[1] + list(range(5, tokyo_merged.shape[1]))]]

Unnamed: 0,Ward,Major districts,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Kōtō,"Kiba, Ariake, Kameido, Tōyōchō, Monzennakachō,...",35.649154,139.81279,2,Chinese Restaurant,Restaurant,Yoshoku Restaurant,Indian Restaurant,Kushikatsu Restaurant,Kosher Restaurant,Korean Restaurant,Kebab Restaurant,Kaiseki Restaurant,Japanese Restaurant
16,Kita,"Akabane, Ōji, Tabata",-0.220164,-78.512327,2,Restaurant,South American Restaurant,Vegetarian / Vegan Restaurant,Cajun / Creole Restaurant,Seafood Restaurant,French Restaurant,Yoshoku Restaurant,Italian Restaurant,Korean Restaurant,Kebab Restaurant


### Cluster 3

In [237]:
tokyo_merged.loc[tokyo_merged['Cluster Labels'] == 3, tokyo_merged.columns[[1] + list(range(5, tokyo_merged.shape[1]))]]

Unnamed: 0,Ward,Major districts,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Itabashi,"Itabashi, Takashimadaira",35.774143,139.681209,3,Chinese Restaurant,Italian Restaurant,Yoshoku Restaurant,Indian Restaurant,Kushikatsu Restaurant,Kosher Restaurant,Korean Restaurant,Kebab Restaurant,Kaiseki Restaurant,Japanese Restaurant
19,Nerima,"Nerima, Ōizumi, Hikarigaoka",35.74836,139.638735,3,Chinese Restaurant,Yoshoku Restaurant,Indian Restaurant,Kushikatsu Restaurant,Kosher Restaurant,Korean Restaurant,Kebab Restaurant,Kaiseki Restaurant,Japanese Restaurant,Japanese Curry Restaurant


### Cluster 4

In [238]:
tokyo_merged.loc[tokyo_merged['Cluster Labels'] == 4, tokyo_merged.columns[[1] + list(range(5, tokyo_merged.shape[1]))]]

Unnamed: 0,Ward,Major districts,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,Edogawa,"Kasai, Koiwa",35.678278,139.871091,4,Ramen Restaurant,Italian Restaurant,Yoshoku Restaurant,Indian Restaurant,Kushikatsu Restaurant,Kosher Restaurant,Korean Restaurant,Kebab Restaurant,Kaiseki Restaurant,Japanese Restaurant


<br />

### 5. Generate map of Tokyo and map clusters

### Geographic coordinates for Tokyo, Japan

In [240]:
address = 'Tokyo, JP'

geolocator = Nominatim(user_agent="Tokyo_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Tokyo are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Tokyo are 35.6828387, 139.7594549.


In [241]:
!conda install -c conda-forge folium=0.5.0 --yes
import folium

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    altair-4.1.0               |             py_1         614 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    branca-0.4.1               |             py_0          26 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         713 KB

The following NEW packages will be INSTALLED:

    altair:  4.1.0-py_1 conda-forge
    branca:  0.4.1-py_0 conda-forge
    folium:  0.5.0-py_0 conda-forge
    vincent: 0.4.4-py_1 conda-forge


Downloading and Extracting Packages
altair-4.1.0         | 614 KB    | #####

In [242]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(tokyo_merged['Latitude'], tokyo_merged['Longitude'], tokyo_merged['Ward'], tokyo_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
    

map_clusters