# Capstone Project - The Battle of Neighborhoods (Week 2)

#### In this week, you will continue working on your capstone project to complete the full report.

## Part I

## Scrape data from Wikipedia page

In [1]:
# import the library we use to open URLs
import urllib.request

# specify which URL/web page we are going to be scraping
url = "https://en.wikipedia.org/wiki/Postal_codes_in_Singapore"

# open the url using urllib.request and put the HTML into the page variable
page = urllib.request.urlopen(url)

In [2]:
# import the BeautifulSoup library so we can parse HTML and XML documents
from bs4 import BeautifulSoup

# parse the HTML from our URL into the BeautifulSoup parse tree format
soup = BeautifulSoup(page, "lxml")

In [3]:
#Take a look at the underlying HTML code
print(soup.prettify())

#Tables are made up of: 
#rows starting and ending with <tr> and </tr>
#The top row of headers has <th> tags while the data rows beneath for each club has <td> tags. 
#It’s in these tags that we will tell Python to extract our data from.

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   Postal codes in Singapore - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"XsAOqwpAIC8AABHUM1IAAABF","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"Postal_codes_in_Singapore","wgTitle":"Postal codes in Singapore","wgCurRevisionId":942914634,"wgRevisionId":942914634,"wgArticleId":24625951,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Postal codes by country","Postal system of Singapore"],"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgRelevan

In [4]:
sg_table = soup.find('table',{'class':'wikitable'})
print(sg_table)

<table class="wikitable">
<tbody><tr>
<th>Postal district
</th>
<th>Postal sector<br/>(1st 2 digits of 6-digit postal codes)
</th>
<th>General location
</th></tr>
<tr>
<td>01
</td>
<td>01, 02, 03, 04, 05, 06
</td>
<td>Raffles Place, Cecil, Marina, People's Park
</td></tr>
<tr>
<td>02
</td>
<td>07, 08
</td>
<td>Anson, Tanjong Pagar
</td></tr>
<tr>
<td>03
</td>
<td>14, 15, 16
</td>
<td><a href="/wiki/Bukit_Merah" title="Bukit Merah">Bukit Merah</a>, <a href="/wiki/Queenstown,_Singapore" title="Queenstown, Singapore">Queenstown</a>, <a href="/wiki/Tiong_Bahru" title="Tiong Bahru">Tiong Bahru</a>
</td></tr>
<tr>
<td>04
</td>
<td>09, 10
</td>
<td>Telok Blangah, Harbourfront
</td></tr>
<tr>
<td>05
</td>
<td>11, 12, 13
</td>
<td>Pasir Panjang, Hong Leong Garden, Clementi New Town
</td></tr>
<tr>
<td>06
</td>
<td>17
</td>
<td>High Street, Beach Road (part)
</td></tr>
<tr>
<td>07
</td>
<td>18, 19
</td>
<td>Middle Road, Golden Mile
</td></tr>
<tr>
<td>08
</td>
<td>20, 21
</td>
<td><a href="/wiki

## Create a new dataframe

In [5]:
#Transform the data into a pandas dataframe
import pandas as pd

column_names=['Postal district', 'Postal sector','General location']
singapore = pd.DataFrame(columns = column_names)

In [6]:
#store the scraped data into singapore 

district = 0
sector = 0
location = 0

for tr in sg_table.find_all('tr'):
    i = 0
    for td in tr.find_all('td'):
        if i == 0:
            district = td.text.replace('\n', '')
            i = i + 1
        elif i == 1:
            sector = td.text.replace('\n', '')
            i = i + 1
        elif i == 2: 
            location = td.text.replace('\n', '')
    
    singapore = singapore.append({'Postal district': district,'Postal sector': sector,'General location': location},ignore_index=True)

singapore

Unnamed: 0,Postal district,Postal sector,General location
0,0,0,0
1,1,"01, 02, 03, 04, 05, 06","Raffles Place, Cecil, Marina, People's Park"
2,2,"07, 08","Anson, Tanjong Pagar"
3,3,"14, 15, 16","Bukit Merah, Queenstown, Tiong Bahru"
4,4,"09, 10","Telok Blangah, Harbourfront"
5,5,"11, 12, 13","Pasir Panjang, Hong Leong Garden, Clementi New..."
6,6,17,"High Street, Beach Road (part)"
7,7,"18, 19","Middle Road, Golden Mile"
8,8,"20, 21","Little India, Farrer Park, Jalan Besar, Lavender"
9,9,"22, 23","Orchard, Cairnhill, River Valley"


## Data cleaning

In [7]:
#Remove the first row
clean_singapore=singapore.drop(0,axis=0)
clean_singapore.reset_index(drop = True, inplace = True)
clean_singapore

Unnamed: 0,Postal district,Postal sector,General location
0,1,"01, 02, 03, 04, 05, 06","Raffles Place, Cecil, Marina, People's Park"
1,2,"07, 08","Anson, Tanjong Pagar"
2,3,"14, 15, 16","Bukit Merah, Queenstown, Tiong Bahru"
3,4,"09, 10","Telok Blangah, Harbourfront"
4,5,"11, 12, 13","Pasir Panjang, Hong Leong Garden, Clementi New..."
5,6,17,"High Street, Beach Road (part)"
6,7,"18, 19","Middle Road, Golden Mile"
7,8,"20, 21","Little India, Farrer Park, Jalan Besar, Lavender"
8,9,"22, 23","Orchard, Cairnhill, River Valley"
9,10,"24, 25, 26, 27","Ardmore, Bukit Timah, Holland Road, Tanglin"


### An excel file was generated based on the approximate coordinates (Latitude and Longitude) of respectively district.

In [8]:
import types
import pandas as pd
from botocore.client import Config
import ibm_boto3

def __iter__(self): return 0

# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage. It includes your credentials.
# You might want to remove those credentials before you share the notebook.
client_f94613240f5444a49431dfaa2ce2fb0e = ibm_boto3.client(service_name='s3',
    ibm_api_key_id='lGUOwdht7UHm8LXrYoqnPox7OTQ_WOH7KdQ2QPpOurcZ',
    ibm_auth_endpoint="https://iam.ng.bluemix.net/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3-api.us-geo.objectstorage.service.networklayer.com')

body = client_f94613240f5444a49431dfaa2ce2fb0e.get_object(Bucket='capstoneproject-donotdelete-pr-ismlnn9byi5u9x',Key='SgLatLng.xlsx')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

df_data_0 = pd.read_excel(body)
df_data_0.head()


Unnamed: 0,General location,Latitude,Longtitude
0,"Raffles Place, Cecil, Marina, People's Park",1.28435,103.85107
1,"Anson, Tanjong Pagar",1.27462,103.84547
2,"Bukit Merah, Queenstown, Tiong Bahru",1.28677,103.82542
3,"Telok Blangah, Harbourfront",1.26479,103.82082
4,"Pasir Panjang, Hong Leong Garden, Clementi New...",1.31583,103.76472


In [9]:
final_singapore = pd.merge(clean_singapore,df_data_0,on='General location')
final_singapore.rename(columns={'Longtitude':'Longitude'}, inplace=True)
final_singapore.head()

Unnamed: 0,Postal district,Postal sector,General location,Latitude,Longitude
0,1,"01, 02, 03, 04, 05, 06","Raffles Place, Cecil, Marina, People's Park",1.28435,103.85107
1,2,"07, 08","Anson, Tanjong Pagar",1.27462,103.84547
2,4,"09, 10","Telok Blangah, Harbourfront",1.26479,103.82082
3,5,"11, 12, 13","Pasir Panjang, Hong Leong Garden, Clementi New...",1.31583,103.76472
4,6,17,"High Street, Beach Road (part)",1.28967,103.85007


In [10]:
#import libraries
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
print('Libraries imported 1.')

# import k-means from clustering stage
from sklearn.cluster import KMeans
print('Libraries imported 2.')

#!conda install -c conda-forge folium=0.5.0 --yes
!pip install folium==0.5.0
import folium # map rendering library
print('Libraries imported 3.')

Libraries imported 1.
Libraries imported 2.
Libraries imported 3.


## Use geopy library to get the latitude and longitude values of Singapore.

In [11]:
address = 'Singapore, Singapore'

geolocator = Nominatim(user_agent="SG_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Singapore are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Singapore are 1.357107, 103.8194992.


## Create a map of Singapore with the districts superimposed on top.

In [12]:
# create map of Singapore using latitude and longitude values
map_singapore = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng in zip(final_singapore['Latitude'],final_singapore['Longitude']):
    label = '{}, {}'.format(latitude, longitude)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_singapore)  
    
map_singapore

## Define Foursquare Credentials and Version

In [13]:
CLIENT_ID = 'SMNTIIMN2A0X5MWSNUMY4MG1G1TQX4ZVA2305LDPRMIBIADG' # your Foursquare ID
CLIENT_SECRET = 'TR4EOC3SX0W5WX3S1IRZVL5TUFHLASYPKDILPOV4D3UZLIYJ' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentials:
CLIENT_ID: SMNTIIMN2A0X5MWSNUMY4MG1G1TQX4ZVA2305LDPRMIBIADG
CLIENT_SECRET:TR4EOC3SX0W5WX3S1IRZVL5TUFHLASYPKDILPOV4D3UZLIYJ


## Explore districts in Singapore

In [14]:
import requests 

def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['General location', 
                  'District latitude', 
                  'District longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [15]:
singapore_venues = getNearbyVenues(names=final_singapore['General location'],
                                   latitudes=final_singapore['Latitude'],
                                   longitudes=final_singapore['Longitude']
                                  )

Raffles Place, Cecil, Marina, People's Park
Anson, Tanjong Pagar
Telok Blangah, Harbourfront
Pasir Panjang, Hong Leong Garden, Clementi New Town
High Street, Beach Road (part)
Middle Road, Golden Mile
Orchard, Cairnhill, River Valley
Ardmore, Bukit Timah, Holland Road, Tanglin
Watten Estate, Novena, Thomson
Balestier, Toa Payoh, Serangoon
Macpherson, Braddell
Geylang, Eunos
Katong, Joo Chiat, Amber Road
Bedok, Upper East Coast, Eastwood, Kew Drive
Loyang, Changi
Simei, Tampines, Pasir Ris
Serangoon Garden, Hougang, Punggol
Bishan, Ang Mo Kio
Upper Bukit Timah, Clementi Park, Ulu Pandan
Jurong, Tuas
Hillview, Dairy Farm, Bukit Panjang, Choa Chu Kang
Lim Chu Kang, Tengah
Kranji, Woodgrove, Woodlands
Upper Thomson, Springleaf
Yishun, Sembawang
Seletar


In [16]:
print(singapore_venues.shape)
singapore_venues.head()

(1077, 7)


Unnamed: 0,General location,District latitude,District longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Raffles Place, Cecil, Marina, People's Park",1.28435,103.85107,CITY Hot Pot Shabu shabu,1.284173,103.851585,Hotpot Restaurant
1,"Raffles Place, Cecil, Marina, People's Park",1.28435,103.85107,Virgin Active,1.284608,103.850815,Gym / Fitness Center
2,"Raffles Place, Cecil, Marina, People's Park",1.28435,103.85107,The Salad Shop,1.285523,103.851177,Salad Place
3,"Raffles Place, Cecil, Marina, People's Park",1.28435,103.85107,The Fullerton Bay Hotel,1.283878,103.853314,Hotel
4,"Raffles Place, Cecil, Marina, People's Park",1.28435,103.85107,CULINARYON,1.284876,103.850933,Comfort Food Restaurant


## Analyze each district

In [17]:
singapore_onehot = pd.get_dummies(singapore_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
singapore_onehot['General location'] = singapore_venues['General location'] 

# move neighborhood column to the first column
fixed_columns = [singapore_onehot.columns[-1]] + list(singapore_onehot.columns[:-1])
singapore_onehot = singapore_onehot[fixed_columns]

singapore_onehot.head()

Unnamed: 0,General location,ATM,Airport,Airport Terminal,American Restaurant,Arcade,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Waterfront,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo Exhibit
0,"Raffles Place, Cecil, Marina, People's Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Raffles Place, Cecil, Marina, People's Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Raffles Place, Cecil, Marina, People's Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Raffles Place, Cecil, Marina, People's Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Raffles Place, Cecil, Marina, People's Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [18]:
# Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
singapore_grouped = singapore_onehot.groupby('General location').mean().reset_index()
singapore_grouped.shape
singapore_grouped.head()

Unnamed: 0,General location,ATM,Airport,Airport Terminal,American Restaurant,Arcade,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Waterfront,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo Exhibit
0,"Anson, Tanjong Pagar",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Ardmore, Bukit Timah, Holland Road, Tanglin",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Balestier, Toa Payoh, Serangoon",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.069767,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Bedok, Upper East Coast, Eastwood, Kew Drive",0.0,0.0,0.0,0.016667,0.0,0.0,0.0,0.05,0.0,...,0.016667,0.0,0.0,0.0,0.0,0.0,0.016667,0.0,0.0,0.0
4,"Bishan, Ang Mo Kio",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [19]:
# Print each district along with the top 5 most common venues
num_top_venues = 5

for hood in singapore_grouped['General location']:
    print("----"+hood+"----")
    temp = singapore_grouped[singapore_grouped['General location'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Anson, Tanjong Pagar----
                  venue  freq
0   Japanese Restaurant  0.13
1           Coffee Shop  0.10
2                  Café  0.06
3                 Hotel  0.05
4  Gym / Fitness Center  0.05


----Ardmore, Bukit Timah, Holland Road, Tanglin----
               venue  freq
0               Café  0.17
1  French Restaurant  0.17
2      Shopping Mall  0.08
3                Bar  0.08
4        Supermarket  0.08


----Balestier, Toa Payoh, Serangoon----
                venue  freq
0        Noodle House  0.14
1  Chinese Restaurant  0.14
2         Coffee Shop  0.09
3          Food Court  0.09
4         Snack Place  0.07


----Bedok, Upper East Coast, Eastwood, Kew Drive----
                 venue  freq
0   Chinese Restaurant  0.07
1          Coffee Shop  0.07
2     Asian Restaurant  0.05
3       Sandwich Place  0.05
4  Japanese Restaurant  0.05


----Bishan, Ang Mo Kio----
             venue  freq
0       Food Court  0.12
1      Coffee Shop  0.07
2  Bubble Tea Shop  0.07
3   Ice

In [20]:
import numpy as np

# Put data into a dataframe
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['General location']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
GL_venues_sorted = pd.DataFrame(columns=columns)
GL_venues_sorted['General location'] = singapore_grouped['General location']

for ind in np.arange(singapore_grouped.shape[0]):
    GL_venues_sorted.iloc[ind, 1:] = return_most_common_venues(singapore_grouped.iloc[ind, :], num_top_venues)

GL_venues_sorted.head()

Unnamed: 0,General location,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Anson, Tanjong Pagar",Japanese Restaurant,Coffee Shop,Café,Gym / Fitness Center,Hotel,Ramen Restaurant,Salad Place,Bakery,Korean Restaurant,Cocktail Bar
1,"Ardmore, Bukit Timah, Holland Road, Tanglin",French Restaurant,Café,Coffee Shop,Bookstore,Bus Station,Shopping Mall,Supermarket,Gym Pool,Toy / Game Store,Bar
2,"Balestier, Toa Payoh, Serangoon",Noodle House,Chinese Restaurant,Coffee Shop,Food Court,Thai Restaurant,Asian Restaurant,Snack Place,Steakhouse,Supermarket,Grocery Store
3,"Bedok, Upper East Coast, Eastwood, Kew Drive",Coffee Shop,Chinese Restaurant,Asian Restaurant,Food Court,Sandwich Place,Japanese Restaurant,Fast Food Restaurant,Supermarket,Sushi Restaurant,Ice Cream Shop
4,"Bishan, Ang Mo Kio",Food Court,Bubble Tea Shop,Coffee Shop,Japanese Restaurant,Chinese Restaurant,Café,Supermarket,Cosmetics Shop,Ice Cream Shop,Asian Restaurant


## Cluster districts

In [21]:
# set number of clusters
kclusters = 5

singapore_grouped_clustering = singapore_grouped.drop('General location', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(singapore_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0], dtype=int32)

In [22]:
# add clustering labels
GL_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

singapore_merged = final_singapore

# merge singapore_grouped with singapore_data to add latitude/longitude for each neighborhood
singapore_merged = singapore_merged.join(GL_venues_sorted.set_index('General location'), on='General location')

# check the last columns!
singapore_merged.head()

Unnamed: 0,Postal district,Postal sector,General location,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1,"01, 02, 03, 04, 05, 06","Raffles Place, Cecil, Marina, People's Park",1.28435,103.85107,0,Hotel,Café,Food Court,Cocktail Bar,Seafood Restaurant,Nightclub,Gym,Waterfront,French Restaurant,Coffee Shop
1,2,"07, 08","Anson, Tanjong Pagar",1.27462,103.84547,0,Japanese Restaurant,Coffee Shop,Café,Gym / Fitness Center,Hotel,Ramen Restaurant,Salad Place,Bakery,Korean Restaurant,Cocktail Bar
2,4,"09, 10","Telok Blangah, Harbourfront",1.26479,103.82082,0,Chinese Restaurant,Japanese Restaurant,Toy / Game Store,Fast Food Restaurant,Coffee Shop,Clothing Store,Bakery,Multiplex,Café,Korean Restaurant
3,5,"11, 12, 13","Pasir Panjang, Hong Leong Garden, Clementi New...",1.31583,103.76472,0,Coffee Shop,Food Court,Dim Sum Restaurant,Chinese Restaurant,Asian Restaurant,Dessert Shop,Japanese Restaurant,Noodle House,Electronics Store,Chinese Breakfast Place
4,6,17,"High Street, Beach Road (part)",1.28967,103.85007,0,Japanese Restaurant,Bar,Yoga Studio,Italian Restaurant,Nightclub,Bistro,Restaurant,Gym / Fitness Center,Cocktail Bar,Coffee Shop


In [23]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(singapore_merged['Latitude'], singapore_merged['Longitude'], singapore_merged['General location'], singapore_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        #color=rainbow[cluster-1],
        fill=True,
        #fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine Clusters

In [24]:
singapore_merged.loc[singapore_merged['Cluster Labels'] == 0, singapore_merged.columns[[1] + list(range(5, singapore_merged.shape[1]))]]

Unnamed: 0,Postal sector,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"01, 02, 03, 04, 05, 06",0,Hotel,Café,Food Court,Cocktail Bar,Seafood Restaurant,Nightclub,Gym,Waterfront,French Restaurant,Coffee Shop
1,"07, 08",0,Japanese Restaurant,Coffee Shop,Café,Gym / Fitness Center,Hotel,Ramen Restaurant,Salad Place,Bakery,Korean Restaurant,Cocktail Bar
2,"09, 10",0,Chinese Restaurant,Japanese Restaurant,Toy / Game Store,Fast Food Restaurant,Coffee Shop,Clothing Store,Bakery,Multiplex,Café,Korean Restaurant
3,"11, 12, 13",0,Coffee Shop,Food Court,Dim Sum Restaurant,Chinese Restaurant,Asian Restaurant,Dessert Shop,Japanese Restaurant,Noodle House,Electronics Store,Chinese Breakfast Place
4,17,0,Japanese Restaurant,Bar,Yoga Studio,Italian Restaurant,Nightclub,Bistro,Restaurant,Gym / Fitness Center,Cocktail Bar,Coffee Shop
5,"18, 19",0,Japanese Restaurant,Bar,Yoga Studio,Italian Restaurant,Nightclub,Bistro,Restaurant,Gym / Fitness Center,Cocktail Bar,Coffee Shop
6,"22, 23",0,Boutique,Sushi Restaurant,Bakery,Hotel,Shopping Mall,Bubble Tea Shop,Japanese Restaurant,Chinese Restaurant,Cosmetics Shop,Asian Restaurant
7,"24, 25, 26, 27",0,French Restaurant,Café,Coffee Shop,Bookstore,Bus Station,Shopping Mall,Supermarket,Gym Pool,Toy / Game Store,Bar
8,"28, 29, 30",0,Café,Coffee Shop,Italian Restaurant,Hotel,Asian Restaurant,Japanese Restaurant,Ramen Restaurant,Sandwich Place,Thai Restaurant,Supermarket
9,"31, 32, 33",0,Noodle House,Chinese Restaurant,Coffee Shop,Food Court,Thai Restaurant,Asian Restaurant,Snack Place,Steakhouse,Supermarket,Grocery Store


In [25]:
singapore_merged.loc[singapore_merged['Cluster Labels'] == 1, singapore_merged.columns[[1] + list(range(5, singapore_merged.shape[1]))]]

Unnamed: 0,Postal sector,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,"60, 61, 62, 63, 64",1,Turkish Restaurant,Zoo Exhibit,Event Space,Food Truck,Food Stand,Food Court,Food & Drink Shop,Fish & Chips Shop,Filipino Restaurant,Field


In [26]:
singapore_merged.loc[singapore_merged['Cluster Labels'] == 2, singapore_merged.columns[[1] + list(range(5, singapore_merged.shape[1]))]]

Unnamed: 0,Postal sector,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
25,"79, 80",2,Food Court,Snack Place,Café,Harbor / Marina,Zoo Exhibit,Event Space,Food & Drink Shop,Fish & Chips Shop,Filipino Restaurant,Field


In [27]:
singapore_merged.loc[singapore_merged['Cluster Labels'] == 3, singapore_merged.columns[[1] + list(range(5, singapore_merged.shape[1]))]]

Unnamed: 0,Postal sector,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,"49, 50, 81",3,Airport,Airport Terminal,Spa,Rest Area,Hotel,Bookstore,Event Space,Food Stand,Food Court,Food & Drink Shop


In [28]:
singapore_merged.loc[singapore_merged['Cluster Labels'] == 4, singapore_merged.columns[[1] + list(range(5, singapore_merged.shape[1]))]]

Unnamed: 0,Postal sector,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,"69, 70, 71",4,Zoo Exhibit,Farm,Event Space,Food Truck,Food Stand,Food Court,Food & Drink Shop,Fish & Chips Shop,Filipino Restaurant,Field
