## Import Libraries

In [85]:
import numpy as np 
import pandas as pd 
import json
from geopy.geocoders import Nominatim
import geocoder 
import requests
from bs4 import BeautifulSoup
import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans
import folium

In [77]:
# Names of the Singapore neighborhoods studied in this notebook, kept under a
# single 'neighborhood' key so the dict maps directly onto a one-column frame.
neighborhood_dict = {
    'neighborhood': [
        'Changi', 'Geylang', 'Woodlands', 'Tanjong Pagar', 'Holland Village',
        'Chinatown', 'Kampong Glam', 'Bukit Timah', 'Katong', 'Tampines',
        'Bedok', 'Queenstown', 'Punggol', 'Toa Payoh', 'Pasir Ris', 'Yishun',
        'Sengkang', 'Sembawang', 'Clementi', 'Ang Mo Kio', 'Tiong Bahru',
        'Serangoon', 'Choa Chu Kang', 'Harbourfront', 'Tanglin', 'Seletar',
        'Marine Parade', 'Jurong East', 'Rochor', 'Bukit Panjang', 'Novena',
        'Siglap', 'Kallang', 'Outram', 'Bisham', 'Paya Lebar', 'Hougang',
        'Boonlay',
    ],
}

# One row per neighborhood; the column order follows the dict's keys.
df = pd.DataFrame(neighborhood_dict, columns=list(neighborhood_dict))
df

Unnamed: 0,neighborhood
0,Changi
1,Geylang
2,Woodlands
3,Tanjong Pagar
4,Holland Village
5,Chinatown
6,Kampong Glam
7,Bukit Timah
8,Katong
9,Tampines


In [78]:
# get the coordinates

def get_latlng(neighborhood, max_attempts=3):
    """Look up the coordinates of a Singapore neighborhood through ArcGIS.

    Args:
        neighborhood: Neighborhood name; ", Singapore" is appended to it
            before the query is passed to ``geocoder.arcgis``.
        max_attempts: Maximum number of lookups to try before giving up.
            Values below 1 are treated as 1.

    Returns:
        The ``latlng`` value reported by the geocoder (callers index it as
        ``[0]`` for latitude and ``[1]`` for longitude). If every attempt
        comes back empty, the last (falsy) value is returned unchanged so the
        caller can decide how to handle the failure.
    """
    query = '{}, Singapore'.format(neighborhood)

    lat_lng_coords = None
    for _ in range(max(1, max_attempts)):
        lat_lng_coords = geocoder.arcgis(query).latlng
        # A failed lookup yields a falsy value; only then is it worth retrying.
        if lat_lng_coords:
            break

    return lat_lng_coords

In [79]:
latitude = []
longtitude = []

# Take the names from df instead of repeating the literal list: the
# coordinates are attached to df by row position afterwards, so both must
# come from the same source and in the same order.
neighborhood_list = df['neighborhood'].tolist()

for each in neighborhood_list:

    coords = get_latlng(each)

    # Fail with the offending name instead of an opaque indexing error when
    # the geocoder returned nothing for this neighborhood.
    if not coords:
        raise ValueError('Could not geocode neighborhood: {}'.format(each))

    latitude.append(coords[0])
    longtitude.append(coords[1])

In [80]:
# Place the two geocoded lists side by side as named columns.
df_coords = pd.concat(
    {'latitude': pd.Series(latitude), 'longtitude': pd.Series(longtitude)},
    axis=1,
)

In [81]:
# Copy both coordinate columns from df_coords onto the neighborhood frame.
for coord_column in ('latitude', 'longtitude'):
    df[coord_column] = df_coords[coord_column]

df.head()

Unnamed: 0,neighborhood,latitude,longtitude
0,Changi,1.35514,103.99006
1,Geylang,1.31147,103.88218
2,Woodlands,1.43585,103.78698
3,Tanjong Pagar,1.27889,103.84539
4,Holland Village,1.31194,103.79333


## Create a map of Singapore with neighborhoods superimposed



In [83]:
# Geocode the city itself with Nominatim; the result is used to centre the maps.
address = 'Singapore'
geolocator = Nominatim(user_agent='super-app')
location = geolocator.geocode(address)

latitude_sg, longitude_sg = location.latitude, location.longitude

message = 'The geograpical coordinate of Singapore {}, {}.'
print(message.format(latitude_sg, longitude_sg))

The geograpical coordinate of Singapore 1.357107, 103.8194992.


In [87]:
# Base map centred on the Singapore coordinates looked up earlier.
map_sg = folium.Map(location=[latitude_sg, longitude_sg], zoom_start=12)

# Shared appearance of every neighborhood marker.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
)

# One circle per neighborhood, with the neighborhood name as its popup.
for lat, lng, neighborhood in zip(df['latitude'], df['longtitude'], df['neighborhood']):
    label = folium.Popup('{}'.format(neighborhood), parse_html=True)
    marker = folium.CircleMarker([lat, lng], popup=label, **marker_style)
    marker.add_to(map_sg)

map_sg

## Use the Foursquare API to explore the neighborhoods

In [88]:
# Foursquare API credentials are read from the environment so that secrets are
# never stored in the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before running this cell.
import os
import warnings

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
VERSION = '20180605'  # value sent as the "v" query parameter of each request

if not (CLIENT_ID and CLIENT_SECRET):
    # Warn early: without credentials the Foursquare requests below will fail.
    warnings.warn(
        'FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
        'Foursquare API requests will be rejected.'
    )

In [90]:
radius = 2000
LIMIT = 100

venues = []

# Foursquare "explore" endpoint; the query string is built by requests from
# the params dict so that every value is URL-encoded correctly.
url = "https://api.foursquare.com/v2/venues/explore"

for lat, long, neighborhood in zip(df['latitude'], df['longtitude'], df['neighborhood']):

    # make the GET request (bounded by a timeout so one stalled call cannot
    # hang the whole loop) and stop on an HTTP error status
    response = requests.get(
        url,
        params={
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, long),
            'radius': radius,
            'limit': LIMIT,
        },
        timeout=30,
    )
    response.raise_for_status()

    # a response without any group contributes no venues instead of crashing
    groups = response.json().get('response', {}).get('groups', [])
    results = groups[0]['items'] if groups else []

    # return only relevant information for each nearby venue
    for venue in results:
        details = venue['venue']
        categories = details.get('categories') or []
        if not categories:
            # a venue without a category cannot be used in the category analysis
            continue
        venues.append((
            neighborhood,
            lat,
            long,
            details['name'],
            details['location']['lat'],
            details['location']['lng'],
            categories[0]['name']))

In [91]:
# convert the venues list into a new DataFrame; naming the columns at
# construction time also works when no venues were collected (assigning seven
# names to an empty frame afterwards would raise)
venues_df = pd.DataFrame(
    venues,
    columns=['Neighborhood', 'Latitude', 'Longitude', 'VenueName',
             'VenueLatitude', 'VenueLongitude', 'VenueCategory'],
)

print(venues_df.shape)
venues_df.head()

(3545, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Changi,1.35514,103.99006,Singapore Changi Airport (SIN) (Singapore Chan...,1.353767,103.987849,Airport
1,Changi,1.35514,103.99006,Jewel Changi Airport,1.360119,103.98979,Shopping Mall
2,Changi,1.35514,103.99006,HSBC Rain Vortex,1.360151,103.98974,Waterfall
3,Changi,1.35514,103.99006,Singapore Airlines First Class Check-In Reception,1.355134,103.986732,Airport Lounge
4,Changi,1.35514,103.99006,Crowne Plaza Changi Airport,1.358561,103.987967,Hotel


In [92]:
# Number of venues returned for each neighborhood (non-null count per column).
venues_df.groupby("Neighborhood").count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ang Mo Kio,100,100,100,100,100,100
Bedok,100,100,100,100,100,100
Bisham,100,100,100,100,100,100
Boonlay,100,100,100,100,100,100
Bukit Panjang,71,71,71,71,71,71
Bukit Timah,97,97,97,97,97,97
Changi,75,75,75,75,75,75
Chinatown,100,100,100,100,100,100
Choa Chu Kang,81,81,81,81,81,81
Clementi,100,100,100,100,100,100


In [94]:
# How many distinct venue categories were returned overall.
venues_df['VenueCategory'].nunique(dropna=False)

287

In [95]:
# Preview the first 50 distinct categories, in order of first appearance.
pd.unique(venues_df['VenueCategory'])[:50]

array(['Airport', 'Shopping Mall', 'Waterfall', 'Airport Lounge', 'Hotel',
       'Ice Cream Shop', 'Public Art', 'Park', 'Snack Place', 'Garden',
       'Supermarket', 'Electronics Store', 'Border Crossing',
       'Food Court', 'Burger Joint', 'Smoke Shop', 'Sandwich Place',
       'Coffee Shop', 'Bubble Tea Shop', 'Juice Bar',
       'Dim Sum Restaurant', 'Yunnan Restaurant',
       'Latin American Restaurant', 'BBQ Joint', 'Candy Store',
       'Asian Restaurant', 'Hobby Shop', 'Cocktail Bar', 'Gift Shop',
       'Boutique', 'Bakery', 'Toy / Game Store', 'Shoe Store',
       'Chinese Restaurant', 'Fast Food Restaurant', 'Restaurant',
       'Multiplex', 'Café', 'Spa', 'History Museum', 'Clothing Store',
       'Thai Restaurant', 'Wine Bar', 'Fried Chicken Joint', 'Beer Bar',
       'Halal Restaurant', 'Bus Station', 'Tunnel', 'Accessories Store',
       'Climbing Gym'], dtype=object)

## Analyse each neighborhood

In [96]:
# One indicator column per venue category, one row per venue.
sg_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# Attach each venue's neighborhood as the last column ...
sg_onehot['Neighborhoods'] = venues_df['Neighborhood']

# ... then rotate that last column to the front.
fixed_columns = list(sg_onehot.columns[-1:]) + list(sg_onehot.columns[:-1])
sg_onehot = sg_onehot[fixed_columns]

print(sg_onehot.shape)
sg_onehot.head()

(3545, 288)


Unnamed: 0,Neighborhoods,Accessories Store,Airport,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Aquarium,Arcade,Art Gallery,...,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Yoga Studio,Yunnan Restaurant,Zhejiang Restaurant,Zoo,Zoo Exhibit
0,Changi,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Changi,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Changi,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Changi,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Changi,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [97]:
# Average the indicator columns per neighborhood: each value becomes the share
# of that neighborhood's venues that fall in the given category.
sg_grouped = sg_onehot.groupby("Neighborhoods", as_index=False).mean()

print(sg_grouped.shape)
sg_grouped

(38, 288)


Unnamed: 0,Neighborhoods,Accessories Store,Airport,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Aquarium,Arcade,Art Gallery,...,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Yoga Studio,Yunnan Restaurant,Zhejiang Restaurant,Zoo,Zoo Exhibit
0,Ang Mo Kio,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0
1,Bedok,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Bisham,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Boonlay,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.02
4,Bukit Panjang,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Bukit Timah,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.010309,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Changi,0.013333,0.026667,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.013333,0.0,0.0,0.0,0.013333,0.0,0.0,0.0
7,Chinatown,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,...,0.02,0.01,0.03,0.01,0.0,0.03,0.0,0.0,0.0,0.0
8,Choa Chu Kang,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Clementi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [98]:
# Number of neighborhoods with at least one shopping mall among their venues.
int((sg_grouped["Shopping Mall"] > 0).sum())

27

In [99]:
# Keep only the neighborhood name and its shopping-mall share.
sg_mall = sg_grouped.loc[:, ["Neighborhoods", "Shopping Mall"]]
sg_mall.head()

Unnamed: 0,Neighborhoods,Shopping Mall
0,Ang Mo Kio,0.0
1,Bedok,0.02
2,Bisham,0.01
3,Boonlay,0.02
4,Bukit Panjang,0.042254


## Cluster neighborhoods

In [100]:
kclusters = 3

# Cluster on the shopping-mall share only; the name column is not a feature.
# The axis is given by keyword because pandas 2.x no longer accepts it as a
# positional argument to drop().
sg_clustering = sg_mall.drop(columns=["Neighborhoods"])

# run k-means clustering; n_init is pinned so the number of restarts does not
# depend on the installed scikit-learn version's default
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(sg_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 2, 2, 2, 1, 2, 2, 0, 2, 2], dtype=int32)

In [101]:
# Work on a copy of sg_mall that also carries each row's k-means cluster label.
sg_merged = sg_mall.assign(**{"Cluster Labels": kmeans.labels_})

In [102]:
# Rename the key column to the singular "Neighborhood" used from here on.
sg_merged = sg_merged.rename(columns={"Neighborhoods": "Neighborhood"})
sg_merged.head()

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels
0,Ang Mo Kio,0.0,0
1,Bedok,0.02,2
2,Bisham,0.01,2
3,Boonlay,0.02,2
4,Bukit Panjang,0.042254,1


In [105]:
# attach latitude/longitude from df to sg_merged, matching on the neighborhood name
coords_by_neighborhood = df.set_index("neighborhood")
sg_merged = sg_merged.join(coords_by_neighborhood, on="Neighborhood")

print(sg_merged.shape)
sg_merged.head() # check the last columns!

(38, 5)


Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,latitude,longtitude
0,Ang Mo Kio,0.0,0,1.37161,103.84546
1,Bedok,0.02,2,1.32425,103.95297
2,Bisham,0.01,2,1.35079,103.8511
3,Boonlay,0.02,2,1.33333,103.7
4,Bukit Panjang,0.042254,1,1.37877,103.76977


In [106]:
# order the rows so that neighborhoods of the same cluster sit together
print(sg_merged.shape)
sg_merged = sg_merged.sort_values("Cluster Labels")
sg_merged

(38, 5)


Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,latitude,longtitude
0,Ang Mo Kio,0.0,0,1.37161,103.84546
24,Queenstown,0.0,0,1.29966,103.80172
13,Hougang,0.0,0,1.37124,103.89162
26,Seletar,0.0,0,1.41,103.87417
22,Paya Lebar,0.0,0,1.32503,103.89049
30,Siglap,0.0,0,1.31059,103.9254
7,Chinatown,0.0,0,1.28479,103.84419
20,Outram,0.0,0,1.284268,103.835192
33,Tanjong Pagar,0.0,0,1.27889,103.84539
35,Toa Payoh,0.0,0,1.33448,103.85108


In [110]:
# create map
map_clusters = folium.Map(location=[latitude_sg, longitude_sg], zoom_start=12)

# set color scheme for the clusters: one evenly spaced rainbow color per
# cluster, converted to hex strings
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(sg_merged['latitude'], sg_merged['longtitude'], sg_merged['Neighborhood'], sg_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    # index the palette by the cluster label itself (labels run 0..kclusters-1)
    # rather than relying on negative-index wraparound for cluster 0
    cluster_color = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

## Examine the clusters

In [111]:
# Neighborhoods assigned to cluster 0.
sg_merged[sg_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,latitude,longtitude
0,Ang Mo Kio,0.0,0,1.37161,103.84546
24,Queenstown,0.0,0,1.29966,103.80172
13,Hougang,0.0,0,1.37124,103.89162
26,Seletar,0.0,0,1.41,103.87417
22,Paya Lebar,0.0,0,1.32503,103.89049
30,Siglap,0.0,0,1.31059,103.9254
7,Chinatown,0.0,0,1.28479,103.84419
20,Outram,0.0,0,1.284268,103.835192
33,Tanjong Pagar,0.0,0,1.27889,103.84539
35,Toa Payoh,0.0,0,1.33448,103.85108


In [112]:
# Neighborhoods assigned to cluster 1.
sg_merged[sg_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,latitude,longtitude
4,Bukit Panjang,0.042254,1,1.37877,103.76977
34,Tiong Bahru,0.04,1,1.28953,103.83208
14,Jurong East,0.03,1,1.33437,103.74367
25,Rochor,0.03,1,1.30413,103.85029
23,Punggol,0.03,1,1.40246,103.90686
36,Woodlands,0.034483,1,1.43585,103.78698
28,Sengkang,0.03,1,1.39244,103.8947


In [113]:
# Neighborhoods assigned to cluster 2.
sg_merged[sg_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,latitude,longtitude
32,Tanglin,0.01,2,1.31667,103.81667
29,Serangoon,0.01,2,1.35554,103.8766
27,Sembawang,0.017857,2,1.44794,103.81891
18,Marine Parade,0.01,2,1.30306,103.90778
19,Novena,0.01,2,1.3191,103.84372
17,Katong,0.01,2,1.30457,103.90288
16,Kampong Glam,0.02,2,1.30413,103.86347
15,Kallang,0.01,2,1.33333,103.86667
12,Holland Village,0.01,2,1.31194,103.79333
11,Harbourfront,0.02,2,1.2652,103.8201


*Observation*

Most shopping malls are concentrated in the central area of Singapore. In the tables above, the share of shopping malls among the venues returned for a neighborhood is highest in cluster 1 (about 3–4%), moderate in cluster 2 (about 1–2%) and zero in cluster 0. This project recommends that property developers and the government capitalize on these findings by opening new shopping malls in the cluster 0 neighborhoods, where such malls are lacking. Lastly, they are advised to avoid the neighborhoods in cluster 1, which already have a high concentration of shopping malls and are likely to suffer from intense competition.