In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

from bs4 import BeautifulSoup

print('Libraries imported.')

Libraries imported.


In [2]:
# extract the website in text form
website_url1 = requests.get('https://en.wikipedia.org/wiki/Postal_codes_in_Singapore').text
soup1 = BeautifulSoup(website_url1, 'lxml')

In [3]:
divs1 = soup1.find('table',{'class':'wikitable'})

In [4]:
tag1 = divs1.findAll('td')

In [5]:
list1 = []
for j in tag1:
    l = j.get_text().replace('\n', '')
    list1.append(l)

In [6]:
data1 = np.array_split(list1,28)

In [7]:
column_names1 = ['Postal_District', 'Postal_Sector', 'General_Location']

In [8]:
df1 = pd.DataFrame(data = data1,
                  columns=column_names1)
df1.drop(columns="Postal_Sector", inplace=True)

In [9]:
def ungroup_delim(col, delim=','):
    """Split elements in a column by the given delimiter, stacking columnwise"""
    return col.str.split(delim, expand=True).stack()

# Apply the ungrouping function, and forward fill elements that aren't grouped.
ungrouped = df1.apply(ungroup_delim).ffill()

# Drop the unwieldy altered index for a new one.
ungrouped = ungrouped.reset_index(drop=True)

In [10]:
lat_lng_coords = None
geolocator = Nominatim(user_agent="sgex")
address = ungrouped['General_Location'] + ', Singapore'

In [11]:
lat = []
lgn = []
for i in address:
    location = geolocator.geocode(i)
    try:
        lat.append(location.latitude)
        lgn.append(location.longitude)
    except:
        lat.append('Error')
        lgn.append('Error')

In [12]:
ungrouped['Latitude'] = lat
ungrouped['Longtitude'] = lgn

In [13]:
ungrouped.at[12,'Latitude']=1.28883
ungrouped.at[13,'Latitude']=1.28883
ungrouped.at[12,'Longtitude']=103.776
ungrouped.at[13,'Longtitude']=103.776
ungrouped.at[15,'Latitude']=1.29018
ungrouped.at[15,'Longtitude']=103.85

In [14]:
df_Singapore = ungrouped.groupby(['Postal_District']).General_Location.apply(', '.join).reset_index()

In [15]:
SG1 = ungrouped.groupby('Postal_District')['Latitude'].max().tolist()
SG2 = ungrouped.groupby('Postal_District')['Longtitude'].max().tolist()

In [16]:
df_Singapore['Latitude'] = SG1
df_Singapore['Longtitude'] = SG2

In [27]:
df_Singapore

Unnamed: 0,Postal_District,General_Location,Latitude,Longtitude
0,1,"Raffles Place, Cecil, Marina, People's Park",1.290475,103.852036
1,2,"Anson, Tanjong Pagar",1.276571,103.846731
2,3,"Bukit Merah, Queenstown, Tiong Bahru",1.294623,103.830591
3,4,"Telok Blangah, Harbourfront",1.270586,103.822652
4,5,"Pasir Panjang, Hong Leong Garden, Clementi N...",1.288834,103.776
5,6,"High Street, Beach Road (part)",1.29018,103.85004
6,7,"Middle Road, Golden Mile",1.302747,103.865186
7,8,"Little India, Farrer Park, Jalan Besar, Lav...",1.312755,103.86303
8,9,"Orchard, Cairnhill, River Valley",1.308398,103.886149
9,10,"Ardmore, Bukit Timah, Holland Road, Tanglin",1.35469,103.83013


In [17]:
address = 'Singapore'

location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Singapore are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Singapore are 1.2904753, 103.8520359.


In [18]:
# create map of Singapore using latitude and longitude values
map_Singapore = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, Postal_District, General_Location in zip(df_Singapore['Latitude'], df_Singapore['Longtitude'], df_Singapore['Postal_District'], df_Singapore['General_Location']):
    label = '{}, {}'.format(Postal_District, General_Location)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Singapore)  
    
map_Singapore

In [19]:
CLIENT_ID = 'UEAKNJF0EQZEFBADPM42RG2YHPDDFMWQBRGX0YHSHKGWSA2L' # your Foursquare ID
CLIENT_SECRET = 'TSRNB5CZEIX1TOCA05RU4OFK4D4CMBGFTLIJHJOF3PVDCXQ0' # your Foursquare Secret
VERSION = '20180323' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: UEAKNJF0EQZEFBADPM42RG2YHPDDFMWQBRGX0YHSHKGWSA2L
CLIENT_SECRET:TSRNB5CZEIX1TOCA05RU4OFK4D4CMBGFTLIJHJOF3PVDCXQ0


In [20]:
radius=1200
LIMIT=500

In [21]:
def getNearbyVenues(names, latitudes, longitudes, radius=radius):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['id'],
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['General_Location', 
                  'GL Latitude', 
                  'GL Longitude', 
                  'Venue ID',
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [22]:
Singapore_venues = getNearbyVenues(names=df_Singapore['General_Location'],
                                   latitudes=df_Singapore['Latitude'],
                                   longitudes=df_Singapore['Longtitude']
                                  )

Raffles Place,  Cecil,  Marina,  People's Park
Anson,  Tanjong Pagar
Bukit Merah,  Queenstown,  Tiong Bahru
Telok Blangah,  Harbourfront
Pasir Panjang,  Hong Leong Garden,  Clementi New Town
High Street,  Beach Road (part)
Middle Road,  Golden Mile
Little India,  Farrer Park,  Jalan Besar,  Lavender
Orchard,  Cairnhill,  River Valley
Ardmore,  Bukit Timah,  Holland Road,  Tanglin
Watten Estate,  Novena,  Thomson
Balestier,  Toa Payoh,  Serangoon
Macpherson,  Braddell
Geylang,  Eunos
Katong,  Joo Chiat,  Amber Road
Bedok,  Upper East Coast,  Eastwood,  Kew Drive
Loyang,  Changi
Simei,  Tampines,  Pasir Ris
Serangoon Garden,  Hougang,  Punggol
Bishan,  Ang Mo Kio
Upper Bukit Timah,  Clementi Park,  Ulu Pandan
Jurong,  Tuas
Hillview,  Dairy Farm,  Bukit Panjang,  Choa Chu Kang
Lim Chu Kang,  Tengah
Kranji,  Woodgrove,  Woodlands
Upper Thomson,  Springleaf
Yishun,  Sembawang
Seletar


In [29]:
Singapore_venues.head()

Unnamed: 0,General_Location,GL Latitude,GL Longitude,Venue ID,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Raffles Place, Cecil, Marina, People's Park",1.290475,103.852036,4d438c6514aa8cfa743d5c3d,National Gal­lery Singa­pore,1.29074,103.851548,Art Gallery
1,"Raffles Place, Cecil, Marina, People's Park",1.290475,103.852036,4b058810f964a52036af22e3,Esplanade Park,1.288968,103.85358,Park
2,"Raffles Place, Cecil, Marina, People's Park",1.290475,103.852036,4f0bbc70e4b0baf830387ef5,The Oval @ Singapore Cricket Club Pavilion,1.289006,103.852438,Restaurant
3,"Raffles Place, Cecil, Marina, People's Park",1.290475,103.852036,571b758d498e166ab0717ce7,Esplanade Theatre,1.289932,103.855037,Theater
4,"Raffles Place, Cecil, Marina, People's Park",1.290475,103.852036,4b058814f964a52090b022e3,Esplanade - Theatres On The Bay,1.289688,103.85512,Performing Arts Venue


In [24]:
array = ['Ice Cream Shop', 'Dessert Shop']
ice_cream = Singapore_venues['Venue Category'].isin(array)
IC = Singapore_venues[ice_cream]
print(IC.shape)

(45, 8)


In [26]:
New_IC = IC.drop_duplicates(subset='Venue', keep='first', inplace=False)
print(New_IC.shape)
Freq = New_IC.groupby("General_Location").count().apply(lambda g: g / g.sum())
Freq.sort_values(by=['GL Latitude'], ascending=False)

(37, 8)


Unnamed: 0_level_0,GL Latitude,GL Longitude,Venue ID,Venue,Venue Latitude,Venue Longitude,Venue Category
General_Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"Orchard, Cairnhill, River Valley",0.162162,0.162162,0.162162,0.162162,0.162162,0.162162,0.162162
"Balestier, Toa Payoh, Serangoon",0.135135,0.135135,0.135135,0.135135,0.135135,0.135135,0.135135
"Ardmore, Bukit Timah, Holland Road, Tanglin",0.081081,0.081081,0.081081,0.081081,0.081081,0.081081,0.081081
"Katong, Joo Chiat, Amber Road",0.081081,0.081081,0.081081,0.081081,0.081081,0.081081,0.081081
"Watten Estate, Novena, Thomson",0.081081,0.081081,0.081081,0.081081,0.081081,0.081081,0.081081
"Anson, Tanjong Pagar",0.054054,0.054054,0.054054,0.054054,0.054054,0.054054,0.054054
"Bedok, Upper East Coast, Eastwood, Kew Drive",0.054054,0.054054,0.054054,0.054054,0.054054,0.054054,0.054054
"Little India, Farrer Park, Jalan Besar, Lavender",0.054054,0.054054,0.054054,0.054054,0.054054,0.054054,0.054054
"Middle Road, Golden Mile",0.054054,0.054054,0.054054,0.054054,0.054054,0.054054,0.054054
"Raffles Place, Cecil, Marina, People's Park",0.054054,0.054054,0.054054,0.054054,0.054054,0.054054,0.054054
