### Import necessary Libraries

In [2]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
# Show every column and every row when a DataFrame is displayed
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

import requests # library to handle requests
# NOTE(review): this import path is reported as deprecated since pandas 1.0 in
# favour of pd.json_normalize -- confirm against the installed pandas version.
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

from bs4 import BeautifulSoup # library to parse HTML/XML documents

import geocoder # import geocoder
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


!conda install -c conda-forge geopy --yes

### Scraping the Wikipedia page that lists the postal codes of Singapore

In [302]:
url = "https://en.wikipedia.org/wiki/Postal_codes_in_Singapore"

# Download the page. The timeout stops the cell from hanging forever on a
# stalled connection, and raise_for_status() turns an HTTP error (404, 429,
# 5xx, ...) into an exception here instead of letting an error page be parsed
# as if it were the article.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.text

# Parse the HTML into a navigable tree with BeautifulSoup (lxml parser)
page = BeautifulSoup(results, "lxml")

In [303]:
# Sanity check: print the <title> element to confirm which page was parsed
title = page.title
print(title)

<title>Postal codes in Singapore - Wikipedia</title>


In [304]:
# First <tbody> element found in the parsed page
tab = page.tbody

# Replace every <a> tag by its own children so that the table cells hold text
# nodes instead of links. find_all()/unwrap() are the current Beautiful Soup
# names for the legacy findAll()/replaceWithChildren() aliases, and match the
# find_all() call used when the table rows are read.
for a in tab.find_all('a'):
    a.unwrap()
tab

<tbody><tr>
<th>Postal district
</th>
<th>Postal sector<br/>(1st 2 digits of 6-digit postal codes)
</th>
<th>General location
</th></tr>
<tr>
<td>01
</td>
<td>01, 02, 03, 04, 05, 06
</td>
<td>Raffles Place, Cecil, Marina, People's Park
</td></tr>
<tr>
<td>02
</td>
<td>07, 08
</td>
<td>Anson, Tanjong Pagar
</td></tr>
<tr>
<td>03
</td>
<td>14, 15, 16
</td>
<td>Bukit Merah, Queenstown, Tiong Bahru
</td></tr>
<tr>
<td>04
</td>
<td>09, 10
</td>
<td>Telok Blangah, Harbourfront
</td></tr>
<tr>
<td>05
</td>
<td>11, 12, 13
</td>
<td>Pasir Panjang, Hong Leong Garden, Clementi New Town
</td></tr>
<tr>
<td>06
</td>
<td>17
</td>
<td>High Street, Beach Road (part)
</td></tr>
<tr>
<td>07
</td>
<td>18, 19
</td>
<td>Middle Road, Golden Mile
</td></tr>
<tr>
<td>08
</td>
<td>20, 21
</td>
<td>Little India, Farrer Park, Jalan Besar, Lavender
</td></tr>
<tr>
<td>09
</td>
<td>22, 23
</td>
<td>Orchard, Cairnhill, River Valley
</td></tr>
<tr>
<td>10
</td>
<td>24, 25, 26, 27
</td>
<td>Ardmore, Bukit Timah, Holl

### Create the Dataframe that will contain postcodes

In [321]:
# Empty frame with the two columns that the scraped rows will populate
postal_code = pd.DataFrame(
    columns=['Postal District', 'Neighborhood'],
)
postal_code

Unnamed: 0,Postal District,Neighborhood


### Fill the Dataframe with the postcodes

In [322]:
tab = page.tbody

# Collect one record per (district, neighborhood) pair and build the frame a
# single time at the end. DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0, and calling it inside a loop copied the whole frame
# on every iteration.
records = []
for row in tab.find_all('tr'):
    # The cells are read from the odd positions of row.contents:
    # [1] is the postal district and [5] the general location.
    postDistrict = row.contents[1].string
    neighborhood_name = row.contents[5].string
    if neighborhood_name is None:
        # .string is None when the cell has several child nodes: take the
        # children themselves and drop the ', ' / '\n' separator nodes.
        neighborhood_name2 = [x for x in row.contents[5].contents
                              if x != ', ' and x != '\n']
    else:
        neighborhood_name2 = neighborhood_name.split(',')

    for ele in neighborhood_name2:
        records.append({'Postal District': postDistrict, 'Neighborhood': ele})

postal_code = pd.DataFrame(records, columns=['Postal District', 'Neighborhood'])

# Remove surrounding spaces and '\n' from both columns (the district text is
# read from the cell as-is, so it needs the same clean-up as the names)
postal_code['Neighborhood'] = postal_code['Neighborhood'].str.strip()
postal_code['Postal District'] = postal_code['Postal District'].str.strip()

# Drop the first record, which comes from the table's header row. The index
# therefore starts at 1. .copy() makes the result an independent frame so that
# later column assignments do not act on a slice of the unsliced frame.
postal_code = postal_code[1:].copy()
postal_code.head(10)

Unnamed: 0,Postal District,Neighborhood
1,1,Raffles Place
2,1,Cecil
3,1,Marina
4,1,People's Park
5,2,Anson
6,2,Tanjong Pagar
7,3,Bukit Merah
8,3,Queenstown
9,3,Tiong Bahru
10,4,Telok Blangah


In [323]:
# (number of rows, number of columns) of the frame built from the table
postal_code.shape

(75, 2)

### Get the location of every district of Singapore using geolocator

In [310]:
# Imported in this cell so that it runs on its own; RateLimiter ships with
# geopy, which the notebook already uses for Nominatim.
from geopy.extra.rate_limiter import RateLimiter

geolocator = Nominatim(user_agent="singap_agent")

# Wrap geocode() so that:
#   * calls are spaced at least one second apart (the public Nominatim
#     service asks for no more than one request per second);
#   * a call that fails with a geopy service error (timeout, unavailable,
#     ...) is retried twice, i.e. three attempts in total;
#   * if every attempt fails, None is returned instead of an exception
#     aborting the whole loop.
# Previously only a None result was retried, so the first timeout raised and
# stopped the cell.
geocode = RateLimiter(
    geolocator.geocode,
    min_delay_seconds=1,
    max_retries=2,
    swallow_exceptions=True,
)

latitudes = []
longitudes = []
for neighborhood in postal_code['Neighborhood']:
    location = geocode(neighborhood + ', Singapore, SG')
    if location is None:
        # Not found (or the service kept failing): mark as missing
        latitudes.append(np.nan)
        longitudes.append(np.nan)
    else:
        latitudes.append(location.latitude)
        longitudes.append(location.longitude)

# Attach the coordinates and remove the districts we couldn't locate.
# assign()/dropna() return a new frame rather than mutating a slice in place.
postal_code = (
    postal_code
    .assign(Latitude=latitudes, Longitude=longitudes)
    .dropna(axis=0)
)

In [311]:
# Display the geocoded neighborhoods (rows that could not be located were dropped)
postal_code

Unnamed: 0,Postal District,Neighborhood,Latitude,Longitude
1,1,Raffles Place,1.283595,103.851568
2,1,Cecil,1.278716,103.847738
3,1,Marina,1.290475,103.852036
4,1,People's Park,1.28581,103.84416
5,2,Anson,1.271363,103.842698
6,2,Tanjong Pagar,1.276571,103.845848
7,3,Bukit Merah,1.280628,103.830591
8,4,Telok Blangah,1.270586,103.809863
9,4,Harbourfront,1.265451,103.822652
10,5,Pasir Panjang,1.288834,103.775657


### Exploration of the neighborhoods in Singapore

In [180]:
# Geocode the city itself; its coordinates are used to centre the map
geolocator = Nominatim(user_agent="foursquare_agent")
address = 'Singapore, SG'

location = geolocator.geocode(address)
latitude, longitude = location.latitude, location.longitude
print(latitude, longitude)

1.3408528 103.878446863736


In [313]:
# Base map of Singapore, centred on the latitude/longitude values
map_singapore = folium.Map(location=[latitude, longitude], zoom_start=11)

# One circle marker per neighborhood; the popup shows "<district>, <neighborhood>"
marker_rows = zip(
    postal_code['Latitude'],
    postal_code['Longitude'],
    postal_code['Postal District'],
    postal_code['Neighborhood'],
)
for lat, lng, postcode, neighborhood in marker_rows:
    popup_text = '{}, {}'.format(postcode, neighborhood)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_singapore)

map_singapore

Let's save the data to a CSV file so that we can reuse it in our project without having to run the geocoding search again.

In [316]:
# Write the geocoded neighborhoods to disk, without the index column
postal_code.to_csv(
    'Singapore_neighborhood_geolocalisation.csv',
    index=False,
)