In [1]:
# Extracting Singapore land planning areas' coordinates
# Source: https://data.gov.sg/dataset/master-plan-2014-planning-area-boundary-web
# Date: 05 February 2021
# Author: Akbar Azad

In [2]:
# Import packages
from lxml import etree
import xml.etree.ElementTree as ET
import pandas as pd
import datetime

In [3]:
# Import KML data: parse the planning-area KML file (path is relative, so it
# is resolved against the current working directory) into an ElementTree,
# then print the tree object as a quick check that parsing succeeded
tree = ET.parse('MP14_PLNG_AREA_WEB_PL.kml')
print(tree)

<xml.etree.ElementTree.ElementTree object at 0x000002B993230088>


In [4]:
# Get the root element of the parsed tree; printing it shows its tag, which
# carries the XML namespace in braces in front of the element name
root = tree.getroot()
print(root)

<Element '{http://www.opengis.net/kml/2.2}kml' at 0x000002B99321F728>


In [5]:
# Namespace prefix (in braces) that precedes every tag name in this KML file
namespace = '{http://www.opengis.net/kml/2.2}'
print("Namespace: {}".format(namespace))

Namespace: {http://www.opengis.net/kml/2.2}


In [6]:
# KML structure: preview the tags of the first 20 elements in document order
# to see how Document, Folder, Placemark and the geometry elements nest
[node.tag for node in root.iter()][:20]

['{http://www.opengis.net/kml/2.2}kml',
 '{http://www.opengis.net/kml/2.2}Document',
 '{http://www.opengis.net/kml/2.2}name',
 '{http://www.opengis.net/kml/2.2}Snippet',
 '{http://www.opengis.net/kml/2.2}Folder',
 '{http://www.opengis.net/kml/2.2}name',
 '{http://www.opengis.net/kml/2.2}Snippet',
 '{http://www.opengis.net/kml/2.2}Placemark',
 '{http://www.opengis.net/kml/2.2}name',
 '{http://www.opengis.net/kml/2.2}Snippet',
 '{http://www.opengis.net/kml/2.2}description',
 '{http://www.opengis.net/kml/2.2}styleUrl',
 '{http://www.opengis.net/kml/2.2}MultiGeometry',
 '{http://www.opengis.net/kml/2.2}Polygon',
 '{http://www.opengis.net/kml/2.2}extrude',
 '{http://www.opengis.net/kml/2.2}altitudeMode',
 '{http://www.opengis.net/kml/2.2}outerBoundaryIs',
 '{http://www.opengis.net/kml/2.2}LinearRing',
 '{http://www.opengis.net/kml/2.2}coordinates',
 '{http://www.opengis.net/kml/2.2}Placemark']

In [7]:
# Test code: walk a single Placemark (ID_00054) down to its coordinate string
# to confirm the nesting
#   Placemark > MultiGeometry > Polygon > outerBoundaryIs > LinearRing > coordinates
# before processing every planning area.
# Children are collected with list(element) because Element.getchildren() is
# deprecated and was removed in Python 3.9.
test = root.findall("./{http://www.opengis.net/kml/2.2}Document/{http://www.opengis.net/kml/2.2}Folder/{http://www.opengis.net/kml/2.2}Placemark/[@id='ID_00054']")
test_children = list(test[0])            # children of the matched Placemark
test_polygon = list(test_children[4])    # index 4 is MultiGeometry -> its Polygon elements
test_polygon2 = list(test_polygon[0])    # first Polygon -> extrude, altitudeMode, outerBoundaryIs
test_polygon3 = list(test_polygon2[2])   # outerBoundaryIs -> LinearRing
test_polygon4 = list(test_polygon3[0])   # LinearRing -> coordinates
test_coordinates = test_polygon4[0].text.strip()

  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.


In [8]:
# Display test result: the list of child elements collected in the test cell
# (for this Placemark: extrude, altitudeMode and outerBoundaryIs)
print(test_polygon2)

[<Element '{http://www.opengis.net/kml/2.2}extrude' at 0x000002B9932BFB88>, <Element '{http://www.opengis.net/kml/2.2}altitudeMode' at 0x000002B9932BFBD8>, <Element '{http://www.opengis.net/kml/2.2}outerBoundaryIs' at 0x000002B9932BFC28>]


In [9]:
# Get planning area: build one row per Folder child after the first two
# entries (the Folder's own name and Snippet), numbered in document order.
# 'Order' is the join key used later to attach names to coordinates.
# The first child of each Placemark is its <name>; it is read with item[0]
# because Element.getchildren() is deprecated and was removed in Python 3.9.
folder_children = root.findall('./{http://www.opengis.net/kml/2.2}Document/{http://www.opengis.net/kml/2.2}Folder/')[2:]
planning_area_list = [
    {'Order': num,
     'Planning_Area': item[0].text.strip()}
    for num, item in enumerate(folder_children)
]

planning_area_df = pd.DataFrame(planning_area_list)
print(f"Number of rows: {planning_area_df.shape[0]}\nNumber of columns: {planning_area_df.shape[1]}")
print(planning_area_df.head())

Number of rows: 55
Number of columns: 2
   Order  Planning_Area
0      0         BISHAN
1      1    BUKIT BATOK
2      2    BUKIT MERAH
3      3  BUKIT PANJANG
4      4    BUKIT TIMAH


  


In [10]:
# Get IDs: collect the 'id' attribute of every Folder child after the first
# two entries (the Folder's own name and Snippet), in document order.
id_list = []
for item in root.findall('./{http://www.opengis.net/kml/2.2}Document/{http://www.opengis.net/kml/2.2}Folder/')[2:]:
    id_ = item.get('id')
    if id_ is None:
        # Fail loudly: silently carrying over the previous element's id would
        # attach the wrong geometry to this entry in the later lookup steps.
        raise ValueError(f"Element {item.tag} has no 'id' attribute")
    id_list.append(id_)

In [11]:
# Get Placemark: look up the Placemark element for each collected id,
# keeping the same order as id_list.
# The path prefix is built once; the id predicate is formatted separately
# because the namespace braces must not pass through str.format().
placemark_path = "./{http://www.opengis.net/kml/2.2}Document/{http://www.opengis.net/kml/2.2}Folder/{http://www.opengis.net/kml/2.2}Placemark/"
placemark_list = []
for item in id_list:
    placemark_find = root.findall(placemark_path + "[@id='{}']".format(item))
    if not placemark_find:
        # Same exception type as indexing an empty result, but with context
        raise IndexError(f"No Placemark found with id {item!r}")
    placemark_list.append(placemark_find[0])

In [12]:
# Get Placemark children: one list of direct child elements per Placemark.
# list(element) replaces Element.getchildren(), which is deprecated and was
# removed in Python 3.9.
placemark_children_list = [list(item) for item in placemark_list]

  """


In [13]:
# Get MultiGeometry: keep the fifth child (index 4) of every Placemark.
# The structure preview shows MultiGeometry in that position; a child list
# with fewer than five entries contributes nothing.
multigeometry_list = []
for children in placemark_children_list:
    # A one-element slice yields [] instead of raising when index 4 is absent
    multigeometry_list.extend(children[4:5])

In [14]:
# Get Polygon (MultiGeometry children): one list of child elements per
# MultiGeometry, in the same order as multigeometry_list.
# list(element) replaces Element.getchildren(), which is deprecated and was
# removed in Python 3.9.
polygon_list = [list(item) for item in multigeometry_list]

  """


In [15]:
# Get Polygon children: for every Polygon of every planning area, read the
# outer-boundary coordinate string and emit one row per vertex.
# 'Order' is the planning area's position in polygon_list, so all polygons
# belonging to one area share the same value.
coordinates_list = []
for num, polygons in enumerate(polygon_list):
    for polygon in polygons:
        # Child positions: polygon[2] is outerBoundaryIs, its first child is
        # LinearRing, whose first child is coordinates. Indexing the Element
        # directly replaces Element.getchildren() (removed in Python 3.9).
        coordinates = polygon[2][0][0].text.strip()
        # Each tuple is "longitude,latitude[,altitude]" and tuples are
        # separated by whitespace; splitting on whitespace does not depend on
        # the altitude being exactly 0 or on single-space separators.
        for point in coordinates.split():
            longitude, latitude = point.split(",")[:2]
            # Values stay as strings so the source precision is preserved
            coordinates_list.append({'Order': num,
                                     'Latitude': latitude,
                                     'Longitude': longitude})

coordinates_df = pd.DataFrame(coordinates_list)
print(f"Number of rows: {coordinates_df.shape[0]}\nNumber of columns: {coordinates_df.shape[1]}")
print(coordinates_df.head())
#coordinates_df.to_csv('singapore_planning_area_kml_{}.csv'.format(datetime.datetime.now().strftime('%Y%m%d%H%M%S')), index = False)

Number of rows: 38325
Number of columns: 3
   Order           Latitude          Longitude
0      0  1.362752820720889  103.8492429187323
1      0  1.363027350968694  103.8487496535366
2      0  1.363286277265267  103.8482481275976
3      0  1.363529348189151  103.8477388288432
4      0    1.3635855162765  103.8476165152005


  
  
  if __name__ == '__main__':


In [16]:
# Attach the planning-area name to every coordinate row through the shared
# 'Order' key, drop exact duplicate rows, renumber the index, then report the
# shape and write the result to a timestamped CSV file.
merge_df = (
    coordinates_df
    .merge(planning_area_df, on = ['Order'])
    .drop_duplicates()
    .reset_index(drop = True)
)
print(f"Number of rows: {merge_df.shape[0]}\nNumber of columns: {merge_df.shape[1]}")
print(merge_df.head())
timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
merge_df.to_csv('singapore_planning_area_kml_{}.csv'.format(timestamp), index = False)

Number of rows: 38214
Number of columns: 4
   Order           Latitude          Longitude Planning_Area
0      0  1.362752820720889  103.8492429187323        BISHAN
1      0  1.363027350968694  103.8487496535366        BISHAN
2      0  1.363286277265267  103.8482481275976        BISHAN
3      0  1.363529348189151  103.8477388288432        BISHAN
4      0    1.3635855162765  103.8476165152005        BISHAN
