In [1]:
# Extracting Singapore land survey districts' coordinates
# Source: https://data.gov.sg/dataset/sla-land-survey-district
# Date: 04 February 2021
# Author: Akbar Azad

In [2]:
# Import packages
from lxml import etree
import xml.etree.ElementTree as ET
import pandas as pd
import datetime

In [3]:
# Load the KML file (expected in the working directory) into an ElementTree
KML_PATH = 'sla-land-survey-district-kml.kml'
tree = ET.parse(KML_PATH)
print(tree)

<xml.etree.ElementTree.ElementTree object at 0x000002789E010088>


In [4]:
# Fetch the root element of the parsed tree; its qualified tag is printed
# below and carries the document namespace in curly braces
root = tree.getroot()
print(root)

<Element '{http://www.opengis.net/kml/2.2}kml' at 0x000002789DFFD638>


In [5]:
# Show namespace
# ElementTree stores qualified tags as '{namespace-uri}localname'. Take the
# '{...}' prefix from the root tag instead of hard-coding the URI, so the value
# always matches the document that was actually parsed. Falls back to an empty
# string when the root tag carries no namespace.
root_tag = root.tag
namespace = root_tag[:root_tag.index('}') + 1] if root_tag.startswith('{') else ''
print(f"Namespace: {namespace}")

Namespace: {http://www.opengis.net/kml/2.2}


In [6]:
# Collect every LinearRing element found anywhere below the root and preview
# the first five matches
linear_ring_path = './/{http://www.opengis.net/kml/2.2}LinearRing'
lineStrings = list(tree.iterfind(linear_ring_path))
print(lineStrings[:5])

[<Element '{http://www.opengis.net/kml/2.2}LinearRing' at 0x000002789E011908>, <Element '{http://www.opengis.net/kml/2.2}LinearRing' at 0x000002789E014228>, <Element '{http://www.opengis.net/kml/2.2}LinearRing' at 0x000002789E0149A8>, <Element '{http://www.opengis.net/kml/2.2}LinearRing' at 0x000002789E0150E8>, <Element '{http://www.opengis.net/kml/2.2}LinearRing' at 0x000002789E015818>]


In [7]:
# Peek at the document layout: qualified tags of the first five elements,
# in document order starting from the root
all_tags = [node.tag for node in root.iter()]
all_tags[:5]

['{http://www.opengis.net/kml/2.2}kml',
 '{http://www.opengis.net/kml/2.2}Document',
 '{http://www.opengis.net/kml/2.2}name',
 '{http://www.opengis.net/kml/2.2}visibility',
 '{http://www.opengis.net/kml/2.2}Schema']

In [8]:
# Get districts
# Walk every <SimpleData> element and keep those whose 'name' attribute is
# SURVEY_DISTRICT; the element text is the district code. 'Order' is the
# zero-based position of each district in document order and is the key used
# later to merge districts with the coordinates dataframe.
survey_district_elems = (
    elem
    for elem in root.iter('{http://www.opengis.net/kml/2.2}SimpleData')
    # .get() avoids a KeyError if a SimpleData element has no 'name' attribute
    if elem.get('name') == 'SURVEY_DISTRICT'
)
district_list = [
    {'Order': order, 'District': elem.text}
    for order, elem in enumerate(survey_district_elems)
]
# Explicit columns keep the 'Order'/'District' schema even if nothing matched,
# so a later merge on 'Order' does not fail on an empty frame
district_df = pd.DataFrame(district_list, columns=['Order', 'District'])

In [9]:
# Get coordinates
# Each <coordinates> element holds whitespace-separated tuples of the form
# 'longitude,latitude[,altitude]'. Splitting on whitespace (rather than on the
# literal ',0.0 ' separator) also copes with newlines, leading/trailing
# whitespace and altitudes other than 0.0. 'Order' is the zero-based position
# of the <coordinates> element in document order and is the key used later to
# merge with the districts dataframe.
# Latitude/Longitude are kept as the exact strings found in the KML so no
# precision is lost when the data is exported.
coordinates_list = []
for order, coord_elem in enumerate(root.iter('{http://www.opengis.net/kml/2.2}coordinates')):
    # (text or '') guards against an empty <coordinates/> element (text is None)
    for coord_tuple in (coord_elem.text or '').split():
        fields = coord_tuple.split(',')
        # KML order is longitude first, then latitude
        coordinates_list.append({'Order': order,
                                 'Latitude': fields[1],
                                 'Longitude': fields[0]})
# Explicit columns keep the schema stable even if no coordinates were found
coordinates_df = pd.DataFrame(coordinates_list, columns=['Order', 'Latitude', 'Longitude'])

In [10]:
# Report the size of the coordinates dataframe, then preview its first rows
n_rows, n_cols = coordinates_df.shape
print(f"Number of rows: {n_rows}\nNumber of columns: {n_cols}")
coordinates_df.head()

Number of rows: 22489
Number of columns: 3


Unnamed: 0,Order,Latitude,Longitude
0,0,1.35821281191433,103.950157756439
1,0,1.35828650813368,103.950163501803
2,0,1.3583586845387,103.950179221158
3,0,1.3584305076863,103.950206361249
4,0,1.3584924455653,103.950245541627


In [11]:
# Report the size of the districts dataframe, then preview its first rows
n_rows, n_cols = district_df.shape
print(f"Number of rows: {n_rows}\nNumber of columns: {n_cols}")
district_df.head()

Number of rows: 65
Number of columns: 2


Unnamed: 0,Order,District
0,0,MK31
1,1,TS11
2,2,MK04
3,3,MK11
4,4,MK29


In [12]:
# Attach the district code to every coordinate row via the shared 'Order' key.
# This is pandas' default inner join, so rows whose 'Order' has no counterpart
# in the other dataframe are dropped.
merge_df = coordinates_df.merge(district_df, on=['Order']).reset_index(drop=True)

# Write the merged data to a CSV whose name carries the current timestamp
# (YYYYmmddHHMMSS), so each run produces a new file
timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
merge_df.to_csv('singapore_districts_kml_{}.csv'.format(timestamp), index=False)

# Report the size of the merged dataframe, then preview its first rows
n_rows, n_cols = merge_df.shape
print(f"Number of rows: {n_rows}\nNumber of columns: {n_cols}")
merge_df.head()

Number of rows: 22489
Number of columns: 4


Unnamed: 0,Order,Latitude,Longitude,District
0,0,1.35821281191433,103.950157756439,MK31
1,0,1.35828650813368,103.950163501803,MK31
2,0,1.3583586845387,103.950179221158,MK31
3,0,1.3584305076863,103.950206361249,MK31
4,0,1.3584924455653,103.950245541627,MK31
