# Building geospatial dataset processing

### Import dependencies

In [2]:
# Data processing
import numpy as np
import pandas as pd
import geopandas as gpd

# Visualisation
import matplotlib
from ipyleaflet import Map, GeoData, basemaps, LayersControl
import seaborn
import folium
import mapclassify

### Importing shapefiles

In [10]:
# Building polygons
buildings_osm = gpd.read_file('Generated Files/buildings_polygons.shp')

# Postal codes database
postal_codes = gpd.read_file('Generated Files/buildings_EPSG3414_no-dups.shp')

# Housing prices
all_resale = pd.read_csv('Data Sources/TL_whole_data/all_resale_prices_by_year.csv')
resale_2020 = all_resale.loc[all_resale['year'] == 2020]

In [9]:
buildings_osm

Unnamed: 0,osm_id,code,fclass,name,type,geometry
0,162288645,1500,building,Raffles Lighthouse,,"POLYGON ((17695.447 15897.831, 17696.115 15901..."
1,162288649,1500,building,,,"POLYGON ((17691.664 15911.852, 17694.224 15911..."
2,162288650,1500,building,,,"POLYGON ((17706.467 15924.158, 17714.258 15929..."
3,162380270,1500,building,,,"POLYGON ((17733.636 15975.077, 17746.468 15975..."
4,162288647,1500,building,,,"POLYGON ((15529.250 18357.218, 15530.341 18370..."
...,...,...,...,...,...,...
111421,97938127,1500,building,,,"POLYGON ((54181.506 44262.067, 54181.505 44271..."
111422,97942438,1500,building,,,"POLYGON ((52671.986 43514.437, 52686.308 43519..."
111423,97947951,1500,building,,,"POLYGON ((51771.064 43018.565, 51778.231 43019..."
111424,97947953,1500,building,,,"POLYGON ((51773.446 43011.455, 51779.422 43012..."


In [10]:
postal_codes

Unnamed: 0,ADDRESS,BLK_NO,BUILDING,LATITUDE,LONGITUDE,LONGTITUDE,POSTAL,ROAD_NAME,SEARCHVAL,X,Y,geometry
0,1 STRAITS BOULEVARD SINGAPORE CHINESE CULTURAL...,1,SINGAPORE CHINESE CULTURAL CENTRE,1.275805,103.849615,103.849615,018906,STRAITS BOULEVARD,SINGAPORE CHINESE CULTURAL CENTRE,29813.663491,28697.520756,POINT (29813.663 28697.521)
1,11A STRAITS BOULEVARD TEMPORARY SITE OFFICE SI...,11A,TEMPORARY SITE OFFICE,1.274950,103.851665,103.851665,018907,STRAITS BOULEVARD,TEMPORARY SITE OFFICE,30041.838898,28602.987244,POINT (30041.839 28602.987)
2,5A MARINA GARDENS DRIVE SINGAPORE 018910,5A,NIL,1.279587,103.868956,103.868956,018910,MARINA GARDENS DRIVE,5A MARINA GARDENS DRIVE SINGAPORE 018910,31966.120787,29115.753373,POINT (31966.121 29115.753)
3,2 CENTRAL BOULEVARD CENTRAL BOULEVARD TOWERS S...,2,CENTRAL BOULEVARD TOWERS,1.279744,103.851591,103.851591,018916,CENTRAL BOULEVARD,CENTRAL BOULEVARD TOWERS,30033.604463,29133.104676,POINT (30033.604 29133.105)
4,21 PARK STREET DBS MARINA BAY MRT STATION SING...,21,DBS MARINA BAY MRT STATION,1.276427,103.854598,103.854598,018925,PARK STREET,DBS MARINA BAY MRT STATION,30368.205612,28766.381902,POINT (30368.206 28766.382)
...,...,...,...,...,...,...,...,...,...,...,...,...
121356,1A UPPER CHANGI ROAD NORTH SINGAPORE 886129,1A,NIL,1.352316,103.966549,103.966549,886129,UPPER CHANGI ROAD NORTH,1A UPPER CHANGI ROAD NORTH SINGAPORE 886129,42827.069212,37158.184353,POINT (42827.069 37158.184)
121357,100A KRANJI LOOP SINGAPORE 887327,100A,NIL,1.433629,103.758648,103.758648,887327,KRANJI LOOP,100A KRANJI LOOP SINGAPORE 887327,19690.280996,46149.118386,POINT (19690.281 46149.118)
121358,A PASIR PANJANG ROAD SINGAPORE 887328,A,NIL,1.277170,103.795840,103.795840,887328,PASIR PANJANG ROAD,A PASIR PANJANG ROAD SINGAPORE 887328,23828.902741,28848.553424,POINT (23828.903 28848.553)
121359,GATE C7 AIRPORT CARGO ROAD CHANGI ANIMAL AND P...,GATE C7,CHANGI ANIMAL AND PLANT QUARANTINE CENTRE,1.375315,103.996683,103.996683,918104,AIRPORT CARGO ROAD,CHANGI ANIMAL AND PLANT QUARANTINE CENTRE,46180.468208,39701.534286,POINT (46180.468 39701.534)


In [11]:
all_resale

Unnamed: 0,year,flat,real_price,LATITUDE,LONGITUDE,norm_price
0,1990,1 BEACH RD,81364,1.303671,103.864479,0.175955
1,1990,1 BEDOK STH AVE 1,57285,1.320852,103.933721,0.113400
2,1990,1 CHAI CHEE RD,233192,1.327969,103.922716,0.570399
3,1990,1 DELTA AVE,286862,1.292075,103.828584,0.709831
4,1990,1 DOVER RD,63279,1.302526,103.783273,0.128971
...,...,...,...,...,...,...
184537,2020,990B JURONG WEST ST 93,405063,1.335278,103.694776,0.235300
184538,2020,990C JURONG WEST ST 93,217920,1.335597,103.694486,0.050899
184539,2020,99B LOR 2 TOA PAYOH,1032850,1.338745,103.847253,0.853887
184540,2020,9A BOON TIONG RD,1063499,1.286827,103.828660,0.884087


### Building matching

Resources:

GeoPandas within https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoSeries.within.html

GeoPandas contains https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoSeries.contains.html#geopandas.GeoSeries.contains

### Test

In [6]:
# Import test datasets
test_poly = gpd.read_file('Generated Files/test_poly.shp')
test_point = gpd.read_file('Generated Files/test_point.shp')

In [18]:
# Verify polygon contains point test
test_poly['geometry'].contains(test_point['geometry'])

0     True
1    False
dtype: bool

In [24]:
# Verify point is within polygon test
test_point['geometry'].within(test_poly['geometry'], align=True)

0     True
1    False
dtype: bool

In [49]:
# Test nested for loops
for i in range(len(test_point)):
    for x in range(len(test_poly)):
        print(test_point.iloc[i]['geometry'].within(test_poly.iloc[x]['geometry']))

True
False
False
False


In [59]:
# Reset geometry column in test point database
test_point_nogeom = pd.DataFrame(test_point.drop(columns='geometry'))
test_point_nogeom['Names'] = ['Neom', 'Moop']
test_point_newgeom = test_point_nogeom
test_point_newgeom['geometry'] = gpd.GeoSeries()
test_point_newgeom

Unnamed: 0,id,Names,geometry
0,16,Neom,
1,15,Moop,


In [61]:
# Adding polygons for buildings
for i in range(len(test_point)):
    for x in range(len(test_poly)):
        if (test_point.iloc[i]['geometry'].within(test_poly.iloc[x]['geometry']) == True):
            test_point_newgeom['geometry'].iloc[i] = test_poly.iloc[x]['geometry']

In [62]:
test_point_newgeom

Unnamed: 0,id,Names,geometry
0,16,Neom,"POLYGON ((103.77190 1.30804, 103.77275 1.30788..."
1,15,Moop,


### Part 1: Matching Postal Codes to OSM Buildings

Function goals:  
<ul>
<li>Match points in `postal_codes` to polygons in `buildings_osm`</li>
<li>Add postcode column to `buildings_osm` with the corresponding postal code in `postal_codes` AND/OR</li>
<li>Make duplicate of `postal_codes` with polygon geometry instead of point geometry.</li>
</ul>

In [3]:
postal_codes['geometry']

0         POINT (29813.663 28697.521)
1         POINT (30041.839 28602.987)
2         POINT (31966.121 29115.753)
3         POINT (30033.604 29133.105)
4         POINT (30368.206 28766.382)
                     ...             
121356    POINT (42827.069 37158.184)
121357    POINT (19690.281 46149.118)
121358    POINT (23828.903 28848.553)
121359    POINT (46180.468 39701.534)
121360    POINT (45333.002 37451.715)
Name: geometry, Length: 121361, dtype: geometry

In [4]:
buildings_osm['geometry'].contains(postal_codes['geometry'], align=True)

  warn("The indices of the two GeoSeries are different.")


0         False
1         False
2         False
3         False
4         False
          ...  
121356    False
121357    False
121358    False
121359    False
121360    False
Length: 121361, dtype: bool

In [12]:
# Create new postal code database with reset geometry
postal_codes_polygons = pd.DataFrame(postal_codes.drop(columns='geometry'))
postal_codes_polygons['geometry'] = gpd.GeoSeries()
postal_codes_polygons

Unnamed: 0,ADDRESS,BLK_NO,BUILDING,LATITUDE,LONGITUDE,LONGTITUDE,POSTAL,ROAD_NAME,SEARCHVAL,X,Y,geometry
0,1 STRAITS BOULEVARD SINGAPORE CHINESE CULTURAL...,1,SINGAPORE CHINESE CULTURAL CENTRE,1.275805,103.849615,103.849615,018906,STRAITS BOULEVARD,SINGAPORE CHINESE CULTURAL CENTRE,29813.663491,28697.520756,
1,11A STRAITS BOULEVARD TEMPORARY SITE OFFICE SI...,11A,TEMPORARY SITE OFFICE,1.274950,103.851665,103.851665,018907,STRAITS BOULEVARD,TEMPORARY SITE OFFICE,30041.838898,28602.987244,
2,5A MARINA GARDENS DRIVE SINGAPORE 018910,5A,NIL,1.279587,103.868956,103.868956,018910,MARINA GARDENS DRIVE,5A MARINA GARDENS DRIVE SINGAPORE 018910,31966.120787,29115.753373,
3,2 CENTRAL BOULEVARD CENTRAL BOULEVARD TOWERS S...,2,CENTRAL BOULEVARD TOWERS,1.279744,103.851591,103.851591,018916,CENTRAL BOULEVARD,CENTRAL BOULEVARD TOWERS,30033.604463,29133.104676,
4,21 PARK STREET DBS MARINA BAY MRT STATION SING...,21,DBS MARINA BAY MRT STATION,1.276427,103.854598,103.854598,018925,PARK STREET,DBS MARINA BAY MRT STATION,30368.205612,28766.381902,
...,...,...,...,...,...,...,...,...,...,...,...,...
121356,1A UPPER CHANGI ROAD NORTH SINGAPORE 886129,1A,NIL,1.352316,103.966549,103.966549,886129,UPPER CHANGI ROAD NORTH,1A UPPER CHANGI ROAD NORTH SINGAPORE 886129,42827.069212,37158.184353,
121357,100A KRANJI LOOP SINGAPORE 887327,100A,NIL,1.433629,103.758648,103.758648,887327,KRANJI LOOP,100A KRANJI LOOP SINGAPORE 887327,19690.280996,46149.118386,
121358,A PASIR PANJANG ROAD SINGAPORE 887328,A,NIL,1.277170,103.795840,103.795840,887328,PASIR PANJANG ROAD,A PASIR PANJANG ROAD SINGAPORE 887328,23828.902741,28848.553424,
121359,GATE C7 AIRPORT CARGO ROAD CHANGI ANIMAL AND P...,GATE C7,CHANGI ANIMAL AND PLANT QUARANTINE CENTRE,1.375315,103.996683,103.996683,918104,AIRPORT CARGO ROAD,CHANGI ANIMAL AND PLANT QUARANTINE CENTRE,46180.468208,39701.534286,


In [6]:
# Load current version
postal_codes_polygons = gpd.read_file('Generated Files/postal_codes_polygons_first500.shp')

In [7]:
postal_codes_polygons

Unnamed: 0,ADDRESS,BLK_NO,BUILDING,LATITUDE,LONGITUDE,LONGTITUDE,POSTAL,ROAD_NAME,SEARCHVAL,X,Y,geometry
0,1 STRAITS BOULEVARD SINGAPORE CHINESE CULTURAL...,1,SINGAPORE CHINESE CULTURAL CENTRE,1.275805,103.849615,103.849615,018906,STRAITS BOULEVARD,SINGAPORE CHINESE CULTURAL CENTRE,29813.663491,28697.520756,"POLYGON ((29764.149 28685.851, 29799.595 28742..."
1,11A STRAITS BOULEVARD TEMPORARY SITE OFFICE SI...,11A,TEMPORARY SITE OFFICE,1.274950,103.851665,103.851665,018907,STRAITS BOULEVARD,TEMPORARY SITE OFFICE,30041.838898,28602.987244,
2,5A MARINA GARDENS DRIVE SINGAPORE 018910,5A,NIL,1.279587,103.868956,103.868956,018910,MARINA GARDENS DRIVE,5A MARINA GARDENS DRIVE SINGAPORE 018910,31966.120787,29115.753373,
3,2 CENTRAL BOULEVARD CENTRAL BOULEVARD TOWERS S...,2,CENTRAL BOULEVARD TOWERS,1.279744,103.851591,103.851591,018916,CENTRAL BOULEVARD,CENTRAL BOULEVARD TOWERS,30033.604463,29133.104676,"POLYGON ((30002.734 29126.659, 30027.819 29166..."
4,21 PARK STREET DBS MARINA BAY MRT STATION SING...,21,DBS MARINA BAY MRT STATION,1.276427,103.854598,103.854598,018925,PARK STREET,DBS MARINA BAY MRT STATION,30368.205612,28766.381902,
...,...,...,...,...,...,...,...,...,...,...,...,...
121356,1A UPPER CHANGI ROAD NORTH SINGAPORE 886129,1A,NIL,1.352316,103.966549,103.966549,886129,UPPER CHANGI ROAD NORTH,1A UPPER CHANGI ROAD NORTH SINGAPORE 886129,42827.069212,37158.184353,
121357,100A KRANJI LOOP SINGAPORE 887327,100A,NIL,1.433629,103.758648,103.758648,887327,KRANJI LOOP,100A KRANJI LOOP SINGAPORE 887327,19690.280996,46149.118386,
121358,A PASIR PANJANG ROAD SINGAPORE 887328,A,NIL,1.277170,103.795840,103.795840,887328,PASIR PANJANG ROAD,A PASIR PANJANG ROAD SINGAPORE 887328,23828.902741,28848.553424,
121359,GATE C7 AIRPORT CARGO ROAD CHANGI ANIMAL AND P...,GATE C7,CHANGI ANIMAL AND PLANT QUARANTINE CENTRE,1.375315,103.996683,103.996683,918104,AIRPORT CARGO ROAD,CHANGI ANIMAL AND PLANT QUARANTINE CENTRE,46180.468208,39701.534286,


In [10]:
postal_codes_polygons['geometry'].dropna()

0      POLYGON ((29764.149 28685.851, 29799.595 28742...
3      POLYGON ((30002.734 29126.659, 30027.819 29166...
7      POLYGON ((29848.451 28911.446, 29856.653 28924...
8      POLYGON ((30423.444 28628.611, 30435.797 28647...
9      POLYGON ((31696.258 28632.119, 31790.555 28697...
                             ...                        
495    POLYGON ((29148.979 29482.175, 29159.306 29499...
496    POLYGON ((29148.979 29482.175, 29159.306 29499...
497    POLYGON ((29144.226 29484.972, 29154.554 29502...
498    POLYGON ((29152.173 29516.066, 29163.903 29535...
499    POLYGON ((29139.474 29487.770, 29149.791 29505...
Name: geometry, Length: 472, dtype: geometry

In [17]:
# Verification
for i in range(10):
    for x in range(len(buildings_osm)):
        if (postal_codes.iloc[i]['geometry'].within(buildings_osm.iloc[x]['geometry']) == True):
            postal_codes_polygons['geometry'].iloc[i] = buildings_osm.iloc[x]['geometry']
            continue

In [18]:
postal_codes_polygons.iloc[1:10]

Unnamed: 0,ADDRESS,BLK_NO,BUILDING,LATITUDE,LONGITUDE,LONGTITUDE,POSTAL,ROAD_NAME,SEARCHVAL,X,Y,geometry
1,11A STRAITS BOULEVARD TEMPORARY SITE OFFICE SI...,11A,TEMPORARY SITE OFFICE,1.27495,103.851665,103.851665,18907,STRAITS BOULEVARD,TEMPORARY SITE OFFICE,30041.838898,28602.987244,
2,5A MARINA GARDENS DRIVE SINGAPORE 018910,5A,NIL,1.279587,103.868956,103.868956,18910,MARINA GARDENS DRIVE,5A MARINA GARDENS DRIVE SINGAPORE 018910,31966.120787,29115.753373,
3,2 CENTRAL BOULEVARD CENTRAL BOULEVARD TOWERS S...,2,CENTRAL BOULEVARD TOWERS,1.279744,103.851591,103.851591,18916,CENTRAL BOULEVARD,CENTRAL BOULEVARD TOWERS,30033.604463,29133.104676,"POLYGON ((30002.734 29126.659, 30027.819 29166..."
4,21 PARK STREET DBS MARINA BAY MRT STATION SING...,21,DBS MARINA BAY MRT STATION,1.276427,103.854598,103.854598,18925,PARK STREET,DBS MARINA BAY MRT STATION,30368.205612,28766.381902,
5,23 PARK STREET MARINA BAY MRT STATION SINGAPOR...,23,MARINA BAY MRT STATION (CE2),1.276251,103.855447,103.855447,18926,PARK STREET,MARINA BAY MRT STATION (CE2),30462.734977,28746.933633,
6,20A PARK STREET TEMPORARY SITE OFFICE SINGAPOR...,20A,TEMPORARY SITE OFFICE,1.275241,103.853659,103.853659,18927,PARK STREET,TEMPORARY SITE OFFICE,30263.723862,28635.230971,
7,2 PARK STREET SINGAPORE 018928,2,NIL,1.277681,103.850157,103.850157,18928,PARK STREET,2 PARK STREET SINGAPORE 018928,29874.012009,28905.043652,"POLYGON ((29848.451 28911.446, 29856.653 28924..."
8,25 PARK STREET SINGAPORE 018929,25,NIL,1.275195,103.855174,103.855174,18929,PARK STREET,25 PARK STREET SINGAPORE 018929,30432.310175,28630.067888,"POLYGON ((30423.444 28628.611, 30435.797 28647..."
9,51 MARINA SOUTH DRIVE SINGAPORE 018930,51,NIL,1.275358,103.867028,103.867028,18930,MARINA SOUTH DRIVE,51 MARINA SOUTH DRIVE SINGAPORE 018930,31751.571058,28648.189602,"POLYGON ((31696.258 28632.119, 31790.555 28697..."


In [5]:
for i in range(len(postal_codes)):
    for x in range(len(buildings_osm)):
        if (postal_codes.iloc[i]['geometry'].within(buildings_osm.iloc[x]['geometry']) == True):
            postal_codes_polygons['geometry'].iloc[i] = buildings_osm.iloc[x]['geometry']
            continue

Exception ignored in: <function BaseGeometry.__del__ at 0x7f90992401f0>
Traceback (most recent call last):
  File "/Users/joshuavargas/opt/anaconda3/lib/python3.9/site-packages/shapely/geometry/base.py", line 209, in __del__
    self._empty(val=None)
  File "/Users/joshuavargas/opt/anaconda3/lib/python3.9/site-packages/shapely/geometry/base.py", line 193, in _empty
    try:
KeyboardInterrupt: 


In [30]:
# Attempt to do this in chunks
for i in range(200, 500):
    for x in range(len(buildings_osm)):
        if (postal_codes.iloc[i]['geometry'].within(buildings_osm.iloc[x]['geometry']) == True):
            postal_codes_polygons['geometry'].iloc[i] = buildings_osm.iloc[x]['geometry']
            continue

In [31]:
postal_codes_polygons

Unnamed: 0,ADDRESS,BLK_NO,BUILDING,LATITUDE,LONGITUDE,LONGTITUDE,POSTAL,ROAD_NAME,SEARCHVAL,X,Y,geometry
0,1 STRAITS BOULEVARD SINGAPORE CHINESE CULTURAL...,1,SINGAPORE CHINESE CULTURAL CENTRE,1.275805,103.849615,103.849615,018906,STRAITS BOULEVARD,SINGAPORE CHINESE CULTURAL CENTRE,29813.663491,28697.520756,"POLYGON ((29764.149 28685.851, 29799.595 28742..."
1,11A STRAITS BOULEVARD TEMPORARY SITE OFFICE SI...,11A,TEMPORARY SITE OFFICE,1.274950,103.851665,103.851665,018907,STRAITS BOULEVARD,TEMPORARY SITE OFFICE,30041.838898,28602.987244,
2,5A MARINA GARDENS DRIVE SINGAPORE 018910,5A,NIL,1.279587,103.868956,103.868956,018910,MARINA GARDENS DRIVE,5A MARINA GARDENS DRIVE SINGAPORE 018910,31966.120787,29115.753373,
3,2 CENTRAL BOULEVARD CENTRAL BOULEVARD TOWERS S...,2,CENTRAL BOULEVARD TOWERS,1.279744,103.851591,103.851591,018916,CENTRAL BOULEVARD,CENTRAL BOULEVARD TOWERS,30033.604463,29133.104676,"POLYGON ((30002.734 29126.659, 30027.819 29166..."
4,21 PARK STREET DBS MARINA BAY MRT STATION SING...,21,DBS MARINA BAY MRT STATION,1.276427,103.854598,103.854598,018925,PARK STREET,DBS MARINA BAY MRT STATION,30368.205612,28766.381902,
...,...,...,...,...,...,...,...,...,...,...,...,...
121356,1A UPPER CHANGI ROAD NORTH SINGAPORE 886129,1A,NIL,1.352316,103.966549,103.966549,886129,UPPER CHANGI ROAD NORTH,1A UPPER CHANGI ROAD NORTH SINGAPORE 886129,42827.069212,37158.184353,
121357,100A KRANJI LOOP SINGAPORE 887327,100A,NIL,1.433629,103.758648,103.758648,887327,KRANJI LOOP,100A KRANJI LOOP SINGAPORE 887327,19690.280996,46149.118386,
121358,A PASIR PANJANG ROAD SINGAPORE 887328,A,NIL,1.277170,103.795840,103.795840,887328,PASIR PANJANG ROAD,A PASIR PANJANG ROAD SINGAPORE 887328,23828.902741,28848.553424,
121359,GATE C7 AIRPORT CARGO ROAD CHANGI ANIMAL AND P...,GATE C7,CHANGI ANIMAL AND PLANT QUARANTINE CENTRE,1.375315,103.996683,103.996683,918104,AIRPORT CARGO ROAD,CHANGI ANIMAL AND PLANT QUARANTINE CENTRE,46180.468208,39701.534286,


In [24]:
range(200, 400)

range(200, 400)

In [32]:
postal_codes_polygons = gpd.GeoDataFrame(postal_codes_polygons, geometry=postal_codes_polygons['geometry'])
postal_codes_polygons.to_file('Generated Files/postal_codes_polygons_first500.shp')

In [None]:
postal_codes_polygons.to_file('Generated Files/buildings_EPSG3414_no-dups.shp')

In [None]:
# Joy suggestion
for code in postal_codes:
        code['geometry'] = buildings_osm[buildings_osm['geometry']==code['geometry']]['geometry']

# Post-QGIS

We used QGIS to do the final processing because Python was taking too long. Below, I import and clean the resulting shapefiles.

### Cleaning buildings_polygons_postcodes

In [58]:
# Importing postal codes and polygons
buildings_polygons_postcodes = gpd.read_file('Generated Files/buildings_polygons_postcodes.shp')
buildings_polygons_postcodes

Unnamed: 0,osm_id,code,fclass,name,type,ADDRESS,BLK_NO,BUILDING,LATITUDE,LONGITUDE,...,ROAD_NAME,SEARCHVAL,X,Y,osm_id_2,code_2,fclass_2,name_2,type_2,geometry
0,162288645,1500,building,Raffles Lighthouse,,,,,,,...,,,,,,,,,,"POLYGON ((17695.447 15897.831, 17696.115 15901..."
1,162288649,1500,building,,,,,,,,...,,,,,,,,,,"POLYGON ((17691.664 15911.852, 17694.224 15911..."
2,162288650,1500,building,,,,,,,,...,,,,,,,,,,"POLYGON ((17706.467 15924.158, 17714.258 15929..."
3,162380270,1500,building,,,,,,,,...,,,,,,,,,,"POLYGON ((17733.636 15975.077, 17746.468 15975..."
4,162288647,1500,building,,,,,,,,...,,,,,,,,,,"POLYGON ((15529.250 18357.218, 15530.341 18370..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
111421,97938127,1500,building,,,,,,,,...,,,,,,,,,,"POLYGON ((54181.506 44262.067, 54181.505 44271..."
111422,97942438,1500,building,,,,,,,,...,,,,,,,,,,"POLYGON ((52671.986 43514.437, 52686.308 43519..."
111423,97947951,1500,building,,,,,,,,...,,,,,,,,,,"POLYGON ((51771.064 43018.565, 51778.231 43019..."
111424,97947953,1500,building,,,,,,,,...,,,,,,,,,,"POLYGON ((51773.446 43011.455, 51779.422 43012..."


In [59]:
# Drop rows with blank fields - indicating nonmatches, or buildings that have either postcode or polygon but not both
buildings_polygons_postcodes = buildings_polygons_postcodes.dropna()
buildings_polygons_postcodes

Unnamed: 0,osm_id,code,fclass,name,type,ADDRESS,BLK_NO,BUILDING,LATITUDE,LONGITUDE,...,ROAD_NAME,SEARCHVAL,X,Y,osm_id_2,code_2,fclass_2,name_2,type_2,geometry
21,30527021,1500,building,Sim Lim Square,retail,1 ROCHOR CANAL ROAD DBS SIM LIM SQUARE SINGAPO...,1,DBS SIM LIM SQUARE,1.302907,103.853032,...,ROCHOR CANAL ROAD,DBS SIM LIM SQUARE,30193.909910,31694.317593,30527021,1500.0,building,Sim Lim Square,retail,"POLYGON ((30138.613 31703.762, 30153.681 31721..."
22,31307895,1500,building,Ang Mo Kio Public Library,public,4300 ANG MO KIO AVENUE 6 ANG MO KIO PUBLIC LIB...,4300,ANG MO KIO PUBLIC LIBRARY,1.374790,103.845581,...,ANG MO KIO AVENUE 6,ANG MO KIO PUBLIC LIBRARY,29364.621911,39642.823145,31307895,1500.0,building,Ang Mo Kio Public Library,public,"POLYGON ((29335.688 39665.557, 29384.298 39672..."
27,32416309,1500,building,Broadway Plaza,retail,4190 ANG MO KIO AVENUE 6 BROADWAY PLAZA SINGAP...,4190,BROADWAY PLAZA,1.371936,103.845974,...,ANG MO KIO AVENUE 6,BROADWAY PLAZA,29408.369075,39327.283047,32416309,1500.0,building,Broadway Plaza,retail,"POLYGON ((29377.589 39339.351, 29432.833 39345..."
28,32482974,1500,building,709,commercial,709 ANG MO KIO AVENUE 8 HDB-ANG MO KIO SINGAPO...,709,HDB-ANG MO KIO,1.371137,103.847662,...,ANG MO KIO AVENUE 8,HDB-ANG MO KIO,29596.283857,39238.906436,32482974,1500.0,building,709,commercial,"POLYGON ((29561.894 39239.082, 29600.822 39243..."
63,34575159,1500,building,White Sands,retail,1 PASIR RIS CENTRAL STREET 3 DBS NTUC WHITE SA...,1,DBS NTUC WHITE SANDS,1.372288,103.949734,...,PASIR RIS CENTRAL STREET 3,DBS NTUC WHITE SANDS,40955.611885,39366.423091,34575159,1500.0,building,White Sands,retail,"POLYGON ((40904.374 39371.052, 40917.370 39414..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
111022,1084032845,1500,building,Tomlinson Heights,apartments,31 TOMLINSON ROAD TOMLINSON HEIGHTS SINGAPORE ...,31,TOMLINSON HEIGHTS,1.302934,103.826950,...,TOMLINSON ROAD,TOMLINSON HEIGHTS,27291.186965,31697.348598,1084032845,1500.0,building,Tomlinson Heights,apartments,"POLYGON ((27273.874 31693.405, 27324.277 31706..."
111023,1084032848,1500,building,Boulevard Vue,apartments,11 CUSCADEN WALK BOULEVARD VUE SINGAPORE 249697,11,BOULEVARD VUE,1.303083,103.829027,...,CUSCADEN WALK,BOULEVARD VUE,27522.377638,31713.784779,1084032848,1500.0,building,Boulevard Vue,apartments,"POLYGON ((27514.095 31701.332, 27514.863 31705..."
111109,1091132342,1500,building,Espada,residential,48 SAINT THOMAS WALK ESPADA SINGAPORE 238126,48,ESPADA,1.297342,103.836742,...,SAINT THOMAS WALK,ESPADA,28380.955632,31079.002627,1091132342,1500.0,building,Espada,residential,"POLYGON ((28356.045 31071.255, 28364.203 31093..."
111115,1091236116,1500,building,711,residential,711 CLEMENTI WEST STREET 2 HDB-CLEMENTI SINGAP...,711,HDB-CLEMENTI,1.304844,103.761107,...,CLEMENTI WEST STREET 2,HDB-CLEMENTI,19963.519287,31908.679449,1091236116,1500.0,building,711,residential,"POLYGON ((19945.517 31932.693, 19966.920 31988..."


In [127]:
# Selecting only relevant columns
buildings_poly_post_cleaned = buildings_polygons_postcodes.iloc[:,[0, 4, 3, 7, 5, 6, 11, 12, 21]]
buildings_poly_post_cleaned.columns = ['osm_id', 'osm_type', 'osm_name', 'postal_name', 'address', 'blk_no', 'postcode', 'road_name', 'geometry']

In [128]:
# Resetting index
buildings_poly_post_cleaned = buildings_poly_post_cleaned.reset_index().iloc[:,1:10].set_index('osm_id')
buildings_poly_post_cleaned

Unnamed: 0_level_0,osm_type,osm_name,postal_name,address,blk_no,postcode,road_name,geometry
osm_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
30527021,retail,Sim Lim Square,DBS SIM LIM SQUARE,1 ROCHOR CANAL ROAD DBS SIM LIM SQUARE SINGAPO...,1,188504,ROCHOR CANAL ROAD,"POLYGON ((30138.613 31703.762, 30153.681 31721..."
31307895,public,Ang Mo Kio Public Library,ANG MO KIO PUBLIC LIBRARY,4300 ANG MO KIO AVENUE 6 ANG MO KIO PUBLIC LIB...,4300,569842,ANG MO KIO AVENUE 6,"POLYGON ((29335.688 39665.557, 29384.298 39672..."
32416309,retail,Broadway Plaza,BROADWAY PLAZA,4190 ANG MO KIO AVENUE 6 BROADWAY PLAZA SINGAP...,4190,569841,ANG MO KIO AVENUE 6,"POLYGON ((29377.589 39339.351, 29432.833 39345..."
32482974,commercial,709,HDB-ANG MO KIO,709 ANG MO KIO AVENUE 8 HDB-ANG MO KIO SINGAPO...,709,560709,ANG MO KIO AVENUE 8,"POLYGON ((29561.894 39239.082, 29600.822 39243..."
34575159,retail,White Sands,DBS NTUC WHITE SANDS,1 PASIR RIS CENTRAL STREET 3 DBS NTUC WHITE SA...,1,518457,PASIR RIS CENTRAL STREET 3,"POLYGON ((40904.374 39371.052, 40917.370 39414..."
...,...,...,...,...,...,...,...,...
1084032845,apartments,Tomlinson Heights,TOMLINSON HEIGHTS,31 TOMLINSON ROAD TOMLINSON HEIGHTS SINGAPORE ...,31,247855,TOMLINSON ROAD,"POLYGON ((27273.874 31693.405, 27324.277 31706..."
1084032848,apartments,Boulevard Vue,BOULEVARD VUE,11 CUSCADEN WALK BOULEVARD VUE SINGAPORE 249697,11,249697,CUSCADEN WALK,"POLYGON ((27514.095 31701.332, 27514.863 31705..."
1091132342,residential,Espada,ESPADA,48 SAINT THOMAS WALK ESPADA SINGAPORE 238126,48,238126,SAINT THOMAS WALK,"POLYGON ((28356.045 31071.255, 28364.203 31093..."
1091236116,residential,711,HDB-CLEMENTI,711 CLEMENTI WEST STREET 2 HDB-CLEMENTI SINGAP...,711,120711,CLEMENTI WEST STREET 2,"POLYGON ((19945.517 31932.693, 19966.920 31988..."


In [129]:
# Saving this dataset
buildings_poly_post_cleaned.to_file('Generated Files/buildings_poly_post_cleaned.shp')

  buildings_poly_post_cleaned.to_file('Generated Files/buildings_poly_post_cleaned.shp')


### Cleaning all_resale_polygons

In [165]:
# Importing postal codes and polygons
all_resale_polygons = gpd.read_file('Generated Files/all_resale_polygons.shp')
all_resale_polygons

Unnamed: 0,osm_id,code,fclass,name,type,ADDRESS,BLK_NO,BUILDING,LATITUDE,LONGITUDE,...,osm_id_2,code_2,fclass_2,name_2,type_2,year,flat,real_price,norm_price,geometry
0,32395512,1500,building,,commercial,727 ANG MO KIO AVENUE 6 ANG MO KIO CENTRAL POS...,727,ANG MO KIO CENTRAL POST OFFICE,1.372990,103.846013,...,32395512,1500.0,building,,commercial,2003,727 ANG MO KIO AVE 6,268262,0.233985,"POLYGON ((29403.152 39460.862, 29419.555 39462..."
1,32395512,1500,building,,commercial,727 ANG MO KIO AVENUE 6 ANG MO KIO CENTRAL POS...,727,ANG MO KIO CENTRAL POST OFFICE,1.372990,103.846013,...,32395512,1500.0,building,,commercial,2012,727 ANG MO KIO AVE 6,292853,0.115936,"POLYGON ((29403.152 39460.862, 29419.555 39462..."
2,32395512,1500,building,,commercial,727 ANG MO KIO AVENUE 6 ANG MO KIO CENTRAL POS...,727,ANG MO KIO CENTRAL POST OFFICE,1.372990,103.846013,...,32395512,1500.0,building,,commercial,2016,727 ANG MO KIO AVE 6,302559,0.124564,"POLYGON ((29403.152 39460.862, 29419.555 39462..."
3,32395524,1500,building,728,,728 ANG MO KIO AVENUE 6 HDB-ANG MO KIO SINGAPO...,728,HDB-ANG MO KIO,1.372628,103.845683,...,32395524,1500.0,building,728,,2000,728 ANG MO KIO AVE 6,188437,0.139693,"POLYGON ((29361.040 39462.332, 29376.498 39464..."
4,32395524,1500,building,728,,728 ANG MO KIO AVENUE 6 HDB-ANG MO KIO SINGAPO...,728,HDB-ANG MO KIO,1.372628,103.845683,...,32395524,1500.0,building,728,,2001,728 ANG MO KIO AVE 6,231628,0.215768,"POLYGON ((29361.040 39462.332, 29376.498 39464..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174870,1091236117,1500,building,712,residential,712 CLEMENTI WEST STREET 2 CLEMENTI WEST FIRE ...,712,CLEMENTI WEST FIRE POST,1.304959,103.761913,...,1091236117,1500.0,building,712,residential,2015,712 CLEMENTI WEST ST 2,394766,0.272174,"POLYGON ((19995.621 31942.820, 19999.249 31951..."
174871,1091236117,1500,building,712,residential,712 CLEMENTI WEST STREET 2 CLEMENTI WEST FIRE ...,712,CLEMENTI WEST FIRE POST,1.304959,103.761913,...,1091236117,1500.0,building,712,residential,2016,712 CLEMENTI WEST ST 2,398090,0.228485,"POLYGON ((19995.621 31942.820, 19999.249 31951..."
174872,1091236117,1500,building,712,residential,712 CLEMENTI WEST STREET 2 CLEMENTI WEST FIRE ...,712,CLEMENTI WEST FIRE POST,1.304959,103.761913,...,1091236117,1500.0,building,712,residential,2017,712 CLEMENTI WEST ST 2,404210,0.256314,"POLYGON ((19995.621 31942.820, 19999.249 31951..."
174873,1091236117,1500,building,712,residential,712 CLEMENTI WEST STREET 2 CLEMENTI WEST FIRE ...,712,CLEMENTI WEST FIRE POST,1.304959,103.761913,...,1091236117,1500.0,building,712,residential,2019,712 CLEMENTI WEST ST 2,364944,0.204333,"POLYGON ((19995.621 31942.820, 19999.249 31951..."


In [132]:
# There is no need to drop na as nonmatches have been automatically filtered at QGIS end

In [168]:
# Cleaning and rename columns
all_resale_polygons_cleaned = all_resale_polygons.iloc[:, [0, 4, 3, 7, 5, 6, 11, 12, 23, 24, 25]]
all_resale_polygons_cleaned.columns = ['osm_id', 'osm_type', 'osm_name', 'postal_name', 'address', 'blk_no', 'postcode', 'road_name', 'real_price', 'norm_price', 'geometry']
all_resale_polygons_cleaned

Unnamed: 0,osm_id,osm_type,osm_name,postal_name,address,blk_no,postcode,road_name,real_price,norm_price,geometry
0,32395512,commercial,,ANG MO KIO CENTRAL POST OFFICE,727 ANG MO KIO AVENUE 6 ANG MO KIO CENTRAL POS...,727,560727,ANG MO KIO AVENUE 6,268262,0.233985,"POLYGON ((29403.152 39460.862, 29419.555 39462..."
1,32395512,commercial,,ANG MO KIO CENTRAL POST OFFICE,727 ANG MO KIO AVENUE 6 ANG MO KIO CENTRAL POS...,727,560727,ANG MO KIO AVENUE 6,292853,0.115936,"POLYGON ((29403.152 39460.862, 29419.555 39462..."
2,32395512,commercial,,ANG MO KIO CENTRAL POST OFFICE,727 ANG MO KIO AVENUE 6 ANG MO KIO CENTRAL POS...,727,560727,ANG MO KIO AVENUE 6,302559,0.124564,"POLYGON ((29403.152 39460.862, 29419.555 39462..."
3,32395524,,728,HDB-ANG MO KIO,728 ANG MO KIO AVENUE 6 HDB-ANG MO KIO SINGAPO...,728,560728,ANG MO KIO AVENUE 6,188437,0.139693,"POLYGON ((29361.040 39462.332, 29376.498 39464..."
4,32395524,,728,HDB-ANG MO KIO,728 ANG MO KIO AVENUE 6 HDB-ANG MO KIO SINGAPO...,728,560728,ANG MO KIO AVENUE 6,231628,0.215768,"POLYGON ((29361.040 39462.332, 29376.498 39464..."
...,...,...,...,...,...,...,...,...,...,...,...
174870,1091236117,residential,712,CLEMENTI WEST FIRE POST,712 CLEMENTI WEST STREET 2 CLEMENTI WEST FIRE ...,712,120712,CLEMENTI WEST STREET 2,394766,0.272174,"POLYGON ((19995.621 31942.820, 19999.249 31951..."
174871,1091236117,residential,712,CLEMENTI WEST FIRE POST,712 CLEMENTI WEST STREET 2 CLEMENTI WEST FIRE ...,712,120712,CLEMENTI WEST STREET 2,398090,0.228485,"POLYGON ((19995.621 31942.820, 19999.249 31951..."
174872,1091236117,residential,712,CLEMENTI WEST FIRE POST,712 CLEMENTI WEST STREET 2 CLEMENTI WEST FIRE ...,712,120712,CLEMENTI WEST STREET 2,404210,0.256314,"POLYGON ((19995.621 31942.820, 19999.249 31951..."
174873,1091236117,residential,712,CLEMENTI WEST FIRE POST,712 CLEMENTI WEST STREET 2 CLEMENTI WEST FIRE ...,712,120712,CLEMENTI WEST STREET 2,364944,0.204333,"POLYGON ((19995.621 31942.820, 19999.249 31951..."


In [169]:
all_resale_polygons_cleaned.to_file('Generated Files/all_resale_polygons_cleaned.shp')

  all_resale_polygons_cleaned.to_file('Generated Files/all_resale_polygons_cleaned.shp')


### Loading the new datasets

This is the code you need to read all of the datasets generated in this notebook.

In [171]:
# buildings_poly_post_cleaned
# includes building type, name, address, block number, postal codes, and geometry in the form of building polygons
buildings_poly_post_cleaned = gpd.read_file('Generated Files/buildings_poly_post_cleaned.shp')

# all_resale_polygons_cleaned
# includes building type, name, address, block number, postal codes, real price, normal price, and geometry in the form of building polygons
# does not include buildings not covered in all_resale_data
all_resale_polygons_cleaned = gpd.read_file('Generated Files/all_resale_polygons_cleaned.shp')


In [175]:
len(all_resale)

184542