In [1]:
# remove hash to install package
# !pip3 install owslib==0.25.0 fiona==1.8.21 geopandas==0.10.2 requests==2.28.0 folium==0.12.1

In [2]:
from owslib.wfs import WebFeatureService
import geopandas
import folium
import io
import zipfile
import pandas as pd
import os
from urllib.request import urlretrieve
from shapely.validation import make_valid
from shapely.geometry import Point


import warnings
warnings.filterwarnings("ignore")

## Generate POA to SA2 Lookup Table

In [3]:
# load selected external data
sa2_bound = geopandas.read_file(f'../data/abs/sa2_boundaries.gml')
poa_bound = geopandas.read_file(f'../data/abs/poa_boundaries.gml')

# Open zipfile
unzip_poa_sa2 = zipfile.ZipFile('../data/abs/poa_sa2_lookup.zip')
poa_to_sa2 = pd.read_excel(unzip_poa_sa2
                           .open('1270055006_CG_POSTCODE_2011_SA2_2011.xls')
                        , sheet_name='Table 3', skiprows=5)

In [4]:
poa_to_sa2 = poa_to_sa2.dropna()

In [5]:
poa_to_sa2["SA2_MAINCODE_2011"].unique().size

2162

In [6]:
sa2_bound.head()

Unnamed: 0,gml_id,primaryindex,sa2_maincode_2016,sa2_5digitcode_2016,sa2_name_2016,sa3_code_2016,sa3_name_2016,sa4_code_2016,sa4_name_2016,gccsa_code_2016,gccsa_name_2016,state_code_2016,state_name_2016,area_albers_sqkm,geometry
0,sa2_2016_aust.1,1,101021007,11007,Braidwood,10102,Queanbeyan,101,Capital Region,1RNSW,Rest of NSW,1,New South Wales,3418.3525,"POLYGON ((149.58420 -35.44430, 149.58440 -35.4..."
1,sa2_2016_aust.2,2,101021008,11008,Karabar,10102,Queanbeyan,101,Capital Region,1RNSW,Rest of NSW,1,New South Wales,6.9825,"POLYGON ((149.21900 -35.36740, 149.21800 -35.3..."
2,sa2_2016_aust.9,9,101031015,11015,Cooma Region,10103,Snowy Mountains,101,Capital Region,1RNSW,Rest of NSW,1,New South Wales,6250.8748,"POLYGON ((148.60440 -36.13520, 148.60450 -36.1..."
3,sa2_2016_aust.10,10,101031016,11016,Jindabyne - Berridale,10103,Snowy Mountains,101,Capital Region,1RNSW,Rest of NSW,1,New South Wales,3939.5484,"POLYGON ((148.27030 -36.46410, 148.27060 -36.4..."
4,sa2_2016_aust.11,11,101041017,11017,Batemans Bay,10104,South Coast,101,Capital Region,1RNSW,Rest of NSW,1,New South Wales,63.7074,"POLYGON ((150.23540 -35.70390, 150.23530 -35.7..."


In [7]:
poa_bound["geometry"] = poa_bound["geometry"].to_crs(epsg=4326)
sa2_bound["geometry"] = sa2_bound["geometry"].to_crs(epsg=4326)

In [8]:
poa_bound["centroid"] = poa_bound["geometry"].centroid

In [9]:
poa_bound = poa_bound[poa_bound["centroid"].notnull()]
sa2_bound = sa2_bound[sa2_bound["geometry"].notnull()]

In [10]:
sa2_bound["geometry"] = sa2_bound["geometry"].apply(make_valid)

In [11]:
poa_list = poa_bound["centroid"].to_list()
sa2_list = []
for point in poa_list:
    contains = sa2_bound["geometry"].contains(Point(point))
    sa2_list.append(contains.index[contains])

In [12]:
for i in range(len(sa2_list)):
    try:
        sa2_list[i] = sa2_list[i][0]
    except IndexError:
        sa2_list[i] = None

38
66
138
206
335
347
349
352
1408
1539
1784
1863
2160
2302
2492
2496
2524
2528
2531
2567
2603
2626


In [13]:
poa_bound["sa2_area"] = pd.Series(sa2_list)

In [14]:
poa_bound[poa_bound["sa2_area"].isnull()]

Unnamed: 0,gml_id,primaryindex,objectid,poa_code_2016,poa_name_2016,area_albers_sqkm,geometry,centroid,sa2_area
38,poa_2016_aust.33,33,33,880,880,88.9707,"MULTIPOLYGON (((136.67621 -12.23769, 136.67551...",POINT (136.64622 -12.24609),
66,poa_2016_aust.55,55,55,2027,2027,1.3761,"MULTIPOLYGON (((151.25861 -33.85969, 151.25861...",POINT (151.24107 -33.87246),
138,poa_2016_aust.122,122,122,2105,2105,2.1888,"MULTIPOLYGON (((151.28001 -33.63819, 151.28001...",POINT (151.28383 -33.64082),
206,poa_2016_aust.2110,2110,2110,5575,5575,1367.32,"MULTIPOLYGON (((136.92601 -35.02879, 136.92601...",POINT (137.36100 -34.92621),
335,poa_2016_aust.231,231,231,2230,2230,9.1909,"MULTIPOLYGON (((151.12451 -34.07999, 151.12451...",POINT (151.14707 -34.05728),
347,poa_2016_aust.243,243,243,2261,2261,51.4591,"MULTIPOLYGON (((151.50201 -33.33019, 151.50201...",POINT (151.46022 -33.35433),
349,poa_2016_aust.245,245,245,2263,2263,22.3942,"MULTIPOLYGON (((151.57271 -33.29069, 151.57261...",POINT (151.52484 -33.25350),
352,poa_2016_aust.248,248,248,2267,2267,3.6556,"POLYGON ((151.57021 -33.06989, 151.57021 -33.0...",POINT (151.58460 -33.07011),
1408,poa_2016_aust.1359,1359,1359,3960,3960,831.516,"MULTIPOLYGON (((146.30501 -39.15189, 146.30471...",POINT (146.30266 -38.83823),
1539,poa_2016_aust.1480,1480,1480,4178,4178,44.3278,"MULTIPOLYGON (((153.18851 -27.40099, 153.18851...",POINT (153.18793 -27.39817),


In [15]:
poa_w_sa2 = poa_bound.merge(sa2_bound[['sa2_maincode_2016', 'sa2_name_2016','geometry']], how="left", left_on="sa2_area", right_on=sa2_bound.index)

In [16]:
poa_w_sa2 = poa_w_sa2[["poa_code_2016", "poa_name_2016", "sa2_maincode_2016", "sa2_name_2016", "geometry_y"]]

In [17]:
poa_w_sa2

Unnamed: 0,poa_code_2016,poa_name_2016,sa2_maincode_2016,sa2_name_2016,geometry_y
0,800,0800,701011002.0,Darwin City,"POLYGON ((130.83451 -12.45799, 130.83391 -12.4..."
1,810,0810,701021013.0,Brinkin - Nakara,"POLYGON ((130.86381 -12.36689, 130.86631 -12.3..."
2,812,0812,701021014.0,Buffalo Creek,"POLYGON ((130.90101 -12.36579, 130.90081 -12.3..."
3,815,0815,701021013.0,Brinkin - Nakara,"POLYGON ((130.86381 -12.36689, 130.86631 -12.3..."
4,820,0820,701011006.0,Ludmilla - The Narrows,"POLYGON ((130.84491 -12.41539, 130.84511 -12.4..."
...,...,...,...,...,...
2663,7268,7268,602021055.0,Grindelwald - Lanena,GEOMETRYCOLLECTION (POLYGON ((146.80531 -41.30...
2664,7270,7270,602021055.0,Grindelwald - Lanena,GEOMETRYCOLLECTION (POLYGON ((146.80531 -41.30...
2665,7275,7275,602021055.0,Grindelwald - Lanena,GEOMETRYCOLLECTION (POLYGON ((146.80531 -41.30...
2666,7276,7276,,,


In [18]:
# check the postcodes not allocated
undef_poa_ind = poa_w_sa2[poa_w_sa2["sa2_maincode_2016"].isnull()].index.values

In [19]:
undef_poa_ind

array([  38,   66,  138,  206,  335,  347,  349,  352, 1408, 1539, 1784,
       1863, 2160, 2302, 2492, 2496, 2524, 2528, 2531, 2567, 2603, 2624,
       2666, 2667])

In [20]:
df_undef_poa = (poa_bound[poa_bound.index.isin(undef_poa_ind)]
                [['poa_code_2016', 'poa_name_2016', 'sa2_area']])

In [21]:
# there are postcodes with multiple SA2s
display(poa_to_sa2.sort_values(by=['POSTCODE', 'PERCENTAGE'], ascending=True).head(20))

Unnamed: 0,POSTCODE,POSTCODE.1,SA2_MAINCODE_2011,SA2_NAME_2011,RATIO,PERCENTAGE
1,800,800.0,701011002.0,Darwin City,1.0,99.999998
13,810,810.0,701021029.0,Wanguri,0.060207,6.020652
5,810,810.0,701021018.0,Jingili,0.061562,6.156198
8,810,810.0,701021024.0,Moil,0.068575,6.857527
6,810,810.0,701021021.0,Lyons (NT),0.070201,7.02012
12,810,810.0,701021028.0,Wagaman,0.071421,7.14211
2,810,810.0,701021010.0,Alawa,0.071997,7.199707
7,810,810.0,701021023.0,Millner,0.083483,8.34829
11,810,810.0,701021027.0,Tiwi,0.085041,8.504137
3,810,810.0,701021013.0,Brinkin - Nakara,0.096392,9.639178


In [22]:
# only get the SA2 with highest percentage
idx = poa_to_sa2.groupby(['POSTCODE'])['PERCENTAGE'].transform(max) == poa_to_sa2['PERCENTAGE']
high_perc_sa2_poa = poa_to_sa2[idx].reset_index(drop=True)

In [23]:
poa_w_sa2 = pd.merge(poa_w_sa2, high_perc_sa2_poa[['POSTCODE', 'SA2_MAINCODE_2011', 'SA2_NAME_2011']], how='left', left_on='poa_name_2016', right_on='POSTCODE')

In [24]:
poa_w_sa2

Unnamed: 0,poa_code_2016,poa_name_2016,sa2_maincode_2016,sa2_name_2016,geometry_y,POSTCODE,SA2_MAINCODE_2011,SA2_NAME_2011
0,800,0800,701011002.0,Darwin City,"POLYGON ((130.83451 -12.45799, 130.83391 -12.4...",0800,701011002.0,Darwin City
1,810,0810,701021013.0,Brinkin - Nakara,"POLYGON ((130.86381 -12.36689, 130.86631 -12.3...",0810,701021025.0,Nightcliff
2,812,0812,701021014.0,Buffalo Creek,"POLYGON ((130.90101 -12.36579, 130.90081 -12.3...",0812,701021019.0,Karama
3,815,0815,701021013.0,Brinkin - Nakara,"POLYGON ((130.86381 -12.36689, 130.86631 -12.3...",,,
4,820,0820,701011006.0,Ludmilla - The Narrows,"POLYGON ((130.84491 -12.41539, 130.84511 -12.4...",0820,701011008.0,Stuart Park
...,...,...,...,...,...,...,...,...
2664,7268,7268,602021055.0,Grindelwald - Lanena,GEOMETRYCOLLECTION (POLYGON ((146.80531 -41.30...,7268,602031058.0,Dilston - Lilydale
2665,7270,7270,602021055.0,Grindelwald - Lanena,GEOMETRYCOLLECTION (POLYGON ((146.80531 -41.30...,7270,602021053.0,Beauty Point - Beaconsfield
2666,7275,7275,602021055.0,Grindelwald - Lanena,GEOMETRYCOLLECTION (POLYGON ((146.80531 -41.30...,7275,602021055.0,Grindelwald - Lanena
2667,7276,7276,,,,7276,602021055.0,Grindelwald - Lanena


All not allocated POAs were found using the 2011 data.

In [25]:
# impute sa2 name and maincode
poa_w_sa2['sa2_maincode_2016'].fillna(poa_w_sa2['SA2_MAINCODE_2011'], 
                                      inplace=True)
poa_w_sa2['sa2_name_2016'].fillna(poa_w_sa2['SA2_NAME_2011'], inplace=True)

In [26]:
#impute sa2 geometry
poa_w_sa2 = poa_w_sa2.merge(sa2_bound[["sa2_maincode_2016", "geometry"]], 
                            on="sa2_maincode_2016", how="left")

In [27]:
poa_w_sa2 = poa_w_sa2[["poa_code_2016", "poa_name_2016", "sa2_maincode_2016", 
                       "sa2_name_2016", "geometry"]]

In [28]:
poa_w_sa2.isna().sum()

poa_code_2016        0
poa_name_2016        0
sa2_maincode_2016    0
sa2_name_2016        0
geometry             5
dtype: int64

In [29]:
poa_w_sa2[poa_w_sa2.geometry.isnull()]

Unnamed: 0,poa_code_2016,poa_name_2016,sa2_maincode_2016,sa2_name_2016,geometry
335,2230,2230,128011528.0,Cronulla - Kurnell - Bundeena,
2492,6530,6530,508051210.0,Geraldton,
2496,6537,6537,508021197.0,Exmouth,
2524,6707,6707,508021197.0,Exmouth,
2528,6713,6713,508061223.0,Roebourne,


In [30]:
# save final poa to sa2 table
poa_w_sa2.to_csv('../data/curated/poa_w_sa2.csv', index=False)