In [1]:
# Uncomment the next line to install the pinned package versions this notebook was written against
#pip install owslib==0.25.0 fiona==1.8.21 geopandas==0.10.2 requests==2.28.0 folium==0.12.1

In [2]:
from owslib.wfs import WebFeatureService
import geopandas
import folium
import io
import zipfile
import pandas as pd
import os
from urllib.request import urlretrieve

## Download external data from AURIN

In [3]:
# AURIN WFS credentials are read from the environment so that secrets are not
# stored in the notebook. Set AURIN_WFS_USERNAME and AURIN_WFS_PASSWORD before
# starting the kernel.
# NOTE(review): credentials that were previously committed here should be
# treated as leaked and rotated.
WFS_USERNAME = os.environ.get('AURIN_WFS_USERNAME')
WFS_PASSWORD = os.environ.get('AURIN_WFS_PASSWORD')
WFS_URL = 'https://adp.aurin.org.au/geoserver/wfs'

if not (WFS_USERNAME and WFS_PASSWORD):
    # Surface the misconfiguration here rather than as an opaque HTTP error later.
    print('AURIN_WFS_USERNAME / AURIN_WFS_PASSWORD are not set; '
          'authenticated WFS requests will fail.')

In [4]:
# WFS 2.0.0 client for the AURIN geoserver, built from the connection settings above.
adp_client = WebFeatureService(
    url=WFS_URL,
    version='2.0.0',
    username=WFS_USERNAME,
    password=WFS_PASSWORD,
)

In [5]:
def download_aurin_df(type_name, file_name, output_dir='../data/abs'):
    """Download one WFS layer as GML and load the saved file with geopandas.

    Parameters
    ----------
    type_name : str
        WFS type name passed to ``adp_client.getfeature``.
    file_name : str
        Base name (without extension) of the ``.gml`` file written to
        ``output_dir``.
    output_dir : str, optional
        Directory the GML file is written to; created if it does not exist.
        Defaults to ``'../data/abs'``.

    Returns
    -------
    The object returned by ``geopandas.read_file`` for the saved GML file.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(output_dir, exist_ok=True)
    gml_path = f'{output_dir}/{file_name}.gml'

    response = adp_client.getfeature(typename=type_name)
    # The context manager closes the file even if the write raises, so the
    # handle never leaks and the data is flushed before geopandas reads it.
    with open(gml_path, 'wb') as out:
        out.write(response.read())

    return geopandas.read_file(gml_path)

In [6]:
# SA2-level regional population layer (2001-2021).
pop_df = download_aurin_df(
    type_name='datasource-AU_Govt_ABS-UoM_AURIN_DB_3:abs_regional_population_sa2_2001_2021',
    file_name='2021_population_census',
)

# SA2 (2016) boundary layer.
sa2_bound = download_aurin_df(
    type_name='datasource-AU_Govt_ABS-UoM_AURIN_DB_GeoLevel:sa2_2016_aust',
    file_name='sa2_boundaries',
)

# NOTE(review): this requests the `mb_2016_aust` layer but stores it under a
# "poa" name -- confirm whether a postal-area layer was intended.
poa_bound = download_aurin_df(
    type_name='datasource-AU_Govt_ABS-UoM_AURIN_DB_GeoLevel:mb_2016_aust',
    file_name='poa_boundaries',
)

# SA2-level income distribution layer (2010-11).
sa2_income = download_aurin_df(
    type_name='datasource-AU_Govt_ABS-UoM_AURIN_DB_3:abs_epi_income_distribution_sa2_2010_11',
    file_name='sa2_income',
)

In [7]:
# Identifier columns kept alongside the 2021 measures.
area_id = [
    'gml_id',
    'primaryindex',
    'state_code_2016',
    'sa2_maincode_2016',
    'sa2_name_2016',
]
# Keep only the columns whose name mentions 2021 or the 2020_21 period.
col_2021 = [
    name
    for name in pop_df.columns
    if any(tag in name for tag in ('2021', '2020_21'))
]
pop_21 = pop_df[area_id + col_2021]

In [8]:
# Preview the first five rows of the 2021 population subset.
pop_21.head(n=5)

Unnamed: 0,gml_id,primaryindex,state_code_2016,sa2_maincode_2016,sa2_name_2016,erp_2021,erp_change_number_2020_21,erp_change_per_cent_2020_21,pop_density_2021_people_per_km2,births_2020_21,deaths_2020_21,natural_increase_2020_21,internal_arrivals_2020_21,internal_departures_2020_21,net_internal_migration_2020_21,overseas_arrivals_2020_21,overseas_departures_2020_21,net_overseas_migration_2020_21
0,abs_regional_population_sa2_2001_2021.1,1,1,101021007,Braidwood,4360,68,1.5843,1.2755,37.0,26.0,11.0,375.0,319.0,56.0,7.0,6.0,1.0
1,abs_regional_population_sa2_2001_2021.2,2,1,101021008,Karabar,8374,3,0.0358,1199.283936,114.0,41.0,73.0,871.0,946.0,-75.0,28.0,23.0,5.0
2,abs_regional_population_sa2_2001_2021.3,3,1,101021009,Queanbeyan,11401,-48,-0.4193,2393.458496,149.0,104.0,45.0,1486.0,1662.0,-176.0,165.0,82.0,83.0
3,abs_regional_population_sa2_2001_2021.7,7,1,101031013,Bombala,2411,-10,-0.4131,0.6044,19.0,41.0,-22.0,163.0,152.0,11.0,2.0,1.0,1.0
4,abs_regional_population_sa2_2001_2021.14,14,1,101041020,Bega-Eden Hinterland,8996,117,1.3177,1.9099,83.0,67.0,16.0,738.0,639.0,99.0,9.0,7.0,2.0


In [9]:
# (rows, columns) of the 2021 subset; the row count is the number of SA2 records received.
pop_21.shape

(2292, 18)

There should be 2,310 SA2 regions, but the table above has only 2,292 rows, which is 18 fewer than expected.

In [10]:
# ERP stands for estimated resident population.
# Show each column's dtype and non-null count to spot missing values.
pop_21.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2292 entries, 0 to 2291
Data columns (total 18 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   gml_id                           2292 non-null   object 
 1   primaryindex                     2292 non-null   int64  
 2   state_code_2016                  2292 non-null   int64  
 3   sa2_maincode_2016                2292 non-null   int64  
 4   sa2_name_2016                    2292 non-null   object 
 5   erp_2021                         2292 non-null   int64  
 6   erp_change_number_2020_21        2292 non-null   int64  
 7   erp_change_per_cent_2020_21      2292 non-null   float64
 8   pop_density_2021_people_per_km2  2292 non-null   float64
 9   births_2020_21                   2288 non-null   float64
 10  deaths_2020_21                   2288 non-null   float64
 11  natural_increase_2020_21         2288 non-null   float64
 12  internal_arrivals_20

In [11]:
# Preview the first five SA2 boundary records.
sa2_bound.head(n=5)

Unnamed: 0,gml_id,primaryindex,sa2_maincode_2016,sa2_5digitcode_2016,sa2_name_2016,sa3_code_2016,sa3_name_2016,sa4_code_2016,sa4_name_2016,gccsa_code_2016,gccsa_name_2016,state_code_2016,state_name_2016,area_albers_sqkm,geometry
0,sa2_2016_aust.1,1,101021007,11007,Braidwood,10102,Queanbeyan,101,Capital Region,1RNSW,Rest of NSW,1,New South Wales,3418.3525,"POLYGON ((149.58420 -35.44430, 149.58440 -35.4..."
1,sa2_2016_aust.2,2,101021008,11008,Karabar,10102,Queanbeyan,101,Capital Region,1RNSW,Rest of NSW,1,New South Wales,6.9825,"POLYGON ((149.21900 -35.36740, 149.21800 -35.3..."
2,sa2_2016_aust.9,9,101031015,11015,Cooma Region,10103,Snowy Mountains,101,Capital Region,1RNSW,Rest of NSW,1,New South Wales,6250.8748,"POLYGON ((148.60440 -36.13520, 148.60450 -36.1..."
3,sa2_2016_aust.10,10,101031016,11016,Jindabyne - Berridale,10103,Snowy Mountains,101,Capital Region,1RNSW,Rest of NSW,1,New South Wales,3939.5484,"POLYGON ((148.27030 -36.46410, 148.27060 -36.4..."
4,sa2_2016_aust.11,11,101041017,11017,Batemans Bay,10104,South Coast,101,Capital Region,1RNSW,Rest of NSW,1,New South Wales,63.7074,"POLYGON ((150.23540 -35.70390, 150.23530 -35.7..."


In [12]:
# Number of records per state in the layer stored as `poa_bound`.
poa_bound['state_name_2016'].value_counts()

New South Wales                 109880
Victoria                         85014
Queensland                       69764
Western Australia                42449
South Australia                  28205
Tasmania                         12981
Australian Capital Territory      6393
Northern Territory                3299
Other Territories                  137
Name: state_name_2016, dtype: int64

---
## Download Postcode to SA2 table

In [13]:
def download_url(url, filename, output_dir='../data/abs'):
    """Download ``url`` and save it as ``<output_dir>/<filename>.zip``.

    Parameters
    ----------
    url : str
        Address of the file to fetch; passed to ``urlretrieve``.
    filename : str
        Base name (without extension) of the saved archive. Previously this
        argument was ignored and the file was always named ``poa_sa2_lookup``.
    output_dir : str, optional
        Directory the archive is written to; created if it does not exist.
        Defaults to ``'../data/abs'``.

    Returns
    -------
    None
    """
    os.makedirs(output_dir, exist_ok=True)

    # Keep the directory and the file path in separate names so neither is clobbered.
    target_path = f"{output_dir}/{filename}.zip"

    print(f"Begin downloading {filename} data")
    urlretrieve(url, target_path)
    print("Completed")

In [14]:
# Source archive for the postcode-to-SA2 lookup table.
POA_SA2_LOOKUP_URL = 'https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&1270055006_CG_POSTCODE_2011_SA2_2011.zip&1270.0.55.006&Data%20Cubes&70A3CE8A2E6F9A6BCA257A29001979B2&0&July%202011&27.06.2012&Latest'

download_url(url=POA_SA2_LOOKUP_URL, filename='poa_sa2_lookup')

Begin downloading file_name data
Completed


In [15]:
# Open the downloaded archive read-only; the workbook is read from it in the next cell.
unzip_poa_sa2 = zipfile.ZipFile(file='../data/abs/poa_sa2_lookup.zip', mode='r')

In [16]:
# Read sheet 'Table 3' of the workbook straight from the archive, skipping the
# first five rows. The `with` block closes the archive member handle once
# pandas has parsed it, instead of leaving it open.
with unzip_poa_sa2.open('1270055006_CG_POSTCODE_2011_SA2_2011.xls') as xls_member:
    poa_to_sa2 = pd.read_excel(xls_member, sheet_name='Table 3', skiprows=5)

In [17]:
# Discard every row that has at least one missing value.
poa_to_sa2 = poa_to_sa2.dropna(axis='index', how='any')

In [18]:
# Display the cleaned postcode-to-SA2 lookup table (pandas truncates the middle rows).
poa_to_sa2

Unnamed: 0,POSTCODE,POSTCODE.1,SA2_MAINCODE_2011,SA2_NAME_2011,RATIO,PERCENTAGE
1,0800,800.0,701011002.0,Darwin City,1.000000,99.999998
2,0810,810.0,701021010.0,Alawa,0.071997,7.199707
3,0810,810.0,701021013.0,Brinkin - Nakara,0.096392,9.639178
4,0810,810.0,701021016.0,Coconut Grove,0.096494,9.649355
5,0810,810.0,701021018.0,Jingili,0.061562,6.156198
...,...,...,...,...,...,...
5984,7466,7466.0,604031097.0,West Coast (Tas.),1.000000,100.000000
5985,7467,7467.0,604031097.0,West Coast (Tas.),1.000000,100.000000
5986,7468,7468.0,604031097.0,West Coast (Tas.),1.000000,100.000000
5987,7469,7469.0,604031097.0,West Coast (Tas.),1.000000,100.000000
