In [1]:
import os
from urllib.request import urlretrieve
import zipfile

In [2]:
# Download population data
data_url = "https://www.abs.gov.au/statistics/people/population/regional-population/2022-23/32180_ERP_2023_SA2_GDA2020.zip"
# urlretrieve does not create missing parent directories, so make sure the
# landing directory exists before the archive is written into it.
os.makedirs("../data/landing/", exist_ok=True)
urlretrieve(data_url, "../data/landing/population.zip")

('../data/landing/population.zip', <http.client.HTTPMessage at 0x1042719d0>)

In [3]:
# Unpack the downloaded archive into the population data directory
archive_path = "../data/landing/population.zip"
target_dir = "../data/population/"
with zipfile.ZipFile(archive_path, mode='r') as archive:
    archive.extractall(path=target_dir)

In [22]:
import geopandas as gpd

# Read the population GeoPackage that was extracted from the archive
population_gpkg = "../data/population/32180_ERP_2023_SA2_GDA2020.gpkg"
gdf = gpd.read_file(population_gpkg)

# Print a single row as a quick look at the dataset's structure
first_row = gdf.head(1)
print(first_row)


   State_code_2021  State_name_2021  SA2_code_2021 SA2_name_2021  \
0                1  New South Wales      101021007     Braidwood   

   SA3_code_2021 SA3_name_2021  SA4_code_2021   SA4_name_2021 GCCSA_code_2021  \
0          10102    Queanbeyan            101  Capital Region           1RNSW   

  GCCSA_name_2021  ...  Births_2022_23  Deaths_2022_23  \
0     Rest of NSW  ...              44              41   

   Natural_increase_2022_23  Internal_arrivals_2022_23  \
0                         3                        316   

   Internal_departures_2022_23  Net_internal_migration_2022_23  \
0                          301                              15   

   Overseas_arrivals_2022_23  Overseas_departures_2022_23  \
0                         19                            7   

   Net_overseas_migration_2022_23  \
0                              12   

                                            geometry  
0  MULTIPOLYGON (((149.58424 -35.44426, 149.58432...  

[1 rows x 56 columns]


In [23]:
# Keep only the Victorian rows. The explicit .copy() makes the result an
# independent frame, so the cells below can add derived columns to it without
# pandas raising SettingWithCopyWarning for writing to a slice of the original.
gdf = gdf[gdf["State_name_2021"] == "Victoria"].copy()
gdf["SA2_name_2021"].unique()

array(['Alfredton', 'Ballarat', 'Buninyong', 'Delacombe', 'Smythes Creek',
       'Wendouree - Miners Rest', 'Ballarat East - Warrenheip',
       'Ballarat North - Invermay', 'Canadian - Mount Clear',
       'Sebastopol - Redan', 'Bacchus Marsh Surrounds',
       'Creswick - Clunes', 'Daylesford', 'Gordon (Vic.)', 'Avoca',
       'Beaufort', 'Golden Plains - North', 'Maryborough (Vic.)',
       'Maryborough Surrounds', 'Bendigo', 'California Gully - Eaglehawk',
       'East Bendigo - Kennington', 'Flora Hill - Spring Gully',
       'Kangaroo Flat - Golden Square', 'Maiden Gully', 'Strathfieldsaye',
       'White Hills - Ascot', 'Bendigo Surrounds - South', 'Castlemaine',
       'Castlemaine Surrounds', 'Heathcote', 'Kyneton', 'Woodend',
       'Bendigo Surrounds - North', 'Loddon', 'Bannockburn',
       'Golden Plains - South', 'Winchelsea', 'Belmont', 'Geelong',
       'Geelong West - Hamlyn Heights', 'Highton', 'Lara', 'Leopold',
       'Newcomb - Moolap', 'Newtown (Vic.)', 'North Ge

In [24]:
# Inspect the column names of the filtered frame
gdf.columns

Index(['State_code_2021', 'State_name_2021', 'SA2_code_2021', 'SA2_name_2021',
       'SA3_code_2021', 'SA3_name_2021', 'SA4_code_2021', 'SA4_name_2021',
       'GCCSA_code_2021', 'GCCSA_name_2021', 'ERP_2001', 'ERP_2002',
       'ERP_2003', 'ERP_2004', 'ERP_2005', 'ERP_2006', 'ERP_2007', 'ERP_2008',
       'ERP_2009', 'ERP_2010', 'ERP_2011', 'ERP_2012', 'ERP_2013', 'ERP_2014',
       'ERP_2015', 'ERP_2016', 'ERP_2017', 'ERP_2018', 'ERP_2019', 'ERP_2020',
       'ERP_2021', 'ERP_2022', 'ERP_2023', 'ERP_change_number_2022_23',
       'ERP_change_per_cent_2022_23', 'Area_km2',
       'Pop_density_2023_people_per_km2', 'Births_2021_22', 'Deaths_2021_22',
       'Natural_increase_2021_22', 'Internal_arrivals_2021_22',
       'Internal_departures_2021_22', 'Net_internal_migration_2021_22',
       'Overseas_arrivals_2021_22', 'Overseas_departures_2021_22',
       'Net_overseas_migration_2021_22', 'Births_2022_23', 'Deaths_2022_23',
       'Natural_increase_2022_23', 'Internal_arrivals_2022_2

In [25]:
# Net migration per period = net overseas migration + net internal migration
for period in ("2021_22", "2022_23"):
    overseas = gdf[f"Net_overseas_migration_{period}"]
    internal = gdf[f"Net_internal_migration_{period}"]
    gdf[f"Net_migration_{period}"] = overseas + internal

In [26]:
# Derive ERP per km2 for every year from 2001 to 2023
area_km2 = gdf["Area_km2"]
for erp_year in range(2001, 2024):
    erp = gdf[f"ERP_{erp_year}"]
    gdf[f"ERP_per_km2_{erp_year}"] = erp.div(area_km2)

In [27]:
# Check the column names again now that the derived columns have been added
gdf.columns

Index(['State_code_2021', 'State_name_2021', 'SA2_code_2021', 'SA2_name_2021',
       'SA3_code_2021', 'SA3_name_2021', 'SA4_code_2021', 'SA4_name_2021',
       'GCCSA_code_2021', 'GCCSA_name_2021', 'ERP_2001', 'ERP_2002',
       'ERP_2003', 'ERP_2004', 'ERP_2005', 'ERP_2006', 'ERP_2007', 'ERP_2008',
       'ERP_2009', 'ERP_2010', 'ERP_2011', 'ERP_2012', 'ERP_2013', 'ERP_2014',
       'ERP_2015', 'ERP_2016', 'ERP_2017', 'ERP_2018', 'ERP_2019', 'ERP_2020',
       'ERP_2021', 'ERP_2022', 'ERP_2023', 'ERP_change_number_2022_23',
       'ERP_change_per_cent_2022_23', 'Area_km2',
       'Pop_density_2023_people_per_km2', 'Births_2021_22', 'Deaths_2021_22',
       'Natural_increase_2021_22', 'Internal_arrivals_2021_22',
       'Internal_departures_2021_22', 'Net_internal_migration_2021_22',
       'Overseas_arrivals_2021_22', 'Overseas_departures_2021_22',
       'Net_overseas_migration_2021_22', 'Births_2022_23', 'Deaths_2022_23',
       'Natural_increase_2022_23', 'Internal_arrivals_2022_2

In [28]:
import folium
import branca.colormap as cm

In [29]:
# Make sure an output folder exists for each family of plots
plot_directories = (
    "../plots/net_migrants/",
    "../plots/erp/",
    "../plots/area/",
    "../plots/erp_per_km2/",
)
for plot_dir in plot_directories:
    os.makedirs(plot_dir, exist_ok=True)

In [13]:
def create_map(gdf, column, ouput_directory):
    """Save an interactive choropleth of ``column`` for the regions in ``gdf``.

    Each feature in ``gdf`` is shaded by its value in ``column`` on a linear
    blue -> white -> yellow -> orange -> red scale spanning the column's
    minimum and maximum, and gets a tooltip showing its ``SA2_name_2021`` and
    the value. Features without a value are drawn in a neutral grey. The map
    is written to ``{ouput_directory}/suburbs_{column.lower()}.html``; nothing
    is returned.

    Args:
        gdf: GeoDataFrame holding the geometries plus the ``SA2_name_2021``
            and ``column`` columns.
        column: Name of the numeric column that drives the colouring.
        ouput_directory: Directory that receives the HTML file; it is created
            if it does not exist yet. The spelling of the parameter name is
            kept as-is so existing callers keep working.
    """
    # Fill colour for features that have no value in `column`
    missing_colour = '#d3d3d3'

    # Create a map centered on Victoria
    m = folium.Map(location=[-37.4713, 144.7852], zoom_start=7)

    # Create a color map using branca
    colormap = cm.LinearColormap(
        colors=['blue', 'white', 'yellow', 'orange', 'red'],
        vmin=gdf[column].min(),
        vmax=gdf[column].max(),
        caption=column
    )

    # Define a function to style the features
    def style_function(feature):
        value = feature['properties'][column]
        # A missing value (None, or NaN, which is the only value not equal to
        # itself) cannot be placed on the colour scale, so it is not passed
        # to the colormap at all.
        has_value = value is not None and value == value
        return {
            'fillOpacity': 0.7,
            'weight': 0.5,
            'fillColor': colormap(value) if has_value else missing_colour,
            'color': 'black'
        }

    # Add the GeoDataFrame to the map with Folium
    folium.GeoJson(
        gdf,
        style_function=style_function,
        tooltip=folium.GeoJsonTooltip(
            fields=['SA2_name_2021', column],
            aliases=['Suburb Name', column]
        )
    ).add_to(m)

    # Add the colormap legend to the map
    colormap.add_to(m)

    # Make sure the target directory exists, then save the map as HTML
    os.makedirs(ouput_directory, exist_ok=True)
    m.save(f"{ouput_directory}/suburbs_{column.lower()}.html")

In [17]:
# Plot suburbs' net migrations from 2021-2023
migration_columns = [
    f"{measure}_{period}"
    for period in ("2021_22", "2022_23")
    for measure in ("Net_internal_migration", "Net_overseas_migration", "Net_migration")
]
for migration_column in migration_columns:
    create_map(gdf, migration_column, '../plots/net_migrants')


In [18]:
# Plot suburbs' ERP from 2001-2023
erp_columns = [f"ERP_{year}" for year in range(2001, 2024)]
for erp_column in erp_columns:
    create_map(gdf, erp_column, '../plots/erp')

In [19]:
# Plot suburbs' area
area_column = 'Area_km2'
create_map(gdf, area_column, '../plots/area')

In [30]:
# Plot suburbs' ERP/km2 from 2001-2023
density_columns = [f"ERP_per_km2_{year}" for year in range(2001, 2024)]
for density_column in density_columns:
    create_map(gdf, density_column, '../plots/erp_per_km2')