In [3]:
import pandas as pd
import numpy as np
import geopandas as gpd
import topojson as tp
import matplotlib.pyplot as plt
import seaborn as sns
import time
import matplotlib.cm as cm
import matplotlib.colors as colors

print("reading and preparing geodata files ...")

# Bounding box passed as `bbox` to every shapefile read below, so only
# features intersecting this window are loaded.
bounds = [148.76279568, -35.92051721, 149.39929255, -35.12440294]

# --- 2021 mesh blocks (single national file) ---------------------------
# Keep four columns by position and give them the names used everywhere
# else in the notebook.
mesh_2021 = gpd.read_file("./geodata/MB_2021_AUST_SHP_GDA2020.zip", bbox=bounds)
mesh_2021 = mesh_2021.iloc[:, [0, 1, 17, 19]]
mesh_2021.columns = ["code", "category", "area", "geometry"]
mesh_2021 = mesh_2021.astype({"code": "int64"}).set_index("code")
# Drop zero-area mesh blocks.
mesh_2021 = mesh_2021.loc[mesh_2021["area"] > 0]

# --- 2016 mesh blocks (ACT + NSW files stacked) ------------------------
mesh_2016 = pd.concat([
    gpd.read_file(shapefile, bbox=bounds)
    for shapefile in (
        "./geodata/1270055001_mb_2016_act_shape.zip",
        "./geodata/1270055001_mb_2016_nsw_shape.zip",
    )
]).iloc[:, [0, 1, 15, 16]]
mesh_2016.columns = ["code", "category", "area", "geometry"]
mesh_2016 = mesh_2016.astype({"code": "int64"}).set_index("code")
mesh_2016 = mesh_2016.loc[mesh_2016["area"] > 0]

# --- 2011 mesh blocks (ACT + NSW files stacked) ------------------------
mesh_2011 = pd.concat([
    gpd.read_file(shapefile, bbox=bounds)
    for shapefile in (
        "./geodata/1270055001_mb_2011_act_shape.zip",
        "./geodata/1270055001_mb_2011_nsw_shape.zip",
    )
]).iloc[:, [0, 1, 15, 16]]
mesh_2011.columns = ["code", "category", "area", "geometry"]
mesh_2011 = mesh_2011.astype({"code": "int64"}).set_index("code")
mesh_2011 = mesh_2011.loc[mesh_2011["area"] > 0]

# Reproject the 2016 and 2011 layers to EPSG:7844; the 2021 layer is left
# in whatever CRS its file declares.
mesh_2016 = mesh_2016.to_crs(7844)
mesh_2011 = mesh_2011.to_crs(7844)

print("reading 2021 census data ...")

# Read each workbook sheet into a list and concatenate once at the end.
# Growing a DataFrame with pd.concat inside the loop copies all earlier
# rows on every iteration.
print("gathering 2021 mesh data ...", end="\r")
sheets_2021 = []
for sheet in [1, 2, 11, 12]:
    print(f"gathering 2021 mesh data ... sheet {sheet}", end="\r")
    sheet_data = pd.read_excel(
        "./mesh2021.xlsx",
        sheet_name=sheet,
        header=6,
        skipfooter=4
    )
    sheet_data.columns = ["code", "category", "area", "dwellings", "people", "state"]
    sheets_2021.append(sheet_data)
data_2021 = pd.concat(sheets_2021)
print("gathering 2021 mesh data ... complete")
data_2021["code"] = data_2021["code"].astype("int64")
data_2021["dwellings"] = pd.to_numeric(data_2021["dwellings"])
data_2021["people"] = pd.to_numeric(data_2021["people"])
data_2021 = data_2021.set_index("code")

print("matching 2021 census data to geography ...")

# Fail fast (KeyError, as the per-row lookup did) if a mesh block has no
# census row, rather than silently producing NaN densities.
missing_2021 = mesh_2021.index.difference(data_2021.index)
if len(missing_2021) > 0:
    raise KeyError(
        f"{len(missing_2021)} mesh block code(s) missing from 2021 census data, "
        f"e.g. {list(missing_2021[:5])}"
    )

# Vectorized match: look up `people` for every mesh code in one indexed
# selection instead of enlarging an empty GeoDataFrame row by row.
# The result is derived from mesh_2021, so it keeps that frame's CRS and
# proper numeric dtypes.
people_2021 = data_2021.loc[mesh_2021.index, "people"].to_numpy()
geodata_2021 = (
    mesh_2021
    .assign(people=people_2021)
    # Assumes mesh area is in km^2, so /100 gives people per hectare
    # -- TODO confirm against the shapefile's area column.
    .assign(density=lambda df: df["people"] / df["area"] / 100)
    [["category", "area", "people", "geometry", "density"]]
)
# Keep only mesh blocks with a density of at least 1.
geodata_2021 = geodata_2021[geodata_2021["density"] >= 1]
geodata_2021.reset_index()[["density", "geometry"]].to_file("geodata_2021.geojson", driver="GeoJSON")

print("reading 2016 census data ...")

# The python engine is required because `skipfooter` is not supported by
# the default C parser.
data_2016 = pd.read_csv(
    "./mesh2016.csv",
    skipfooter=5,
    engine="python"
)
data_2016.columns = ["code", "category", "area", "dwellings", "people", "state"]
data_2016["code"] = data_2016["code"].astype("int64")
data_2016["dwellings"] = pd.to_numeric(data_2016["dwellings"])
data_2016["people"] = pd.to_numeric(data_2016["people"])
data_2016 = data_2016.set_index("code")

print("matching 2016 census data to geography ...")

# Fail fast (KeyError, as the per-row lookup did) if a mesh block has no
# census row, rather than silently producing NaN densities.
missing_2016 = mesh_2016.index.difference(data_2016.index)
if len(missing_2016) > 0:
    raise KeyError(
        f"{len(missing_2016)} mesh block code(s) missing from 2016 census data, "
        f"e.g. {list(missing_2016[:5])}"
    )

# Vectorized match: look up `people` for every mesh code in one indexed
# selection instead of enlarging an empty GeoDataFrame row by row.
# The result is derived from mesh_2016, so it keeps that frame's CRS and
# proper numeric dtypes.
people_2016 = data_2016.loc[mesh_2016.index, "people"].to_numpy()
geodata_2016 = (
    mesh_2016
    .assign(people=people_2016)
    # Assumes mesh area is in km^2, so /100 gives people per hectare
    # -- TODO confirm against the shapefile's area column.
    .assign(density=lambda df: df["people"] / df["area"] / 100)
    [["category", "area", "people", "geometry", "density"]]
)
# Keep only mesh blocks with a density of at least 1.
geodata_2016 = geodata_2016[geodata_2016["density"] >= 1]
geodata_2016.reset_index()[["density", "geometry"]].to_file("geodata_2016.geojson", driver="GeoJSON")

print("reading 2011 census data ...")

# The python engine is required because `skipfooter` is not supported by
# the default C parser.
data_2011 = pd.read_csv(
    "./mesh2011.csv",
    skipfooter=3,
    engine="python"
)
# The 2011 extract has only three columns (no category/area/state).
data_2011.columns = ["code", "dwellings", "people"]
data_2011["code"] = data_2011["code"].astype("int64")
data_2011["dwellings"] = pd.to_numeric(data_2011["dwellings"])
data_2011["people"] = pd.to_numeric(data_2011["people"])
data_2011 = data_2011.set_index("code")

print("matching 2011 census data to geography ...")

# Fail fast (KeyError, as the per-row lookup did) if a mesh block has no
# census row, rather than silently producing NaN densities.
missing_2011 = mesh_2011.index.difference(data_2011.index)
if len(missing_2011) > 0:
    raise KeyError(
        f"{len(missing_2011)} mesh block code(s) missing from 2011 census data, "
        f"e.g. {list(missing_2011[:5])}"
    )

# Vectorized match: look up `people` for every mesh code in one indexed
# selection instead of enlarging an empty GeoDataFrame row by row.
# The result is derived from mesh_2011, so it keeps that frame's CRS and
# proper numeric dtypes.
people_2011 = data_2011.loc[mesh_2011.index, "people"].to_numpy()
geodata_2011 = (
    mesh_2011
    .assign(people=people_2011)
    # NOTE(review): the scale factor differs from the 2016/2021 cells
    # (* 10000 instead of / 100). That is consistent only if the 2011 area
    # column is in m^2 rather than km^2 -- TODO confirm against the
    # shapefile's area column.
    .assign(density=lambda df: df["people"] / df["area"] * 10000)
    [["category", "area", "people", "geometry", "density"]]
)
# Keep only mesh blocks with a density of at least 1.
geodata_2011 = geodata_2011[geodata_2011["density"] >= 1]
geodata_2011.reset_index()[["density", "geometry"]].to_file("geodata_2011.geojson", driver="GeoJSON")

reading and preparing geodata files ...
reading 2021 census data ...
gathering 2021 mesh data ... complete
matching 2021 census data to geography ...
reading 2016 census data ...
matching 2016 census data to geography ...
reading 2011 census data ...
matching 2011 census data to geography ...


In [4]:
# Export each census year's density layer as TopoJSON, using the same
# simplification tolerance and quantization factor for all three.
topojson_targets = {
    "geodata_2021.topojson": geodata_2021,
    "geodata_2016.topojson": geodata_2016,
    "geodata_2011.topojson": geodata_2011,
}
for topojson_path, census_layer in topojson_targets.items():
    topology = tp.Topology(census_layer, toposimplify=1e-4)
    topology.topoquantize(1e4).to_json(topojson_path)

In [125]:
# Fixed seed so the row shuffle below -- and therefore the feature order in
# the exported TopoJSON files -- is identical on every run.
SHUFFLE_SEED = 0

# Shuffle each year's layer, keep only density + geometry, and tag the year.
# `.assign` returns a new frame, avoiding assignment on a sliced frame.
test_2021 = (
    geodata_2021.sample(frac=1, random_state=SHUFFLE_SEED)[["density", "geometry"]]
    .assign(year=2021)
)
test_2016 = (
    geodata_2016.sample(frac=1, random_state=SHUFFLE_SEED)[["density", "geometry"]]
    .assign(year=2016)
)
test_2011 = (
    geodata_2011.sample(frac=1, random_state=SHUFFLE_SEED)[["density", "geometry"]]
    .assign(year=2011)
)

# Stack the three years; reset_index turns each frame's `code` index into a
# column and leaves a per-year 0..n-1 positional index.
test = pd.concat([
    test_2021.reset_index(),
    test_2016.reset_index(),
    test_2011.reset_index()
])
# "people" is the raw density scaled by 10; it is computed before density is
# overwritten by its rank.
test["people"] = test["density"] * 10
# Replace density with its percentile rank across ALL years combined, so the
# colour scale is shared between the three exported layers.
test["density"] = test["density"].rank(pct=True)

# Map each percentile rank onto the YlOrRd colormap as a hex colour string.
test["fill"] = test["density"].apply(lambda x: colors.rgb2hex(cm.YlOrRd(x)))

# One TopoJSON file per census year, carrying only the styling attributes.
for year in (2021, 2016, 2011):
    tp.Topology(
        test[test["year"] == year][["people", "fill", "geometry"]],
        toposimplify=1e-4
    ).topoquantize(1e4).to_json(f"test_{year}.topojson")

In [138]:
# First quartile of the scaled "people" values over the combined three-year frame.
test["people"].quantile(q=0.25)

209.33250181057375