In [457]:
import pandas as pd
import numpy as np
import geopandas as gpd
import topojson as tp
import matplotlib.pyplot as plt
import time
import matplotlib.cm as cm
import matplotlib.colors as colors

print("reading and preparing geodata files ...")

# Bounding box handed to every read_file call below via `bbox`.
# NOTE(review): looks like (min lon, min lat, max lon, max lat) — confirm.
bounds = [148.76279568, -35.92051721, 149.39929255, -35.12440294]

# --- 2021 mesh blocks: a single file; four columns are picked by position
# and renamed to the common (code, category, area, geometry) schema.
mesh_2021 = gpd.read_file(
    "./geodata/MB_2021_AUST_SHP_GDA2020.zip",
    bbox=bounds,
)
mesh_2021 = mesh_2021.iloc[:, [0, 1, 17, 19]]
mesh_2021.columns = ["code", "category", "area", "geometry"]
mesh_2021["code"] = mesh_2021["code"].astype("int64")
mesh_2021 = mesh_2021.set_index("code")
# Drop zero-area blocks so later density divisions never divide by zero.
mesh_2021 = mesh_2021.loc[mesh_2021["area"] > 0]

# --- 2016 mesh blocks: ACT and NSW files stacked, then the same reshaping.
mesh_2016 = pd.concat(
    [
        gpd.read_file(shapefile, bbox=bounds)
        for shapefile in (
            "./geodata/1270055001_mb_2016_act_shape.zip",
            "./geodata/1270055001_mb_2016_nsw_shape.zip",
        )
    ]
)
mesh_2016 = mesh_2016.iloc[:, [0, 1, 15, 16]]
mesh_2016.columns = ["code", "category", "area", "geometry"]
mesh_2016["code"] = mesh_2016["code"].astype("int64")
mesh_2016 = mesh_2016.set_index("code")
mesh_2016 = mesh_2016.loc[mesh_2016["area"] > 0]
# Reproject to EPSG:7844 — presumably to match the 2021 layer ("GDA2020" in
# its filename); TODO confirm.
mesh_2016 = mesh_2016.to_crs(7844)

# --- 2011 mesh blocks: same recipe as 2016.
mesh_2011 = pd.concat(
    [
        gpd.read_file(shapefile, bbox=bounds)
        for shapefile in (
            "./geodata/1270055001_mb_2011_act_shape.zip",
            "./geodata/1270055001_mb_2011_nsw_shape.zip",
        )
    ]
)
mesh_2011 = mesh_2011.iloc[:, [0, 1, 15, 16]]
mesh_2011.columns = ["code", "category", "area", "geometry"]
mesh_2011["code"] = mesh_2011["code"].astype("int64")
mesh_2011 = mesh_2011.set_index("code")
mesh_2011 = mesh_2011.loc[mesh_2011["area"] > 0]
mesh_2011 = mesh_2011.to_crs(7844)

print("reading 2021 census data ...")

# Read each worksheet into its own frame and concatenate ONCE at the end.
# Growing a frame with pd.concat inside the loop re-copies all earlier rows
# on every iteration, and seeding it with an empty frame triggers pandas'
# empty-entry concat FutureWarning and can leave columns as object dtype.
sheets_2021 = []

print("gathering 2021 mesh data ...", end="\r")
# NOTE(review): sheets 1, 2, 11, 12 are presumably the NSW and ACT tabs of
# the workbook — confirm against the spreadsheet.
for sheet in [1, 2, 11, 12]:
    print(f"gathering 2021 mesh data ... sheet {sheet}", end="\r")
    sheet_data = pd.read_excel(
        "./mesh2021.xlsx",
        sheet_name=sheet,
        header=6,
        skipfooter=4
    )
    # Replace the workbook's own headers with the common schema.
    sheet_data.columns = ["code", "category", "area", "dwellings", "people", "state"]
    sheets_2021.append(sheet_data)
print("gathering 2021 mesh data ... complete")

# Row labels are not reset here (as before); "code" becomes the index below.
data_2021 = pd.concat(sheets_2021)
data_2021["code"] = data_2021["code"].astype("int64")
data_2021["dwellings"] = pd.to_numeric(data_2021["dwellings"])
data_2021["people"] = pd.to_numeric(data_2021["people"])
data_2021.set_index("code", inplace=True)

print("matching 2021 census data to geography ...")

# Build the joined frame in one vectorised step instead of enlarging an empty
# GeoDataFrame one row at a time with `.loc[code] = [...]`, which copies the
# frame on every insertion.
# Starting from the mesh layer keeps its "code" index and geometry column;
# the result also inherits the mesh layer's CRS (the row-by-row build did not
# set one).
geodata_2021 = mesh_2021[["category", "area", "geometry"]].copy()

# `.loc` with the full list of mesh codes raises KeyError if any code is
# missing from the census table, matching the fail-loud behaviour of the
# original per-row `.at` lookup. Values are inserted positionally so the
# column order stays (category, area, people, geometry).
geodata_2021.insert(
    2,
    "people",
    data_2021.loc[geodata_2021.index, "people"].to_numpy(),
)

# NOTE(review): "/ 100" presumably converts people per km² into people per
# hectare — confirm the unit of the mesh "area" column.
geodata_2021["density"] = geodata_2021["people"] / geodata_2021["area"] / 100
# Keep only blocks whose density is at least 1.
geodata_2021 = geodata_2021[geodata_2021["density"] >= 1]
geodata_2021.reset_index()[["density", "geometry"]].to_file("geodata_2021.geojson", driver="GeoJSON")

print("reading 2016 census data ...")

# The last five lines of the CSV are skipped as footer; `skipfooter` is only
# supported by the python parser engine, hence engine="python".
data_2016 = pd.read_csv("./mesh2016.csv", skipfooter=5, engine="python")

# Overwrite the file's headers with the common schema, then fix dtypes.
data_2016.columns = ["code", "category", "area", "dwellings", "people", "state"]
data_2016 = data_2016.astype({"code": "int64"})
for count_column in ("dwellings", "people"):
    data_2016[count_column] = pd.to_numeric(data_2016[count_column])

# Index by mesh-block code so rows can be looked up by code.
data_2016 = data_2016.set_index("code")

print("matching 2016 census data to geography ...")

# Build the joined frame in one vectorised step instead of enlarging an empty
# GeoDataFrame one row at a time with `.loc[code] = [...]`, which copies the
# frame on every insertion.
# Starting from the mesh layer keeps its "code" index and geometry column;
# the result also inherits the mesh layer's CRS (the row-by-row build did not
# set one).
geodata_2016 = mesh_2016[["category", "area", "geometry"]].copy()

# `.loc` with the full list of mesh codes raises KeyError if any code is
# missing from the census table, matching the fail-loud behaviour of the
# original per-row `.at` lookup. Values are inserted positionally so the
# column order stays (category, area, people, geometry).
geodata_2016.insert(
    2,
    "people",
    data_2016.loc[geodata_2016.index, "people"].to_numpy(),
)

# NOTE(review): "/ 100" presumably converts people per km² into people per
# hectare — confirm the unit of the mesh "area" column.
geodata_2016["density"] = geodata_2016["people"] / geodata_2016["area"] / 100
# Keep only blocks whose density is at least 1.
geodata_2016 = geodata_2016[geodata_2016["density"] >= 1]
geodata_2016.reset_index()[["density", "geometry"]].to_file("geodata_2016.geojson", driver="GeoJSON")

print("reading 2011 census data ...")

# The last three lines of the CSV are skipped as footer; `skipfooter` is only
# supported by the python parser engine, hence engine="python".
data_2011 = pd.read_csv(
    "./mesh2011.csv",
    skipfooter=3,
    engine="python"
)
# This file carries only three columns (no category / area / state).
data_2011.columns = ["code", "dwellings", "people"]
data_2011["code"] = data_2011["code"].astype("int64")
data_2011["dwellings"] = pd.to_numeric(data_2011["dwellings"])
data_2011["people"] = pd.to_numeric(data_2011["people"])
# Index by mesh-block code so rows can be looked up by code.
data_2011.set_index("code", inplace=True)
# (A stray bare `data_2011` expression used to sit here; mid-cell it displays
# nothing, so it was removed.)

print("matching 2011 census data to geography ...")

# Build the joined frame in one vectorised step instead of enlarging an empty
# GeoDataFrame one row at a time with `.loc[code] = [...]`, which copies the
# frame on every insertion.
# Starting from the mesh layer keeps its "code" index and geometry column;
# the result also inherits the mesh layer's CRS (the row-by-row build did not
# set one).
geodata_2011 = mesh_2011[["category", "area", "geometry"]].copy()

# `.loc` with the full list of mesh codes raises KeyError if any code is
# missing from the census table, matching the fail-loud behaviour of the
# original per-row `.at` lookup. Values are inserted positionally so the
# column order stays (category, area, people, geometry).
geodata_2011.insert(
    2,
    "people",
    data_2011.loc[geodata_2011.index, "people"].to_numpy(),
)

# FIX: the divisor used to be `geodata["area"]`, but no variable named
# `geodata` is defined in this notebook's visible cells, so a fresh kernel
# raised NameError here. The area of the 2011 blocks themselves is intended.
# NOTE(review): "* 10000" (vs "/ 100" for 2016/2021) presumably means the 2011
# area column is in m² rather than km² — confirm against the shapefile.
geodata_2011["density"] = geodata_2011["people"] / geodata_2011["area"] * 10000
# Keep only blocks whose density is at least 1.
geodata_2011 = geodata_2011[geodata_2011["density"] >= 1]
geodata_2011.reset_index()[["density", "geometry"]].to_file("geodata_2011.geojson", driver="GeoJSON")

reading and preparing geodata files ...
reading 2021 census data ...
gathering 2021 mesh data ... complete
matching 2021 census data to geography ...
reading 2016 census data ...
matching 2016 census data to geography ...
reading 2011 census data ...
matching 2011 census data to geography ...


In [467]:
# Write one TopoJSON file per census year: build the topology with
# toposimplify=1e-4, quantize with 1e4, then serialise to the paired path.
for _frame, _target in (
    (geodata_2021, "geodata_2021.topojson"),
    (geodata_2016, "geodata_2016.topojson"),
    (geodata_2011, "geodata_2011.topojson"),
):
    _topology = tp.Topology(_frame, toposimplify=1e-4)
    _topology.topoquantize(1e4).to_json(_target)

In [513]:
# Shuffle the 2021 features and keep only density + geometry.
# A fixed random_state makes the shuffled order — and therefore the exported
# file — identical on every run; the original unseeded sample() produced a
# different feature order each time. `.copy()` gives an independent frame so
# the column assignments below never write into a view.
test = geodata_2021.sample(frac=1, random_state=0)[["density", "geometry"]].copy()

# NOTE(review): "people" here is density * 10, not the census head-count
# column of the same name — confirm this is intended.
test["people"] = test["density"] * 10

# Replace density with its percentile rank in (0, 1] and map that rank onto
# the YlOrRd colormap as a hex colour string.
test["density"] = test["density"].rank(pct=True)
test["fill"] = test["density"].apply(lambda x: colors.rgb2hex(cm.YlOrRd(x)))

tp.Topology(
    test[["people", "fill", "geometry"]],
    toposimplify=1e-4
).topoquantize(1e4).to_json("test.topojson")

In [515]:
# Display the 2021 blocks ordered from lowest to highest density.
geodata_2021.sort_values(by="density", ascending=True)

Unnamed: 0_level_0,category,area,people,geometry,density
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
10640980000,Other,1.4120,142,"POLYGON ((149.28673 -35.33651, 149.28670 -35.3...",1.005666
10638310000,Residential,0.8733,88,"POLYGON ((149.24541 -35.39925, 149.24644 -35.3...",1.007672
11248760000,Primary Production,0.2810,29,"POLYGON ((149.26651 -35.16399, 149.26655 -35.1...",1.032028
80047220000,Commercial,0.0290,3,"POLYGON ((149.12880 -35.42953, 149.12893 -35.4...",1.034483
80016341000,Education,0.0575,6,"POLYGON ((149.09565 -35.33791, 149.09551 -35.3...",1.043478
...,...,...,...,...,...
80044631000,Residential,0.0099,512,"POLYGON ((149.13314 -35.28331, 149.13262 -35.2...",517.171717
80056541700,Residential,0.0145,821,"POLYGON ((149.12468 -35.27692, 149.12402 -35.2...",566.206897
85872000000,Residential,0.0143,819,"POLYGON ((149.12896 -35.18163, 149.12900 -35.1...",572.727273
80056531400,Residential,0.0066,548,"POLYGON ((149.07464 -35.24099, 149.07458 -35.2...",830.303030
