In [1]:
%load_ext autoreload
%autoreload 2

import geopandas as gpd
import pandas as pd
import polars as pl
import polars_st as st
import pyogrio
from electoralyze import region
from electoralyze.common import constants
from electoralyze.common.geometry import to_geopandas, to_geopolars
from electoralyze.common.testing.region_fixture import (
    RegionMocked,
    create_fake_regions,
    read_true_geometry,
    read_true_metadata,
)
from electoralyze.region.redistribute import redistribute
from electoralyze.region.redistribute.mapping import (
    _create_intersection_area_mapping,
    _get_remaining_area,
    get_region_mapping_base,
)
from polars import testing

In [2]:
region.Federal2022

electoralyze.region.regions.federal_2022.Federal2022

In [3]:
region.SA1_2021

electoralyze.region.regions.SA1_2021.SA1_2021

In [4]:
region.Federal2022.geometry

federal_2022,geometry
str,binary
"""Adelaide""","b""\x01\x03\x00\x00\xa0\xe6\x10\x00\x00\x01\x00\x00\x00W\x00\x00\x00u\x8e\x01\xd9kTa@\x82\xfd\xd7\xb9ioA\xc0\x00\x00\x00\x00\x00\x00\x00\x00\xd0\x98I\xd4\x0bTa@\x0a\xd68\x9b\x8epA\xc0\x00\x00\x00""…"
"""Aston""","b""\x01\x03\x00\x00\xa0\xe6\x10\x00\x00\x01\x00\x00\x00\xd0\x02\x00\x00\xfc\x8e\xe1\xb1\x1f+b@|\xed\x99%\x01\xeeB\xc0\x00\x00\x00\x00\x00\x00\x00\x00;8\xd8\x9b\x18+b@-\xe8\xbd1\x04\xeeB\xc0\x00\x00\x00""…"
"""Ballarat""","b""\x01\x03\x00\x00\xa0\xe6\x10\x00\x00\x01\x00\x00\x00D\x09\x00\x00J\xb2\x0eGW\xfaa@p\xb071$\x07C\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x05\xc0x\x06\x8d\xfaa@\xfa\xd1p\xca\xdc\x02C\xc0\x00\x00\x00""…"
"""Banks""","b""\x01\x03\x00\x00\xa0\xe6\x10\x00\x00\x01\x00\x00\x00\x9b\x00\x00\x00\xab\x07\xccC&\xe4b@\x96\xaf\xcb\xf0\x9f\xfc@\xc0\x00\x00\x00\x00\x00\x00\x00\x006#\x83\xdc\xc5\xe3b@\xfc\xdf\x11\x15\xaa\xfd@\xc0\x00\x00\x00""…"
"""Barker""","b""\x01\x06\x00\x00\xa0\xe6\x10\x00\x00\x1b\x00\x00\x00\x01\x03\x00\x00\x80\x01\x00\x00\x00M\x0d\x00\x00wj.7\x18\xa0a@\xa3\x05h[\xcd\x02A\xc0\x00\x00\x00\x00\x00\x00\x00\x00\xa0PO\x1f\x01\xa0a@r7""…"
…,…
"""Werriwa""","b""\x01\x03\x00\x00\xa0\xe6\x10\x00\x00\x01\x00\x00\x00\x1d\x01\x00\x00*\x8d\x98\xd9g\xddb@\xf7\xe67L4\xf8@\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x1a\x17\x0e\x84d\xddb@G\xc8@\x9e]\xf8@\xc0\x00\x00\x00""…"
"""Whitlam""","b""\x01\x06\x00\x00\xa0\xe6\x10\x00\x00\x02\x00\x00\x00\x01\x03\x00\x00\x80\x01\x00\x00\x00\xd9\x04\x00\x00y\x95\xb5M\xf1\xdcb@\x17a\x8ariLA\xc0\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xb1KT\xef\xdcb@u\x1e""…"
"""Wide Bay""","b""\x01\x03\x00\x00\xa0\xe6\x10\x00\x00\x01\x00\x00\x00d\x0e\x00\x00\x8b\xe1\xea\x00\x88+c@\x01L\x198\xa0\x019\xc0\x00\x00\x00\x00\x00\x00\x00\x00\xf7\xb1\x82\xdf\x86+c@6\x93o\xb6\xb9\x019\xc0\x00\x00\x00""…"
"""Wills""","b""\x01\x03\x00\x00\xa0\xe6\x10\x00\x00\x01\x00\x00\x00D\x01\x00\x00\x16\x16\xdc\x0fx\x1fb@\xce\xe2\xc5\xc2\x10\xe3B\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x0c\x02+\x87\x16\x1fb@\x84\xf2>\x8e\xe6\xe2B\xc0\x00\x00\x00""…"


In [5]:
region.SA1_2021.geometry.head(100)

SA1_2021,geometry
i64,binary
10102100701,"b""\x01\x03\x00\x00\x20\xe6\x10\x00\x00\x01\x00\x00\x00\xc9\x02\x00\x00]\x16V\xf6\x80\xbcb@`j""\xe5\x88\x8bA\xc0K\x07\x81\xbe\x89\xbcb@\x98\xe7[Du\x8bA\xc0\xdc\x8d\x1c;\x92\xbcb@\xa9X\x94""…"
10102100702,"b""\x01\x03\x00\x00\x20\xe6\x10\x00\x00\x02\x00\x00\x00\x08\x01\x00\x00Y\xdc\xe9\xa6~\xb7b@\x9e\xfb\xe0\x81\x0c\xafA\xc0\xb5Q\xabKL\xb8b@y\x87\xa0!}\xafA\xc0\xdc\xeb1\x0bW\xb8b@A\xb6\x8d""…"
10102100703,"b""\x01\x03\x00\x00\x20\xe6\x10\x00\x00\x01\x00\x00\x00\x1a\x00\x00\x00\x0f\xf0N\x9b\xf5\xb8b@\xd60\xf1\x91g\xb8A\xc0u\xc3C\xa7\xf2\xb8b@G8\xcb\xb4+\xb8A\xc0B6\xc1\xac\xfb\xb8b@^p\xe5""…"
10102100704,"b""\x01\x03\x00\x00\x20\xe6\x10\x00\x00\x01\x00\x00\x00\x1e\x00\x00\x00\x95\x92\x10<}\xb9b@7J\xbb\x9f\x09\xb9A\xc0\xca1\xac\xee\x89\xb9b@j\x94\xfc\x020\xb8A\xc0RI\xbet\x8b\xb9b@\xc1\xe3\xb4""…"
10102100705,"b""\x01\x03\x00\x00\x20\xe6\x10\x00\x00\x01\x00\x00\x00""\x00\x00\x00\xdc\xbc\x02\x88p\xb9b@\xfe\x05l#y\xb9A\xc0g\x10R\x1cr\xb9b@X.\x07\x8cy\xb9A\xc0\x93\xecfnt\xb9b@Pc\x1f""…"
…,…
10102101221,"b""\x01\x03\x00\x00\x20\xe6\x10\x00\x00\x01\x00\x00\x00\x10\x00\x00\x005[\xf6\xe7\x8d\xa6b@\xcb\x87\xf1u\x05\xaeA\xc0\xbbC\x9a\xa8\x94\xa6b@\xb2K\xa5Z\xfb\xadA\xc0F\xfd\x19\xd2\x99\xa6b@\x03\xaf\xc8""…"
10102101222,"b""\x01\x03\x00\x00\x20\xe6\x10\x00\x00\x01\x00\x00\x00\x0e\x00\x00\x00=\xdaAU\x91\xa6b@s_\xc0h)\xb1A\xc0\x0d`\xa0T\x94\xa6b@\xbe\xa4\xab\x17\xf1\xb0A\xc0\xc3\xae@\xbb\xbe\xa6b@@O\x8a""…"
10102101223,"b""\x01\x03\x00\x00\x20\xe6\x10\x00\x00\x01\x00\x00\x00\x13\x00\x00\x00\x91\x96\xa2KN\xa6b@y:\\xa0\x8c\xb1A\xc0\x9a\xba\xda\xe8V\xa6b@\x02\x17\xdd@\xe1\xb0A\xc0\x00&\x14\xe0d\xa6b@r\xdd\xf7""…"
10102101224,"b""\x01\x03\x00\x00\x20\xe6\x10\x00\x00\x01\x00\x00\x00\x16\x00\x00\x00NH\xf4\xec\x9e\xa6b@\x9aY\xbb\xc8\x15\xb2A\xc0B)\xb7\x0d\x9a\xa6b@\xde1k\xaf\xf5\xb1A\xc0s\xa8\xce\x8a\x9f\xa6b@@\xfe\xac""…"


In [6]:
from electoralyze.region.redistribute.mapping import _get_intersection_area

In [26]:
geometry_from = region.Federal2022.geometry
geometry_to = region.SA1_2021.geometry.head(1000)

In [8]:
geometry_combined = geometry_from.rename({"geometry": "geometry_from"}).join(
    geometry_to.rename({"geometry": "geometry_to"}), how="cross"
)
intersection_area = geometry_combined.select(
    pl.exclude("geometry_from", "geometry_to"),
    st.geom("geometry_from").st.intersection(st.geom("geometry_to")).st.area().alias("intersection_area"),
)
intersection_area

federal_2022,SA1_2021,intersection_area
str,i64,f64
"""Adelaide""",10102100701,0.0
"""Adelaide""",10102100702,0.0
"""Adelaide""",10102100703,0.0
"""Adelaide""",10102100704,0.0
"""Adelaide""",10102100705,0.0
…,…,…
"""Wright""",10102101221,0.0
"""Wright""",10102101222,0.0
"""Wright""",10102101223,0.0
"""Wright""",10102101224,0.0


In [27]:
geometry_combined = (
    geometry_from.pipe(to_geopandas)
    .rename(columns={"geometry": "geometry_from"})
    .merge(geometry_to.pipe(to_geopandas).rename(columns={"geometry": "geometry_to"}), how="cross")
)
geometry_combined

Unnamed: 0,federal_2022,geometry_from,SA1_2021,geometry_to
0,Adelaide,"POLYGON Z ((138.63816 -34.87041 0, 138.62644 -...",10102100701,"POLYGON ((149.89074 -35.09012, 149.89181 -35.0..."
1,Adelaide,"POLYGON Z ((138.63816 -34.87041 0, 138.62644 -...",10102100702,"POLYGON ((149.73421 -35.36757, 149.75931 -35.3..."
2,Adelaide,"POLYGON Z ((138.63816 -34.87041 0, 138.62644 -...",10102100703,"POLYGON ((149.77998 -35.44066, 149.77962 -35.4..."
3,Adelaide,"POLYGON Z ((138.63816 -34.87041 0, 138.62644 -...",10102100704,"POLYGON ((149.79654 -35.44561, 149.79809 -35.4..."
4,Adelaide,"POLYGON Z ((138.63816 -34.87041 0, 138.62644 -...",10102100705,"POLYGON ((149.79499 -35.44901, 149.79518 -35.4..."
...,...,...,...,...
1509995,Wright,"POLYGON Z ((153.37012 -28.1064 0, 153.36976 -2...",11701163401,"POLYGON ((151.19881 -33.92297, 151.20146 -33.9..."
1509996,Wright,"POLYGON Z ((153.37012 -28.1064 0, 153.36976 -2...",11701163402,"POLYGON ((151.21869 -33.936, 151.21512 -33.938..."
1509997,Wright,"POLYGON Z ((153.37012 -28.1064 0, 153.36976 -2...",11701163403,"POLYGON ((151.21245 -33.92667, 151.21321 -33.9..."
1509998,Wright,"POLYGON Z ((153.37012 -28.1064 0, 153.36976 -2...",11701163404,"POLYGON ((151.21502 -33.92578, 151.21427 -33.9..."


In [28]:
intersection_area = (
    geometry_combined
    # .loc[lambda df: df["geometry_from"].intersects(df["geometry_to"])] # THIS TAKES MUCH LONGER (3x), REMOVE
    .assign(intersection_area=lambda df: df["geometry_from"].intersection(df["geometry_to"]).area).drop(
        ["geometry_from", "geometry_to"], axis=1
    )
)
intersection_area










48.2 s ± 544 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)





In [14]:
_get_intersection_area(
    geometry_from=region.Federal2022.geometry,
    geometry_to=region.SA1_2021.geometry.head(100),
).pipe(print)

shape: (25, 3)
┌──────────────┬─────────────┬───────────────────┐
│ federal_2022 ┆ SA1_2021    ┆ intersection_area │
│ ---          ┆ ---         ┆ ---               │
│ str          ┆ i64         ┆ f64               │
╞══════════════╪═════════════╪═══════════════════╡
│ Bean         ┆ 10102101217 ┆ 7.6340e-8         │
│ Canberra     ┆ 10102100908 ┆ 5.5317e-8         │
│ Canberra     ┆ 10102100909 ┆ 3.9356e-8         │
│ Canberra     ┆ 10102100910 ┆ 3.6131e-9         │
│ Canberra     ┆ 10102100927 ┆ 3.9933e-8         │
│ …            ┆ …           ┆ …                 │
│ Gilmore      ┆ 10102100707 ┆ 0.000002          │
│ Gilmore      ┆ 10102100710 ┆ 0.000005          │
│ Hume         ┆ 10102100701 ┆ 0.035891          │
│ Hume         ┆ 10102100709 ┆ 0.000004          │
│ Hume         ┆ 10102100710 ┆ 0.000343          │
└──────────────┴─────────────┴───────────────────┘



  .assign(intersection_area=lambda df: df["geometry_from"].intersection(df["geometry_to"]).area)


In [21]:
(
    region.Federal2022.geometry.join(
        region.SA1_2021.geometry.head(10000),
        how="cross",
    )
    .filter(st.geom("geometry").st.overlaps(pl.col("geometry_right")))
    .with_columns(st.geom("geometry").st.intersection(pl.col("geometry_right")).st.area())
)

federal_2022,geometry,SA1_2021,geometry_right
str,f64,i64,binary
"""Barton""",1.8669e-7,11701132501,"b""\x01\x03\x00\x00\x20\xe6\x10\x00\x00\x01\x00\x00\x00Y\x00\x00\x00J\xf9\x19|e\xe5b@x\xcd\xe8""\xb9\xf6@\xc0\xcf3\xd7\x1f\x8e\xe5b@z\x95xw\x98\xf6@\xc0N\xd2\xa7\xea\x8f\xe5b@\x03\x0c\xdf""…"
"""Bean""",7.6340e-8,10102101217,"b""\x01\x03\x00\x00\x20\xe6\x10\x00\x00\x01\x00\x00\x007\x00\x00\x00\x14\x10\x1de\xfb\xa5b@\x05\x0e\xc0_\xfb\xafA\xc00\x8d1\xb5\x8d\xa6b@\xc8\xc7k\xb6s\xacA\xc0Pj*`\x9b\xa6b@V\x1c\xd9""…"
"""Bean""",9.1402e-8,10102101226,"b""\x01\x03\x00\x00\x20\xe6\x10\x00\x00\x01\x00\x00\x00%\x00\x00\x00\x92`=\x18\x0d\xa6b@\x03%\x1d\xf7K\xb0A\xc0\x9a%J=\x17\xa6b@`\x9exj8\xb0A\xc01\xce\xc9$@\xa6b@\x09\xc9\x83""…"
"""Bean""",0.000002,10102161133,"b""\x01\x03\x00\x00\x20\xe6\x10\x00\x00\x01\x00\x00\x00\xec\x01\x00\x00\xe9\xa66\xfc\x92\xa7b@\xd1@\xb1V\x12\xb5A\xc0hB1\xbc\x8b\xa7b@\xfddV\xa4\x04\xb5A\xc0H=%x\x83\xa7b@B\xeb\x1e""…"
"""Bean""",0.00001,10102161134,"b""\x01\x03\x00\x00\x20\xe6\x10\x00\x00\x01\x00\x00\x00G\x01\x00\x00\x91\xa1\xe3\x0c\xa5\xa3b@\x04\x10\xe8O\xed\xcaA\xc0\xdb1\xd6R\xb8\xa3b@\x9e\xf9\xc0\xef\x0c\xcbA\xc0\x10\x16\xe9G\xf4\xa3b@u\xb8\xeb""…"
…,…,…,…
"""Wright""",0.000035,11202124723,"b""\x01\x03\x00\x00\x20\xe6\x10\x00\x00\x01\x00\x00\x00\x80\x01\x00\x00\x96X\xa5Pk\x18c@.\x9a\xfd/\xeb_<\xc0\x90\xb6\x8e\xb1h\x18c@\xc1\x87{\x0b\xc5_<\xc0_\x0f\xba_h\x18c@//\x19""…"
"""Wright""",0.000014,11202124724,"b""\x01\x03\x00\x00\x20\xe6\x10\x00\x00\x01\x00\x00\x00\xd6\x01\x00\x00{8\xe1h\xff\x1dc@\xa1\x86\x8as$]<\xc0CM\xcc\x85\x02\x1ec@I\x9ab-\x8e\<\xc0\xb8\x09\x87\xc6\x12\x1ec@\x87\xb5\xd8""…"
"""Wright""",0.000011,11203125205,"b""\x01\x03\x00\x00\x20\xe6\x10\x00\x00\x01\x00\x00\x00y\x01\x00\x00.K\xd6/\x09$c@}\x03x\xa0\xc8h<\xc0\xf0c\xd4\x00\x19$c@@\x88b\xc5#h<\xc0\xbeFA\x18\x19$c@\x08\xbcy""…"
"""Wright""",0.000022,11203125207,"b""\x01\x03\x00\x00\x20\xe6\x10\x00\x00\x01\x00\x00\x00\xe5\x00\x00\x00\xba\xf2.\xe4^%c@\xbd\xf4\xd7\xb5\xf2F<\xc0\x90\xfa\x96?l%c@\xa52\x8aW\x8eF<\xc0Z\x83\x7fOy%c@\xf2""\xaa""…"


In [22]:
(
    region.Federal2022.geometry.pipe(to_geopandas)
    .merge(
        region.SA1_2021.geometry.head(10000).pipe(to_geopandas),
        how="cross",
    )
    # .filter(st.geom("geometry").st.overlaps(pl.col("geometry_right")))
    # .with_columns(st.geom("geometry").st.intersection(pl.col("geometry_right")).st.area())
    .loc[lambda df: df["geometry_x"].overlaps(df["geometry_y"])]
    .assign(area=lambda df: df["geometry_x"].intersection(df["geometry_y"]).area)
)


  .assign(area=lambda df: df["geometry_x"].intersection(df["geometry_y"]).area)


Unnamed: 0,federal_2022,geometry_x,SA1_2021,geometry_y,area
59994,Barton,"POLYGON Z ((151.17424 -33.92497 0, 151.17381 -...",11701132501,"POLYGON ((151.16864 -33.92752, 151.1736 -33.92...",1.866918e-07
70091,Bean,"MULTIPOLYGON Z (((149.20634 -35.3459 0, 149.20...",10102101217,"POLYGON ((149.18694 -35.37486, 149.2048 -35.34...",7.634010e-08
70100,Bean,"MULTIPOLYGON Z (((149.20634 -35.3459 0, 149.20...",10102101226,"POLYGON ((149.1891 -35.37732, 149.19034 -35.37...",9.140183e-08
70154,Bean,"MULTIPOLYGON Z (((149.20634 -35.3459 0, 149.20...",10102161133,"POLYGON ((149.23669 -35.41462, 149.23581 -35.4...",2.338824e-06
70155,Bean,"MULTIPOLYGON Z (((149.20634 -35.3459 0, 149.20...",10102161134,"POLYGON ((149.1139 -35.58537, 149.11625 -35.58...",9.686587e-06
...,...,...,...,...,...
1507165,Wright,"POLYGON Z ((153.37012 -28.1064 0, 153.36976 -2...",11202124723,"POLYGON ((152.7631 -28.37468, 152.76278 -28.37...",3.525346e-05
1507166,Wright,"POLYGON Z ((153.37012 -28.1064 0, 153.36976 -2...",11202124724,"POLYGON ((152.93743 -28.36384, 152.93781 -28.3...",1.424399e-05
1507320,Wright,"POLYGON Z ((153.37012 -28.1064 0, 153.36976 -2...",11203125205,"POLYGON ((153.12612 -28.40931, 153.12805 -28.4...",1.065659e-05
1507322,Wright,"POLYGON Z ((153.37012 -28.1064 0, 153.36976 -2...",11203125207,"POLYGON ((153.16783 -28.27714, 153.16946 -28.2...",2.166705e-05


In [None]:
(
    region.Federal2022.geometry.pipe(to_geopandas)
    .merge(
        region.SA1_2021.geometry.head(100).pipe(to_geopandas),
        how="cross",
    )
    # .filter(st.geom("geometry").st.overlaps(pl.col("geometry_right")))
    # .with_columns(st.geom("geometry").st.intersection(pl.col("geometry_right")).st.area())
    .loc[lambda df: df["geometry_x"].overlaps(df["geometry_y"])]
    .assign(area=lambda df: df["geometry_x"].intersection(df["geometry_y"]).area)
)

In [21]:
some_intersection = _create_intersection_area_mapping(
    geometry_from=region.Federal2022.geometry, geometry_to=region.SA1_2021.geometry.head(100)
)
some_intersection


  .assign(intersection_area=lambda df: df["geometry_from"].intersection(df["geometry_to"]).area)


federal_2022,SA1_2021,mapping
str,i64,f64
"""Bean""",10102101217,7.6340e-8
"""Canberra""",10102100908,5.5317e-8
"""Canberra""",10102100909,3.9356e-8
"""Canberra""",10102100910,3.6131e-9
"""Canberra""",10102100927,3.9933e-8
…,…,…
,10102101221,0.000016
,10102101222,0.00002
,10102101223,0.000036
,10102101224,0.000033


In [None]:
target_sa1 = 10103101401
target_sa1 = 10106154239
region.SA1_2021.geometry.filter(pl.col(region.SA1_2021.id) == target_sa1).select(st.geom("geometry").st.area())

In [None]:
(
    region.SA1_2021.geometry.filter(pl.col(region.SA1_2021.id) == target_sa1)
    .join(
        region.Federal2022.geometry.filter(pl.col(region.Federal2022.id).is_in(["Eden-Monaro", "Hume"])),
        how="cross",
    )
    .select(st.geom("geometry").st.intersection(pl.col("geometry_right")))
    # .head(1)
    # .tail(1)
    .pipe(to_geopandas)
    .explore()
)

In [None]:
some_intersection.filter(pl.col(region.SA1_2021.id) == target_sa1)

In [None]:
_create_intersection_area_mapping(
    geometry_from=region.Federal2022.get_raw_geometry(), geometry_to=region.SA1_2021.get_raw_geometry().head(100)
)

In [22]:
intersection = get_region_mapping_base(
    region_from=region.Federal2022,
    region_to=region.SA1_2021,
    mapping_method="intersection_area",
    redistribute_with_full=True,
    save_data=True,
    force_new=True,
)
intersection

Extracting...
Transforming...
Extracting...
Transforming...


KeyboardInterrupt: 