In [1]:
# Enable IPython's autoreload extension in mode 2 so imported modules are re-imported
# before each cell runs; edits to the project package are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

import geopandas as gpd
import pandas as pd
import polars as pl
import polars_st as st
import pyogrio
from electoralyze import region
from electoralyze.common import constants
from electoralyze.common.geometry import to_geopandas, to_geopolars
from electoralyze.common.testing.region_fixture import (
    RegionMocked,
    create_fake_regions,
    read_true_geometry,
    read_true_metadata,
)
from electoralyze.region.redistribute import redistribute
from electoralyze.region.redistribute.mapping import (
    _create_intersection_area_mapping,
    _get_remaining_area,
    get_region_mapping_base,
)
from polars import testing

In [2]:
# Preview the Federal 2022 geometry table: per the output below it has a string id column
# (`federal_2022`) and a binary `geometry` column.
region.Federal2022.geometry

federal_2022,geometry
str,binary
"""Adelaide""","b""\x01\x03\x00\x00\xa0\xe6\x10\x00\x00\x01\x00\x00\x00W\x00\x00\x00u\x8e\x01\xd9kTa@\x82\xfd\xd7\xb9ioA\xc0\x00\x00\x00\x00\x00\x00\x00\x00\xd0\x98I\xd4\x0bTa@\x0a\xd68\x9b\x8epA\xc0\x00\x00\x00""…"
"""Aston""","b""\x01\x03\x00\x00\xa0\xe6\x10\x00\x00\x01\x00\x00\x00\xd0\x02\x00\x00\xfc\x8e\xe1\xb1\x1f+b@|\xed\x99%\x01\xeeB\xc0\x00\x00\x00\x00\x00\x00\x00\x00;8\xd8\x9b\x18+b@-\xe8\xbd1\x04\xeeB\xc0\x00\x00\x00""…"
"""Ballarat""","b""\x01\x03\x00\x00\xa0\xe6\x10\x00\x00\x01\x00\x00\x00D\x09\x00\x00J\xb2\x0eGW\xfaa@p\xb071$\x07C\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x05\xc0x\x06\x8d\xfaa@\xfa\xd1p\xca\xdc\x02C\xc0\x00\x00\x00""…"
"""Banks""","b""\x01\x03\x00\x00\xa0\xe6\x10\x00\x00\x01\x00\x00\x00\x9b\x00\x00\x00\xab\x07\xccC&\xe4b@\x96\xaf\xcb\xf0\x9f\xfc@\xc0\x00\x00\x00\x00\x00\x00\x00\x006#\x83\xdc\xc5\xe3b@\xfc\xdf\x11\x15\xaa\xfd@\xc0\x00\x00\x00""…"
"""Barker""","b""\x01\x06\x00\x00\xa0\xe6\x10\x00\x00\x1b\x00\x00\x00\x01\x03\x00\x00\x80\x01\x00\x00\x00M\x0d\x00\x00wj.7\x18\xa0a@\xa3\x05h[\xcd\x02A\xc0\x00\x00\x00\x00\x00\x00\x00\x00\xa0PO\x1f\x01\xa0a@r7""…"
…,…
"""Werriwa""","b""\x01\x03\x00\x00\xa0\xe6\x10\x00\x00\x01\x00\x00\x00\x1d\x01\x00\x00*\x8d\x98\xd9g\xddb@\xf7\xe67L4\xf8@\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x1a\x17\x0e\x84d\xddb@G\xc8@\x9e]\xf8@\xc0\x00\x00\x00""…"
"""Whitlam""","b""\x01\x06\x00\x00\xa0\xe6\x10\x00\x00\x02\x00\x00\x00\x01\x03\x00\x00\x80\x01\x00\x00\x00\xd9\x04\x00\x00y\x95\xb5M\xf1\xdcb@\x17a\x8ariLA\xc0\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xb1KT\xef\xdcb@u\x1e""…"
"""Wide Bay""","b""\x01\x03\x00\x00\xa0\xe6\x10\x00\x00\x01\x00\x00\x00d\x0e\x00\x00\x8b\xe1\xea\x00\x88+c@\x01L\x198\xa0\x019\xc0\x00\x00\x00\x00\x00\x00\x00\x00\xf7\xb1\x82\xdf\x86+c@6\x93o\xb6\xb9\x019\xc0\x00\x00\x00""…"
"""Wills""","b""\x01\x03\x00\x00\xa0\xe6\x10\x00\x00\x01\x00\x00\x00D\x01\x00\x00\x16\x16\xdc\x0fx\x1fb@\xce\xe2\xc5\xc2\x10\xe3B\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x0c\x02+\x87\x16\x1fb@\x84\xf2>\x8e\xe6\xe2B\xc0\x00\x00\x00""…"


In [4]:
# Tiny hand-made input: two federal electorates, each carrying the same constant `data` value.
data_by_from = pl.DataFrame({region.Federal2022.id: ["Adelaide", "Wills"]}).with_columns(
    pl.lit(1000).alias("data")
)
data_by_from

federal_2022,data
str,i32
"""Adelaide""",1000
"""Wills""",1000


In [16]:
# Base mapping from SA1 (2021) regions to 2022 federal electorates, built with the
# "intersection_area" method.
# NOTE(review): the output contains rows with a null `federal_2022` and near-zero weights
# (~1e-21) - presumably leftover area not covered by any electorate; confirm.
mapping = get_region_mapping_base(
    mapping_method="intersection_area",
    region_to=region.Federal2022,
    region_from=region.SA1_2021,
)
mapping

federal_2022,SA1_2021,mapping
str,i64,f64
"""Banks""",11901135801,0.000011
"""Banks""",11901135802,0.000007
"""Banks""",11901135803,0.000016
"""Banks""",11901135804,0.000017
"""Banks""",11901135805,0.000011
…,…,…
,12003168029,1.6941e-21
,30402108816,1.3553e-20
,11703164618,1.0588e-22
,40104101608,3.3881e-21


In [45]:
# Build fixture rows: keep only SA1 ids that occur in exactly one mapping row (presumably
# SA1s that are not split across electorates), keep one such SA1 per federal electorate,
# then number the electorates in alphabetical order. The result is emitted as a list of
# dicts so it can be pasted into a literal DataFrame.
(
    mapping.filter(pl.col(region.SA1_2021.id).is_unique())
    .unique(region.Federal2022.id)  # which SA1 survives per electorate is not specified
    .sort(region.Federal2022.id)
    # `with_row_count` is deprecated in polars (it emitted the warning shown in this
    # cell's output); `with_row_index` is the drop-in replacement and also starts at 0.
    .with_row_index("value")
    .select(region.Federal2022.id, region.SA1_2021.id, "value")
).rows(named=True)

  .with_row_count("value")


[{'federal_2022': 'Adelaide', 'SA1_2021': 40101100101, 'value': 0},
 {'federal_2022': 'Aston', 'SA1_2021': 21101125104, 'value': 1},
 {'federal_2022': 'Ballarat', 'SA1_2021': 20101100101, 'value': 2},
 {'federal_2022': 'Banks', 'SA1_2021': 11901135802, 'value': 3},
 {'federal_2022': 'Barker', 'SA1_2021': 40501111003, 'value': 4},
 {'federal_2022': 'Barton', 'SA1_2021': 11702132808, 'value': 5},
 {'federal_2022': 'Bass', 'SA1_2021': 60201103605, 'value': 6},
 {'federal_2022': 'Bean', 'SA1_2021': 80103111302, 'value': 7},
 {'federal_2022': 'Bendigo', 'SA1_2021': 20201101801, 'value': 8},
 {'federal_2022': 'Bennelong', 'SA1_2021': 12502147709, 'value': 9},
 {'federal_2022': 'Berowra', 'SA1_2021': 11501129401, 'value': 10},
 {'federal_2022': 'Blair', 'SA1_2021': 31002127801, 'value': 11},
 {'federal_2022': 'Blaxland', 'SA1_2021': 11901135401, 'value': 12},
 {'federal_2022': 'Bonner', 'SA1_2021': 30101100204, 'value': 13},
 {'federal_2022': 'Boothby', 'SA1_2021': 40107102331, 'value': 14},


In [46]:
# Hard-coded fixture: one row per federal electorate, pairing it with a single SA1 id and a
# distinct integer `value` (0..150, in alphabetical electorate order).
# NOTE(review): the rows appear to be pasted from the `.rows(named=True)` output of the
# preceding cell (the first rows match that output) - regenerate from there if the mapping changes.
data = pl.DataFrame(
    [
        {"federal_2022": "Adelaide", "SA1_2021": 40101100101, "value": 0},
        {"federal_2022": "Aston", "SA1_2021": 21101125104, "value": 1},
        {"federal_2022": "Ballarat", "SA1_2021": 20101100101, "value": 2},
        {"federal_2022": "Banks", "SA1_2021": 11901135802, "value": 3},
        {"federal_2022": "Barker", "SA1_2021": 40501111003, "value": 4},
        {"federal_2022": "Barton", "SA1_2021": 11702132808, "value": 5},
        {"federal_2022": "Bass", "SA1_2021": 60201103605, "value": 6},
        {"federal_2022": "Bean", "SA1_2021": 80103111302, "value": 7},
        {"federal_2022": "Bendigo", "SA1_2021": 20201101801, "value": 8},
        {"federal_2022": "Bennelong", "SA1_2021": 12502147709, "value": 9},
        {"federal_2022": "Berowra", "SA1_2021": 11501129401, "value": 10},
        {"federal_2022": "Blair", "SA1_2021": 31002127801, "value": 11},
        {"federal_2022": "Blaxland", "SA1_2021": 11901135401, "value": 12},
        {"federal_2022": "Bonner", "SA1_2021": 30101100204, "value": 13},
        {"federal_2022": "Boothby", "SA1_2021": 40107102331, "value": 14},
        {"federal_2022": "Bowman", "SA1_2021": 30101100102, "value": 15},
        {"federal_2022": "Braddon", "SA1_2021": 60401107501, "value": 16},
        {"federal_2022": "Bradfield", "SA1_2021": 12101139906, "value": 17},
        {"federal_2022": "Brand", "SA1_2021": 50703116904, "value": 18},
        {"federal_2022": "Brisbane", "SA1_2021": 30202103128, "value": 19},
        {"federal_2022": "Bruce", "SA1_2021": 21202129301, "value": 20},
        {"federal_2022": "Burt", "SA1_2021": 50601111003, "value": 21},
        {"federal_2022": "Calare", "SA1_2021": 10301105901, "value": 22},
        {"federal_2022": "Calwell", "SA1_2021": 21005124201, "value": 23},
        {"federal_2022": "Canberra", "SA1_2021": 80101100102, "value": 24},
        {"federal_2022": "Canning", "SA1_2021": 50102101601, "value": 25},
        {"federal_2022": "Capricornia", "SA1_2021": 30803120501, "value": 26},
        {"federal_2022": "Casey", "SA1_2021": 21102126220, "value": 27},
        {"federal_2022": "Chifley", "SA1_2021": 11601130406, "value": 28},
        {"federal_2022": "Chisholm", "SA1_2021": 20703116117, "value": 29},
        {"federal_2022": "Clark", "SA1_2021": 60103101306, "value": 30},
        {"federal_2022": "Cook", "SA1_2021": 11903137431, "value": 31},
        {"federal_2022": "Cooper", "SA1_2021": 20602111002, "value": 32},
        {"federal_2022": "Corangamite", "SA1_2021": 20301103403, "value": 33},
        {"federal_2022": "Corio", "SA1_2021": 20302103701, "value": 34},
        {"federal_2022": "Cowan", "SA1_2021": 50401104701, "value": 35},
        {"federal_2022": "Cowper", "SA1_2021": 10402108301, "value": 36},
        {"federal_2022": "Cunningham", "SA1_2021": 10701113402, "value": 37},
        {"federal_2022": "Curtin", "SA1_2021": 50301103003, "value": 38},
        {"federal_2022": "Dawson", "SA1_2021": 31201133703, "value": 39},
        {"federal_2022": "Deakin", "SA1_2021": 20703116123, "value": 40},
        {"federal_2022": "Dickson", "SA1_2021": 30201102304, "value": 41},
        {"federal_2022": "Dobell", "SA1_2021": 10201103114, "value": 42},
        {"federal_2022": "Dunkley", "SA1_2021": 21401137002, "value": 43},
        {"federal_2022": "Durack", "SA1_2021": 50403105801, "value": 44},
        {"federal_2022": "Eden-Monaro", "SA1_2021": 10102100702, "value": 45},
        {"federal_2022": "Fadden", "SA1_2021": 30903123502, "value": 46},
        {"federal_2022": "Fairfax", "SA1_2021": 31601141304, "value": 47},
        {"federal_2022": "Farrer", "SA1_2021": 10901117201, "value": 48},
        {"federal_2022": "Fenner", "SA1_2021": 80101100201, "value": 49},
        {"federal_2022": "Fisher", "SA1_2021": 31601141604, "value": 50},
        {"federal_2022": "Flinders", "SA1_2021": 21402137702, "value": 51},
        {"federal_2022": "Flynn", "SA1_2021": 30801119001, "value": 52},
        {"federal_2022": "Forde", "SA1_2021": 30907155301, "value": 53},
        {"federal_2022": "Forrest", "SA1_2021": 50101100101, "value": 54},
        {"federal_2022": "Fowler", "SA1_2021": 12503148001, "value": 55},
        {"federal_2022": "Franklin", "SA1_2021": 60102100403, "value": 56},
        {"federal_2022": "Fraser", "SA1_2021": 21301132801, "value": 57},
        {"federal_2022": "Fremantle", "SA1_2021": 50701114806, "value": 58},
        {"federal_2022": "Gellibrand", "SA1_2021": 21302134101, "value": 59},
        {"federal_2022": "Gilmore", "SA1_2021": 10104101707, "value": 60},
        {"federal_2022": "Gippsland", "SA1_2021": 20502108109, "value": 61},
        {"federal_2022": "Goldstein", "SA1_2021": 20801116801, "value": 62},
        {"federal_2022": "Gorton", "SA1_2021": 21001122803, "value": 63},
        {"federal_2022": "Grayndler", "SA1_2021": 11702132701, "value": 64},
        {"federal_2022": "Greenway", "SA1_2021": 11601130302, "value": 65},
        {"federal_2022": "Grey", "SA1_2021": 40201102701, "value": 66},
        {"federal_2022": "Griffith", "SA1_2021": 30103101703, "value": 67},
        {"federal_2022": "Groom", "SA1_2021": 30702117916, "value": 68},
        {"federal_2022": "Hasluck", "SA1_2021": 50402104902, "value": 69},
        {"federal_2022": "Hawke", "SA1_2021": 20102100901, "value": 70},
        {"federal_2022": "Herbert", "SA1_2021": 31801146511, "value": 71},
        {"federal_2022": "Higgins", "SA1_2021": 20606113501, "value": 72},
        {"federal_2022": "Hindmarsh", "SA1_2021": 40401109002, "value": 73},
        {"federal_2022": "Hinkler", "SA1_2021": 31901149201, "value": 74},
        {"federal_2022": "Holt", "SA1_2021": 21203130002, "value": 75},
        {"federal_2022": "Hotham", "SA1_2021": 20802118001, "value": 76},
        {"federal_2022": "Hughes", "SA1_2021": 12703152305, "value": 77},
        {"federal_2022": "Hume", "SA1_2021": 10105153901, "value": 78},
        {"federal_2022": "Hunter", "SA1_2021": 10601110702, "value": 79},
        {"federal_2022": "Indi", "SA1_2021": 20401105403, "value": 80},
        {"federal_2022": "Isaacs", "SA1_2021": 20803118302, "value": 81},
        {"federal_2022": "Jagajaga", "SA1_2021": 20901119601, "value": 82},
        {"federal_2022": "Kennedy", "SA1_2021": 30602114402, "value": 83},
        {"federal_2022": "Kingsford Smith", "SA1_2021": 11701132003, "value": 84},
        {"federal_2022": "Kingston", "SA1_2021": 40302105904, "value": 85},
        {"federal_2022": "Kooyong", "SA1_2021": 20701114701, "value": 86},
        {"federal_2022": "La Trobe", "SA1_2021": 21201128904, "value": 87},
        {"federal_2022": "Lalor", "SA1_2021": 21305136101, "value": 88},
        {"federal_2022": "Leichhardt", "SA1_2021": 30601113801, "value": 89},
        {"federal_2022": "Lilley", "SA1_2021": 30201102303, "value": 90},
        {"federal_2022": "Lindsay", "SA1_2021": 12403145701, "value": 91},
        {"federal_2022": "Lingiari", "SA1_2021": 70103103101, "value": 92},
        {"federal_2022": "Longman", "SA1_2021": 31301136202, "value": 93},
        {"federal_2022": "Lyne", "SA1_2021": 10601111002, "value": 94},
        {"federal_2022": "Lyons", "SA1_2021": 60101100111, "value": 95},
        {"federal_2022": "Macarthur", "SA1_2021": 12301169801, "value": 96},
        {"federal_2022": "Mackellar", "SA1_2021": 12202142008, "value": 97},
        {"federal_2022": "Macnamara", "SA1_2021": 20604111801, "value": 98},
        {"federal_2022": "Macquarie", "SA1_2021": 11503129901, "value": 99},
        {"federal_2022": "Makin", "SA1_2021": 40203103648, "value": 100},
        {"federal_2022": "Mallee", "SA1_2021": 20103101302, "value": 101},
        {"federal_2022": "Maranoa", "SA1_2021": 30701117102, "value": 102},
        {"federal_2022": "Maribyrnong", "SA1_2021": 20603111302, "value": 103},
        {"federal_2022": "Mayo", "SA1_2021": 40102100301, "value": 104},
        {"federal_2022": "McEwen", "SA1_2021": 20202103101, "value": 105},
        {"federal_2022": "McMahon", "SA1_2021": 11603131804, "value": 106},
        {"federal_2022": "McPherson", "SA1_2021": 30901122501, "value": 107},
        {"federal_2022": "Melbourne", "SA1_2021": 20601110620, "value": 108},
        {"federal_2022": "Menzies", "SA1_2021": 20702115601, "value": 109},
        {"federal_2022": "Mitchell", "SA1_2021": 11501129001, "value": 110},
        {"federal_2022": "Monash", "SA1_2021": 20501107608, "value": 111},
        {"federal_2022": "Moncrieff", "SA1_2021": 30901122402, "value": 112},
        {"federal_2022": "Moore", "SA1_2021": 50501107001, "value": 113},
        {"federal_2022": "Moreton", "SA1_2021": 30302105206, "value": 114},
        {"federal_2022": "New England", "SA1_2021": 10604112805, "value": 115},
        {"federal_2022": "Newcastle", "SA1_2021": 11101120942, "value": 116},
        {"federal_2022": "Nicholls", "SA1_2021": 20401105606, "value": 117},
        {"federal_2022": "North Sydney", "SA1_2021": 12101139901, "value": 118},
        {"federal_2022": "O'Connor", "SA1_2021": 50102100901, "value": 119},
        {"federal_2022": "Oxley", "SA1_2021": 30305107408, "value": 120},
        {"federal_2022": "Page", "SA1_2021": 10401108001, "value": 121},
        {"federal_2022": "Parkes", "SA1_2021": 10302106202, "value": 122},
        {"federal_2022": "Parramatta", "SA1_2021": 12502147705, "value": 123},
        {"federal_2022": "Paterson", "SA1_2021": 10601110912, "value": 124},
        {"federal_2022": "Pearce", "SA1_2021": 50503109901, "value": 125},
        {"federal_2022": "Perth", "SA1_2021": 50302103801, "value": 126},
        {"federal_2022": "Petrie", "SA1_2021": 30201102206, "value": 127},
        {"federal_2022": "Rankin", "SA1_2021": 30305107202, "value": 128},
        {"federal_2022": "Reid", "SA1_2021": 12001138302, "value": 129},
        {"federal_2022": "Richmond", "SA1_2021": 11201123603, "value": 130},
        {"federal_2022": "Riverina", "SA1_2021": 10106154301, "value": 131},
        {"federal_2022": "Robertson", "SA1_2021": 10201102802, "value": 132},
        {"federal_2022": "Ryan", "SA1_2021": 30402108603, "value": 133},
        {"federal_2022": "Scullin", "SA1_2021": 20904121601, "value": 134},
        {"federal_2022": "Shortland", "SA1_2021": 10202104506, "value": 135},
        {"federal_2022": "Solomon", "SA1_2021": 70101100101, "value": 136},
        {"federal_2022": "Spence", "SA1_2021": 40201102506, "value": 137},
        {"federal_2022": "Sturt", "SA1_2021": 40103101101, "value": 138},
        {"federal_2022": "Swan", "SA1_2021": 50602111801, "value": 139},
        {"federal_2022": "Sydney", "SA1_2021": 11703132901, "value": 140},
        {"federal_2022": "Tangney", "SA1_2021": 50603112401, "value": 141},
        {"federal_2022": "Wannon", "SA1_2021": 20103101402, "value": 142},
        {"federal_2022": "Warringah", "SA1_2021": 12104141305, "value": 143},
        {"federal_2022": "Watson", "SA1_2021": 11901157106, "value": 144},
        {"federal_2022": "Wentworth", "SA1_2021": 11703133305, "value": 145},
        {"federal_2022": "Werriwa", "SA1_2021": 12302170301, "value": 146},
        {"federal_2022": "Whitlam", "SA1_2021": 10701113103, "value": 147},
        {"federal_2022": "Wide Bay", "SA1_2021": 31605143401, "value": 148},
        {"federal_2022": "Wills", "SA1_2021": 20601110609, "value": 149},
        {"federal_2022": "Wright", "SA1_2021": 30904124101, "value": 150},
    ]
)

In [51]:
# Redistribute the per-electorate `value` from federal electorates onto SA1 regions using
# the "intersection_area" mapping.
# (Passing `aggregation="max"` was tried here and is currently left disabled.)
redistribute(
    data.select([region.Federal2022.id, "value"]),
    mapping="intersection_area",
    region_to=region.SA1_2021,
    region_from=region.Federal2022,
)

SA1_2021,value
i64,f64
20801117336,0.241399
31802147915,0.001268
30402108715,1.627743
60103101727,0.064366
20703116718,0.190874
…,…
21203130611,0.023229
20505110131,0.132882
21003153510,0.466562
31904151905,0.003313


In [49]:
# Reverse direction: redistribute the per-SA1 `value` up to federal electorates. In the
# output shown, the displayed electorates get back the value of their fixture row
# (e.g. Solomon -> 136.0).
# (An earlier experiment fed a two-row frame of SA1 ids 40106102213 and 21003153801 with
# data=1000 instead of the fixture; it is no longer used.)
redistribute(
    data.select([region.SA1_2021.id, "value"]),
    mapping="intersection_area",
    region_to=region.Federal2022,
    region_from=region.SA1_2021,
)

federal_2022,value
str,f64
"""Solomon""",136.0
"""Chifley""",28.0
"""Bruce""",20.0
"""Goldstein""",62.0
"""Calare""",22.0
…,…
"""Ballarat""",2.0
"""Hume""",78.0
"""Flinders""",51.0
"""Hughes""",77.0
