In [1]:
import polars as pl

In [2]:
# Print a toy frame holding one population value per (SA1_2021, year) pair.
print(
    pl.DataFrame(
        {
            "SA1_2021": ["A", "A", "B", "B"],
            "year": [2020, 2021] * 2,
            "population": [10.0, 20.0, 30.0, 40.0],
        }
    )
)

shape: (4, 3)
┌──────────┬──────┬────────────┐
│ SA1_2021 ┆ year ┆ population │
│ ---      ┆ ---  ┆ ---        │
│ str      ┆ i64  ┆ f64        │
╞══════════╪══════╪════════════╡
│ A        ┆ 2020 ┆ 10.0       │
│ A        ┆ 2021 ┆ 20.0       │
│ B        ┆ 2020 ┆ 30.0       │
│ B        ┆ 2021 ┆ 40.0       │
└──────────┴──────┴────────────┘


In [8]:
import os

from electoralyze import region
from electoralyze.common.constants import ROOT_DIR
from electoralyze.common.metric import Metric, MetricRegion
from electoralyze.region.region_abc import RegionABC

###### Creating group of metrics ########

# Path template for processed metric data. `{census}` and `{metric}` are filled in by
# `National2021Metric.get_file`, which leaves `{region}` as a literal placeholder.
PROCESSED_FILE = os.path.join(ROOT_DIR, "data/census/temp/{census}/{metric}/{region}.parquet")


class National2021Metric(Metric):
    """Metric whose processed files are stored under the `national_2021` census folder."""

    name_suffix: str = "national_2021"
    # Fixed to None; `get_file` below builds the path from PROCESSED_FILE instead.
    file: None = None

    def get_file(self) -> str:
        """Return the processed-file path template for this metric.

        `{census}` and `{metric}` in `PROCESSED_FILE` are replaced with
        `self.name_suffix` and `self.name`; `{region}` is kept as a placeholder.
        """
        return PROCESSED_FILE.format(census=self.name_suffix, metric=self.name, region="{region}")


####### Creating specific metric ###########


# NOTE(review): the path is elided ("...") and this constant is not used in the cells shown here.
RAW_POPULATION_FILE = os.path.join(ROOT_DIR, "data/raw/...")


def process_raw_population(**_kwargs) -> pl.DataFrame:
    """Return a hard-coded toy population table keyed by `SA1_2021` and `year`.

    Any keyword arguments are accepted and ignored.
    """
    return pl.DataFrame(
        {
            "SA1_2021": ["A", "A", "B", "B"],
            "year": [2020, 2021] * 2,
            "population": [10.0, 20.0, 30.0, 40.0],
        }
    )


# Population metric in the national_2021 group: "year" is the category column and
# "population" is the data column.
population = National2021Metric(
    name="population",
    data_type="categorical",
    category_column="year",
    data_column="population",
    allowed_regions=[
        # SA1_2021 is produced by process_raw_population.
        MetricRegion(region=region.SA1_2021, process_raw=process_raw_population),
        # SA2_2021 is declared as redistributed from SA1_2021.
        MetricRegion(region=region.SA2_2021, redistribute_from=region.SA1_2021),
    ],
    # NOTE(review): the lambda parameter `region` shadows the imported `region` module inside the lambda.
    schema=lambda region: pl.Schema({region.id: pl.String, "year": pl.Int64, "population": pl.Float64}),
)

In [9]:
# Show the path template for this metric; "{region}" is still unfilled in the result.
population.get_file()

'/home/andre/git/private/electoralyze/packages/electoralyze/electoralyze/common/../../../../data/census/temp/national_2021/population/{region}.parquet'

In [10]:
# List the MetricRegion entries configured for this metric.
population.allowed_regions

[MetricRegion(region=<class 'electoralyze.region.regions.SA1_2021.SA1_2021'>, redistribute_from=None, redistribute_kwargs=None, process_raw=<function process_raw_population at 0x7f7be6d41080>, process_raw_kwargs=None),
 MetricRegion(region=<class 'electoralyze.region.regions.SA2_2021.SA2_2021'>, redistribute_from=<class 'electoralyze.region.regions.SA1_2021.SA1_2021'>, redistribute_kwargs=None, process_raw=None, process_raw_kwargs=None)]

In [11]:
# The same MetricRegion entries as a dict; the recorded keys are 'SA1_2021' and 'SA2_2021'.
population.allowed_regions_map

{'SA1_2021': MetricRegion(region=<class 'electoralyze.region.regions.SA1_2021.SA1_2021'>, redistribute_from=None, redistribute_kwargs=None, process_raw=<function process_raw_population at 0x7f7be6d41080>, process_raw_kwargs=None),
 'SA2_2021': MetricRegion(region=<class 'electoralyze.region.regions.SA2_2021.SA2_2021'>, redistribute_from=<class 'electoralyze.region.regions.SA1_2021.SA1_2021'>, redistribute_kwargs=None, process_raw=None, process_raw_kwargs=None)}

In [14]:
# Run the raw-processing step; this cell recorded no output.
# NOTE(review): presumably this writes the processed data that `.by()` reads back — confirm in Metric.process_raw.
population.process_raw()

In [16]:
# Fetch the metric at SA1_2021 level; the recorded rows match process_raw_population's table.
population.by(region.SA1_2021)

SA1_2021,year,population
str,i64,f64
"""A""",2020,10.0
"""A""",2021,20.0
"""B""",2020,30.0
"""B""",2021,40.0


In [17]:
# SA2_2021 is configured with redistribute_from=SA1_2021; the recorded run raised
# NotImplementedError ("Not read yet").
population.by(region.SA2_2021)

NotImplementedError: Not read yet

In [None]:
# RegionABC is not one of the metric's allowed_regions; the recorded run raised
# KeyError ("Region None not found for metric: 'population_national_2021'").
population.by(RegionABC)

KeyError: "Region None not found for metric: 'population_national_2021'"

In [None]:
# Print the same toy frame again: one population value per (SA1_2021, year) pair.
print(
    pl.DataFrame(
        {
            "SA1_2021": ["A", "A", "B", "B"],
            "year": [2020, 2021] * 2,
            "population": [10.0, 20.0, 30.0, 40.0],
        }
    )
)

shape: (4, 3)
┌──────────┬──────┬────────────┐
│ SA1_2021 ┆ year ┆ population │
│ ---      ┆ ---  ┆ ---        │
│ str      ┆ i64  ┆ f64        │
╞══════════╪══════╪════════════╡
│ A        ┆ 2020 ┆ 10.0       │
│ A        ┆ 2021 ┆ 20.0       │
│ B        ┆ 2020 ┆ 30.0       │
│ B        ┆ 2021 ┆ 40.0       │
└──────────┴──────┴────────────┘
