# devlog 2024-06-03

_author: Trevor Johnson_

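Tests the ACS5 ADRIOs (plus the US TIGER and commuting flows ADRIOs used alongside them) at a variety of geographic granularities: state, county, tract, and block group.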

In [1]:
import numpy as np

from epymorph.kit import *
from epymorph.adrio import acs5, adrio, commuting_flows, us_tiger
from epymorph.data_shape import DataShapeMatcher
from epymorph.data_type import dtype_as_np
from epymorph.data.ipm.no import No as NoIpm
from epymorph.data.mm.no import No as NoMm
from epymorph.rume import Rume
from epymorph.util import NumpyTypeError, check_ndarray, match

# The type and shape every tested attribute's ADRIO result is expected to have.
# `run_test` walks this list in order, so the order here is the order of the output.
expected: list[AttributeDef] = [
    AttributeDef(name, data_type, shape)
    for name, data_type, shape in (
        ("label", str, Shapes.N),
        ("population", int, Shapes.N),
        ("population_by_age_table", int, Shapes.NxA),
        ("population_by_age", int, Shapes.N),
        ("average_household_size", float, Shapes.N),
        ("dissimilarity_index", float, Shapes.N),
        ("commuters", int, Shapes.NxN),
        ("gini_index", float, Shapes.N),
        ("median_age", float, Shapes.N),
        ("median_income", float, Shapes.N),
        ("pop_density_km2", float, Shapes.N),
    )
]

# And here are the ADRIOs for each of those attributes.
# This same dict is handed both to each ADRIO's context (in `run_test`) and to the
# RUME (in `placeholder_rume`), so ADRIOs can see one another by attribute name.
# NOTE(review): "land_area_km2" has no entry in `expected`, so `run_test` never
# checks it directly; presumably it is only here as an input to
# `PopulationPerKm2` -- confirm.
params: dict[str, adrio.Adrio] = {
    "label": us_tiger.Name(),
    "population": acs5.Population(),
    "population_by_age_table": acs5.PopulationByAgeTable(),
    "population_by_age": acs5.PopulationByAge(18, 24),
    "average_household_size": acs5.AverageHouseholdSize(),
    "dissimilarity_index": acs5.DissimilarityIndex("White", "Black"),
    "commuters": commuting_flows.Commuters(),
    "gini_index": acs5.GiniIndex(),
    "median_age": acs5.MedianAge(),
    "median_income": acs5.MedianIncome(),
    # A factor of 1e-6 takes square meters to square kilometers (per the names).
    "land_area_km2": adrio.Scale(us_tiger.LandAreaM2(), 1e-6),
    "pop_density_km2": adrio.PopulationPerKm2(),
}


def run_test(rume: Rume, skip: tuple[str, ...] = ()):
    """Evaluate each expected attribute's ADRIO and report whether it type-checks.

    For every entry in the module-level `expected` list whose name is not in
    `skip`, the matching ADRIO from `params` is evaluated in the context of
    `rume` (its `dim` and `scope`), and the result is checked against the
    entry's declared dtype and shape.

    Prints ``<name>: good`` on success, or ``<name>: FAILED`` followed by the
    error when a `NumpyTypeError` is raised. Any other exception propagates
    and ends the run.

    Args:
        rume: Supplies the `dim` and `scope` the ADRIOs are evaluated against.
        skip: Attribute names to leave out of this run.
    """
    for attr in expected:
        if attr.name in skip:
            continue

        try:
            contextualized = params[attr.name].with_context(
                dim=rume.dim,
                scope=rume.scope,
                params=params,
                rng=np.random.default_rng(),
            )
            result = contextualized.evaluate()

            dtype_matcher = match.dtype(dtype_as_np(attr.type))
            shape_matcher = DataShapeMatcher(attr.shape, rume.dim)
            check_ndarray(result, dtype=dtype_matcher, shape=shape_matcher)
        except NumpyTypeError as err:
            print(f"{attr.name}: FAILED")
            print(err)
        else:
            print(f"{attr.name}: good")


def placeholder_rume(scope, time_frame):
    """Build a single-strata RUME over `scope` and `time_frame` for ADRIO testing.

    The RUME is assembled from the `No` IPM, the `No` movement model, and the
    `NoInfection` initializer, together with the module-level `params`. In this
    notebook it is only passed to `run_test`, which reads its `dim` and `scope`.
    """
    ipm = NoIpm()
    mm = NoMm()
    initializer = init.NoInfection()
    return SingleStrataRume.build(
        ipm=ipm,
        mm=mm,
        init=initializer,
        scope=scope,
        time_frame=time_frame,
        params=params,
    )

In [2]:
# State granularity: four states, identified by postal abbreviation.
rume = placeholder_rume(
    scope=StateScope.in_states(["NY", "NJ", "MD", "VA"], year=2020),
    time_frame=TimeFrame.year(2020),
)

run_test(rume)

label: good
population: good
population_by_age_table: good
population_by_age: good
average_household_size: good
dissimilarity_index: good
commuters: good
gini_index: good
median_age: good
median_income: good
pop_density_km2: good


In [3]:
# State granularity again, this time identifying states by two-digit code
# (presumably state FIPS codes -- confirm).
rume = placeholder_rume(
    scope=StateScope.in_states(["04", "08"], year=2020),
    time_frame=TimeFrame.year(2020),
)

run_test(rume)

label: good
population: good
population_by_age_table: good
population_by_age: good
average_household_size: good
dissimilarity_index: good
commuters: good
gini_index: good
median_age: good
median_income: good
pop_density_km2: good


In [4]:
# County granularity: three counties, identified by five-digit code.
rume = placeholder_rume(
    scope=CountyScope.in_counties(["35001", "04013", "04017"], year=2020),
    time_frame=TimeFrame.year(2020),
)

run_test(rume)

label: good
population: good
population_by_age_table: good
population_by_age: good
average_household_size: good
dissimilarity_index: good
commuters: good
gini_index: good
median_age: good
median_income: good
pop_density_km2: good


In [5]:
# Tract granularity: five tracts, identified by eleven-digit code.
# "commuters" is skipped here.
# NOTE(review): presumably commuting flows are not available at this
# granularity -- confirm.
rume = placeholder_rume(
    scope=TractScope.in_tracts(
        ["35001000720", "35001000904", "35001000906", "04027011405", "04027011407"],
        year=2020,
    ),
    time_frame=TimeFrame.year(2020),
)

run_test(rume, skip=("commuters",))

label: good
population: good
population_by_age_table: good
population_by_age: good
average_household_size: good
dissimilarity_index: good
gini_index: good
median_age: good
median_income: good
pop_density_km2: good


In [6]:
# Block-group granularity: five block groups, identified by twelve-digit code.
# Both "commuters" and "dissimilarity_index" are skipped here.
# NOTE(review): presumably neither can be computed at this granularity -- confirm.
rume = placeholder_rume(
    scope=BlockGroupScope.in_block_groups(
        [
            "350010007201",
            "350010009041",
            "350010009061",
            "040270114053",
            "040270114072",
        ],
        year=2020,
    ),
    time_frame=TimeFrame.year(2020),
)

run_test(rume, skip=("commuters", "dissimilarity_index"))

label: good
population: good
population_by_age_table: good
population_by_age: good
average_household_size: good
Gini Index cannot be retrieved for block group level, fetching tract level data instead.
gini_index: good
median_age: good
median_income: good
pop_density_km2: good
