# devlog 2024-07-10

_author: Trevor Johnson_

Integration test for Census ADRIOs. This notebook ensures that Census data attributes are being fetched correctly by evaluating:
- Attribute shape
- Attribute data type
- Attribute sort order

Sorting is evaluated by creating individual ADRIOs that fetch each attribute one county at a time, in FIPS order, and concatenating the results into a list so that the expected sort order is known. A geo is then constructed with all attributes for the same counties, and its output is compared against the individually fetched values.

In [None]:
from epymorph.data_shape import Shapes
from epymorph.geo.adrio.census.adrio_census import ADRIOMakerCensus
from epymorph.geo.spec import Year
from epymorph.geography.us_census import CountyScope
from epymorph.simulation import geo_attrib

maker = ADRIOMakerCensus()

# Attributes that cannot take part in the per-county comparison:
# - tract_median_income is not available for county granularity
# - commuters cannot be retrieved one node at a time
excluded = {'tract_median_income', 'commuters'}

# Build a filtered copy rather than calling `remove` on `maker.attributes`:
# the maker's own list is left unmodified, and the excluded attributes are
# identified by name alone instead of by reconstructing their full
# definitions (dtype, shape, comment).
attributes = [
    attribute for attribute in maker.attributes
    if attribute.name not in excluded
]

geoids = ['04001', '04003', '04005', '04013', '04017']

# build 2d list of all attributes by fetching one county at a time in fips
# order, ensuring correct sort: truth[i][j] holds the value of attributes[i]
# fetched for geoids[j] alone
truth = [
    [
        maker.make_adrio(attribute, CountyScope.in_counties([county]), Year(2020))
        .get_value()
        .tolist()
        for county in geoids
    ]
    for attribute in attributes
]

In [None]:
from epymorph.data_shape import Shapes
from epymorph.data_type import CentroidDType
from epymorph.geo.adrio import adrio_maker_library
from epymorph.geo.dynamic import DynamicGeo
from epymorph.geo.spec import DynamicGeoSpec
from epymorph.simulation import geo_attrib

# Attributes to load into the combined geo.
geo_attributes = [
    geo_attrib('label', str, Shapes.N),
    # duplicate name field so that it can be accessed by enumeration below
    geo_attrib('name', str, Shapes.N),
    geo_attrib('population', int, Shapes.N),
    geo_attrib('population_by_age', int, Shapes.NxA(3)),
    geo_attrib('population_by_age_x6', int, Shapes.NxA(6)),
    geo_attrib('centroid', CentroidDType, Shapes.N),
    geo_attrib('geoid', str, Shapes.N),
    geo_attrib('average_household_size', int, Shapes.N),
    geo_attrib('dissimilarity_index', float, Shapes.N),
    geo_attrib('gini_index', float, Shapes.N),
    geo_attrib('median_age', int, Shapes.N),
    geo_attrib('median_income', int, Shapes.N),
    geo_attrib('pop_density_km2', float, Shapes.N),
]

spec = DynamicGeoSpec(
    attributes=geo_attributes,
    time_period=Year(2020),
    # Reuse the county list the per-county truth values were fetched for, so
    # both sides of the comparison always cover the same nodes.
    scope=CountyScope.in_counties(geoids),
    # Every attribute is sourced from 'Census'; 'label' uses 'Census:name'
    # (presumably selecting the Census 'name' attribute -- confirm against
    # the DynamicGeoSpec source syntax). Deriving the mapping from the
    # attribute list keeps the two from drifting apart.
    source={
        attribute.name: 'Census:name' if attribute.name == 'label' else 'Census'
        for attribute in geo_attributes
    },
)

geo = DynamicGeo.from_library(spec, adrio_maker_library)

In [9]:
from numpy import array_equal

geo.validate()

# `truth` was built by iterating `attributes`, so the two lists line up
# position by position and can be walked together.
# NOTE(review): each truth row is a list of per-county `.tolist()` results;
# confirm its nesting matches `geo[name].tolist()` for every attribute shape.
mismatched = [
    attribute.name
    for expected, attribute in zip(truth, attributes)
    if not array_equal(expected, geo[attribute.name].tolist())
]

if not mismatched:
    print("Sort test passed.")
else:
    print("Sort test failed.")
    # Name the offending attributes so a failure is actionable.
    print(f"Mismatched attributes: {', '.join(mismatched)}")

Sort test passed.
