# devlog 2024-07-10

_author: Trevor Johnson_

Integration test for Census ADRIOs. This notebook ensures that Census data attributes are being fetched correctly by evaluating:
- Attribute shape
- Attribute data type
- Attribute values
- Attribute sort order

In [1]:
from epymorph import *
from epymorph.adrio import acs5, commuting_flows, us_tiger
from epymorph.data_type import CentroidDType
from epymorph.geography.us_census import CountyScope
from epymorph.rume import SingleStrataRume

# Placeholder RUME whose only job is to give the ADRIOs under test a scope and
# time frame to evaluate against; it uses the "no" IPM/MM library entries and
# the NoInfection initializer.
county_geoids = ["04001", "04003", "04005", "04013", "04017"]

# Keyword bundle for SingleStrataRume.build; the entries are listed (and thus
# evaluated) in the same order as the builder's keyword arguments were before.
rume_settings = dict(
    ipm=ipm_library["no"](),
    mm=mm_library["no"](),
    init=init.NoInfection(),
    scope=CountyScope.in_counties(county_geoids, year=2020),
    time_frame=TimeFrame.year(2020),
    # the three ADRIO-backed attributes this notebook validates
    params={
        "population": acs5.Population(),
        "centroid": us_tiger.GeometricCentroid(),
        "commuters": commuting_flows.Commuters(),
    },
)
rume = SingleStrataRume.build(**rume_settings)

In [2]:
import numpy as np

from epymorph.simulator.data import evaluate_param
from epymorph.util import check_ndarray, match

# Evaluate each ADRIO-backed parameter of the placeholder RUME.
population = evaluate_param(rume, "population")
centroid = evaluate_param(rume, "centroid")
commuters = evaluate_param(rume, "commuters")

# T is not used by the checks below; it is kept so the notebook namespace is
# unchanged for any cell that may rely on it.
T = rume.dim.days
N = rume.dim.nodes

# validate datatype and shape
check_ndarray(population, dtype=match.dtype(int), shape=match.shape_literal((N,)))
check_ndarray(
    centroid, dtype=match.dtype(CentroidDType), shape=match.shape_literal((N,))
)
check_ndarray(commuters, dtype=match.dtype(int), shape=match.shape_literal((N, N)))

# values retrieved manually from Census table B01001
population_array = [71714, 126442, 142254, 4412779, 110271]

# values calculated manually using polygon centroid formula applied to the
# TIGER shapefile polygons (see the centroid cells later in this notebook)
centroid_array = np.array(
    [
        (-109.48884962248498, 35.39552879677974),
        (-109.75126313676874, 31.87963708630415),
        (-111.77052095609857, 35.838724829519194),
        (-112.49151143850366, 33.349039435609264),
        (-110.32141934757458, 35.39955033687498),
    ],
    dtype=CentroidDType,
)

# values retrieved manually from ACS commuting flows table1 for 2020
commuters_matrix = [
    [14190, 0, 149, 347, 1668],
    [0, 43820, 32, 160, 5],
    [99, 17, 59440, 1160, 525],
    [22, 52, 757, 2059135, 240],
    [706, 14, 1347, 592, 30520],
]

# validate values and sort order
# Each check raises AssertionError (with a diff of the mismatching elements)
# instead of silently printing nothing, so a failed validation cannot be
# mistaken for a pass when the notebook is run end to end.
np.testing.assert_array_equal(
    population,
    population_array,
    err_msg="ACS5 population does not match the expected values/order.",
)
print("AC5 attribute validation passed.")

# Structured centroid records are converted to plain (x, y) tuples so they can
# be compared numerically; tolerances are the np.allclose defaults.
np.testing.assert_allclose(
    centroid.tolist(),
    centroid_array.tolist(),
    rtol=1e-5,
    atol=1e-8,
    err_msg="Centroids do not match the expected values/order.",
)
print("Shapefile attribute validation passed.")

np.testing.assert_array_equal(
    commuters,
    commuters_matrix,
    err_msg="Commuting flows do not match the expected values/order.",
)
print("Commuting flows attribute validation passed.")

AC5 attribute validation passed.
Shapefile attribute validation passed.
Commuting flows attribute validation passed.


The following cells calculate geographic centroids from shapefiles and compare the result to values calculated by shapely.

In [3]:
from geopandas import read_file

# load in shapefile data for use in centroid calculations
url = "https://www2.census.gov/geo/tiger/TIGER2020/COUNTY/tl_2020_us_county.zip"

# Read, filter and sort in one non-mutating chain: every step returns a new
# frame, so there is no in-place sort on a filtered selection (which can
# trigger pandas' SettingWithCopyWarning) and re-running the cell is idempotent.
gdf = (
    read_file(
        url, engine="fiona", ignore_geometry=False, include_fields=["GEOID", "STUSPS"]
    )
    # keep only the counties that are nodes in the RUME's scope
    .loc[lambda frame: frame["GEOID"].isin(rume.scope.get_node_ids())]
    # order rows by GEOID -- presumably the same order as the scope's nodes, so
    # results can be compared position-by-position (TODO confirm)
    .sort_values(by="GEOID")
)

In [4]:
# Reference values: take each county geometry's shapely `centroid` point and
# keep its first (only) coordinate tuple.
centroids = list(geom.centroid.coords[0] for geom in gdf["geometry"])
print(centroids)

[(-109.48884962242164, 35.395528796753005), (-109.75126313669315, 31.87963708628258), (-111.77052095590304, 35.83872482945673), (-112.49151143850068, 33.34903943560914), (-110.32141934752828, 35.39955033686066)]


In [5]:
# calculate centroids manually using polygon centroid formula https://en.wikipedia.org/wiki/Centroid#Of_a_polygon
def polygon_centroid(ring):
    """Return the (cx, cy) centroid of a closed polygon ring.

    Parameters
    ----------
    ring : sequence of coordinate tuples
        Vertices of the ring, where index 0 is x and index 1 is y of each
        vertex. Consecutive vertices are paired up to the last element, so the
        ring is expected to repeat its first vertex at the end.

    Returns
    -------
    tuple of float
        The centroid as (cx, cy).

    Raises
    ------
    ZeroDivisionError
        If the ring's signed area is zero (e.g. fewer than two vertices or a
        degenerate ring).
    """
    # Accumulators are named so as not to shadow the builtin `sum`, which at
    # notebook scope would break any later cell that calls sum().
    cross_total = 0.0
    xsum = 0.0
    ysum = 0.0
    for i in range(len(ring) - 1):
        x0, y0 = ring[i][0], ring[i][1]
        x1, y1 = ring[i + 1][0], ring[i + 1][1]
        # cross term shared by the area and both centroid sums
        cross = (x0 * y1) - (x1 * y0)
        cross_total += cross
        xsum += (x0 + x1) * cross
        ysum += (y0 + y1) * cross

    # signed polygon area
    a = cross_total * 0.5

    cx = (1 / (6 * a)) * xsum
    cy = (1 / (6 * a)) * ysum
    return (cx, cy)


# Only each geometry's exterior ring is used; interior rings (holes) are ignored.
# NOTE(review): `.exterior` presumably requires single-part polygons -- confirm
# before reusing this for counties with multi-part geometries.
centroids = [
    polygon_centroid(list(county.exterior.coords)) for county in gdf["geometry"]
]

print(centroids)

[(-109.48884962248498, 35.39552879677974), (-109.75126313676874, 31.87963708630415), (-111.77052095609857, 35.838724829519194), (-112.49151143850366, 33.349039435609264), (-110.32141934757458, 35.39955033687498)]
