# devlog 2024-07-10

_author: Trevor Johnson_

Integration test for Census ADRIOs. This notebook ensures that Census data attributes are being fetched correctly by evaluating:
- Attribute shape
- Attribute data type
- Attribute values
- Attribute sort order

In [1]:
import numpy as np

from epymorph import *
from epymorph.adrio import acs5, commuting_flows, us_tiger
from epymorph.data_shape import SimDimensions
from epymorph.data_type import CentroidDType
from epymorph.geography.us_census import CountyScope
from epymorph.util import check_ndarray, match


# Build the evaluation context shared by every ADRIO check in this cell:
# a county-level scope (five GEOIDs, all with the "04" state prefix) on the
# 2020 geography, paired with dimensions covering calendar year 2020.
county_geoids = ["04001", "04003", "04005", "04013", "04017"]
scope = CountyScope.in_counties(county_geoids, year=2020)
time = TimeFrame.year(2020)

# NOTE(review): tau steps / compartments / events look like minimal
# placeholder values needed to construct SimDimensions — confirm.
dim = SimDimensions.build(
    nodes=scope.nodes,
    start_date=time.start_date,
    days=time.duration_days,
    tau_step_lengths=[1.0],
    compartments=1,
    events=0,
)

# Keyword arguments later splatted into each ADRIO's `with_context(...)`.
context = dict(scope=scope, dim=dim)


# Population
# Expected values were retrieved manually from Census table B01001.
# The comparison below is element-wise, so the order of entries matters.
pop_expected = [71714, 126442, 142254, 4412779, 110271]

with sim_messaging():
    pop_actual = acs5.Population().with_context(**context).evaluate()

    # Data type check (presumably raises on a mismatch — confirm in epymorph.util).
    check_ndarray(pop_actual, dtype=match.dtype(int))

    # Value check: array_equal is False on any shape, value, or order difference.
    pop_matches = np.array_equal(pop_expected, pop_actual)
    print(
        "ACS5 attribute validation passed.\n"
        if pop_matches
        else "ACS5 attribute validation FAILED.\n"
    )


# Centroids
# Expected values were calculated manually with the polygon centroid formula
# applied to the Census shapefile polygons (presumably TIGER/Line — the
# calculation is reproduced in the aside at the end of this notebook).
# Each tuple is stored into the structured CentroidDType record.
centroid_expected = np.array(
    [
        (-109.48884962248498, 35.39552879677974),
        (-109.75126313676874, 31.87963708630415),
        (-111.77052095609857, 35.838724829519194),
        (-112.49151143850366, 33.349039435609264),
        (-110.32141934757458, 35.39955033687498),
    ],
    dtype=CentroidDType,
)

with sim_messaging():
    centroid_actual = us_tiger.GeometricCentroid().with_context(**context).evaluate()

    # Data type check (presumably raises on a mismatch — confirm in epymorph.util).
    check_ndarray(centroid_actual, dtype=match.dtype(CentroidDType))

    # Compare field by field with a floating-point tolerance; `all` stops at
    # the first field that does not match (latitude is checked first).
    centroids_match = all(
        np.allclose(centroid_expected[field], centroid_actual[field])
        for field in ("latitude", "longitude")
    )
    print(
        "Shapefile attribute validation passed.\n"
        if centroids_match
        else "Shapefile attribute validation FAILED.\n"
    )


# Commuters
# Expected values were retrieved manually from ACS commuting flows table 1
# for 2020. This is a square matrix with one row and one column per county
# in the scope; the comparison below is element-wise, so ordering matters.
commuters_expected = [
    [14190, 0, 149, 347, 1668],
    [0, 43820, 32, 160, 5],
    [99, 17, 59440, 1160, 525],
    [22, 52, 757, 2059135, 240],
    [706, 14, 1347, 592, 30520],
]

with sim_messaging():
    commuters_actual = commuting_flows.Commuters().with_context(**context).evaluate()

    # Data type check (presumably raises on a mismatch — confirm in epymorph.util).
    check_ndarray(commuters_actual, dtype=match.dtype(int))

    # Value check: array_equal is False on any shape, value, or order difference.
    commuters_match = np.array_equal(commuters_expected, commuters_actual)
    print(
        "Commuting flows attribute validation passed.\n"
        if commuters_match
        else "Commuting flows attribute validation FAILED.\n"
    )

Loading epymorph.adrio.acs5.Population:
  |####################| 100%  (1.293s)
ACS5 attribute validation passed.

Loading epymorph.adrio.us_tiger.GeometricCentroid:
  |####################| 100%  (7.969s)
Shapefile attribute validation passed.

Loading epymorph.adrio.commuting_flows.Commuters:
  |####################| 100%  (6.335s)
Commuting flows attribute validation passed.



## Aside: calculating expected centroids

The following cells calculate geographic centroids from shapefiles and compare the result to values calculated by shapely.

In [2]:
from geopandas import read_file

# Load the 2020 TIGER county shapefile for use in the centroid calculations,
# keep only the counties in `scope`, and order the rows by GEOID so the
# results below come out in a stable order.
gdf = (
    read_file(
        "https://www2.census.gov/geo/tiger/TIGER2020/COUNTY/tl_2020_us_county.zip",
        engine="fiona",
        ignore_geometry=False,
        include_fields=["GEOID", "STUSPS"],
    )
    # presumably scope.node_ids holds the county GEOIDs — verify against CountyScope
    .loc[lambda counties: counties["GEOID"].isin(scope.node_ids)]
    .sort_values(by="GEOID")
)

In [3]:
# Reference values: centroids taken straight from shapely's `centroid`
# property, one coordinate tuple per county geometry (in `gdf` row order).
centroids = [
    county_geometry.centroid.coords[0] for county_geometry in gdf["geometry"]
]
print(centroids)

[(-109.48884962242164, 35.395528796753005), (-109.75126313669315, 31.87963708628258), (-111.77052095590304, 35.83872482945673), (-112.49151143850068, 33.34903943560914), (-110.32141934752828, 35.39955033686066)]


In [4]:
# calculate centroids manually using the polygon centroid formula
# https://en.wikipedia.org/wiki/Centroid#Of_a_polygon
def _polygon_centroid(ring):
    """Return the ``(cx, cy)`` centroid of a polygon given its boundary ring.

    Parameters
    ----------
    ring : iterable of coordinate tuples
        Polygon vertices in order. Consecutive pairs ``(ring[i], ring[i + 1])``
        are treated as edges, so the ring must be closed (first vertex repeated
        as the last). Only the first two components of each vertex are used.

    Returns
    -------
    tuple of float
        The centroid as ``(cx, cy)``.

    Raises
    ------
    ValueError
        If the signed area of the ring is zero, in which case the centroid
        formula is undefined.
    """
    vertices = list(ring)

    area_sum = 0.0
    x_sum = 0.0
    y_sum = 0.0
    for start, end in zip(vertices, vertices[1:]):
        # Shoelace cross term for this edge; computed once and shared by the
        # area, x, and y accumulators.
        cross = (start[0] * end[1]) - (end[0] * start[1])
        area_sum += cross
        x_sum += (start[0] + end[0]) * cross
        y_sum += (start[1] + end[1]) * cross

    # Signed area; its sign cancels out in the division below.
    area = area_sum * 0.5
    if area == 0.0:
        raise ValueError("cannot compute the centroid of a polygon with zero area")

    scale = 1 / (6 * area)
    return (scale * x_sum, scale * y_sum)


# Only each county's exterior ring is used (interior holes are ignored), and
# `.exterior` assumes every geometry is a single Polygon.
centroids = [_polygon_centroid(county.exterior.coords) for county in gdf["geometry"]]

print(centroids)

[(-109.48884962248498, 35.39552879677974), (-109.75126313676874, 31.87963708630415), (-111.77052095609857, 35.838724829519194), (-112.49151143850366, 33.349039435609264), (-110.32141934757458, 35.39955033687498)]
