# devlog 2024-07-10

_author: Trevor Johnson_

Integration test for Census ADRIOs. This notebook ensures that Census data attributes are being fetched correctly by evaluating:
- Attribute shape
- Attribute data type
- Attribute values
- Attribute sort order

In [1]:
from epymorph.data_shape import Shapes
from epymorph.data_type import CentroidDType, CentroidType
from epymorph.geo.adrio.census.adrio_census import ADRIOMakerCensus
from epymorph.geo.spec import Year
from epymorph.geography.us_census import CountyScope
from epymorph.simulation import AttributeDef

# Build one ADRIO per attribute; the three attributes were chosen so that each
# one exercises a different fetch method of the Census ADRIO maker.
maker = ADRIOMakerCensus()
geoids = ['04001', '04003', '04005', '04013', '04017']
scope = CountyScope.in_counties(geoids)
time_period = Year(2020)

# Order matters: the ADRIOs below are unpacked positionally from this list.
attribs = [
    AttributeDef('population', int, Shapes.N),
    AttributeDef('centroid', CentroidType, Shapes.N),
    AttributeDef('commuters', int, Shapes.NxN),
]

population, centroid, commuters = (
    maker.make_adrio(attrib, scope, time_period) for attrib in attribs
)

In [2]:
import numpy as np

from epymorph.util import check_ndarray, match

T = time_period.days  # not referenced by the checks in this cell

# Fetch each attribute once and reuse the result for every check below,
# instead of calling get_value() again for each individual check.
population_values = population.get_value()
centroid_values = centroid.get_value()
commuters_values = commuters.get_value()

N = len(population_values)

# validate datatype and shape
check_ndarray(
    population_values,
    dtype=match.dtype(int),
    shape=match.shape_literal((N,))
)
check_ndarray(
    centroid_values,
    dtype=match.dtype(CentroidDType),
    shape=match.shape_literal((N,))
)
check_ndarray(
    commuters_values,
    dtype=match.dtype(int),
    shape=match.shape_literal((N, N))
)

# values retrieved manually from Census table B01001
population_array = [71714, 126442, 142254, 4412779, 110271]

# values calculated manually using polygon centroid formula applied to TIGER/Line shapefile polygons
centroid_array = np.array([(-109.48884962248498, 35.39552879677974),
                           (-109.75126313676874, 31.87963708630415),
                           (-111.77052095609857, 35.838724829519194),
                           (-112.49151143850366, 33.349039435609264),
                           (-110.32141934757458, 35.39955033687498)], dtype=CentroidDType)

# values retrieved manually from ACS commuting flows Table 1 for 2020
commuters_matrix = [[14190, 0, 149, 347, 1668],
                    [0, 43820, 32, 160, 5],
                    [99, 17, 59440, 1160, 525],
                    [22, 52, 757, 2059135, 240],
                    [706, 14, 1347, 592, 30520]]

# validate values and sort order
# Each check asserts, so a mismatch raises and fails the notebook run rather
# than silently skipping the "passed" message.
np.testing.assert_array_equal(population_values, population_array)
print('AC5 attribute validation passed.')

# Structured centroid records are converted to plain float pairs before the
# approximate comparison; tolerances are the np.allclose defaults.
np.testing.assert_allclose(
    np.array(centroid_values.tolist()),
    np.array(centroid_array.tolist()),
    rtol=1e-05,
    atol=1e-08,
)
print('Shapefile attribute validation passed.')

np.testing.assert_array_equal(commuters_values, commuters_matrix)
print('Commuting flows attribute validation passed.')

AC5 attribute validation passed.
Shapefile attribute validation passed.
Commuting flows attribute validation passed.


The following cells calculate geographic centroids from shapefiles and compare the result to values calculated by shapely.

In [3]:
from io import BytesIO
from urllib.request import urlopen

from geopandas import read_file

# load in shapefile data for use in centroid calculations
# The archive is read fully into memory so the HTTP response can be closed
# before any parsing starts.
with urlopen("https://www2.census.gov/geo/tiger/TIGER2020/COUNTY/tl_2020_us_county.zip") as f:
    file_buffer = BytesIO(f.read())

gdf = read_file(file_buffer, engine="fiona", ignore_geometry=False,
                include_fields=["GEOID", "STUSPS"])

# Keep only the counties under test, ordered by GEOID. The sort returns a new
# frame rather than mutating the filtered one in place.
gdf = gdf[gdf['GEOID'].isin(geoids)].sort_values(by='GEOID')
geometry = gdf['geometry'].to_list()

In [4]:
# Reference values: the centroid shapely reports for each county geometry,
# taken as the first coordinate tuple of the centroid point.
centroids = []
for shape in gdf['geometry']:
    centroids.append(shape.centroid.coords[0])
print(centroids)

[(-109.48884962242164, 35.395528796753005), (-109.75126313669315, 31.87963708628258), (-111.77052095590304, 35.83872482945673), (-112.49151143850068, 33.34903943560914), (-110.32141934752828, 35.39955033686066)]


In [5]:
# calculate centroids manually using polygon centroid formula https://en.wikipedia.org/wiki/Centroid#Of_a_polygon
def polygon_centroid(ring):
    """Return the (x, y) centroid of a closed polygon ring.

    Parameters
    ----------
    ring : sequence of coordinate tuples
        Vertices of the ring, where the last vertex repeats the first.
        Only the first two components of each vertex are used.

    Returns
    -------
    tuple of float
        The centroid as ``(cx, cy)``.

    Raises
    ------
    ZeroDivisionError
        If the ring encloses zero signed area.
    """
    twice_area = 0.0
    xsum = 0.0
    ysum = 0.0
    # Walk consecutive vertex pairs; the shoelace cross term is computed once
    # per edge and shared by the area and both coordinate sums.
    for p0, p1 in zip(ring, ring[1:]):
        cross = (p0[0] * p1[1]) - (p1[0] * p0[1])
        twice_area += cross
        xsum += (p0[0] + p1[0]) * cross
        ysum += (p0[1] + p1[1]) * cross

    a = twice_area * 0.5

    cx = (1 / (6 * a)) * xsum
    cy = (1 / (6 * a)) * ysum
    return (cx, cy)


# Only each geometry's exterior ring is used, so every geometry must expose
# `.exterior`; interior rings (holes) are not taken into account.
centroids = [polygon_centroid(list(county.exterior.coords)) for county in geometry]

print(centroids)

[(-109.48884962248498, 35.39552879677974), (-109.75126313676874, 31.87963708630415), (-111.77052095609857, 35.838724829519194), (-112.49151143850366, 33.349039435609264), (-110.32141934757458, 35.39955033687498)]
