# devlog 2024-07-01

_author: Trevor Johnson_

This devlog will demonstrate the functionality of a new 'CDC ADRIO maker' which fetches data from various CDC and HealthData datasets. Six datasets are currently included, each with its own limitations and set of supported attributes. Functionality will be demonstrated one dataset at a time.

First, CSVs containing label and population attributes must be created and stored for use in the demo geos to allow them to have a flexible date range.

In [1]:
from pandas import DataFrame
from epymorph.data_shape import Shapes
from epymorph.geo.adrio.census.adrio_census import ADRIOMakerCensus
from epymorph.geo.spec import Year
from epymorph.geography.us_census import CountyScope
from epymorph.simulation import geo_attrib


# create and store 'cdc_label_pop_county.csv'
#
# The file is written without a header row or index; its columns are, in order,
# geoid, name, population. Later cells read it by column position.
from pathlib import Path  # this cell's import block does not bring Path into scope

census_maker = ADRIOMakerCensus()
states_list = ['AZ', 'CO']

# All three attributes share the same geography and year, so build those once.
county_scope = CountyScope.in_states_by_code(states_list)
census_year = Year(2022)

geoid = census_maker.make_adrio(
    geo_attrib('geoid', str, Shapes.N), county_scope, census_year)
name = census_maker.make_adrio(
    geo_attrib('name', str, Shapes.N), county_scope, census_year)
population = census_maker.make_adrio(
    geo_attrib('population', int, Shapes.N), county_scope, census_year)

df = DataFrame({
    'geoid': geoid.get_value(),
    'name': name.get_value(),
    'population': population.get_value(),
})

# `to_csv` does not create missing parent directories, so make sure the
# scratch folder exists before writing into it.
scratch_dir = Path("./scratch")
scratch_dir.mkdir(parents=True, exist_ok=True)
df.to_csv(scratch_dir / "cdc_label_pop_county.csv", header=False, index=False)

In [2]:
from epymorph.geography.us_census import StateScope


# create and store 'cdc_label_pop_state.csv'
#
# The file is written without a header row or index; its columns are, in order,
# geoid, name, population. Later cells read it by column position.
from pathlib import Path  # this cell's import block does not bring Path into scope

census_maker = ADRIOMakerCensus()
states_list = ['AZ', 'CO']

# All three attributes share the same geography and year, so build those once.
state_scope = StateScope.in_states_by_code(states_list)
census_year = Year(2022)

geoid = census_maker.make_adrio(
    geo_attrib('geoid', str, Shapes.N), state_scope, census_year)
name = census_maker.make_adrio(
    geo_attrib('name', str, Shapes.N), state_scope, census_year)
population = census_maker.make_adrio(
    geo_attrib('population', int, Shapes.N), state_scope, census_year)

df = DataFrame({
    'geoid': geoid.get_value(),
    'name': name.get_value(),
    'population': population.get_value(),
})

# `to_csv` does not create missing parent directories, so make sure the
# scratch folder exists before writing into it.
scratch_dir = Path("./scratch")
scratch_dir.mkdir(parents=True, exist_ok=True)
df.to_csv(scratch_dir / "cdc_label_pop_state.csv", header=False, index=False)

### **United States COVID 19 Community Levels by County**
This dataset is used to fetch data on reported COVID-19 cases and hospitalizations per 100k population.

- Supported attributes: covid_cases_per_100k, covid_hospitalizations_per_100k
- Available date range: 2/24/2022 - 5/4/2023
- Granularity: county, state

https://healthdata.gov/dataset/United-States-COVID-19-Community-Levels-by-County/nn5b-j5u9/about_data

In [3]:
from datetime import date
from pathlib import Path

from epymorph.geo.adrio.file.adrio_csv import CSVSpec
from epymorph.geo.spec import DateRange, DynamicGeoSpec


# Label and population are read from the county CSV written in the first cell.
# That file has no header row: column 0 = geoid, 1 = name, 2 = population.
county_csv = Path("./scratch/cdc_label_pop_county.csv")
label_source = CSVSpec(file_path=county_csv, key_col=0, data_col=1,
                       key_type="geoid", skiprows=None)
population_source = CSVSpec(file_path=county_csv, key_col=0, data_col=2,
                            key_type="geoid", skiprows=None)

# Attributes demonstrated for this dataset; each is an integer TxN series
# whose source is "CDC".
community_level_attribs = (
    "covid_cases_per_100k",
    "covid_hospitalizations_per_100k",
)

spec = DynamicGeoSpec(
    attributes=[
        geo_attrib('label', str, Shapes.N),
        geo_attrib('population', int, Shapes.N),
        *(geo_attrib(attr_name, int, Shapes.TxN)
          for attr_name in community_level_attribs),
    ],
    # the dataset's full available date range
    time_period=DateRange(date(2022, 2, 24), date(2023, 5, 4)),
    scope=CountyScope.in_states(['04', '08']),
    source={
        'label': label_source,
        'population': population_source,
        **dict.fromkeys(community_level_attribs, "CDC"),
    },
)

In [4]:
from epymorph.geo.dynamic import DynamicGeo
from epymorph.geo.adrio import adrio_maker_library


# Build the geo from the spec defined in the previous cell.
geo = DynamicGeo.from_library(spec, adrio_maker_library)

# Each attribute is read immediately before it is printed, keeping the same
# access/print order as a single inline f-string expression.
cases_per_100k = geo['covid_cases_per_100k']
print(f"COVID cases per 100k:\n {cases_per_100k}\n")

hospitalizations_per_100k = geo['covid_hospitalizations_per_100k']
print(f"COVID hospitalizations per 100k:\n {hospitalizations_per_100k}")

COVID cases per 100k:
 [[332 260 181 ... 203 183 129]
 [ 97 100  78 ... 122 149  79]
 [293 131 132 ... 122 115  49]
 ...
 [169  38  50 ...  61  35   9]
 [204  50  73 ...   0  48  29]
 [344  66 119 ...  20  67   0]]

COVID hospitalizations per 100k:
 [[25 15 19 ... 11 11 11]
 [19 11 13 ...  7  7  7]
 [13 10 10 ...  4  4  4]
 ...
 [ 6  3  4 ...  2  2  2]
 [ 8  3  4 ...  3  3  3]
 [ 8  3  4 ...  1  1  1]]


### **COVID-19 Reported Patient Impact and Hospital Capacity by Facility**
This dataset is used to fetch hospitalization data for COVID-19 and other respiratory illnesses.

- Supported attributes: covid_hospitalization_avg_facility, covid_hospitalization_sum_facility, influenza_hospitalization_avg_facility, influenza_hospitalization_sum_facility
- Available date range: 12/13/2020 to 5/10/2023
- Granularity: county, state

https://healthdata.gov/Hospital/COVID-19-Reported-Patient-Impact-and-Hospital-Capa/anag-cw7u/about_data


In [5]:
# Label and population are read from the county CSV written in the first cell.
# That file has no header row: column 0 = geoid, 1 = name, 2 = population.
county_csv = Path("./scratch/cdc_label_pop_county.csv")
label_source = CSVSpec(file_path=county_csv, key_col=0, data_col=1,
                       key_type="geoid", skiprows=None)
population_source = CSVSpec(file_path=county_csv, key_col=0, data_col=2,
                            key_type="geoid", skiprows=None)

# Facility-level attributes and their dtypes: averages are floats, sums are ints.
facility_attrib_dtypes = {
    "covid_hospitalization_avg_facility": float,
    "covid_hospitalization_sum_facility": int,
    "influenza_hospitalization_avg_facility": float,
    "influenza_hospitalization_sum_facility": int,
}

spec = DynamicGeoSpec(
    attributes=[
        geo_attrib('label', str, Shapes.N),
        geo_attrib('population', int, Shapes.N),
        *(geo_attrib(attr_name, dtype, Shapes.TxN)
          for attr_name, dtype in facility_attrib_dtypes.items()),
    ],
    # the dataset's full available date range
    time_period=DateRange(date(2020, 12, 13), date(2023, 5, 10)),
    scope=CountyScope.in_states(['04', '08']),
    source={
        'label': label_source,
        'population': population_source,
        **dict.fromkeys(facility_attrib_dtypes, "CDC"),
    },
)

In [6]:
# Build the geo from the spec defined in the previous cell.
geo = DynamicGeo.from_library(spec, adrio_maker_library)

# NOTE(review): the -999999 entries in the captured output look like a
# sentinel for missing/withheld values -- confirm against the ADRIO maker.

# Each attribute is read immediately before it is printed.
covid_avg = geo['covid_hospitalization_avg_facility']
print(f"COVID hospitalization average:\n {covid_avg}\n")

covid_sum = geo['covid_hospitalization_sum_facility']
print(f"COVID hospitalization sum:\n {covid_sum}\n")

influenza_avg = geo['influenza_hospitalization_avg_facility']
print(f"Influenza hospitalization average:\n {influenza_avg}\n")

influenza_sum = geo['influenza_hospitalization_sum_facility']
print(f"Influenza hospitalization sum:\n {influenza_sum}")

COVID hospitalization average:
 [[ 6.40000e+00 -9.99999e+05 -9.99999e+05 ... -9.99999e+05  6.20000e+01
  -9.99999e+05]
 [ 6.40000e+00 -9.99999e+05  5.37000e+01 ... -9.99999e+05  5.72000e+01
  -9.99999e+05]
 [ 5.70000e+00 -9.99999e+05 -9.99999e+05 ... -9.99999e+05  3.72000e+01
  -9.99999e+05]
 ...
 [-9.99999e+05 -9.99999e+05  6.70000e+00 ...  0.00000e+00 -9.99999e+05
   0.00000e+00]
 [-9.99999e+05 -9.99999e+05  5.70000e+00 ...  0.00000e+00 -9.99999e+05
   0.00000e+00]
 [-9.99999e+05 -9.99999e+05  5.00000e+00 ...  0.00000e+00 -9.99999e+05
   0.00000e+00]]

COVID hospitalization sum:
 [[     45     236     352 ...      14     434 -999999]
 [     45     225     376 ... -999999     400      23]
 [     40     238     389 ...       9     260 -999999]
 ...
 [      6 -999999      47 ...       0      31       0]
 [      7 -999999      40 ...       0      23       0]
 [-999999      18      35 ...       0 -999999       0]]

Influenza hospitalization average:
 [[      0.       0.       0. ...      

### **Weekly United States Hospitalization Metrics by Jurisdiction**
Like the previous dataset, this dataset is used to fetch hospitalization data for COVID-19 and other respiratory illnesses. Unlike the previous dataset, however, it includes metrics reported voluntarily after the end of the mandatory reporting period and is limited to state granularity.

- Supported attributes: covid_hospitalization_avg_state, covid_hospitalization_sum_state, influenza_hospitalization_avg_state, influenza_hospitalization_sum_state
- Available date range: 1/04/2020 to present. Data is reported voluntarily after 5/1/2024.
- Granularity: state

https://data.cdc.gov/Public-Health-Surveillance/Weekly-United-States-Hospitalization-Metrics-by-Ju/aemt-mg7g/about_data

In [7]:
# Label and population are read from the state CSV written in the second cell.
# That file has no header row: column 0 = geoid, 1 = name, 2 = population.
state_csv = Path("./scratch/cdc_label_pop_state.csv")
label_source = CSVSpec(file_path=state_csv, key_col=0, data_col=1,
                       key_type="geoid", skiprows=None)
population_source = CSVSpec(file_path=state_csv, key_col=0, data_col=2,
                            key_type="geoid", skiprows=None)

# State-level attributes and their dtypes: averages are floats, sums are ints.
state_attrib_dtypes = {
    "covid_hospitalization_avg_state": float,
    "covid_hospitalization_sum_state": int,
    "influenza_hospitalization_avg_state": float,
    "influenza_hospitalization_sum_state": int,
}

spec = DynamicGeoSpec(
    attributes=[
        geo_attrib('label', str, Shapes.N),
        geo_attrib('population', int, Shapes.N),
        *(geo_attrib(attr_name, dtype, Shapes.TxN)
          for attr_name, dtype in state_attrib_dtypes.items()),
    ],
    # the end date falls after 5/1/2024, i.e. inside the voluntary reporting period
    time_period=DateRange(date(2020, 12, 13), date(2024, 6, 28)),
    scope=StateScope.in_states(['04', '08']),
    source={
        'label': label_source,
        'population': population_source,
        **dict.fromkeys(state_attrib_dtypes, "CDC"),
    },
)

In [8]:
# Build the geo from the spec defined in the previous cell.
geo = DynamicGeo.from_library(spec, adrio_maker_library)

# Each attribute is read immediately before it is printed. The captured output
# shows a notice emitted ahead of every section, so access and print are kept
# interleaved to preserve that ordering. Only the first three rows are shown.
covid_avg = geo['covid_hospitalization_avg_state']
print(f"COVID hospitalization average:\n {covid_avg[:3]}\n...\n")

covid_sum = geo['covid_hospitalization_sum_state']
print(f"COVID hospitalization sum:\n {covid_sum[:3]}\n...\n")

influenza_avg = geo['influenza_hospitalization_avg_state']
print(f"Influenza hospitalization average:\n {influenza_avg[:3]}\n...\n")

influenza_sum = geo['influenza_hospitalization_sum_state']
print(f"Influenza hospitalization sum:\n {influenza_sum[:3]}\n...")

State level hospitalization data is voluntary past 5/1/2024.
COVID hospitalization average:
 [[452. 199.]
 [472. 164.]
 [495. 150.]]
...

State level hospitalization data is voluntary past 5/1/2024.
COVID hospitalization sum:
 [[3164 1396]
 [3307 1148]
 [3465 1051]]
...

State level hospitalization data is voluntary past 5/1/2024.
Influenza hospitalization average:
 [[2. 1.]
 [1. 1.]
 [6. 0.]]
...

State level hospitalization data is voluntary past 5/1/2024.
Influenza hospitalization sum:
 [[11  5]
 [ 8  5]
 [44  0]]
...


### **COVID-19 Vaccinations in the United States,County**
This dataset is used to fetch cumulative COVID-19 vaccination data.

- Supported attributes: full_covid_vaccinations, one_dose_covid_vaccinations, covid_booster_doses
- Available date range: 12/13/2020 to 5/10/2024.
- Granularity: county, state

https://data.cdc.gov/Vaccinations/COVID-19-Vaccinations-in-the-United-States-County/8xkx-amqh/about_data

In [9]:
# Label and population are read from the county CSV written in the first cell.
# That file has no header row: column 0 = geoid, 1 = name, 2 = population.
county_csv = Path("./scratch/cdc_label_pop_county.csv")
label_source = CSVSpec(file_path=county_csv, key_col=0, data_col=1,
                       key_type="geoid", skiprows=None)
population_source = CSVSpec(file_path=county_csv, key_col=0, data_col=2,
                            key_type="geoid", skiprows=None)

# Vaccination attributes; each is an integer TxN series whose source is "CDC".
vaccination_attribs = (
    "full_covid_vaccinations",
    "one_dose_covid_vaccinations",
    "covid_booster_doses",
)

spec = DynamicGeoSpec(
    attributes=[
        geo_attrib('label', str, Shapes.N),
        geo_attrib('population', int, Shapes.N),
        *(geo_attrib(attr_name, int, Shapes.TxN)
          for attr_name in vaccination_attribs),
    ],
    # the dataset's full available date range
    time_period=DateRange(date(2020, 12, 13), date(2024, 5, 10)),
    scope=CountyScope.in_states(['04', '08']),
    source={
        'label': label_source,
        'population': population_source,
        **dict.fromkeys(vaccination_attribs, "CDC"),
    },
)

In [10]:
# Build the geo from the spec defined in the previous cell.
geo = DynamicGeo.from_library(spec, adrio_maker_library)

# Each attribute is read immediately before it is printed.
fully_vaccinated = geo['full_covid_vaccinations']
print(f"Full COVID vaccinations:\n {fully_vaccinated}\n")

one_dose = geo['one_dose_covid_vaccinations']
print(f"One dose COVID vaccinations:\n {one_dose}\n")

booster_doses = geo['covid_booster_doses']
print(f"COVID booster doses:\n {booster_doses}\n")

Full COVID vaccinations:
 [[     0      0      0 ...      0      0      0]
 [     0      0      0 ...      0      0      0]
 [     0      0      0 ...      0      0      0]
 ...
 [ 80296  90170 119331 ...   1592 206984   4555]
 [ 80318  90188 119360 ...   1592 207075   4556]
 [ 80342  90203 119393 ...   1592 207109   4556]]

One dose COVID vaccinations:
 [[     0      0      0 ...      0      0      0]
 [     0      0      0 ...      0      0      0]
 [     0      0      0 ...      0      0      0]
 ...
 [ 96425 109710 137855 ...   1768 223046   5100]
 [ 96465 109735 137876 ...   1769 223133   5100]
 [ 96506 109753 137911 ...   1769 223173   5101]]

COVID booster doses:
 [[     0      0      0 ...      0      0      0]
 [     0      0      0 ...      0      0      0]
 [     0      0      0 ...      0      0      0]
 ...
 [ 44350  43430  64573 ...    811 109515   2171]
 [ 44383  43447  64614 ...    811 109634   2171]
 [ 44416  43472  64648 ...    811 109687   2172]]



### **AH COVID-19 Death Counts by County and Week, 2020-present**
This dataset is used to fetch data on COVID-19 deaths.

- Supported attributes: covid_deaths_county
- Available date range: 1/4/2020 to 4/5/2024.
- Granularity: county, state

https://data.cdc.gov/NCHS/AH-COVID-19-Death-Counts-by-County-and-Week-2020-p/ite7-j2w7/about_data

In [11]:
# Label and population are read from the county CSV written in the first cell.
# That file has no header row: column 0 = geoid, 1 = name, 2 = population.
county_csv = Path("./scratch/cdc_label_pop_county.csv")
label_source = CSVSpec(file_path=county_csv, key_col=0, data_col=1,
                       key_type="geoid", skiprows=None)
population_source = CSVSpec(file_path=county_csv, key_col=0, data_col=2,
                            key_type="geoid", skiprows=None)

# The single attribute demonstrated for this dataset.
deaths_attrib = "covid_deaths_county"

spec = DynamicGeoSpec(
    attributes=[
        geo_attrib('label', str, Shapes.N),
        geo_attrib('population', int, Shapes.N),
        geo_attrib(deaths_attrib, int, Shapes.TxN),
    ],
    # the dataset's full available date range
    time_period=DateRange(date(2020, 1, 4), date(2024, 4, 5)),
    scope=CountyScope.in_states(['04', '08']),
    source={
        'label': label_source,
        'population': population_source,
        deaths_attrib: "CDC",
    },
)

In [12]:
# Build the geo from the spec defined in the previous cell.
geo = DynamicGeo.from_library(spec, adrio_maker_library)

# Read the county death counts, then print them.
county_deaths = geo['covid_deaths_county']
print(f"COVID deaths:\n {county_deaths}\n")

COVID deaths:
 [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]



### **Provisional COVID-19 Death Counts by Week Ending Date and State**
This dataset is used to fetch data on COVID-19 and influenza deaths. It is continuously updated but only available for state granularity.

- Supported attributes: covid_deaths_state, influenza_deaths
- Available date range: 1/4/2020 to present.
- Granularity: state

https://data.cdc.gov/NCHS/Provisional-COVID-19-Death-Counts-by-Week-Ending-D/r8kw-7aab/about_data

In [13]:
# Label and population are read from the state CSV written in the second cell.
# That file has no header row: column 0 = geoid, 1 = name, 2 = population.
state_csv = Path("./scratch/cdc_label_pop_state.csv")
label_source = CSVSpec(file_path=state_csv, key_col=0, data_col=1,
                       key_type="geoid", skiprows=None)
population_source = CSVSpec(file_path=state_csv, key_col=0, data_col=2,
                            key_type="geoid", skiprows=None)

# Death-count attributes; each is an integer TxN series whose source is "CDC".
death_attribs = (
    "covid_deaths_state",
    "influenza_deaths",
)

spec = DynamicGeoSpec(
    attributes=[
        geo_attrib('label', str, Shapes.N),
        geo_attrib('population', int, Shapes.N),
        *(geo_attrib(attr_name, int, Shapes.TxN)
          for attr_name in death_attribs),
    ],
    time_period=DateRange(date(2020, 1, 4), date(2024, 4, 5)),
    scope=StateScope.in_states(['04', '08']),
    source={
        'label': label_source,
        'population': population_source,
        **dict.fromkeys(death_attribs, "CDC"),
    },
)

In [14]:
# Build the geo from the spec defined in the previous cell.
geo = DynamicGeo.from_library(spec, adrio_maker_library)

# Each attribute is read immediately before it is printed; only the first
# three rows are shown.
covid_deaths = geo['covid_deaths_state']
print(f"COVID deaths:\n {covid_deaths[:3]}\n...\n")

influenza_deaths = geo['influenza_deaths']
print(f"Influenza deaths:\n {influenza_deaths[:3]}\n...")

COVID deaths:
 [[0 0]
 [0 0]
 [0 0]]
...

Influenza deaths:
 [[ 0  0]
 [ 0  0]
 [11  0]]
...
