In [None]:
%load_ext lab_black

## CDC API notebook

The vaers_monitoring code is being retired.  

In [None]:
# imports
import datetime as dt
import os
from pathlib import Path

import pandas as pd
from sodapy import Socrata

In [None]:
# Relax pandas' display limits so wide/long API frames are readable inline.
display_options = {
    "display.max_colwidth": None,
    "display.max_rows": 100,
    "display.max_columns": 100,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

In [None]:
# Host of the CDC open-data portal queried by this notebook.
CDC_SERVER = "data.cdc.gov"

# Dataset identifier and application key are read from the environment so they
# never have to be typed into (and accidentally committed with) the notebook.
# Both fall back to an empty string, which matches the previous placeholders.
CDC_ADMIN_DATA = os.environ.get("CDC_ADMIN_DATA", "")
CDC_APP_KEY = os.environ.get("CDC_APP_KEY", "")

# Output locations, relative to the notebook's parent directory.
PROJECT_ROOT = Path("..")
EXPOSURE_CSV = PROJECT_ROOT / "AnalysisOutput" / "CovidVaccineAdmin_API.csv"
US_AGE_EXPOSURE_CSV = PROJECT_ROOT / "AnalysisOutput" / "USAgeBasedAdmin.csv"

# Columns (and their order) written to EXPOSURE_CSV.
FINAL_COLUMNS = [
    "Vaccine",
    "VAX_NAME",
    "Total_Administered",
    "Datetime",
    "Posted",
    "Updated_Raw",
    "URL",
]

In [None]:
# Client bound to the CDC host; CDC_APP_KEY is passed as the second positional
# argument (presumably sodapy's app token — confirm against the sodapy docs).
client = Socrata(CDC_SERVER, CDC_APP_KEY)

In [None]:
# Download the whole dataset one page at a time.
# Fail early with a readable message instead of an opaque HTTP error.
if not CDC_ADMIN_DATA:
    raise ValueError(
        "CDC_ADMIN_DATA is empty; set it to the CDC dataset identifier first."
    )

offset = 0
limit = 1000
results = []
while True:
    # A fixed sort key keeps page boundaries stable between requests; without
    # it the API may return rows in a different order on each call, which can
    # duplicate or drop rows across pages.
    result_list = client.get(
        CDC_ADMIN_DATA, offset=offset, limit=limit, order=":id"
    )
    print(f"Current offset: {offset}, {len(result_list)} rows returned.")
    results.extend(result_list)
    # A short (or empty) page means the end of the dataset was reached.
    if len(result_list) < limit:
        break
    # Advance by exactly one page so the step can never drift from `limit`.
    offset += limit

In [None]:
# Convert each raw API record into typed values:
#   * "date"     -> datetime (parsed with an explicit format)
#   * "location" -> kept verbatim
#   * the rest   -> float when the value converts, otherwise left untouched
clean_results = []
for r in results:
    clean_d = {}
    for k, v in r.items():
        if k == "date":
            clean_d["date"] = dt.datetime.strptime(v, "%Y-%m-%dT%H:%M:%S.%f")
        elif k == "location":
            clean_d[k] = v
        else:
            try:
                clean_d[k] = float(v)
            except (TypeError, ValueError):
                # Only conversion failures are tolerated; anything else
                # (e.g. KeyboardInterrupt) must still propagate.
                clean_d[k] = v
    clean_results.append(clean_d)

In [None]:
# Build the working frame: one row per cleaned API record.
exposure_df = pd.DataFrame.from_records(clean_results)

In [None]:
# Report the earliest and latest dates present in the download.
first_date = exposure_df["date"].min()
last_date = exposure_df["date"].max()
print(f"Date Range: {first_date} - {last_date}")

In [None]:
# National ("US") rows only, restricted to the date and the per-manufacturer
# administered counts; copied so later edits do not touch exposure_df.
manufacturer_columns = [
    "date",
    "administered_pfizer",
    "administered_moderna",
    "administered_janssen",
    "administered_unk_manuf",
]
is_national_row = exposure_df["location"] == "US"
covid_vax_admin = exposure_df.loc[is_national_row, manufacturer_columns].copy()

In [None]:
# API column name -> output column / vaccine label.
vaccine_map = dict(
    date="Datetime",
    administered_pfizer="Pfizer-BioNTech",
    administered_moderna="Moderna",
    administered_janssen="Janssen",
    administered_unk_manuf="Not Identified",
)

# Vaccine label -> VAX_NAME value written to the output CSV.
cdc_vax_name_map = dict(
    [
        ("Pfizer-BioNTech", "COVID19 (COVID19 (PFIZER-BIONTECH))"),
        ("Moderna", "COVID19 (COVID19 (MODERNA))"),
        ("Janssen", "COVID19 (COVID19 (JANSSEN))"),
        ("Not Identified", "COVID19 (COVID19 (UNKNOWN))"),
    ]
)

In [None]:
# Swap API column names for their labels; names without a (non-empty) mapping
# are kept unchanged.
renamed_columns = []
for column_name in covid_vax_admin.columns:
    renamed_columns.append(vaccine_map.get(column_name) or column_name)
covid_vax_admin.columns = renamed_columns

In [None]:
# Reshape wide -> long: one row per (Datetime, Vaccine) with its dose total.
long_admin = pd.melt(
    covid_vax_admin,
    id_vars="Datetime",
    var_name="Vaccine",
    value_name="Total_Administered",
)
long_admin = long_admin.sort_values(
    ["Datetime", "Vaccine"], ascending=[True, True]
)
# NOTE(review): reset_index() without drop=True keeps the pre-sort index as an
# extra "index" column; it is not part of FINAL_COLUMNS.
covid_vax_admin_pivot = long_admin.reset_index()

In [None]:
def _as_of_label(timestamp):
    """Format one Datetime value as the 'Updated_Raw' provenance string."""
    # NOTE(review): the format pairs 24-hour %H with %p (AM/PM); left as-is
    # because the string is written to the output CSV.
    return f"CDC API|Data as of: {timestamp.strftime('%b %d %Y %H:%M%p ET')}"


# Attach the remaining output columns: VAX_NAME, Posted, Updated_Raw and URL.
covid_vax_admin_pivot = covid_vax_admin_pivot.assign(
    VAX_NAME=covid_vax_admin_pivot["Vaccine"].map(cdc_vax_name_map),
    Posted=covid_vax_admin_pivot["Datetime"],
    Updated_Raw=covid_vax_admin_pivot["Datetime"].apply(_as_of_label),
    URL=f"https://{CDC_SERVER}/{CDC_ADMIN_DATA}",
)

In [None]:
# Preview the last rows exactly as they will be written to the CSV.
covid_vax_admin_pivot.loc[:, FINAL_COLUMNS].tail()

In [None]:
# Create the output folder on demand so the save also works on a fresh
# checkout where "AnalysisOutput" does not exist yet.
EXPOSURE_CSV.parent.mkdir(parents=True, exist_ok=True)
print("Saving CDC Exposure Data to:", EXPOSURE_CSV.resolve())
covid_vax_admin_pivot[FINAL_COLUMNS].to_csv(EXPOSURE_CSV, index=False)

### Exploratory Work

In [None]:
# National ("US") rows with the total and age-banded administered counts.
age_band_columns = [
    "date",
    "location",
    "administered",
    "administered_12plus",
    "administered_18plus",
    "administered_65plus",
]
us_rows = exposure_df["location"] == "US"
exposure_df.loc[us_rows, age_band_columns]

In [None]:
# Calculate Youth (0-17) administration
exposure_df["administered_under18"] = exposure_df.apply(
    lambda c: c["administered"] - c["administered_18plus"]
    if c["administered_18plus"] > 0
    else 0,
    axis=1,
)

In [None]:
# Presumably the number of US residents under 18; the source of this figure is
# not recorded in the notebook — TODO confirm.
US_YOUTH_POPULATION = 72822113.0
exposure_df["us_youth_population"] = US_YOUTH_POPULATION

In [None]:
# Ratio of under-18 doses to the youth population column.
youth_doses = exposure_df["administered_under18"]
youth_population = exposure_df["us_youth_population"]
exposure_df["us_youth_uptake"] = youth_doses.div(youth_population)

In [None]:
# Copy the first national ("US") rows, with the derived youth columns, to the
# clipboard. "us_youth_population" used to be listed twice, which duplicated
# the column in the pasted table; it now appears once.
# NOTE(review): to_clipboard() needs a working system clipboard and may fail
# on a headless kernel.
exposure_df.loc[
    (exposure_df["location"] == "US"),
    [
        "date",
        "location",
        "administered",
        "administered_12plus",
        "administered_18plus",
        "administered_65plus",
        "administered_under18",
        "us_youth_population",
        "us_youth_uptake",
    ],
].head().to_clipboard()

In [None]:
# Create the output folder on demand so the save also works on a fresh
# checkout where "AnalysisOutput" does not exist yet.
# NOTE(review): the whole exposure_df (every location) is written, although
# the youth columns are based on a single US population figure — confirm that
# this is intended.
US_AGE_EXPOSURE_CSV.parent.mkdir(parents=True, exist_ok=True)
print("Saving Age Based CDC Exposure Data to:", US_AGE_EXPOSURE_CSV.resolve())
exposure_df.to_csv(US_AGE_EXPOSURE_CSV, index=False)

In [None]:
# List every column currently present in the exposure frame.
exposure_df.columns

In [None]:
# Earlier attempt against a different ECDC feed, kept for reference:
# pd.read_csv("https://opendata.ecdc.europa.eu/covid19/nationalcasedeath_eueea_daily_ei/csv", na_values = "", encoding = "utf_8")
ecdc_vaccine_tracker_url = (
    "https://opendata.ecdc.europa.eu/covid19/vaccine_tracker/csv/data.csv"
)
# Load the ECDC vaccine tracker CSV straight from the URL.
ecdc_exposure = pd.read_csv(
    ecdc_vaccine_tracker_url, na_values="", encoding="utf_8"
)

In [None]:
# Show the first rows; the call parentheses were missing, so the cell used to
# display the bound method instead of any data.
ecdc_exposure.head()