In [1]:
import requests
import bs4
import re
import json
import urllib.parse
import dateutil.parser

# ArcGIS
Fetch two things from ArcGIS in one step: the case count data, and the page that carries the date and time the data was last updated:

In [2]:
# Two requests: the Hub landing page (parsed later for the "last updated"
# timestamp) and a FeatureServer statistics query that sums the `Total` field
# into `Total_sum`.
# A timeout is set so a stalled server cannot hang the notebook indefinitely.
response_containing_date = requests.get(
    "https://navajo-nation-coronavirus-response-ndoh-nec.hub.arcgis.com",
    timeout=30,
)
response_containing_case_count = requests.get(
    "https://services7.arcgis.com/9dF9awxpVpgKyoYk/arcgis/rest/services/SA/FeatureServer/0/query"
    "?f=json&where=1%3D1&outFields=*&returnGeometry=false&outStatistics=%5B%7B%22onStatisticField%22%3A%22Total%22%2C%22"
    "outStatisticFieldName%22%3A%22Total_sum%22%2C%22statisticType%22%3A%22sum%22%7D%5D",
    timeout=30,
)
print(response_containing_date.status_code)
print(response_containing_case_count.status_code)

# Fail here, with the HTTP error, rather than later with an opaque parsing
# error when a non-2xx body is fed to the parsers.
response_containing_date.raise_for_status()
response_containing_case_count.raise_for_status()

200
200


The timestamp is buried in URL-encoded JSON inside a script tag on the page, so it takes a lot of parsing just to extract the date and time the data was updated:

In [3]:
# Recover the "last updated" timestamp from the ArcGIS Hub landing page.
# The page embeds its configuration as a URL-encoded JSON string assigned to
# `window.__SITE` inside the `#site-injection` script element; the timestamp
# lives in the markdown of the first card of the first row of the first section.
arcgis_site_soup = bs4.BeautifulSoup(response_containing_date.text, "lxml")
injection_script_element = arcgis_site_soup.select_one("#site-injection")
if injection_script_element is None:
    raise ValueError("ArcGIS page has no #site-injection element; the page layout may have changed")
injection_script = injection_script_element.contents[0]

# The dot in `window.__SITE` is escaped so it only matches a literal ".".
site_assignment_match = re.match(
    r"\s*window\.__SITE\s*=\s*\"(?P<content>.*)\"",
    injection_script,
)
if site_assignment_match is None:
    raise ValueError("Could not find the window.__SITE assignment in the #site-injection script")
encoded_injection_content_string = site_assignment_match.group("content")

injection_content_string = urllib.parse.unquote(encoded_injection_content_string)
injection_content = json.loads(injection_content_string)
html_with_date = (
    injection_content
    ["site"]["data"]["values"]["layout"]["sections"][0]["rows"][0]["cards"][0]["component"]["settings"]["markdown"]
)
soup_with_date = bs4.BeautifulSoup(html_with_date, "html.parser")
text_with_date = soup_with_date.get_text()

# fuzzy=True lets dateutil skip the non-date words surrounding the timestamp.
report_date = dateutil.parser.parse(
    text_with_date,
    fuzzy=True,
)

# `%e` and `%-l` are C-library extensions that are not available on every
# platform, so the space-padded day and unpadded 12-hour clock hour are
# formatted in Python instead. The output text is the same.
hour_12 = report_date.hour % 12 or 12
print(f"{report_date:%A, %B} {report_date.day:>2}  {hour_12}:{report_date:%M %p}")

Thursday, June 18  5:00 PM


Do a tiny bit of parsing to get the actual count:

In [4]:
# Read the summed total out of the first feature of the statistics response.
case_count_data = response_containing_case_count.json()
aggregate_attributes = case_count_data["features"][0]["attributes"]
case_count = aggregate_attributes["Total_sum"]
print(case_count)

6824


Print a CSV line from the data (date, case count, and an empty third field, since no death count is fetched from ArcGIS here):

In [5]:
# Build the ArcGIS CSV row: ISO date, case count, and an empty trailing field.
arcgis_date_field = format(report_date, "%Y-%m-%d")
arcgis_data = ",".join([arcgis_date_field, str(case_count), ""])
print(arcgis_data)

2020-06-18,6824,


# Navajo Nation site
Fetch the site:

In [6]:
# Fetch the Navajo Nation COVID-19 page. The timeout keeps a stalled server
# from hanging the notebook, and raise_for_status() surfaces HTTP errors here
# instead of as confusing parse failures in the next cell.
navajo_data_website = requests.get(
    "https://www.ndoh.navajo-nsn.gov/COVID-19",
    timeout=30,
)
navajo_data_website.raise_for_status()

Extract the good stuff:

In [7]:
navajo_data_soup = bs4.BeautifulSoup(navajo_data_website.text, "lxml")

# Flatten the selected column into one non-empty, whitespace-trimmed text
# fragment per line, so the next step can match it line by line.
summary_column = navajo_data_soup.select_one("[id^='main-content'] [data-col='5']")
trimmed_fragments = (fragment.strip() for fragment in summary_column.strings)
comprehensible_navajo_data = "\n".join(
    fragment for fragment in trimmed_fragments if fragment != ""
)
print(comprehensible_navajo_data)

6,832
Positive Cases of COVID-19
Last Updated June 18, 2020
Total Negative Tests:
37,413
Total Confirmed Deaths:
324


Parse the good stuff into a CSV line:

In [8]:
# Pull the counts and the "last updated" line out of the flattened site text.
# The pattern expects this line order: case count, a "positive" label, the
# update-date line, a "negative" label and its count, a "deaths" label and its
# count.
parsed_navajo_data = re.match(
    (
        r"(?P<case_count>[\d,]+)\n"
        r"positive[^\n]*\n"
        r"(?P<update_date_text>[^\n]+)\n"
        r"[^\n]*negative[^\n]*\n"
        r"(?P<negative_count>[\d,]+)\n"
        r"[^\n]*deaths[^\n]*\n"
        r"(?P<death_count>[\d,]+)"
    ),
    comprehensible_navajo_data,
    # The pattern has no ^/$ anchors, so re.MULTILINE would have no effect.
    flags=re.IGNORECASE,
)
if parsed_navajo_data is None:
    # Without this check a layout change shows up as an AttributeError on None.
    raise ValueError(
        "Navajo Nation site text does not match the expected layout:\n"
        f"{comprehensible_navajo_data}"
    )


def _parse_count(count_text):
    """Convert a count such as "6,832" to an int by dropping the thousands separators."""
    return int(count_text.replace(",", ""))


# Stored as `navajo_case_count` so the ArcGIS `case_count` from the earlier
# cell is not overwritten; otherwise re-running the ArcGIS CSV cell after this
# one would silently report the wrong count.
navajo_case_count = _parse_count(parsed_navajo_data.group("case_count"))
update_date_text = parsed_navajo_data.group("update_date_text")
negative_count = _parse_count(parsed_navajo_data.group("negative_count"))
death_count = _parse_count(parsed_navajo_data.group("death_count"))

# fuzzy=True lets dateutil ignore the non-date words around the date.
update_date = dateutil.parser.parse(
    update_date_text,
    fuzzy=True,
)

navajo_site_data = f"{update_date.strftime('%Y-%m-%d')},{navajo_case_count},{death_count}"
print(navajo_site_data)

2020-06-18,6832,324


In [9]:
# Show both CSV rows together, one per line, for comparison.
print(arcgis_data, navajo_site_data, sep="\n")

2020-06-18,6824,
2020-06-18,6832,324
