Skip to content
44 changes: 32 additions & 12 deletions web_programming/covid_stats_via_xpath.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
"""
This script demonstrates fetching simple COVID-19 statistics from the
Worldometers archive site using lxml.

* lxml is chosen over BeautifulSoup (bs4) because it is faster and therefore
  more convenient to use in Python web projects (e.g. Django or Flask-based).
"""

# /// script
Expand All @@ -25,15 +26,34 @@ class CovidData(NamedTuple):


def covid_stats(
    url: str = (
        "https://web.archive.org/web/20250825095350/"
        "https://www.worldometers.info/coronavirus/"
    ),
) -> CovidData:
    """
    Fetch the headline COVID-19 counters from a Worldometers (archive) page.

    The page is downloaded with a 10 second timeout and parsed with lxml; the
    text of every ``<span>`` directly inside a ``div`` whose class is
    ``maincounter-number`` is collected. Exactly three values are expected
    (the ``__main__`` block prints them as cases, deaths and recovered, in
    that order).

    Instead of raising, the function prints a short diagnostic and returns
    ``CovidData("N/A", "N/A", "N/A")`` when the request times out, fails at
    the network level, answers with an HTTP error status, or when the page
    does not yield exactly three counters.

    :param url: address of the page to scrape.
    :return: a ``CovidData`` holding the three scraped strings, or three
        ``"N/A"`` placeholders on any failure.
    """
    xpath_str = '//div[@class = "maincounter-number"]/span/text()'
    unavailable = CovidData("N/A", "N/A", "N/A")

    try:
        response = httpx.get(url, timeout=10)
        # Called as a separate statement so the code does not depend on
        # raise_for_status() returning the response object.
        response.raise_for_status()
    except httpx.TimeoutException:
        # Must precede RequestError: TimeoutException is one of its subclasses.
        print(
            "Request timed out. Please check your network connection "
            "or try again later."
        )
        return unavailable
    except httpx.HTTPStatusError as e:
        print(f"HTTP error occurred: {e}")
        return unavailable
    except httpx.RequestError as e:
        # Remaining transport failures (DNS lookup, refused connection, ...).
        print(f"Network error occurred: {e}")
        return unavailable

    data = html.fromstring(response.content).xpath(xpath_str)
    if len(data) != 3:
        print("Unexpected data format. The page structure may have changed.")
        return unavailable
    return CovidData(*data)


if __name__ == "__main__":
    # One placeholder per value returned by covid_stats(), in order.
    fmt = (
        "Total COVID-19 cases in the world: {}\n"
        "Total deaths due to COVID-19 in the world: {}\n"
        "Total COVID-19 patients recovered in the world: {}"
    )
    # Print the report exactly once; covid_stats() performs the network request.
    print(fmt.format(*covid_stats()))