Merge 4bde047 into 375794e

Kilo59 committed May 20, 2020
2 parents 375794e + 4bde047 commit 07a0167
Showing 19 changed files with 295 additions and 130 deletions.
10 changes: 10 additions & 0 deletions .deepsource.toml
@@ -0,0 +1,10 @@
+version = 1
+
+test_patterns = ["tests/**"]
+
+[[analyzers]]
+name = "python"
+enabled = true
+
+[analyzers.meta]
+runtime_version = "3.x.x"
1 change: 1 addition & 0 deletions .gitignore
@@ -51,6 +51,7 @@ htmlcov/
 nosetests.xml
 coverage.xml
 *,cover
+locustfile.py
 
 # Translations
 *.mo
6 changes: 5 additions & 1 deletion app/data/__init__.py
@@ -4,7 +4,11 @@
 from ..services.location.nyt import NYTLocationService
 
 # Mapping of services to data-sources.
-DATA_SOURCES = {"jhu": JhuLocationService(), "csbs": CSBSLocationService(), "nyt": NYTLocationService()}
+DATA_SOURCES = {
+    "jhu": JhuLocationService(),
+    "csbs": CSBSLocationService(),
+    "nyt": NYTLocationService(),
+}
 
 
 def data_source(source):
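Context note (not part of this commit): the body of data_source is collapsed in this view. A minimal sketch of what such a lookup typically does, assuming the mapping above — the .get call and lower-casing are illustrative, not confirmed by this diff:

def data_source(source):
    """Retrieve the data-source service for the given name, e.g. "jhu"."""
    return DATA_SOURCES.get(source.lower())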
13 changes: 11 additions & 2 deletions app/io.py
@@ -10,7 +10,11 @@
 
 
 def save(
-    name: str, content: Union[str, Dict, List], write_mode: str = "w", indent: int = 2, **json_dumps_kwargs
+    name: str,
+    content: Union[str, Dict, List],
+    write_mode: str = "w",
+    indent: int = 2,
+    **json_dumps_kwargs,
 ) -> pathlib.Path:
     """Save content to a file. If content is a dictionary, use json.dumps()."""
     path = DATA / name
@@ -35,7 +39,12 @@ class AIO:
 
     @classmethod
     async def save(
-        cls, name: str, content: Union[str, Dict, List], write_mode: str = "w", indent: int = 2, **json_dumps_kwargs
+        cls,
+        name: str,
+        content: Union[str, Dict, List],
+        write_mode: str = "w",
+        indent: int = 2,
+        **json_dumps_kwargs,
     ):
         """Save content to a file. If content is a dictionary, use json.dumps()."""
         path = DATA / name
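Context note (not part of this commit): a usage sketch of the save helper just reformatted. The file name is hypothetical; the JSON behavior and return type follow the signature and docstring shown above.

from app.io import save

# A dict or list is serialized with json.dumps (indent=2 by default);
# a str is written as-is. Returns the pathlib.Path it wrote under DATA.
path = save("example.json", {"status": "ok", "count": 3})
print(path)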
8 changes: 6 additions & 2 deletions app/location/__init__.py
@@ -11,7 +11,7 @@ class Location:  # pylint: disable=too-many-instance-attributes
     """
 
     def __init__(
-        self, id, country, province, coordinates, last_updated, confirmed, deaths, recovered
+        self, id, country, province, coordinates, last_updated, confirmed, deaths, recovered,
     ):  # pylint: disable=too-many-arguments
         # General info.
         self.id = id
@@ -66,7 +66,11 @@ def serialize(self):
             # Last updated.
             "last_updated": self.last_updated,
             # Latest data (statistics).
-            "latest": {"confirmed": self.confirmed, "deaths": self.deaths, "recovered": self.recovered},
+            "latest": {
+                "confirmed": self.confirmed,
+                "deaths": self.deaths,
+                "recovered": self.recovered,
+            },
         }
 
 
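Context note (not part of this commit): a quick check of the serialized shape this hunk reformats. The constructor arguments come from the first hunk; the coordinates and statistic values are illustrative assumptions.

from app.coordinates import Coordinates
from app.location import Location

loc = Location(
    0, "US", "New York", Coordinates(40.71, -74.01), "2020-05-20T12:00:00Z", 100, 10, 0,
)
# The reformatted "latest" block serializes exactly these three statistics.
assert loc.serialize()["latest"] == {"confirmed": 100, "deaths": 10, "recovered": 0}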
8 changes: 6 additions & 2 deletions app/main.py
@@ -34,7 +34,7 @@
         "API for tracking the global coronavirus (COVID-19, SARS-CoV-2) outbreak."
         " Project page: https://github.com/ExpDev07/coronavirus-tracker-api."
     ),
-    version="2.0.3",
+    version="2.0.4",
     docs_url="/",
     redoc_url="/docs",
     on_startup=[setup_client_session],
@@ -59,7 +59,11 @@
 
 # Enable CORS.
 APP.add_middleware(
-    CORSMiddleware, allow_credentials=True, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"],
+    CORSMiddleware,
+    allow_credentials=True,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
 )
 APP.add_middleware(GZipMiddleware, minimum_size=1000)
 
6 changes: 5 additions & 1 deletion app/routers/v1.py
@@ -19,7 +19,11 @@ async def all_categories():
         "deaths": deaths,
         "recovered": recovered,
         # Latest.
-        "latest": {"confirmed": confirmed["latest"], "deaths": deaths["latest"], "recovered": recovered["latest"],},
+        "latest": {
+            "confirmed": confirmed["latest"],
+            "deaths": deaths["latest"],
+            "recovered": recovered["latest"],
+        },
     }
 
 
14 changes: 11 additions & 3 deletions app/routers/v2.py
@@ -65,11 +65,17 @@ async def get_locations(
 
     # Do filtering.
     try:
-        locations = [location for location in locations if str(getattr(location, key)).lower() == str(value)]
+        locations = [
+            location
+            for location in locations
+            if str(getattr(location, key)).lower() == str(value)
+        ]
     except AttributeError:
         pass
     if not locations:
-        raise HTTPException(404, detail=f"Source `{source}` does not have the desired location data.")
+        raise HTTPException(
+            404, detail=f"Source `{source}` does not have the desired location data.",
+        )
 
     # Return final serialized data.
     return {
@@ -84,7 +90,9 @@
 
 # pylint: disable=invalid-name
 @V2.get("/locations/{id}", response_model=LocationResponse)
-async def get_location_by_id(request: Request, id: int, source: Sources = "jhu", timelines: bool = True):
+async def get_location_by_id(
+    request: Request, id: int, source: Sources = "jhu", timelines: bool = True
+):
     """
     Getting specific location by id.
     """
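Context note (not part of this commit): the list comprehension reformatted above implements a case-insensitive attribute filter that silently skips unknown keys. A self-contained sketch with hypothetical stand-in objects:

from dataclasses import dataclass


@dataclass
class Loc:  # hypothetical stand-in for a location object
    country_code: str
    province: str


locations = [Loc("US", "New York"), Loc("GB", "")]
key, value = "country_code", "us"

try:
    locations = [
        location
        for location in locations
        if str(getattr(location, key)).lower() == str(value)
    ]
except AttributeError:
    pass  # unknown filter key: leave the list unfiltered

print(locations)  # [Loc(country_code='US', province='New York')]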
95 changes: 53 additions & 42 deletions app/services/location/csbs.py
@@ -6,6 +6,7 @@
 from asyncache import cached
 from cachetools import TTLCache
 
+from ...caches import check_cache, load_cache
 from ...coordinates import Coordinates
 from ...location.csbs import CSBSLocation
 from ...utils import httputils
@@ -34,7 +35,7 @@ async def get(self, loc_id):  # pylint: disable=arguments-differ
 BASE_URL = "https://facts.csbs.org/covid-19/covid19_county.csv"
 
 
-@cached(cache=TTLCache(maxsize=1, ttl=3600))
+@cached(cache=TTLCache(maxsize=1, ttl=1800))
 async def get_locations():
     """
     Retrieves county locations; locations are cached for 1 hour
@@ -44,48 +45,58 @@ async def get_locations():
     """
     data_id = "csbs.locations"
     LOGGER.info(f"{data_id} Requesting data...")
-    async with httputils.CLIENT_SESSION.get(BASE_URL) as response:
-        text = await response.text()
-
-    LOGGER.debug(f"{data_id} Data received")
-
-    data = list(csv.DictReader(text.splitlines()))
-    LOGGER.debug(f"{data_id} CSV parsed")
-
-    locations = []
-
-    for i, item in enumerate(data):
-        # General info.
-        state = item["State Name"]
-        county = item["County Name"]
-
-        # Ensure country is specified.
-        if county in {"Unassigned", "Unknown"}:
-            continue
-
-        # Coordinates.
-        coordinates = Coordinates(item["Latitude"], item["Longitude"])  # pylint: disable=unused-variable
-
-        # Date string without "EDT" at end.
-        last_update = " ".join(item["Last Update"].split(" ")[0:2])
-
-        # Append to locations.
-        locations.append(
-            CSBSLocation(
-                # General info.
-                i,
-                state,
-                county,
-                # Coordinates.
-                Coordinates(item["Latitude"], item["Longitude"]),
-                # Last update (parse as ISO).
-                datetime.strptime(last_update, "%Y-%m-%d %H:%M").isoformat() + "Z",
-                # Statistics.
-                int(item["Confirmed"] or 0),
-                int(item["Death"] or 0),
-            )
-        )
-    LOGGER.info(f"{data_id} Data normalized")
+    # check shared cache
+    cache_results = await check_cache(data_id)
+    if cache_results:
+        LOGGER.info(f"{data_id} using shared cache results")
+        locations = cache_results
+    else:
+        LOGGER.info(f"{data_id} shared cache empty")
+        async with httputils.CLIENT_SESSION.get(BASE_URL) as response:
+            text = await response.text()
+
+        LOGGER.debug(f"{data_id} Data received")
+
+        data = list(csv.DictReader(text.splitlines()))
+        LOGGER.debug(f"{data_id} CSV parsed")
+
+        locations = []
+
+        for i, item in enumerate(data):
+            # General info.
+            state = item["State Name"]
+            county = item["County Name"]
+
+            # Ensure country is specified.
+            if county in {"Unassigned", "Unknown"}:
+                continue
+
+            # Date string without "EDT" at end.
+            last_update = " ".join(item["Last Update"].split(" ")[0:2])
+
+            # Append to locations.
+            locations.append(
+                CSBSLocation(
+                    # General info.
+                    i,
+                    state,
+                    county,
+                    # Coordinates.
+                    Coordinates(item["Latitude"], item["Longitude"]),
+                    # Last update (parse as ISO).
+                    datetime.strptime(last_update, "%Y-%m-%d %H:%M").isoformat() + "Z",
+                    # Statistics.
+                    int(item["Confirmed"] or 0),
+                    int(item["Death"] or 0),
+                )
+            )
+        LOGGER.info(f"{data_id} Data normalized")
+        # save the results to distributed cache
+        # TODO: fix json serialization
+        try:
+            await load_cache(data_id, locations)
+        except TypeError as type_err:
+            LOGGER.error(type_err)
 
     # Return the locations.
     return locations
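Context note (not part of this commit): check_cache and load_cache come from the new app.caches module, which this diff does not show. A rough sketch of the read-through pattern they appear to implement — the Redis backend, aioredis 1.x API, key scheme, and TTL are all assumptions for illustration:

import json

import aioredis  # assumed backend; the real app.caches implementation is not shown

REDIS_URL = "redis://localhost:6379"  # hypothetical connection string
EXPIRE = 3600  # hypothetical TTL; jhu.py's docstring mentions "1 hour via shared Redis"


async def check_cache(data_id: str):
    """Return the cached value for data_id, or None on a miss."""
    redis = await aioredis.create_redis_pool(REDIS_URL)
    raw = await redis.get(data_id)
    return json.loads(raw) if raw else None


async def load_cache(data_id: str, results):
    """Store results as JSON; raises TypeError when results are not
    JSON-serializable, which is the failure the try/except above guards."""
    redis = await aioredis.create_redis_pool(REDIS_URL)
    await redis.set(data_id, json.dumps(results), expire=EXPIRE)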
54 changes: 42 additions & 12 deletions app/services/location/jhu.py
@@ -41,12 +41,10 @@ async def get(self, loc_id):  # pylint: disable=arguments-differ
 
 
 # Base URL for fetching category.
-BASE_URL = (
-    "https://raw.githubusercontent.com/CSSEGISandData/2019-nCoV/master/csse_covid_19_data/csse_covid_19_time_series/"
-)
+BASE_URL = "https://raw.githubusercontent.com/CSSEGISandData/2019-nCoV/master/csse_covid_19_data/csse_covid_19_time_series/"
 
 
-@cached(cache=TTLCache(maxsize=128, ttl=1800))
+@cached(cache=TTLCache(maxsize=4, ttl=1800))
 async def get_category(category):
     """
     Retrieves the data for the provided category. The data is cached for 30 minutes locally, 1 hour via shared Redis.
@@ -129,7 +127,7 @@ async def get_category(category):
     return results
 
 
-@cached(cache=TTLCache(maxsize=1024, ttl=1800))
+@cached(cache=TTLCache(maxsize=1, ttl=1800))
 async def get_locations():
     """
     Retrieves the locations from the categories. The locations are cached for 1 hour.
Expand All @@ -142,22 +140,31 @@ async def get_locations():
# Get all of the data categories locations.
confirmed = await get_category("confirmed")
deaths = await get_category("deaths")
# recovered = await get_category("recovered")
recovered = await get_category("recovered")

locations_confirmed = confirmed["locations"]
locations_deaths = deaths["locations"]
# locations_recovered = recovered["locations"]
locations_recovered = recovered["locations"]

# Final locations to return.
locations = []

# ***************************************************************************
# TODO: This iteration approach assumes the indexes remain the same
# and opens us to a CRITICAL ERROR. The removal of a column in the data source
# would break the API or SHIFT all the data confirmed, deaths, recovery producting
# incorrect data to consumers.
# ***************************************************************************
# Go through locations.
for index, location in enumerate(locations_confirmed):
# Get the timelines.

# TEMP: Fix for merging recovery data. See TODO above for more details.
key = (location["country"], location["province"])

timelines = {
"confirmed": locations_confirmed[index]["history"],
"deaths": locations_deaths[index]["history"],
# 'recovered' : locations_recovered[index]['history'],
"confirmed": location["history"],
"deaths": parse_history(key, locations_deaths, index),
"recovered": parse_history(key, locations_recovered, index),
}

# Grab coordinates.
@@ -188,11 +195,34 @@ async def get_locations():
                             for date, amount in timelines["deaths"].items()
                         }
                     ),
-                    "recovered": Timeline({}),
+                    "recovered": Timeline(
+                        {
+                            datetime.strptime(date, "%m/%d/%y").isoformat() + "Z": amount
+                            for date, amount in timelines["recovered"].items()
+                        }
+                    ),
                 },
             )
         )
     LOGGER.info(f"{data_id} Data normalized")
 
     # Finally, return the locations.
     return locations
+
+
+def parse_history(key: tuple, locations: list, index: int):
+    """
+    Helper for validating and extracting history content from
+    locations data based on index. Validates against the current
+    country/province key to make sure there is no index/column issue.
+    TEMP: solution until a more efficient approach is implemented in the refactor.
+    """
+    location_history = {}
+    try:
+        if key == (locations[index]["country"], locations[index]["province"]):
+            location_history = locations[index]["history"]
+    except (IndexError, KeyError):
+        LOGGER.debug(f"iteration data merge error: {index} {key}")
+
+    return location_history
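Context note (not part of this commit): a small demonstration of the new parse_history guard with made-up rows — when the row at index no longer matches the (country, province) key, the helper returns an empty history instead of another location's data.

from app.services.location.jhu import parse_history

deaths = [
    {"country": "US", "province": "New York", "history": {"5/19/20": 28636}},
    {"country": "US", "province": "Texas", "history": {"5/19/20": 1440}},
]

# Index and key agree: the matching history is returned.
print(parse_history(("US", "New York"), deaths, 0))  # {'5/19/20': 28636}

# A dropped or shifted row: the key check fails and {} is returned.
print(parse_history(("US", "Texas"), deaths, 0))  # {}

# Index out of range: the IndexError is swallowed and logged at debug level.
print(parse_history(("US", "Texas"), deaths, 5))  # {}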
