Merge 10cb8ab into e23b990

OpenDataServices · Apr 15, 2021 · 0c943a1 · 0c943a1
2 parents e23b990 + 10cb8ab
commit 0c943a1
Show file tree

Hide file tree

Showing 2 changed files with 440 additions and 15 deletions.
diff --git a/cove_360/lib/threesixtygiving.py b/cove_360/lib/threesixtygiving.py
@@ -1,19 +1,30 @@
-import json
+import datetime
 import itertools
-import openpyxl
+import json
+import re
 from collections import OrderedDict, Callable
 from decimal import Decimal
-import datetime
-import pytz
-import re
+
 import libcove.lib.tools as tools
+import openpyxl
+import pytz
+from dateutil.relativedelta import relativedelta
 from django.utils.html import mark_safe
 from libcove.lib.common import common_checks_context, get_orgids_prefixes
 from rangedict import RangeDict as range_dict
 
 QUALITY_TEST_CLASS = 'quality_accuracy'
 USEFULNESS_TEST_CLASS = 'usefulness'
 
+# Cache of parsed dates, keyed by grant id and filled by create_grant_dates_dict():
+# {grant_id: {date_type: {'datetime_date': ..., 'date_format_error': ...}}}
+GRANT_DATES = {}
+# Path suffix, appended to a grant's path prefix, locating each date type in the JSON.
+# Each entry mirrors the key create_grant_dates_dict() reads the date from, so a
+# failing award date is reported at /awardDate (it was previously reported at /id).
+DATES_JSON_LOCATION = {
+    'award_date': '/awardDate',
+    'planned_start_date': '/plannedDates/0/startDate',
+    'planned_end_date': '/plannedDates/0/endDate',
+    'actual_start_date': '/actualDates/0/startDate',
+    'actual_end_date': '/actualDates/0/endDate'
+}
+
 orgids_prefixes = get_orgids_prefixes()
 orgids_prefixes.append('360G')
 
@@ -979,6 +990,243 @@ def process(self, grant, path_prefix):
         self.message = mark_safe(self.check_text['message'][self.grants_percentage])
 
 
+class ImpossibleDates(AdditionalTest):
+    """
+    Flag grants containing a date that looks well-formed but cannot exist, e.g. the
+    31st of February, or the 29th of February in a year that is not a leap year.
+    """
+
+    check_text = {
+        "heading": mark_safe("dates that didn't, or won't, exist"),
+        "message": RangeDict()
+    }
+    check_text['message'][(0, 100)] = mark_safe(
+        "Your data contains dates that didn't, or won't, exist - such as the 31st of September, "
+        "or the 29th of February in a year that's not a leap year. This is commonly caused by typos during data entry."
+    )
+
+    def process(self, grant, path_prefix):
+        """Record at most one failure per grant: its first date with a qualifying parse error."""
+        grant_dates = GRANT_DATES.get(grant.get('id')) or {}
+
+        # Parse errors containing either marker are ignored by this check
+        # (presumably they indicate a badly formatted string rather than an impossible date).
+        ignored_error_markers = ("does not match format '%Y-%m-%d'", "unconverted data remains")
+
+        for date_type in (
+            "award_date",
+            "planned_start_date",
+            "planned_end_date",
+            "actual_start_date",
+            "actual_end_date",
+        ):
+            parse_error = grant_dates.get(date_type, {}).get('date_format_error')
+            if not parse_error:
+                continue
+            if any(marker in parse_error for marker in ignored_error_markers):
+                continue
+
+            self.failed = True
+            self.count += 1
+            self.json_locations.append(path_prefix + DATES_JSON_LOCATION[date_type])
+            # Only the first offending date of a grant is reported.
+            break
+
+        self.heading = self.format_heading_count(self.check_text['heading'])
+        self.message = self.check_text['message'][self.grants_percentage]
+
+
+class PlannedStartDateBeforeEndDate(AdditionalTest):
+    """Flag grants whose Planned Dates:Start Date falls after their Planned Dates:End Date."""
+
+    check_text = {
+        "heading": mark_safe(
+            "<span class=\"highlight-background-text\">Planned Dates: Start Date</span> entries that are after the "
+            "corresponding <span class=\"highlight-background-text\">Planned Dates: End Date</span>"),
+        "message": RangeDict()
+    }
+    check_text['message'][(0, 100)] = mark_safe(
+        "This can happen when the fields are accidentally reversed, or if there is a typo in the data. "
+        "This can also be caused by inconsistent date formatting when data was prepared using spreadsheet software."
+    )
+
+    def process(self, grant, path_prefix):
+        """Compare the grant's parsed planned dates; the start date location is reported on failure."""
+        grant_dates = GRANT_DATES.get(grant.get('id')) or {}
+        starts_on = grant_dates.get('planned_start_date', {}).get('datetime_date')
+        ends_on = grant_dates.get('planned_end_date', {}).get('datetime_date')
+
+        # Both dates must have parsed successfully before they can be compared.
+        if starts_on and ends_on and starts_on > ends_on:
+            self.failed = True
+            self.count += 1
+            self.json_locations.append(path_prefix + DATES_JSON_LOCATION['planned_start_date'])
+
+        self.heading = self.format_heading_count(self.check_text['heading'])
+        self.message = self.check_text['message'][self.grants_percentage]
+
+
+class ActualStartDateBeforeEndDate(AdditionalTest):
+    """Flag grants whose Actual Dates:Start Date falls after their Actual Dates:End Date."""
+
+    check_text = {
+        "heading": mark_safe(
+            "<span class=\"highlight-background-text\">Actual Dates: Start Date</span> entries that are after the "
+            "corresponding <span class=\"highlight-background-text\">Actual Dates: End Date</span>"),
+        "message": RangeDict()
+    }
+    check_text['message'][(0, 100)] = mark_safe(
+        "This can happen when the fields are accidentally reversed, or if there is a typo in the data. "
+        "This can also be caused by inconsistent date formatting when data was prepared using spreadsheet software."
+    )
+
+    def process(self, grant, path_prefix):
+        """Compare the grant's parsed actual dates; the start date location is reported on failure."""
+        grant_dates = GRANT_DATES.get(grant.get('id')) or {}
+        starts_on = grant_dates.get('actual_start_date', {}).get('datetime_date')
+        ends_on = grant_dates.get('actual_end_date', {}).get('datetime_date')
+
+        # Both dates must have parsed successfully before they can be compared.
+        if starts_on and ends_on and starts_on > ends_on:
+            self.failed = True
+            self.count += 1
+            self.json_locations.append(path_prefix + DATES_JSON_LOCATION['actual_start_date'])
+
+        self.heading = self.format_heading_count(self.check_text['heading'])
+        self.message = self.check_text['message'][self.grants_percentage]
+
+
+class FarFuturePlannedDates(AdditionalTest):
+    """Check if dates in plannedDates are > 12 years into the future, from the present day."""
+
+    check_text = {
+        "heading": mark_safe("Planned Dates that are over 12 years in the future"),
+        "message": RangeDict()
+    }
+    check_text['message'][(0, 100)] = mark_safe(
+        "Your data contains Planned Dates that are more than 12 years into the future. You can disregard this check if "
+        "your data is about activities that run a long time into the future, but you should check for data entry "
+        "errors if this isn't expected."
+    )
+
+    def process(self, grant, path_prefix):
+        """Record at most one failure per grant: its first planned date beyond the cut-off."""
+        grant_dates = GRANT_DATES.get(grant.get('id')) or {}
+
+        # Computed once so both dates of the grant are compared against the same instant.
+        latest_plausible = datetime.datetime.now() + relativedelta(years=12)
+
+        for date_type in ("planned_start_date", "planned_end_date"):
+            parsed_date = grant_dates.get(date_type, {}).get('datetime_date')
+            if parsed_date and parsed_date > latest_plausible:
+                self.failed = True
+                self.count += 1
+                self.json_locations.append(path_prefix + DATES_JSON_LOCATION[date_type])
+                # Only the first offending date of a grant is reported.
+                break
+
+        self.heading = self.format_heading_count(self.check_text['heading'])
+        self.message = self.check_text['message'][self.grants_percentage]
+
+
+class FarFutureActualDates(AdditionalTest):
+    """Check if dates in actualDates are > 5 years into the future, from the present day."""
+
+    check_text = {
+        "heading": mark_safe("Actual Date entries that are over 5 years in the future"),
+        "message": RangeDict()
+    }
+    check_text['message'][(0, 100)] = mark_safe(
+        "Your data contains Actual Date entries that are more than 5 years into the future. You can disregard this "
+        "check if your data is about activities in the future, but you should check for data entry errors "
+        "if this isn't expected."
+    )
+
+    def process(self, grant, path_prefix):
+        """Record at most one failure per grant: its first actual date beyond the cut-off."""
+        grant_dates = GRANT_DATES.get(grant.get('id')) or {}
+
+        # Computed once so both dates of the grant are compared against the same instant.
+        latest_plausible = datetime.datetime.now() + relativedelta(years=5)
+
+        for date_type in ("actual_start_date", "actual_end_date"):
+            parsed_date = grant_dates.get(date_type, {}).get('datetime_date')
+            if parsed_date and parsed_date > latest_plausible:
+                self.failed = True
+                self.count += 1
+                self.json_locations.append(path_prefix + DATES_JSON_LOCATION[date_type])
+                # Only the first offending date of a grant is reported.
+                break
+
+        self.heading = self.format_heading_count(self.check_text['heading'])
+        self.message = self.check_text['message'][self.grants_percentage]
+
+
+class FarPastDates(AdditionalTest):
+    """Check if dates in awardDate, plannedDates, actualDates are > 25 years in the past, from the present day."""
+
+    check_text = {
+        "heading": mark_safe("dates that are over 25 years ago"),
+        "message": RangeDict()
+    }
+    check_text['message'][(0, 100)] = mark_safe(
+        "Your data contains dates that are more than 25 years ago. You can disregard this check if your "
+        "data is about activities in the past, but you should check for data entry errors if this isn't expected."
+    )
+
+    def process(self, grant, path_prefix):
+        """Record at most one failure per grant: its first date earlier than the cut-off."""
+        grant_dates = GRANT_DATES.get(grant.get('id')) or {}
+
+        # Computed once so every date of the grant is compared against the same instant.
+        earliest_plausible = datetime.datetime.now() - relativedelta(years=25)
+
+        for date_type in (
+            "award_date",
+            "planned_start_date",
+            "planned_end_date",
+            "actual_start_date",
+            "actual_end_date",
+        ):
+            parsed_date = grant_dates.get(date_type, {}).get('datetime_date')
+            if parsed_date and parsed_date < earliest_plausible:
+                self.failed = True
+                self.count += 1
+                self.json_locations.append(path_prefix + DATES_JSON_LOCATION[date_type])
+                # Only the first offending date of a grant is reported.
+                break
+
+        self.heading = self.format_heading_count(self.check_text['heading'])
+        self.message = self.check_text['message'][self.grants_percentage]
+
+
+class PostDatedAwardDates(AdditionalTest):
+    """Flag grants whose awardDate lies in the future, relative to the present day."""
+
+    check_text = {
+        "heading": mark_safe("Award Dates that are in the future"),
+        "message": RangeDict()
+    }
+    check_text['message'][(0, 100)] = mark_safe(
+        "Your data contains grant Award Dates in the future. This field is for when was the decision to award "
+        "this grant made so the date would normally be in the past. This can happen when there is a typo in the date, "
+        "or the data includes grants that are not yet fully committed."
+    )
+
+    def process(self, grant, path_prefix):
+        """Compare the grant's parsed award date with the current local time."""
+        grant_dates = GRANT_DATES.get(grant.get('id')) or {}
+        awarded_on = grant_dates.get('award_date', {}).get('datetime_date')
+
+        # An award date that is missing or failed to parse is not checked.
+        if awarded_on and awarded_on > datetime.datetime.now():
+            self.failed = True
+            self.count += 1
+            self.json_locations.append(path_prefix + DATES_JSON_LOCATION['award_date'])
+
+        self.heading = self.format_heading_count(self.check_text['heading'])
+        self.message = self.check_text['message'][self.grants_percentage]
+
+
 TEST_CLASSES = {
     QUALITY_TEST_CLASS: [
         ZeroAmountTest,
@@ -989,6 +1237,13 @@ def process(self, grant, path_prefix):
         OrganizationIdLooksInvalid,
         MoreThanOneFundingOrg,
         LooksLikeEmail,
+        ImpossibleDates,
+        PlannedStartDateBeforeEndDate,
+        ActualStartDateBeforeEndDate,
+        FarFuturePlannedDates,
+        FarFutureActualDates,
+        FarPastDates,
+        PostDatedAwardDates,
     ],
     USEFULNESS_TEST_CLASS: [
         RecipientOrg360GPrefix,
@@ -1005,13 +1260,63 @@ def process(self, grant, path_prefix):
 }
 
 
+def convert_string_date_to_datetime(input_date):
+    """
+    Parse the date part of a date string into a naive datetime.
+
+    Date formats that will be converted are:
+
+    YYYY-MM-DD
+    YYYY-MM-DDT...   (everything from the first 'T' onwards is ignored)
+
+    Returns a ``(datetime_date, error_msg)`` tuple. On success ``error_msg`` is None;
+    on failure ``datetime_date`` is None and ``error_msg`` is the text of the
+    ValueError raised by ``strptime``.
+    """
+    # Keep only what precedes the first 'T'; a string without a 'T' is left unchanged.
+    # (The former recursive call here discarded its result and has been removed.)
+    date_part = input_date.partition('T')[0]
+
+    try:
+        return datetime.datetime.strptime(date_part, "%Y-%m-%d"), None
+    except ValueError as e:
+        return None, str(e)
+
+
+def _first_date_entry(grant, key):
+    """Return the first item of grant[key] when it is a non-empty list starting with a dict, else {}."""
+    entries = grant.get(key)
+    if isinstance(entries, list) and entries and isinstance(entries[0], dict):
+        return entries[0]
+    return {}
+
+
+def create_grant_dates_dict(grant):
+    """
+    Store the grant's parsed dates in the module-level GRANT_DATES dict:
+
+    GRANT_DATES: {'id': { 'date_type': {'datetime_date': datetime_date, 'date_format_error': error_msg}}}
+
+    Dates are needed in several additional checks. With this dict, dates are converted just once.
+
+    The entry for the grant's id is replaced as a whole (or removed when the grant has
+    no usable dates), so date types left over from an earlier grant with the same id
+    cannot leak into this grant's checks.
+    """
+    grant_id = grant.get("id")
+    # An empty or malformed plannedDates/actualDates list is treated as "no dates"
+    # rather than raising IndexError/AttributeError.
+    planned = _first_date_entry(grant, "plannedDates")
+    actual = _first_date_entry(grant, "actualDates")
+
+    parsed_dates = {}
+    for date_type, input_date in (
+        ("award_date", grant.get("awardDate")),
+        ("planned_start_date", planned.get('startDate')),
+        ("planned_end_date", planned.get('endDate')),
+        ("actual_start_date", actual.get('startDate')),
+        ("actual_end_date", actual.get('endDate')),
+    ):
+        # Only non-empty strings can be parsed; other JSON value types are skipped.
+        if input_date and isinstance(input_date, str):
+            datetime_date, error_msg = convert_string_date_to_datetime(input_date)
+            parsed_dates[date_type] = {'datetime_date': datetime_date, 'date_format_error': error_msg}
+
+    if parsed_dates:
+        GRANT_DATES[grant_id] = parsed_dates
+    else:
+        GRANT_DATES.pop(grant_id, None)
+
+
 @tools.ignore_errors
 def run_extra_checks(json_data, cell_source_map, test_classes):
     if 'grants' not in json_data:
         return []
     test_instances = [test_cls(grants=json_data['grants']) for test_cls in test_classes]
 
     for num, grant in enumerate(json_data['grants']):
+        create_grant_dates_dict(grant)
+
         for test_instance in test_instances:
             test_instance.process(grant, 'grants/{}'.format(num))