Skip to content

Commit

Permalink
Merge 10cb8ab into e23b990
Browse files Browse the repository at this point in the history
  • Loading branch information
BibianaC committed Apr 15, 2021
2 parents e23b990 + 10cb8ab commit 0c943a1
Show file tree
Hide file tree
Showing 2 changed files with 440 additions and 15 deletions.
315 changes: 310 additions & 5 deletions cove_360/lib/threesixtygiving.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,30 @@
import json
import datetime
import itertools
import openpyxl
import json
import re
from collections import OrderedDict, Callable
from decimal import Decimal
import datetime
import pytz
import re

import libcove.lib.tools as tools
import openpyxl
import pytz
from dateutil.relativedelta import relativedelta
from django.utils.html import mark_safe
from libcove.lib.common import common_checks_context, get_orgids_prefixes
from rangedict import RangeDict as range_dict

QUALITY_TEST_CLASS = 'quality_accuracy'
USEFULNESS_TEST_CLASS = 'usefulness'

GRANT_DATES = {}
DATES_JSON_LOCATION = {
'award_date': '/id',
'planned_start_date': '/plannedDates/0/startDate',
'planned_end_date': '/plannedDates/0/endDate',
'actual_start_date': '/actualDates/0/startDate',
'actual_end_date': '/actualDates/0/endDate'
}

orgids_prefixes = get_orgids_prefixes()
orgids_prefixes.append('360G')

Expand Down Expand Up @@ -979,6 +990,243 @@ def process(self, grant, path_prefix):
self.message = mark_safe(self.check_text['message'][self.grants_percentage])


class ImpossibleDates(AdditionalTest):
"""
Check if dates supplied are plausible (eg no 31st Feb) or
are plausible but didn't happen (eg 29th of Feb in a non-leap year).
"""

check_text = {
"heading": mark_safe("dates that didn't, or won't, exist"),
"message": RangeDict()
}
check_text['message'][(0, 100)] = mark_safe(
"Your data contains dates that didn't, or won't, exist - such as the 31st of September, "
"or the 29th of February in a year that's not a leap year. This is commonly caused by typos during data entry."
)

def process(self, grant, path_prefix):
id = grant.get('id')
grant_dates = GRANT_DATES.get(id)

if grant_dates:
for date_type, date_format_error in (
["award_date", grant_dates.get('award_date', {}).get('date_format_error')],
["planned_start_date", grant_dates.get('planned_start_date', {}).get('date_format_error')],
["planned_end_date", grant_dates.get('planned_end_date', {}).get('date_format_error')],
["actual_start_date", grant_dates.get('actual_start_date', {}).get('date_format_error')],
["actual_end_date", grant_dates.get('actual_end_date', {}).get('date_format_error')]
):
if date_format_error:
if (
"does not match format '%Y-%m-%d'" not in date_format_error
) and (
"unconverted data remains" not in date_format_error
):
self.failed = True
self.count += 1
self.json_locations.append(path_prefix + DATES_JSON_LOCATION[date_type])
break

self.heading = self.format_heading_count(self.check_text['heading'])
self.message = self.check_text['message'][self.grants_percentage]


class PlannedStartDateBeforeEndDate(AdditionalTest):
"""Check if Planned Dates:Start Date is after Planned Dates:End Date"""

check_text = {
"heading": mark_safe(
"<span class=\"highlight-background-text\">Planned Dates: Start Date</span> entries that are after the "
"corresponding <span class=\"highlight-background-text\">Planned Dates: End Date</span>"),
"message": RangeDict()
}
check_text['message'][(0, 100)] = mark_safe(
"This can happen when the fields are accidentally reversed, or if there is a typo in the data. "
"This can also be caused by inconsistent date formatting when data was prepared using spreadsheet software."
)

def process(self, grant, path_prefix):
id = grant.get('id')
grant_dates = GRANT_DATES.get(id)

if grant_dates:
planned_start_date = grant_dates.get('planned_start_date', {}).get('datetime_date')
planned_end_date = grant_dates.get('planned_end_date', {}).get('datetime_date')

if planned_start_date and planned_end_date:
if planned_start_date > planned_end_date:
self.failed = True
self.count += 1
self.json_locations.append(path_prefix + DATES_JSON_LOCATION['planned_start_date'])

self.heading = self.format_heading_count(self.check_text['heading'])
self.message = self.check_text['message'][self.grants_percentage]


class ActualStartDateBeforeEndDate(AdditionalTest):
"""Check if Actual Dates:Start Date is after Actual Dates:End Date'"""

check_text = {
"heading": mark_safe(
"<span class=\"highlight-background-text\">Actual Dates: Start Date</span> entries that are after the "
"corresponding <span class=\"highlight-background-text\">Actual Dates: End Date</span>"),
"message": RangeDict()
}
check_text['message'][(0, 100)] = mark_safe(
"This can happen when the fields are accidentally reversed, or if there is a typo in the data. "
"This can also be caused by inconsistent date formatting when data was prepared using spreadsheet software."
)

def process(self, grant, path_prefix):
id = grant.get('id')
grant_dates = GRANT_DATES.get(id)

if grant_dates:
actual_start_date = grant_dates.get('actual_start_date', {}).get('datetime_date')
actual_end_date = grant_dates.get('actual_end_date', {}).get('datetime_date')

if actual_start_date and actual_end_date:
if actual_start_date > actual_end_date:
self.failed = True
self.count += 1
self.json_locations.append(path_prefix + DATES_JSON_LOCATION['actual_start_date'])

self.heading = self.format_heading_count(self.check_text['heading'])
self.message = self.check_text['message'][self.grants_percentage]


class FarFuturePlannedDates(AdditionalTest):
"""Check if dates in plannedDates are > 12 years into the future, from the present day. """

check_text = {
"heading": mark_safe("Planned Dates that are over 12 years in the future"),
"message": RangeDict()
}
check_text['message'][(0, 100)] = mark_safe(
"Your data contains Planned Dates that are more than 12 years into the future. You can disregard this check if "
"your data is about activities that run a long time into the future, but you should check for data entry "
"errors if this isn't expected."
)

def process(self, grant, path_prefix):
id = grant.get('id')
grant_dates = GRANT_DATES.get(id)

if grant_dates:
for date_type, input_date in (
["planned_start_date", grant_dates.get('planned_start_date', {}).get('datetime_date')],
["planned_end_date", grant_dates.get('planned_end_date', {}).get('datetime_date')],
):
if input_date:
if input_date > datetime.datetime.now() + relativedelta(years=12):
self.failed = True
self.count += 1
self.json_locations.append(path_prefix + DATES_JSON_LOCATION[date_type])
break

self.heading = self.format_heading_count(self.check_text['heading'])
self.message = self.check_text['message'][self.grants_percentage]


class FarFutureActualDates(AdditionalTest):
"""Check if dates in actualDates are > 5 years into the future, from the present day."""

check_text = {
"heading": mark_safe("Actual Date entries that are over 5 years in the future"),
"message": RangeDict()
}
check_text['message'][(0, 100)] = mark_safe(
"Your data contains Actual Date entries that are more than 5 years into the future. You can disregard this "
"check if your data is about activities in the future, but you should check for data entry errors "
"if this isn't expected."
)

def process(self, grant, path_prefix):
id = grant.get('id')
grant_dates = GRANT_DATES.get(id)

if grant_dates:
for date_type, input_date in (
["actual_start_date", grant_dates.get('actual_start_date', {}).get('datetime_date')],
["actual_end_date", grant_dates.get('actual_end_date', {}).get('datetime_date')]
):
if input_date:
if input_date > datetime.datetime.now() + relativedelta(years=5):
self.failed = True
self.count += 1
self.json_locations.append(path_prefix + DATES_JSON_LOCATION[date_type])
break

self.heading = self.format_heading_count(self.check_text['heading'])
self.message = self.check_text['message'][self.grants_percentage]


class FarPastDates(AdditionalTest):
"""Check if dates in awardDate, plannedDates, actualDates are > 25 years in the past, from the present day."""

check_text = {
"heading": mark_safe("dates that are over 25 years ago"),
"message": RangeDict()
}
check_text['message'][(0, 100)] = mark_safe(
"Your data contains dates that are more than 25 years ago. You can disregard this check if your "
"data is about activities in the past, but you should check for data entry errors if this isn't expected."
)

def process(self, grant, path_prefix):
id = grant.get('id')
grant_dates = GRANT_DATES.get(id)

if grant_dates:
for date_type, input_date in (
["award_date", grant_dates.get('award_date', {}).get('datetime_date')],
["planned_start_date", grant_dates.get('planned_start_date', {}).get('datetime_date')],
["planned_end_date", grant_dates.get('planned_end_date', {}).get('datetime_date')],
["actual_start_date", grant_dates.get('actual_start_date', {}).get('datetime_date')],
["actual_end_date", grant_dates.get('actual_end_date', {}).get('datetime_date')]
):

if input_date:
if input_date < datetime.datetime.now() - relativedelta(years=25):
self.failed = True
self.count += 1
self.json_locations.append(path_prefix + DATES_JSON_LOCATION[date_type])
break

self.heading = self.format_heading_count(self.check_text['heading'])
self.message = self.check_text['message'][self.grants_percentage]


class PostDatedAwardDates(AdditionalTest):
"""Check if dates in awardDate is in the future, from the present day. """

check_text = {
"heading": mark_safe("Award Dates that are in the future"),
"message": RangeDict()
}
check_text['message'][(0, 100)] = mark_safe(
"Your data contains grant Award Dates in the future. This field is for when was the decision to award "
"this grant made so the date would normally be in the past. This can happen when there is a typo in the date, "
"or the data includes grants that are not yet fully committed."
)

def process(self, grant, path_prefix):
id = grant.get('id')
grant_dates = GRANT_DATES.get(id)

if grant_dates:
award_date = grant_dates.get('award_date', {}).get('datetime_date')
if award_date:
if award_date > datetime.datetime.now():
self.failed = True
self.count += 1
self.json_locations.append(path_prefix + DATES_JSON_LOCATION['award_date'])

self.heading = self.format_heading_count(self.check_text['heading'])
self.message = self.check_text['message'][self.grants_percentage]


TEST_CLASSES = {
QUALITY_TEST_CLASS: [
ZeroAmountTest,
Expand All @@ -989,6 +1237,13 @@ def process(self, grant, path_prefix):
OrganizationIdLooksInvalid,
MoreThanOneFundingOrg,
LooksLikeEmail,
ImpossibleDates,
PlannedStartDateBeforeEndDate,
ActualStartDateBeforeEndDate,
FarFuturePlannedDates,
FarFutureActualDates,
FarPastDates,
PostDatedAwardDates,
],
USEFULNESS_TEST_CLASS: [
RecipientOrg360GPrefix,
Expand All @@ -1005,13 +1260,63 @@ def process(self, grant, path_prefix):
}


def convert_string_date_to_datetime(input_date):
"""
Date format that will be converted are:
YYYY-MM-DD
YYYY-MM-DDT...
"""
error_msg = None
datetime_date = None

if 'T' in input_date:
input_date = input_date.split('T')[0]
convert_string_date_to_datetime(input_date)

try:
datetime_date = datetime.datetime.strptime(input_date, "%Y-%m-%d")
except ValueError as e:
error_msg = str(e)

return datetime_date, error_msg


def create_grant_dates_dict(grant):
"""
Creates the following dict:
GRANT_DATES: {'id': { 'date_type': {'datetime_date': datetime_date, 'date_format_error': error_msg}}}
Dates are needed in several additional checks. With this dict, dates are converted just once.
"""
id = grant.get("id")
award_date = grant.get("awardDate")
planned_start_date = grant.get("plannedDates", [{}])[0].get('startDate')
planned_end_date = grant.get("plannedDates", [{}])[0].get('endDate')
actual_start_date = grant.get("actualDates", [{}])[0].get('startDate')
actual_end_date = grant.get("actualDates", [{}])[0].get('endDate')

for date_type, input_date in [
["award_date", award_date],
["planned_start_date", planned_start_date], ["planned_end_date", planned_end_date],
["actual_start_date", actual_start_date], ["actual_end_date", actual_end_date]
]:
if input_date:
datetime_date, error_msg = convert_string_date_to_datetime(input_date)

GRANT_DATES.setdefault(id, {})[date_type] = {'datetime_date': datetime_date, 'date_format_error': error_msg}


@tools.ignore_errors
def run_extra_checks(json_data, cell_source_map, test_classes):
if 'grants' not in json_data:
return []
test_instances = [test_cls(grants=json_data['grants']) for test_cls in test_classes]

for num, grant in enumerate(json_data['grants']):
create_grant_dates_dict(grant)

for test_instance in test_instances:
test_instance.process(grant, 'grants/{}'.format(num))

Expand Down
Loading

0 comments on commit 0c943a1

Please sign in to comment.