Skip to content

Commit

Permalink
Merge pull request #2236 from SEED-platform/2202_deleted_dq_rules
Browse files Browse the repository at this point in the history
2202 deleted dq rules
  • Loading branch information
adrian-lara committed Jun 4, 2020
2 parents 52ba0e7 + 9eda92c commit db6321c
Show file tree
Hide file tree
Showing 2 changed files with 304 additions and 20 deletions.
263 changes: 263 additions & 0 deletions seed/tests/test_data_quality_rules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,263 @@
# !/usr/bin/env python
# encoding: utf-8
"""
:copyright (c) 2014 - 2020, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Department of Energy) and contributors. All rights reserved. # NOQA
:author
"""
import json

from copy import deepcopy

from django.urls import reverse

from seed.models.data_quality import (
DataQualityCheck,
Rule,
)
from seed.models.models import ASSESSED_RAW

from seed.tests.util import DataMappingBaseTestCase


class RuleViewTests(DataMappingBaseTestCase):
    """Tests for the ``save-data-quality-rules`` endpoint of the v2 API.

    Each test posts a set of property rules for ``self.org`` and checks both
    the HTTP response and the rules that remain attached to the
    organization's ``DataQualityCheck`` afterwards.
    """

    def setUp(self):
        """Create the base fixtures and log the test user in."""
        selfvars = self.set_up(ASSESSED_RAW)

        self.user, self.org, self.import_file, self.import_record, self.cycle = selfvars

        self.client.login(
            username='test_user@demo.com',
            password='test_pass',
            email='test_user@demo.com'
        )

    def _reset_rules(self):
        """Return the org's DataQualityCheck after removing all of its rules."""
        dq = DataQualityCheck.retrieve(self.org.id)
        dq.remove_all_rules()
        return dq

    def _seed_three_rules(self):
        """Start from exactly 3 stored rules and build their API payload.

        :return: tuple ``(dq, property_rules)`` where ``dq`` is the org's
            DataQualityCheck and ``property_rules`` is the list of the three
            rule dicts (in creation order) adjusted for posting to the
            endpoint. The dicts are meant to be mutated by the caller to
            provoke specific validation failures.
        """
        dq = self._reset_rules()
        base_rule_info = {
            'field': 'address_line_1',
            'table_name': 'PropertyState',
            'enabled': True,
            'data_type': Rule.TYPE_STRING,
            'rule_type': Rule.RULE_TYPE_DEFAULT,
            'condition': Rule.RULE_INCLUDE,
            'required': False,
            'not_null': False,
            'min': None,
            'max': None,
            'text_match': 'Test Rule 1',
            'severity': Rule.SEVERITY_ERROR,
            'units': "",
            'status_label_id': None
        }
        dq.add_rule(base_rule_info)

        # Each copy is taken only after the previous rule was stored, exactly
        # as the tests have always done.
        rule_2_info = deepcopy(base_rule_info)
        rule_2_info['text_match'] = 'Test Rule 2'
        dq.add_rule(rule_2_info)

        rule_3_info = deepcopy(base_rule_info)
        rule_3_info['text_match'] = 'Test Rule 3'
        dq.add_rule(rule_3_info)

        self.assertEqual(dq.rules.count(), 3)

        property_rules = [base_rule_info, rule_2_info, rule_3_info]

        # Make some adjustments to mimic how data is expected in API endpoint
        # NOTE(review): only the third rule's severity is converted to its
        # display string here; the other two keep the integer constant unless
        # a test overrides them - confirm the endpoint tolerates that.
        rule_3_info['severity'] = dict(Rule.SEVERITY).get(Rule.SEVERITY_ERROR)
        for rule in property_rules:
            rule['data_type'] = dict(Rule.DATA_TYPES).get(rule['data_type'])
            rule['label'] = None

        return dq, property_rules

    def _post_property_rules(self, property_rules):
        """POST ``property_rules`` (and no taxlot rules) to the save endpoint.

        :param property_rules: list of rule dicts to send under "properties"
        :return: the test client's response
        """
        url = reverse('api:v2:data_quality_checks-save-data-quality-rules') + '?organization_id=' + str(self.org.pk)
        post_data = {
            "data_quality_rules": {
                "properties": property_rules,
                "taxlots": [],
            },
        }
        return self.client.post(url, content_type='application/json', data=json.dumps(post_data))

    def test_valid_data_rule_without_label_does_not_actually_update_or_delete_any_rules(self):
        """A "valid" severity without a label is rejected and nothing changes."""
        # Start with 3 Rules
        dq, property_rules = self._seed_three_rules()
        base_rule_info, rule_2_info, _ = property_rules

        # Make 2 rules trigger the "valid without label" failure
        base_rule_info['severity'] = dict(Rule.SEVERITY).get(Rule.SEVERITY_VALID)
        rule_2_info['severity'] = dict(Rule.SEVERITY).get(Rule.SEVERITY_VALID)

        res = self._post_property_rules(property_rules)

        self.assertEqual(res.status_code, 400)
        self.assertEqual(json.loads(res.content)['message'], 'Label must be assigned when using Valid Data Severity.')

        # Count 3 total rules. None of them were updated
        self.assertEqual(dq.rules.count(), 3)
        self.assertEqual(dq.rules.filter(severity=Rule.SEVERITY_VALID).count(), 0)

    def test_include_exclude_without_text_match_does_not_actually_update_or_delete_any_rules(self):
        """Include/exclude rules with empty text are rejected and nothing changes."""
        # Start with 3 Rules
        dq, property_rules = self._seed_three_rules()
        base_rule_info, rule_2_info, _ = property_rules

        # Make 2 rules trigger the include or exclude without text_match failure
        base_rule_info['text_match'] = ''

        rule_2_info['condition'] = Rule.RULE_EXCLUDE
        rule_2_info['text_match'] = ''

        res = self._post_property_rules(property_rules)

        self.assertEqual(res.status_code, 400)
        self.assertEqual(json.loads(res.content)['message'], 'Rule must not include or exclude an empty string.')

        # Count 3 total rules. None of them were updated
        self.assertEqual(dq.rules.count(), 3)
        self.assertEqual(dq.rules.filter(condition=Rule.RULE_EXCLUDE).count(), 0)
        self.assertEqual(dq.rules.filter(text_match='').count(), 0)

    def test_failed_rule_creation_doesnt_prevent_other_rules_from_being_created(self):
        """One rule failing to be created does not block the remaining rules."""
        # Start with 0 Rules
        dq = self._reset_rules()

        # Post 3 rules - one of which will fail
        base_rule_post_data = {
            'field': 'address_line_1',
            'table_name': 'PropertyState',
            'enabled': True,
            'data_type': dict(Rule.DATA_TYPES).get(Rule.TYPE_STRING),
            'rule_type': Rule.RULE_TYPE_DEFAULT,
            'condition': Rule.RULE_INCLUDE,
            'required': False,
            'not_null': False,
            'min': None,
            'max': None,
            'text_match': 'Test Rule 1',
            'severity': dict(Rule.SEVERITY).get(Rule.SEVERITY_ERROR),
            'units': "",
            'label': None
        }

        # Rule 2 passes the up-front validation but cannot be created
        rule_2_post_data = deepcopy(base_rule_post_data)
        rule_2_post_data['text_match'] = 'Test Rule 2'
        rule_2_post_data['rule_type'] = 'some invalid rule type'

        # Rule 3 is explicitly valid, in contrast to rule 2
        rule_3_post_data = deepcopy(base_rule_post_data)
        rule_3_post_data['text_match'] = 'Test Rule 3'
        rule_3_post_data['rule_type'] = Rule.RULE_TYPE_DEFAULT

        property_rules = [base_rule_post_data, rule_2_post_data, rule_3_post_data]

        res = self._post_property_rules(property_rules)

        self.assertEqual(res.status_code, 400)
        self.assertEqual(json.loads(res.content)['message'], "Rule could not be recreated: invalid literal for int() with base 10: 'some invalid rule type'")

        # Count 2 total rules - the first and third rules
        self.assertEqual(dq.rules.count(), 2)
        self.assertEqual(dq.rules.filter(text_match__in=['Test Rule 1', 'Test Rule 3']).count(), 2)

    def test_multiple_unique_errors_get_reported(self):
        """Every distinct validation failure shows up in the error message."""
        # Start with 3 Rules
        _, property_rules = self._seed_three_rules()
        base_rule_info, rule_2_info, _ = property_rules

        # Make 1 rule trigger the include without text_match failure
        base_rule_info['text_match'] = ''

        # Make 1 rule trigger the "valid without label" failure
        rule_2_info['severity'] = dict(Rule.SEVERITY).get(Rule.SEVERITY_VALID)

        res = self._post_property_rules(property_rules)

        self.assertEqual(res.status_code, 400)

        # Order of the messages is not asserted, only that both are present
        message = json.loads(res.content)['message']
        self.assertIn('Rule must not include or exclude an empty string.', message)
        self.assertIn('Label must be assigned when using Valid Data Severity.', message)
61 changes: 41 additions & 20 deletions seed/views/data_quality.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import csv

from celery.utils.log import get_task_logger
from django.db import transaction
from django.http import JsonResponse, HttpResponse
from rest_framework import viewsets, serializers, status
from rest_framework.decorators import action
Expand Down Expand Up @@ -365,7 +366,16 @@ def save_data_quality_rules(self, request, pk=None):

posted_rules = body['data_quality_rules']
updated_rules = []
valid_rules = True
validation_messages = set()
for rule in posted_rules['properties']:
if _get_severity_from_js(rule['severity']) == Rule.SEVERITY_VALID and rule['label'] is None:
valid_rules = False
validation_messages.add('Label must be assigned when using Valid Data Severity.')
if rule['condition'] == Rule.RULE_INCLUDE or rule['condition'] == Rule.RULE_EXCLUDE:
if rule['text_match'] is None or rule['text_match'] == '':
valid_rules = False
validation_messages.add('Rule must not include or exclude an empty string.')
updated_rules.append(
{
'field': rule['field'],
Expand All @@ -386,6 +396,13 @@ def save_data_quality_rules(self, request, pk=None):
)

for rule in posted_rules['taxlots']:
if _get_severity_from_js(rule['severity']) == Rule.SEVERITY_VALID and rule['label'] is None:
valid_rules = False
validation_messages.add('Label must be assigned when using Valid Data Severity.')
if rule['condition'] == Rule.RULE_INCLUDE or rule['condition'] == Rule.RULE_EXCLUDE:
if rule['text_match'] is None or rule['text_match'] == '':
valid_rules = False
validation_messages.add('Rule must not include or exclude an empty string.')
updated_rules.append(
{
'field': rule['field'],
Expand All @@ -405,29 +422,33 @@ def save_data_quality_rules(self, request, pk=None):
}
)

if valid_rules is False:
return JsonResponse({
'status': 'error',
'message': '\n'.join(validation_messages),
}, status=status.HTTP_400_BAD_REQUEST)

# This pattern of deleting and recreating Rules is slated to be deprecated
bad_rule_creation = False
error_messages = set()
dq = DataQualityCheck.retrieve(organization.id)
dq.remove_all_rules()
for rule in updated_rules:
if rule['severity'] == Rule.SEVERITY_VALID and rule['status_label_id'] is None:
return JsonResponse({
'status': 'error',
'message': 'Label must be assigned when using Valid Data Severity.',
}, status=status.HTTP_400_BAD_REQUEST)
if rule['condition'] == Rule.RULE_INCLUDE or rule['condition'] == Rule.RULE_EXCLUDE:
if rule['text_match'] is None or rule['text_match'] == '':
return JsonResponse({
'status': 'error',
'message': 'Rule must not include or exclude an empty string.',
}, status=status.HTTP_400_BAD_REQUEST)
try:
dq.add_rule(rule)
except TypeError as e:
return JsonResponse({
'status': 'error',
'message': e,
}, status=status.HTTP_400_BAD_REQUEST)

return self.data_quality_rules(request)
with transaction.atomic():
try:
dq.add_rule(rule)
except Exception as e:
error_messages.add('Rule could not be recreated: ' + str(e))
bad_rule_creation = True
continue

if bad_rule_creation:
return JsonResponse({
'status': 'error',
'message': '\n'.join(error_messages),
}, status=status.HTTP_400_BAD_REQUEST)
else:
return self.data_quality_rules(request)

@api_endpoint_class
@ajax_request_class
Expand Down

0 comments on commit db6321c

Please sign in to comment.