Skip to content

Commit

Permalink
Merge e4343c8 into 4829ad9
Browse files Browse the repository at this point in the history
  • Loading branch information
adrian-lara committed Jun 19, 2020
2 parents 4829ad9 + e4343c8 commit f3dc6c0
Show file tree
Hide file tree
Showing 7 changed files with 504 additions and 420 deletions.
11 changes: 9 additions & 2 deletions seed/api/v3/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@
from django.conf.urls import url, include
from rest_framework import routers

from rest_framework_nested import routers as nested_routers

from seed.views.v3.columns import ColumnViewSet
from seed.views.v3.cycles import CycleViewSet
from seed.views.v3.data_quality import DataQualityViews
from seed.views.v3.data_quality_checks import DataQualityCheckViewSet
from seed.views.v3.data_quality_check_rules import DataQualityCheckRuleViewSet
from seed.views.v3.datasets import DatasetViewSet
from seed.views.v3.labels import LabelViewSet
from seed.views.v3.import_files import ImportFileViewSet
Expand All @@ -19,13 +22,17 @@
# Register each top-level v3 resource on the shared router; base_name is the
# prefix used when reversing the generated route names.
api_v3_router.register(r'cycles', CycleViewSet, base_name='cycles')
api_v3_router.register(r'datasets', DatasetViewSet, base_name='datasets')
api_v3_router.register(r'labels', LabelViewSet, base_name='labels')
# NOTE(review): the next two lines register the same 'data_quality_checks'
# prefix and base_name with two different viewsets. In this diff view the
# DataQualityViews line looks like the pre-change side of the hunk -- confirm
# that only the DataQualityCheckViewSet registration exists in the real file.
api_v3_router.register(r'data_quality_checks', DataQualityViews, base_name='data_quality_checks')
api_v3_router.register(r'data_quality_checks', DataQualityCheckViewSet, base_name='data_quality_checks')
api_v3_router.register(r'import_files', ImportFileViewSet, base_name='import_files')
api_v3_router.register(r'organizations', OrganizationViewSet, base_name='organizations')
api_v3_router.register(r'properties', PropertyViewSet, base_name='properties')
api_v3_router.register(r'taxlots', TaxlotViewSet, base_name='taxlots')
# Unlike the other registrations, this base_name is singular ('user').
api_v3_router.register(r'users', UserViewSet, base_name='user')

# Nest the rule routes beneath the 'data_quality_checks' prefix of the main router.
# NOTE(review): lookup="nested" presumably prefixes the parent's URL kwarg; the
# rule tests reverse these routes with 'nested_organization_id' -- confirm
# against the parent viewset's lookup field.
data_quality_checks_router = nested_routers.NestedSimpleRouter(api_v3_router, r'data_quality_checks', lookup="nested")
data_quality_checks_router.register(r'rules', DataQualityCheckRuleViewSet, base_name='data_quality_check-rules')

# Both the top-level and the nested router URLs are included at the same root pattern.
urlpatterns = [
url(r'^', include(api_v3_router.urls)),
url(r'^', include(data_quality_checks_router.urls)),
]
105 changes: 105 additions & 0 deletions seed/serializers/rules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# !/usr/bin/env python
# encoding: utf-8
"""
:copyright (c) 2014 - 2020, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Department of Energy) and contributors. All rights reserved. # NOQA
:author
"""

from rest_framework import serializers
from seed.models.data_quality import Rule
from seed.models import StatusLabel


class RuleSerializer(serializers.ModelSerializer):
    """
    Serialize and validate data quality ``Rule`` records.

    ``data_type`` and ``severity`` are sourced from the model's
    ``get_data_type_display`` / ``get_severity_display`` accessors, so
    ``validate`` looks for an incoming severity under the
    ``'get_severity_display'`` key of the validated data.
    """
    data_type = serializers.CharField(source='get_data_type_display', required=False)
    status_label = serializers.PrimaryKeyRelatedField(
        queryset=StatusLabel.objects.all(),
        allow_null=True,
        required=False
    )
    severity = serializers.CharField(source='get_severity_display', required=False)

    class Meta:
        model = Rule
        fields = [
            'condition',
            'data_type',
            'enabled',
            'field',
            'id',
            'max',
            'min',
            'not_null',
            'required',
            'rule_type',
            'severity',
            'status_label',
            'table_name',
            'text_match',
            'units',
        ]

    def validate_status_label(self, label):
        """
        Reject a label that belongs to a different organization than the rule.

        Note: DQ Rules can be shared from parent to child but child orgs can
        have their own labels. So, a Rule shouldn't be associated to Labels
        from child orgs. In other words, Rule and associated Label should be
        from the same org.

        :param label: StatusLabel instance resolved from the submitted ID, or None
        :return: the unchanged label when it is None or belongs to the rule's org
        :raises serializers.ValidationError: when the label's organization
            differs from the organization of the rule's data quality check
        """
        if label is None:
            return label

        # NOTE(review): the owning organization is read from the existing
        # rule, so this check needs the serializer to be bound to an instance.
        # Submitting a non-null label without an instance is not handled
        # here -- confirm how rule creation should resolve the organization.
        dq_check = self.instance.data_quality_check
        if label.super_organization_id != dq_check.organization_id:
            raise serializers.ValidationError(
                f'Label with ID {label.id} not found in organization, {dq_check.organization.name}.'
            )
        return label

    def validate(self, data):
        """
        These are validations that involve values between multiple fields.
        Custom validations for field values in isolation should still be
        contained in 'validate_{field_name}' methods which are only checked when
        that field is in 'data'.

        Each check combines the stored value (when the serializer is bound to
        an existing rule) with the incoming value (when the key is present in
        ``data``). When there is no bound instance, the stored severity and
        condition are treated as not matching, and the stored label and
        text_match are treated as empty.

        :param data: dict of validated field values
        :return: ``data`` unchanged when every check passes
        :raises serializers.ValidationError: with all failure messages listed
            under the 'general_validation_error' key
        """
        rule = self.instance  # None when the serializer is not bound to a Rule
        validation_messages = []

        # Rule with SEVERITY setting of "valid" should have a Label.
        severity_is_valid = rule is not None and rule.severity == Rule.SEVERITY_VALID
        severity_unchanged = 'get_severity_display' not in data
        severity_will_be_valid = data.get('get_severity_display') == dict(Rule.SEVERITY)[Rule.SEVERITY_VALID]

        if (severity_is_valid and severity_unchanged) or severity_will_be_valid:
            label_unchanged = 'status_label' not in data
            # Only an explicitly submitted "" or None counts as removing the label.
            label_will_be_removed = not label_unchanged and data['status_label'] in ["", None]
            label_is_not_associated = rule is None or rule.status_label is None
            if label_will_be_removed or (label_is_not_associated and label_unchanged):
                validation_messages.append(
                    "Label must be assigned when using 'Valid' Data Severity."
                )

        # Rule must NOT include or exclude an empty string.
        include_or_exclude = [Rule.RULE_INCLUDE, Rule.RULE_EXCLUDE]
        is_include_or_exclude = rule is not None and rule.condition in include_or_exclude
        condition_unchanged = 'condition' not in data
        will_be_include_or_exclude = data.get('condition') in include_or_exclude

        if (is_include_or_exclude and condition_unchanged) or will_be_include_or_exclude:
            text_match_unchanged = 'text_match' not in data
            # Only an explicitly submitted "" or None counts as emptying text_match.
            text_match_will_be_empty = not text_match_unchanged and data['text_match'] in ["", None]
            text_match_is_empty = rule is None or getattr(rule, 'text_match', "FOO") in ["", None]

            if text_match_will_be_empty or (text_match_is_empty and text_match_unchanged):
                validation_messages.append(
                    'Rule must not include or exclude an empty string.'
                )

        if validation_messages:
            raise serializers.ValidationError({
                'general_validation_error': validation_messages
            })
        return data
141 changes: 141 additions & 0 deletions seed/tests/test_data_quality_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

from seed.tests.util import DataMappingBaseTestCase

from seed.utils.organizations import create_organization


class RuleViewTests(DataMappingBaseTestCase):
def setUp(self):
Expand All @@ -31,6 +33,145 @@ def setUp(self):
email='test_user@demo.com'
)

def test_update_rule_status_label_validation(self):
    """A PUT assigning a label that belongs to another organization returns a 400."""
    # Start with 1 Rule
    dq = DataQualityCheck.retrieve(self.org.id)
    dq.remove_all_rules()
    base_rule_info = {
        'field': 'address_line_1',
        'table_name': 'PropertyState',
        'enabled': True,
        'data_type': Rule.TYPE_STRING,
        'rule_type': Rule.RULE_TYPE_DEFAULT,
        'condition': Rule.RULE_INCLUDE,
        'required': False,
        'not_null': False,
        'min': None,
        'max': None,
        'text_match': 'Test Rule 1',
        'severity': Rule.SEVERITY_ERROR,
        'units': "",
    }
    dq.add_rule(base_rule_info)
    rule = dq.rules.get()

    # Send invalid update request that includes a label id from another org
    new_org, _, _ = create_organization(self.user, "test-organization-a")
    wrong_org_label_id = new_org.labels.first().id
    put_data = deepcopy(base_rule_info)
    put_data['status_label'] = wrong_org_label_id
    url = reverse('api:v3:data_quality_check-rules-detail', kwargs={
        'nested_organization_id': self.org.id,
        'pk': rule.id
    })
    res = self.client.put(url, content_type='application/json', data=json.dumps(put_data))

    self.assertEqual(res.status_code, 400)
    # assertIn reports the actual error list on failure, unlike assertTrue(x in y).
    self.assertIn(
        f'Label with ID {wrong_org_label_id} not found in organization, {self.org.name}.',
        json.loads(res.content)['status_label']
    )

def test_update_rule_valid_severity_label_validation(self):
    """
    A rule with 'Valid' severity must keep a label.

    Covers both directions: switching severity to valid without a label, and
    clearing the label of a rule whose stored severity is already valid.
    """
    expected_message = "Label must be assigned when using 'Valid' Data Severity."

    # Start with 1 Rule
    dq = DataQualityCheck.retrieve(self.org.id)
    dq.remove_all_rules()
    base_rule_info = {
        'field': 'address_line_1',
        'table_name': 'PropertyState',
        'enabled': True,
        'data_type': Rule.TYPE_STRING,
        'rule_type': Rule.RULE_TYPE_DEFAULT,
        'condition': Rule.RULE_INCLUDE,
        'required': False,
        'not_null': False,
        'min': None,
        'max': None,
        'text_match': 'Test Rule 1',
        'severity': Rule.SEVERITY_ERROR,
        'units': "",
    }
    dq.add_rule(base_rule_info)
    rule = dq.rules.get()

    # The same rule is updated in both requests, so the URL is built once.
    url = reverse('api:v3:data_quality_check-rules-detail', kwargs={
        'nested_organization_id': self.org.id,
        'pk': rule.id
    })

    # Send invalid update request
    put_data = deepcopy(base_rule_info)
    put_data['severity'] = dict(Rule.SEVERITY).get(Rule.SEVERITY_VALID)
    put_data['status_label'] = None
    res = self.client.put(url, content_type='application/json', data=json.dumps(put_data))

    self.assertEqual(res.status_code, 400)
    self.assertIn(expected_message, json.loads(res.content)['general_validation_error'])

    # Add label to rule and change severity to valid, then try to remove label
    rule.status_label = self.org.labels.first()
    rule.severity = Rule.SEVERITY_VALID
    rule.save()

    put_data_2 = deepcopy(base_rule_info)
    del put_data_2['severity']  # don't update severity
    put_data_2['status_label'] = ""
    res = self.client.put(url, content_type='application/json', data=json.dumps(put_data_2))

    self.assertEqual(res.status_code, 400)
    self.assertIn(expected_message, json.loads(res.content)['general_validation_error'])

def test_update_rule_include_empty_text_match_validation(self):
    """
    An include/exclude rule must have a non-empty text_match.

    Covers both directions: emptying text_match on an include rule, and
    switching a rule with an empty stored text_match to the exclude condition.
    """
    expected_message = 'Rule must not include or exclude an empty string.'

    # Start with 1 Rule
    dq = DataQualityCheck.retrieve(self.org.id)
    dq.remove_all_rules()
    base_rule_info = {
        'field': 'address_line_1',
        'table_name': 'PropertyState',
        'enabled': True,
        'data_type': Rule.TYPE_STRING,
        'rule_type': Rule.RULE_TYPE_DEFAULT,
        'condition': Rule.RULE_INCLUDE,
        'required': False,
        'not_null': False,
        'min': None,
        'max': None,
        'text_match': 'Test Rule 1',
        'severity': Rule.SEVERITY_ERROR,
        'units': "",
    }
    dq.add_rule(base_rule_info)
    rule = dq.rules.get()

    # The same rule is updated in both requests, so the URL is built once.
    url = reverse('api:v3:data_quality_check-rules-detail', kwargs={
        'nested_organization_id': self.org.id,
        'pk': rule.id
    })

    # Send invalid update request
    put_data = deepcopy(base_rule_info)
    put_data['text_match'] = None
    res = self.client.put(url, content_type='application/json', data=json.dumps(put_data))

    self.assertEqual(res.status_code, 400)
    self.assertIn(expected_message, json.loads(res.content)['general_validation_error'])

    # Remove text_match and make condition NOT_NULL, then try making condition EXCLUDE
    rule.text_match = None
    rule.condition = Rule.RULE_NOT_NULL
    rule.save()

    put_data_2 = deepcopy(base_rule_info)
    del put_data_2['text_match']  # don't update text_match
    put_data_2['condition'] = Rule.RULE_EXCLUDE
    res = self.client.put(url, content_type='application/json', data=json.dumps(put_data_2))

    self.assertEqual(res.status_code, 400)
    self.assertIn(expected_message, json.loads(res.content)['general_validation_error'])

def test_valid_data_rule_without_label_does_not_actually_update_or_delete_any_rules(self):
# Start with 3 Rules
dq = DataQualityCheck.retrieve(self.org.id)
Expand Down
4 changes: 2 additions & 2 deletions seed/utils/api_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def base_field(name, location_attr, description, required, type):
getattr(openapi, location_attr),
description=description,
required=required,
type=type
type=getattr(openapi, type)
)

@staticmethod
Expand All @@ -46,7 +46,7 @@ def query_integer_field(name, required, description):
name,
openapi.IN_QUERY,
description=description,
required=True,
required=required,
type=openapi.TYPE_INTEGER
)

Expand Down

0 comments on commit f3dc6c0

Please sign in to comment.