Skip to content

Commit

Permalink
Validate resources using multiple expressions
Browse files Browse the repository at this point in the history
  • Loading branch information
willu47 committed Feb 12, 2020
1 parent 7fd8cef commit a2631c4
Show file tree
Hide file tree
Showing 4 changed files with 196 additions and 63 deletions.
75 changes: 51 additions & 24 deletions src/otoole/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def check_for_duplicates(codes: List) -> bool:
return duplicate_values


def create_schema(config: Dict = None) -> Dict:
    """Populate the dict of schema with codes from the validation config

    Every resource in ``config['schema']`` holds a list of schemas and every
    schema holds a list of ``items``. An item whose ``valid`` entry is a string
    has that entry replaced, in place, by the list of keys found under
    ``config['codes'][<that string>]``. An item whose ``valid`` entry is
    already a list is left untouched.

    Arguments
    ---------
    config : dict, default=None
        A configuration dictionary containing ``codes`` and ``schema`` keys.
        When ``None``, the result of ``read_validation_config()`` is used.

    Returns
    -------
    dict
        The ``schema`` section of ``config`` with the ``valid`` entries
        resolved to lists

    Raises
    ------
    ValueError
        If a ``valid`` entry is neither a string nor a list, or if the
        resolved list of valid codes contains duplicates
    """
    if config is None:
        config = read_validation_config()

    for resource_name, resource_schemas in config['schema'].items():
        logger.debug("%s", resource_name)
        for schema in resource_schemas:
            for items in schema['items']:
                valid = items['valid']

                if isinstance(valid, str):
                    # A string names an entry in ``codes``; its keys are the valid values
                    items['valid'] = list(config['codes'][valid].keys())
                    logger.debug("create_schema: %s", items['valid'])
                elif not isinstance(valid, list):
                    raise ValueError("Entry {} is not correct".format(schema['name']))

                if check_for_duplicates(items['valid']):
                    # Format the name into the message; passing it as a second
                    # argument to ValueError leaves a literal "{}" in the text
                    raise ValueError(
                        "There are duplicate values in codes for {}".format(schema['name'])
                    )

    return config['schema']


Expand All @@ -91,6 +97,18 @@ def compose_expression(schema: List) -> str:
return expression


def compose_multi_expression(resource: List) -> str:
    """Join the expressions of all schemas of a resource with an OR operator

    One expression is composed from the ``items`` of each schema in
    ``resource`` and the results are concatenated with ``|``, so an element
    is accepted when it matches this expression OR that expression.
    """
    return "|".join(compose_expression(entry['items']) for entry in resource)


def validate(expression: str, name: str) -> bool:
"""Determine if ``name`` matches the ``expression``
Expand All @@ -116,11 +134,22 @@ def validate(expression: str, name: str) -> bool:
return valid


def validate_resource(package, schema, resource):
def validate_resource(package, resource: str, schemas: List[Dict]):
"""
Arguments
---------
package
resource: str
schemas : List[Dict]
The schema from which to create a validation expression
"""

print("Validating {} with {}\n".format(resource, ", ".join([x['name'] for x in schemas])))

logger.debug(schema)
logger.debug(schemas)

expression = compose_expression(schema)
expression = compose_multi_expression(schemas)
resources = package.get_resource(resource).read(keyed=True)

valid_names = []
Expand All @@ -135,10 +164,10 @@ def validate_resource(package, schema, resource):
invalid_names.append(name)

if invalid_names:
msg = "{} invalid names:\n {}"
msg = "{} invalid names:\n{}\n"
print(msg.format(len(invalid_names), ", ".join(invalid_names)))
if valid_names:
msg = "{} valid names:\n {}"
msg = "{} valid names:\n{}\n"
print(msg.format(len(valid_names), ", ".join(valid_names)))


Expand All @@ -155,7 +184,7 @@ def identify_orphaned_fuels_techs(package) -> Dict[str, str]:
number_of_isolates = isolate.number_of_isolates(graph)
logger.debug("There are {} isolated nodes in the graph".format(number_of_isolates))

isolated_nodes = defaultdict(list)
isolated_nodes: Dict = defaultdict(list)

for node_name in list(isolate.isolates(graph)):
node_data = graph.nodes[node_name]
Expand All @@ -166,22 +195,20 @@ def identify_orphaned_fuels_techs(package) -> Dict[str, str]:

def main(file_format: str, filepath: str, config=None):

print("\n***Beginning validation***")
print("\n***Beginning validation***\n")
if file_format == 'datapackage':
package = read_datapackage(filepath)
elif file_format == 'sql':
package = read_datapackage(filepath, sql=True)

schema = create_schema(config)

print("\n***Checking TECHNOLOGY names***\n")
validate_resource(package, schema['technology_name'], 'TECHNOLOGY')

print("\n***Checking FUEL names***\n")
validate_resource(package, schema['fuel_name'], 'FUEL')
for resource, schemas in schema.items():
validate_resource(package, resource, schemas)

print("\n***Checking graph structure***")
isolated_nodes = identify_orphaned_fuels_techs(package)

msg = ""
for node_type, node_names in isolated_nodes.items():
msg += "\n{} '{}' nodes are isolated:\n {}\n".format(
Expand Down
70 changes: 44 additions & 26 deletions src/otoole/validate.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ codes:
'02': MDT
'03': NDT
'04': OTF/OTS
tradelink:
'EL1EX': electricity trade link
'NG1EX': natural gas trade link
age:
'O': existing or historical plant
'N': new plant
Expand Down Expand Up @@ -118,29 +121,44 @@ codes:
'ZMB': Zambia
'ZWE': Zimbabwe
schema:
fuel_name:
- name: countries
valid: countries
position: (1, 3)
- name: fuels
valid: fuels
position: (4, 6)
technology_name:
- name: countries
valid: countries
position: (1, 3)
- name: fuels
valid: fuels
position: (4, 6)
- name: technology
valid: technologies
position: (7, 8)
- name: ccs
valid: ['P', 'C']
position: (9, )
- name: cooling_type
valid: cooling
position: (10, 11)
- name: age
valid: age
position: (12, )
FUEL:
- name: fuel_name
items:
- name: countries
valid: countries
position: (1, 3)
- name: fuels
valid: fuels
position: (4, 6)
TECHNOLOGY:
- name: technology_name
items:
- name: countries
valid: countries
position: (1, 3)
- name: fuels
valid: fuels
position: (4, 6)
- name: technology
valid: technologies
position: (7, 8)
- name: ccs
valid: ['P', 'C']
position: (9, )
- name: cooling_type
valid: cooling
position: (10, 11)
- name: age
valid: age
position: (12, )
- name: trade_link
items:
- name: countries
valid: countries
position: (1, 3)
- name: tradelink
valid: tradelink
position: (4, 8)
- name: countries
valid: countries
position: (9, 11)
3 changes: 2 additions & 1 deletion src/otoole/visualise/res.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ def extract_nodes(package_rows: List[List], node_type='technology',
"""
nodes = [(x[0], {'type': node_type,
'fillcolor': color, 'shape': shape,
'style': 'filled'}
'style': 'filled',
'label': x[0]}
)
for x in package_rows]

Expand Down
111 changes: 99 additions & 12 deletions tests/test_validate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
import pytest

from otoole.validate import compose_expression, create_schema, read_validation_config, validate
from yaml import load

from otoole.validate import (
compose_expression,
compose_multi_expression,
create_schema,
read_validation_config,
validate
)


@pytest.mark.parametrize(
Expand All @@ -18,6 +26,21 @@ def test_validate_fuel_code_true(name, expected):
assert actual == expected


@pytest.mark.parametrize(
    "name,expected",
    [
        ("DZAETH", True),
        ("AGOCR1", True),
        ("CO1AGO", True),
        ("AGOETHETH", False),
        (" ETH", False),
        ("DVA", False),
    ]
)
def test_validate_fuel_code_true_multi(name, expected):
    """Validate names against two alternative expressions joined with ``|``"""
    expression = "^(DZA|AGO)(ETH|CR1)|^(CO1)(AGO)"
    assert validate(expression, name) == expected


def test_compose_expression():

schema = [{'name': 'countries',
Expand All @@ -34,12 +57,44 @@ def test_compose_expression():
assert actual == expected


def test_compose_multi_expression():
    """Expressions of two schemas of one resource are joined with ``|``"""
    # ``yaml.load`` without an explicit Loader is deprecated (PyYAML 5.1) and
    # fails on PyYAML 6; ``safe_load`` is sufficient for this plain-data fixture
    from yaml import safe_load

    resource = safe_load("""
    - name: technology_name
      items:
      - name: countries
        valid:
        - DZA
        - AGO
        position: (1, 3)
      - name: fuels
        valid:
        - ETH
        - CR1
        position: (4, 6)
    - name: trade_link
      items:
      - name: countries
        valid:
        - DZA
        - AGO
        position: (1, 3)
      - name: tradelink
        valid:
        - EL1EX
        - NG1EX
        position: (4, 8)
    """)
    actual = compose_multi_expression(resource)
    expected = "^(DZA|AGO)(ETH|CR1)|^(DZA|AGO)(EL1EX|NG1EX)"
    assert actual == expected


def test_read_packaged_validation():

actual = read_validation_config()
expected = ['codes', 'schema']
assert list(actual.keys()) == expected
expected_codes = ['fuels', 'technologies', 'cooling', 'age', 'countries']
expected_codes = ['fuels', 'technologies', 'cooling', 'tradelink', 'age', 'countries']
assert list(actual['codes'].keys()) == expected_codes
assert list(actual['codes']['technologies'].keys()) == [
'CH', 'SC', 'CV', 'GC', 'LS', 'MS', 'SS', 'SA', 'RC', 'CC', 'PW',
Expand All @@ -51,26 +106,58 @@ def test_create_schema():

schema = {
'codes': {'countries': {'DZA': 'Algeria', 'AGO': 'Angola'}},
'schema': {'fuel_name': [{'name': 'countries',
'valid': 'countries',
'position': (1, 3)}]
'schema': {'FUEL': [{'items': [{'name': 'countries',
'valid': 'countries',
'position': (1, 3)}],
'name': 'fuels'}]
}
}
actual = create_schema(schema)
expected = {'FUEL': [{'items': [{'name': 'countries',
'valid': ['DZA', 'AGO'],
'position': (1, 3)}],
'name': 'fuels'}]
}
assert actual == expected


def test_create_schema_two_items():
    """Both schemas of a resource get their ``valid`` codes resolved to lists"""
    config = {
        'codes': {
            'countries': {'DZA': 'Algeria', 'AGO': 'Angola'},
            'other_countries': {'ELC': 'Electricity'},
        },
        'schema': {
            'FUEL': [
                {'items': [{'name': 'countries',
                            'valid': 'countries',
                            'position': (1, 3)}],
                 'name': 'countries'},
                {'items': [{'name': 'other_countries',
                            'valid': 'other_countries',
                            'position': (1, 3)}],
                 'name': 'other_countries'},
            ]
        },
    }
    expected = {
        'FUEL': [
            {'items': [{'name': 'countries',
                        'valid': ['DZA', 'AGO'],
                        'position': (1, 3)}],
             'name': 'countries'},
            {'items': [{'name': 'other_countries',
                        'valid': ['ELC'],
                        'position': (1, 3)}],
             'name': 'other_countries'},
        ]
    }

    result = create_schema(config)

    assert result == expected


def test_create_schema_duplicate_raises():

schema = {
'schema': {'fuel_name': [{'name': 'countries',
'valid': ['DZA', 'DZA'],
'position': (1, 3)}]
'schema': {'FUEL': [{'items': [{'name': 'countries',
'valid': ['DZA', 'DZA'],
'position': (1, 3)}],
'name': 'country'
}]
}
}
with pytest.raises(ValueError):
Expand Down

0 comments on commit a2631c4

Please sign in to comment.