Skip to content

Commit

Permalink
New --remove-empty-schema-columns flag to flatten
Browse files Browse the repository at this point in the history
  • Loading branch information
odscjames committed Oct 23, 2018
1 parent e0b12b5 commit c7be44e
Show file tree
Hide file tree
Showing 13 changed files with 127 additions and 13 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ examples/flatten/simple/actual
examples/flatten/simple/actual.*
examples/flatten/sheet-prefix/actual
examples/flatten/sheet-prefix/actual.*
examples/flatten/remove-empty-schema-columns/actual
examples/flatten/remove-empty-schema-columns/actual.*
examples/flatten/root-is-list/actual
examples/flatten/root-is-list/actual.*
examples/receipt/source-map/actual
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
### Added

- Add --disable-local-refs to flatten, unflatten and create-template.
- Add --remove-empty-schema-columns flag to flatten https://github.com/OpenDataServices/cove/issues/1019

## [0.3.0] - 2018-10-12

Expand Down
24 changes: 24 additions & 0 deletions docs/flatten.rst
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,30 @@ No `dishes` sheet is produced, and the main sheet does not have a `coffee` colum

The field specified must be a field directly on the data object - it's not possible to filter on fields like `pints/0/title`.

Remove Empty Schema Columns
---------------------------

By default, columns that are defined in the schema but contain no data are kept in the output.

Pass the `--remove-empty-schema-columns` flag to have these removed. Sub-sheets that end up with no rows of data are removed as well.

The examples below show the output first without the flag and then with it:

.. literalinclude:: ../examples/flatten/simple/cmd.txt
:language: bash

.. csv-table:: sheet: cafe.csv
:file: ../examples/flatten/simple/expected/cafe.csv
:header-rows: 1

.. literalinclude:: ../examples/flatten/remove-empty-schema-columns/cmd.txt
:language: bash

.. csv-table:: sheet: cafe.csv
:file: ../examples/flatten/remove-empty-schema-columns/expected/cafe.csv
:header-rows: 1


All flatten options
-------------------

Expand Down
1 change: 1 addition & 0 deletions examples/flatten/remove-empty-schema-columns/cmd.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
$ flatten-tool flatten --remove-empty-schema-columns --root-list-path=cafe --main-sheet-name=cafe --schema=examples/receipt/cafe.schema examples/flatten/remove-empty-schema-columns/input.json -o examples/flatten/remove-empty-schema-columns/actual
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id,name
CAFE-HEALTH,Healthy Cafe
CAFE-VEG,Vegetarian Cafe
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
id,table/0/id,table/0/number
CAFE-HEALTH,TABLE-1,1
CAFE-HEALTH,TABLE-2,2
CAFE-HEALTH,TABLE-3,3
CAFE-VEG,TABLE-16,16
CAFE-VEG,TABLE-17,17
36 changes: 36 additions & 0 deletions examples/flatten/remove-empty-schema-columns/input.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"cafe": [
{
"id": "CAFE-HEALTH",
"name": "Healthy Cafe",
"table": [
{
"id": "TABLE-1",
"number": "1"
},
{
"id": "TABLE-2",
"number": "2"
},
{
"id": "TABLE-3",
"number": "3"
}
]
},
{
"id": "CAFE-VEG",
"name": "Vegetarian Cafe",
"table": [
{
"id": "TABLE-16",
"number": "16"
},
{
"id": "TABLE-17",
"number": "17"
}
]
}
]
}
3 changes: 3 additions & 0 deletions examples/help/flatten/expected.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ usage: flatten-tool flatten [-h] [-s SCHEMA] [-f {csv,xlsx,all}] [--xml]
[--filter-field FILTER_FIELD]
[--filter-value FILTER_VALUE]
[--disable-local-refs]
[--remove-empty-schema-columns]
input_name

positional arguments:
Expand Down Expand Up @@ -46,3 +47,5 @@ optional arguments:
Data Filter - only data with this will be processed.
Use with --filter-field
--disable-local-refs Disable local refs when parsing JSON Schema.
--remove-empty-schema-columns
Remove columns from the schema that contain no data.
7 changes: 5 additions & 2 deletions flattentool/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ def spreadsheet_output(spreadsheet_output_class, name):

def flatten(input_name, schema=None, output_name='flattened', output_format='all', main_sheet_name='main',
root_list_path='main', root_is_list=False, sheet_prefix='', filter_field=None, filter_value=None,
rollup=False, root_id=None, use_titles=False, xml=False, id_name='id', disable_local_refs=False, **_):
rollup=False, root_id=None, use_titles=False, xml=False, id_name='id', disable_local_refs=False,
remove_empty_schema_columns=False, **_):
"""
Flatten a nested structure (JSON) to a flat structure (spreadsheet - csv or xlsx).
Expand Down Expand Up @@ -73,7 +74,9 @@ def flatten(input_name, schema=None, output_name='flattened', output_format='all
xml=xml,
id_name=id_name,
filter_field=filter_field,
filter_value=filter_value)
filter_value=filter_value,
remove_empty_schema_columns=remove_empty_schema_columns,
)
parser.parse()

def spreadsheet_output(spreadsheet_output_class, name):
Expand Down
5 changes: 4 additions & 1 deletion flattentool/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,10 @@ def create_parser():
"--disable-local-refs",
action='store_true',
help="Disable local refs when parsing JSON Schema.")

parser_flatten.add_argument(
"--remove-empty-schema-columns",
action='store_true',
help="Remove columns from the schema that contain no data.")

parser_unflatten = subparsers.add_parser(
'unflatten',
Expand Down
25 changes: 21 additions & 4 deletions flattentool/json_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@ def sheet_key_title(sheet, key):
"""
if key in sheet.titles:
return sheet.titles[key]
title = sheet.titles[key]
if title not in sheet:
sheet.append(title)
return title
else:
if key not in sheet:
sheet.append(key)
Expand All @@ -47,7 +50,8 @@ class JSONParser(object):
# Similarly with methods like parse_json_dict

def __init__(self, json_filename=None, root_json_dict=None, schema_parser=None, root_list_path=None,
root_id='ocid', use_titles=False, xml=False, id_name='id', filter_field=None, filter_value=None):
root_id='ocid', use_titles=False, xml=False, id_name='id', filter_field=None, filter_value=None,
remove_empty_schema_columns=False):
self.sub_sheets = {}
self.main_sheet = Sheet()
self.root_list_path = root_list_path
Expand All @@ -57,9 +61,16 @@ def __init__(self, json_filename=None, root_json_dict=None, schema_parser=None,
self.xml = xml
self.filter_field = filter_field
self.filter_value = filter_value
self.remove_empty_schema_columns = remove_empty_schema_columns
if schema_parser:
self.main_sheet = schema_parser.main_sheet
self.sub_sheets = schema_parser.sub_sheets
self.main_sheet = copy.deepcopy(schema_parser.main_sheet)
self.sub_sheets = copy.deepcopy(schema_parser.sub_sheets)
if remove_empty_schema_columns:
# Don't use columns from the schema parser
# (avoids empty columns)
self.main_sheet.columns = []
for sheet_name, sheet in list(self.sub_sheets.items()):
sheet.columns = []
# Rollup is pulled from the schema_parser, as rollup is only possible if a schema parser is specified
self.rollup = schema_parser.rollup
self.schema_parser = schema_parser
Expand Down Expand Up @@ -104,6 +115,12 @@ def parse(self):
# fallover on empty activity, e.g. <iati-activity/>
continue
self.parse_json_dict(json_dict, sheet=self.main_sheet)

if self.remove_empty_schema_columns:
# Remove sheets with no lines of data
for sheet_name, sheet in list(self.sub_sheets.items()):
if not sheet.lines:
del self.sub_sheets[sheet_name]

def parse_json_dict(self, json_dict, sheet, json_key=None, parent_name='', flattened_dict=None, parent_id_fields=None, top_level_of_sub_sheet=False):
"""
Expand Down
4 changes: 2 additions & 2 deletions flattentool/tests/test_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,9 @@ def test_examples_in_docs():
tests_passed += 1
# Check that the number of tests were run that we expected
if sys.version_info[:2] < (3,4):
assert tests_passed == 44
else:
assert tests_passed == 45
else:
assert tests_passed == 46

def _simplify_warnings(lines):
return '\n'.join([_simplify_line(line) for line in lines.split('\n')])
Expand Down
23 changes: 19 additions & 4 deletions flattentool/tests/test_json_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,13 +264,23 @@ def test_parse_ids_nested(self):


class TestParseUsingSchema(object):
def test_sub_sheet_names(self, tmpdir):
@pytest.mark.parametrize('remove_empty_schema_columns', [False, True])
def test_sub_sheets(self, tmpdir, remove_empty_schema_columns):
test_schema = tmpdir.join('test.json')
test_schema.write('''{
"properties": {
"c": {
"type": "array",
"items": {"$ref": "#/testB"}
},
"g": {
"type": "array",
"items": {
"type": "object",
"properties": {
"h": { "type": "string"}
}
}
}
},
"testB": {
Expand All @@ -291,15 +301,20 @@ def test_sub_sheet_names(self, tmpdir):
('a', 'b'),
('c', [OrderedDict([('d', 'e')])]),
])],
schema_parser=schema_parser
schema_parser=schema_parser,
remove_empty_schema_columns=remove_empty_schema_columns,
)
parser.parse()
assert list(parser.main_sheet) == [ 'a' ]
assert parser.main_sheet.lines == [
{'a': 'b'}
]
assert len(parser.sub_sheets) == 1
assert list(parser.sub_sheets['c']) == list(['ocid', 'c/0/d', 'c/0/f'])
assert len(parser.sub_sheets) == 2 if not remove_empty_schema_columns else 1
if not remove_empty_schema_columns:
assert list(parser.sub_sheets['c']) == list(['ocid', 'c/0/d', 'c/0/f'])
assert list(parser.sub_sheets['g']) == list(['ocid', 'g/0/h'])
else:
assert list(parser.sub_sheets['c']) == list(['ocid', 'c/0/d'])
assert parser.sub_sheets['c'].lines == [{'c/0/d':'e'}]

def test_column_matching(self, tmpdir):
Expand Down

0 comments on commit c7be44e

Please sign in to comment.