From 90131d185d3bb96ca1a6230c79c122d4e31f811c Mon Sep 17 00:00:00 2001 From: Ben Webb Date: Mon, 19 Jan 2015 17:17:48 +0000 Subject: [PATCH] [#45] Add option to use titles when creating a template --- README.md | 2 +- flattening_ocds/__init__.py | 4 ++-- flattening_ocds/cli.py | 4 ++++ flattening_ocds/schema.py | 13 ++++++++++--- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index c358e805..da21fdc9 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,6 @@ However, Python 2 can not load CSVs that contain the NULL character. This includ There is work currently in progress to convert this codebase to also flatten 360 giving files. - flatten-ocds create-template --output-format all --output-name 360giving-template --schema 360-giving-schema.json --main-sheet-name activity + flatten-ocds create-template --output-format all --output-name 360giving-template --schema 360-giving-schema.json --main-sheet-name grants --rollup --use-titles flatten-ocds unflatten --root-id='' -o out.json -f xlsx --main-sheet-name=grants input.xlsx --schema 360-giving-schema.json --convert-titles diff --git a/flattening_ocds/__init__.py b/flattening_ocds/__init__.py index 6bdfe954..5d2184ab 100644 --- a/flattening_ocds/__init__.py +++ b/flattening_ocds/__init__.py @@ -8,14 +8,14 @@ from collections import OrderedDict -def create_template(schema, output_name='releases', output_format='all', main_sheet_name='main', flatten=False, rollup=False, root_id='ocid', **_): +def create_template(schema, output_name='releases', output_format='all', main_sheet_name='main', flatten=False, rollup=False, root_id='ocid', use_titles=False, **_): """ Creates template file(s) from given inputs This function is built to deal with commandline input and arguments but to also be called from elswhere in future """ - parser = SchemaParser(schema_filename=schema, main_sheet_name=main_sheet_name, rollup=rollup, root_id=root_id) + parser = SchemaParser(schema_filename=schema, 
main_sheet_name=main_sheet_name, rollup=rollup, root_id=root_id, use_titles=use_titles) parser.parse() def spreadsheet_output(spreadsheet_output_class, name): diff --git a/flattening_ocds/cli.py b/flattening_ocds/cli.py index ca83bade..d8067b01 100644 --- a/flattening_ocds/cli.py +++ b/flattening_ocds/cli.py @@ -31,6 +31,10 @@ def create_parser(): parser_create_template.add_argument( "-r", "--root-id", help="Root ID of the data format, e.g. ocid for OCDS and blank for 360Giving (use --root-id=''). Defaults to ocid.") + parser_create_template.add_argument( + "--use-titles", + action='store_true', + help="Use titles from the schema, rather than field paths, as column headings.") parser_flatten = subparsers.add_parser( 'flatten', diff --git a/flattening_ocds/schema.py b/flattening_ocds/schema.py index 4f3dce90..5527675f 100644 --- a/flattening_ocds/schema.py +++ b/flattening_ocds/schema.py @@ -37,7 +37,7 @@ def get_property_type_set(property_schema_dict): class SchemaParser(object): """Parse the fields of a JSON schema into a flattened structure.""" - def __init__(self, schema_filename=None, root_schema_dict=None, main_sheet_name='main', rollup=False, root_id='ocid'): + def __init__(self, schema_filename=None, root_schema_dict=None, main_sheet_name='main', rollup=False, root_id='ocid', use_titles=False): self.sub_sheets = {} self.main_sheet = [] self.sub_sheet_mapping = {} @@ -46,6 +46,7 @@ def __init__(self, schema_filename=None, root_schema_dict=None, main_sheet_name= self.root_id = root_id self.main_sheet_titles = {} self.sub_sheet_titles = {} + self.use_titles = use_titles if root_schema_dict is None and schema_filename is None: raise ValueError('One of schema_filename or root_schema_dict must be supplied') @@ -60,7 +61,10 @@ def __init__(self, schema_filename=None, root_schema_dict=None, main_sheet_name= def parse(self): fields = self.parse_schema_dict(self.main_sheet_name, self.root_schema_dict) for field, title in fields: - self.main_sheet.append(field) + if self.use_titles and title: + 
self.main_sheet.append(title) + else: + self.main_sheet.append(field) if title: self.main_sheet_titles[title] = field @@ -110,7 +114,10 @@ def parse_schema_dict(self, parent_name, schema_dict, parent_id_fields=None): property_schema_dict['items'], parent_id_fields=id_fields) for field, child_title in fields: - sub_sheet.add_field(field) + if self.use_titles and child_title: + sub_sheet.add_field(child_title) + else: + sub_sheet.add_field(field) if child_title: self.sub_sheet_titles[sub_sheet_name][child_title] = field if self.rollup and 'rollUp' in property_schema_dict and field in property_schema_dict['rollUp']: