diff --git a/cove/lib/common.py b/cove/lib/common.py
index 8d53f90c4..3b93c8cac 100644
--- a/cove/lib/common.py
+++ b/cove/lib/common.py
@@ -16,13 +16,15 @@
 from flattentool import unflatten
 from jsonschema import FormatChecker, RefResolver
 from jsonschema.exceptions import ValidationError
-from jsonschema.validators import Draft4Validator as validator
+import jsonschema.validators
 from django.utils.html import escape, conditional_escape, format_html
 
 from cove.lib.exceptions import cove_spreadsheet_conversion_error
 from cove.lib.tools import cached_get_request, decimal_default
 
-
+# Because we will be changing items on this validator, it's important we take a copy!
+# Otherwise we could cause conflicts with other software in the same process.
+validator = jsonschema.validators.extend(jsonschema.validators.Draft4Validator, validators={})
 uniqueItemsValidator = validator.VALIDATORS.pop("uniqueItems")
 LANGUAGE_RE = re.compile("^(.*_(((([A-Za-z]{2,3}(-([A-Za-z]{3}(-[A-Za-z]{3}){0,2}))?)|[A-Za-z]{4}|[A-Za-z]{5,8})(-([A-Za-z]{4}))?(-([A-Za-z]{2}|[0-9]{3}))?(-([A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(-([0-9A-WY-Za-wy-z](-[A-Za-z0-9]{2,8})+))*(-(x(-[A-Za-z0-9]{1,8})+))?)|(x(-[A-Za-z0-9]{1,8})+)))$")
 validation_error_template_lookup = {'date-time': 'Date is not in the correct format',
diff --git a/cove_ocds/lib/api.py b/cove_ocds/lib/api.py
deleted file mode 100644
index 262efa5fb..000000000
--- a/cove_ocds/lib/api.py
+++ /dev/null
@@ -1,133 +0,0 @@
-import json
-import os
-
-from .schema import SchemaOCDS
-from .ocds import common_checks_ocds
-from cove.lib.common import get_spreadsheet_meta_data
-from cove.lib.converters import convert_spreadsheet, convert_json
-from cove.lib.tools import get_file_type
-
-
-class APIException(Exception):
-    pass
-
-
-def context_api_transform(context):
-    validation_errors = context.get('validation_errors')
-    context['validation_errors'] = []
-
-    context.pop('validation_errors_count')
-
-    extensions = context.get('extensions')
-    context['extensions'] = {}
-
-    deprecated_fields = context.get('deprecated_fields')
-    context['deprecated_fields'] = []
-
-    additional_fields = context.pop('data_only')
-    context['additional_fields'] = []
-    context.pop('additional_fields_count')
-
-    context['ocds_prefixes_bad_format'] = list(context.pop('ocds_prefixes_bad_format', []))
-
-    if validation_errors:
-        for error_group in validation_errors:
-            error = json.loads(error_group[0])
-            for path_value in error_group[1]:
-                context['validation_errors'].append({
-                    'type': error['message_type'],
-                    'field': error['path_no_number'],
-                    'description': error['message'],
-                    'path': path_value.get('path', ''),
-                    'value': path_value.get('value', '')
-                })
-
-    if extensions:
-        invalid_extensions = extensions.get('invalid_extension')
-        context['extensions']['extensions'] = []
-        for key, value in extensions['extensions'].items():
-            if key not in invalid_extensions:
-                context['extensions']['extensions'].append(value)
-        context['extensions']['invalid_extensions'] = []
-        for key, value in invalid_extensions.items():
-            context['extensions']['invalid_extensions'].append([key, value])
-        context['extensions']['extended_schema_url'] = extensions['extended_schema_url']
-        context['extensions']['is_extended_schema'] = extensions['is_extended_schema']
-
-    if deprecated_fields:
-        for key, value in deprecated_fields.items():
-            value.update({'field': key})
-            context['deprecated_fields'].append(value)
-
-    if additional_fields:
-        for field_group in additional_fields:
-            context['additional_fields'].append({
-                'path': field_group[0],
-                'field': field_group[1],
-                'usage_count': field_group[2]
-            })
-
-    return context
-
-
-def ocds_json_output(output_dir, file, schema_version, convert, cache_schema=False, file_type=None, json_data=None):
-    context = {}
-    if not file_type:
-        file_type = get_file_type(file)
-    context = {"file_type": file_type}
-
-    if file_type == 'json':
-        if not json_data:
-            with open(file, encoding='utf-8') as fp:
-                try:
-                    json_data = json.load(fp)
-                except ValueError:
-                    raise APIException('The file looks like invalid json')
-
-        schema_ocds = SchemaOCDS(schema_version, json_data, cache_schema=cache_schema)
-
-        if schema_ocds.invalid_version_data:
-            msg = '\033[1;31mThe schema version in your data is not valid. Accepted values: {}\033[1;m'
-            raise APIException(msg.format(str(list(schema_ocds.version_choices.keys()))))
-        if schema_ocds.extensions:
-            schema_ocds.create_extended_release_schema_file(output_dir, "")
-
-        url = schema_ocds.extended_schema_file or schema_ocds.release_schema_url
-
-        if convert:
-            context.update(convert_json(
-                output_dir, '', file, schema_url=url, flatten=True, cache=False)
-            )
-
-    else:
-        metatab_schema_url = SchemaOCDS(select_version='1.1').release_pkg_schema_url
-        metatab_data = get_spreadsheet_meta_data(output_dir, file, metatab_schema_url, file_type=file_type)
-        schema_ocds = SchemaOCDS(schema_version, release_data=metatab_data, cache_schema=cache_schema)
-
-        if schema_ocds.invalid_version_data:
-            msg = '\033[1;31mThe schema version in your data is not valid. Accepted values: {}\033[1;m'
-            raise APIException(msg.format(str(list(schema_ocds.version_choices.keys()))))
-        if schema_ocds.extensions:
-            schema_ocds.create_extended_release_schema_file(output_dir, '')
-
-        url = schema_ocds.extended_schema_file or schema_ocds.release_schema_url
-        pkg_url = schema_ocds.release_pkg_schema_url
-
-        context.update(convert_spreadsheet(
-            output_dir, '', file, file_type, schema_url=url, pkg_schema_url=pkg_url, cache=False)
-        )
-
-        with open(context['converted_path'], encoding='utf-8') as fp:
-            json_data = json.load(fp)
-
-    context = context_api_transform(
-        common_checks_ocds(context, output_dir, json_data, schema_ocds, api=True, cache=False)
-    )
-
-    if file_type == 'xlsx':
-        # Remove unwanted files in the output
-        # TODO: can we do this by no writing the files in the first place?
-        os.remove(os.path.join(output_dir, 'heading_source_map.json'))
-        os.remove(os.path.join(output_dir, 'cell_source_map.json'))
-
-    return context
diff --git a/cove_ocds/lib/ocds.py b/cove_ocds/lib/ocds.py
deleted file mode 100644
index 152e3bd5f..000000000
--- a/cove_ocds/lib/ocds.py
+++ /dev/null
@@ -1,509 +0,0 @@
-import re
-import json
-import collections
-
-import cove.lib.tools as tools
-from cove.lib.common import common_checks_context, get_additional_codelist_values
-
-from django.utils.html import mark_safe, escape, conditional_escape, format_html
-
-import CommonMark
-import bleach
-
-
-validation_error_lookup = {
-    'date-time': mark_safe('Incorrect date format. Dates should use the form YYYY-MM-DDT00:00:00Z. Learn more about dates in OCDS.'),
-}
-
-
-@tools.ignore_errors
-def get_releases_aggregates(json_data):
-    release_count = 0
-    unique_ocids = set()
-    tags = collections.Counter()
-    unique_lang = set()
-    unique_initation_type = set()
-    unique_release_ids = set()
-    duplicate_release_ids = set()
-
-    ##for matching with contracts
-    unique_award_id = set()
-
-    planning_ocids = set()
-    tender_ocids = set()
-    awardid_ocids = set()
-    award_ocids = set()
-    contractid_ocids = set()
-    contract_ocids = set()
-    implementation_contractid_ocids = set()
-    implementation_ocids = set()
-
-    release_dates = []
-    tender_dates = []
-    award_dates = []
-    contract_dates = []
-
-    unique_buyers_identifier = dict()
-    unique_buyers_name_no_id = set()
-    unique_suppliers_identifier = dict()
-    unique_suppliers_name_no_id = set()
-    unique_procuring_identifier = dict()
-    unique_procuring_name_no_id = set()
-    unique_tenderers_identifier = dict()
-    unique_tenderers_name_no_id = set()
-
-    unique_organisation_schemes = set()
-    organisation_identifier_address = set()
-    organisation_name_no_id_address = set()
-    organisation_identifier_contact_point = set()
-    organisation_name_no_id_contact_point = set()
-
-    release_tender_item_ids = set()
-    release_award_item_ids = set()
-    release_contract_item_ids = set()
-    item_identifier_schemes = set()
-
-    unique_currency = set()
-
-    planning_doctype = collections.Counter()
-    planning_doc_count = 0
-    tender_doctype = collections.Counter()
-    tender_doc_count = 0
-    tender_milestones_doctype = collections.Counter()
-    tender_milestones_doc_count = 0
-    award_doctype = collections.Counter()
-    award_doc_count = 0
-    contract_doctype = collections.Counter()
-    contract_doc_count = 0
-    implementation_doctype = collections.Counter()
-    implementation_doc_count = 0
-    implementation_milestones_doctype = collections.Counter()
-    implementation_milestones_doc_count = 0
-
-    def process_org(org, unique_id, unique_name):
-        identifier = org.get('identifier')
-        org_id = None
-        if identifier:
-            org_id = identifier.get('id')
-            if org_id:
-                unique_id[org_id] = org.get('name', '') or ''
-                scheme = identifier.get('scheme')
-                if scheme:
-                    unique_organisation_schemes.add(scheme)
-                if org.get('address'):
-                    organisation_identifier_address.add(org_id)
-                if org.get('contactPoint'):
-                    organisation_identifier_contact_point.add(org_id)
-        if not org_id:
-            name = org.get('name')
-            if name:
-                unique_name.add(name)
-                if org.get('address'):
-                    organisation_name_no_id_address.add(name)
-                if org.get('contactPoint'):
-                    organisation_name_no_id_contact_point.add(name)
-
-    def get_item_scheme(item):
-        classification = item.get('classification')
-        if classification:
-            scheme = classification.get('scheme')
-            if scheme:
-                item_identifier_schemes.add(scheme)
-
-    releases = tools.get_no_exception(json_data, 'releases', [])
-    for release in releases:
-        # ### Release Section ###
-        release_count = release_count + 1
-        ocid = release.get('ocid')
-        release_id = release.get('id')
-        if not ocid:
-            continue
-        if release_id:
-            if release_id in unique_release_ids:
-                duplicate_release_ids.add(release_id)
-            unique_release_ids.add(release_id)
-
-        unique_ocids.add(release['ocid'])
-        if 'tag' in release:
-            tags.update(tools.to_list(release['tag']))
-        initiation_type = release.get('initiationType')
-        if initiation_type:
-            unique_initation_type.add(initiation_type)
-
-        release_date = release.get('date', '')
-        if release_date:
-            release_dates.append(str(release_date))
-
-        if 'language' in release:
-            unique_lang.add(release['language'])
-        buyer = release.get('buyer')
-        if buyer:
-            process_org(buyer, unique_buyers_identifier, unique_buyers_name_no_id)
-
-        # ### Planning Section ###
-        planning = tools.get_no_exception(release, 'planning', {})
-        if planning and isinstance(planning, dict):
-            planning_ocids.add(ocid)
-            planning_doc_count += tools.update_docs(planning, planning_doctype)
-
-        # ### Tender Section ###
-        tender = tools.get_no_exception(release, 'tender', {})
-        if tender and isinstance(tender, dict):
-            tender_ocids.add(ocid)
-            tender_doc_count += tools.update_docs(tender, tender_doctype)
-            tender_period = tender.get('tenderPeriod')
-            if tender_period:
-                start_date = tender_period.get('startDate', '')
-                if start_date:
-                    tender_dates.append(str(start_date))
-            procuring_entity = tender.get('procuringEntity')
-            if procuring_entity:
-                process_org(procuring_entity, unique_procuring_identifier, unique_procuring_name_no_id)
-            tenderers = tender.get('tenderers', [])
-            for tenderer in tenderers:
-                process_org(tenderer, unique_tenderers_identifier, unique_tenderers_name_no_id)
-            tender_items = tender.get('items', [])
-            for item in tender_items:
-                item_id = item.get('id')
-                if item_id and release_id:
-                    release_tender_item_ids.add((ocid, release_id, item_id))
-                get_item_scheme(item)
-            milestones = tender.get('milestones')
-            if milestones:
-                for milestone in milestones:
-                    tender_milestones_doc_count += tools.update_docs(milestone, tender_milestones_doctype)
-
-        # ### Award Section ###
-        awards = tools.get_no_exception(release, 'awards', [])
-        for award in awards:
-            if not isinstance(award, dict):
-                continue
-            award_id = award.get('id')
-            award_ocids.add(ocid)
-            if award_id:
-                unique_award_id.add(award_id)
-                awardid_ocids.add((award_id, ocid))
-            award_date = award.get('date', '')
-            if award_date:
-                award_dates.append(str(award_date))
-            award_items = award.get('items', [])
-            for item in award_items:
-                item_id = item.get('id')
-                if item_id and release_id and award_id:
-                    release_award_item_ids.add((ocid, release_id, award_id, item_id))
-                get_item_scheme(item)
-            suppliers = award.get('suppliers', [])
-            for supplier in suppliers:
-                process_org(supplier, unique_suppliers_identifier, unique_suppliers_name_no_id)
-            award_doc_count += tools.update_docs(award, award_doctype)
-
-        # ### Contract section
-        contracts = tools.get_no_exception(release, 'contracts', [])
-        for contract in contracts:
-            contract_id = contract.get('id')
-            contract_ocids.add(ocid)
-            if contract_id:
-                contractid_ocids.add((contract_id, ocid))
-            period = contract.get('period')
-            if period:
-                start_date = period.get('startDate', '')
-                if start_date:
-                    contract_dates.append(start_date)
-            contract_items = contract.get('items', [])
-            for item in contract_items:
-                item_id = item.get('id')
-                if item_id and release_id and contract_id:
-                    release_contract_item_ids.add((ocid, release_id, contract_id, item_id))
-                get_item_scheme(item)
-            contract_doc_count += tools.update_docs(contract, contract_doctype)
-            implementation = contract.get('implementation')
-            if implementation:
-                implementation_ocids.add(ocid)
-                if contract_id:
-                    implementation_contractid_ocids.add((contract_id, ocid))
-                implementation_doc_count += tools.update_docs(implementation, implementation_doctype)
-                implementation_milestones = implementation.get('milestones', [])
-                for milestone in implementation_milestones:
-                    implementation_milestones_doc_count += tools.update_docs(milestone, implementation_milestones_doctype)
-
-    contracts_without_awards = []
-    for release in releases:
-        contracts = release.get('contracts', [])
-        for contract in contracts:
-            award_id = contract.get('awardID')
-            if award_id not in unique_award_id:
-                contracts_without_awards.append(contract)
-
-    unique_buyers_count = len(unique_buyers_identifier) + len(unique_buyers_name_no_id)
-    unique_buyers = [name + ' (' + str(id) + ')' for id, name in unique_buyers_identifier.items()] + list(unique_buyers_name_no_id)
-
-    unique_suppliers_count = len(unique_suppliers_identifier) + len(unique_suppliers_name_no_id)
-    unique_suppliers = [name + ' (' + str(id) + ')' for id, name in unique_suppliers_identifier.items()] + list(unique_suppliers_name_no_id)
-
-    unique_procuring_count = len(unique_procuring_identifier) + len(unique_procuring_name_no_id)
-    unique_procuring = [name + ' (' + str(id) + ')' for id, name in unique_procuring_identifier.items()] + list(unique_procuring_name_no_id)
-
-    unique_tenderers_count = len(unique_tenderers_identifier) + len(unique_tenderers_name_no_id)
-    unique_tenderers = [name + ' (' + str(id) + ')' for id, name in unique_tenderers_identifier.items()] + list(unique_tenderers_name_no_id)
-
-    unique_org_identifier_count = len(set(unique_buyers_identifier) |
-                                      set(unique_suppliers_identifier) |
-                                      set(unique_procuring_identifier) |
-                                      set(unique_tenderers_identifier))
-    unique_org_name_count = len(unique_buyers_name_no_id |
-                                unique_suppliers_name_no_id |
-                                unique_procuring_name_no_id |
-                                unique_tenderers_name_no_id)
-    unique_org_count = unique_org_identifier_count + unique_org_name_count
-
-    def get_currencies(object):
-        if isinstance(object, dict):
-            for key, value in object.items():
-                if key == 'currency':
-                    unique_currency.add(value)
-                get_currencies(value)
-        if isinstance(object, list):
-            for item in object:
-                get_currencies(item)
-    get_currencies(json_data)
-
-    return dict(
-        release_count=release_count,
-        unique_ocids=sorted(unique_ocids, key=lambda x: str(x)),
-        unique_initation_type=sorted(unique_initation_type, key=lambda x: str(x)),
-        duplicate_release_ids=sorted(duplicate_release_ids, key=lambda x: str(x)),
-        tags=dict(tags),
-        unique_lang=sorted(unique_lang, key=lambda x: str(x)),
-        unique_award_id=sorted(unique_award_id, key=lambda x: str(x)),
-
-        planning_count=len(planning_ocids),
-        tender_count=len(tender_ocids),
-        award_count=len(awardid_ocids),
-        processes_award_count=len(award_ocids),
-        contract_count=len(contractid_ocids),
-        processes_contract_count=len(contract_ocids),
-        implementation_count=len(implementation_contractid_ocids),
-        processes_implementation_count=len(implementation_ocids),
-
-        min_release_date=min(release_dates) if release_dates else '',
-        max_release_date=max(release_dates) if release_dates else '',
-        min_tender_date=min(tender_dates) if tender_dates else '',
-        max_tender_date=max(tender_dates) if tender_dates else '',
-        min_award_date=min(award_dates) if award_dates else '',
-        max_award_date=max(award_dates) if award_dates else '',
-        min_contract_date=min(contract_dates) if contract_dates else '',
-        max_contract_date=max(contract_dates) if contract_dates else '',
-
-        unique_buyers_identifier=unique_buyers_identifier,
-        unique_buyers_name_no_id=sorted(unique_buyers_name_no_id, key=lambda x: str(x)),
-        unique_suppliers_identifier=unique_suppliers_identifier,
-        unique_suppliers_name_no_id=sorted(unique_suppliers_name_no_id, key=lambda x: str(x)),
-        unique_procuring_identifier=unique_procuring_identifier,
-        unique_procuring_name_no_id=sorted(unique_procuring_name_no_id, key=lambda x: str(x)),
-        unique_tenderers_identifier=unique_tenderers_identifier,
-        unique_tenderers_name_no_id=sorted(unique_tenderers_name_no_id, key=lambda x: str(x)),
-
-        unique_buyers=sorted(set(unique_buyers)),
-        unique_suppliers=sorted(set(unique_suppliers)),
-        unique_procuring=sorted(set(unique_procuring)),
-        unique_tenderers=sorted(set(unique_tenderers)),
-
-        unique_buyers_count=unique_buyers_count,
-        unique_suppliers_count=unique_suppliers_count,
-        unique_procuring_count=unique_procuring_count,
-        unique_tenderers_count=unique_tenderers_count,
-
-        unique_org_identifier_count=unique_org_identifier_count,
-        unique_org_name_count=unique_org_name_count,
-        unique_org_count=unique_org_count,
-
-        unique_organisation_schemes=sorted(unique_organisation_schemes, key=lambda x: str(x)),
-
-        organisations_with_address=len(organisation_identifier_address) + len(organisation_name_no_id_address),
-        organisations_with_contact_point=len(organisation_identifier_contact_point) + len(organisation_name_no_id_contact_point),
-
-        total_item_count=len(release_tender_item_ids) + len(release_award_item_ids) + len(release_contract_item_ids),
-        tender_item_count=len(release_tender_item_ids),
-        award_item_count=len(release_award_item_ids),
-        contract_item_count=len(release_contract_item_ids),
-
-        item_identifier_schemes=sorted(item_identifier_schemes, key=lambda x: str(x)),
-        unique_currency=sorted(unique_currency, key=lambda x: str(x)),
-
-        planning_doc_count=planning_doc_count,
-        tender_doc_count=tender_doc_count,
-        tender_milestones_doc_count=tender_milestones_doc_count,
-        award_doc_count=award_doc_count,
-        contract_doc_count=contract_doc_count,
-        implementation_doc_count=implementation_doc_count,
-        implementation_milestones_doc_count=implementation_milestones_doc_count,
-
-        planning_doctype=dict(planning_doctype),
-        tender_doctype=dict(tender_doctype),
-        tender_milestones_doctype=dict(tender_milestones_doctype),
-        award_doctype=dict(award_doctype),
-        contract_doctype=dict(contract_doctype),
-        implementation_doctype=dict(implementation_doctype),
-        implementation_milestones_doctype=dict(implementation_milestones_doctype),
-
-        contracts_without_awards=contracts_without_awards,
-    )
-
-
-def _lookup_schema(schema, path, ref_info=None):
-    if len(path) == 0:
-        return schema, ref_info
-    if hasattr(schema, '__reference__'):
-        ref_info = {
-            'path': path,
-            'reference': schema.__reference__,
-        }
-    path_item, *child_path = path
-    if 'items' in schema:
-        return _lookup_schema(schema['items'], path, ref_info)
-    elif 'properties' in schema:
-        if path_item in schema['properties']:
-            return _lookup_schema(schema['properties'][path_item], child_path, ref_info)
-        else:
-            return None, None
-
-
-def lookup_schema(schema, path):
-    return _lookup_schema(schema, path.split('/'))
-
-
-def common_checks_ocds(context, upload_dir, json_data, schema_obj, api=False, cache=True):
-    schema_name = schema_obj.release_pkg_schema_name
-    if 'records' in json_data:
-        schema_name = schema_obj.record_pkg_schema_name
-    common_checks = common_checks_context(upload_dir, json_data, schema_obj, schema_name, context,
-                                          fields_regex=True, api=api, cache=cache)
-    validation_errors = common_checks['context']['validation_errors']
-
-    new_validation_errors = []
-    for (json_key, values) in validation_errors:
-        error = json.loads(json_key)
-        new_message = validation_error_lookup.get(error['message_type'])
-        if new_message:
-            error['message_safe'] = conditional_escape(new_message)
-        else:
-            if 'message_safe' in error:
-                error['message_safe'] = mark_safe(error['message_safe'])
-            else:
-                error['message_safe'] = conditional_escape(error['message'])
-
-        schema_block, ref_info = lookup_schema(schema_obj.get_release_pkg_schema_obj(deref=True), error['path_no_number'])
-        if schema_block and error['message_type'] != 'required':
-            if 'description' in schema_block:
-                error['schema_title'] = escape(schema_block.get('title', ''))
-                error['schema_description_safe'] = mark_safe(bleach.clean(
-                    CommonMark.commonmark(schema_block['description']),
-                    tags=bleach.sanitizer.ALLOWED_TAGS + ['p']
-                ))
-            if ref_info:
-                ref = ref_info['reference']['$ref']
-                if ref.endswith('release-schema.json'):
-                    ref = ''
-                else:
-                    ref = ref.strip('#')
-                ref_path = '/'.join(ref_info['path'])
-                schema = 'release-schema.json'
-            else:
-                ref = ''
-                ref_path = error['path_no_number']
-                schema = 'release-package-schema.json'
-            error['docs_ref'] = format_html('{},{},{}', schema, ref, ref_path)
-
-        new_validation_errors.append([json.dumps(error, sort_keys=True), values])
-    common_checks['context']['validation_errors'] = new_validation_errors
-
-    context.update(common_checks['context'])
-
-    if schema_name == 'record-package-schema.json':
-        context['records_aggregates'] = get_records_aggregates(json_data, ignore_errors=bool(validation_errors))
-        context['schema_url'] = schema_obj.record_pkg_schema_url
-    else:
-        additional_codelist_values = get_additional_codelist_values(schema_obj, json_data)
-        closed_codelist_values = {key: value for key, value in additional_codelist_values.items() if not value['isopen']}
-        open_codelist_values = {key: value for key, value in additional_codelist_values.items() if value['isopen']}
-
-        context.update({
-            'releases_aggregates': get_releases_aggregates(json_data, ignore_errors=bool(validation_errors)),
-            'additional_closed_codelist_values': closed_codelist_values,
-            'additional_open_codelist_values': open_codelist_values
-        })
-
-    context = add_conformance_rule_errors(context, json_data, schema_obj)
-    return context
-
-
-@tools.ignore_errors
-def get_records_aggregates(json_data):
-    # Unique ocids
-    unique_ocids = set()
-
-    if 'records' in json_data:
-        for record in json_data['records']:
-            # Gather all the ocids
-            if 'ocid' in record:
-                unique_ocids.add(record['ocid'])
-
-    # Number of records
-    count = len(json_data['records']) if 'records' in json_data else 0
-
-    return {
-        'count': count,
-        'unique_ocids': unique_ocids,
-    }
-
-
-def get_bad_ocds_prefixes(json_data):
-    '''Yield tuples with ('ocid', 'path/to/ocid') for ocids with malformed prefixes'''
-    prefix_regex = re.compile(r'^ocds-[a-zA-Z0-9]{6}-')
-    releases = json_data.get('releases', [])
-    records = json_data.get('records', [])
-    bad_prefixes = []
-
-    if releases and isinstance(releases, list):
-        for n_rel, release in enumerate(releases):
-            if not isinstance(release, dict):
-                continue
-            ocid = release.get('ocid', '')
-            if ocid and isinstance(ocid, str) and not prefix_regex.match(ocid):
-                bad_prefixes.append((ocid, 'releases/%s/ocid' % n_rel))
-
-    elif records and isinstance(records, list):
-        for n_rec, record in enumerate(records):
-            if not isinstance(record, dict):
-                continue
-            for n_rel, release in enumerate(record.get('releases', {})):
-                ocid = release.get('ocid', '')
-                if ocid and not prefix_regex.match(ocid):
-                    bad_prefixes.append((ocid, 'records/%s/releases/%s/ocid' % (n_rec, n_rel)))
-
-            compiled_release = record.get('compiledRelease', {})
-            if compiled_release:
-                ocid = compiled_release.get('ocid', '')
-                if ocid and not prefix_regex.match(ocid):
-                    bad_prefixes.append((ocid, 'records/%s/compiledRelease/ocid' % n_rec))
-
-    return bad_prefixes
-
-
-def add_conformance_rule_errors(context, json_data, schema_obj):
-    '''Return context dict augmented with conformance errors if any'''
-    ocds_prefixes_bad_format = get_bad_ocds_prefixes(json_data)
-
-    if ocds_prefixes_bad_format:
-        ocid_schema_description = schema_obj.get_release_schema_obj()['properties']['ocid']['description']
-        ocid_info_index = ocid_schema_description.index('For more information')
-        ocid_description = ocid_schema_description[:ocid_info_index]
-        ocid_info_url = ocid_schema_description[ocid_info_index:].split('[')[1].split(']')[1][1:-1]
-        context['conformance_errors'] = {
-            'ocds_prefixes_bad_format': ocds_prefixes_bad_format,
-            'ocid_description': ocid_description,
-            'ocid_info_url': ocid_info_url
-        }
-
-    return context
diff --git a/cove_ocds/lib/schema.py b/cove_ocds/lib/schema.py
deleted file mode 100644
index 8de11ba73..000000000
--- a/cove_ocds/lib/schema.py
+++ /dev/null
@@ -1,305 +0,0 @@
-import os
-import json
-from copy import deepcopy
-from urllib.parse import urljoin, urlparse
-from collections import OrderedDict
-
-import json_merge_patch
-import requests
-from cached_property import cached_property
-from django.conf import settings
-from django.utils import translation
-
-
-from cove.lib.common import SchemaJsonMixin, schema_dict_fields_generator, get_schema_codelist_paths, load_core_codelists, load_codelist
-from cove.lib.tools import cached_get_request
-
-
-config = settings.COVE_CONFIG
-
-
-class SchemaOCDS(SchemaJsonMixin):
-    release_schema_name = config['schema_item_name']
-    release_pkg_schema_name = config['schema_name']['release']
-    record_pkg_schema_name = config['schema_name']['record']
-    version_choices = config['schema_version_choices']
-    default_version = config['schema_version']
-    default_schema_host = version_choices[default_version][1]
-
-    def __init__(self, select_version=None, release_data=None, cache_schema=False):
-        '''Build the schema object using an specific OCDS schema version
-
-        The version used will be select_version, release_data.get('version') or
-        default version, in that order. Invalid version choices in select_version or
-        release_data will be skipped and registered as self.invalid_version_argument
-        and self.invalid_version_data respectively.
-        '''
-        self.version = self.default_version
-        self.schema_host = self.default_schema_host
-        self.cache_schema = cache_schema
-
-        # Missing package is only for original json data
-        self.missing_package = False
-        if release_data:
-            if 'version' not in release_data:
-                self.version = '1.0'
-                self.schema_host = self.version_choices['1.0'][1]
-            if 'releases' not in release_data and 'records' not in release_data:
-                self.missing_package = True
-
-        self.invalid_version_argument = False
-        self.invalid_version_data = False
-        self.json_deref_error = None
-        self.extensions = {}
-        self.invalid_extension = {}
-        self.extended = False
-        self.extended_schema_file = None
-        self.extended_schema_url = None
-        self.codelists = config['schema_codelists']['1.1']
-
-        if select_version:
-            try:
-                self.version_choices[select_version]
-            except KeyError:
-                select_version = None
-                self.invalid_version_argument = True
-            else:
-                self.version = select_version
-                self.schema_host = self.version_choices[select_version][1]
-
-        if hasattr(release_data, 'get'):
-            data_extensions = release_data.get('extensions', {})
-            if data_extensions:
-                self.extensions = OrderedDict((ext, tuple()) for ext in data_extensions if type(ext) == str)
-            if not select_version:
-                release_version = release_data and release_data.get('version')
-                if release_version:
-                    version_choice = self.version_choices.get(release_version)
-                    if version_choice:
-                        self.version = release_version
-                        self.schema_host = version_choice[1]
-                    else:
-                        self.invalid_version_data = True
-        else:
-            pass
-
-        self.release_schema_url = urljoin(self.schema_host, self.release_schema_name)
-        self.release_pkg_schema_url = urljoin(self.schema_host, self.release_pkg_schema_name)
-        self.record_pkg_schema_url = urljoin(self.schema_host, self.record_pkg_schema_name)
-
-    def process_codelists(self):
-        self.core_codelist_schema_paths = get_schema_codelist_paths(self, use_extensions=False)
-        self.extended_codelist_schema_paths = get_schema_codelist_paths(self, use_extensions=True)
-
-        core_unique_files = frozenset(value[0] for value in self.core_codelist_schema_paths.values())
-        self.core_codelists = load_core_codelists(self.codelists, core_unique_files)
-
-        self.extended_codelists = deepcopy(self.core_codelists)
-        self.extended_codelist_urls = {}
-        # we do not want to cache if the requests failed.
-        if not self.core_codelists:
-            load_core_codelists.cache_clear()
-            return
-
-        for extension, extension_detail in self.extensions.items():
-            if not isinstance(extension_detail, dict):
-                continue
-
-            codelist_list = extension_detail.get("codelists")
-            if not codelist_list:
-                continue
-
-            base_url = "/".join(extension.split('/')[:-1]) + "/codelists/"
-
-            for codelist in codelist_list:
-                try:
-                    codelist_map = load_codelist(base_url + codelist)
-                except UnicodeDecodeError as e:
-                    extension_detail['failed_codelists'][codelist] = "Unicode Error, codelists need to be in UTF-8"
-                except Exception as e:
-                    extension_detail['failed_codelists'][codelist] = "Unknown Exception, {}".format(str(e))
-                    continue
-
-                if not codelist_map:
-                    extension_detail['failed_codelists'][
-                        codelist] = "Codelist Error, Could not find code field in codelist"
-
-                if codelist[0] in ("+", "-"):
-                    codelist_extension = codelist[1:]
-                    if codelist_extension not in self.extended_codelists:
-                        extension_detail['failed_codelists'][
-                            codelist] = "Extension error, Trying to extend non existing codelist {}".format(codelist_extension)
-                        continue
-
-                if codelist[0] == "+":
-                    self.extended_codelists[codelist_extension].update(codelist_map)
-                elif codelist[0] == "-":
-                    for code in codelist_map:
-                        value = self.extended_codelists[codelist_extension].pop(code, None)
-                        if not value:
-                            extension_detail['failed_codelists'][
-                                codelist] = "Codelist error, Trying to remove non existing codelist value {}".format(code)
-                else:
-                    self.extended_codelists[codelist] = codelist_map
-
-                try:
-                    self.extended_codelist_urls[codelist].append(base_url + codelist)
-                except KeyError:
-                    self.extended_codelist_urls[codelist] = [base_url + codelist]
-
-    def get_release_schema_obj(self, deref=False):
-        release_schema_obj = self._release_schema_obj
-        if self.extended_schema_file:
-            with open(self.extended_schema_file) as fp:
-                release_schema_obj = json.load(fp)
-        elif self.extensions:
-            release_schema_obj = deepcopy(self._release_schema_obj)
-            self.apply_extensions(release_schema_obj)
-        if deref:
-            if self.extended:
-                extended_release_schema_str = json.dumps(release_schema_obj)
-                release_schema_obj = self.deref_schema(extended_release_schema_str)
-            else:
-                release_schema_obj = self.deref_schema(self.release_schema_str)
-        return release_schema_obj
-
-    def get_release_pkg_schema_obj(self, deref=False, use_extensions=True):
-        package_schema_obj = deepcopy(self._release_pkg_schema_obj)
-        if deref:
-            if self.extended and use_extensions:
-                deref_release_schema_obj = self.get_release_schema_obj(deref=True)
-                package_schema_obj['properties']['releases']['items'] = {}
-                release_pkg_schema_str = json.dumps(package_schema_obj)
-                package_schema_obj = self.deref_schema(release_pkg_schema_str)
-                package_schema_obj['properties']['releases']['items'].update(deref_release_schema_obj)
-            else:
-                return self.deref_schema(self.release_pkg_schema_str)
-        return package_schema_obj
-
-    def apply_extensions(self, schema_obj):
-        if not self.extensions:
-            return
-        for extensions_descriptor_url in self.extensions.keys():
-
-            try:
-                response = requests.get(extensions_descriptor_url)
-                if not response.ok:
-                    # extension descriptor is required to proceed
-                    self.invalid_extension[extensions_descriptor_url] = '{}: {}'.format(
-                        response.status_code, response.reason.lower())
-                    continue
-            except requests.exceptions.RequestException:
-                self.invalid_extension[extensions_descriptor_url] = 'fetching failed'
-                continue
-
-            i = extensions_descriptor_url.rfind('/')
-            url = '{}/{}'.format(extensions_descriptor_url[:i], 'release-schema.json')
-
-            try:
-                if self.cache_schema:
-                    extension = cached_get_request(url)
-                else:
-                    extension = requests.get(url)
-            except requests.exceptions.RequestException:
-                continue
-
-            if extension.ok:
-                try:
-                    extension_data = extension.json()
-                except ValueError:  # would be json.JSONDecodeError for Python 3.5+
-                    self.invalid_extension[extensions_descriptor_url] = 'release schema invalid JSON'
-                    continue
-            elif extension.status_code == 404:
-                url = None
-                extension_data = {}
-            else:
-                self.invalid_extension[extensions_descriptor_url] = '{}: {}'.format(
-                    extension.status_code, extension.reason.lower())
-                continue
-
-            schema_obj = json_merge_patch.merge(schema_obj, extension_data)
-            try:
-                if self.cache_schema:
-                    response = cached_get_request(extensions_descriptor_url)
-                else:
-                    response = requests.get(extensions_descriptor_url)
-                extensions_descriptor = response.json()
-
-            except ValueError:  # would be json.JSONDecodeError for Python 3.5+
-                self.invalid_extension[extensions_descriptor_url] = 'invalid JSON'
-                continue
-            cur_language = translation.get_language()
-
-            extension_description = {'url': extensions_descriptor_url, 'release_schema_url': url}
-
-            for field in ['description', 'name', 'documentationUrl']:
-                field_object = extensions_descriptor.get(field, {})
-                if isinstance(field_object, str):
-                    field_value = field_object
-                else:
-                    field_value = field_object.get(cur_language)
-                    if not field_value:
-                        field_value = field_object.get('en', '')
-                extension_description[field] = field_value
-            extension_description['failed_codelists'] = {}
-            codelists = extensions_descriptor.get('codelists')
-            if codelists:
-                extension_description['codelists'] = codelists
-
-            self.extensions[extensions_descriptor_url] = extension_description
-            self.extended = True
-
-    def create_extended_release_schema_file(self, upload_dir, upload_url):
-        filepath = os.path.join(upload_dir, 'extended_release_schema.json')
-
-        # Always replace any existing extended schema file
-        if os.path.exists(filepath):
-            os.remove(filepath)
-            self.extended_schema_file = None
-            self.extended_schema_url = None
-
-        if not self.extensions:
-            return
-
-        release_schema_obj = self.get_release_schema_obj()
-        if not self.extended:
-            return
-
-        with open(filepath, 'w') as fp:
-            release_schema_str = json.dumps(release_schema_obj, indent=4)
-            fp.write(release_schema_str)
-
-        self.extended_schema_file = filepath
-        self.extended_schema_url = urljoin(upload_url, 'extended_release_schema.json')
-
-    @cached_property
-    def record_pkg_schema_str(self):
-        uri_scheme = urlparse(self.record_pkg_schema_url).scheme
-        if uri_scheme == 'http' or uri_scheme == 'https':
-            if self.cache_schema:
-                response = cached_get_request(self.record_pkg_schema_url)
-            else:
-                response = requests.get(self.record_pkg_schema_url)
-            return response.text
-        else:
-            with open(self.record_pkg_schema_url) as fp:
-                return fp.read()
-
-    @property
-    def _record_pkg_schema_obj(self):
-        return json.loads(self.record_pkg_schema_str)
-
-    def get_record_pkg_schema_obj(self, deref=False):
-        if deref:
-            deref_package_schema = self.deref_schema(self.record_pkg_schema_str)
-            if self.extended:
-                deref_release_schema_obj = self.get_release_schema_obj(deref=True)
-                deref_package_schema['properties']['records']['items'][
-                    'properties']['compiledRelease'] = deref_release_schema_obj
-                deref_package_schema['properties']['records']['items'][
-                    'properties']['releases']['oneOf'][1] = deref_release_schema_obj
-            return deref_package_schema
-        return deepcopy(self._record_pkg_schema_obj)
-
-    def get_record_pkg_schema_fields(self):
-        return set(schema_dict_fields_generator(self.get_record_pkg_schema_obj(deref=True)))
diff --git a/cove_ocds/management/commands/ocds_cli.py b/cove_ocds/management/commands/ocds_cli.py
index 5d09236f7..c575902ab 100644
--- a/cove_ocds/management/commands/ocds_cli.py
+++ b/cove_ocds/management/commands/ocds_cli.py
@@ -6,7 +6,7 @@
 from django.core.management.base import CommandError
 
 from cove.management.commands.base_command import CoveBaseCommand, SetEncoder
-from cove_ocds.lib.api import APIException, ocds_json_output
+from libcoveocds.api import APIException, ocds_json_output
 
 
 class Command(CoveBaseCommand):
diff --git a/cove_ocds/test_hypothesis.py b/cove_ocds/test_hypothesis.py
index feb330499..ec6f085e3 100644
--- a/cove_ocds/test_hypothesis.py
+++ b/cove_ocds/test_hypothesis.py
@@ -1,4 +1,4 @@
-from cove_ocds.lib.ocds import get_releases_aggregates
+from libcoveocds.lib.common_checks import get_releases_aggregates
 from hypothesis import given, assume, strategies as st, example, settings
 from cove.input.models import SuppliedData
 from django.core.files.base import ContentFile
diff --git a/cove_ocds/tests.py b/cove_ocds/tests.py
index b5cd8d1cf..ac6756910 100644
--- a/cove_ocds/tests.py
+++ b/cove_ocds/tests.py
@@ -14,12 +14,13 @@
 from django.core.management.base import CommandError
 
 import cove.lib.common as cove_common
-from .lib.api import APIException, context_api_transform, ocds_json_output
-from .lib.ocds import get_releases_aggregates, get_bad_ocds_prefixes
-from .lib.schema import SchemaOCDS
+from libcoveocds.api import APIException, ocds_json_output
+from libcoveocds.lib.api import context_api_transform
+from libcoveocds.lib.common_checks import get_releases_aggregates, get_bad_ocds_prefixes
+from libcoveocds.schema import SchemaOCDS
 from cove.input.models import SuppliedData
 from cove.lib.converters import convert_json, convert_spreadsheet
-from cove.lib.tools import cached_get_request
+from libcoveocds.libcore.tools import cached_get_request
 
 
 OCDS_DEFAULT_SCHEMA_VERSION = settings.COVE_CONFIG['schema_version']
diff --git a/cove_ocds/views.py b/cove_ocds/views.py
index f87afb640..824872054 100644
--- a/cove_ocds/views.py
+++ b/cove_ocds/views.py
@@ -8,11 +8,13 @@
 from django.shortcuts import render
 from django.utils.translation import ugettext_lazy as _
+from django.utils import translation
 from django.utils.html import format_html
 
 from . lib import exceptions
-from . lib.ocds import common_checks_ocds
-from . lib.schema import SchemaOCDS
+from libcoveocds.common_checks import common_checks_ocds
+from libcoveocds.schema import SchemaOCDS
+from libcoveocds.config import LibCoveOCDSConfig
 from cove.lib.common import get_spreadsheet_meta_data
 from cove.lib.converters import convert_spreadsheet, convert_json
 from cove.lib.exceptions import CoveInputDataError, cove_web_input_error
 
@@ -28,6 +30,9 @@ def explore_ocds(request, pk):
     if error:
         return error
 
+    lib_cove_ocds_config = LibCoveOCDSConfig()
+    lib_cove_ocds_config.config['current_language'] = translation.get_language()
+
     upload_dir = db_data.upload_dir()
     upload_url = db_data.upload_url()
     file_name = db_data.original_file.file.name
@@ -64,7 +69,7 @@
         version_in_data = json_data.get('version', '')
         db_data.data_schema_version = version_in_data
         select_version = post_version_choice or db_data.schema_version
-        schema_ocds = SchemaOCDS(select_version=select_version, release_data=json_data)
+        schema_ocds = SchemaOCDS(select_version=select_version, release_data=json_data, lib_cove_ocds_config=lib_cove_ocds_config)
 
         if schema_ocds.missing_package:
             exceptions.raise_missing_package_error()
@@ -97,7 +102,7 @@
 
     else:
         # Use the lowest release pkg schema version accepting 'version' field
-        metatab_schema_url = SchemaOCDS(select_version='1.1').release_pkg_schema_url
+        metatab_schema_url = SchemaOCDS(select_version='1.1', lib_cove_ocds_config=lib_cove_ocds_config).release_pkg_schema_url
         metatab_data = get_spreadsheet_meta_data(upload_dir, file_name, metatab_schema_url, file_type)
         if 'version' not in metatab_data:
             metatab_data['version'] = '1.0'
@@ -105,7 +110,7 @@
         db_data.data_schema_version = metatab_data['version']
 
         select_version = post_version_choice or db_data.schema_version
-        schema_ocds = SchemaOCDS(select_version=select_version, release_data=metatab_data)
+        schema_ocds = SchemaOCDS(select_version=select_version, release_data=metatab_data, lib_cove_ocds_config=lib_cove_ocds_config)
 
         # Unlike for JSON data case above, do not check for missing data package
         if schema_ocds.invalid_version_argument:
diff --git a/requirements.in b/requirements.in
index 4b02785af..efe0765bb 100644
--- a/requirements.in
+++ b/requirements.in
@@ -2,6 +2,7 @@
 pip
 Django<1.12 #^^ rq.filter: <1.12
 -e git+https://github.com/OpenDataServices/flatten-tool.git@v0.3.0#egg=flattentool
+-e git+https://github.com/open-contracting/lib-cove-ocds.git@v0.1.0#egg=libcoveocds
 django-bootstrap3
 django-debug-toolbar
 requests
diff --git a/requirements.txt b/requirements.txt
index d27eed112..83a46a17e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,7 @@
 pip==9.0.3
 Django==1.11.15 # rq.filter: <1.12
 -e git+https://github.com/OpenDataServices/flatten-tool.git@v0.3.0#egg=flattentool
+-e git+https://github.com/open-contracting/lib-cove-ocds.git@v0.1.0#egg=libcoveocds
 django-bootstrap3==9.1.0
 django-debug-toolbar==1.9.1
 requests==2.18.4
diff --git a/requirements_dev.txt b/requirements_dev.txt
index cdf9cda20..d97a29ea6 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -1,6 +1,7 @@
 pip==9.0.3
 Django==1.11.15 # rq.filter: <1.12
 -e git+https://github.com/OpenDataServices/flatten-tool.git@v0.3.0#egg=flattentool
+-e git+https://github.com/open-contracting/lib-cove-ocds.git@v0.1.0#egg=libcoveocds
 django-bootstrap3==9.1.0
 django-debug-toolbar==1.9.1
 requests==2.18.4