Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extensions mechanism #98

Merged
merged 10 commits into from
Apr 9, 2024
Binary file added cove/cove_360/fixtures/dei_extension.xlsx
Binary file not shown.
28 changes: 21 additions & 7 deletions cove/cove_360/templates/cove_360/explore.html
Original file line number Diff line number Diff line change
Expand Up @@ -60,24 +60,24 @@ <h3 class="panel-title panel-title-explore">

<script>
window.addEventListener('load', function() {
  // Report the publisher / data validation status of this page to the `_paq`
  // analytics queue once the page has finished loading.

  // The template renders the Python booleans as the strings 'True' / 'False'.
  let publisherValidation, dataValidation;

  if ('{{ data_status.publisher.self_publish.enabled }}' === 'True') {
    publisherValidation = "publishing-enabled";
  } else {
    publisherValidation = "publishing-disabled";
  }

  if ('{{ data_status.passed }}' === 'True') {
    dataValidation = "data-valid";
  } else {
    dataValidation = 'data-invalid';
  }

  const validation = `${publisherValidation}-${dataValidation}`;
  // Keep `source` local: assigning to an undeclared name would leak a global
  // variable (and throws a ReferenceError in strict mode).
  const source = '{{ source_url }}';

  _paq.push(['trackEvent', 'Status', validation, source]);
  _paq.push(['trackPageView']);
})
Expand Down Expand Up @@ -134,6 +134,9 @@ <h3 class="panel-title panel-title-explore">
{% if metadata.license %}
<strong> License: </strong> <a href="{{metadata.license}}">{{metadata.license}} </a><br>
{% endif %}
{% if extension_metadatas %}
<strong>Extensions: </strong> {% for extension_metadata in extension_metadatas %}<a href="{{extension_metadata.documentationUrl}}">{{extension_metadata.id}}</a> {% endfor %}<br>
{% endif %}
</div>
</div>
<br>
Expand Down Expand Up @@ -319,7 +322,8 @@ <h3 class="panel-title panel-title-explore">
{% else %}
{% trans "Congratulations! Your data is using the 360Giving Data Standard. We used the " %}
{% endif %}
<a href="https://www.threesixtygiving.org/standard/reference/#toc-360giving-json-schemas"> 360Giving JSON Package Schema</a> {% trans "to check this." %}
<a href="https://www.threesixtygiving.org/standard/reference/#toc-360giving-json-schemas"> 360Giving JSON Package Schema</a>
{% trans "to check this." %}
</p>
<div class="explore-help">{% trans "This means that the data" %}
{% if validation_errors or additional_closed_codelist_values %}
Expand All @@ -330,7 +334,17 @@ <h3 class="panel-title panel-title-explore">

{% blocktrans %}the requirements of the <a href="https://standard.threesixtygiving.org/en/latest/technical/reference/">360Giving Data Standard</a>.{% endblocktrans %}
{% blocktrans %} Making sure your data uses the standard correctly is important. Otherwise it cannot be used alongside other valid 360Giving data and cannot be included in 360Giving tools, such as GrantNav and 360Insights.{% endblocktrans %}
</diV>
</div>

{% if extension_metadatas %}
<p class="explanation">{% trans "The 360Giving Data Standard was extended using these extensions:" %}</p>
<ol>
{% for extension_metadata in extension_metadatas %}
<li><a href="{{extension_metadata.documentationUrl}}" title="{{extension_metadata.description}}">{{extension_metadata.title}} ({{extension_metadata.id}})</a></li>
{% endfor %}
</ol>
{% endif %}

{% if validation_errors or additional_closed_codelist_values %}
<br>
<p class="explanation">&nbsp;{% trans "The following <strong>errors</strong> are preventing your data from being valid 360Giving data. Please use the feedback below to find and resolve the issues in your file" %}</p>
Expand Down
11 changes: 0 additions & 11 deletions cove/cove_360/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@
import pytest
import time
from cove.input.models import SuppliedData
from django.conf import settings
from django.core.files.base import ContentFile
from django.core.files.uploadedfile import UploadedFile

from lib360dataquality.cove.schema import Schema360
from lib360dataquality.cove.threesixtygiving import get_grants_aggregates, run_extra_checks, extend_numbers, spreadsheet_style_errors_table, TEST_CLASSES

# Source is cove_360/fixtures/fundingproviders-grants_fixed_2_grants.json
Expand Down Expand Up @@ -877,15 +875,6 @@ def test_explore_unconvertable_spreadsheet(client):
assert b'We think you tried to supply a spreadsheet, but we failed to convert it.' in resp.content


def test_schema_360():
    """Schema360 exposes the schema names, host and URLs configured in COVE_CONFIG."""
    schema = Schema360()
    cove_config = settings.COVE_CONFIG

    # Plain attributes are taken straight from the configuration.
    assert schema.schema_name == cove_config['schema_item_name']
    assert schema.pkg_schema_name == cove_config['schema_name']
    assert schema.schema_host == cove_config['schema_host']

    # URLs are the configured host followed by the relevant schema name.
    expected_schema_url = cove_config['schema_host'] + cove_config['schema_item_name']
    expected_pkg_schema_url = cove_config['schema_host'] + cove_config['schema_name']
    assert schema.schema_url == expected_schema_url
    assert schema.pkg_schema_url == expected_pkg_schema_url


# Suggested method of updating test_quality_accuracy/test_usefulness_checks data
# in each function save the output:
# with open("/tmp/update_test_data.txt", "w+") as f:
Expand Down
4 changes: 3 additions & 1 deletion cove/cove_360/tests_functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ def server_url(request, live_server):
('nulls.json', [
'is not a JSON array',
'Date is not in the correct format',
'Invalid code found in countryCode',
'is not a number',
'is not a string',
], True),
Expand Down Expand Up @@ -168,6 +167,9 @@ def server_url(request, live_server):
'bad currency 3',
'bad currency 4',
], True),
("dei_extension.xlsx", [
"do not use the 360Giving Data Standard codelists correctly.",
], True),
])
@pytest.mark.parametrize('authed', [True, False])
def test_explore_360_url_input(server_url, browser, httpserver, source_filename, expected_text, conversion_successful, authed):
Expand Down
43 changes: 36 additions & 7 deletions cove/cove_360/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
import logging
import re
import os
from decimal import Decimal

from cove.views import explore_data_context, cove_web_input_error
Expand All @@ -20,7 +21,7 @@
from libcove.lib.converters import convert_spreadsheet, convert_json
from libcove.lib.exceptions import CoveInputDataError

from lib360dataquality.cove.schema import Schema360
from lib360dataquality.cove.schema import Schema360, ExtensionsError
from lib360dataquality.cove.threesixtygiving import TEST_CLASSES
from lib360dataquality.cove.threesixtygiving import common_checks_360

Expand Down Expand Up @@ -63,7 +64,6 @@ def explore_360(request, pk, template='cove_360/explore.html'):
print("Cache hit")
return render(request, template, cached_context)

schema_360 = Schema360()
context, db_data, error = explore_data_context(request, pk)
if error:
return error
Expand Down Expand Up @@ -91,6 +91,7 @@ def explore_360(request, pk, template='cove_360/explore.html'):
upload_url = db_data.upload_url()
file_name = db_data.original_file.file.name
file_type = context['file_type']
schema_360 = Schema360(upload_dir)
R2ZER0 marked this conversation as resolved.
Show resolved Hide resolved

if file_type == 'json':
# open the data first so we can inspect for record package
Expand All @@ -114,16 +115,44 @@ def explore_360(request, pk, template='cove_360/explore.html'):
'link_text': _('Try Again'),
'msg': _('360Giving JSON should have an object as the top level, the JSON you supplied does not.'),
})
context.update(convert_json(upload_dir, upload_url, file_name, schema_url=schema_360.schema_url,
request=request, flatten=request.POST.get('flatten'),
lib_cove_config=lib_cove_config))

extension_metadatas = schema_360.resolve_extension(json_data)

context.update(convert_json(upload_dir, upload_url, file_name, schema_url=schema_360.schema_file,
request=request, flatten=request.POST.get('flatten'),
lib_cove_config=lib_cove_config))

else:
context.update(convert_spreadsheet(upload_dir, upload_url, file_name, file_type, lib_cove_config, schema_360.schema_url,
schema_360.pkg_schema_url))
# Convert spreadsheet to json
context.update(convert_spreadsheet(upload_dir, upload_url, file_name, file_type, lib_cove_config, schema_360.schema_file,
schema_360.pkg_schema_file))

with open(context['converted_path'], encoding='utf-8') as fp:
json_data = json.load(fp, parse_float=Decimal)

try:
# Check data for presence of any schema extensions if exists re-convert using the newly patched schema
if extension_metadatas := schema_360.resolve_extension(json_data):
# Delete old converted data. If it is detected by libcove it will skip conversion (unflattening)
os.unlink(context["converted_path"])

context.update(convert_spreadsheet(upload_dir, upload_url, file_name, file_type, lib_cove_config, schema_360.schema_file, schema_360.pkg_schema_file))
# Re-load the newly flattened data
with open(context['converted_path'], encoding='utf-8') as fp:
json_data = json.load(fp, parse_float=Decimal)

context["extension_metadatas"] = extension_metadatas
except ExtensionsError as err:
raise CoveInputDataError(context={
'sub_title': _("Sorry, we can't process the data with the specified extension(s)"),
'link': 'index',
'link_text': _('Try Again'),
'msg': _(format_html('We think you tried to upload data that uses an extension to the 360Giving standard. However there was a problem with the extension.'
'\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
'</span> <strong>Error message:</strong> {}', err)),
'error': format(err)
})

context = common_checks_360(context, upload_dir, json_data, schema_360)

# Construct the 360Giving specific urls for codelists in the docs
Expand Down
192 changes: 185 additions & 7 deletions lib360dataquality/cove/schema.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,191 @@
from urllib.parse import urljoin

from libcove.lib.common import SchemaJsonMixin
from libcove.lib.common import SchemaJsonMixin, get_schema_codelist_paths, load_core_codelists, load_codelist
from libcove.lib.tools import get_request
from .settings import COVE_CONFIG as config
import requests
import json_merge_patch
import json
import os
from typing import Optional

EXTENSIONS_REGISTRY_BASE_URL = "https://raw.githubusercontent.com/ThreeSixtyGiving/extensions-registry/main/extensions/"


class ExtensionsError(Exception):
    """Raised when a schema extension cannot be fetched, decoded or applied."""


class Schema360(SchemaJsonMixin):
schema_host = config['schema_host']
schema_name = config['schema_item_name']
pkg_schema_name = config['schema_name']
schema_url = urljoin(schema_host, schema_name)
pkg_schema_url = urljoin(schema_host, pkg_schema_name)
codelists = config['codelists_host']

codelists = config["codelists_host"]
schema_name = config["schema_item_name"]
pkg_schema_name = config["schema_name"]
pkg_schema_url = "" # required by libcove but not in use
extended = False # required by libcove but not in use
extension_codelist_urls = []

_pkg_schema_obj = {}
_schema_obj = {}

def __init__(self, data_dir) -> None:
    """Fetch the core schemas and stage local copies under ``data_dir``.

    A ``schema`` sub-directory is created inside ``data_dir`` (if it does not
    already exist) and both the package schema and the grant schema are
    written into it, with the package schema re-pointed at the local grant
    schema file.

    :param data_dir: directory in which the ``schema`` working directory
        is created.
    """
    # Give every instance its own list. The class-level default is a single
    # shared list object, so URLs appended for one upload would otherwise be
    # visible to every other Schema360 instance in the same process.
    self.extension_codelist_urls = []

    # Create dedicated location for schema work
    self.working_dir = os.path.join(data_dir, "schema")
    try:
        os.mkdir(self.working_dir)
    except FileExistsError:
        pass

    # required by lib-cove for CustomRefResolver the trailing / is needed to make sure
    # urljoin does not discard the final part of the location.
    self.schema_host = f"{self.working_dir}/"

    schema_url = urljoin(config["schema_host"], self.schema_name)
    pkg_schema_url = urljoin(config["schema_host"], self.pkg_schema_name)

    self._pkg_schema_obj = get_request(pkg_schema_url).json()
    self._schema_obj = get_request(schema_url).json()

    # Update the pkg schema to no longer point to an external reference for the
    # grants schema.
    # If an extension is applied this will be the local merged version of the grant
    # schema.
    self._pkg_schema_obj["properties"]["grants"]["items"]["$ref"] = self.schema_file

    self.write_pkg_schema_file()
    self.write_schema_file()

    super().__init__()

@property
def schema_file(self):
    """Path of the grant schema file inside the working directory."""
    directory, filename = self.working_dir, self.schema_name
    return os.path.join(directory, filename)

@property
def pkg_schema_file(self):
    """Path of the package schema file inside the working directory."""
    directory, filename = self.working_dir, self.pkg_schema_name
    return os.path.join(directory, filename)

@property
def schema_str(self):
    """The in-memory grant schema serialised as a JSON string."""
    serialised = json.dumps(self._schema_obj)
    return serialised

@property
def pkg_schema_str(self):
    """The in-memory package schema serialised as a JSON string."""
    serialised = json.dumps(self._pkg_schema_obj)
    return serialised

def write_schema_file(self):
    """Write the serialised grant schema to ``schema_file``, replacing its content."""
    destination = self.schema_file
    with open(destination, mode="w") as handle:
        serialised = self.schema_str
        handle.write(serialised)

def write_pkg_schema_file(self):
    """Write the serialised package schema to ``pkg_schema_file``, replacing its content."""
    destination = self.pkg_schema_file
    with open(destination, mode="w") as handle:
        serialised = self.pkg_schema_str
        handle.write(serialised)

def process_codelists(self):
    """Load the core codelists plus any codelists supplied by extensions.

    From libcove common but with support for codelists from 360Giving
    extensions added. Sets ``core_codelist_schema_paths``,
    ``core_codelists`` and the ``extended_*`` attributes.
    """
    self.core_codelist_schema_paths = get_schema_codelist_paths(
        self, use_extensions=False
    )

    lib_cove_config = getattr(self, "config", None)

    extension_unique_files = frozenset(
        url.split("/")[-1] for url in self.extension_codelist_urls
    )

    # Core files that an extension re-defines are left out so that the
    # extension's version is the one that is used.
    core_unique_files = frozenset(
        value[0]
        for value in self.core_codelist_schema_paths.values()
        if value[0] not in extension_unique_files
    )

    # This loader uses the codelist host from the config + filename that was taken out of the schema
    loaded_core_codelists = load_core_codelists(
        self.codelists,
        core_unique_files,
        config=lib_cove_config,
    )

    extension_codelists = {
        extension_codelist_url.split("/")[-1]: load_codelist(
            extension_codelist_url, config=lib_cove_config
        )
        for extension_codelist_url in self.extension_codelist_urls
    }

    # Update the codelists with any specified by the extension
    # This has the unfortunate side-effect of making cove think these are part of
    # the main standard however we have no current way to differentiate the paths
    #
    # Merge into a *new* dict: load_core_codelists is memoised (it exposes
    # cache_clear), so updating its return value in place would leak this
    # upload's extension codelists into the cached result for later calls.
    self.core_codelists = {**loaded_core_codelists, **extension_codelists}

    # Ignore. Properties required by libcove:
    self.extended_codelist_schema_paths = self.core_codelist_schema_paths
    self.extended_codelists = self.core_codelists
    self.extended_codelist_urls = {}
    # End ignore

    # we do not want to cache if the requests failed. Test the core result
    # itself: extension codelists must not hide an empty (failed) core load.
    if not loaded_core_codelists:
        load_core_codelists.cache_clear()

def resolve_extension(self, json_data) -> Optional[list]:
    """
    Patch the schemas with the extensions declared in ``json_data``.

    If json_data contains extension ids, the metadata of each extension is
    fetched from the extensions registry and every schema patch it lists is
    merged into the internal representation of the targeted schema.
    We write the new schema file(s) to disk for flattentool and caching purposes.

    :param json_data: the (unflattened) data; its ``extensions`` key, when
        present, lists the extension ids.
    :returns: an array of extension metadata objects, or None when the data
        has no ``extensions`` key.
    :raises ExtensionsError: when the key is present but empty, or when an
        extension cannot be fetched, decoded or applied.
    """
    from urllib.parse import quote

    try:
        extension_ids = json_data["extensions"]
    except KeyError:
        return None

    # Covers an empty list as well as null / empty-string values.
    if not extension_ids:
        raise ExtensionsError("Extension key found but with no value(s)")

    # Never wait forever on a remote host while serving a request.
    request_timeout = 30  # seconds
    # The base URL already ends in "/"; avoid producing "extensions//<id>.json".
    registry_base_url = EXTENSIONS_REGISTRY_BASE_URL.rstrip("/")
    valid_targets = [self.schema_name, self.pkg_schema_name]

    extension_metadatas = []

    for extension_id in extension_ids:
        # The id comes from user supplied data: escape it so it can only ever
        # name a single file inside the registry directory.
        safe_extension_id = quote(str(extension_id), safe="")
        try:
            r = requests.get(
                f"{registry_base_url}/{safe_extension_id}.json",
                timeout=request_timeout,
            )
            r.raise_for_status()
            extension_metadata = r.json()
        except (ValueError, requests.RequestException) as err:
            # RequestException also covers connection errors and timeouts;
            # ValueError covers JSON decoding failures.
            raise ExtensionsError("Could not fetch or parse extension metadata") from err

        try:
            schema_patches = extension_metadata["schemas"]
            codelist_urls = extension_metadata["codelists"]
        except (KeyError, TypeError) as err:
            raise ExtensionsError(
                f"Extension metadata for {extension_id} is missing 'schemas' or 'codelists'"
            ) from err

        extension_metadatas.append(extension_metadata)

        for extension_schemas in schema_patches:
            try:
                patch_uri = extension_schemas["uri"]
                patch_target = extension_schemas["target"]
            except (KeyError, TypeError) as err:
                raise ExtensionsError(
                    f"Extension metadata for {extension_id} has a schema entry without 'uri' or 'target'"
                ) from err

            try:
                r = requests.get(patch_uri, timeout=request_timeout)
                r.raise_for_status()
                extension = r.json()
            except (ValueError, requests.RequestException) as e:
                raise ExtensionsError(f"Unable to fetch and decode supplied extension: {e}") from e

            if patch_target not in valid_targets:
                raise ExtensionsError(f"Unknown target for extension {patch_target} not in {valid_targets}")

            # Schema (grants) extension
            if patch_target == self.schema_name:
                self._schema_obj = json_merge_patch.merge(
                    self._schema_obj, extension
                )

            # Package schema extension
            if patch_target == self.pkg_schema_name:
                self._pkg_schema_obj = json_merge_patch.merge(
                    self._pkg_schema_obj, extension
                )

        # Rebind instead of extending in place: the default value is a list
        # stored on the class, and mutating it would share these URLs with
        # every other Schema360 instance.
        self.extension_codelist_urls = [*self.extension_codelist_urls, *codelist_urls]

    # Write out the new schema objects
    self.write_pkg_schema_file()
    self.write_schema_file()

    return extension_metadatas
Loading
Loading