Skip to content

Commit

Permalink
Merge pull request #57 from OpenDataServices/35-spreadsheet-upload
Browse files Browse the repository at this point in the history
 processor.cove: Add spreadsheet conversion
  • Loading branch information
michaelwood committed Mar 4, 2021
2 parents 658e7e0 + a83b9c1 commit 2c50416
Show file tree
Hide file tree
Showing 2 changed files with 163 additions and 46 deletions.
160 changes: 124 additions & 36 deletions standards_lab/processor/cove.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
SchemaJsonMixin,
validator,
)
from libcove.lib.converters import convert_spreadsheet
from libcove.config import LibCoveConfig

from decimal import Decimal
from urllib.parse import urljoin
Expand All @@ -11,36 +13,142 @@
import tempfile

import django_rq
import jsonref
from rq.job import Job
from rq.exceptions import NoSuchJobError

import api.views
from .extra_validator_funcs import patch_validator


patch_validator(validator)


def start(project):
schema_name = project["rootSchema"]
# Maps the MIME types accepted for spreadsheet upload to the file-type
# string that lib-cove's convert_spreadsheet expects. Any MIME type not
# listed here is treated as JSON (see lib_cove_wrapper).
MIME_TYPE_TO_FILE_TYPE = {
    "application/csv": "csv",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
    "application/vnd.oasis.opendocument.spreadsheet": "ods",
}


print(project, flush=True)
def lib_cove_wrapper(
    project,
    data_file,
    cache=False,
):
    """
    Run lib-cove checks (and spreadsheet conversion where needed) on one
    data file belonging to *project*.

    A wrapper around common_checks_context and convert_spreadsheet from
    lib-cove. This is the function that gets queued at the moment. This
    means that conversion happens automatically, but also means you can't
    run the conversion on its own beforehand.

    :param project: project dict; must contain "rootSchema", "rootListPath"
        and "path" (directory holding the schema and uploaded data files).
    :param data_file: file name, relative to project["path"], to check.
    :param cache: passed through to lib-cove so it can cache conversion and
        validation results. (Previously this parameter was accepted but
        ignored — both lib-cove calls hard-coded cache=False.)
    :returns: context dict with at least "status" ("SUCCESS" or "FAILED")
        and "file_type"; on success it also contains the lib-cove common
        checks results, and for spreadsheets the conversion results.
    """
    schema_name = project["rootSchema"]
    root_list_path = project["rootListPath"]

    schema_obj = SchemaJsonMixin()

    schema_obj.schema_host = os.path.join(project["path"], "")
    # Don't set schema_obj.schema_name or schema_obj.schema_url, because these
    # are only used by flatten-tool, which requires a specific subschema, see
    # comment above flattentool_schema_url below.
    schema_obj.pkg_schema_name = schema_name
    schema_obj.pkg_schema_url = urljoin(
        schema_obj.schema_host, schema_obj.pkg_schema_name
    )
    print(schema_obj.pkg_schema_url, flush=True)

    data_file_path = os.path.join(project["path"], data_file)
    mime_type = api.views.check_allowed_project_mime_type(data_file_path)
    # Anything that isn't a recognised spreadsheet MIME type is treated as JSON.
    file_type = MIME_TYPE_TO_FILE_TYPE.get(mime_type, "json")
    context = {"file_type": file_type}

    # Only used for constructing the converted url, which currently wouldn't
    # work in standards-lab anyway, as the converted file isn't placed anywhere
    # web accessible
    upload_url = "http://example.org/"

    lib_cove_config = LibCoveConfig()
    lib_cove_config.config["root_list_path"] = root_list_path
    # This is the name of an extra id at the top level, e.g. ocds has ocid. An
    # empty string means no such id
    lib_cove_config.config["root_id"] = ""

    # upload_dir is only used to output files to (e.g. cell source map from
    # flatten-tool, or a cache of the validation results), so we don't have to
    # set it to where the standards-lab data was uploaded
    with tempfile.TemporaryDirectory() as upload_dir:
        # flatten-tool takes a schema url or path, but it expects the
        # sub-schema describing the repeated object, not the package schema.
        # e.g. the schema describing a grant in 360Giving or a release in OCDS.
        #
        # For the existing standards we work on, this is a separate file which
        # we can point flatten-tool at. But, in standards-lab we don't know
        # which schema file it is, or whether the schema files are even split
        # this way. Instead, we deref to combine all the schemas, and find the
        # sub-schema we want from the package schema, write that out to a file,
        # and pass it to flatten-tool.
        flattentool_schema_url = os.path.join(upload_dir, "flattentool_schema.json")

        with open(schema_obj.pkg_schema_url) as schema_fp, open(
            flattentool_schema_url, "w"
        ) as flattentool_schema_fp:
            schema = jsonref.load(schema_fp)
            flattentool_schema = (
                schema.get("properties", {}).get(root_list_path, {}).get("items", {})
            )
            json.dump(flattentool_schema, flattentool_schema_fp)

        if file_type != "json":
            context.update(
                convert_spreadsheet(
                    upload_dir,
                    upload_url,
                    data_file_path,
                    file_type,
                    lib_cove_config,
                    schema_url=flattentool_schema_url,
                    pkg_schema_url=schema_obj.pkg_schema_url,
                    metatab_name="Meta",
                    replace=True,
                    cache=cache,  # fix: was hard-coded False, ignoring the parameter
                )
            )

            # convert_spreadsheet writes the converted JSON into upload_dir
            # and reports its location in the context.
            json_file_path = context["converted_path"]

        else:
            json_file_path = data_file_path

        with open(json_file_path) as fp:
            try:
                json_data = json.load(fp, parse_float=Decimal)
            except json.JSONDecodeError:
                context.update(
                    {
                        "status": "FAILED",
                        "error": "Could not decode as a json file",
                    }
                )
                return context

        context = common_checks_context(
            upload_dir,
            json_data,
            schema_obj,
            schema_name,
            context,
            cache=cache,  # fix: was hard-coded False, ignoring the parameter
        )
    context["status"] = "SUCCESS"
    return context


def start(project):
output = {}
for data_file in project["dataFiles"]:
context = {"file_type": "json"}

job_id = project["name"] + "_cove_results_" + data_file
try:
job = Job.fetch(job_id, connection=django_rq.get_connection())
Expand All @@ -55,33 +163,13 @@ def start(project):
except NoSuchJobError:
pass

with open(
os.path.join(project["path"], data_file)
) as fp, tempfile.TemporaryDirectory() as upload_dir:
# upload_dir is only used to output files to (e.g. cell source map
# from flatten-tool, or a cache of the validation results).
try:
# Possibly we should do this in the worker for performance reasons
# Issue: https://github.com/OpenDataServices/standards-lab/issues/24
json_data = json.load(fp, parse_float=Decimal)
except json.JSONDecodeError:
output[data_file] = {
"status": "FAILED",
"error": "Could not decode as a json file",
}
continue

job = django_rq.enqueue(
common_checks_context,
upload_dir,
json_data,
schema_obj,
schema_name,
context,
cache=False,
job_id=job_id,
)
output[data_file] = {"status": "SUCCESS"}
job = django_rq.enqueue(
lib_cove_wrapper,
project,
data_file,
job_id=job_id,
)
output[data_file] = {"status": "SUCCESS"}
return output


Expand Down
49 changes: 39 additions & 10 deletions standards_lab/ui/templates/project.html
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,21 @@ <h4 class="card-title">Project Settings</h4>
Modified: <code>{{project.modified}}</code></p>
<div class="form-group">
<label for="project-name-input">Project Name</label>
<input type="text" id="project-name-input" class="form-control form-control-lg" style="width: 100%" v-model="project.name" >
<input type="text" id="project-name-input" class="form-control form-control-lg" style="width: 100%" v-model="project.name" v-on:keyup="unsavedChanges = true" >
<small>Accepted characters are A-Z, a-z, 0-9 , - and _ </small>
<p v-if="!validProjectName" class="alert alert-warning mt-2">Invalid characters in project name</p>
</div>
<div class="form-group" v-if="ownThisProject">
<input type="checkbox" name="editable" id="project-editable" v-model="project.editable" >
<input type="checkbox" name="editable" id="project-editable" v-model="project.editable" v-on:change="unsavedChanges = true" >
<label for="project-editable">Editable by anyone with the link</label>
</div>
<div class="form-group">
<label for="root-list-path">JSON key to main list of your data (needed for spreadsheet upload)</label>
<input type="text" class="form-control" id="project-root-list-path" v-model="project.rootListPath" v-on:keyup="unsavedChanges = true"/>
</div>
<div class="form-group">
<button v-bind:disabled="!validProjectName" class="btn btn-primary" v-on:click="updateProjectProperties">{{saveLabel}}</button>
<p v-if="unsavedChanges" class="alert alert-warning mt-2">You have unsaved changes</p>
</div>
</div>
</div>
Expand Down Expand Up @@ -193,6 +198,8 @@ <h4 class="card-title">Data</h4>
jsonEditorData: {},
jsonEditorDataFileName: "untitled.json",
maximiseDataEditor: false,

unsavedChanges: false,
}
},

Expand All @@ -202,7 +209,24 @@ <h4 class="card-title">Data</h4>

this.ownThisProject = ownThisProject;

setInterval(() => { this.getProjectProperties(); }, 2000);
setInterval(async () => {

let project = await this.getProjectProperties();

if (this.unsavedChanges){
/* We have unsaved changes; check whether they have been reverted
* by the user by comparing against the server's copy of the project.
* This could get expensive if the project object gets larger in the future.
*/

if (JSON.stringify(this.project) === JSON.stringify(project)){
this.unsavedChanges = false;
}
/* Note we are not updating this.project on this cycle */
} else {
this.project = project;
}
}, 2000);
},

watch: {
Expand All @@ -213,7 +237,6 @@ <h4 class="card-title">Data</h4>
this.saveLabel = "Save";
}
},

},

computed: {
Expand All @@ -224,17 +247,22 @@ <h4 class="card-title">Data</h4>

methods: {
/* GET the project properties */
getProjectProperties: function(){
getProjectProperties: async function(){

fetch(projectApiUrl, {
let response = await fetch(projectApiUrl, {
method:'GET',
credentials: 'same-origin',
headers: { 'X-CSRFToken': csrfmiddlewaretoken_value },
}).then(response => response.json()).then(result => {
if (result.error == undefined){
this.project = result;
}
});

let project_json = await response.json();

if (project_json.error === undefined){
return project_json;
} else {
console.error(project_json.error);
return this.project;
}
},

/* Update any of the project's properties */
Expand All @@ -247,6 +275,7 @@ <h4 class="card-title">Data</h4>
body: JSON.stringify(this.project),
}).then(response => response.json()).then(result => {
if (result.error == undefined){
this.unsavedChanges = false;
this.project = result;

/* If we have changed project name for simplicity we reload the page to the new project page */
Expand Down

0 comments on commit 2c50416

Please sign in to comment.