Skip to content

Commit

Permalink
Merge pull request #57 from OpenDataServices/35-spreadsheet-upload
Browse files Browse the repository at this point in the history
 processor.cove: Add spreadsheet conversion
  • Loading branch information
michaelwood committed Mar 4, 2021
2 parents 658e7e0 + a83b9c1 commit 2c50416
Show file tree
Hide file tree
Showing 2 changed files with 163 additions and 46 deletions.
160 changes: 124 additions & 36 deletions standards_lab/processor/cove.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
SchemaJsonMixin,
validator,
)
from libcove.lib.converters import convert_spreadsheet
from libcove.config import LibCoveConfig

from decimal import Decimal
from urllib.parse import urljoin
Expand All @@ -11,36 +13,142 @@
import tempfile

import django_rq
import jsonref
from rq.job import Job
from rq.exceptions import NoSuchJobError

import api.views
from .extra_validator_funcs import patch_validator


patch_validator(validator)


def start(project):
schema_name = project["rootSchema"]
# Maps the MIME types accepted for spreadsheet upload to the file-type
# string that lib-cove's convert_spreadsheet expects. Any MIME type not
# listed here is treated as JSON (see lib_cove_wrapper).
MIME_TYPE_TO_FILE_TYPE = {
    "application/csv": "csv",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
    "application/vnd.oasis.opendocument.spreadsheet": "ods",
}


print(project, flush=True)
def lib_cove_wrapper(
    project,
    data_file,
    cache=False,
):
    """
    Run lib-cove checks (and spreadsheet conversion where needed) on one
    data file belonging to *project*.

    A wrapper around common_checks_context and convert_spreadsheet from
    lib-cove. This is the function that gets queued at the moment. This
    means that conversion happens automatically, but also means you can't
    run the conversion on its own beforehand.

    :param project: project dict; must contain "rootSchema", "rootListPath"
        and "path" (directory holding the schema and uploaded data files).
    :param data_file: file name, relative to project["path"], to check.
    :param cache: passed through to lib-cove so it can cache conversion and
        validation results. (Previously this parameter was accepted but
        ignored — both lib-cove calls hard-coded cache=False.)
    :returns: context dict with at least "status" ("SUCCESS" or "FAILED")
        and "file_type"; on success it also contains the lib-cove common
        checks results, and for spreadsheets the conversion results.
    """
    schema_name = project["rootSchema"]
    root_list_path = project["rootListPath"]

    schema_obj = SchemaJsonMixin()

    schema_obj.schema_host = os.path.join(project["path"], "")
    # Don't set schema_obj.schema_name or schema_obj.schema_url, because these
    # are only used by flatten-tool, which requires a specific subschema, see
    # comment above flattentool_schema_url below.
    schema_obj.pkg_schema_name = schema_name
    schema_obj.pkg_schema_url = urljoin(
        schema_obj.schema_host, schema_obj.pkg_schema_name
    )
    print(schema_obj.pkg_schema_url, flush=True)

    data_file_path = os.path.join(project["path"], data_file)
    mime_type = api.views.check_allowed_project_mime_type(data_file_path)
    # Anything that isn't a recognised spreadsheet MIME type is treated as JSON.
    file_type = MIME_TYPE_TO_FILE_TYPE.get(mime_type, "json")
    context = {"file_type": file_type}

    # Only used for constructing the converted url, which currently wouldn't
    # work in standards-lab anyway, as the converted file isn't placed anywhere
    # web accessible
    upload_url = "http://example.org/"

    lib_cove_config = LibCoveConfig()
    lib_cove_config.config["root_list_path"] = root_list_path
    # This is the name of an extra id at the top level, e.g. ocds has ocid. An
    # empty string means no such id
    lib_cove_config.config["root_id"] = ""

    # upload_dir is only used to output files to (e.g. cell source map from
    # flatten-tool, or a cache of the validation results), so we don't have to
    # set it to where the standards-lab data was uploaded
    with tempfile.TemporaryDirectory() as upload_dir:
        # flatten-tool takes a schema url or path, but it expects the
        # sub-schema describing the repeated object, not the package schema.
        # e.g. the schema describing a grant in 360Giving or a release in OCDS.
        #
        # For the existing standards we work on, this is a separate file which
        # we can point flatten-tool at. But, in standards-lab we don't know
        # which schema file it is, or whether the schema files are even split
        # this way. Instead, we deref to combine all the schemas, and find the
        # sub-schema we want from the package schema, write that out to a file,
        # and pass it to flatten-tool.
        flattentool_schema_url = os.path.join(upload_dir, "flattentool_schema.json")

        with open(schema_obj.pkg_schema_url) as schema_fp, open(
            flattentool_schema_url, "w"
        ) as flattentool_schema_fp:
            schema = jsonref.load(schema_fp)
            flattentool_schema = (
                schema.get("properties", {}).get(root_list_path, {}).get("items", {})
            )
            json.dump(flattentool_schema, flattentool_schema_fp)

        if file_type != "json":
            context.update(
                convert_spreadsheet(
                    upload_dir,
                    upload_url,
                    data_file_path,
                    file_type,
                    lib_cove_config,
                    schema_url=flattentool_schema_url,
                    pkg_schema_url=schema_obj.pkg_schema_url,
                    metatab_name="Meta",
                    replace=True,
                    cache=cache,  # fix: was hard-coded False, ignoring the parameter
                )
            )

            # convert_spreadsheet writes the converted JSON into upload_dir
            # and reports its location in the context.
            json_file_path = context["converted_path"]

        else:
            json_file_path = data_file_path

        with open(json_file_path) as fp:
            try:
                json_data = json.load(fp, parse_float=Decimal)
            except json.JSONDecodeError:
                context.update(
                    {
                        "status": "FAILED",
                        "error": "Could not decode as a json file",
                    }
                )
                return context

        context = common_checks_context(
            upload_dir,
            json_data,
            schema_obj,
            schema_name,
            context,
            cache=cache,  # fix: was hard-coded False, ignoring the parameter
        )
    context["status"] = "SUCCESS"
    return context


def start(project):
output = {}
for data_file in project["dataFiles"]:
context = {"file_type": "json"}

job_id = project["name"] + "_cove_results_" + data_file
try:
job = Job.fetch(job_id, connection=django_rq.get_connection())
Expand All @@ -55,33 +163,13 @@ def start(project):
except NoSuchJobError:
pass

with open(
os.path.join(project["path"], data_file)
) as fp, tempfile.TemporaryDirectory() as upload_dir:
# upload_dir is only used to output files to (e.g. cell source map
# from flatten-tool, or a cache of the validation results).
try:
# Possibly we should do this in the worker for performance reasons
# Issue: https://github.com/OpenDataServices/standards-lab/issues/24
json_data = json.load(fp, parse_float=Decimal)
except json.JSONDecodeError:
output[data_file] = {
"status": "FAILED",
"error": "Could not decode as a json file",
}
continue

job = django_rq.enqueue(
common_checks_context,
upload_dir,
json_data,
schema_obj,
schema_name,
context,
cache=False,
job_id=job_id,
)
output[data_file] = {"status": "SUCCESS"}
job = django_rq.enqueue(
lib_cove_wrapper,
project,
data_file,
job_id=job_id,
)
output[data_file] = {"status": "SUCCESS"}
return output


Expand Down
49 changes: 39 additions & 10 deletions standards_lab/ui/templates/project.html
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,21 @@ <h4 class="card-title">Project Settings</h4>
Modified: <code>{{project.modified}}</code></p>
<div class="form-group">
<label for="project-name-input">Project Name</label>
<input type="text" id="project-name-input" class="form-control form-control-lg" style="width: 100%" v-model="project.name" >
<input type="text" id="project-name-input" class="form-control form-control-lg" style="width: 100%" v-model="project.name" v-on:keyup="unsavedChanges = true" >
<small>Accepted characters are A-Z, a-z, 0-9 , - and _ </small>
<p v-if="!validProjectName" class="alert alert-warning mt-2">Invalid characters in project name</p>
</div>
<div class="form-group" v-if="ownThisProject">
<input type="checkbox" name="editable" id="project-editable" v-model="project.editable" >
<input type="checkbox" name="editable" id="project-editable" v-model="project.editable" v-on:change="unsavedChanges = true" >
<label for="project-editable">Editable by anyone with the link</label>
</div>
<div class="form-group">
<label for="root-list-path">JSON key to main list of your data (needed for spreadsheet upload)</label>
<input type="text" class="form-control" id="project-root-list-path" v-model="project.rootListPath" v-on:keyup="unsavedChanges = true"/>
</div>
<div class="form-group">
<button v-bind:disabled="!validProjectName" class="btn btn-primary" v-on:click="updateProjectProperties">{{saveLabel}}</button>
<p v-if="unsavedChanges" class="alert alert-warning mt-2">You have unsaved changes</p>
</div>
</div>
</div>
Expand Down Expand Up @@ -193,6 +198,8 @@ <h4 class="card-title">Data</h4>
jsonEditorData: {},
jsonEditorDataFileName: "untitled.json",
maximiseDataEditor: false,

unsavedChanges: false,
}
},

Expand All @@ -202,7 +209,24 @@ <h4 class="card-title">Data</h4>

this.ownThisProject = ownThisProject;

setInterval(() => { this.getProjectProperties(); }, 2000);
setInterval(async () => {

let project = await this.getProjectProperties();

if (this.unsavedChanges){
/* We have unsaved changes; check whether they have been reverted
* by the user by comparing against the server's copy of the project.
* This could get expensive if the project object gets larger in the future.
*/

if (JSON.stringify(this.project) === JSON.stringify(project)){
this.unsavedChanges = false;
}
/* Note we are not updating this.project on this cycle */
} else {
this.project = project;
}
}, 2000);
},

watch: {
Expand All @@ -213,7 +237,6 @@ <h4 class="card-title">Data</h4>
this.saveLabel = "Save";
}
},

},

computed: {
Expand All @@ -224,17 +247,22 @@ <h4 class="card-title">Data</h4>

methods: {
/* GET the project properties */
getProjectProperties: function(){
getProjectProperties: async function(){

fetch(projectApiUrl, {
let response = await fetch(projectApiUrl, {
method:'GET',
credentials: 'same-origin',
headers: { 'X-CSRFToken': csrfmiddlewaretoken_value },
}).then(response => response.json()).then(result => {
if (result.error == undefined){
this.project = result;
}
});

let project_json = await response.json();

if (project_json.error === undefined){
return project_json;
} else {
console.error(project_json.error);
return this.project;
}
},

/* Update any of the project's properties */
Expand All @@ -247,6 +275,7 @@ <h4 class="card-title">Data</h4>
body: JSON.stringify(this.project),
}).then(response => response.json()).then(result => {
if (result.error == undefined){
this.unsavedChanges = false;
this.project = result;

/* If we have changed project name for simplicity we reload the page to the new project page */
Expand Down

0 comments on commit 2c50416

Please sign in to comment.