Added clumsy method restricting max_err in validation
mwalzer committed Jul 19, 2023
1 parent 19d3bff commit 302a15b
Showing 3 changed files with 45 additions and 5 deletions.
2 changes: 2 additions & 0 deletions accessories/heroku/README.md
````diff
@@ -23,6 +23,8 @@ For local tests calling the flask app directly (i.e. as single thread app) is fine.
 The mzQC gitHub-pages integration and `local_validator.html` expect the API to run on port 5000.
 Calling the mzqc_online_validator directly in gunicorn is fine too (`podman run -p 8123:8123 -ti localhost/mzqc-validator python3 -m gunicorn mzqc_online_validator:app -b 0.0.0.0:8123 --chdir mzqc-validator/`); the `wsgi.py` indirection is a legacy effect of heroku's Procfile use and their example app.
 
+The validate function of SemanticCheck honours the environment variable `MAX_ERR`, which, when set to an integer, limits the number of validation errors that may accumulate before validation is aborted. It can be adjusted in the call like so: `podman run -p 5000:5000 -ti localhost/mzqc-validator env MAX_ERR=3 python3 -m gunicorn mzqc_online_validator:app -b 0.0.0.0:5000 --chdir mzqc-validator/`
+
 #### Legacy Heroku Deployment
 Or you can deploy your own heroku dyno like so:
 ```
````
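Outside the container the same limit can be set through the shell environment, e.g. `env MAX_ERR=3 python3 -m gunicorn mzqc_online_validator:app -b 0.0.0.0:5000 --chdir mzqc-validator/` for a direct local run; this is a sketch assembled from the two invocations in this README, not a separately documented command.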
11 changes: 8 additions & 3 deletions accessories/heroku/mzqc_online_validator.py
```diff
@@ -1,3 +1,4 @@
+import os
 import json
-from flask import Flask
+from flask import Flask, jsonify, request
 
@@ -75,9 +76,13 @@ def post(self):
         else:
             removed_items = list(filter(lambda x: not x.uri.startswith('http'), target.controlledVocabularies))
             target.controlledVocabularies = list(filter(lambda x: x.uri.startswith('http'), target.controlledVocabularies))
-        sem_val_res = SemanticCheck().validate(target)
-        #print(sem_val_res)
+        me = os.getenv('MAX_ERR', None)
+        if isinstance(me, str) and me.isnumeric():
+            me = int(me)
+            sem_val_res = SemanticCheck().validate(target, max_errors=me)
+        else:
+            sem_val_res = SemanticCheck().validate(target)
 
         proto_response = {k: [str(i) for i in v] for k,v in sem_val_res.items()}
         proto_response.update({"unrecognised CVs": [str(it) for it in removed_items]})
         #print(proto_response)
```
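A note on the parsing above: `str.isnumeric()` returns `False` for strings such as `'-1'` or `'3.5'`, so only non-negative integer values of `MAX_ERR` activate the limit; anything else silently falls through to the unlimited `validate` call. A standalone sketch of that behaviour (the helper name `parse_max_err` is hypothetical, not part of the module):

```python
import os
from typing import Optional

def parse_max_err() -> Optional[int]:
    """Hypothetical helper mirroring the MAX_ERR parsing in mzqc_online_validator.py."""
    me = os.getenv('MAX_ERR', None)
    # isnumeric() is False for '-1', '3.5' and '', so negative,
    # fractional or empty values leave the limit disabled
    if isinstance(me, str) and me.isnumeric():
        return int(me)
    return None

# e.g. MAX_ERR=3  -> 3 (limit active)
#      MAX_ERR=-1 -> None (unlimited)
#      unset      -> None (unlimited)
```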
37 changes: 35 additions & 2 deletions mzqc/SemanticCheck.py
```diff
@@ -142,7 +142,7 @@ def _cvmatch(self, cv_par: CvParameter, voc_par: Term) -> List[SemanticError]:
         # error/warning/other messages) to collect all the stuff while going through the validation
         return term_errs
 
-    def validate(self, mzqc_obj: MzQcFile, load_local=False):
+    def validate(self, mzqc_obj: MzQcFile, max_errors:int=0, load_local:bool=False):
         # TODO incorporate version when SemanticValidation may differ between versions
         #! Semantic validation of the JSON file.
         #? Check that label (metadata) must be unique in the file
@@ -153,6 +153,7 @@ def validate(self, mzqc_obj: MzQcFile, load_local=False):
         #? Check that cv value has all attributes referred in cv
         #?? Check that multi-file metrics refer to existing filenames.
         #?? Check that filenames are unique within a run/setQuality. #50
+        # max_errors is the maximum number of errors to accumulate before validation is aborted; the default 0 means unlimited, as the limit checks only fire for max_errors > 0
 
         # create validation error list object
         validation_errs = dict() # need to keep it flexible
@@ -176,10 +177,20 @@ def validate(self, mzqc_obj: MzQcFile, load_local=False):
         # check if ontologies are listed multiple times (different versions etc)
         validation_errs['ontology load errors'] = voc_errs
 
+        #check max_error
+        if max_errors > 0:
+            if sum([len(x) for x in validation_errs.values()]) > max_errors:
+                validation_errs['general'] = validation_errs.get('general', list())
+                validation_errs['general'].append(
+                    ValidationError("Maximum number of errors incurred ({me} < {ie}), aborting!".format(
+                        ie=sum([len(x) for x in validation_errs.values()]), me=max_errors))
+                )
+                self.errors = validation_errs
+                return {k: [str(i) for i in v] for k,v in validation_errs.items()}
+
         # For all cv terms involved:
         term_errs = list()
         for cv_parameter in self._get_cv_parameters(mzqc_obj):
 
             #? Verify that the term exists in the CV.
             if not any(cv_parameter.accession in cvoc for cvoc in file_vocabularies.values()):
                 # cv not found error
@@ -203,6 +214,17 @@ def validate(self, mzqc_obj: MzQcFile, load_local=False):
             term_errs.extend(cv_err)
         validation_errs['ontology term errors'] = term_errs
 
+        #check max_error
+        if max_errors > 0:
+            if sum([len(x) for x in validation_errs.values()]) > max_errors:
+                validation_errs['general'] = validation_errs.get('general', list())
+                validation_errs['general'].append(
+                    ValidationError("Maximum number of errors incurred ({me} < {ie}), aborting!".format(
+                        ie=sum([len(x) for x in validation_errs.values()]), me=max_errors))
+                )
+                self.errors = validation_errs
+                return {k: [str(i) for i in v] for k,v in validation_errs.items()}
+
         #? Check that qualityParameters are unique within a run/setQuality.
         metrics_uniq_warns = list()
         actual_metric_warns = list()
@@ -252,6 +274,17 @@ def validate(self, mzqc_obj: MzQcFile, load_local=False):
         validation_errs['metric usage errors'] = actual_metric_warns
         validation_errs['value type errors'] = metric_type_errs
 
+        #check max_error
+        if max_errors > 0:
+            if sum([len(x) for x in validation_errs.values()]) > max_errors:
+                validation_errs['general'] = validation_errs.get('general', list())
+                validation_errs['general'].append(
+                    ValidationError("Maximum number of errors incurred ({me} < {ie}), aborting!".format(
+                        ie=sum([len(x) for x in validation_errs.values()]), me=max_errors))
+                )
+                self.errors = validation_errs
+                return {k: [str(i) for i in v] for k,v in validation_errs.items()}
+
         # Regarding metadata, verify that input files are consistent and unique.
         validation_errs['input files'] = self._inputFileConsistency(mzqc_obj)
```
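The same ten-line guard is pasted at three checkpoints in `validate`, which is the "clumsy" part the commit title concedes. A minimal sketch of how it could be factored into one helper; the function name `max_err_guard` is hypothetical, and the `ValidationError` import assumes the same exception class SemanticCheck.py already uses:

```python
from typing import Dict, List, Optional
from jsonschema.exceptions import ValidationError  # assumption: the class used in SemanticCheck.py

def max_err_guard(validation_errs: Dict[str, list],
                  max_errors: int) -> Optional[Dict[str, List[str]]]:
    """Hypothetical helper: return the stringified error dict when the
    accumulated error count exceeds max_errors, else None (carry on)."""
    incurred = sum(len(v) for v in validation_errs.values())
    if max_errors > 0 and incurred > max_errors:
        validation_errs.setdefault('general', []).append(
            ValidationError("Maximum number of errors incurred ({me} < {ie}), aborting!".format(
                ie=incurred, me=max_errors)))
        return {k: [str(i) for i in v] for k, v in validation_errs.items()}
    return None
```

Each checkpoint would then reduce to calling the guard, storing `validation_errs` on `self.errors`, and returning early when the guard fires, e.g. `SemanticCheck().validate(target, max_errors=3)` aborts once more than three errors have accumulated.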
