Added clumsy method restricting max_err in validation
mwalzer committed Jul 19, 2023
1 parent 19d3bff commit 302a15b
Showing 3 changed files with 45 additions and 5 deletions.
2 changes: 2 additions & 0 deletions accessories/heroku/README.md
````diff
@@ -23,6 +23,8 @@ For local tests calling the flask app directly (i.e. as single thread app) is fine.
 The mzQC gitHub-pages integration and `local_validator.html` expect the API to run on port 5000.
 Calling the mzqc_online_validator directly in gunicorn is fine too (`podman run -p 8123:8123 -ti localhost/mzqc-validator python3 -m gunicorn mzqc_online_validator:app -b 0.0.0.0:8123 --chdir mzqc-validator/`); the `wsgi.py` indirection is a legacy effect of heroku's Procfile use and their example app.
 
+The validate function of SemanticCheck honours the environment variable `MAX_ERR`, which, when set to an integer, limits the number of validation errors that may accumulate before validation is aborted. It can be adjusted in the call like so: `podman run -p 5000:5000 -ti localhost/mzqc-validator env MAX_ERR=3 python3 -m gunicorn mzqc_online_validator:app -b 0.0.0.0:5000 --chdir mzqc-validator/`
+
 #### Legacy Heroku Deployment
 Or you can deploy your own heroku dyno like so:
 ```
````
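Outside the container the same limit can be set through the shell environment, e.g. `env MAX_ERR=3 python3 -m gunicorn mzqc_online_validator:app -b 0.0.0.0:5000 --chdir mzqc-validator/` for a direct local run; this is a sketch assembled from the two invocations in this README, not a separately documented command.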
11 changes: 8 additions & 3 deletions accessories/heroku/mzqc_online_validator.py
```diff
@@ -1,3 +1,4 @@
+import os
 import json
-from flask import Flask
+from flask import Flask, jsonify, request
 
@@ -75,9 +76,13 @@ def post(self):
         else:
             removed_items = list(filter(lambda x: not x.uri.startswith('http'), target.controlledVocabularies))
             target.controlledVocabularies = list(filter(lambda x: x.uri.startswith('http'), target.controlledVocabularies))
-        sem_val_res = SemanticCheck().validate(target)
-        #print(sem_val_res)
+        me = os.getenv('MAX_ERR', None)
+        if isinstance(me, str) and me.isnumeric():
+            me = int(me)
+            sem_val_res = SemanticCheck().validate(target, max_errors=me)
+        else:
+            sem_val_res = SemanticCheck().validate(target)
 
         proto_response = {k: [str(i) for i in v] for k,v in sem_val_res.items()}
         proto_response.update({"unrecognised CVs": [str(it) for it in removed_items]})
         #print(proto_response)
```
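A note on the parsing above: `str.isnumeric()` returns `False` for strings such as `'-1'` or `'3.5'`, so only non-negative integer values of `MAX_ERR` activate the limit; anything else silently falls through to the unlimited `validate` call. A standalone sketch of that behaviour (the helper name `parse_max_err` is hypothetical, not part of the module):

```python
import os
from typing import Optional

def parse_max_err() -> Optional[int]:
    """Hypothetical helper mirroring the MAX_ERR parsing in mzqc_online_validator.py."""
    me = os.getenv('MAX_ERR', None)
    # isnumeric() is False for '-1', '3.5' and '', so negative,
    # fractional or empty values leave the limit disabled
    if isinstance(me, str) and me.isnumeric():
        return int(me)
    return None

# e.g. MAX_ERR=3  -> 3 (limit active)
#      MAX_ERR=-1 -> None (unlimited)
#      unset      -> None (unlimited)
```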
37 changes: 35 additions & 2 deletions mzqc/SemanticCheck.py
```diff
@@ -142,7 +142,7 @@ def _cvmatch(self, cv_par: CvParameter, voc_par: Term) -> List[SemanticError]:
         # error/warning/other messages) to collect all the stuff while going through the validation
         return term_errs
 
-    def validate(self, mzqc_obj: MzQcFile, load_local=False):
+    def validate(self, mzqc_obj: MzQcFile, max_errors:int=0, load_local:bool=False):
         # TODO incorporate version when SemanticValidation may differ between versions
         #! Semantic validation of the JSON file.
         #? Check that label (metadata) must be unique in the file
@@ -153,6 +153,7 @@ def validate(self, mzqc_obj: MzQcFile, load_local=False):
         #? Check that cv value has all attributes referred in cv
         #?? Check that multi-file metrics refer to existing filenames.
         #?? Check that filenames are unique within a run/setQuality. #50
+        # max_errors is the maximum number of errors to accumulate before validation is aborted; the default 0 means unlimited, as the limit checks only fire for max_errors > 0
 
         # create validation error list object
         validation_errs = dict() # need to keep it flexible
@@ -176,10 +177,20 @@ def validate(self, mzqc_obj: MzQcFile, load_local=False):
         # check if ontologies are listed multiple times (different versions etc)
         validation_errs['ontology load errors'] = voc_errs
 
+        #check max_error
+        if max_errors > 0:
+            if sum([len(x) for x in validation_errs.values()]) > max_errors:
+                validation_errs['general'] = validation_errs.get('general', list())
+                validation_errs['general'].append(
+                    ValidationError("Maximum number of errors incurred ({me} < {ie}), aborting!".format(
+                        ie=sum([len(x) for x in validation_errs.values()]), me=max_errors))
+                )
+                self.errors = validation_errs
+                return {k: [str(i) for i in v] for k,v in validation_errs.items()}
+
         # For all cv terms involved:
         term_errs = list()
         for cv_parameter in self._get_cv_parameters(mzqc_obj):
 
             #? Verify that the term exists in the CV.
             if not any(cv_parameter.accession in cvoc for cvoc in file_vocabularies.values()):
                 # cv not found error
@@ -203,6 +214,17 @@ def validate(self, mzqc_obj: MzQcFile, load_local=False):
             term_errs.extend(cv_err)
         validation_errs['ontology term errors'] = term_errs
 
+        #check max_error
+        if max_errors > 0:
+            if sum([len(x) for x in validation_errs.values()]) > max_errors:
+                validation_errs['general'] = validation_errs.get('general', list())
+                validation_errs['general'].append(
+                    ValidationError("Maximum number of errors incurred ({me} < {ie}), aborting!".format(
+                        ie=sum([len(x) for x in validation_errs.values()]), me=max_errors))
+                )
+                self.errors = validation_errs
+                return {k: [str(i) for i in v] for k,v in validation_errs.items()}
+
         #? Check that qualityParameters are unique within a run/setQuality.
         metrics_uniq_warns = list()
         actual_metric_warns = list()
@@ -252,6 +274,17 @@ def validate(self, mzqc_obj: MzQcFile, load_local=False):
         validation_errs['metric usage errors'] = actual_metric_warns
         validation_errs['value type errors'] = metric_type_errs
 
+        #check max_error
+        if max_errors > 0:
+            if sum([len(x) for x in validation_errs.values()]) > max_errors:
+                validation_errs['general'] = validation_errs.get('general', list())
+                validation_errs['general'].append(
+                    ValidationError("Maximum number of errors incurred ({me} < {ie}), aborting!".format(
+                        ie=sum([len(x) for x in validation_errs.values()]), me=max_errors))
+                )
+                self.errors = validation_errs
+                return {k: [str(i) for i in v] for k,v in validation_errs.items()}
+
         # Regarding metadata, verify that input files are consistent and unique.
         validation_errs['input files'] = self._inputFileConsistency(mzqc_obj)
```
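The same ten-line guard is pasted at three checkpoints in `validate`, which is the "clumsy" part the commit title concedes. A minimal sketch of how it could be factored into one helper; the function name `max_err_guard` is hypothetical, and the `ValidationError` import assumes the same exception class SemanticCheck.py already uses:

```python
from typing import Dict, List, Optional
from jsonschema.exceptions import ValidationError  # assumption: the class used in SemanticCheck.py

def max_err_guard(validation_errs: Dict[str, list],
                  max_errors: int) -> Optional[Dict[str, List[str]]]:
    """Hypothetical helper: return the stringified error dict when the
    accumulated error count exceeds max_errors, else None (carry on)."""
    incurred = sum(len(v) for v in validation_errs.values())
    if max_errors > 0 and incurred > max_errors:
        validation_errs.setdefault('general', []).append(
            ValidationError("Maximum number of errors incurred ({me} < {ie}), aborting!".format(
                ie=incurred, me=max_errors)))
        return {k: [str(i) for i in v] for k, v in validation_errs.items()}
    return None
```

Each checkpoint would then reduce to calling the guard, storing `validation_errs` on `self.errors`, and returning early when the guard fires, e.g. `SemanticCheck().validate(target, max_errors=3)` aborts once more than three errors have accumulated.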
