Merge pull request #116 from NAL-i5K/update-error-handling

Update error levels in gff3_QC according to issue #114
NAL-i5K · Nov 19, 2021 · 312b3e6 · 312b3e6
2 parents 8c84d34 + d67892b
commit 312b3e6
Show file tree

Hide file tree

Showing 10 changed files with 174 additions and 153 deletions.
diff --git a/docs/Detection-of-GFF3-format-errors.rst b/docs/Detection-of-GFF3-format-errors.rst
diff --git a/docs/gff3_QC.md b/docs/gff3_QC.md
@@ -17,11 +17,13 @@ Python 3.x
 1. Error report for the input GFF3 file
     * Line_num: Line numbers of the problematic models found in the input GFF3 file.
     * Error_code: Error codes for the found problematic models. Please refer to lib/ERROR/ERROR.py to see the full list of Error_code and the corresponding Error_tag.
-        * Error_tag: Detail of the found errors for the problematic models. Please refer to lib/ERROR/ERROR.py to see the full list of Error_code and the corresponding Error_tag.
+    * Error_level: Severity level of the error code. Three levels are defined: Error (violates the [GFF3 specification](https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md)), Warning (might violate the [GFF3 specification](https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md)), and Info (likely not an error, but worth checking).
+    * Error_tag: Details of the errors found in the problematic models. Please refer to lib/ERROR/ERROR.py to see the full list of Error_code and the corresponding Error_tag.
 2. Statistic report for the output files
     * Error_code: Error codes for the found problematic models. Please refer to lib/ERROR/ERROR.py to see the full list of Error_code and the corresponding Error_tag.
     * Number of problematic models: The number of problematic models reported for each Error_code.
-        * Error_tag: Detail of the found errors for the problematic models. Please refer to lib/ERROR/ERROR.py to see the full list of Error_code and the corresponding Error_tag.
+    * Error_level: Severity level of the error code. Three levels are defined: Error (violates the [GFF3 specification](https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md)), Warning (might violate the [GFF3 specification](https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md)), and Info (likely not an error, but worth checking).
+    * Error_tag: Details of the errors found in the problematic models. Please refer to lib/ERROR/ERROR.py to see the full list of Error_code and the corresponding Error_tag.
 
 ## Quick start
 `gff3_QC -g example_file/example.gff3 -f example_file/reference.fa -o test -s statistic.txt`

diff --git a/gff3tool/bin/gff3_QC.py b/gff3tool/bin/gff3_QC.py
@@ -121,17 +121,21 @@ def script_main():
     else:
         logger_stderr.info('Print QC statistic report at {0:s}'.format('statistic.txt'))
         statistic_fh = open('statistic.txt', 'w')
-    report_fh.write('Line_num\tError_code\tError_tag\n')
+    report_fh.write('Line_num\tError_code\tError_level\tError_tag\n')
+
     for e in sorted(error_set, key=lambda x: sorted(x.keys())):
         tag = '[{0:s}]'.format(e['eTag'])
-        report_fh.write('{0:s}\t{1:s}\t{2:s}\n'.format(str(e['line_num']), str(e['eCode']), str(tag)))
+        if 'error_level' not in e:
+            e['error_level'] = "Not defined"
+        report_fh.write('{0:s}\t{1:s}\t{2:s}\t{3:s}\n'.format(str(e['line_num']), str(e['eCode']), str(e['error_level']), str(tag)))
     #statistic_file
     error_counts = dict()
     ERROR_INFO=ERROR.INFO
-    statistic_fh.write('Error_code\tNumber_of_problematic_models\tError_tag\n')
+    statistic_fh.write('Error_code\tNumber_of_problematic_models\tError_level\tError_tag\n')
     for s in sorted(error_set, key=lambda x: sorted(x.keys())):
         if s['eCode'] not in error_counts:
-            error_counts[s['eCode']]= {'count':0,'etag':ERROR_INFO[s['eCode']]}
+            error_counts[s['eCode']]= {'count':0, 'error_level':s['error_level'],'etag':ERROR_INFO[s['eCode']]}
         error_counts[s['eCode']]['count'] += 1   
     for a in error_counts:
-        statistic_fh.write('{0:s}\t{1:s}\t{2:s}\n'.format(str(a),str(error_counts[a]['count']),str(error_counts[a]['etag'])))
+        statistic_fh.write('{0:s}\t{1:s}\t{2:s}\t{3:s}\n'.format(str(a),str(error_counts[a]['count']), str(error_counts[a]['error_level']),str(error_counts[a]['etag'])))
+
diff --git a/gff3tool/bin/version.py b/gff3tool/bin/version.py
@@ -1,3 +1,3 @@
 # -*- coding: utf-8 -*-
 """Version of GFF3toolkit"""
-__version__ = '2.0.3'
+__version__ = '2.1.0'
diff --git a/gff3tool/lib/function4gff/function4gff.py b/gff3tool/lib/function4gff/function4gff.py
@@ -115,6 +115,10 @@ def extract_internal_detected_errors(gff):
                 result['eLines'] = [line]
                 result['eTag'] = e['message']
                 #print('{0:s}\t{1:s}\t[{2:s}]'.format(result['ID'], result['eCode'], result['eTag']))
+                if 'error_level' not in e:
+                    result['error_level'] = "Error"
+                else:
+                    result['error_level'] = e['error_level']
                 eSet.append(result)
         except:
             logger.error(line['line_raw'])