Skip to content

Commit

Permalink
Merge pull request #116 from NAL-i5K/update-error-handling
Browse files Browse the repository at this point in the history
Update error levels in gff3_QC according to issue #114
  • Loading branch information
mpoelchau committed Nov 19, 2021
2 parents 8c84d34 + d67892b commit 312b3e6
Show file tree
Hide file tree
Showing 10 changed files with 174 additions and 153 deletions.
230 changes: 115 additions & 115 deletions docs/Detection-of-GFF3-format-errors.rst

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions docs/gff3_QC.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@ Python 3.x
1. Error report for the input GFF3 file
* Line_num: Line numbers of the problematic models found in the input GFF3 file.
* Error_code: Error codes of the problematic models found. Please refer to lib/ERROR/ERROR.py to see the full list of Error_code and the corresponding Error_tag.
* Error_tag: Detail of the found errors for the problematic models. Please refer to lib/ERROR/ERROR.py to see the full list of Error_code and the corresponding Error_tag.
* Error_level: Severity levels of the error codes. Three levels were defined: Error (violates the [GFF3 specification](https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md)), Warning (might violate the [GFF3 specification](https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md)), and Info (likely not an error, but worth checking).
* Error_tag: Details of the errors found in the problematic models. Please refer to lib/ERROR/ERROR.py to see the full list of Error_code and the corresponding Error_tag.
2. Statistic report for the output files
* Error_code: Error codes of the problematic models found. Please refer to lib/ERROR/ERROR.py to see the full list of Error_code and the corresponding Error_tag.
* Number of problematic models: The number of problematic models counted for each Error_code.
* Error_tag: Detail of the found errors for the problematic models. Please refer to lib/ERROR/ERROR.py to see the full list of Error_code and the corresponding Error_tag.
* Error_level: Severity levels of the error codes. Three levels were defined: Error (violates the [GFF3 specification](https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md)), Warning (might violate the [GFF3 specification](https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md)), and Info (likely not an error, but worth checking).
* Error_tag: Details of the errors found in the problematic models. Please refer to lib/ERROR/ERROR.py to see the full list of Error_code and the corresponding Error_tag.

## Quick start
`gff3_QC -g example_file/example.gff3 -f example_file/reference.fa -o test -s statistic.txt`
Expand Down
14 changes: 9 additions & 5 deletions gff3tool/bin/gff3_QC.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,17 +121,21 @@ def script_main():
else:
logger_stderr.info('Print QC statistic report at {0:s}'.format('statistic.txt'))
statistic_fh = open('statistic.txt', 'w')
report_fh.write('Line_num\tError_code\tError_tag\n')
report_fh.write('Line_num\tError_code\tError_level\tError_tag\n')

for e in sorted(error_set, key=lambda x: sorted(x.keys())):
tag = '[{0:s}]'.format(e['eTag'])
report_fh.write('{0:s}\t{1:s}\t{2:s}\n'.format(str(e['line_num']), str(e['eCode']), str(tag)))
if 'error_level' not in e:
e['error_level'] = "Not defined"
report_fh.write('{0:s}\t{1:s}\t{2:s}\t{3:s}\n'.format(str(e['line_num']), str(e['eCode']), str(e['error_level']), str(tag)))
#statistic_file
error_counts = dict()
ERROR_INFO=ERROR.INFO
statistic_fh.write('Error_code\tNumber_of_problematic_models\tError_tag\n')
statistic_fh.write('Error_code\tNumber_of_problematic_models\tError_level\tError_tag\n')
for s in sorted(error_set, key=lambda x: sorted(x.keys())):
if s['eCode'] not in error_counts:
error_counts[s['eCode']]= {'count':0,'etag':ERROR_INFO[s['eCode']]}
error_counts[s['eCode']]= {'count':0, 'error_level':s['error_level'],'etag':ERROR_INFO[s['eCode']]}
error_counts[s['eCode']]['count'] += 1
for a in error_counts:
statistic_fh.write('{0:s}\t{1:s}\t{2:s}\n'.format(str(a),str(error_counts[a]['count']),str(error_counts[a]['etag'])))
statistic_fh.write('{0:s}\t{1:s}\t{2:s}\t{3:s}\n'.format(str(a),str(error_counts[a]['count']), str(error_counts[a]['error_level']),str(error_counts[a]['etag'])))

2 changes: 1 addition & 1 deletion gff3tool/bin/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# -*- coding: utf-8 -*-
"""Version of GFF3toolkit"""
__version__ = '2.0.3'
__version__ = '2.1.0'
4 changes: 4 additions & 0 deletions gff3tool/lib/function4gff/function4gff.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ def extract_internal_detected_errors(gff):
result['eLines'] = [line]
result['eTag'] = e['message']
#print('{0:s}\t{1:s}\t[{2:s}]'.format(result['ID'], result['eCode'], result['eTag']))
if 'error_level' not in e:
result['error_level'] = "Error"
else:
result['error_level'] = e['error_level']
eSet.append(result)
except:
logger.error(line['line_raw'])
Expand Down

0 comments on commit 312b3e6

Please sign in to comment.