Skip to content

Commit

Permalink
Merge pull request #120 from NAL-i5K/new-phase-noerror
Browse files (browse the repository at this point in the history)
New phase noerror
  • Loading branch information
mpoelchau committed Nov 18, 2021
2 parents 29bd971 + 3c4d316 commit 8c84d34
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 27 deletions.
2 changes: 1 addition & 1 deletion gff3tool/bin/gff3_fix.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,4 @@ def script_main():
logger_stderr.error('Failed to read GFF3 file!')
sys.exit(1)

- gff3_fix.fix.main(gff3=gff3, output_gff=args.output_gff, error_dict=error_dict, line_num_dict=line_num_dict, logger=logger_null)
+ gff3_fix.fix.main(gff3=gff3, output_gff=args.output_gff, error_dict=error_dict, line_num_dict=line_num_dict, logger=logger_stderr)
55 changes: 29 additions & 26 deletions gff3tool/lib/gff3_fix/fix.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,42 +400,45 @@ def fix_phase(gff3, error_list, line_num_dict, logger):
for error in error_list:
for line_num in error:
if gff3.lines[line_num-1]['line_status'] != 'removed':
for root in gff3.collect_roots(gff3.lines[line_num-1]):
if len(gff3.lines[line_num-1]['parents'][0]) == 1:
parent = gff3.lines[line_num-1]['parents'][0][0]
CDS_list = []
CDS_set = set()
if root['type'] != 'CDS':
root['phase'] = '.'
for child in gff3.collect_descendants(root):
for child in gff3.collect_descendants(parent):
if child['type'] == 'CDS':
if child['line_raw'] not in CDS_set:
CDS_list.append(child)
CDS_set.add(child['line_raw'])
else:
gff3.lines[child['line_index']]['phase'] = '.'
if len(CDS_list) != 0:
if CDS_list[0]['strand'] == '-':
sorted_CDS_list = sorted(CDS_list, key=lambda x: x['end'], reverse=True)
elif CDS_list[0]['strand'] == '+':
sorted_CDS_list = sorted(CDS_list, key=lambda x: x['start'])
if sorted_CDS_list[0]['line_index']+1 in error:
if 'Ema0006' in line_num_dict[sorted_CDS_list[0]['line_index']+1]:
phase = list(map(int,re.findall(r'\d',line_num_dict[sorted_CDS_list[0]['line_index']+1]['Ema0006']))[1])
else:
try:
phase = sorted_CDS_list[0]['phase']
if phase not in valid_phase:
phase = 0
except ValueError:
phase = 0
gff3.lines[sorted_CDS_list[0]['line_index']]['phase'] = phase

else:
logger.warning('CDS has more than one parent - Line %s' % str(line_num))

if len(CDS_list) != 0:
if CDS_list[0]['strand'] == '-':
sorted_CDS_list = sorted(CDS_list, key=lambda x: x['end'], reverse=True)
elif CDS_list[0]['strand'] == '+':
sorted_CDS_list = sorted(CDS_list, key=lambda x: x['start'])
if [sorted_CDS_list[0]['line_index']+1] in error_list:
if 'Ema0006' in line_num_dict[sorted_CDS_list[0]['line_index']+1]:
phase = list(map(int,re.findall(r'\d',line_num_dict[sorted_CDS_list[0]['line_index']+1]['Ema0006']))[1])
else:
phase = sorted_CDS_list[0]['phase']
for CDS in sorted_CDS_list:
if CDS['phase'] != phase:
gff3.lines[CDS['line_index']]['phase'] = phase
try:
phase = sorted_CDS_list[0]['phase']
if phase not in valid_phase:
phase = 0
except ValueError:
phase = 0
gff3.lines[sorted_CDS_list[0]['line_index']]['phase'] = phase
else:
phase = sorted_CDS_list[0]['phase']
for CDS in sorted_CDS_list:
if CDS['phase'] != phase:
gff3.lines[CDS['line_index']]['phase'] = phase
try:
phase = (3 - ((CDS['end'] - CDS['start'] + 1 - phase) % 3)) % 3

except TypeError:
logger.warning('Fail to calculate phase - Line %s' % str(CDS['line_index']+1)) # phase = .



Expand Down

0 comments on commit 8c84d34

Please sign in to comment.