Skip to content

Commit

Permalink
Use regular expression solution from Uri Laserson to fix invalid locations during parsing IMGT files (Bug 3069)
Browse files Browse the repository at this point in the history
  • Loading branch information
peterjc committed May 18, 2010
1 parent 945bc4b commit a41db09
Showing 1 changed file with 7 additions and 3 deletions.
10 changes: 7 additions & 3 deletions Bio/GenBank/Scanner.py
@@ -1,4 +1,5 @@
# Copyright 2007-2010 by Peter Cock. All rights reserved.
# Revisions copyright 2010 by Uri Laserson. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
Expand Down Expand Up @@ -26,6 +27,7 @@

import warnings
import os
import re
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Alphabet import generic_alphabet, generic_protein
Expand Down Expand Up @@ -807,6 +809,8 @@ def parse_features(self, skip=False):
while self.line.rstrip() in self.FEATURE_START_MARKERS:
self.line = self.handle.readline()

bad_position_re = re.compile(r'([0-9]+)>{1}')

features = []
line = self.line
while True:
Expand Down Expand Up @@ -843,10 +847,10 @@ def parse_features(self, skip=False):
#Nasty hack for common IMGT bug, probably should be 1..end
#if we had the sequence length information here.
location_start = "1"
-if location_start.endswith(">"):
+if ">" in location_start:
 #Nasty hack for common IMGT bug, should be >123 not 123>
-#in a location string. TODO - reinsert the ">" char.
-location_start = location_start[:-1]
+#in a location string.
+location_start = bad_position_re.sub(r'>\1',location_start)
feature_lines = [location_start]
line = self.handle.readline()
while line[:self.FEATURE_QUALIFIER_INDENT] == self.FEATURE_QUALIFIER_SPACER \
Expand Down

0 comments on commit a41db09

Please sign in to comment.