Skip to content

Commit

Permalink
Experimental slicing and addition of source features as a special case
Browse files Browse the repository at this point in the history
  • Loading branch information
peterjc committed Nov 18, 2009
1 parent c809b67 commit a074919
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 13 deletions.
49 changes: 43 additions & 6 deletions Bio/SeqRecord.py
Expand Up @@ -422,6 +422,17 @@ def __getitem__(self, index):
if start <= f.location.nofuzzy_start \
and f.location.nofuzzy_end <= stop:
answer.features.append(f._shift(-start))
elif f.type=="source" \
and f.location.nofuzzy_start == 0 \
and f.location.nofuzzy_end == parent_length :
assert f.strand != -1
#Special case, slice the source feature
from Bio.SeqFeature import SeqFeature, FeatureLocation
answer.features.append(SeqFeature(FeatureLocation(0, len(answer)),
strand = f.strand,
id=f.id,
type=f.type,
qualifiers=f.qualifiers.copy()))

#Slice all the values to match the sliced sequence
#(this should also work with strides, even negative strides):
Expand Down Expand Up @@ -976,17 +987,43 @@ def __add__(self, other):
annotations = self.annotations.copy(),
dbxrefs = self.dbxrefs[:])
#Adding two SeqRecord objects... must merge annotation.
#Take all the database cross references:
answer = SeqRecord(self.seq + other.seq,
features = self.features[:],
dbxrefs = self.dbxrefs[:])
#Will take all the features and all the db cross refs,
l = len(self)
for f in other.features:
answer.features.append(f._shift(l))
del l
for ref in other.dbxrefs:
if ref not in answer.dbxrefs:
answer.append(ref)
#Will take all the features, but source features are
#handled as a special case:
l = len(self)
left_source = None
for f in self.features:
if f.type=="source" :
#Does it span the whole record?
if left_source is None \
and f.location.nofuzzy_start == 0 \
and f.location.nofuzzy_end == l :
left_source = f
else :
answer.features.append(f._shift(0))
right_source = None
for f in other.features:
if f.type=="source" :
#Does it span the whole record?
if right_source is None \
and f.location.nofuzzy_start == 0 \
and f.location.nofuzzy_end == len(other) :
right_source = f
else :
answer.features.append(f._shift(l))
del l
if left_source and right_source :
from Bio.SeqFeature import SeqFeature, FeatureLocation
f = SeqFeature(FeatureLocation(0, len(answer)), type="source")
#TODO - keep any common qualifiers and dbxrefs for sources
if left_source.id == right_source.id :
f.id = left_source.id
answer.features.insert(0,f) #at start like a GenBank/EMBL file
#Take common id/name/description/annotation
if self.id == other.id:
answer.id = self.id
Expand Down
27 changes: 20 additions & 7 deletions Tests/test_SeqRecord.py
Expand Up @@ -105,10 +105,14 @@ def test_slice_simple(self):
self.assertEqual(sub.letter_annotations, {"fake":"X"*10})
self.assertEqual(sub.dbxrefs, []) # May change this...
self.assertEqual(sub.annotations, {}) # May change this...
self.assertEqual(len(sub.features), 1)
self.assertEqual(len(sub.features), 2) #source plus one
self.assertEqual(sub.features[0].type, "source")
self.assertEqual(sub.features[0].location.nofuzzy_start, 0)
self.assertEqual(sub.features[0].location.nofuzzy_end, 10)
#By construction, each feature matches the full sliced region:
self.assertEqual(str(sub.features[0].extract(sub.seq)), str(sub.seq))
self.assertEqual(sub.features[0].extract(str(sub.seq)), str(sub.seq))
for f in sub.features :
self.assertEqual(str(f.extract(sub.seq)), str(sub.seq))
self.assertEqual(f.extract(str(sub.seq)), str(sub.seq))

def test_add_simple(self):
"""Simple addition"""
Expand All @@ -120,12 +124,15 @@ def test_add_simple(self):
self.assertEqual(rec.dbxrefs, ["TestXRef"])
self.assertEqual(rec.annotations, {"k":"v"})
self.assertEqual(rec.letter_annotations, {"fake":"X"*52})
self.assertEqual(len(rec.features), 2*len(self.record.features))
self.assertEqual(len(rec.features), 2*len(self.record.features)-1)
self.assertEqual(rec.features[0].type, "source")
self.assertEqual(rec.features[0].location.nofuzzy_start, 0)
self.assertEqual(rec.features[0].location.nofuzzy_end, 52)

def test_add_seq(self):
"""Simple addition of Seq or string"""
for other in [Seq("BIO"), "BIO"] :
rec = self.record + other # will use SeqRecord's __add__ method
rec = self.record + other
self.assertEqual(len(rec), 26+3)
self.assertEqual(str(rec.seq), str(self.record.seq)+"BIO")
self.assertEqual(rec.id, "TestID")
Expand All @@ -142,7 +149,7 @@ def test_add_seq(self):
def test_add_seq_left(self):
"""Simple left addition of Seq or string"""
for other in [Seq("BIO"), "BIO"] :
rec = other + self.record # will use SeqRecord's __radd__ method
rec = other + self.record
self.assertEqual(len(rec), 26+3)
self.assertEqual(str(rec.seq), "BIO"+str(self.record.seq))
self.assertEqual(rec.id, "TestID")
Expand All @@ -155,7 +162,7 @@ def test_add_seq_left(self):
self.assertEqual(rec.features[0].type, "source")
self.assertEqual(rec.features[0].location.nofuzzy_start, 3)
self.assertEqual(rec.features[0].location.nofuzzy_end, 26+3)

def test_slice_add_simple(self):
"""Simple slice and add"""
for cut in range(27) :
Expand All @@ -169,6 +176,9 @@ def test_slice_add_simple(self):
self.assertEqual(rec.annotations, {}) # May change this...
self.assertEqual(rec.letter_annotations, {"fake":"X"*26})
self.assert_(len(rec.features) <= len(self.record.features))
self.assertEqual(rec.features[0].type, "source")
self.assertEqual(rec.features[0].location.nofuzzy_start, 0)
self.assertEqual(rec.features[0].location.nofuzzy_end, 26)

def test_slice_add_shift(self):
"""Simple slice and add to shift"""
Expand All @@ -183,6 +193,9 @@ def test_slice_add_shift(self):
self.assertEqual(rec.annotations, {}) # May change this...
self.assertEqual(rec.letter_annotations, {"fake":"X"*26})
self.assert_(len(rec.features) <= len(self.record.features))
self.assertEqual(rec.features[0].type, "source")
self.assertEqual(rec.features[0].location.nofuzzy_start, 0)
self.assertEqual(rec.features[0].location.nofuzzy_end, 26)

if __name__ == "__main__":
runner = unittest.TextTestRunner(verbosity = 2)
Expand Down

0 comments on commit a074919

Please sign in to comment.