Skip to content

Commit

Permalink
Merge pull request #115 in SAT/pbcore from feature/SE-1840-pbcore-mak…
Browse files Browse the repository at this point in the history
…e-reference-m5-optional to develop

* commit '55037ece00676d1c90874d8367b0cffd9d0ec6ec':
  Remove M5 requirement from BAM header
  • Loading branch information
armintoepfer committed Oct 2, 2018
2 parents f59bb13 + 55037ec commit d45d2c9
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 11 deletions.
4 changes: 1 addition & 3 deletions pbcore/io/align/BamIO.py
Expand Up @@ -46,7 +46,6 @@ def _loadReferenceInfo(self):
refRecords = self.peer.header["SQ"]
refNames = [r["SN"] for r in refRecords]
refLengths = [r["LN"] for r in refRecords]
refMD5s = [r["M5"] for r in refRecords]
refIds = map(self.peer.get_tid, refNames)
nRefs = len(refRecords)

Expand All @@ -57,12 +56,11 @@ def _loadReferenceInfo(self):
refNames,
refNames,
refLengths,
refMD5s,
np.zeros(nRefs, dtype=np.uint32),
np.zeros(nRefs, dtype=np.uint32)),
dtype=[('ID', '<i8'), ('RefInfoID', '<i8'),
('Name', 'O'), ('FullName', 'O'),
('Length', '<i8'), ('MD5', 'O'),
('Length', '<i8'),
('StartRow', '<u4'), ('EndRow', '<u4')])
self._referenceDict = {}
self._referenceDict.update(zip(refIds, self._referenceInfoTable))
Expand Down
28 changes: 28 additions & 0 deletions tests/test_pbcore_io_AlnFileReaders.py
Expand Up @@ -525,6 +525,34 @@ def test_retrieve_read_group_properties(self):
movie_names.append(aln.movieName)
EQ(movie_names, ['movie1', 'm140906_231018_42161_c100676332550000001823129611271486_s1_p0'])

class TestMissingHeaderM5(object):
"""
Verify that BAM files no M5 for SQ can still be processed
"""
SAM_IN = """\
@HD\tVN:1.5\tSO:coordinate\tpb:3.0.1
@SQ\tSN:ecoliK12_pbi_March2013_2955000_to_2980000\tLN:25000
@RG\tID:3f58e5b8\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BASECALLERVERSION=2.1;FRAMERATEHZ=75.000000;BINDINGKIT=100356300;SEQUENCINGKIT=100356200\tPU:movie1
@RG\tID:b5482b33\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.1;FRAMERATEHZ=75.000000\tPU:m140906_231018_42161_c100676332550000001823129611271486_s1_p0
movie1/54130/0_10\t2\tecoliK12_pbi_March2013_2955000_to_2980000\t2\t10\t10=\t*\t0\t0\tAATGAGGAGA\t*\tRG:Z:3f58e5b8\tdq:Z:2222'$22'2\tdt:Z:NNNNAGNNGN\tip:B:C,255,2,0,10,22,34,0,2,3,0,16\tiq:Z:(+#1'$#*1&\tmq:Z:&1~51*5&~2\tnp:i:1\tqe:i:10\tqs:i:0\trq:f:0.854\tsn:B:f,2,2,2,2\tsq:Z:<32<4<<<<3\tzm:i:54130\tAS:i:-3020\tNM:i:134\tcx:i:2
m140906_231018_42161_c100676332550000001823129611271486_s1_p0/1/10_20\t2\tecoliK12_pbi_March2013_2955000_to_2980000\t12\t10\t10=\t*\t0\t0\tAATGAGGAGA\t*\tRG:Z:b5482b33\tdq:Z:2222'$22'2\tdt:Z:NNNNAGNNGN\tip:B:C,255,2,0,10,22,34,0,2,3,0,16\tiq:Z:(+#1'$#*1&\tmq:Z:&1~51*5&~2\tnp:i:1\tqe:i:20\tqs:i:10\trq:f:0.854\tsn:B:f,2,2,2,2\tsq:Z:<32<4<<<<3\tzm:i:54130\tAS:i:-3020\tNM:i:134\tcx:i:2"""

def test_retrieve_read_group_properties(self):
f1 = tempfile.NamedTemporaryFile(suffix=".sam").name
f2 = tempfile.NamedTemporaryFile(suffix=".bam").name
with open(f1, "w") as f:
f.write(self.SAM_IN)
with AlignmentFile(f1) as sam_in:
with AlignmentFile(f2, 'wb', template=sam_in) as bam_out:
for aln in sam_in:
bam_out.write(aln)
movie_names = []
with BamReader(f2) as bam_in:
for aln in bam_in:
EQ(aln.sequencingChemistry, "P6-C4")
movie_names.append(aln.movieName)
EQ(movie_names, ['movie1', 'm140906_231018_42161_c100676332550000001823129611271486_s1_p0'])


class TestUpdatedChemistryMapping(object):
"""
Expand Down
16 changes: 8 additions & 8 deletions tests/test_pbdataset.py
Expand Up @@ -1730,14 +1730,14 @@ def test_referenceInfo(self):
self.assertEqual(len(readers[0].referenceInfoTable), 59)
obstbl = [readers[0].referenceInfo('E.faecalis.1')]
exptbl = [(27, 27, 'E.faecalis.1', 'E.faecalis.1', 1482,
'a1a59c267ac1341e5a12bce7a7d37bcb', 0, 0)]
0, 0)]
self.assertListOfTuplesEqual(obstbl, exptbl)
# TODO: add a bam with a different referenceInfoTable to check merging
# and id remapping:
#self.assertEqual(
#str(aln.referenceInfo('E.faecalis.1')),
#"(27, 27, 'E.faecalis.1', 'E.faecalis.1', 1482, "
#"'a1a59c267ac1341e5a12bce7a7d37bcb', 0L, 0L)")
#"0L, 0L)")

# TODO: turn this back on when a bam with a different referenceInfoTable is
# found
Expand Down Expand Up @@ -2319,7 +2319,7 @@ def test_two_bam(self):
self.assertEqual(len3, 65346)
obstbl = aln.referenceInfoTable
exptbl = [(0, 0, 'ecoliK12_pbi_March2013', 'ecoliK12_pbi_March2013',
4642522, '52cd7c5fa92877152fa487906ae484c5', 0, 0)]
4642522, 0, 0)]
self.assertListOfTuplesEqual(obstbl, exptbl)
self.assertEqual(set(aln.tId), {0})
self.assertEqual(aln.referenceInfo('ecoliK12_pbi_March2013'),
Expand All @@ -2343,7 +2343,7 @@ def test_two_xml(self):
self.assertEqual(len1 + len2, len3)
self.assertEqual(len3, 160264)
exptbl = [(0, 0, 'ecoliK12_pbi_March2013', 'ecoliK12_pbi_March2013',
4642522, '52cd7c5fa92877152fa487906ae484c5', 0, 0)]
4642522, 0, 0)]
obstbl = aln.referenceInfoTable
self.assertListOfTuplesEqual(obstbl, exptbl)
self.assertEqual(set(aln.tId), {0})
Expand Down Expand Up @@ -2375,9 +2375,9 @@ def test_two_ref_bam(self):
# and endrow fields for bams someday...
obstbl = aln.referenceInfoTable
exptbl = [(0, 0, 'ecoliK12_pbi_March2013', 'ecoliK12_pbi_March2013',
4642522, '52cd7c5fa92877152fa487906ae484c5', 0, 0),
4642522, 0, 0),
(1, 1, 'lambda_NEB3011', 'lambda_NEB3011', 48502,
'a1319ff90e994c8190a4fe6569d0822a', 0, 0)]
0, 0)]
self.assertListOfTuplesEqual(obstbl, exptbl)
self.assertEqual(set(aln.tId), {0, 1})
self.assertEqual(aln.referenceInfo('ecoliK12_pbi_March2013'),
Expand Down Expand Up @@ -2409,9 +2409,9 @@ def test_two_ref_three_bam(self):
self.assertEqual(len4, 160376)
obstbl = aln.referenceInfoTable
exptbl = [(0, 0, 'ecoliK12_pbi_March2013', 'ecoliK12_pbi_March2013',
4642522, '52cd7c5fa92877152fa487906ae484c5', 0, 0),
4642522, 0, 0),
(1, 1, 'lambda_NEB3011', 'lambda_NEB3011', 48502,
'a1319ff90e994c8190a4fe6569d0822a', 0, 0)]
0, 0)]
self.assertListOfTuplesEqual(obstbl, exptbl)
self.assertEqual(set(aln.tId), {0, 1})
self.assertEqual(aln.referenceInfo('ecoliK12_pbi_March2013'),
Expand Down

0 comments on commit d45d2c9

Please sign in to comment.