Skip to content

Commit

Permalink
AttachBarcodes generic entrypoint -- fix bugs & add test (#66)
Browse files Browse the repository at this point in the history
Fixes a few plumbing issues in the generic AttachBarcodes entry point (which allows command-line specification of the in-read positions of all barcodes). The code path that combines all three of (i) a sample barcode in the I1 fastq, (ii) a cell barcode whitelist, and (iii) cell/UMI barcodes in the R1 fastq as usual had some argument-type issues; this commit fixes them and adds a test covering that path.
  • Loading branch information
mlin committed May 15, 2019
1 parent 84e9679 commit 5e04815
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 3 deletions.
7 changes: 4 additions & 3 deletions src/sctools/platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -935,15 +935,16 @@ def _make_tag_generators(
barcode_args = {'fastq_files': r1}

if i1:
barcode_args['embedded_barcodes'] = [cls.sample_barcode]
tag_generators.append(fastq.EmbeddedBarcodeGenerator(**barcode_args))
sample_barcode_args = dict(barcode_args)
sample_barcode_args['embedded_barcodes'] = [cls.sample_barcode]
tag_generators.append(fastq.EmbeddedBarcodeGenerator(**sample_barcode_args))

if whitelist:
barcode_args['whitelist'] = whitelist
if cls.cell_barcode:
barcode_args['embedded_cell_barcode'] = cls.cell_barcode
if cls.molecule_barcode:
barcode_args['other_embedded_barcodes'] = cls.molecule_barcode
barcode_args['other_embedded_barcodes'] = [cls.molecule_barcode]
tag_generators.append(
fastq.BarcodeGeneratorWithCorrectedCellBarcodes(**barcode_args)
)
Expand Down
55 changes: 55 additions & 0 deletions src/sctools/test/test_entrypoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,61 @@ def test_Attach10XBarcodes_entrypoint_with_whitelist():
os.remove('test_tagged_bam.bam') # clean up


def test_AttachBarcodes_entrypoint_with_whitelist():
    """Run BarcodePlatform.attach_barcodes with sample, cell, and molecule barcodes.

    The sample barcode is read from the I1 fastq, the cell and molecule
    barcodes from the R1 fastq, and a whitelist is supplied. The test then
    checks that the tagged BAM carries string-valued raw/quality tags for all
    three barcode kinds and that the raw molecule barcode has the requested
    length.
    """
    output_bam = 'test_tagged_bam.bam'
    args = [
        '--r1',
        data_dir + 'test_r1.fastq',
        '--i1',
        data_dir + 'test_i7.fastq',
        '--u2',
        data_dir + 'test.bam',
        '--output-bamfile',
        output_bam,
        '--whitelist',
        data_dir + '1k-august-2016.txt',
        "--sample-barcode-start-position",
        "0",
        "--sample-barcode-length",
        "8",
        "--cell-barcode-start-position",
        "0",
        "--cell-barcode-length",
        "16",
        "--molecule-barcode-start-position",
        "16",
        "--molecule-barcode-length",
        "7",  # changed 10>7 intentionally for test
    ]

    # Run everything inside try/finally so the output BAM is removed even when
    # an assertion fails; other tests write to the same filename.
    try:
        return_call = platform.BarcodePlatform.attach_barcodes(args)
        assert return_call == 0

        # Set once at least one alignment carries a (corrected) cell barcode tag.
        success = False
        with pysam.AlignmentFile(output_bam, 'rb', check_sq=False) as f:
            for alignment in f:
                if alignment.has_tag(consts.CELL_BARCODE_TAG_KEY):
                    success = True
                # each alignment should now have a tag, and that tag should be a string
                assert isinstance(
                    alignment.get_tag(consts.RAW_CELL_BARCODE_TAG_KEY), str
                )
                assert isinstance(
                    alignment.get_tag(consts.QUALITY_CELL_BARCODE_TAG_KEY), str
                )
                assert isinstance(
                    alignment.get_tag(consts.RAW_MOLECULE_BARCODE_TAG_KEY), str
                )
                # must match the --molecule-barcode-length passed above
                assert len(alignment.get_tag(consts.RAW_MOLECULE_BARCODE_TAG_KEY)) == 7
                assert isinstance(
                    alignment.get_tag(consts.QUALITY_MOLECULE_BARCODE_TAG_KEY), str
                )
                assert isinstance(
                    alignment.get_tag(consts.RAW_SAMPLE_BARCODE_TAG_KEY), str
                )
                assert isinstance(
                    alignment.get_tag(consts.QUALITY_SAMPLE_BARCODE_TAG_KEY), str
                )
        assert success
    finally:
        # clean up; guard so a missing file does not mask the real failure
        if os.path.exists(output_bam):
            os.remove(output_bam)


def test_split_bam():
tag_args = [
'--r1',
Expand Down

0 comments on commit 5e04815

Please sign in to comment.