Skip to content

Commit

Permalink
Fix elprep-sfm python wrapper when using cram as output type for inte…
Browse files Browse the repository at this point in the history
…rmediate files during split/merge.
  • Loading branch information
caherzee committed Mar 15, 2018
1 parent 258395a commit 621a628
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 4 deletions.
10 changes: 9 additions & 1 deletion README.md
Expand Up @@ -394,7 +394,7 @@ The Python scripts have been tested with Python 2.7.6. Please add the elPrep and

## Synopsis

./elprep-sfm.py $input $output --filter-unmapped-reads --filter-unmapped-reads-strict --replace-reference-sequences ucsc.hg19.dict --replace-read-group "ID:group1 LB:lib1 PL:illumina PU:unit1 SM:sample1" --mark-duplicates --remove-duplicates --sorting-order coordinate --nr-of-threads $threads --intermediate-files-output-type [sam | bam | cram] --intermediate-files-output-prefix name --single-end
./elprep-sfm.py $input $output --filter-unmapped-reads --filter-unmapped-reads-strict --replace-reference-sequences ucsc.hg19.dict --replace-read-group "ID:group1 LB:lib1 PL:illumina PU:unit1 SM:sample1" --mark-duplicates --remove-duplicates --sorting-order coordinate --nr-of-threads $threads --intermediate-files-output-type [sam | bam | cram] --reference-T fasta --reference-t fai --intermediate-files-output-prefix name --single-end

## Description

Expand All @@ -418,6 +418,14 @@ The output prefix that will be default used for the intermediate split files. Th

The elprep split and merge commands will treat the data as single-end data. When this option is not used, the elprep split and merge commands will be called to treat the data as paired-end data.

### --reference-T reference-fasta

A fasta format reference file used by SAMtools for .cram compression, optionally compressed with bgzip and indexed by samtools faidx. elPrep uses it to fill in the "-T" option when calling the samtools view command for converting to .cram. This option (or --reference-t) is required when setting --intermediate-files-output-type to cram.

### --reference-t reference-fai

A tab-delimited file in which the first column lists the reference names and the second column lists the lengths of those references; for example, a .fai file generated with samtools faidx. elPrep uses it to fill in the "-t" option when calling the samtools view command for converting to .cram. This option (or --reference-T) is required when setting --intermediate-files-output-type to cram.

### Name

### elprep-sfm-gnupar.py - a Python script that illustrates the use of elprep split and merge and GNU parallel for optimal execution on a multi-socket server
Expand Down
2 changes: 1 addition & 1 deletion scripts/elprep_io_wrapper.py
Expand Up @@ -93,7 +93,7 @@ def cmd_wrap_io(cmd_list, file_in, file_out, cmd_opts):
reference_bigT_opt = cmd_option("--reference-T", cmd_opts)
if not(reference_t_opt) and not(reference_bigT_opt): return "Converting to .cram. Need to pass reference-t or reference-T"
opt_to_delete = "--reference-t" if reference_t_opt else "--reference-T"
p2 = subprocess.Popen(cmd_list + ["dev/stdin", "/dev/stdout"] + remove_cmd_option(cmd_opts, opt_to_delete), bufsize=-1, stdin=p1.stdout, stdout=subprocess.PIPE)
p2 = subprocess.Popen(cmd_list + ["/dev/stdin", "/dev/stdout"] + remove_cmd_option(cmd_opts, opt_to_delete), bufsize=-1, stdin=p1.stdout, stdout=subprocess.PIPE)
t_opt = ["-t", reference_t_opt[1]] if reference_t_opt else ["-T", reference_bigT_opt[1]]
p3 = subprocess.Popen(["samtools", "view", "-C", "-@", nr_of_threads] + t_opt + ["-o", file_out, "-"], bufsize=-1, stdin=p2.stdout)
p3.communicate()
Expand Down
15 changes: 13 additions & 2 deletions scripts/elprep_sfm.py
Expand Up @@ -35,6 +35,17 @@ def elprep_sfm (argv):
else:
intermediate_files_output_type = output_extension[1:]

fasta_opt = []
if intermediate_files_output_type == "cram":
fasta_t_opt = elprep_io_wrapper.cmd_option("--reference-t", argv)
fasta_T_opt = elprep_io_wrapper.cmd_option("--reference-T", argv)
if fasta_t_opt:
fasta_opt = fasta_t_opt
elif fasta_T_opt:
fasta_opt = fasta_T_opt
else:
return "Intermediate files output type is .cram, so need to pass either --reference-t or reference-T"

intermediate_files_op_opt = elprep_io_wrapper.cmd_option("--intermediate-files-output-prefix", argv)
if intermediate_files_op_opt:
output_prefix = intermediate_files_op_opt[1]
Expand All @@ -50,7 +61,7 @@ def elprep_sfm (argv):

single_end_opt = elprep_io_wrapper.flg_option("--single-end", argv)

elprep_io_wrapper.cmd_wrap_input(["elprep", "split"], file_in, split_dir, ["--output-prefix", output_prefix, "--output-type", intermediate_files_output_type] + nr_of_threads_opt + single_end_opt)
elprep_io_wrapper.cmd_wrap_input(["elprep", "split"], file_in, split_dir, ["--output-prefix", output_prefix, "--output-type", intermediate_files_output_type] + fasta_opt + nr_of_threads_opt + single_end_opt)

if single_end_opt:
splits_path = split_dir
Expand Down Expand Up @@ -81,7 +92,7 @@ def elprep_sfm (argv):
os.rmdir(split_dir)

# merge command
elprep_io_wrapper.cmd_wrap_output(["elprep", "merge"], result_dir, file_out, nr_of_threads_opt + single_end_opt)
elprep_io_wrapper.cmd_wrap_output(["elprep", "merge"], result_dir, file_out, fasta_opt + nr_of_threads_opt + single_end_opt)
# remove directories for intermediate results
for root, dirs, files in os.walk(result_dir):
for file in files:
Expand Down

0 comments on commit 621a628

Please sign in to comment.