From 6709ebca8031cdbe7f860f958efc265b7cd55977 Mon Sep 17 00:00:00 2001 From: owlang Date: Tue, 5 Jan 2021 18:31:44 -0500 Subject: [PATCH 1/6] fix BAM format converter cli validation bug: max/min insert filter option validation did not account for default negative values add validation: midpoint option to automatically flip option "require PE" which will better mimic GUI version of the tool --- src/cli/BAM_Format_Converter/BAMtoBEDCLI.java | 4 +++- src/cli/BAM_Format_Converter/BAMtoGFFCLI.java | 4 +++- src/cli/BAM_Format_Converter/BAMtobedGraphCLI.java | 4 +++- src/cli/BAM_Format_Converter/BAMtoscIDXCLI.java | 4 +++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/cli/BAM_Format_Converter/BAMtoBEDCLI.java b/src/cli/BAM_Format_Converter/BAMtoBEDCLI.java index 769d34b66..60937985e 100644 --- a/src/cli/BAM_Format_Converter/BAMtoBEDCLI.java +++ b/src/cli/BAM_Format_Converter/BAMtoBEDCLI.java @@ -86,6 +86,8 @@ private String validateInput() throws IOException { else if(readType.midpoint) { STRAND=3; } else if(readType.fragment) { STRAND=4; } else { STRAND=0; } + // set PE defaults + if(STRAND==3 || STRAND==4) { matePair=true; } //check inputs exist if(!bamFile.exists()){ @@ -131,7 +133,7 @@ private String validateInput() throws IOException { // validate insert sizes if( MIN_INSERT<0 && MIN_INSERT!=-9999 ){ r += "MIN_INSERT must be a positive integer value: " + MIN_INSERT + "\n"; } if( MAX_INSERT<0 && MAX_INSERT!=-9999 ){ r += "MAX_INSERT must be a positive integer value: " + MAX_INSERT + "\n"; } - if( MAX_INSERT Date: Tue, 5 Jan 2021 18:35:22 -0500 Subject: [PATCH 2/6] allow FilterPIPseq to use multi-nucleotide strings Pattern (regex object) check to allow for more than one character strings for the filter string input option (only single ATCG allowed before this commit) Fix some indentation formatting and explicitly initialize PrintStream object as null in the script class for the FilterPIPseq tool. This part should not change the behavior. 
--- .../BAM_Manipulation/FilterforPIPseqCLI.java | 2 +- .../BAM_Manipulation/FilterforPIPseq.java | 25 ++++++++++--------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/cli/BAM_Manipulation/FilterforPIPseqCLI.java b/src/cli/BAM_Manipulation/FilterforPIPseqCLI.java index 170fb7ea6..2beb8e465 100644 --- a/src/cli/BAM_Manipulation/FilterforPIPseqCLI.java +++ b/src/cli/BAM_Manipulation/FilterforPIPseqCLI.java @@ -107,7 +107,7 @@ private String validateInput() throws IOException { } //check filter string is valid ATCG - Pattern seqPat = Pattern.compile("[ATCG]"); + Pattern seqPat = Pattern.compile("[ATCG]+"); Matcher m = seqPat.matcher( filterString ); if( !m.matches() ){ r += "(!)Filter string must be formatted as a nucleotide sequence.\n" + filterString + diff --git a/src/scripts/BAM_Manipulation/FilterforPIPseq.java b/src/scripts/BAM_Manipulation/FilterforPIPseq.java index 74544a25e..f011d965f 100644 --- a/src/scripts/BAM_Manipulation/FilterforPIPseq.java +++ b/src/scripts/BAM_Manipulation/FilterforPIPseq.java @@ -24,9 +24,9 @@ public class FilterforPIPseq { File genome = null; File output = null; String SEQ = ""; - - private PrintStream PS; - + + private PrintStream PS = null; + public FilterforPIPseq(File in, File gen, File out, String s, PrintStream ps) { bamFile = in; genome = gen; @@ -34,27 +34,28 @@ public FilterforPIPseq(File in, File gen, File out, String s, PrintStream ps) { SEQ = s.toUpperCase(); PS = ps; } - + public void run() throws IOException, InterruptedException { IndexedFastaSequenceFile QUERY = new IndexedFastaSequenceFile(genome); - + IOUtil.assertFileIsReadable(bamFile); IOUtil.assertFileIsWritable(output); final SamReader reader = SamReaderFactory.makeDefault().open(bamFile); reader.getFileHeader().setSortOrder(SAMFileHeader.SortOrder.coordinate); final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), false, output); - + printBoth(bamFile.getName()); //output file name to textarea 
- + //Code to get individual chromosome stats AbstractBAMFileIndex bai = (AbstractBAMFileIndex) reader.indexing().getIndex(); for (int z = 0; z < bai.getNumberOfReferences(); z++) { SAMSequenceRecord seq = reader.getFileHeader().getSequence(z); + printBoth(seq.getSequenceName()); - + CloseableIterator iter = reader.query(seq.getSequenceName(), 0, seq.getSequenceLength(), false); while (iter.hasNext()) { - //Create the record object + //Create the record object SAMRecord sr = iter.next(); if(sr.getReadPairedFlag()) { if(sr.getProperPairFlag() && sr.getFirstOfPairFlag()) { @@ -70,7 +71,7 @@ public void run() throws IOException, InterruptedException { } } //System.out.println(sr.getReadString() + "\t" + seq.getSequenceName() + "\t" + sr.getUnclippedStart() + "\t" + sr.getUnclippedEnd() + "\t" + sr.getReadNegativeStrandFlag() + "\t" + filter); - if(filter.toUpperCase().equals(SEQ)) { writer.addAlignment(sr); } + if(filter.toUpperCase().equals(SEQ)) { writer.addAlignment(sr); } } } else { String filter = ""; @@ -83,7 +84,7 @@ public void run() throws IOException, InterruptedException { filter = FASTAUtilities.RevComplement(filter); } //System.out.println(sr.getReadString() + "\t" + seq.getSequenceName() + "\t" + sr.getUnclippedStart() + "\t" + sr.getUnclippedEnd() + "\t" + sr.getReadNegativeStrandFlag() + "\t" + filter); - if(filter.toUpperCase().equals(SEQ)) { writer.addAlignment(sr); } + if(filter.toUpperCase().equals(SEQ)) { writer.addAlignment(sr); } } } iter.close(); @@ -93,7 +94,7 @@ public void run() throws IOException, InterruptedException { reader.close(); bai.close(); } - + private void printBoth(String message){ if(PS!=null){ PS.println(message); } System.err.println(message); From 0a44cae679dedd8a78796e1f252fed8cbb79a43d Mon Sep 17 00:00:00 2001 From: owlang Date: Tue, 5 Jan 2021 18:41:46 -0500 Subject: [PATCH 3/6] fix extension check The GFFtoBED extension check in the CLI class was looking for the wrong extension. This bug is fixed with this commit. 
--- .../Coordinate_Manipulation/GFF_Manipulation/GFFtoBEDCLI.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cli/Coordinate_Manipulation/GFF_Manipulation/GFFtoBEDCLI.java b/src/cli/Coordinate_Manipulation/GFF_Manipulation/GFFtoBEDCLI.java index b92d72af1..e4fa1d92a 100644 --- a/src/cli/Coordinate_Manipulation/GFF_Manipulation/GFFtoBEDCLI.java +++ b/src/cli/Coordinate_Manipulation/GFF_Manipulation/GFFtoBEDCLI.java @@ -70,8 +70,8 @@ private String validateInput() throws IOException { }else{ //check ext try{ - if(!"gff".equals(ExtensionFileFilter.getExtension(output))){ - r += "(!)Use GFF extension for output filename. Try: " + ExtensionFileFilter.stripExtension(output) + ".gff\n"; + if(!"bed".equals(ExtensionFileFilter.getExtension(output))){ + r += "(!)Use BED extension for output filename. Try: " + ExtensionFileFilter.stripExtension(output) + ".bed\n"; } } catch( NullPointerException e){ r += "(!)Output filename must have extension: use GFF extension for output filename. Try: " + output + ".gff\n"; } //check directory From 2cc0bc7fe53f0447d8bc5003e68489cd2e78ef11 Mon Sep 17 00:00:00 2001 From: owlang Date: Tue, 5 Jan 2021 18:44:03 -0500 Subject: [PATCH 4/6] remove default title-setting Change default plot output to not include a title. Before this commit default was to add filename as default title for the composite plot. --- src/cli/Figure_Generation/CompositePlotCLI.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/cli/Figure_Generation/CompositePlotCLI.java b/src/cli/Figure_Generation/CompositePlotCLI.java index ace724c9c..643ff9b3a 100644 --- a/src/cli/Figure_Generation/CompositePlotCLI.java +++ b/src/cli/Figure_Generation/CompositePlotCLI.java @@ -104,7 +104,7 @@ public Integer call() throws Exception { // Save Composite Plot OutputStream OUT = new FileOutputStream(output); ChartUtilities.writeChartAsPNG(OUT, chart, pixelWidth, pixelHeight); - + System.err.println( "Image Generated." 
); return(0); } @@ -140,8 +140,6 @@ private String validateInput() throws IOException { } } - //set default title name - if(title!=null){ title = compositeData.getName(); } //check pixel ranges are valid if(pixelHeight<=0){ r += "(!)Cell height must be a positive integer value! check \"-y\" flag.\""; } if(pixelWidth<=0) { r += "(!)Cell width must be a positive integer value! check \"-x\" flag.\""; } From 5fe99f382a1ef941aa1ac85c11fa69b04d18b367 Mon Sep 17 00:00:00 2001 From: owlang Date: Tue, 5 Jan 2021 18:45:45 -0500 Subject: [PATCH 5/6] add basic checks to row/column options AggregateDataCLI was missing a basic option check for negative index values for the starting row or column. This commit adds this basic validation. More validation may be added in the future. --- src/cli/Read_Analysis/AggregateDataCLI.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/cli/Read_Analysis/AggregateDataCLI.java b/src/cli/Read_Analysis/AggregateDataCLI.java index 52365e74e..40fb9ce26 100644 --- a/src/cli/Read_Analysis/AggregateDataCLI.java +++ b/src/cli/Read_Analysis/AggregateDataCLI.java @@ -149,6 +149,10 @@ private String validateInput() throws IOException { else if(aggr.max) { aggType = 5; } else if(aggr.var) { aggType = 6; } + //validate row&column start indexes + if(startROW<0){ r += "(!)Row start must not be less than zero\n"; } + if(startCOL<0){ r += "(!)Column start must not be less than zero\n"; } + return(r); } } \ No newline at end of file From d2b691078d6d3983b45c382f8c7666e9d3601f02 Mon Sep 17 00:00:00 2001 From: owlang Date: Tue, 5 Jan 2021 18:47:22 -0500 Subject: [PATCH 6/6] add validation for motif string input Using the Pattern and Matcher regular expression objects, check that the motif string is composed of capital ATCG characters. Be sure to update this validation if IUPAC degenerate nucleotide symbols are supported in the future. 
--- src/cli/Sequence_Analysis/SearchMotifCLI.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/cli/Sequence_Analysis/SearchMotifCLI.java b/src/cli/Sequence_Analysis/SearchMotifCLI.java index 67dfd3667..6757ac03f 100644 --- a/src/cli/Sequence_Analysis/SearchMotifCLI.java +++ b/src/cli/Sequence_Analysis/SearchMotifCLI.java @@ -6,6 +6,8 @@ import picocli.CommandLine.Parameters; import java.util.concurrent.Callable; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.io.File; import java.io.IOException; @@ -83,6 +85,14 @@ private String validateInput() throws IOException { } } + //check motif string is valid ATCG + Pattern seqPat = Pattern.compile("[ATCG]+"); + Matcher m = seqPat.matcher( motif ); + if( !m.matches() ){ + r += "(!)Motif string must be formatted as a nucleotide sequence.\n" + motif + + " is not a valid nucleotide sequence.\nExpected input string format: \"[ATCG]\""; + } + //check mismatch value if(ALLOWED_MISMATCH<0){ r += "(!)Please use a non-negative integer for allowed mismatches."; }