From 9e7f628e8a6d66e02553bef330c0efbb702c8b59 Mon Sep 17 00:00:00 2001 From: ExpressionAnalysis Date: Thu, 26 Jan 2017 17:57:16 -0500 Subject: [PATCH 1/3] new options for min max and handle 0 length reads --- clipper/fastq-mcf.cpp | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/clipper/fastq-mcf.cpp b/clipper/fastq-mcf.cpp index af59e57..a19c1c9 100644 --- a/clipper/fastq-mcf.cpp +++ b/clipper/fastq-mcf.cpp @@ -31,7 +31,7 @@ See "void usage" below for usage. #include "fastq-lib.h" -#define VERSION "1.04.807" +#define VERSION "1.05" #define MAX_ADAPTER_NUM 1000 #define SCANLEN 15 @@ -220,6 +220,8 @@ int main (int argc, char **argv) { int ilv3 = -1; int duplen = 0; int dupskip = 0; + int min_start_trim = 0; + int min_end_trim = 0; bool noexec = 0; bool hompol_filter = 0; bool lowcom_filter = 0; @@ -258,6 +260,8 @@ int main (int argc, char **argv) { {"mate-min-len", 1, 0, 0}, {"homopolymer-pct", 1, 0, 0}, {"lowcomplex-pct", 1, 0, 0}, + {"min-start-trim", 1, 0, 0}, + {"min-end-trim", 1, 0, 0}, {0, 0, 0, 0} }; @@ -275,6 +279,10 @@ int main (int argc, char **argv) { keeponlyclip=1; } else if(!strcmp(oname, "mate-qual-mean")) { qf2_mean=atoi(optarg); + } else if (!strcmp(oname, "min-start-trim")) { + min_start_trim = atoi(optarg); + } else if (!strcmp(oname, "min-end-trim")) { + min_end_trim = atoi(optarg); } else if(!strcmp(oname, "homopolymer-pct")) { hompol_pct=atof(optarg)/100.0; hompol_filter=1; @@ -631,6 +639,10 @@ int main (int argc, char **argv) { --nq; --ns; // don't count newline for read len // skip poor quals/lots of N's when doing sampling (otherwise you'll miss some) + if (ns == 0) { + ++skipped; + continue; + } if ((st.st_size > (sampcnt * 500)) && (skipped < sampcnt) && poorqual(i, ns, s, q)) { ++skipped; continue; @@ -1003,6 +1015,9 @@ int main (int argc, char **argv) { for (f=0;f Date: Mon, 30 Jan 2017 15:42:53 -0500 Subject: [PATCH 2/3] 0 length read problem, fastq-mcf would stop reading the fastq file once encountering a 0 length read --- clipper/fastq-mcf.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/clipper/fastq-mcf.cpp b/clipper/fastq-mcf.cpp index a19c1c9..e508925 100644 --- a/clipper/fastq-mcf.cpp +++ b/clipper/fastq-mcf.cpp @@ -174,7 +174,7 @@ class inbuffer { fq->qual.s[--fq->qual.n] = '\0'; } - return fq->qual.n > 0; + return fq->qual.n >= 0; // github issue 46, 53 } else { return ::read_fq(fin, rno, fq, name); } @@ -220,8 +220,8 @@ int main (int argc, char **argv) { int ilv3 = -1; int duplen = 0; int dupskip = 0; - int min_start_trim = 0; - int min_end_trim = 0; + int min_start_trim = 0; + int min_end_trim = 0; bool noexec = 0; bool hompol_filter = 0; bool lowcom_filter = 0; @@ -639,7 +639,7 @@ int main (int argc, char **argv) { --nq; --ns; // don't count newline for read len // skip poor quals/lots of N's when doing sampling (otherwise you'll miss some) - if (ns == 0) { + if (ns == 0) { // github issue 46, 53 ++skipped; continue; } @@ -982,6 +982,15 @@ int main (int argc, char **argv) { } } ++nrec; + if (fq[0].qual.n == 0) { // github issue 46, 53 + ++nfiltered; + continue; + } else if (i_n > 1) { + if (fq[1].qual.n == 0) { + ++nfiltered; + continue; + } + } if (read_ok < 0) { ++nerr; continue; From 672c7e6457585c92cb45f53cd1646279d1cb9744 Mon Sep 17 00:00:00 2001 From: ExpressionAnalysis Date: Thu, 2 Feb 2017 16:48:52 -0500 Subject: [PATCH 3/3] add -s option for barcode label in file name --- clipper/fastq-multx.cpp | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/clipper/fastq-multx.cpp b/clipper/fastq-multx.cpp index 0772579..6a195ec 100644 --- a/clipper/fastq-multx.cpp +++ b/clipper/fastq-multx.cpp @@ -34,7 +34,7 @@ See "void usage" below for usage. #define THFIXFACTOR 20 #define endstr(e) (e=='e'?"end":e=='b'?"start":"n/a") -const char * VERSION = "1.02.772"; +const char * VERSION = "1.03"; // barcode struct bc { @@ -116,6 +116,7 @@ int main (int argc, char **argv) { const char* list=NULL; // use a barcode master list char verify='\0'; bool noexec = false; + bool seqnames = false; const char *group = NULL; bool usefile1 = false; int phred = 33; @@ -125,7 +126,7 @@ int main (int argc, char **argv) { int i; bool omode = false; char *bfil = NULL; - while ( (c = getopt (argc, argv, "-DzxnHhbeov:m:B:g:L:l:G:q:d:t:")) != -1) { + while ( (c = getopt (argc, argv, "-DzxnHhbeosv:m:B:g:L:l:G:q:d:t:")) != -1) { switch (c) t:{ case '\1': if (omode) { @@ -143,6 +144,7 @@ int main (int argc, char **argv) { } break; case 'o': omode=true; break; + case 's': seqnames=true; break; case 'v': if (strlen(optarg)>1) { fprintf(stderr, "Option -v requires a single character argument"); @@ -801,6 +803,13 @@ int main (int argc, char **argv) { // TODO: output barcode read ...but only for unmatched? int b; for (b=0;b<=bcnt;++b) { + size_t nameseq_len = strlen(bc[b].id.s); + if ((b < bcnt) && seqnames) { + nameseq_len = strlen(bc[b].seq.s); + if (bc[b].dual) + nameseq_len += bc[b].dual_n + 1; + } + for (i=0;i, using BCFIL as a master list\n" "-B BCFIL Use barcodes from BCFIL, no determination step, codes in \n" "-H Use barcodes from illumina's header, instead of a read\n" +"-s Substitute barcode sequence instead of barcode label into output file names\n" "-b Force beginning of line (5') for barcode matching\n" "-e Force end of line (3') for batcode matching\n" "-t NUM Divide threshold for auto-determine by factor NUM (1), > 1 = more sensitive\n"