Permalink
Browse files

new options for minimum start/end trimming, and handle 0-length reads

  • Loading branch information...
1 parent 27a4809 commit 9e7f628e8a6d66e02553bef330c0efbb702c8b59 @ExpressionAnalysis committed Jan 26, 2017
Showing with 20 additions and 1 deletion.
  1. +20 −1 clipper/fastq-mcf.cpp
View
@@ -31,7 +31,7 @@ See "void usage" below for usage.
#include "fastq-lib.h"
-#define VERSION "1.04.807"
+#define VERSION "1.05"
#define MAX_ADAPTER_NUM 1000
#define SCANLEN 15
@@ -220,6 +220,8 @@ int main (int argc, char **argv) {
int ilv3 = -1;
int duplen = 0;
int dupskip = 0;
+ int min_start_trim = 0;
+ int min_end_trim = 0;
bool noexec = 0;
bool hompol_filter = 0;
bool lowcom_filter = 0;
@@ -258,6 +260,8 @@ int main (int argc, char **argv) {
{"mate-min-len", 1, 0, 0},
{"homopolymer-pct", 1, 0, 0},
{"lowcomplex-pct", 1, 0, 0},
+ {"min-start-trim", 1, 0, 0},
+ {"min-end-trim", 1, 0, 0},
{0, 0, 0, 0}
};
@@ -275,6 +279,10 @@ int main (int argc, char **argv) {
keeponlyclip=1;
} else if(!strcmp(oname, "mate-qual-mean")) {
qf2_mean=atoi(optarg);
+ } else if (!strcmp(oname, "min-start-trim")) {
+ min_start_trim = atoi(optarg);
+ } else if (!strcmp(oname, "min-end-trim")) {
+ min_end_trim = atoi(optarg);
} else if(!strcmp(oname, "homopolymer-pct")) {
hompol_pct=atof(optarg)/100.0;
hompol_filter=1;
@@ -631,6 +639,10 @@ int main (int argc, char **argv) {
--nq; --ns; // don't count newline for read len
// skip poor quals/lots of N's when doing sampling (otherwise you'll miss some)
+ if (ns == 0) {
+ ++skipped;
+ continue;
+ }
if ((st.st_size > (sampcnt * 500)) && (skipped < sampcnt) && poorqual(i, ns, s, q)) {
++skipped;
continue;
@@ -1003,6 +1015,9 @@ int main (int argc, char **argv) {
for (f=0;f<i_n;++f) {
dotrim[f][0] = sktrim[f][0]; // default, trim to detected skew levels
dotrim[f][1] = sktrim[f][1];
+ // trim to minimum, if specified
+ dotrim[f][0] = max(dotrim[f][0], min_start_trim);
+ dotrim[f][1] = max(dotrim[f][1], min_end_trim);
if (avgns[f] < 11)
// reads of avg length < 11 ? barcode lane, skip it
continue;
@@ -1493,6 +1508,10 @@ void usage(FILE *f, const char *msg) {
" -C N Number of reads to use for subsampling (300k)\n"
" -d Output lots of random debugging stuff\n"
"\n"
+"Minimum trimming options:\n"
+" --min-start-trim NUM Always trim at least NUM bases from start\n"
+" --min-end-trim NUM Always trim at least NUM bases from end\n"
+"\n"
"Quality adjustment options:\n"
" --cycle-adjust CYC,AMT Adjust cycle CYC (negative = offset from end) by amount AMT\n"
" --phred-adjust SCORE,AMT Adjust score SCORE by amount AMT\n"

0 comments on commit 9e7f628

Please sign in to comment.