From 7d3eb2f02175075549c6a70ae60e7faeb97ee1e3 Mon Sep 17 00:00:00 2001 From: langmead Date: Mon, 5 Apr 2010 19:16:03 +0000 Subject: [PATCH] *** empty log message *** --- NEWS | 15 ++++++++++++++- VERSION | 2 +- ebwt.h | 6 ++++-- ebwt_search.cpp | 10 ++++++---- ebwt_search_backtrack.h | 2 ++ filebuf.h | 24 ++++++++++++++++++++++-- pat.h | 6 +++--- pool.h | 24 +++++++++++++++--------- search_globals.h | 5 +++++ threading.h | 18 ++++++++++++++++-- tokenize.h | 7 +++++++ 11 files changed, 95 insertions(+), 24 deletions(-) diff --git a/NEWS b/NEWS index ddf5adf..7c45449 100644 --- a/NEWS +++ b/NEWS @@ -5,7 +5,7 @@ Bowtie NEWS Bowtie is now available for download. 0.9.0 is the first version to be released under the OSI Artistic License (see `COPYING') and freely -available to the public for download. The current version is 0.12.3. +available to the public for download. The current version is 0.12.4. Reporting Issues ================ @@ -26,6 +26,19 @@ our mailing list: Version Release History ======================= +Version 0.12.4 - April 5, 2010 + * Periods in read sequences are now treated as Ns instead of + ignored. This should help with some problems where Bowtie + erroneously reports "Reads file contained a pattern with more than + 1024 quality values..." for data from recent versions of the + Illumina GA pipeline. + * Fixed a bug whereby some error and warning messages would be + printed on top of each other in -p mode. + * Chunk-exhaustion warning messages are now suppressed when --quiet + is specified. + * Fixed small issue in quality decoding whereby no-confidence colors + would incorrectly influence decoded quality of adjacent bases. + Version 0.12.3 - February 17, 2010 * Fixed a significant bug in -C/--color mode whereby quality values for SNP nucleotide positions were erroneously penalized. 
diff --git a/VERSION b/VERSION index d61567c..7fd0b1e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.12.3 \ No newline at end of file +0.12.4 \ No newline at end of file diff --git a/ebwt.h b/ebwt.h index f5891f4..32d71f9 100644 --- a/ebwt.h +++ b/ebwt.h @@ -1275,8 +1275,9 @@ class EbwtSearchParams { } else { hit.quals[destpos] += qual[i-1]; } + } else if((int)hit.colSeq[i-1] != 4) { + hit.quals[destpos] -= qual[i-1]; } - else hit.quals[destpos] -= qual[i-1]; } if(i < qlen) { if(cmm[i] == 'M') { @@ -1285,8 +1286,9 @@ class EbwtSearchParams { } else { hit.quals[destpos] += qual[i]; } + } else if((int)hit.patSeq[i] != 4) { + hit.quals[destpos] -= qual[i]; } - else hit.quals[destpos] -= qual[i]; } if(hit.quals[destpos] < '!') { hit.quals[destpos] = '!'; diff --git a/ebwt_search.cpp b/ebwt_search.cpp index 13fa830..f35d108 100644 --- a/ebwt_search.cpp +++ b/ebwt_search.cpp @@ -43,7 +43,7 @@ static vector mates12; // mated reads (1st/2nd interleaved in 1 file) static string adjustedEbwtFileBase; static bool verbose; // be talkative static bool startVerbose; // be talkative at startup -static bool quiet; // print nothing but the alignments +bool quiet; // print nothing but the alignments static int sanityCheck; // enable expensive sanity checks static int format; // default read format is FASTQ static string origString; // reference text, or filename(s) @@ -143,6 +143,7 @@ bool showSeed; static vector qualities; static vector qualities1; static vector qualities2; +MUTEX_T gLock; static void resetOptions() { mates1.clear(); @@ -251,6 +252,7 @@ static void resetOptions() { qualities.clear(); qualities1.clear(); qualities2.clear(); + MUTEX_INIT(gLock); } // mating constraints @@ -2769,9 +2771,9 @@ static void driver(const char * type, cerr << "Invalid output type: " << outType << endl; throw 1; } - if(verbose || startVerbose) { - cerr << "Dispatching to search driver: "; logTime(cerr, true); - } + if(verbose || startVerbose) { + cerr << "Dispatching to search driver: "; 
logTime(cerr, true); + } if(maqLike) { seededQualCutoffSearchFull(seedLen, qualThresh, diff --git a/ebwt_search_backtrack.h b/ebwt_search_backtrack.h index 22968b5..befd3f8 100644 --- a/ebwt_search_backtrack.h +++ b/ebwt_search_backtrack.h @@ -115,6 +115,7 @@ class GreedyDFSRangeSource { _chars = new char[_qlen]; assert(_pairs != NULL && _elims != NULL && _chars != NULL); } catch(std::bad_alloc& e) { + ThreadSafe _ts(&gLock); cerr << "Unable to allocate memory for depth-first " << "backtracking search; new length = " << length(*_qry) << endl; @@ -1953,6 +1954,7 @@ class EbwtRangeSource : public RangeSource { if(offRev2_ != offRev3_) maxmms = 3; if(qlen_ <= maxmms) { if(!quiet_) { + ThreadSafe _ts(&gLock); cerr << "Warning: Read (" << (*name_) << ") is less than " << (maxmms+1) << " characters long; skipping..." << endl; } this->done = true; diff --git a/filebuf.h b/filebuf.h index e98c608..8b55fa6 100644 --- a/filebuf.h +++ b/filebuf.h @@ -299,6 +299,20 @@ class FileBuf { return _lastn_cur; } + /** + * Get const pointer to the last-N-chars buffer. + */ + const char *lastN() const { + return _lastn_buf; + } + + /** + * Get current size of the last-N-chars buffer. 
+ */ + size_t lastNLen() const { + return _lastn_cur; + } + private: void init() { @@ -451,7 +465,10 @@ class OutFileBuf { if(cur_ + slen > BUF_SZ) { if(cur_ > 0) flush(); if(slen >= BUF_SZ) { - fwrite(s.c_str(), slen, 1, out_); + if(fwrite(s.c_str(), slen, 1, out_) != 1) { /* one item of slen bytes is requested, so fwrite() returns the item count (1 on success), not the byte count */ + std::cerr << "Error while writing string output; not all characters written" << std::endl; + throw 1; + } } else { memcpy(&buf_[cur_], s.data(), slen); assert_eq(0, cur_); @@ -472,7 +489,10 @@ class OutFileBuf { if(cur_ + len > BUF_SZ) { if(cur_ > 0) flush(); if(len >= BUF_SZ) { - fwrite(s, len, 1, out_); + if(fwrite(s, len, 1, out_) != 1) { /* one item of len bytes is requested, so fwrite() returns the item count (1 on success), not the byte count */ + std::cerr << "Error while writing string output; not all characters written" << std::endl; + throw 1; + } } else { memcpy(&buf_[cur_], s, len); assert_eq(0, cur_); diff --git a/pat.h b/pat.h index 63d221a..e47b181 100644 --- a/pat.h +++ b/pat.h @@ -2257,8 +2257,8 @@ class TabbedPatternSource : public BufferedFilePatternSource { while(c != upto) { if(color_) { if(c >= '0' && c <= '4') c = "ACGTN"[(int)c - '0']; - if(c == '.') c = 'N'; } + if(c == '.') c = 'N'; if(isalpha(c)) { assert_in(toupper(c), "ACGTN"); if(begin++ >= trim5) { @@ -2660,8 +2660,8 @@ class FastqPatternSource : public BufferedFilePatternSource { // Convert color numbers to letters if necessary if(color_) { if(c >= '0' && c <= '4') c = "ACGTN"[(int)c - '0']; - if(c == '.') c = 'N'; } + if(c == '.') c = 'N'; if(fuzzy_ && c == '-') c = 'A'; if(isalpha(c)) { // If it's past the 5'-end trim point @@ -2971,8 +2971,8 @@ class RawPatternSource : public BufferedFilePatternSource { while(!isspace(c) && c >= 0) { if(color_) { if(c >= '0' && c <= '4') c = "ACGTN"[(int)c - '0']; - if(c == '.') c = 'N'; } + if(c == '.') c = 'N'; if(isalpha(c) && dstLen >= mytrim5) { size_t len = dstLen - mytrim5; if(len >= 1024) tooManyQualities(String("(no name)")); diff --git a/pool.h b/pool.h index 3d46c89..6c39b7f 100644 --- a/pool.h +++ b/pool.h @@ -12,6 +12,7 @@ #include #include "bitset.h" #include 
"log.h" +#include "search_globals.h" /** * Very simple allocator for fixed-size chunks of memory. Chunk size @@ -36,6 +37,7 @@ class ChunkPool { throw std::bad_alloc(); } } catch(std::bad_alloc& e) { + ThreadSafe _ts(&gLock); std::cerr << "Error: Could not allocate ChunkPool of " << totSz << " bytes" << std::endl; exhausted(); @@ -141,14 +143,18 @@ class ChunkPool { */ void exhausted() { if(patid != lastSkippedRead_) { - if(!exhaustCrash_) { - std::cerr << "Warning: "; + if(!exhaustCrash_ && !quiet) std::cerr << "Warning: "; + if(!quiet) { + std::cerr << "Exhausted best-first chunk memory for read " + << (*readName_) << " (patid " << patid + << "); skipping read" << std::endl; + } + if(exhaustCrash_) { + if(!quiet) { + std::cerr << "Please try specifying a larger --chunkmbs (default is 32)" << std::endl; + } + throw 1; } - std::cerr << "Exhausted best-first chunk memory for read " << (*readName_) << " (patid " << patid << "); skipping read" << std::endl; - } - if(exhaustCrash_) { - std::cerr << "Please try specifying a larger --chunkmbs (default is 32)" << std::endl; - throw 1; } lastSkippedRead_ = patid; } @@ -334,7 +340,7 @@ class AllocOnlyPool { throw std::bad_alloc(); } } catch(std::bad_alloc& e) { - //std::cerr << "Error: Could not allocate " << name_ << " pool #" << (curPool_+1) << " of " << (lim_ * sizeof(T)) << " bytes" << std::endl; + ThreadSafe _ts(&gLock); pool_->exhausted(); return false; } @@ -354,7 +360,7 @@ class AllocOnlyPool { throw std::bad_alloc(); } } catch(std::bad_alloc& e) { - //std::cerr << "Error: Could not allocate " << name_ << " pool #1" << std::endl; + ThreadSafe _ts(&gLock); pool_->exhausted(); return false; } diff --git a/search_globals.h b/search_globals.h index 04e4c06..2fa5917 100644 --- a/search_globals.h +++ b/search_globals.h @@ -8,6 +8,8 @@ #ifndef SEARCH_GLOBALS_H_ #define SEARCH_GLOBALS_H_ +#include "threading.h" + // declared in ebwt_search.cpp extern bool color; extern bool colorExEnds; @@ -15,5 +17,8 @@ extern bool 
colorSeq; extern bool colorQual; extern int snpPhred; extern bool showSeed; +extern bool quiet; + +extern MUTEX_T gLock; #endif /* SEARCH_GLOBALS_H_ */ diff --git a/threading.h b/threading.h index 1e14904..b6b5e0b 100644 --- a/threading.h +++ b/threading.h @@ -14,8 +14,8 @@ # include "spinlock.h" # define MUTEX_T SpinLock # define MUTEX_INIT(l) -# define MUTEX_LOCK(l) l.Enter() -# define MUTEX_UNLOCK(l) l.Leave() +# define MUTEX_LOCK(l) (l).Enter() +# define MUTEX_UNLOCK(l) (l).Leave() #else # ifdef BOWTIE_PTHREADS # define MUTEX_T pthread_mutex_t @@ -57,4 +57,18 @@ static inline void createThread(pthread_t* th, } #endif +/** + * Wrap a lock; obtain lock upon construction, release upon destruction. + */ +class ThreadSafe { +public: + ThreadSafe(MUTEX_T* lock) { + lock_ = lock; + MUTEX_LOCK(*lock_); + } + ~ThreadSafe() { MUTEX_UNLOCK(*lock_); } +private: + MUTEX_T *lock_; +}; + #endif diff --git a/tokenize.h b/tokenize.h index 60238ce..9f3fbce 100644 --- a/tokenize.h +++ b/tokenize.h @@ -1,3 +1,10 @@ +/* + * tokenize.h + * + * Created on: Jul 21, 2009 + * Author: Ben Langmead + */ + #ifndef TOKENIZE_H_ #define TOKENIZE_H_