From 4f02e10f4960c4d0c597f11bc3845f5efde9ef97 Mon Sep 17 00:00:00 2001 From: langmead Date: Tue, 8 Sep 2009 21:18:42 +0000 Subject: [PATCH] *** empty log message *** --- aligner.h | 43 +++ aligner_0mm.h | 12 +- aligner_1mm.h | 12 +- aligner_23mm.h | 12 +- aligner_seed_mm.h | 14 +- annot.cpp | 3 +- bitset.h | 7 +- blockwise_sa.h | 14 +- bowtie_inspect.cpp | 94 ++++--- chaincat.cpp | 33 ++- ebwt.h | 57 ++-- ebwt_build.cpp | 259 ++++++++--------- ebwt_search.cpp | 618 ++++++++++++++++++++++++----------------- ebwt_search_backtrack.h | 3 +- filebuf.h | 16 +- hit.h | 27 +- map_tool.cpp | 253 ++++++++--------- maq_convert/bfa.cpp | 11 +- maq_convert/bowtie_convert.cpp | 137 ++++----- maq_convert/maqmap.h | 7 +- pat.h | 58 ++-- pool.h | 4 +- qual.h | 12 +- range_cache.h | 2 +- range_source.h | 2 +- ref_aligner.h | 3 +- ref_read.cpp | 2 +- ref_read.h | 9 +- reference.h | 19 +- refmap.cpp | 7 +- search_1mm_phase1.c | 2 +- search_23mm_phase1.c | 4 +- search_23mm_phase3.c | 3 - search_seeded_phase1.c | 39 ++- search_seeded_phase3.c | 3 - search_seeded_phase4.c | 7 - shmem.h | 13 +- tokenize.h | 1 + 38 files changed, 1008 insertions(+), 814 deletions(-) diff --git a/aligner.h b/aligner.h index 54f7a96..8e0cbb8 100644 --- a/aligner.h +++ b/aligner.h @@ -333,6 +333,7 @@ class UnpairedAlignerV2 : public Aligner { vector >& os, bool rangeMode, bool verbose, + bool quiet, int maxBts, ChunkPool *pool, int *btCnt = NULL, @@ -346,6 +347,8 @@ class UnpairedAlignerV2 : public Aligner { params_(params), rchase_(rchase), driver_(driver), + verbose_(verbose), + quiet_(quiet), maxBts_(maxBts), pool_(pool), btCnt_(btCnt), @@ -374,6 +377,15 @@ class UnpairedAlignerV2 : public Aligner { metrics_->nextRead(patsrc->bufa().patFw); } pool_->reset(&patsrc->bufa().name, patsrc->patid()); + if(patsrc->bufa().length() < 4) { + if(!quiet_) { + cerr << "Warning: Skipping read " << patsrc->bufa().name + << " because it is less than 4 characters long" << endl; + } + this->done = true; + sinkPt_->finishRead(*patsrc_, true, true); + return; + } driver_->setQuery(patsrc, NULL); this->done = driver_->done; doneFirst_ = false; @@ -508,6 +520,9 @@ class UnpairedAlignerV2 : public Aligner { // Range-finding state TDriver* driver_; + bool verbose_; // be talkative + bool quiet_; // don't print informational/warning info + const int maxBts_; ChunkPool *pool_; int *btCnt_; @@ -548,6 +563,7 @@ class PairedBWAlignerV1 : public Aligner { const BitPairReference* refs, bool rangeMode, bool verbose, + bool quiet, int maxBts, ChunkPool *pool, int *btCnt) : @@ -572,6 +588,7 @@ class PairedBWAlignerV1 : public Aligner { fw1_(fw1), fw2_(fw2), rchase_(rchase), verbose_(verbose), + quiet_(quiet), maxBts_(maxBts), pool_(pool), btCnt_(btCnt), @@ -654,6 +671,15 @@ class PairedBWAlignerV1 : public Aligner { // Give all of the drivers pointers to the relevant read info patsrc_ = patsrc; pool_->reset(&patsrc->bufa().name, patsrc->patid()); + if(patsrc->bufa().length() < 4 || patsrc->bufb().length() < 4) { + if(!quiet_) { + cerr << "Warning: Skipping pair " << patsrc->bufa().name + << " because a mate is less than 4 characters long" << endl; + } + this->done = true; + sinkPt_->finishRead(*patsrc_, true, true); + return; + } driver1Fw_->setQuery(patsrc, NULL); driver1Rc_->setQuery(patsrc, NULL); driver2Fw_->setQuery(patsrc, NULL); @@ -1327,6 +1353,8 @@ class PairedBWAlignerV1 : public Aligner { // true -> be talkative bool verbose_; + // true -> suppress warnings + bool quiet_; int maxBts_; ChunkPool *pool_; @@ -1451,6 +1479,7 @@ class PairedBWAlignerV2 : public Aligner { const BitPairReference* refs, bool rangeMode, bool verbose, + bool quiet, int maxBts, ChunkPool *pool, int *btCnt) : @@ -1477,6 +1506,8 @@ class PairedBWAlignerV2 : public Aligner { rchase_(rchase), driver_(driver), pool_(pool), + verbose_(verbose), + quiet_(quiet), maxBts_(maxBts), btCnt_(btCnt) { @@ -1512,6 +1543,15 @@ class PairedBWAlignerV2 : public Aligner { // Give all of the drivers pointers to the relevant read info patsrc_ = patsrc; pool_->reset(&patsrc->bufa().name, patsrc->patid()); + if(patsrc->bufa().length() < 4 || patsrc->bufb().length() < 4) { + if(!quiet_) { + cerr << "Warning: Skipping pair " << patsrc->bufa().name + << " because a mate is less than 4 characters long" << endl; + } + this->done = true; + sinkPt_->finishRead(*patsrc_, true, true); + return; + } driver_->setQuery(patsrc, NULL); qlen1_ = patsrc_->bufa().length(); qlen2_ = patsrc_->bufb().length(); @@ -1934,6 +1974,9 @@ class PairedBWAlignerV2 : public Aligner { // Pool for distributing chunks of best-first path descriptor memory ChunkPool *pool_; + bool verbose_; + bool quiet_; + int maxBts_; // maximum allowed # backtracks int *btCnt_; // current backtrack count diff --git a/aligner_0mm.h b/aligner_0mm.h index cb1d9ff..92f64d3 100644 --- a/aligner_0mm.h +++ b/aligner_0mm.h @@ -39,6 +39,7 @@ class UnpairedExactAlignerV1Factory : public AlignerFactory { bool strandFix, bool rangeMode, bool verbose, + bool quiet, uint32_t seed) : ebwtFw_(ebwtFw), ebwtBw_(ebwtBw), @@ -55,6 +56,7 @@ class UnpairedExactAlignerV1Factory : public AlignerFactory { strandFix_(strandFix), rangeMode_(rangeMode), verbose_(verbose), + quiet_(quiet), seed_(seed) { assert(ebwtFw.isInMemory()); @@ -107,7 +109,7 @@ class UnpairedExactAlignerV1Factory : public AlignerFactory { return new UnpairedAlignerV2( params, dr, rchase, sink_, sinkPtFactory_, sinkPt, os_, rangeMode_, verbose_, - INT_MAX, pool_, NULL, NULL); + quiet_, INT_MAX, pool_, NULL, NULL); } private: @@ -127,6 +129,7 @@ class UnpairedExactAlignerV1Factory : public AlignerFactory { bool strandFix_; bool rangeMode_; bool verbose_; + bool quiet_; uint32_t seed_; }; @@ -166,6 +169,7 @@ class PairedExactAlignerV1Factory : public AlignerFactory { bool qualOrder, bool rangeMode, bool verbose, + bool quiet, uint32_t seed) : ebwtFw_(ebwtFw), doFw_(doFw), @@ -192,6 +196,7 @@ class PairedExactAlignerV1Factory : public AlignerFactory { strandFix_(strandFix), rangeMode_(rangeMode), verbose_(verbose), + quiet_(quiet), seed_(seed) { assert(ebwtFw.isInMemory()); @@ -312,7 +317,7 @@ class PairedExactAlignerV1Factory : public AlignerFactory { refAligner, rchase, sink_, sinkPtFactory_, sinkPt, mate1fw_, mate2fw_, peInner_, peOuter_, dontReconcile_, symCeil_, mixedThresh_, - mixedAttemptLim_, refs_, rangeMode_, verbose_, + mixedAttemptLim_, refs_, rangeMode_, verbose_, quiet_, INT_MAX, pool_, NULL); return al; } else { @@ -328,7 +333,7 @@ class PairedExactAlignerV1Factory : public AlignerFactory { rchase, sink_, sinkPtFactory_, sinkPt, sinkPtSe1, sinkPtSe2, mate1fw_, mate2fw_, peInner_, peOuter_, - mixedAttemptLim_, refs_, rangeMode_, verbose_, + mixedAttemptLim_, refs_, rangeMode_, verbose_, quiet_, INT_MAX, pool_, NULL); delete drVec; return al; @@ -363,6 +368,7 @@ class PairedExactAlignerV1Factory : public AlignerFactory { const bool strandFix_; const bool rangeMode_; const bool verbose_; + const bool quiet_; const uint32_t seed_; }; diff --git a/aligner_1mm.h b/aligner_1mm.h index c48a3c2..9d3d112 100644 --- a/aligner_1mm.h +++ b/aligner_1mm.h @@ -39,6 +39,7 @@ class Unpaired1mmAlignerV1Factory : public AlignerFactory { bool strandFix, bool rangeMode, bool verbose, + bool quiet, uint32_t seed) : ebwtFw_(ebwtFw), ebwtBw_(ebwtBw), @@ -56,6 +57,7 @@ class Unpaired1mmAlignerV1Factory : public AlignerFactory { strandFix_(strandFix), rangeMode_(rangeMode), verbose_(verbose), + quiet_(quiet), seed_(seed) { assert(ebwtFw.isInMemory()); @@ -144,7 +146,7 @@ class Unpaired1mmAlignerV1Factory : public AlignerFactory { return new UnpairedAlignerV2( params, dr, rchase, sink_, sinkPtFactory_, sinkPt, os_, rangeMode_, verbose_, - INT_MAX, pool_, NULL, NULL); + quiet_, INT_MAX, pool_, NULL, NULL); } private: @@ -164,6 +166,7 @@ class Unpaired1mmAlignerV1Factory : public AlignerFactory { bool strandFix_; bool rangeMode_; bool verbose_; + bool quiet_; uint32_t seed_; }; @@ -203,6 +206,7 @@ class Paired1mmAlignerV1Factory : public AlignerFactory { bool strandFix, bool rangeMode, bool verbose, + bool quiet, uint32_t seed) : ebwtFw_(ebwtFw), ebwtBw_(ebwtBw), @@ -230,6 +234,7 @@ class Paired1mmAlignerV1Factory : public AlignerFactory { strandFix_(strandFix), rangeMode_(rangeMode), verbose_(verbose), + quiet_(quiet), seed_(seed) { assert(ebwtBw != NULL); @@ -424,7 +429,7 @@ class Paired1mmAlignerV1Factory : public AlignerFactory { refAligner, rchase, sink_, sinkPtFactory_, sinkPt, mate1fw_, mate2fw_, peInner_, peOuter_, dontReconcile_, symCeil_, mixedThresh_, - mixedAttemptLim_, refs_, rangeMode_, verbose_, + mixedAttemptLim_, refs_, rangeMode_, verbose_, quiet_, INT_MAX, pool_, NULL); delete dr1FwVec; delete dr1RcVec; @@ -440,7 +445,7 @@ class Paired1mmAlignerV1Factory : public AlignerFactory { sinkPt, sinkPtSe1, sinkPtSe2, mate1fw_, mate2fw_, peInner_, peOuter_, - mixedAttemptLim_, refs_, rangeMode_, verbose_, + mixedAttemptLim_, refs_, rangeMode_, verbose_, quiet_, INT_MAX, pool_, NULL); delete dr1FwVec; return al; @@ -475,6 +480,7 @@ class Paired1mmAlignerV1Factory : public AlignerFactory { const bool strandFix_; const bool rangeMode_; const bool verbose_; + const bool quiet_; const uint32_t seed_; }; diff --git a/aligner_23mm.h b/aligner_23mm.h index 640c971..2a56865 100644 --- a/aligner_23mm.h +++ b/aligner_23mm.h @@ -40,6 +40,7 @@ class Unpaired23mmAlignerV1Factory : public AlignerFactory { bool strandFix, bool rangeMode, bool verbose, + bool quiet, uint32_t seed) : ebwtFw_(ebwtFw), ebwtBw_(ebwtBw), @@ -57,6 +58,7 @@ class Unpaired23mmAlignerV1Factory : public AlignerFactory { strandFix_(strandFix), rangeMode_(rangeMode), verbose_(verbose), + quiet_(quiet), seed_(seed) { assert(ebwtFw.isInMemory()); @@ -213,7 +215,7 @@ class Unpaired23mmAlignerV1Factory : public AlignerFactory { return new UnpairedAlignerV2( params, dr, rchase, sink_, sinkPtFactory_, sinkPt, os_, rangeMode_, verbose_, - INT_MAX, pool_, NULL, NULL); + quiet_, INT_MAX, pool_, NULL, NULL); } private: @@ -234,6 +236,7 @@ class Unpaired23mmAlignerV1Factory : public AlignerFactory { const bool strandFix_; const bool rangeMode_; const bool verbose_; + const bool quiet_; uint32_t seed_; }; @@ -275,6 +278,7 @@ class Paired23mmAlignerV1Factory : public AlignerFactory { bool strandFix, bool rangeMode, bool verbose, + bool quiet, uint32_t seed) : ebwtFw_(ebwtFw), ebwtBw_(ebwtBw), @@ -303,6 +307,7 @@ class Paired23mmAlignerV1Factory : public AlignerFactory { strandFix_(strandFix), rangeMode_(rangeMode), verbose_(verbose), + quiet_(quiet), seed_(seed) { assert(ebwtBw != NULL); @@ -621,7 +626,7 @@ class Paired23mmAlignerV1Factory : public AlignerFactory { refAligner, rchase, sink_, sinkPtFactory_, sinkPt, mate1fw_, mate2fw_, peInner_, peOuter_, dontReconcile_, symCeil_, mixedThresh_, - mixedAttemptLim_, refs_, rangeMode_, verbose_, + mixedAttemptLim_, refs_, rangeMode_, verbose_, quiet_, INT_MAX, pool_, NULL); delete dr1FwVec; delete dr1RcVec; @@ -637,7 +642,7 @@ class Paired23mmAlignerV1Factory : public AlignerFactory { sinkPt, sinkPtSe1, sinkPtSe2, mate1fw_, mate2fw_, peInner_, peOuter_, - mixedAttemptLim_, refs_, rangeMode_, verbose_, + mixedAttemptLim_, refs_, rangeMode_, verbose_, quiet_, INT_MAX, pool_, NULL); delete dr1FwVec; return al; @@ -673,6 +678,7 @@ class Paired23mmAlignerV1Factory : public AlignerFactory { const bool strandFix_; const bool rangeMode_; const bool verbose_; + const bool quiet_; const uint32_t seed_; }; diff --git a/aligner_seed_mm.h b/aligner_seed_mm.h index bc4225e..b7cd8e8 100644 --- a/aligner_seed_mm.h +++ b/aligner_seed_mm.h @@ -44,6 +44,7 @@ class UnpairedSeedAlignerFactory : public AlignerFactory { bool strandFix, bool rangeMode, bool verbose, + bool quiet, uint32_t seed, AlignerMetrics *metrics) : ebwtFw_(ebwtFw), @@ -65,6 +66,7 @@ class UnpairedSeedAlignerFactory : public AlignerFactory { qualOrder_(qualOrder), rangeMode_(rangeMode), verbose_(verbose), + quiet_(quiet), metrics_(metrics) { assert(ebwtFw.isInMemory()); @@ -532,7 +534,7 @@ class UnpairedSeedAlignerFactory : public AlignerFactory { return new UnpairedAlignerV2( params, dr, rchase, sink_, sinkPtFactory_, sinkPt, os_, rangeMode_, verbose_, - maxBts_, pool_, btCnt, metrics_); + quiet_, maxBts_, pool_, btCnt, metrics_); } private: @@ -556,6 +558,7 @@ class UnpairedSeedAlignerFactory : public AlignerFactory { bool qualOrder_; bool rangeMode_; bool verbose_; + bool quiet_; AlignerMetrics *metrics_; }; @@ -599,6 +602,7 @@ class PairedSeedAlignerFactory : public AlignerFactory { bool strandFix, bool rangeMode, bool verbose, + bool quiet, uint32_t seed) : ebwtFw_(ebwtFw), ebwtBw_(ebwtBw), @@ -629,7 +633,8 @@ class PairedSeedAlignerFactory : public AlignerFactory { qualOrder_(qualOrder), strandFix_(strandFix), rangeMode_(rangeMode), - verbose_(verbose) + verbose_(verbose), + quiet_(quiet) { assert(ebwtFw.isInMemory()); assert(ebwtBw->isInMemory()); @@ -1318,7 +1323,7 @@ class PairedSeedAlignerFactory : public AlignerFactory { refAligner, rchase, sink_, sinkPtFactory_, sinkPt, mate1fw_, mate2fw_, peInner_, peOuter_, dontReconcile_, symCeil_, mixedThresh_, mixedAttemptLim_, refs_, - rangeMode_, verbose_, maxBts_, pool_, btCnt); + rangeMode_, verbose_, quiet_, maxBts_, pool_, btCnt); delete dr1FwVec; delete dr1RcVec; delete dr2FwVec; @@ -1331,7 +1336,7 @@ class PairedSeedAlignerFactory : public AlignerFactory { new TCostAwareRangeSrcDr(strandFix_, dr1FwVec, verbose_, true), refAligner, rchase, sink_, sinkPtFactory_, sinkPt, sinkPtSe1, sinkPtSe2, mate1fw_, mate2fw_, peInner_, peOuter_, - mixedAttemptLim_, refs_, rangeMode_, verbose_, maxBts_, + mixedAttemptLim_, refs_, rangeMode_, verbose_, quiet_, maxBts_, pool_, btCnt); delete dr1FwVec; return al; @@ -1370,6 +1375,7 @@ class PairedSeedAlignerFactory : public AlignerFactory { const bool strandFix_; const bool rangeMode_; const bool verbose_; + const bool quiet_; }; #endif /* ALIGNER_SEED_MM_H_ */ diff --git a/annot.cpp b/annot.cpp index c85cbae..77cf6e9 100644 --- a/annot.cpp +++ b/annot.cpp @@ -5,6 +5,7 @@ * Author: Ben Langmead */ +#include #include "annot.h" using namespace std; @@ -16,7 +17,7 @@ void AnnotationMap::parse() { ifstream in(fname_); if(!in.good() && in.is_open()) { cerr << "Could not open annotation file " << fname_ << endl; - exit(1); + throw std::runtime_error(""); } while(!in.eof()) { U32Pair pos; diff --git a/bitset.h b/bitset.h index cfc5280..70cd6f1 100644 --- a/bitset.h +++ b/bitset.h @@ -5,6 +5,7 @@ #include #include #include +#include #include "assert_helpers.h" #include "threading.h" @@ -33,7 +34,7 @@ bitsetRealloc(uint32_t& sz, uint32_t* words, const char *errmsg = NULL) { // Output given error message std::cerr << errmsg; } - exit(1); + throw std::runtime_error(""); } if(oldsz > 0) { // Move old values into new array @@ -64,7 +65,7 @@ class SyncBitset { if(_errmsg != NULL) { std::cerr << _errmsg; } - exit(1); + throw std::runtime_error(""); } assert(_words != NULL); memset(_words, 0, nwords * 4 /* words to bytes */); @@ -173,7 +174,7 @@ class Bitset { if(_errmsg != NULL) { std::cerr << _errmsg; } - exit(1); + throw std::runtime_error(""); } assert(_words != NULL); memset(_words, 0, nwords * 4); diff --git a/blockwise_sa.h b/blockwise_sa.h index 8255bd7..188d4f3 100644 --- a/blockwise_sa.h +++ b/blockwise_sa.h @@ -233,7 +233,7 @@ class SillyBlockwiseDnaSA : public InorderBlockwiseSA { cerr << "Out of memory creating suffix array in " << "SillyBlockwiseDnaSA::SillyBlockwiseDnaSA()" << " at " << __FILE__ << ":" << __LINE__ << endl; - exit(1); + std::runtime_error(""); } } assert_eq(length(this->text())+1, length(_sa)); @@ -482,7 +482,7 @@ void KarkkainenBlockwiseSA::buildSamples() { cerr << "Could not allocate sample suffix container of " << (numSamples * 4) << " bytes." << endl << "Please try using a smaller number of blocks by specifying a larger --bmax or" << endl << "a smaller --bmaxdivn" << endl; - exit(1); + std::runtime_error(""); } } } @@ -531,7 +531,7 @@ void KarkkainenBlockwiseSA::buildSamples() { cerr << "Could not allocate sizes, representatives (" << ((numBuckets*8)>>10) << " KB) for blocks." << endl << "Please try using a smaller number of blocks by specifying a larger --bmax or a" << endl << "smaller --bmaxdivn." << endl; - exit(1); + std::runtime_error(""); } } // Iterate through every suffix in the text, determine which @@ -844,7 +844,7 @@ void KarkkainenBlockwiseSA::nextBlock() { cerr << "Could not allocate a master suffix-array block of " << ((len+1) * 4) << " bytes" << endl << "Please try using a larger number of blocks by specifying a smaller --bmax or" << endl << "a larger --bmaxdivn" << endl; - exit(1); + std::runtime_error(""); } } } else { @@ -860,7 +860,7 @@ void KarkkainenBlockwiseSA::nextBlock() { cerr << "Could not allocate a suffix-array block of " << ((this->bucketSz()+1) * 4) << " bytes" << endl; cerr << "Please try using a larger number of blocks by specifying a smaller --bmax or" << endl << "a larger --bmaxdivn" << endl; - exit(1); + std::runtime_error(""); } } // Select upper and lower bounds from _sampleSuffs[] and @@ -899,7 +899,7 @@ void KarkkainenBlockwiseSA::nextBlock() { cerr << "Could not allocate a z-array of " << (_dcV * 4) << " bytes" << endl; cerr << "Please try using a larger number of blocks by specifying a smaller --bmax or" << endl << "a larger --bmaxdivn" << endl; - exit(1); + std::runtime_error(""); } } @@ -942,7 +942,7 @@ void KarkkainenBlockwiseSA::nextBlock() { cerr << "Could not append element to block of " << ((length(bucket)) * 4) << " bytes" << endl; cerr << "Please try using a larger number of blocks by specifying a smaller --bmax or" << endl << "a larger --bmaxdivn" << endl; - exit(1); + std::runtime_error(""); } } // Not necessarily true; we allow overflowing buckets diff --git a/bowtie_inspect.cpp b/bowtie_inspect.cpp index 288fab3..b6e7313 100644 --- a/bowtie_inspect.cpp +++ b/bowtie_inspect.cpp @@ -107,13 +107,13 @@ static int parseInt(int lower, const char *errmsg) { if (l < lower) { cerr << errmsg << endl; printUsage(cerr); - exit(1); + throw std::runtime_error(""); } return (int32_t)l; } cerr << errmsg << endl; printUsage(cerr); - exit(1); + throw std::runtime_error(""); return -1; } @@ -140,7 +140,7 @@ static void parseOptions(int argc, char **argv) { default: cerr << "Unknown option: " << (char)next_option << endl; printUsage(cerr); - exit(1); + throw std::runtime_error(""); } } while(next_option != -1); } @@ -251,7 +251,7 @@ static void driver(const string& ebwtFileBase, const string& query) { } else { // Initialize Ebwt object TPackedEbwt ebwt(adjustedEbwtFileBase, true, -1, -1, - false, false, false, // no memory-mapped io + false, false, false, true, // no memory-mapped io NULL, // no reference map verbose); // Load whole index into memory @@ -268,52 +268,56 @@ static void driver(const string& ebwtFileBase, const string& query) { * main function. Parses command-line arguments. */ int main(int argc, char **argv) { - string ebwtFile; // read serialized Ebwt from this file - string query; // read query string(s) from this file - vector queries; - string outfile; // write query results to this file - argv0 = argv[0]; - parseOptions(argc, argv); - if(showVersion) { - cout << argv0 << " version " << BOWTIE_VERSION << endl; - if(sizeof(void*) == 4) { - cout << "32-bit" << endl; - } else if(sizeof(void*) == 8) { - cout << "64-bit" << endl; - } else { - cout << "Neither 32- nor 64-bit: sizeof(void*) = " << sizeof(void*) << endl; + try { + string ebwtFile; // read serialized Ebwt from this file + string query; // read query string(s) from this file + vector queries; + string outfile; // write query results to this file + argv0 = argv[0]; + parseOptions(argc, argv); + if(showVersion) { + cout << argv0 << " version " << BOWTIE_VERSION << endl; + if(sizeof(void*) == 4) { + cout << "32-bit" << endl; + } else if(sizeof(void*) == 8) { + cout << "64-bit" << endl; + } else { + cout << "Neither 32- nor 64-bit: sizeof(void*) = " << sizeof(void*) << endl; + } + cout << "Built on " << BUILD_HOST << endl; + cout << BUILD_TIME << endl; + cout << "Compiler: " << COMPILER_VERSION << endl; + cout << "Options: " << COMPILER_OPTIONS << endl; + cout << "Sizeof {int, long, long long, void*, size_t, off_t}: {" + << sizeof(int) + << ", " << sizeof(long) << ", " << sizeof(long long) + << ", " << sizeof(void *) << ", " << sizeof(size_t) + << ", " << sizeof(off_t) << "}" << endl; + return 0; } - cout << "Built on " << BUILD_HOST << endl; - cout << BUILD_TIME << endl; - cout << "Compiler: " << COMPILER_VERSION << endl; - cout << "Options: " << COMPILER_OPTIONS << endl; - cout << "Sizeof {int, long, long long, void*, size_t, off_t}: {" - << sizeof(int) - << ", " << sizeof(long) << ", " << sizeof(long long) - << ", " << sizeof(void *) << ", " << sizeof(size_t) - << ", " << sizeof(off_t) << "}" << endl; - return 0; - } - // Get input filename - if(optind >= argc) { - cerr << "No index name given!" << endl; - printUsage(cerr); - return 1; - } - ebwtFile = argv[optind++]; + // Get input filename + if(optind >= argc) { + cerr << "No index name given!" << endl; + printUsage(cerr); + return 1; + } + ebwtFile = argv[optind++]; - // Optionally summarize - if(verbose) { - cout << "Input ebwt file: \"" << ebwtFile << "\"" << endl; - cout << "Output file: \"" << outfile << "\"" << endl; - cout << "Local endianness: " << (currentlyBigEndian()? "big":"little") << endl; + // Optionally summarize + if(verbose) { + cout << "Input ebwt file: \"" << ebwtFile << "\"" << endl; + cout << "Output file: \"" << outfile << "\"" << endl; + cout << "Local endianness: " << (currentlyBigEndian()? "big":"little") << endl; #ifdef NDEBUG - cout << "Assertions: disabled" << endl; + cout << "Assertions: disabled" << endl; #else - cout << "Assertions: enabled" << endl; + cout << "Assertions: enabled" << endl; #endif + } + driver(ebwtFile, query); + return 0; + } catch(std::exception& e) { + return 1; } - driver(ebwtFile, query); - return 0; } diff --git a/chaincat.cpp b/chaincat.cpp index 1e571a0..065527a 100644 --- a/chaincat.cpp +++ b/chaincat.cpp @@ -8,25 +8,30 @@ */ #include +#include #include "hit_set.h" #include "filebuf.h" using namespace std; int main(int argc, char **argv) { - if(argc <= 1) { - cerr << "Error: must specify chain file as first argument" << endl; - exit(1); + try { + if(argc <= 1) { + cerr << "Error: must specify chain file as first argument" << endl; + return 1; + } + FILE *in = fopen(argv[1], "rb"); + if(in == NULL) { + cerr << "Could not open " << argv[1] << endl; + return 1; + } + FileBuf fb(in); + while(!fb.eof()) { + HitSet s(fb); + s.reportUpTo(cout); + } + fb.close(); + } catch(std::exception& e) { + return 1; } - FILE *in = fopen(argv[1], "rb"); - if(in == NULL) { - cerr << "Could not open " << argv[1] << endl; - exit(1); - } - FileBuf fb(in); - while(!fb.eof()) { - HitSet s(fb); - s.reportUpTo(cout); - } - fb.close(); } diff --git a/ebwt.h b/ebwt.h index 54df890..1a4f4d1 100644 --- a/ebwt.h +++ b/ebwt.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -413,14 +414,14 @@ class Ebwt { cerr << "Could not open index file for writing: \"" << _in1Str << "\"" << endl << "Please make sure the directory exists and that permissions allow writing by" << endl << "Bowtie." << endl; - exit(1); + throw std::runtime_error(""); } ofstream fout2(_in2Str.c_str(), ios::binary); if(!fout2.good()) { cerr << "Could not open index file for writing: \"" << _in2Str << "\"" << endl << "Please make sure the directory exists and that permissions allow writing by" << endl << "Bowtie." << endl; - exit(1); + throw std::runtime_error(""); } // Build initFromVector(is, @@ -457,7 +458,7 @@ class Ebwt { } if(err) { cerr << "Please check if there is a problem with the disk or if disk is full." << endl; - exit(1); + throw std::runtime_error(""); } // Reopen as input streams VMSG_NL("Re-opening _in1 and _in2 as input streams"); @@ -541,7 +542,7 @@ class Ebwt { cerr << "If this computer has more than 4 GB of memory, try using a 64-bit executable;" << endl << "this executable is 32-bit." << endl; } - exit(1); + throw std::runtime_error(""); } // Succesfully obtained joined reference string assert_geq(length(s), jlen); @@ -578,7 +579,7 @@ class Ebwt { cerr << "If this computer has more than 4 GB of memory, try using a 64-bit executable;" << endl << "this executable is 32-bit." << endl; } - exit(1); + throw std::runtime_error(""); } if((iter % 6) == 5 && dcv < 4096 && dcv != 0) { dcv <<= 1; // double difference-cover period @@ -628,7 +629,7 @@ class Ebwt { out1.flush(); out2.flush(); if(out1.fail() || out2.fail()) { cerr << "An error occurred writing the index to disk. Please check if the disk is full." << endl; - exit(1); + throw std::runtime_error(""); } break; } catch(bad_alloc& e) { @@ -637,7 +638,7 @@ class Ebwt { } else { cerr << "Out of memory while constructing suffix array. Please try using a smaller" << endl << "number of blocks by specifying a smaller --bmax or a larger --bmaxdivn" << endl; - exit(1); + throw std::runtime_error(""); } } first = false; @@ -652,7 +653,7 @@ class Ebwt { out1.flush(); out2.flush(); if(out1.fail() || out2.fail()) { cerr << "An error occurred writing the index to disk. Please check if the disk is full." << endl; - exit(1); + throw std::runtime_error(""); } VMSG_NL("Returning from initFromVector"); } @@ -2727,14 +2728,14 @@ void Ebwt::readIntoMemory(bool justHeader, if (stat(names[i], &sbuf) == -1) { perror("stat"); cerr << "Error: Could not stat index file " << names[i] << " prior to memory-mapping" << endl; - exit(1); + throw std::runtime_error(""); } mmFile[i] = (char*)mmap((void *)0, sbuf.st_size, PROT_READ, MAP_SHARED, fds[i], 0); if(mmFile == (void *)(-1)) { perror("mmap"); cerr << "Error: Could not memory-map the index file " << names[i] << endl; - exit(1); + throw std::runtime_error(""); } if(mmSweep) { int sum = 0; @@ -2790,7 +2791,7 @@ void Ebwt::readIntoMemory(bool justHeader, // or we might be setting up a race condition with other processes. if(switchEndian && _useMm) { cerr << "Error: Can't use memory-mapped files when the index is the opposite endianness" << endl; - exit(1); + throw std::runtime_error(""); } // Reads header entries one by one from primary stream @@ -2857,7 +2858,7 @@ void Ebwt::readIntoMemory(bool justHeader, // into their own memory spaces. if(_useMm && (offRateDiff || isaRateDiff)) { cerr << "Error: Can't use memory-mapped files when the offrate or isarate is overridden" << endl; - exit(1); + throw std::runtime_error(""); } // Read nPat from primary stream @@ -2891,7 +2892,7 @@ void Ebwt::readIntoMemory(bool justHeader, MM_READ_RET r = MM_READ(_in1, (void*)this->_plen, this->_nPat*4); if(r != (MM_READ_RET)(this->_nPat*4)) { cerr << "Error reading _plen[] array: " << r << ", " << (this->_nPat*4) << endl; - exit(1); + throw std::runtime_error(""); } } } catch(bad_alloc& e) { @@ -2936,7 +2937,7 @@ void Ebwt::readIntoMemory(bool justHeader, MM_READ_RET r = MM_READ(_in1, (void *)this->_rstarts, this->_nFrag*4*3); if(r != (MM_READ_RET)(this->_nFrag*4*3)) { cerr << "Error reading _rstarts[] array: " << r << ", " << (this->_nFrag*4*3) << endl; - exit(1); + throw std::runtime_error(""); } } } @@ -2969,7 +2970,7 @@ void Ebwt::readIntoMemory(bool justHeader, << "Bowtie without the -z option, try adding the -z option to save memory. If the" << endl << "-z option does not solve the problem, please try again on a computer with more" << endl << "memory." << endl; - exit(1); + throw std::runtime_error(""); } } if(shmemLeader) { @@ -2977,7 +2978,7 @@ void Ebwt::readIntoMemory(bool justHeader, MM_READ_RET r = MM_READ(_in1, (void *)this->_ebwt, eh->_ebwtTotLen); if(r != (MM_READ_RET)eh->_ebwtTotLen) { cerr << "Error reading _ebwt[] array: " << r << ", " << (eh->_ebwtTotLen) << endl; - exit(1); + throw std::runtime_error(""); } if(switchEndian) { uint8_t *side = this->_ebwt; @@ -3039,7 +3040,7 @@ void Ebwt::readIntoMemory(bool justHeader, MM_READ_RET r = MM_READ(_in1, (void *)this->_ftab, eh->_ftabLen*4); if(r != (MM_READ_RET)(eh->_ftabLen*4)) { cerr << "Error reading _ftab[] array: " << r << ", " << (eh->_ftabLen*4) << endl; - exit(1); + throw std::runtime_error(""); } } } @@ -3063,7 +3064,7 @@ void Ebwt::readIntoMemory(bool justHeader, MM_READ_RET r = MM_READ(_in1, (void *)this->_eftab, eh->_eftabLen*4); if(r != (MM_READ_RET)(eh->_eftabLen*4)) { cerr << "Error reading _eftab[] array: " << r << ", " << (eh->_eftabLen*4) << endl; - exit(1); + throw std::runtime_error(""); } } } @@ -3079,7 +3080,7 @@ void Ebwt::readIntoMemory(bool justHeader, << "If you ran Bowtie without the -z option, try adding the -z option to save" << endl << "memory. If the -z option does not solve the problem, please try again on a" << endl << "computer with more memory." << endl; - exit(1); + throw std::runtime_error(""); } // Read reference sequence names from primary index file (or not, @@ -3118,7 +3119,7 @@ void Ebwt::readIntoMemory(bool justHeader, << "If you ran Bowtie without the -z option, try adding the -z option to save" << endl << "memory. If the -z option does not solve the problem, please try again on a" << endl << "computer with more memory." << endl; - exit(1); + throw std::runtime_error(""); } } else { shmemLeader = ALLOC_SHARED_U32( @@ -3140,7 +3141,7 @@ void Ebwt::readIntoMemory(bool justHeader, MM_READ_RET r = MM_READ(_in2, (void *)buf, block << 2); if(r != (MM_READ_RET)(block << 2)) { cerr << "Error reading block of _offs[] array: " << r << ", " << (block << 2) << endl; - exit(1); + throw std::runtime_error(""); } uint32_t idx = i >> offRateDiff; for(uint32_t j = 0; j < block; j += (1 << offRateDiff)) { @@ -3165,7 +3166,7 @@ void Ebwt::readIntoMemory(bool justHeader, if((offsLen & 0xc0000000) != 0) { if(sizeof(char *) <= 4) { cerr << "Sanity error: sizeof(char *) <= 4 but offsLen is " << hex << offsLen << endl; - exit(1); + throw std::runtime_error(""); } // offsLen << 2 overflows, so do it in four reads char *offs = (char *)this->_offs; @@ -3173,7 +3174,7 @@ void Ebwt::readIntoMemory(bool justHeader, MM_READ_RET r = MM_READ(_in2, (void*)offs, offsLen); if(r != (MM_READ_RET)(offsLen)) { cerr << "Error reading block of _offs[] array: " << r << ", " << offsLen << endl; - exit(1); + throw std::runtime_error(""); } offs += offsLen; } @@ -3182,7 +3183,7 @@ void Ebwt::readIntoMemory(bool justHeader, MM_READ_RET r = MM_READ(_in2, (void*)this->_offs, offsLen << 2); if(r != (MM_READ_RET)(offsLen << 2)) { cerr << "Error reading _offs[] array: " << r << ", " << (offsLen << 2) << endl; - exit(1); + throw std::runtime_error(""); } } } @@ -3218,7 +3219,7 @@ void Ebwt::readIntoMemory(bool justHeader, << "If you ran Bowtie without the -z option, try adding the -z option to save" << endl << "memory. If the -z option does not solve the problem, please try again on a" << endl << "computer with more memory." << endl; - exit(1); + throw std::runtime_error(""); } } // Read _isa[] @@ -3230,7 +3231,7 @@ void Ebwt::readIntoMemory(bool justHeader, MM_READ_RET r = MM_READ(_in2, (void *)tmp, 4); if(r != (MM_READ_RET)4) { cerr << "Error reading a word of the _isa[] array: " << r << ", 4" << endl; - exit(1); + throw std::runtime_error(""); } } else { uint32_t idx = i >> isaRateDiff; @@ -3249,7 +3250,7 @@ void Ebwt::readIntoMemory(bool justHeader, MM_READ_RET r = MM_READ(_in2, (void *)this->_isa, isaLen*4); if(r != (MM_READ_RET)(isaLen*4)) { cerr << "Error reading _isa[] array: " << r << ", " << (isaLen*4) << endl; - exit(1); + throw std::runtime_error(""); } } } @@ -4201,7 +4202,7 @@ string adjustEbwtBase(const string& cmdline, } if(!in.is_open()) { cerr << "Could not locate a Bowtie index corresponding to basename \"" << ebwtFileBase << "\"" << endl; - exit(1); + throw std::runtime_error(""); } return str; } diff --git a/ebwt_build.cpp b/ebwt_build.cpp index 4cdb3d8..a42da0f 100644 --- a/ebwt_build.cpp +++ b/ebwt_build.cpp @@ -355,13 +355,13 @@ static int parseNumber(T lower, const char *errmsg) { if (t < lower) { cerr << errmsg << endl; printUsage(cerr); - exit(1); + std::runtime_error(""); } return t; } cerr << errmsg << endl; printUsage(cerr); - exit(1); + std::runtime_error(""); return -1; } @@ -445,7 +445,7 @@ static void parseOptions(int argc, char **argv) { default: cerr << "Unknown option: " << (char)next_option << endl; printUsage(cerr); - exit(1); + std::runtime_error(""); } } while(next_option != -1); if(bmax < 40) { @@ -487,7 +487,7 @@ static void driver(const string& infile, FILE *f = fopen(infiles[i].c_str(), "r"); if (f == NULL) { cerr << "Error: could not open "<< infiles[i] << endl; - exit(1); + std::runtime_error(""); } FileBuf *fb = new FileBuf(f); assert(fb != NULL); @@ -519,7 +519,7 @@ static void driver(const string& infile, cerr << "Could not open index file for writing: \"" << file3 << "\"" << endl << "Please make sure the directory exists and that permissions allow writing by" << endl << "Bowtie." << endl; - exit(1); + std::runtime_error(""); } bool be = currentlyBigEndian(); writeU32(fout3, 1, be); // endianness sentinel @@ -601,141 +601,144 @@ static char *argv0 = NULL; * main function. Parses command-line arguments. */ int main(int argc, char **argv) { + try { + string infile; + vector infiles; + string outfile; - string infile; - vector infiles; - string outfile; - - parseOptions(argc, argv); - argv0 = argv[0]; - if(showVersion) { - cout << argv0 << " version " << BOWTIE_VERSION << endl; - if(sizeof(void*) == 4) { - cout << "32-bit" << endl; - } else if(sizeof(void*) == 8) { - cout << "64-bit" << endl; - } else { - cout << "Neither 32- nor 64-bit: sizeof(void*) = " << sizeof(void*) << endl; + parseOptions(argc, argv); + argv0 = argv[0]; + if(showVersion) { + cout << argv0 << " version " << BOWTIE_VERSION << endl; + if(sizeof(void*) == 4) { + cout << "32-bit" << endl; + } else if(sizeof(void*) == 8) { + cout << "64-bit" << endl; + } else { + cout << "Neither 32- nor 64-bit: sizeof(void*) = " << sizeof(void*) << endl; + } + cout << "Built on " << BUILD_HOST << endl; + cout << BUILD_TIME << endl; + cout << "Compiler: " << COMPILER_VERSION << endl; + cout << "Options: " << COMPILER_OPTIONS << endl; + cout << "Sizeof {int, long, long long, void*, size_t, off_t}: {" + << sizeof(int) + << ", " << sizeof(long) << ", " << sizeof(long long) + << ", " << sizeof(void *) << ", " << sizeof(size_t) + << ", " << sizeof(off_t) << "}" << endl; + return 0; } - cout << "Built on " << BUILD_HOST << endl; - cout << BUILD_TIME << endl; - cout << "Compiler: " << COMPILER_VERSION << endl; - cout << "Options: " << COMPILER_OPTIONS << endl; - cout << "Sizeof {int, long, long long, void*, size_t, off_t}: {" - << sizeof(int) - << ", " << sizeof(long) << ", " << sizeof(long long) - << ", " << sizeof(void *) << ", " << sizeof(size_t) - << ", " << sizeof(off_t) << "}" << endl; - return 0; - } - - // Get input filename - if(optind >= argc) { - cerr << "No input sequence or sequence file specified!" << endl; - printUsage(cerr); - return 1; - } - infile = argv[optind++]; - - // Get output filename - if(optind >= argc) { - cerr << "No output file specified!" << endl; - printUsage(cerr); - return 1; - } - outfile = argv[optind++]; - tokenize(infile, ",", infiles); - if(infiles.size() < 1) { - cerr << "Tokenized input file list was empty!" << endl; - printUsage(cerr); - return 1; - } - - // Optionally summarize - if(verbose) { - cout << "Settings:" << endl - << " Output files: \"" << outfile << ".*.ebwt\"" << endl - << " Line rate: " << lineRate << " (line is " << (1<= argc) { + cerr << "No input sequence or sequence file specified!" << endl; + printUsage(cerr); + return 1; } - if(bmaxDivN == 0xffffffff) { - cout << " Max bucket size, len divisor: default" << endl; - } else { - cout << " Max bucket size, len divisor: " << bmaxDivN << endl; + infile = argv[optind++]; + + // Get output filename + if(optind >= argc) { + cerr << "No output file specified!" << endl; + printUsage(cerr); + return 1; } - cout << " Difference-cover sample period: " << dcv << endl; - cout << " Reference base cutoff: "; - if(cutoff == -1) cout << "none"; else cout << cutoff << " bases"; - cout << endl; - cout << " Endianness: " << (bigEndian? "big":"little") << endl - << " Actual local endianness: " << (currentlyBigEndian()? "big":"little") << endl - << " Sanity checking: " << (sanityCheck? "enabled":"disabled") << endl; -#ifdef NDEBUG - cout << " Assertions: disabled" << endl; -#else - cout << " Assertions: enabled" << endl; -#endif - cout << " Random seed: " << seed << endl; - cout << " Sizeofs: void*:" << sizeof(void*) << ", int:" << sizeof(int) << ", long:" << sizeof(long) << ", size_t:" << sizeof(size_t) << endl; - cout << "Input files DNA, " << file_format_names[format] << ":" << endl; - for(size_t i = 0; i < infiles.size(); i++) { - cout << " " << infiles[i] << endl; + outfile = argv[optind++]; + + tokenize(infile, ",", infiles); + if(infiles.size() < 1) { + cerr << "Tokenized input file list was empty!" << endl; + printUsage(cerr); + return 1; } - } - // Seed random number generator - srand(seed); - int64_t origCutoff = cutoff; // save cutoff since it gets modified - { - Timer timer(cout, "Total time for call to driver() for forward index: ", verbose); - if(!packed) { - try { - driver > >(infile, infiles, outfile); - } catch(bad_alloc& e) { - if(autoMem) { - cerr << "Switching to a packed string representation." << endl; - packed = true; - } else { - throw e; - } + + // Optionally summarize + if(verbose) { + cout << "Settings:" << endl + << " Output files: \"" << outfile << ".*.ebwt\"" << endl + << " Line rate: " << lineRate << " (line is " << (1< > > >(infile, infiles, outfile); - } - } - cutoff = origCutoff; // reset cutoff for backward Ebwt - if(doubleEbwt) { + // Seed random number generator srand(seed); - Timer timer(cout, "Total time for backward call to driver() for mirror index: ", verbose); - if(!packed) { - try { - driver > >(infile, infiles, outfile + ".rev", true); - } catch(bad_alloc& e) { - if(autoMem) { - cerr << "Switching to a packed string representation." << endl; - packed = true; - } else { - throw e; + int64_t origCutoff = cutoff; // save cutoff since it gets modified + { + Timer timer(cout, "Total time for call to driver() for forward index: ", verbose); + if(!packed) { + try { + driver > >(infile, infiles, outfile); + } catch(bad_alloc& e) { + if(autoMem) { + cerr << "Switching to a packed string representation." << endl; + packed = true; + } else { + throw e; + } } } + if(packed) { + driver > > >(infile, infiles, outfile); + } } - if(packed) { - driver > > >(infile, infiles, outfile + ".rev", true); + cutoff = origCutoff; // reset cutoff for backward Ebwt + if(doubleEbwt) { + srand(seed); + Timer timer(cout, "Total time for backward call to driver() for mirror index: ", verbose); + if(!packed) { + try { + driver > >(infile, infiles, outfile + ".rev", true); + } catch(bad_alloc& e) { + if(autoMem) { + cerr << "Switching to a packed string representation." << endl; + packed = true; + } else { + throw e; + } + } + } + if(packed) { + driver > > >(infile, infiles, outfile + ".rev", true); + } } + return 0; + } catch(std::exception& e) { + return 1; } - return 0; } diff --git a/ebwt_search.cpp b/ebwt_search.cpp index c829d9f..2d7716f 100644 --- a/ebwt_search.cpp +++ b/ebwt_search.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -35,108 +36,203 @@ using namespace std; using namespace seqan; -static vector mates1; // mated reads (first mate) -static vector mates2; // mated reads (second mate) +static vector mates1; // mated reads (first mate) +static vector mates2; // mated reads (second mate) static vector mates12; // mated reads (1st/2nd interleaved in 1 file) -static string adjustedEbwtFileBase = ""; -static bool verbose = 0; // be talkative -static bool startVerbose = 0; // be talkative at startup -static bool quiet = false; // print nothing but the alignments -static int sanityCheck = 0; // enable expensive sanity checks -static int format = FASTQ; // default read format is FASTQ -static string origString = ""; // reference text, or filename(s) -static int revcomp = 1; // search for reverse complements? -static int seed = 0; // srandom() seed -static int timing = 0; // whether to report basic timing data -static bool allHits = false; // for multihits, report just one -static bool rangeMode = false; // report BWT ranges instead of ref locs -static int showVersion = 0; // just print version and quit? -static int ipause = 0; // pause before maching? -static uint32_t qUpto = 0xffffffff; // max # of queries to read -static int skipSearch = 0; // abort before searching -static int qSameLen = 0; // abort before searching -static int trim5 = 0; // amount to trim from 5' end -static int trim3 = 0; // amount to trim from 3' end -static int reportOpps = 0; // whether to report # of other mappings -static int offRate = -1; // keep default offRate -static int isaRate = -1; // keep default isaRate -static int mismatches = 0; // allow 0 mismatches by default -static char *patDumpfile = NULL; // filename to dump patterns to -static bool solexaQuals = false; // quality strings are solexa quals, not phred, and subtract 64 (not 33) -static bool phred64Quals = false; // quality chars are phred, but must subtract 64 (not 33) -static bool integerQuals = false; // quality strings are space-separated strings of integers, not ASCII -static int maqLike = 1; // do maq-like searching -static int seedLen = 28; // seed length (changed in Maq 0.6.4 from 24) -static int seedMms = 2; // # mismatches allowed in seed (maq's -n) -static int qualThresh = 70; // max qual-weighted hamming dist (maq's -e) -static int maxBtsBetter = 125; // max # backtracks allowed in half-and-half mode -static int maxBts = 800; // max # backtracks allowed in half-and-half mode -static int nthreads = 1; // number of pthreads operating concurrently -static output_types outType = OUTPUT_FULL; // style of output -static bool randReadsNoSync = false; // true -> generate reads from per-thread random source -static int numRandomReads = 50000000; // # random reads (see Random*PatternSource in pat.h) -static int lenRandomReads = 35; // len of random reads (see Random*PatternSource in pat.h) -static bool fullIndex = true; // load halves one at a time and proceed in phases -static bool noRefNames = false; // true -> print reference indexes; not names -static ofstream *dumpNoHits = NULL; // file to dump non-hitting reads to (for performance study) -static ofstream *dumpHHHits = NULL; // file to dump half-and-half hits to (for performance study) -static string dumpAlFaBase = ""; // basename of FASTA files to dump aligned reads to -static string dumpAlFqBase = ""; // basename of FASTQ files to dump aligned reads to -static string dumpAlBase = ""; // basename of same-format files to dump aligned reads to -static string dumpUnalFaBase = ""; // basename of FASTA files to dump unaligned reads to -static string dumpUnalFqBase = ""; // basename of FASTQ files to dump unaligned reads to -static string dumpUnalBase = ""; // basename of same-format files to dump unaligned reads to -static string dumpMaxFaBase = ""; // basename of FASTA files to dump reads with more than -m valid alignments to -static string dumpMaxFqBase = ""; // basename of FASTQ files to dump reads with more than -m valid alignments to -static string dumpMaxBase = ""; // basename of same-format files to dump reads with more than -m valid alignments to -static uint32_t khits = 1; // number of hits per read; >1 is much slower -static uint32_t mhits = 0xffffffff; // don't report any hits if there are > mhits -static bool better = false; // true -> guarantee alignments from best possible stratum -static bool oldBest = false; // true -> guarantee alignments from best possible stratum (the old way) -static bool strata = false; // true -> don't stop at stratum boundaries -static bool refOut = false; // if true, alignments go to per-ref files -static bool seedAndExtend = false; // use seed-and-extend aligner; for metagenomics recruitment -static int partitionSz = 0; // output a partitioning key in first field -static bool noMaqRound = false; // true -> don't round quals to nearest 10 like maq -static bool forgiveInput = false; // let read input be a little wrong w/o complaining or dying -static bool useSpinlock = true; // false -> don't use of spinlocks even if they're #defines -static bool fileParallel = false; // separate threads read separate input files in parallel -static bool useShmem = false; // use shared memory to hold the index -static bool useMm = false; // use memory-mapped files to hold the index -static bool mmSweep = false; // sweep through memory-mapped files immediately after mapping -static bool stateful = false; // use stateful aligners -static uint32_t prefetchWidth = 1; // number of reads to process in parallel w/ --stateful -static uint32_t minInsert = 0; // minimum insert size (Maq = 0, SOAP = 400) -static uint32_t maxInsert = 250; // maximum insert size (Maq = 250, SOAP = 600) -static bool mate1fw = true; // -1 mate aligns in fw orientation on fw strand -static bool mate2fw = false; // -2 mate aligns in rc orientation on fw strand -static uint32_t mixedThresh = 4; // threshold for when to switch to paired-end mixed mode (see aligner.h) -static uint32_t mixedAttemptLim = 100; // number of attempts to make in "mixed mode" before giving up on orientation -static bool dontReconcileMates = true; // suppress pairwise all-versus-all way of resolving mates -static uint32_t cacheLimit = 5; // ranges w/ size > limit will be cached -static uint32_t cacheSize = 0; // # words per range cache -static int offBase = 0; // offsets are 0-based by default, but configurable -static bool tryHard = false; // set very high maxBts, mixedAttemptLim -static uint32_t skipReads = 0; // # reads/read pairs to skip -static bool nofw = false; // don't align fw orientation of read -static bool norc = false; // don't align rc orientation of read -static bool strandFix = true; // attempt to fix strand bias -static bool randomizeQuals = false; // randomize quality values -static bool stats = false; // print performance stats -static int chunkPoolMegabytes = 32; // max MB to dedicate to best-first search frames per thread -static int chunkSz = 16; // size of single chunk disbursed by ChunkPool -static bool chunkVerbose = false; // have chunk allocator output status messages? -static bool recal = false; -static int recalMaxCycle = 64; -static int recalMaxQual = 40; -static int recalQualShift = 2; -static bool useV1 = true; -static bool reportSe = false; -static const char * refMapFile = NULL; // file containing a map from index coordinates to another coordinate system -static const char * annotMapFile= NULL; // file containing a map from reference coordinates to annotations -static size_t fastaContLen = 0; -static size_t fastaContFreq = 0; -static bool hadoopOut = false; // print Hadoop status and summary messages +static string adjustedEbwtFileBase; +static bool verbose; // be talkative +static bool startVerbose; // be talkative at startup +static bool quiet; // print nothing but the alignments +static int sanityCheck; // enable expensive sanity checks +static int format; // default read format is FASTQ +static string origString; // reference text, or filename(s) +static int revcomp; // search for reverse complements? +static int seed; // srandom() seed +static int timing; // whether to report basic timing data +static bool allHits; // for multihits, report just one +static bool rangeMode; // report BWT ranges instead of ref locs +static int showVersion; // just print version and quit? +static int ipause; // pause before maching? +static uint32_t qUpto; // max # of queries to read +static int trim5; // amount to trim from 5' end +static int trim3; // amount to trim from 3' end +static int reportOpps; // whether to report # of other mappings +static int offRate; // keep default offRate +static int isaRate; // keep default isaRate +static int mismatches; // allow 0 mismatches by default +static char *patDumpfile; // filename to dump patterns to +static bool solexaQuals; // quality strings are solexa quals, not phred, and subtract 64 (not 33) +static bool phred64Quals; // quality chars are phred, but must subtract 64 (not 33) +static bool integerQuals; // quality strings are space-separated strings of integers, not ASCII +static int maqLike; // do maq-like searching +static int seedLen; // seed length (changed in Maq 0.6.4 from 24) +static int seedMms; // # mismatches allowed in seed (maq's -n) +static int qualThresh; // max qual-weighted hamming dist (maq's -e) +static int maxBtsBetter; // max # backtracks allowed in half-and-half mode +static int maxBts; // max # backtracks allowed in half-and-half mode +static int nthreads; // number of pthreads operating concurrently +static output_types outType; // style of output +static bool randReadsNoSync; // true -> generate reads from per-thread random source +static int numRandomReads; // # random reads (see Random*PatternSource in pat.h) +static int lenRandomReads; // len of random reads (see Random*PatternSource in pat.h) +static bool fullIndex; // load halves one at a time and proceed in phases +static bool noRefNames; // true -> print reference indexes; not names +static string dumpAlFaBase; // basename of FASTA files to dump aligned reads to +static string dumpAlFqBase; // basename of FASTQ files to dump aligned reads to +static string dumpAlBase; // basename of same-format files to dump aligned reads to +static string dumpUnalFaBase; // basename of FASTA files to dump unaligned reads to +static string dumpUnalFqBase; // basename of FASTQ files to dump unaligned reads to +static string dumpUnalBase; // basename of same-format files to dump unaligned reads to +static string dumpMaxFaBase; // basename of FASTA files to dump reads with more than -m valid alignments to +static string dumpMaxFqBase; // basename of FASTQ files to dump reads with more than -m valid alignments to +static string dumpMaxBase; // basename of same-format files to dump reads with more than -m valid alignments to +static uint32_t khits; // number of hits per read; >1 is much slower +static uint32_t mhits; // don't report any hits if there are > mhits +static bool better; // true -> guarantee alignments from best possible stratum +static bool oldBest; // true -> guarantee alignments from best possible stratum (the old way) +static bool strata; // true -> don't stop at stratum boundaries +static bool refOut; // if true, alignments go to per-ref files +static int partitionSz; // output a partitioning key in first field +static bool noMaqRound; // true -> don't round quals to nearest 10 like maq +static bool forgiveInput; // let read input be a little wrong w/o complaining or dying +static bool useSpinlock; // false -> don't use of spinlocks even if they're #defines +static bool fileParallel; // separate threads read separate input files in parallel +static bool useShmem; // use shared memory to hold the index +static bool useMm; // use memory-mapped files to hold the index +static bool mmSweep; // sweep through memory-mapped files immediately after mapping +static bool stateful; // use stateful aligners +static uint32_t prefetchWidth; // number of reads to process in parallel w/ --stateful +static uint32_t minInsert; // minimum insert size (Maq = 0, SOAP = 400) +static uint32_t maxInsert; // maximum insert size (Maq = 250, SOAP = 600) +static bool mate1fw; // -1 mate aligns in fw orientation on fw strand +static bool mate2fw; // -2 mate aligns in rc orientation on fw strand +static uint32_t mixedThresh; // threshold for when to switch to paired-end mixed mode (see aligner.h) +static uint32_t mixedAttemptLim; // number of attempts to make in "mixed mode" before giving up on orientation +static bool dontReconcileMates; // suppress pairwise all-versus-all way of resolving mates +static uint32_t cacheLimit; // ranges w/ size > limit will be cached +static uint32_t cacheSize; // # words per range cache +static int offBase; // offsets are 0-based by default, but configurable +static bool tryHard; // set very high maxBts, mixedAttemptLim +static uint32_t skipReads; // # reads/read pairs to skip +static bool nofw; // don't align fw orientation of read +static bool norc; // don't align rc orientation of read +static bool strandFix; // attempt to fix strand bias +static bool randomizeQuals; // randomize quality values +static bool stats; // print performance stats +static int chunkPoolMegabytes; // max MB to dedicate to best-first search frames per thread +static int chunkSz; // size of single chunk disbursed by ChunkPool +static bool chunkVerbose; // have chunk allocator output status messages? +static bool recal; +static int recalMaxCycle; +static int recalMaxQual; +static int recalQualShift; +static bool useV1; +static bool reportSe; +static const char * refMapFile; // file containing a map from index coordinates to another coordinate system +static const char * annotMapFile; // file containing a map from reference coordinates to annotations +static size_t fastaContLen; +static size_t fastaContFreq; +static bool hadoopOut; // print Hadoop status and summary messages + +static void resetOptions() { + mates1.clear(); + mates2.clear(); + mates12.clear(); + adjustedEbwtFileBase = ""; + verbose = 0; + startVerbose = 0; + quiet = false; + sanityCheck = 0; // enable expensive sanity checks + format = FASTQ; // default read format is FASTQ + origString = ""; // reference text, or filename(s) + revcomp = 1; // search for reverse complements? + seed = 0; // srandom() seed + timing = 0; // whether to report basic timing data + allHits = false; // for multihits, report just one + rangeMode = false; // report BWT ranges instead of ref locs + showVersion = 0; // just print version and quit? + ipause = 0; // pause before maching? + qUpto = 0xffffffff; // max # of queries to read + trim5 = 0; // amount to trim from 5' end + trim3 = 0; // amount to trim from 3' end + reportOpps = 0; // whether to report # of other mappings + offRate = -1; // keep default offRate + isaRate = -1; // keep default isaRate + mismatches = 0; // allow 0 mismatches by default + patDumpfile = NULL; // filename to dump patterns to + solexaQuals = false; // quality strings are solexa quals, not phred, and subtract 64 (not 33) + phred64Quals = false; // quality chars are phred, but must subtract 64 (not 33) + integerQuals = false; // quality strings are space-separated strings of integers, not ASCII + maqLike = 1; // do maq-like searching + seedLen = 28; // seed length (changed in Maq 0.6.4 from 24) + seedMms = 2; // # mismatches allowed in seed (maq's -n) + qualThresh = 70; // max qual-weighted hamming dist (maq's -e) + maxBtsBetter = 125; // max # backtracks allowed in half-and-half mode + maxBts = 800; // max # backtracks allowed in half-and-half mode + nthreads = 1; // number of pthreads operating concurrently + outType = OUTPUT_FULL; // style of output + randReadsNoSync = false; // true -> generate reads from per-thread random source + numRandomReads = 50000000; // # random reads (see Random*PatternSource in pat.h) + lenRandomReads = 35; // len of random reads (see Random*PatternSource in pat.h) + fullIndex = true; // load halves one at a time and proceed in phases + noRefNames = false; // true -> print reference indexes; not names + dumpAlFaBase = ""; // basename of FASTA files to dump aligned reads to + dumpAlFqBase = ""; // basename of FASTQ files to dump aligned reads to + dumpAlBase = ""; // basename of same-format files to dump aligned reads to + dumpUnalFaBase = ""; // basename of FASTA files to dump unaligned reads to + dumpUnalFqBase = ""; // basename of FASTQ files to dump unaligned reads to + dumpUnalBase = ""; // basename of same-format files to dump unaligned reads to + dumpMaxFaBase = ""; // basename of FASTA files to dump reads with more than -m valid alignments to + dumpMaxFqBase = ""; // basename of FASTQ files to dump reads with more than -m valid alignments to + dumpMaxBase = ""; // basename of same-format files to dump reads with more than -m valid alignments to + khits = 1; // number of hits per read; >1 is much slower + mhits = 0xffffffff; // don't report any hits if there are > mhits + better = false; // true -> guarantee alignments from best possible stratum + oldBest = false; // true -> guarantee alignments from best possible stratum (the old way) + strata = false; // true -> don't stop at stratum boundaries + refOut = false; // if true, alignments go to per-ref files + partitionSz = 0; // output a partitioning key in first field + noMaqRound = false; // true -> don't round quals to nearest 10 like maq + forgiveInput = false; // let read input be a little wrong w/o complaining or dying + useSpinlock = true; // false -> don't use of spinlocks even if they're #defines + fileParallel = false; // separate threads read separate input files in parallel + useShmem = false; // use shared memory to hold the index + useMm = false; // use memory-mapped files to hold the index + mmSweep = false; // sweep through memory-mapped files immediately after mapping + stateful = false; // use stateful aligners + prefetchWidth = 1; // number of reads to process in parallel w/ --stateful + minInsert = 0; // minimum insert size (Maq = 0, SOAP = 400) + maxInsert = 250; // maximum insert size (Maq = 250, SOAP = 600) + mate1fw = true; // -1 mate aligns in fw orientation on fw strand + mate2fw = false; // -2 mate aligns in rc orientation on fw strand + mixedThresh = 4; // threshold for when to switch to paired-end mixed mode (see aligner.h) + mixedAttemptLim = 100; // number of attempts to make in "mixed mode" before giving up on orientation + dontReconcileMates = true; // suppress pairwise all-versus-all way of resolving mates + cacheLimit = 5; // ranges w/ size > limit will be cached + cacheSize = 0; // # words per range cache + offBase = 0; // offsets are 0-based by default, but configurable + tryHard = false; // set very high maxBts, mixedAttemptLim + skipReads = 0; // # reads/read pairs to skip + nofw = false; // don't align fw orientation of read + norc = false; // don't align rc orientation of read + strandFix = true; // attempt to fix strand bias + randomizeQuals = false; // randomize quality values + stats = false; // print performance stats + chunkPoolMegabytes = 32; // max MB to dedicate to best-first search frames per thread + chunkSz = 16; // size of single chunk disbursed by ChunkPool + chunkVerbose = false; // have chunk allocator output status messages? + recal = false; + recalMaxCycle = 64; + recalMaxQual = 40; + recalQualShift = 2; + useV1 = true; + reportSe = false; + refMapFile = NULL; // file containing a map from index coordinates to another coordinate system + annotMapFile = NULL; // file containing a map from reference coordinates to annotations + fastaContLen = 0; + fastaContFreq = 0; + hadoopOut = false; // print Hadoop status and summary messages +} // mating constraints @@ -249,8 +345,6 @@ static struct option long_options[] = { {(char*)"maxfq", required_argument, 0, ARG_MAXFQ}, {(char*)"offrate", required_argument, 0, 'o'}, {(char*)"isarate", required_argument, 0, ARG_ISARATE}, - {(char*)"skipsearch", no_argument, &skipSearch, 1}, - {(char*)"qsamelen", no_argument, &qSameLen, 1}, {(char*)"reportopps", no_argument, &reportOpps, 1}, {(char*)"version", no_argument, &showVersion, 1}, {(char*)"dumppats", required_argument, 0, ARG_DUMP_PATS}, @@ -1350,13 +1444,13 @@ static int parseInt(int lower, const char *errmsg) { if (l < lower) { cerr << errmsg << endl; printUsage(cerr); - exit(1); + throw std::runtime_error(""); } return (int32_t)l; } cerr << errmsg << endl; printUsage(cerr); - exit(1); + throw std::runtime_error(""); return -1; } @@ -1432,7 +1526,6 @@ static void parseOptions(int argc, char **argv) { case ARG_CHAINOUT: outType = OUTPUT_CHAIN; break; case 'b': outType = OUTPUT_BINARY; break; case ARG_REFOUT: refOut = true; break; - case ARG_SEED_EXTEND: seedAndExtend = true; break; case ARG_NOOUT: outType = OUTPUT_NONE; break; case ARG_REFMAP: refMapFile = optarg; break; case ARG_ANNOTMAP: annotMapFile = optarg; break; @@ -1447,13 +1540,11 @@ static void parseOptions(int argc, char **argv) { << "BOWTIE_MM defined. Memory-mapped I/O is not supported under Windows. If you" << endl << "would like to use memory-mapped I/O on a platform that supports it, please" << endl << "refrain from specifying BOWTIE_MM=0 when compiling Bowtie." << endl; - exit(1); + throw std::runtime_error(""); #endif } case ARG_MMSWEEP: mmSweep = true; break; case ARG_HADOOPOUT: hadoopOut = true; break; - case ARG_DUMP_NOHIT: dumpNoHits = new ofstream(".nohits.dump"); break; - case ARG_DUMP_HHHIT: dumpHHHits = new ofstream(".hhhits.dump"); break; case ARG_AL: dumpAlBase = optarg; break; case ARG_ALFA: dumpAlFaBase = optarg; break; case ARG_ALFQ: dumpAlFqBase = optarg; break; @@ -1510,14 +1601,14 @@ static void parseOptions(int argc, char **argv) { case 'p': #ifndef BOWTIE_PTHREADS cerr << "-p/--threads is disabled because bowtie was not compiled with pthreads support" << endl; - exit(1); + throw std::runtime_error(""); #endif nthreads = parseInt(1, "-p/--threads arg must be at least 1"); break; case ARG_FILEPAR: #ifndef BOWTIE_PTHREADS cerr << "--filepar is disabled because bowtie was not compiled with pthreads support" << endl; - exit(1); + throw std::runtime_error(""); #endif fileParallel = true; break; @@ -1526,7 +1617,7 @@ static void parseOptions(int argc, char **argv) { mismatches = parseInt(0, "-v arg must be at least 0"); if(mismatches > 3) { cerr << "-v arg must be at most 3" << endl; - exit(1); + throw std::runtime_error(""); } break; case '3': trim3 = parseInt(0, "-3/--trim3 arg must be at least 0"); break; @@ -1537,7 +1628,7 @@ static void parseOptions(int argc, char **argv) { case 'n': seedMms = parseInt(0, "-n/--seedmms arg must be at least 0"); maqLike = 1; break; case 'l': seedLen = parseInt(5, "-l/--seedlen arg must be at least 5"); break; case 'h': printLongUsage(cout); exit(0); break; - case '?': printUsage(cerr); exit(1); break; + case '?': printUsage(cerr); throw std::runtime_error(""); break; case 'a': allHits = true; break; case 'y': tryHard = true; break; case ARG_RECAL: recal = true; break; @@ -1570,7 +1661,7 @@ static void parseOptions(int argc, char **argv) { if(optarg == NULL || strlen(optarg) == 0) { cerr << "--orig arg must be followed by a string" << endl; printUsage(cerr); - exit(1); + throw std::runtime_error(""); } origString = optarg; break; @@ -1582,7 +1673,7 @@ static void parseOptions(int argc, char **argv) { default: cerr << "Unknown option: " << (char)next_option << endl; printUsage(cerr); - exit(1); + throw std::runtime_error(""); } } while(next_option != -1); bool paired = mates1.size() > 0 || mates2.size() > 0 || mates12.size() > 0; @@ -1603,13 +1694,13 @@ static void parseOptions(int argc, char **argv) { cerr << "Error: " << mates1.size() << " mate files/sequences were specified with -1, but " << mates2.size() << endl << "mate files/sequences were specified with -2. The same number of mate files/" << endl << "sequences must be specified with -1 and -2." << endl; - exit(1); + throw std::runtime_error(""); } // Check for duplicate mate input files if(format != CMDLINE) { for(size_t i = 0; i < mates1.size(); i++) { for(size_t j = 0; j < mates2.size(); j++) { - if(mates1[i] == mates2[j]) { + if(mates1[i] == mates2[j] && !quiet) { cerr << "Warning: Same mate file \"" << mates1[i] << "\" appears as argument to both -1 and -2" << endl; } } @@ -1655,7 +1746,7 @@ static void parseOptions(int argc, char **argv) { cerr << "When -z/--phased is used, the --al option is unavailable" << endl; error = true; } - if(error) exit(1); + if(error) throw std::runtime_error(""); } if(tryHard) { // Increase backtracking limit to huge number @@ -1665,11 +1756,11 @@ static void parseOptions(int argc, char **argv) { } if(fullIndex && strata && !stateful && !oldBest) { cerr << "--strata must be combined with --best" << endl; - exit(1); + throw std::runtime_error(""); } if(strata && !allHits && khits == 1 && mhits == 0xffffffff) { cerr << "--strata has no effect unless combined with -k, -m or -a" << endl; - exit(1); + throw std::runtime_error(""); } // If both -s and -u are used, we need to adjust qUpto accordingly // since it uses patid to know if we've reached the -u limit (and @@ -1677,7 +1768,7 @@ static void parseOptions(int argc, char **argv) { if(qUpto + skipReads > qUpto) { qUpto += skipReads; } - if(useShmem && useMm) { + if(useShmem && useMm && !quiet) { cerr << "Warning: --shmem overrides --mm..." << endl; useMm = false; } @@ -1691,7 +1782,7 @@ static void parseOptions(int argc, char **argv) { cerr << "Error: --chainin cannot be combined with paired-end alignment; aborting..." << endl; error = true; } - if(error) exit(1); + if(error) throw std::runtime_error(""); } if(outType == OUTPUT_CHAIN) { @@ -1708,12 +1799,12 @@ static void parseOptions(int argc, char **argv) { cerr << "Error: --chainout cannot be combined with paired-end alignment; aborting..." << endl; error = true; } - if(error) exit(1); + if(error) throw std::runtime_error(""); } if(mate1fw && trim5 > 0) { if(verbose) { - cout << "Adjusting -I/-X down by " << trim5 + cerr << "Adjusting -I/-X down by " << trim5 << " because mate1 is FW & trim5 is " << trim5 << endl; } maxInsert = max(0, (int)maxInsert - trim5); @@ -1721,7 +1812,7 @@ static void parseOptions(int argc, char **argv) { } if(mate2fw && trim3 > 0) { if(verbose) { - cout << "Adjusting -I/-X down by " << trim3 + cerr << "Adjusting -I/-X down by " << trim3 << " because mate2 is FW & trim3 is " << trim3 << endl; } maxInsert = max(0, (int)maxInsert - trim3); @@ -1729,7 +1820,7 @@ static void parseOptions(int argc, char **argv) { } if(!mate1fw && trim3 > 0) { if(verbose) { - cout << "Adjusting -I/-X down by " << trim3 + cerr << "Adjusting -I/-X down by " << trim3 << " because mate1 is RC & trim3 is " << trim3 << endl; } maxInsert = max(0, (int)maxInsert - trim3); @@ -1737,7 +1828,7 @@ static void parseOptions(int argc, char **argv) { } if(!mate2fw && trim5 > 0) { if(verbose) { - cout << "Adjusting -I/-X down by " << trim5 + cerr << "Adjusting -I/-X down by " << trim5 << " because mate2 is RC & trim5 is " << trim5 << endl; } maxInsert = max(0, (int)maxInsert - trim5); @@ -2022,6 +2113,7 @@ static void *exactSearchWorkerStateful(void *vp) { strandFix, rangeMode, verbose, + quiet, seed); PairedExactAlignerV1Factory alPEfact( ebwt, @@ -2050,6 +2142,7 @@ static void *exactSearchWorkerStateful(void *vp) { !better, rangeMode, verbose, + quiet, seed); { MixedMultiAligner multi( @@ -2112,7 +2205,7 @@ static void exactSearch(PairedPatternSource& _patsrc, if((mates1.size() > 0 || mates12.size() > 0) && mixedThresh < 0xffffffff) { Timer _t(cerr, "Time loading reference: ", timing); refs = new BitPairReference(adjustedEbwtFileBase, sanityCheck, NULL, &os, false, useMm, useShmem, mmSweep, verbose, startVerbose); - if(!refs->loaded()) exit(1); + if(!refs->loaded()) throw std::runtime_error(""); } exactSearch_refs = refs; @@ -2144,7 +2237,7 @@ static void exactSearch(PairedPatternSource& _patsrc, int ret; if((ret = pthread_join(threads[i], NULL)) != 0) { cerr << "Error: pthread_join returned non-zero status: " << ret << endl; - exit(1); + throw std::runtime_error(""); } } #endif @@ -2334,7 +2427,7 @@ static void mismatchSearch(PairedPatternSource& _patsrc, int ret; if((ret = pthread_join(threads[i], NULL)) != 0) { cerr << "Error: pthread_join returned non-zero status: " << ret << endl; - exit(1); + throw std::runtime_error(""); } } #endif @@ -2369,7 +2462,7 @@ static void mismatchSearch(PairedPatternSource& _patsrc, int ret; if((ret = pthread_join(threads[i], NULL)) != 0) { cerr << "Error: pthread_join returned non-zero status: " << ret << endl; - exit(1); + throw std::runtime_error(""); } } #endif @@ -2415,6 +2508,7 @@ static void *mismatchSearchWorkerFullStateful(void *vp) { strandFix, rangeMode, verbose, + quiet, seed); Paired1mmAlignerV1Factory alPEfact( ebwtFw, @@ -2443,6 +2537,7 @@ static void *mismatchSearchWorkerFullStateful(void *vp) { strandFix, rangeMode, verbose, + quiet, seed); { MixedMultiAligner multi( @@ -2549,7 +2644,7 @@ static void mismatchSearchFull(PairedPatternSource& _patsrc, if((mates1.size() > 0 || mates12.size() > 0) && mixedThresh < 0xffffffff) { Timer _t(cerr, "Time loading reference: ", timing); refs = new BitPairReference(adjustedEbwtFileBase, sanityCheck, NULL, &os, false, useMm, useShmem, mmSweep, verbose, startVerbose); - if(!refs->loaded()) exit(1); + if(!refs->loaded()) throw std::runtime_error(""); } mismatchSearch_refs = refs; @@ -2582,7 +2677,7 @@ static void mismatchSearchFull(PairedPatternSource& _patsrc, int ret; if((ret = pthread_join(threads[i], NULL)) != 0) { cerr << "Error: pthread_join returned non-zero status: " << ret << endl; - exit(1); + throw std::runtime_error(""); } } #endif @@ -2920,7 +3015,7 @@ static void twoOrThreeMismatchSearch( int ret; if((ret = pthread_join(threads[i], NULL)) != 0) { cerr << "Error: pthread_join returned non-zero status: " << ret << endl; - exit(1); + throw std::runtime_error(""); } } #endif @@ -2941,7 +3036,7 @@ static void twoOrThreeMismatchSearch( int ret; if((ret = pthread_join(threads[i], NULL)) != 0) { cerr << "Error: pthread_join returned non-zero status: " << ret << endl; - exit(1); + throw std::runtime_error(""); } } #endif @@ -2961,7 +3056,7 @@ static void twoOrThreeMismatchSearch( int ret; if((ret = pthread_join(threads[i], NULL)) != 0) { cerr << "Error: pthread_join returned non-zero status: " << ret << endl; - exit(1); + throw std::runtime_error(""); } } #endif @@ -3010,6 +3105,7 @@ static void *twoOrThreeMismatchSearchWorkerStateful(void *vp) { strandFix, rangeMode, verbose, + quiet, seed); Paired23mmAlignerV1Factory alPEfact( ebwtFw, @@ -3039,6 +3135,7 @@ static void *twoOrThreeMismatchSearchWorkerStateful(void *vp) { strandFix, rangeMode, verbose, + quiet, seed); { MixedMultiAligner multi( @@ -3167,7 +3264,7 @@ static void twoOrThreeMismatchSearchFull( if((mates1.size() > 0 || mates12.size() > 0) && mixedThresh < 0xffffffff) { Timer _t(cerr, "Time loading reference: ", timing); refs = new BitPairReference(adjustedEbwtFileBase, sanityCheck, NULL, &os, false, useMm, useShmem, mmSweep, verbose, startVerbose); - if(!refs->loaded()) exit(1); + if(!refs->loaded()) throw std::runtime_error(""); } twoOrThreeMismatchSearch_refs = refs; twoOrThreeMismatchSearch_patsrc = &_patsrc; @@ -3206,7 +3303,7 @@ static void twoOrThreeMismatchSearchFull( int ret; if((ret = pthread_join(threads[i], NULL)) != 0) { cerr << "Error: pthread_join returned non-zero status: " << ret << endl; - exit(1); + throw std::runtime_error(""); } } #endif @@ -3689,6 +3786,7 @@ static void* seededQualSearchWorkerFullStateful(void *vp) { strandFix, rangeMode, verbose, + quiet, seed, metrics); PairedSeedAlignerFactory alPEfact( @@ -3723,6 +3821,7 @@ static void* seededQualSearchWorkerFullStateful(void *vp) { strandFix, rangeMode, verbose, + quiet, seed); { MixedMultiAligner multi( @@ -3834,7 +3933,7 @@ static void seededQualCutoffSearch( int ret; if((ret = pthread_join(threads[i], NULL)) != 0) { cerr << "Error: pthread_join returned non-zero status: " << ret << endl; - exit(1); + throw std::runtime_error(""); } } #endif @@ -3847,7 +3946,7 @@ static void seededQualCutoffSearch( } catch(bad_alloc& ba) { cerr << "Could not reserve space for PartialAlignmentManager" << endl; cerr << "Please subdivide the read set and invoke bowtie separately for each subdivision" << endl; - exit(1); + throw std::runtime_error(""); } seededQualSearch_pamRc = pamRc; { @@ -3870,7 +3969,7 @@ static void seededQualCutoffSearch( int ret; if((ret = pthread_join(threads[i], NULL)) != 0) { cerr << "Error: pthread_join returned non-zero status: " << ret << endl; - exit(1); + throw std::runtime_error(""); } } #endif @@ -3889,7 +3988,7 @@ static void seededQualCutoffSearch( } catch(bad_alloc& ba) { cerr << "Could not reserve space for PartialAlignmentManager" << endl; cerr << "Please subdivide the read set and invoke bowtie separately for each subdivision" << endl; - exit(1); + throw std::runtime_error(""); } seededQualSearch_pamFw = pamFw; { @@ -3908,7 +4007,7 @@ static void seededQualCutoffSearch( int ret; if((ret = pthread_join(threads[i], NULL)) != 0) { cerr << "Error: pthread_join returned non-zero status: " << ret << endl; - exit(1); + throw std::runtime_error(""); } } #endif @@ -3936,7 +4035,7 @@ static void seededQualCutoffSearch( int ret; if((ret = pthread_join(threads[i], NULL)) != 0) { cerr << "Error: pthread_join returned non-zero status: " << ret << endl; - exit(1); + throw std::runtime_error(""); } } #endif @@ -3999,7 +4098,7 @@ static void seededQualCutoffSearchFull( if((mates1.size() > 0 || mates12.size() > 0) && mixedThresh < 0xffffffff) { Timer _t(cerr, "Time loading reference: ", timing); refs = new BitPairReference(adjustedEbwtFileBase, sanityCheck, NULL, &os, false, useMm, useShmem, mmSweep, verbose, startVerbose); - if(!refs->loaded()) exit(1); + if(!refs->loaded()) throw std::runtime_error(""); } seededQualSearch_refs = refs; @@ -4041,7 +4140,7 @@ static void seededQualCutoffSearchFull( int ret; if((ret = pthread_join(threads[i], NULL)) != 0) { cerr << "Error: pthread_join returned non-zero status: " << ret << endl; - exit(1); + throw std::runtime_error(""); } } #endif @@ -4109,7 +4208,7 @@ patsrcFromStrings(int format, const vector& qs) { seed); default: { cerr << "Internal error; bad patsrc format: " << format << endl; - exit(1); + throw std::runtime_error(""); } } } @@ -4171,7 +4270,7 @@ static void driver(const char * type, for(size_t i = 0; i < mates12.size(); i++) { if(mates12[i] == "-" && !fullIndex) { cerr << "Input file \"-\" is not compatible with -z/--phased" << endl; - exit(1); + throw std::runtime_error(""); } const vector* qs = &mates12; vector tmp; @@ -4191,7 +4290,7 @@ static void driver(const char * type, for(size_t i = 0; i < mates1.size(); i++) { if(mates1[i] == "-" && !fullIndex) { cerr << "Input file \"-\" is not compatible with -z/--phased" << endl; - exit(1); + throw std::runtime_error(""); } const vector* qs = &mates1; vector tmp; @@ -4211,7 +4310,7 @@ static void driver(const char * type, for(size_t i = 0; i < mates2.size(); i++) { if(mates2[i] == "-" && !fullIndex) { cerr << "Input file \"-\" is not compatible with -z/--phased" << endl; - exit(1); + throw std::runtime_error(""); } const vector* qs = &mates2; vector tmp; @@ -4236,7 +4335,7 @@ static void driver(const char * type, for(size_t i = 0; i < queries.size(); i++) { if(queries[i] == "-" && !fullIndex) { cerr << "Input file \"-\" is not compatible with -z/--phased" << endl; - exit(1); + throw std::runtime_error(""); } const vector* qs = &queries; PatternSource* patsrc = NULL; @@ -4266,8 +4365,6 @@ static void driver(const char * type, patsrc = new PairedDualPatternSource(patsrcs_a, patsrcs_b); } - if(skipSearch) return; - // Open hit output file if(verbose || startVerbose) { cerr << "Opening hit output file: "; logTime(cerr, true); @@ -4276,18 +4373,20 @@ static void driver(const char * type, if(!outfile.empty()) { if(refOut) { fout = NULL; - cerr << "Warning: ignoring alignment output file " << outfile << " because --refout was specified" << endl; + if(!quiet) { + cerr << "Warning: ignoring alignment output file " << outfile << " because --refout was specified" << endl; + } } else { fout = new OutFileBuf(outfile.c_str(), outType == OUTPUT_BINARY); } } else { if(outType == OUTPUT_BINARY && !refOut) { cerr << "Error: Must specify an output file when output mode is binary" << endl; - exit(1); + throw std::runtime_error(""); } else if(outType == OUTPUT_CHAIN) { cerr << "Error: Must specify an output file when output mode is --chain" << endl; - exit(1); + throw std::runtime_error(""); } fout = new OutFileBuf(); } @@ -4432,7 +4531,7 @@ static void driver(const char * type, break; default: cerr << "Invalid output type: " << outType << endl; - exit(1); + throw std::runtime_error(""); } if(verbose || startVerbose) { cerr << "Dispatching to search driver: "; logTime(cerr, true); @@ -4474,7 +4573,7 @@ static void driver(const char * type, } } else { cerr << "Error: " << mismatches << " is not a supported number of mismatches" << endl; - exit(1); + throw std::runtime_error(""); } } else { // Search without mismatches @@ -4492,8 +4591,6 @@ static void driver(const char * type, if(!quiet) { sink->finish(hadoopOut); // end the hits section of the hit file } - if(dumpHHHits != NULL) dumpHHHits->close(); - if(dumpNoHits != NULL) dumpNoHits->close(); for(size_t i = 0; i < patsrcs_a.size(); i++) { assert(patsrcs_a[i] != NULL); delete patsrcs_a[i]; @@ -4515,116 +4612,121 @@ static void driver(const char * type, * main function. Parses command-line arguments. */ int main(int argc, char **argv) { - string ebwtFile; // read serialized Ebwt from this file - string query; // read query string(s) from this file - vector queries; - string outfile; // write query results to this file - if(startVerbose) { cerr << "Entered main(): "; logTime(cerr, true); } - parseOptions(argc, argv); - argv0 = argv[0]; - if(showVersion) { - cout << argv0 << " version " << BOWTIE_VERSION << endl; - if(sizeof(void*) == 4) { - cout << "32-bit" << endl; - } else if(sizeof(void*) == 8) { - cout << "64-bit" << endl; - } else { - cout << "Neither 32- nor 64-bit: sizeof(void*) = " << sizeof(void*) << endl; - } - cout << "Built on " << BUILD_HOST << endl; - cout << BUILD_TIME << endl; - cout << "Compiler: " << COMPILER_VERSION << endl; - cout << "Options: " << COMPILER_OPTIONS << endl; - cout << "Sizeof {int, long, long long, void*, size_t, off_t}: {" - << sizeof(int) - << ", " << sizeof(long) << ", " << sizeof(long long) - << ", " << sizeof(void *) << ", " << sizeof(size_t) - << ", " << sizeof(off_t) << "}" << endl; - return 0; - } -#ifdef CHUD_PROFILING - chudInitialize(); - chudAcquireRemoteAccess(); -#endif - { - Timer _t(cerr, "Overall time: ", timing); - if(startVerbose) { - cerr << "Parsing index and read arguments: "; logTime(cerr, true); - } - - // Get index basename - if(optind >= argc) { - cerr << "No index, query, or output file specified!" << endl; - printUsage(cerr); - return 1; - } - ebwtFile = argv[optind++]; - - // Get query filename - if(optind >= argc) { - if(mates1.size() > 0 || mates12.size() > 0) { - query = ""; + try { + resetOptions(); + string ebwtFile; // read serialized Ebwt from this file + string query; // read query string(s) from this file + vector queries; + string outfile; // write query results to this file + if(startVerbose) { cerr << "Entered main(): "; logTime(cerr, true); } + parseOptions(argc, argv); + argv0 = argv[0]; + if(showVersion) { + cout << argv0 << " version " << BOWTIE_VERSION << endl; + if(sizeof(void*) == 4) { + cout << "32-bit" << endl; + } else if(sizeof(void*) == 8) { + cout << "64-bit" << endl; } else { - cerr << "No query or output file specified!" << endl; - printUsage(cerr); - return 1; + cout << "Neither 32- nor 64-bit: sizeof(void*) = " << sizeof(void*) << endl; + } + cout << "Built on " << BUILD_HOST << endl; + cout << BUILD_TIME << endl; + cout << "Compiler: " << COMPILER_VERSION << endl; + cout << "Options: " << COMPILER_OPTIONS << endl; + cout << "Sizeof {int, long, long long, void*, size_t, off_t}: {" + << sizeof(int) + << ", " << sizeof(long) << ", " << sizeof(long long) + << ", " << sizeof(void *) << ", " << sizeof(size_t) + << ", " << sizeof(off_t) << "}" << endl; + return 0; + } + #ifdef CHUD_PROFILING + chudInitialize(); + chudAcquireRemoteAccess(); + #endif + { + Timer _t(cerr, "Overall time: ", timing); + if(startVerbose) { + cerr << "Parsing index and read arguments: "; logTime(cerr, true); } - } else if (mates1.size() == 0 && mates12.size() == 0) { - query = argv[optind++]; - // Tokenize the list of query files - tokenize(query, ",", queries); - if(queries.size() < 1) { - cerr << "Tokenized query file list was empty!" << endl; + + // Get index basename + if(optind >= argc) { + cerr << "No index, query, or output file specified!" << endl; printUsage(cerr); return 1; } - } + ebwtFile = argv[optind++]; - // Get output filename - if(optind < argc) { - outfile = argv[optind++]; - } + // Get query filename + if(optind >= argc) { + if(mates1.size() > 0 || mates12.size() > 0) { + query = ""; + } else { + cerr << "No query or output file specified!" << endl; + printUsage(cerr); + return 1; + } + } else if (mates1.size() == 0 && mates12.size() == 0) { + query = argv[optind++]; + // Tokenize the list of query files + tokenize(query, ",", queries); + if(queries.size() < 1) { + cerr << "Tokenized query file list was empty!" << endl; + printUsage(cerr); + return 1; + } + } - // Extra parametesr? - if(optind < argc) { - cerr << "Extra parameter(s) specified: "; - for(int i = optind; i < argc; i++) { - cerr << "\"" << argv[i] << "\""; - if(i < argc-1) cerr << ", "; + // Get output filename + if(optind < argc) { + outfile = argv[optind++]; } - cerr << endl; - if(mates1.size() > 0) { - cerr << "Note that if files are specified using -1/-2, a file cannot" << endl - << "also be specified. Please run bowtie separately for mates and singles." << endl; + + // Extra parametesr? + if(optind < argc) { + cerr << "Extra parameter(s) specified: "; + for(int i = optind; i < argc; i++) { + cerr << "\"" << argv[i] << "\""; + if(i < argc-1) cerr << ", "; + } + cerr << endl; + if(mates1.size() > 0) { + cerr << "Note that if files are specified using -1/-2, a file cannot" << endl + << "also be specified. Please run bowtie separately for mates and singles." << endl; + } + throw std::runtime_error(""); } - exit(1); - } - // Optionally summarize - if(verbose) { - cout << "Input ebwt file: \"" << ebwtFile << "\"" << endl; - cout << "Query inputs (DNA, " << file_format_names[format] << "):" << endl; - for(size_t i = 0; i < queries.size(); i++) { - cout << " " << queries[i] << endl; + // Optionally summarize + if(verbose) { + cout << "Input ebwt file: \"" << ebwtFile << "\"" << endl; + cout << "Query inputs (DNA, " << file_format_names[format] << "):" << endl; + for(size_t i = 0; i < queries.size(); i++) { + cout << " " << queries[i] << endl; + } + cout << "Output file: \"" << outfile << "\"" << endl; + cout << "Local endianness: " << (currentlyBigEndian()? "big":"little") << endl; + cout << "Sanity checking: " << (sanityCheck? "enabled":"disabled") << endl; + #ifdef NDEBUG + cout << "Assertions: disabled" << endl; + #else + cout << "Assertions: enabled" << endl; + #endif } - cout << "Output file: \"" << outfile << "\"" << endl; - cout << "Local endianness: " << (currentlyBigEndian()? "big":"little") << endl; - cout << "Sanity checking: " << (sanityCheck? "enabled":"disabled") << endl; - #ifdef NDEBUG - cout << "Assertions: disabled" << endl; - #else - cout << "Assertions: enabled" << endl; - #endif - } - if(ipause) { - cout << "Press key to continue..." << endl; - getchar(); + if(ipause) { + cout << "Press key to continue..." << endl; + getchar(); + } + driver > >("DNA", ebwtFile, query, queries, outfile); + CHUD_STOP(); } - driver > >("DNA", ebwtFile, query, queries, outfile); - CHUD_STOP(); - } #ifdef CHUD_PROFILING - chudReleaseRemoteAccess(); + chudReleaseRemoteAccess(); #endif - return 0; + return 0; + } catch(exception& e) { + return 1; + } } diff --git a/ebwt_search_backtrack.h b/ebwt_search_backtrack.h index 61e7cfa..337aa2d 100644 --- a/ebwt_search_backtrack.h +++ b/ebwt_search_backtrack.h @@ -1,6 +1,7 @@ #ifndef EBWT_SEARCH_BACKTRACK_H_ #define EBWT_SEARCH_BACKTRACK_H_ +#include #include #include "pat.h" #include "qual.h" @@ -3123,7 +3124,7 @@ class EbwtRangeSourceDriver : if(cext == PIN_TO_BEGINNING) return 0; if(cext == PIN_TO_LEN) return len; cerr << "Bad SearchConstraintExtent: " << cext; - exit(1); + throw std::runtime_error(""); } bool seed_; diff --git a/filebuf.h b/filebuf.h index ebd4ba7..399db6a 100644 --- a/filebuf.h +++ b/filebuf.h @@ -1,3 +1,8 @@ +/* + * filebuf.h + * + * Author: Ben Langmead + */ #ifndef FILEBUF_H_ #define FILEBUF_H_ @@ -6,6 +11,7 @@ #include #include #include +#include /** * Simple wrapper for a FILE*, istream or ifstream that reads it in @@ -270,7 +276,7 @@ class BitpairOutFileBuf { out_ = fopen(in, "wb"); if(out_ == NULL) { std::cerr << "Error: Could not open bitpair-output file " << in << std::endl; - exit(1); + throw std::runtime_error(""); } } @@ -289,7 +295,7 @@ class BitpairOutFileBuf { // Flush the buffer if(!fwrite((const void *)buf_, BUF_SZ, 1, out_)) { std::cerr << "Error writing to the reference index file (.4.ebwt)" << std::endl; - exit(1); + throw std::runtime_error(""); } // Reset to beginning of the buffer cur_ = 0; @@ -309,7 +315,7 @@ class BitpairOutFileBuf { if(bpPtr_ == 0) cur_--; if(!fwrite((const void *)buf_, cur_ + 1, 1, out_)) { std::cerr << "Error writing to the reference index file (.4.ebwt)" << std::endl; - exit(1); + throw std::runtime_error(""); } } fclose(out_); @@ -341,7 +347,7 @@ class OutFileBuf { out_ = fopen(out, binary ? "wb" : "w"); if(out_ == NULL) { std::cerr << "Error: Could not open alignment output file " << out << std::endl; - exit(1); + throw std::runtime_error(""); } } @@ -400,7 +406,7 @@ class OutFileBuf { void flush() { if(!fwrite((const void *)buf_, cur_, 1, out_)) { std::cerr << "Error while flushing and closing output" << std::endl; - exit(1); + throw std::runtime_error(""); } cur_ = 0; } diff --git a/hit.h b/hit.h index 48a497d..2e51405 100644 --- a/hit.h +++ b/hit.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "alphabet.h" #include "assert_helpers.h" @@ -85,27 +86,27 @@ class Hit { if(seqan::length(patName) > 0xffff) { cerr << "Error: One or more read names are 2^16 characters or longer. Please truncate" << endl << "read names and re-run bowtie." << endl; - exit(1); + throw std::runtime_error(""); } if(mms.count() > 0xff) { cerr << "Error: The alignment contains 256 or more mismatches. bowtie cannot handle" << endl << "alignments with this many alignments. Please provide smaller reads or consider" << endl << "using a different tool." << endl; - exit(1); + throw std::runtime_error(""); } if(seqan::length(quals) > 0xffff) { cerr << "Error: One or more quality strings are 2^16 characters or longer. Please" << endl << "truncate reads and re-run bowtie." << endl; - exit(1); + throw std::runtime_error(""); } if(seqan::length(patSeq) > 0xffff) { cerr << "Error: One or more read sequences are 2^16 characters or longer. Please" << endl << "truncate reads and re-run bowtie." << endl; - exit(1); + throw std::runtime_error(""); } if(stratum < 0 || stratum >= 4) { cerr << "Error: Stratum is out of bounds: " << stratum << endl; - exit(1); + throw std::runtime_error(""); } } @@ -304,7 +305,7 @@ class RecalTable { memset(ents_, 0, len_ << 2); } catch(std::bad_alloc& e) { cerr << "Error allocating recalibration table with " << len_ << " entries" << endl; - exit(1); + throw std::runtime_error(""); } } } @@ -1099,7 +1100,7 @@ class HitSink { s = name.substr(0, dotoff) + "_2" + s.substr(dotoff); } } else if(mateType != 0) { - cerr << "Bad mate type " << mateType << endl; exit(1); + cerr << "Bad mate type " << mateType << endl; throw std::runtime_error(""); } std::ofstream* tmp = new ofstream(s.c_str(), ios::out); if(tmp->fail()) { @@ -1108,7 +1109,7 @@ class HitSink { } else { cerr << "Could not open paired-end aligned/unaligned-read file for writing: " << name << endl; } - exit(1); + throw std::runtime_error(""); } return tmp; } @@ -1362,7 +1363,7 @@ class HitSinkPerThread { virtual bool setHits(HitSet& hs) { if(!hs.empty()) { cerr << "Error: default setHits() called with non-empty HitSet" << endl; - exit(1); + throw std::runtime_error(""); } return false; } @@ -2725,11 +2726,11 @@ class VerboseHitSink : public HitSink { } if(in.bad()) { cerr << "Alignment file set \"bad\" bit" << endl; - exit(1); + throw std::runtime_error(""); } if(in.fail()) { cerr << "A line from the alignment file was longer than 4K" << endl; - exit(1); + throw std::runtime_error(""); } in.getline(buf, 4096); size_t len = in.gcount(); @@ -2823,7 +2824,7 @@ class VerboseHitSink : public HitSink { } } cerr << "Could not find an id to map reference name \"" << refName << "\" to." << endl; - exit(1); + throw std::runtime_error(""); } // Parse reference sequence offset @@ -3583,7 +3584,7 @@ class ChainingHitSink : public HitSink { */ virtual void append(ostream& o, const Hit& h) { cerr << "Error: ChainingHitSink::append() not implemented" << endl; - exit(1); + throw std::runtime_error(""); } /** diff --git a/map_tool.cpp b/map_tool.cpp index 120704e..c74b329 100644 --- a/map_tool.cpp +++ b/map_tool.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -185,7 +186,7 @@ static void parseOptions(int argc, char **argv) { default: cerr << "Unknown option: " << (char)next_option << endl; printUsage(cerr); - exit(1); + throw runtime_error(""); } } while(next_option != -1); } @@ -219,146 +220,150 @@ static char *argv0 = NULL; * output appropriate converted alignment. */ int main(int argc, char **argv) { - string infile; - vector infiles; - string outfile; - argv0 = argv[0]; - parseOptions(argc, argv); - ostream *out = &cout; - if(showVersion) { - cout << argv0 << " version " << BOWTIE_VERSION << endl; - if(sizeof(void*) == 4) { - cout << "32-bit" << endl; - } else if(sizeof(void*) == 8) { - cout << "64-bit" << endl; - } else { - cout << "Neither 32- nor 64-bit: sizeof(void*) = " << sizeof(void*) << endl; + try { + string infile; + vector infiles; + string outfile; + argv0 = argv[0]; + parseOptions(argc, argv); + ostream *out = &cout; + if(showVersion) { + cout << argv0 << " version " << BOWTIE_VERSION << endl; + if(sizeof(void*) == 4) { + cout << "32-bit" << endl; + } else if(sizeof(void*) == 8) { + cout << "64-bit" << endl; + } else { + cout << "Neither 32- nor 64-bit: sizeof(void*) = " << sizeof(void*) << endl; + } + cout << "Built on " << BUILD_HOST << endl; + cout << BUILD_TIME << endl; + cout << "Compiler: " << COMPILER_VERSION << endl; + cout << "Options: " << COMPILER_OPTIONS << endl; + cout << "Sizeof {int, long, long long, void*, size_t, off_t}: {" + << sizeof(int) + << ", " << sizeof(long) << ", " << sizeof(long long) + << ", " << sizeof(void *) << ", " << sizeof(size_t) + << ", " << sizeof(off_t) << "}" << endl; + return 0; } - cout << "Built on " << BUILD_HOST << endl; - cout << BUILD_TIME << endl; - cout << "Compiler: " << COMPILER_VERSION << endl; - cout << "Options: " << COMPILER_OPTIONS << endl; - cout << "Sizeof {int, long, long long, void*, size_t, off_t}: {" - << sizeof(int) - << ", " << sizeof(long) << ", " << sizeof(long long) - << ", " << sizeof(void *) << ", " << sizeof(size_t) - << ", " << sizeof(off_t) << "}" << endl; - return 0; - } - // Get input filename - if(optind >= argc) { - cerr << "No input alignments file specified!" << endl; - printUsage(cerr); - return 1; - } - infile = argv[optind++]; - tokenize(infile, ",", infiles); - - // Get output filename - if(optind < argc) { - out = new ofstream(argv[optind]); - } else if(outformat == FORMAT_BIN) { - cerr << "If -B/--outbin is specified, must also be specified" << endl; - exit(1); - } + // Get input filename + if(optind >= argc) { + cerr << "No input alignments file specified!" << endl; + printUsage(cerr); + return 1; + } + infile = argv[optind++]; + tokenize(infile, ",", infiles); - // Read in reference names, if requested - vector refnames; - if(!ebwt_name.empty()) { - string adjust = adjustEbwtBase(argv[0], ebwt_name, verbose); - readEbwtRefnames(adjust, refnames); - if(verbose) { - cout << "Successfully read " << refnames.size() << " reference sequence names from index" << endl; + // Get output filename + if(optind < argc) { + out = new ofstream(argv[optind]); + } else if(outformat == FORMAT_BIN) { + cerr << "If -B/--outbin is specified, must also be specified" << endl; + throw runtime_error(""); } - } - for(size_t i = 0; i < infiles.size(); i++) { - istream *inp; - if(infiles[i] == "-") { - inp = &cin; - } else { - inp = new ifstream(infiles[i].c_str(), ios_base::out | ios_base::binary); + // Read in reference names, if requested + vector refnames; + if(!ebwt_name.empty()) { + string adjust = adjustEbwtBase(argv[0], ebwt_name, verbose); + readEbwtRefnames(adjust, refnames); + if(verbose) { + cout << "Successfully read " << refnames.size() << " reference sequence names from index" << endl; + } } - istream& in = *inp; - vector hits; - while(in.good() && !in.eof()) { - if(sorthits) { - hits.resize(hits.size()+1); // add elt on back using default constructor - bool good; - vector* inrefnames = &refnames; - if(informat == FORMAT_BIN) { - good = BinaryHitSink::readHit(hits.back(), in, inrefnames, verbose); + + for(size_t i = 0; i < infiles.size(); i++) { + istream *inp; + if(infiles[i] == "-") { + inp = &cin; + } else { + inp = new ifstream(infiles[i].c_str(), ios_base::out | ios_base::binary); + } + istream& in = *inp; + vector hits; + while(in.good() && !in.eof()) { + if(sorthits) { + hits.resize(hits.size()+1); // add elt on back using default constructor + bool good; + vector* inrefnames = &refnames; + if(informat == FORMAT_BIN) { + good = BinaryHitSink::readHit(hits.back(), in, inrefnames, verbose); + } else { + good = VerboseHitSink::readHit(hits.back(), in, inrefnames, verbose); + } + if(!good) { + hits.pop_back(); + continue; // bad alignment; skip it + } } else { - good = VerboseHitSink::readHit(hits.back(), in, inrefnames, verbose); - } - if(!good) { - hits.pop_back(); - continue; // bad alignment; skip it + Hit h; + bool good; + vector* inrefnames = &refnames; + vector* outrefnames = NULL; + if(!refIdx) { + outrefnames = &refnames; + } + if(informat == FORMAT_BIN) { + good = BinaryHitSink::readHit(h, in, inrefnames, verbose); + } else { + good = VerboseHitSink::readHit(h, in, inrefnames, verbose); + } + if(!good) continue; // bad alignment; skip it + + if(outformat == FORMAT_BIN) { + BinaryHitSink::append(*out, h, outrefnames, 0); + } else if(outformat == FORMAT_DEFAULT) { + VerboseHitSink::append(*out, h, outrefnames, NULL, NULL, 0 /* partition */, 0); + } else if(outformat == FORMAT_FASTQ) { + fastqAppend(*out, h); + } else if(outformat == FORMAT_FASTA) { + fastaAppend(*out, h); + } else { + ConciseHitSink::append(*out, h, 0, false /* reportOpps */); + } } - } else { - Hit h; - bool good; - vector* inrefnames = &refnames; + } + // If the user requested that hits be sorted, sort them in memory and output them now + if(sorthits) { + std::sort(hits.begin(), hits.end()); vector* outrefnames = NULL; if(!refIdx) { outrefnames = &refnames; } - if(informat == FORMAT_BIN) { - good = BinaryHitSink::readHit(h, in, inrefnames, verbose); - } else { - good = VerboseHitSink::readHit(h, in, inrefnames, verbose); + for(size_t i = 0; i < hits.size(); i++) { + Hit& h = hits[i]; + if(outformat == FORMAT_BIN) { + BinaryHitSink::append(*out, h, outrefnames, 0); + } else if(outformat == FORMAT_DEFAULT) { + VerboseHitSink::append(*out, h, outrefnames, NULL, NULL, 0 /* partition */, 0); + } else if(outformat == FORMAT_FASTQ) { + fastqAppend(*out, h); + } else if(outformat == FORMAT_FASTA) { + fastaAppend(*out, h); + } else { + ConciseHitSink::append(*out, h, 0, false /* reportOpps */); + } } - if(!good) continue; // bad alignment; skip it - - if(outformat == FORMAT_BIN) { - BinaryHitSink::append(*out, h, outrefnames, 0); - } else if(outformat == FORMAT_DEFAULT) { - VerboseHitSink::append(*out, h, outrefnames, NULL, NULL, 0 /* partition */, 0); - } else if(outformat == FORMAT_FASTQ) { - fastqAppend(*out, h); - } else if(outformat == FORMAT_FASTA) { - fastaAppend(*out, h); - } else { - ConciseHitSink::append(*out, h, 0, false /* reportOpps */); - } - } - } - // If the user requested that hits be sorted, sort them in memory and output them now - if(sorthits) { - std::sort(hits.begin(), hits.end()); - vector* outrefnames = NULL; - if(!refIdx) { - outrefnames = &refnames; + } else { + assert_eq(0, hits.size()); } - for(size_t i = 0; i < hits.size(); i++) { - Hit& h = hits[i]; - if(outformat == FORMAT_BIN) { - BinaryHitSink::append(*out, h, outrefnames, 0); - } else if(outformat == FORMAT_DEFAULT) { - VerboseHitSink::append(*out, h, outrefnames, NULL, NULL, 0 /* partition */, 0); - } else if(outformat == FORMAT_FASTQ) { - fastqAppend(*out, h); - } else if(outformat == FORMAT_FASTA) { - fastaAppend(*out, h); - } else { - ConciseHitSink::append(*out, h, 0, false /* reportOpps */); - } + if(infiles[i] != "-") { + ((ifstream*)inp)->close(); + delete inp; } - } else { - assert_eq(0, hits.size()); } - if(infiles[i] != "-") { - ((ifstream*)inp)->close(); - delete inp; + + // Close and delete output + if(optind < argc) { + ((ofstream*)out)->close(); + delete out; } - } - // Close and delete output - if(optind < argc) { - ((ofstream*)out)->close(); - delete out; + return 0; + } catch(std::exception& e) { + return 1; } - - return 0; } diff --git a/maq_convert/bfa.cpp b/maq_convert/bfa.cpp index 460a246..ced9914 100644 --- a/maq_convert/bfa.cpp +++ b/maq_convert/bfa.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -13,7 +14,7 @@ nst_bfa1_t *nst_new_bfa1() bfa1 = (nst_bfa1_t*)malloc(sizeof(nst_bfa1_t)); if(bfa1 == NULL) { cerr << "Exhausted memory allocating space for the .bfa file" << endl; - exit(1); + throw std::runtime_error(""); } bfa1->name = 0; bfa1->seq = bfa1->mask = 0; @@ -30,7 +31,7 @@ void nst_delete_bfa1(nst_bfa1_t *bfa1) } static void bfa_read_error() { fprintf(stderr, "Error reading from .bfa file\n"); - exit(1); + throw std::runtime_error(""); } nst_bfa1_t *nst_load_bfa1(FILE *fp) { @@ -41,7 +42,7 @@ nst_bfa1_t *nst_load_bfa1(FILE *fp) bfa1->name = (char*)malloc(sizeof(char) * len); if(bfa1->name == NULL) { cerr << "Exhausted memory allocating space for the .bfa file name" << endl; - exit(1); + throw std::runtime_error(""); } /* * BTL: I had to add in these return-value checks to keep gcc 4.3.2 @@ -59,7 +60,7 @@ nst_bfa1_t *nst_load_bfa1(FILE *fp) bfa1->seq = (bit64_t*)malloc(sizeof(bit64_t) * bfa1->len); if(bfa1->seq == NULL) { cerr << "Exhausted memory allocating space for the .bfa file sequence" << endl; - exit(1); + throw std::runtime_error(""); } if(fread(bfa1->seq, sizeof(bit64_t), bfa1->len, fp) != (size_t)bfa1->len) { bfa_read_error(); @@ -67,7 +68,7 @@ nst_bfa1_t *nst_load_bfa1(FILE *fp) bfa1->mask = (bit64_t*)malloc(sizeof(bit64_t) * bfa1->len); if(bfa1->mask == NULL) { cerr << "Exhausted memory allocating space for the .bfa file mask" << endl; - exit(1); + throw std::runtime_error(""); } if(fread(bfa1->mask, sizeof(bit64_t), bfa1->len, fp) != (size_t)bfa1->len) { bfa_read_error(); diff --git a/maq_convert/bowtie_convert.cpp b/maq_convert/bowtie_convert.cpp index da0bf9c..cd3b92d 100644 --- a/maq_convert/bowtie_convert.cpp +++ b/maq_convert/bowtie_convert.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "maqmap.h" #include "algo.hh" @@ -195,14 +196,14 @@ int convert_bwt_to_maq(const string& bwtmap_fname, if (!bwtf) { fprintf(stderr, "Error: could not open Bowtie mapfile %s for reading\n", bwtmap_fname.c_str()); - exit(1); + throw std::runtime_error(""); } void* maqf = gzopen(maqmap_fname.c_str(), "w"); if (!maqf) { fprintf(stderr, "Error: could not open Maq mapfile %s for writing\n", maqmap_fname.c_str()); - exit(1); + throw std::runtime_error(""); } std::map seqid_to_name; @@ -286,7 +287,7 @@ int convert_bwt_to_maq(const string& bwtmap_fname, (aln_t*)malloc(sizeof(aln_t) * (next_max)); if(tmp == NULL) { cerr << "Memory exhausted allocating space for alignments." << endl; - exit(1); + throw std::runtime_error(""); } memcpy(tmp, mm->mapped_reads, sizeof(aln_t) * (max)); free(mm->mapped_reads); @@ -419,7 +420,7 @@ int convert_bwt_to_maq(const string& bwtmap_fname, mm->ref_name = (char**)malloc(sizeof(char*) * mm->n_ref); if(mm->ref_name == NULL) { cerr << "Exhausted memory allocating reference name" << endl; - exit(1); + throw std::runtime_error(""); } for (map::const_iterator i = names_to_ids.begin(); @@ -462,7 +463,7 @@ void get_names_from_bfa(const string& bfa_filename, if (!bfaf) { fprintf(stderr, "Error: could not open Binary FASTA file %s for reading\n", bfa_filename.c_str()); - exit(1); + throw std::runtime_error(""); } unsigned int next_id = 0; @@ -480,75 +481,79 @@ void get_names_from_bfa(const string& bfa_filename, int main(int argc, char **argv) { - string bwtmap_filename; - string maqmap_filename; - string bfa_filename; - const char *short_options = "voh?"; - int next_option; - do { - next_option = getopt(argc, argv, short_options); - switch (next_option) { - case 'v': /* verbose */ - verbose = true; - break; - case 'o': - short_map_format = true; - break; - case 'h': - case '?': - printLongUsage(cout); - exit(0); - case -1: /* Done with options. */ - break; - default: - printUsage(cerr); - exit(1); + try { + string bwtmap_filename; + string maqmap_filename; + string bfa_filename; + const char *short_options = "voh?"; + int next_option; + do { + next_option = getopt(argc, argv, short_options); + switch (next_option) { + case 'v': /* verbose */ + verbose = true; + break; + case 'o': + short_map_format = true; + break; + case 'h': + case '?': + printLongUsage(cout); + exit(0); + case -1: /* Done with options. */ + break; + default: + printUsage(cerr); + throw std::runtime_error(""); + } + } while(next_option != -1); + + if(optind >= argc) { + printUsage(cerr); + return 1; } - } while(next_option != -1); - if(optind >= argc) { - printUsage(cerr); - return 1; - } + // The Bowtie output text file to be converted + bwtmap_filename = argv[optind++]; - // The Bowtie output text file to be converted - bwtmap_filename = argv[optind++]; + if(optind >= argc) { + printUsage(cerr); + return 1; + } - if(optind >= argc) { - printUsage(cerr); - return 1; - } + // The name of the binary Maq map to be written + maqmap_filename = argv[optind++]; - // The name of the binary Maq map to be written - maqmap_filename = argv[optind++]; + // An optional argument: + // a two-column text file of [Bowtie ref id, reference name string] pairs + if(optind >= argc) + { + printUsage(cerr); + return 1; + } + bfa_filename = string(argv[optind++]); - // An optional argument: - // a two-column text file of [Bowtie ref id, reference name string] pairs - if(optind >= argc) - { - printUsage(cerr); - return 1; - } - bfa_filename = string(argv[optind++]); + init_log_n(); - init_log_n(); + map names_to_ids; + get_names_from_bfa(bfa_filename, names_to_ids); - map names_to_ids; - get_names_from_bfa(bfa_filename, names_to_ids); + int ret; + if (short_map_format) + { + ret = convert_bwt_to_maq<64>(bwtmap_filename, + maqmap_filename, + names_to_ids); + } + else + { + ret = convert_bwt_to_maq<128>(bwtmap_filename, + maqmap_filename, + names_to_ids); + } - int ret; - if (short_map_format) - { - ret = convert_bwt_to_maq<64>(bwtmap_filename, - maqmap_filename, - names_to_ids); - } - else - { - ret = convert_bwt_to_maq<128>(bwtmap_filename, - maqmap_filename, - names_to_ids); + return ret; + } catch(std::exception& e) { + return 1; } - - return ret; } diff --git a/maq_convert/maqmap.h b/maq_convert/maqmap.h index 64578a2..0cadf64 100644 --- a/maq_convert/maqmap.h +++ b/maq_convert/maqmap.h @@ -7,6 +7,7 @@ #define MAQMAP_FORMAT_NEW -1 #include +#include #include #include #include @@ -49,7 +50,7 @@ header_t* maq_init_header() header_t* mm = (header_t*)calloc(1, sizeof(header_t)); if(mm == NULL) { std::cerr << "Exhausted memory allocating maqmap header" << std::endl; - exit(1); + throw std::runtime_error(""); } mm->format = MAQMAP_FORMAT_NEW; return mm; @@ -95,14 +96,14 @@ header_t* maq_read_header(gzFile fp) mm->ref_name = (char**)calloc(mm->n_ref, sizeof(char*)); if(mm->ref_name == NULL) { std::cerr << "Exhausted memory allocating reference name list" << std::endl; - exit(1); + throw std::runtime_error(""); } for (k = 0; k != mm->n_ref; ++k) { gzread(fp, &len, 4); mm->ref_name[k] = (char*)malloc(len); if(mm->ref_name[k] == NULL) { std::cerr << "Exhausted memory allocating reference name" << std::endl; - exit(1); + throw std::runtime_error(""); } gzread(fp, mm->ref_name[k], len); } diff --git a/pat.h b/pat.h index e5e9664..86efc1c 100644 --- a/pat.h +++ b/pat.h @@ -276,7 +276,7 @@ class PatternSource { out_.open(dumpfile_, ios_base::out); if(!out_.good()) { cerr << "Could not open pattern dump file \"" << dumpfile_ << "\" for writing" << endl; - exit(1); + throw std::runtime_error(""); } } MUTEX_INIT(lock_); @@ -968,7 +968,7 @@ class RandomPatternSource : public PatternSource { { if(length_ > 1024) { cerr << "Read length for RandomPatternSource may not exceed 1024; got " << length_ << endl; - exit(1); + throw std::runtime_error(""); } rand_.init(seed_); } @@ -1063,7 +1063,7 @@ class RandomPatternSourcePerThread : public PatternSourcePerThread { patid_ = thread_; if(length_ > 1024) { cerr << "Read length for RandomPatternSourcePerThread may not exceed 1024; got " << length_ << endl; - exit(1); + throw std::runtime_error(""); } rand_.init(thread_); } @@ -1499,7 +1499,7 @@ class BufferedFilePatternSource : public TrimmingPatternSource { filebuf_.newFile(in); return; } - exit(1); + throw std::runtime_error(""); } vector infiles_; /// filenames for read files vector errs_; /// whether we've already printed an error for each file @@ -1576,7 +1576,7 @@ class FastaPatternSource : public BufferedFilePatternSource { } if(c != '>') { cerr << "Error: reads file does not look like a FASTA file" << endl; - exit(1); + throw std::runtime_error(""); } assert(c == '>' || c == '#'); first_ = false; @@ -1613,8 +1613,8 @@ class FastaPatternSource : public BufferedFilePatternSource { if(isalpha(c) && begin++ >= this->trim5_) { if(dstLen + 1 > 1024) { cerr << "Input file contained a pattern more than 1024 characters long. Please truncate" << endl - << "reads and re-run Bowtie"; - exit(1); + << "reads and re-run Bowtie" << endl; + throw std::runtime_error(""); } r.patBufFw [dstLen] = charToDna5[c]; r.qualBufFw[dstLen] = 'I'; @@ -1651,7 +1651,7 @@ class FastaPatternSource : public BufferedFilePatternSource { virtual void readPair(ReadBuf& ra, ReadBuf& rb, uint32_t& patid) { // (For now, we shouldn't ever be here) cerr << "In FastaPatternSource.readPair()" << endl; - exit(1); + throw std::runtime_error(""); read(ra, patid); readCnt_--; read(rb, patid); @@ -1914,8 +1914,8 @@ class TabbedPatternSource : public BufferedFilePatternSource { } if(dstLen + 1 > 1024) { cerr << "Input file contained a pattern more than 1024 characters long. Please truncate" << endl - << "reads and re-run Bowtie"; - exit(1); + << "reads and re-run Bowtie" << endl; + throw std::runtime_error(""); } r.patBufFw[dstLen] = charToDna5[c]; dstLen++; @@ -1958,8 +1958,8 @@ class TabbedPatternSource : public BufferedFilePatternSource { size_t off = qualsRead - trim5_; if(off + 1 > 1024) { cerr << "Reads file contained a pattern with more than 1024 quality values." << endl - << "Please truncate reads and quality values and and re-run Bowtie"; - exit(1); + << "Please truncate reads and quality values and and re-run Bowtie" << endl; + throw std::runtime_error(""); } assert_geq(c, 33); assert_leq(c, 73); @@ -1970,7 +1970,7 @@ class TabbedPatternSource : public BufferedFilePatternSource { } // done reading integer quality lines if (charsRead > qualsRead) { tooFewQualities(r.name); - exit(1); + throw std::runtime_error(""); } } else { // Non-integer qualities @@ -1979,7 +1979,7 @@ class TabbedPatternSource : public BufferedFilePatternSource { c2 = c; if (c == ' ') { wrongQualityFormat(); - exit(1); + throw std::runtime_error(""); } if(c < 0) { // EOF occurred in the middle of a read - abort @@ -1990,8 +1990,8 @@ class TabbedPatternSource : public BufferedFilePatternSource { size_t off = qualsRead - trim5_; if(off + 1 > 1024) { cerr << "Reads file contained a pattern with more than 1024 quality values." << endl - << "Please truncate reads and quality values and and re-run Bowtie"; - exit(1); + << "Please truncate reads and quality values and and re-run Bowtie" << endl; + throw std::runtime_error(""); } c = charToPhred33(c, solQuals_, phred64Quals_); assert_geq(c, 33); @@ -2106,7 +2106,7 @@ class FastaContinuousPatternSource : public BufferedFilePatternSource { // from a continuous input. virtual void readPair(ReadBuf& ra, ReadBuf& rb, uint32_t& patid) { cerr << "In FastaContinuousPatternSource.readPair()" << endl; - exit(1); + throw std::runtime_error(""); } /** * Reset state to be read for the next file. @@ -2232,7 +2232,7 @@ class FastqPatternSource : public BufferedFilePatternSource { } if(c != '@') { cerr << "Error: reads file does not look like a FASTQ file" << endl; - exit(1); + throw std::runtime_error(""); } assert_eq('@', c); first_ = false; @@ -2266,7 +2266,7 @@ class FastqPatternSource : public BufferedFilePatternSource { _setLength(r.name, nameLen); cerr << "FASTQ read name is too long; read names must be " << (bufSz-2) << " characters or fewer." << endl; cerr << "Beginning of bad read name: " << r.name << endl; - exit(1); + throw std::runtime_error(""); } } _setBegin(r.name, r.nameBuf); @@ -2285,7 +2285,7 @@ class FastqPatternSource : public BufferedFilePatternSource { if(dstLen + 1 > 1024) { cerr << "Input file contained a pattern more than 1024 characters long. Please truncate" << endl << "reads and re-run Bowtie"; - exit(1); + throw std::runtime_error(""); } r.patBufFw[dstLen] = charToDna5[c]; dstLen++; @@ -2330,7 +2330,7 @@ class FastqPatternSource : public BufferedFilePatternSource { if(off + 1 > 1024) { cerr << "Reads file contained a pattern with more than 1024 quality values." << endl << "Please truncate reads and quality values and and re-run Bowtie"; - exit(1); + throw std::runtime_error(""); } assert_geq(c, 33); assert_leq(c, 73); @@ -2341,7 +2341,7 @@ class FastqPatternSource : public BufferedFilePatternSource { } // done reading integer quality lines if (charsRead > qualsRead) { tooFewQualities(r.name); - exit(1); + throw std::runtime_error(""); } _setBegin(r.qualFw, (char*)r.qualBufFw); _setLength(r.qualFw, dstLen); @@ -2352,7 +2352,7 @@ class FastqPatternSource : public BufferedFilePatternSource { c = filebuf_.get(); if (c == ' ') { wrongQualityFormat(); - exit(1); + throw std::runtime_error(""); } if(c < 0) { // EOF occurred in the middle of a read - abort @@ -2366,7 +2366,7 @@ class FastqPatternSource : public BufferedFilePatternSource { if(off + 1 > 1024) { cerr << "Reads file contained a pattern with more than 1024 quality values." << endl << "Please truncate reads and quality values and and re-run Bowtie"; - exit(1); + throw std::runtime_error(""); } c = charToPhred33(c, solQuals_, phred64Quals_); assert_geq(c, 33); @@ -2407,7 +2407,7 @@ class FastqPatternSource : public BufferedFilePatternSource { virtual void readPair(ReadBuf& ra, ReadBuf& rb, uint32_t& patid) { // (For now, we shouldn't ever be here) cerr << "In FastqPatternSource.readPair()" << endl; - exit(1); + throw std::runtime_error(""); read(ra, patid); readCnt_--; read(rb, patid); @@ -2473,7 +2473,7 @@ class RawPatternSource : public BufferedFilePatternSource { if(c == '@') { cerr << "Reads file looks like a FASTQ file; please use -q" << endl; } - exit(1); + throw std::runtime_error(""); } first_ = false; } @@ -2487,7 +2487,7 @@ class RawPatternSource : public BufferedFilePatternSource { if(len + 1 > 1024) { cerr << "Reads file contained a pattern with more than 1024 characters." << endl << "Please truncate reads and and re-run Bowtie"; - exit(1); + throw std::runtime_error(""); } r.patBufFw [len] = charToDna5[c]; r.qualBufFw[len] = 'I'; @@ -2522,7 +2522,7 @@ class RawPatternSource : public BufferedFilePatternSource { virtual void readPair(ReadBuf& ra, ReadBuf& rb, uint32_t& patid) { // (For now, we shouldn't ever be here) cerr << "In RawPatternSource.readPair()" << endl; - exit(1); + throw std::runtime_error(""); read(ra, patid); readCnt_--; read(rb, patid); @@ -2595,7 +2595,7 @@ class ChainPatternSource : public BufferedFilePatternSource { virtual void readPair(ReadBuf& ra, ReadBuf& rb, uint32_t& patid) { // (For now, we shouldn't ever be here) cerr << "In ChainPatternSource.readPair()" << endl; - exit(1); + throw std::runtime_error(""); read(ra, patid); readCnt_--; read(rb, patid); diff --git a/pool.h b/pool.h index dd89755..e9854c8 100644 --- a/pool.h +++ b/pool.h @@ -39,7 +39,7 @@ class ChunkPool { std::cerr << "Error: Could not allocate ChunkPool of " << totSz << " bytes" << std::endl; exhausted(); - exit(1); // Exit if we haven't already + throw std::runtime_error(""); // Exit if we haven't already } } @@ -148,7 +148,7 @@ class ChunkPool { } if(exhaustCrash_) { std::cerr << "Please try specifying a larger --chunkmbs (default is 32)" << std::endl; - exit(1); + throw std::runtime_error(""); } lastSkippedRead_ = patid; } diff --git a/qual.h b/qual.h index 45043b1..456b10d 100644 --- a/qual.h +++ b/qual.h @@ -1,6 +1,8 @@ #ifndef QUAL_H_ #define QUAL_H_ +#include + extern unsigned char qualRounds[]; extern unsigned char solToPhred[]; @@ -83,7 +85,7 @@ inline static char charToPhred33(char c, bool solQuals, bool phred64Quals) { if(c == ' ') { cerr << "Saw a space but expected an ASCII-encoded quality value." << endl << "Are quality values formatted as integers? If so, try --integer-quals." << endl; - exit(1); + throw std::runtime_error(""); } if (solQuals) { // Convert solexa-scaled chars to phred @@ -94,7 +96,7 @@ inline static char charToPhred33(char c, bool solQuals, bool phred64Quals) { << ((int)c) << " but expected 64-based Solexa qual (converts to " << (int)cc << ")." << endl << "Try not specifying --solexa-quals." << endl; - exit(1); + throw std::runtime_error(""); } c = cc; } @@ -104,7 +106,7 @@ inline static char charToPhred33(char c, bool solQuals, bool phred64Quals) { << ((int)c) << " but expected 64-based Phred qual." << endl << "Try not specifying --solexa1.3-quals/--phred64-quals." << endl; - exit(1); + throw std::runtime_error(""); } // Convert to 33-based phred c -= (64-33); @@ -115,7 +117,7 @@ inline static char charToPhred33(char c, bool solQuals, bool phred64Quals) { cerr << "Saw ASCII character " << ((int)c) << " but expected 33-based Phred qual." << endl; - exit(1); + throw std::runtime_error(""); } } return c; @@ -139,7 +141,7 @@ inline static char intToPhred33(int iQ, bool solQuals) { } if (pQ < 33) { cerr << "Saw negative Phred quality " << ((int)pQ-33) << "." << endl; - exit(1); + throw std::runtime_error(""); } assert_geq(pQ, 0); return (int)pQ; diff --git a/range_cache.h b/range_cache.h index 6eb232c..1ed590d 100644 --- a/range_cache.h +++ b/range_cache.h @@ -43,7 +43,7 @@ class RangeCacheMemPool { } catch(std::bad_alloc& e) { cerr << "Allocation error allocating " << lim << " words of range-cache memory" << endl; - exit(1); + throw std::runtime_error(""); } assert(buf_ != NULL); // Fill with 1s to signal that these elements are diff --git a/range_source.h b/range_source.h index ee6d081..a389623 100644 --- a/range_source.h +++ b/range_source.h @@ -1692,7 +1692,7 @@ class StubRangeSourceDriver : public RangeSourceDriver { virtual void advanceImpl(int until) { } /// Return the last valid range found - virtual Range& range() { exit(1); } + virtual Range& range() { throw std::runtime_error(""); } /** * Return whether we're generating ranges for the first or the diff --git a/ref_aligner.h b/ref_aligner.h index 9612315..613dec7 100644 --- a/ref_aligner.h +++ b/ref_aligner.h @@ -8,6 +8,7 @@ #include #include #include +#include #include "seqan/sequence.h" #include "range.h" #include "reference.h" @@ -136,7 +137,7 @@ class RefAligner { if(refbuf_ == NULL) throw std::bad_alloc(); } catch(std::bad_alloc& e) { cerr << "Error: Could not allocate reference-space alignment buffer of " << newsz << "B" << endl; - exit(1); + throw std::runtime_error(""); } refbufSz_ = newsz; freeRefbuf_ = true; diff --git a/ref_read.cpp b/ref_read.cpp index 05347b2..1e96f52 100644 --- a/ref_read.cpp +++ b/ref_read.cpp @@ -179,7 +179,7 @@ size_t fastaRefReadSizes(vector& in, cerr << "Error: Reference sequence has more than 2^32-1 characters! Please divide the" << endl << "reference into batches or chunks of about 3.6 billion characters or less each" << endl << "and index each independently." << endl; - exit(1); + throw std::runtime_error(""); } tot += rec.len; first = false; diff --git a/ref_read.h b/ref_read.h index f74ee5e..4e3d7ed 100644 --- a/ref_read.h +++ b/ref_read.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "alphabet.h" #include "assert_helpers.h" @@ -42,12 +43,12 @@ struct RefRecord { assert(in != NULL); if(!fread(&off, 4, 1, in)) { cerr << "Error reading RefRecord offset from FILE" << endl; - exit(1); + throw std::runtime_error(""); } if(swap) off = endianSwapU32(off); if(!fread(&len, 4, 1, in)) { cerr << "Error reading RefRecord offset from FILE" << endl; - exit(1); + throw std::runtime_error(""); } if(swap) len = endianSwapU32(len); first = fgetc(in) ? true : false; @@ -59,7 +60,7 @@ struct RefRecord { char c; if(!read(in, &c, 1)) { cerr << "Error reading RefRecord 'first' flag" << endl; - exit(1); + throw std::runtime_error(""); } first = (c ? true : false); } @@ -138,7 +139,7 @@ static RefRecord fastaRefReadAppend(FileBuf& in, c = skipWhitespace(in); if(c != '>') { cerr << "Reference file does not seem to be a FASTA file" << endl; - exit(1); + throw std::runtime_error(""); } lastc = c; } diff --git a/reference.h b/reference.h index f2b3b55..026728d 100644 --- a/reference.h +++ b/reference.h @@ -1,6 +1,7 @@ #ifndef REFERENCE_H_ #define REFERENCE_H_ +#include #include "endian_swap.h" #include "mm.h" #include "shmem.h" @@ -76,14 +77,14 @@ class BitPairReference { if (stat(s4.c_str(), &sbuf) == -1) { perror("stat"); cerr << "Error: Could not stat index file " << s4.c_str() << " prior to memory-mapping" << endl; - exit(1); + throw std::runtime_error(""); } mmFile = (char*)mmap((void *)0, sbuf.st_size, PROT_READ, MAP_SHARED, f4, 0); if(mmFile == (void *)(-1) || mmFile == NULL) { perror("mmap"); cerr << "Error: Could not memory-map the index file " << s4.c_str() << endl; - exit(1); + throw std::runtime_error(""); } if(mmSweep) { int sum = 0; @@ -120,7 +121,7 @@ class BitPairReference { if(one != 1) { if(useMm_) { cerr << "Error: Can't use memory-mapped files when the index is the opposite endianness" << endl; - exit(1); + throw std::runtime_error(""); } assert_eq(0x1000000, one); swap = true; // have to endian swap U32s @@ -131,7 +132,7 @@ class BitPairReference { sz = readU32(f3, swap); if(sz == 0) { cerr << "Error: number of reference records is 0 in " << s3 << endl; - exit(1); + throw std::runtime_error(""); } // Read records @@ -158,7 +159,7 @@ class BitPairReference { nrefs_++; } else if(i == 0) { cerr << "First record in reference index file was not marked as 'first'" << endl; - exit(1); + throw std::runtime_error(""); } cumsz += recs_.back().len; cumlen += recs_.back().off; @@ -191,7 +192,7 @@ class BitPairReference { size_t ret = fread(sanityBuf_, 1, cumsz >> 2, ftmp); if(ret != (cumsz >> 2)) { cerr << "Only read " << ret << " bytes (out of " << (cumsz >> 2) << ") from reference index file " << s4 << endl; - exit(1); + throw std::runtime_error(""); } fclose(ftmp); for(size_t i = 0; i < (cumsz >> 2); i++) { @@ -200,7 +201,7 @@ class BitPairReference { } #else cerr << "Shouldn't be at " << __FILE__ << ":" << __LINE__ << " without BOWTIE_MM defined" << endl; - exit(1); + throw std::runtime_error(""); #endif } else { bool shmemLeader = true; @@ -212,7 +213,7 @@ class BitPairReference { } catch(std::bad_alloc& e) { cerr << "Error: Ran out of memory allocating space for the bitpacked reference. Please" << endl << "re-run on a computer with more memory." << endl; - exit(1); + throw std::runtime_error(""); } } else { shmemLeader = ALLOC_SHARED_U8( @@ -235,7 +236,7 @@ class BitPairReference { // Didn't read all of it? if(ret != (cumsz >> 2)) { cerr << "Only read " << ret << " bytes (out of " << (cumsz >> 2) << ") from reference index file " << s4 << endl; - exit(1); + throw std::runtime_error(""); } // Make sure there's no more char c; diff --git a/refmap.cpp b/refmap.cpp index 6bf8716..b254e00 100644 --- a/refmap.cpp +++ b/refmap.cpp @@ -5,6 +5,7 @@ * Author: Ben Langmead */ +#include #include "refmap.h" #include "assert_helpers.h" @@ -20,7 +21,7 @@ void ReferenceMap::map(U32Pair& h) const { cerr << "Could not find a reference-map entry for reference " << h.first << " in map file \"" << fname_ << "\"" << endl; - exit(1); + throw std::runtime_error(""); } h.second += map_[h.first].second; h.first = map_[h.first].first; @@ -33,7 +34,7 @@ void ReferenceMap::parse() { ifstream in(fname_); if(!in.good() || !in.is_open()) { cerr << "Could not open reference map file " << fname_ << endl; - exit(1); + throw std::runtime_error(""); } int c; while((c = in.peek()) != EOF) { @@ -60,7 +61,7 @@ void ReferenceMap::parse() { } assert_eq(EOF, c); if(map_.empty()) { - cerr << "Error: no entries in refmap file " << fname_ << endl; + throw std::runtime_error(""); } in.close(); } diff --git a/search_1mm_phase1.c b/search_1mm_phase1.c index 35f0c1e..f6ac90a 100644 --- a/search_1mm_phase1.c +++ b/search_1mm_phase1.c @@ -11,7 +11,7 @@ if(plen < 2) { cerr << "Error: Reads must be at least 2 characters long in 1-mismatch mode" << endl; - exit(1); + throw std::runtime_error(""); } if(!nofw) { diff --git a/search_23mm_phase1.c b/search_23mm_phase1.c index 5e984f6..9288555 100644 --- a/search_23mm_phase1.c +++ b/search_23mm_phase1.c @@ -12,11 +12,11 @@ if(plen < 3 && two) { cerr << "Error: Read (" << name << ") is less than 3 characters long" << endl; - exit(1); + throw std::runtime_error(""); } else if(plen < 4) { cerr << "Error: Read (" << name << ") is less than 4 characters long" << endl; - exit(1); + throw std::runtime_error(""); } if(!nofw) { // Do an exact-match search on the forward pattern, just in diff --git a/search_23mm_phase3.c b/search_23mm_phase3.c index f6e0ff0..4c3ff1f 100644 --- a/search_23mm_phase3.c +++ b/search_23mm_phase3.c @@ -39,9 +39,6 @@ } bthh3.resetNumBacktracks(); if(done) { - if(dumpHHHits != NULL) { - (*dumpHHHits) << patFw << endl << qualFw << endl << "---" << endl; - } continue; } } diff --git a/search_seeded_phase1.c b/search_seeded_phase1.c index fea6541..840f3fc 100644 --- a/search_seeded_phase1.c +++ b/search_seeded_phase1.c @@ -13,29 +13,24 @@ cout << patFw << ":" << qualFw << ", " << patRc << ":" << qualRc << endl; } - if(plen < 2 && seedMms >= 1) { - cerr << "Error: Read (" << name << ") is less than 2 characters long" << endl; - exit(1); - } - else if(plen < 3 && seedMms >= 2) { - cerr << "Error: Read (" << name << ") is less than 3 characters long" << endl; - exit(1); - } - else if(plen < 4 && seedMms >= 3) { - cerr << "Error: Read (" << name << ") is less than 4 characters long" << endl; - exit(1); - } - // Check and see if the distribution of Ns disqualifies - // this read right off the bat - size_t slen = min(plen, seedLen); - int ns = 0; bool done = false; - for(size_t i = 0; i < slen; i++) { - if((int)(Dna5)patFw[i] == 4) { - if(++ns > seedMms) { - // Set 'done' so that - done = true; - break; + if(plen < 4) { + if(!quiet) { + cerr << "Warning: Skipping read (" << name << ") because it is less than 4 characters long" << endl; + } + done = true; + } else { + // Check and see if the distribution of Ns disqualifies + // this read right off the bat + size_t slen = min(plen, seedLen); + int ns = 0; + for(size_t i = 0; i < slen; i++) { + if((int)(Dna5)patFw[i] == 4) { + if(++ns > seedMms) { + // Set 'done' so that + done = true; + break; + } } } } diff --git a/search_seeded_phase3.c b/search_seeded_phase3.c index 1f1fcc0..0bdf5e5 100644 --- a/search_seeded_phase3.c +++ b/search_seeded_phase3.c @@ -85,9 +85,6 @@ gaveUp = true; } if(done) { - if(dumpHHHits != NULL) { - (*dumpHHHits) << patFw << endl << qualFw << endl << btr23.numBacktracks() << endl; - } DONEMASK_SET(patid); btr23.resetNumBacktracks(); continue; diff --git a/search_seeded_phase4.c b/search_seeded_phase4.c index 92632b0..d641483 100644 --- a/search_seeded_phase4.c +++ b/search_seeded_phase4.c @@ -87,15 +87,8 @@ gaveUp = true; } if(done) { - if(dumpHHHits != NULL) { - (*dumpHHHits) << patFw << endl << qualFw << endl << btf24.numBacktracks() << endl; - } btf24.resetNumBacktracks(); continue; - } else { - if(dumpNoHits != NULL) { - (*dumpNoHits) << patFw << endl << qualFw << endl << btf24.numBacktracks() << endl; - } } btf24.resetNumBacktracks(); } diff --git a/shmem.h b/shmem.h index 77f4038..c991696 100644 --- a/shmem.h +++ b/shmem.h @@ -15,6 +15,7 @@ #include #include #include +#include #include "str_util.h" extern void notifySharedMem(void *mem, size_t len); @@ -69,7 +70,7 @@ bool allocSharedMem(std::string fname, cerr << "shmctl returned " << ret << " for IPC_RMID, errno is " << errno << ", shmid is " << shmid << endl; - exit(1); + throw std::runtime_error(""); } else { cerr << "Deleted shared mem chunk with shmid " << shmid << endl; } @@ -81,22 +82,22 @@ bool allocSharedMem(std::string fname, } else { cerr << "shmget returned " << shmid << " for and errno is " << errno << endl; } - exit(1); + throw std::runtime_error(""); } ptr = (T*)shmat(shmid, 0, 0); if(ptr == (void*)-1) { cerr << "Failed to attach " << memName << " to shared memory with shmat()." << endl; - exit(1); + throw std::runtime_error(""); } if(ptr == NULL) { cerr << memName << " pointer returned by shmat() was NULL." << endl; - exit(1); + throw std::runtime_error(""); } // Did I create it, or did I just attach to one created by // another process? if((ret = shmctl(shmid, IPC_STAT, &ds)) < 0) { cerr << "shmctl returned " << ret << " for IPC_STAT and errno is " << errno << endl; - exit(1); + throw std::runtime_error(""); } if(ds.shm_segsz != shmemLen) { cerr << "Warning: shared-memory chunk's segment size (" << ds.shm_segsz @@ -104,7 +105,7 @@ bool allocSharedMem(std::string fname, << "Deleteing old shared memory block and trying again." << endl; if((ret = shmctl(shmid, IPC_RMID, &ds)) < 0) { cerr << "shmctl returned " << ret << " for IPC_RMID and errno is " << errno << endl; - exit(1); + throw std::runtime_error(""); } } else { break; diff --git a/tokenize.h b/tokenize.h index 0884edb..60238ce 100644 --- a/tokenize.h +++ b/tokenize.h @@ -2,6 +2,7 @@ #define TOKENIZE_H_ #include +#include #include using namespace std;