diff --git a/assert_helpers.h b/assert_helpers.h index 212006e..cb46fa0 100644 --- a/assert_helpers.h +++ b/assert_helpers.h @@ -236,4 +236,24 @@ static inline void assert_in2(char c, const char *str, const char *file, int lin #define assert_in(c, s) #endif +#ifndef NDEBUG +#define assert_range(b, e, v) assert_range_helper(b, e, v, __FILE__, __LINE__) +template +inline static void assert_range_helper(const T& begin, + const T& end, + const T& val, + const char *file, + int line) +{ + if(val < begin || val > end) { + std::cout << "assert_range: (" << val << ") not in [" + << begin << ", " << end << "]" << std::endl; + std::cout << file << ":" << line << std::endl; + assert(0); + } +} +#else +#define assert_range(b, e, v) +#endif + #endif /*ASSERT_HELPERS_H_*/ diff --git a/bowtie_inspect.cpp b/bowtie_inspect.cpp index c505117..de0f6ce 100644 --- a/bowtie_inspect.cpp +++ b/bowtie_inspect.cpp @@ -5,8 +5,10 @@ #include #include +#include "assert_helpers.h" #include "endian_swap.h" #include "ebwt.h" +#include "reference.h" using namespace std; using namespace seqan; @@ -19,7 +21,7 @@ static int across = 60; // number of characters across in FASTA output static bool extra = false; // print extra summary info static bool refFromEbwt = false; // true -> when printing reference, decode it from Ebwt instead of reading it from BitPairReference -static const char *short_options = "vhnsa:"; +static const char *short_options = "vhnsea:"; enum { ARG_VERSION = 256, @@ -97,6 +99,7 @@ static void parseOptions(int argc, char **argv) { case 'v': verbose = true; break; case ARG_VERSION: showVersion = true; break; case ARG_EXTRA: extra = true; break; + case 'e': refFromEbwt = true; break; case 'n': names_only = true; break; case 's': summarize_only = true; break; case 'a': across = parseInt(-1, "-a/--across arg must be at least 1"); break; @@ -132,6 +135,69 @@ void print_fasta_record(ostream& fout, } } +/** + * Given output stream, BitPairReference, reference index, name and + * length, print the whole nucleotide reference with the appropriate + * number of columns. + */ +void print_ref_sequence( + ostream& fout, + BitPairReference& ref, + const string& name, + size_t refi, + size_t len) +{ + size_t incr = across * 1000; + uint32_t *buf = new uint32_t[(incr + 128)/4]; + fout << ">" << name << "\n"; + for(size_t i = 0; i < len; i += incr) { + size_t amt = min(incr, len-i); + assert_leq(amt, incr); + int off = ref.getStretch(buf, refi, i, amt); + uint8_t *cb = ((uint8_t*)buf) + off; + for(size_t j = 0; j < amt; j++) { + if(j > 0 && (j % across) == 0) fout << "\n"; + assert_range(0, 4, (int)cb[j]); + fout << "ACGTN"[(int)cb[j]]; + } + fout << "\n"; + } + delete buf; +} + +/** + * + */ +void print_ref_sequences( + ostream& fout, + bool color, + const vector& refnames, + const uint32_t* plen, + const string& adjustedEbwtFileBase) +{ + BitPairReference ref( + adjustedEbwtFileBase, // input basename + color, // true -> expect colorspace reference + false, // sanity-check reference + NULL, // infiles + NULL, // originals + false, // infiles are sequences + false, // memory-map + false, // use shared memory + false, // sweep mm-mapped ref + verbose, // be talkative + verbose); // be talkative at startup + assert_eq(ref.numRefs(), refnames.size()); + for(size_t i = 0; i < ref.numRefs(); i++) { + print_ref_sequence( + fout, + ref, + refnames[i], + i, + plen[i]); + } +} + template void print_index_sequences(ostream& fout, Ebwt& ebwt) { @@ -280,8 +346,19 @@ static void driver( false, // pass up memory exceptions? false); // sanity check? // Load whole index into memory - ebwt.loadIntoMemory(-1, true, false); - print_index_sequences(cout, ebwt); + if(refFromEbwt) { + ebwt.loadIntoMemory(-1, true, false); + print_index_sequences(cout, ebwt); + } else { + vector refnames; + readEbwtRefnames(adjustedEbwtFileBase, refnames); + print_ref_sequences( + cout, + readEbwtColor(ebwtFileBase), + refnames, + ebwt.plen(), + adjustedEbwtFileBase); + } // Evict any loaded indexes from memory if(ebwt.isInMemory()) { ebwt.evictFromMemory(); diff --git a/ebwt.h b/ebwt.h index db820f5..c3f9eeb 100644 --- a/ebwt.h +++ b/ebwt.h @@ -72,7 +72,8 @@ if(this->verbose()) { \ * Flags describing type of Ebwt. */ enum EBWT_FLAGS { - EBWT_COLOR = 2 // true -> Ebwt is colorspace + EBWT_COLOR = 2, // true -> Ebwt is colorspace + EBWT_ENTIRE_REV = 4 // true -> Ebwt is concatenated then reversed }; /** @@ -3524,11 +3525,9 @@ readEbwtRefnames(const string& instr, vector& refnames) { } /** - * Read just enough of the Ebwt's header to determine whether it's - * colorspace. + * Read just enough of the Ebwt's header to get its flags */ -static inline bool -readEbwtColor(const string& instr) { +static inline int32_t readFlags(const string& instr) { ifstream in; // Initialize our primary and secondary input-stream fields in.open((instr + ".1.ebwt").c_str(), ios_base::in | ios::binary); @@ -3550,12 +3549,35 @@ readEbwtColor(const string& instr) { readI32(in, switchEndian); readI32(in, switchEndian); int32_t flags = readI32(in, switchEndian); + return flags; +} + +/** + * Read just enough of the Ebwt's header to determine whether it's + * colorspace. + */ +static inline bool +readEbwtColor(const string& instr) { + int32_t flags = readFlags(instr); if(flags < 0 && (((-flags) & EBWT_COLOR) != 0)) { return true; } else { return false; } +} +/** + * Read just enough of the Ebwt's header to determine whether it's + * entirely reversed. + */ +static inline bool +readEntireReverse(const string& instr) { + int32_t flags = readFlags(instr); + if(flags < 0 && (((-flags) & EBWT_ENTIRE_REV) != 0)) { + return true; + } else { + return false; + } } /**