Permalink
Browse files

*** empty log message ***

  • Loading branch information...
1 parent 6ef2a88 commit 9596b6a0abe3e9540c50cbd7dadbc0a6bc3ee2be langmead committed Aug 27, 2010
Showing with 127 additions and 8 deletions.
  1. +20 −0 assert_helpers.h
  2. +80 −3 bowtie_inspect.cpp
  3. +27 −5 ebwt.h
View
@@ -236,4 +236,24 @@ static inline void assert_in2(char c, const char *str, const char *file, int lin
#define assert_in(c, s)
#endif
+#ifndef NDEBUG
+#define assert_range(b, e, v) assert_range_helper(b, e, v, __FILE__, __LINE__)
+template<typename T>
+inline static void assert_range_helper(const T& begin,
+ const T& end,
+ const T& val,
+ const char *file,
+ int line)
+{
+ if(val < begin || val > end) {
+ std::cout << "assert_range: (" << val << ") not in ["
+ << begin << ", " << end << "]" << std::endl;
+ std::cout << file << ":" << line << std::endl;
+ assert(0);
+ }
+}
+#else
+#define assert_range(b, e, v)
+#endif
+
#endif /*ASSERT_HELPERS_H_*/
View
@@ -5,8 +5,10 @@
#include <stdexcept>
#include <seqan/find.h>
+#include "assert_helpers.h"
#include "endian_swap.h"
#include "ebwt.h"
+#include "reference.h"
using namespace std;
using namespace seqan;
@@ -19,7 +21,7 @@ static int across = 60; // number of characters across in FASTA output
static bool extra = false; // print extra summary info
static bool refFromEbwt = false; // true -> when printing reference, decode it from Ebwt instead of reading it from BitPairReference
-static const char *short_options = "vhnsa:";
+static const char *short_options = "vhnsea:";
enum {
ARG_VERSION = 256,
@@ -97,6 +99,7 @@ static void parseOptions(int argc, char **argv) {
case 'v': verbose = true; break;
case ARG_VERSION: showVersion = true; break;
case ARG_EXTRA: extra = true; break;
+ case 'e': refFromEbwt = true; break;
case 'n': names_only = true; break;
case 's': summarize_only = true; break;
case 'a': across = parseInt(-1, "-a/--across arg must be at least 1"); break;
@@ -132,6 +135,69 @@ void print_fasta_record(ostream& fout,
}
}
+/**
+ * Given output stream, BitPairReference, reference index, name and
+ * length, print the whole nucleotide reference with the appropriate
+ * number of columns.
+ */
+void print_ref_sequence(
+ ostream& fout,
+ BitPairReference& ref,
+ const string& name,
+ size_t refi,
+ size_t len)
+{
+ size_t incr = across * 1000;
+ uint32_t *buf = new uint32_t[(incr + 128)/4];
+ fout << ">" << name << "\n";
+ for(size_t i = 0; i < len; i += incr) {
+ size_t amt = min(incr, len-i);
+ assert_leq(amt, incr);
+ int off = ref.getStretch(buf, refi, i, amt);
+ uint8_t *cb = ((uint8_t*)buf) + off;
+ for(size_t j = 0; j < amt; j++) {
+ if(j > 0 && (j % across) == 0) fout << "\n";
+ assert_range(0, 4, (int)cb[j]);
+ fout << "ACGTN"[(int)cb[j]];
+ }
+ fout << "\n";
+ }
+ delete buf;
+}
+
+/**
+ *
+ */
+void print_ref_sequences(
+ ostream& fout,
+ bool color,
+ const vector<string>& refnames,
+ const uint32_t* plen,
+ const string& adjustedEbwtFileBase)
+{
+ BitPairReference ref(
+ adjustedEbwtFileBase, // input basename
+ color, // true -> expect colorspace reference
+ false, // sanity-check reference
+ NULL, // infiles
+ NULL, // originals
+ false, // infiles are sequences
+ false, // memory-map
+ false, // use shared memory
+ false, // sweep mm-mapped ref
+ verbose, // be talkative
+ verbose); // be talkative at startup
+ assert_eq(ref.numRefs(), refnames.size());
+ for(size_t i = 0; i < ref.numRefs(); i++) {
+ print_ref_sequence(
+ fout,
+ ref,
+ refnames[i],
+ i,
+ plen[i]);
+ }
+}
+
template<typename TStr>
void print_index_sequences(ostream& fout, Ebwt<TStr>& ebwt)
{
@@ -280,8 +346,19 @@ static void driver(
false, // pass up memory exceptions?
false); // sanity check?
// Load whole index into memory
- ebwt.loadIntoMemory(-1, true, false);
- print_index_sequences(cout, ebwt);
+ if(refFromEbwt) {
+ ebwt.loadIntoMemory(-1, true, false);
+ print_index_sequences(cout, ebwt);
+ } else {
+ vector<string> refnames;
+ readEbwtRefnames(adjustedEbwtFileBase, refnames);
+ print_ref_sequences(
+ cout,
+ readEbwtColor(ebwtFileBase),
+ refnames,
+ ebwt.plen(),
+ adjustedEbwtFileBase);
+ }
// Evict any loaded indexes from memory
if(ebwt.isInMemory()) {
ebwt.evictFromMemory();
View
32 ebwt.h
@@ -72,7 +72,8 @@ if(this->verbose()) { \
* Flags describing type of Ebwt.
*/
enum EBWT_FLAGS {
- EBWT_COLOR = 2 // true -> Ebwt is colorspace
+ EBWT_COLOR = 2, // true -> Ebwt is colorspace
+ EBWT_ENTIRE_REV = 4 // true -> Ebwt is concatenated then reversed
};
/**
@@ -3524,11 +3525,9 @@ readEbwtRefnames(const string& instr, vector<string>& refnames) {
}
/**
- * Read just enough of the Ebwt's header to determine whether it's
- * colorspace.
+ * Read just enough of the Ebwt's header to get its flags
*/
-static inline bool
-readEbwtColor(const string& instr) {
+static inline int32_t readFlags(const string& instr) {
ifstream in;
// Initialize our primary and secondary input-stream fields
in.open((instr + ".1.ebwt").c_str(), ios_base::in | ios::binary);
@@ -3550,12 +3549,35 @@ readEbwtColor(const string& instr) {
readI32(in, switchEndian);
readI32(in, switchEndian);
int32_t flags = readI32(in, switchEndian);
+ return flags;
+}
+
+/**
+ * Read just enough of the Ebwt's header to determine whether it's
+ * colorspace.
+ */
+static inline bool
+readEbwtColor(const string& instr) {
+ int32_t flags = readFlags(instr);
if(flags < 0 && (((-flags) & EBWT_COLOR) != 0)) {
return true;
} else {
return false;
}
+}
+/**
+ * Read just enough of the Ebwt's header to determine whether it's
+ * entirely reversed.
+ */
+static inline bool
+readEntireReverse(const string& instr) {
+ int32_t flags = readFlags(instr);
+ if(flags < 0 && (((-flags) & EBWT_ENTIRE_REV) != 0)) {
+ return true;
+ } else {
+ return false;
+ }
}
/**

0 comments on commit 9596b6a

Please sign in to comment.