Permalink
Cannot retrieve contributors at this time
Fetching contributors…
| #ifndef MULTIKEY_QSORT_H_ | |
| #define MULTIKEY_QSORT_H_ | |
| #include <iostream> | |
| #include <seqan/basic.h> | |
| #include <seqan/file.h> | |
| #include <seqan/sequence.h> | |
| #include "sequence_io.h" | |
| #include "alphabet.h" | |
| #include "assert_helpers.h" | |
| #include "diff_sample.h" | |
| #include "btypes.h" | |
| using namespace std; | |
| using namespace seqan; | |
| /** | |
| * Swap elements a and b in seqan::String s | |
| */ | |
| template <typename TStr, typename TPos> | |
| static inline void swap(TStr& s, size_t slen, TPos a, TPos b) { | |
| typedef typename Value<TStr>::Type TAlphabet; | |
| assert_lt(a, slen); | |
| assert_lt(b, slen); | |
| TAlphabet tmp = s[a]; | |
| s[a] = s[b]; | |
| s[b] = tmp; | |
| } | |
| /** | |
| * Swap elements a and b in array s | |
| */ | |
| template <typename TVal, typename TPos> | |
| static inline void swap(TVal* s, size_t slen, TPos a, TPos b) { | |
| assert_lt(a, slen); | |
| assert_lt(b, slen); | |
| TVal tmp = s[a]; | |
| s[a] = s[b]; | |
| s[b] = tmp; | |
| } | |
| /** | |
| * Helper macro for swapping elements a and b in seqan::String s. Does | |
| * some additional sainty checking w/r/t begin and end (which are | |
| * parameters to the sorting routines below). | |
| */ | |
| #define SWAP(s, a, b) { \ | |
| assert_geq(a, begin); \ | |
| assert_geq(b, begin); \ | |
| assert_lt(a, end); \ | |
| assert_lt(b, end); \ | |
| swap(s, slen, a, b); \ | |
| } | |
| /** | |
| * Helper macro for swapping the same pair of elements a and b in | |
| * two different seqan::Strings s and s2. This is a helpful variant | |
| * if, for example, the caller would like to see how their input was | |
| * permuted by the sort routine (in that case, the caller would let s2 | |
| * be an array s2[] where s2 is the same length as s and s2[i] = i). | |
| */ | |
| #define SWAP2(s, s2, a, b) { \ | |
| SWAP(s, a, b); \ | |
| swap(s2, slen, a, b); \ | |
| } | |
| #define SWAP1(s, s2, a, b) { \ | |
| SWAP(s, a, b); \ | |
| } | |
| /** | |
| * Helper macro that swaps a range of elements [i, i+n) with another | |
| * range [j, j+n) in seqan::String s. | |
| */ | |
| #define VECSWAP(s, i, j, n) { \ | |
| if(n > 0) { vecswap(s, slen, i, j, n, begin, end); } \ | |
| } | |
| /** | |
| * Helper macro that swaps a range of elements [i, i+n) with another | |
| * range [j, j+n) both in seqan::String s and seqan::String s2. | |
| */ | |
| #define VECSWAP2(s, s2, i, j, n) { \ | |
| if(n > 0) { vecswap2(s, slen, s2, i, j, n, begin, end); } \ | |
| } | |
| /** | |
| * Helper function that swaps a range of elements [i, i+n) with another | |
| * range [j, j+n) in seqan::String s. begin and end represent the | |
| * current range under consideration by the caller (one of the | |
| * recursive multikey_quicksort routines below). | |
| */ | |
| template <typename TStr, typename TPos> | |
| static inline void vecswap(TStr& s, size_t slen, TPos i, TPos j, TPos n, TPos begin, TPos end) { | |
| assert_geq(i, begin); | |
| assert_geq(j, begin); | |
| assert_lt(i, end); | |
| assert_lt(j, end); | |
| while(n-- > 0) { | |
| assert_geq(n, 0); | |
| TPos a = i+n; | |
| TPos b = j+n; | |
| assert_geq(a, begin); | |
| assert_geq(b, begin); | |
| assert_lt(a, end); | |
| assert_lt(b, end); | |
| swap(s, slen, a, b); | |
| } | |
| } | |
| template <typename TVal, typename TPos> | |
| static inline void vecswap(TVal *s, size_t slen, TPos i, TPos j, TPos n, TPos begin, TPos end) { | |
| assert_geq(i, begin); | |
| assert_geq(j, begin); | |
| assert_lt(i, end); | |
| assert_lt(j, end); | |
| while(n-- > 0) { | |
| assert_geq(n, 0); | |
| TPos a = i+n; | |
| TPos b = j+n; | |
| assert_geq(a, begin); | |
| assert_geq(b, begin); | |
| assert_lt(a, end); | |
| assert_lt(b, end); | |
| swap(s, slen, a, b); | |
| } | |
| } | |
| /** | |
| * Helper function that swaps a range of elements [i, i+n) with another | |
| * range [j, j+n) both in seqan::String s and seqan::String s2. begin | |
| * and end represent the current range under consideration by the | |
| * caller (one of the recursive multikey_quicksort routines below). | |
| */ | |
| template <typename TStr, typename TPos> | |
| static inline void vecswap2(TStr& s, size_t slen, TStr& s2, TPos i, TPos j, TPos n, TPos begin, TPos end) { | |
| assert_geq(i, begin); | |
| assert_geq(j, begin); | |
| assert_lt(i, end); | |
| assert_lt(j, end); | |
| while(n-- > 0) { | |
| assert_geq(n, 0); | |
| TPos a = i+n; | |
| TPos b = j+n; | |
| assert_geq(a, begin); | |
| assert_geq(b, begin); | |
| assert_lt(a, end); | |
| assert_lt(b, end); | |
| swap(s, slen, a, b); | |
| swap(s2, slen, a, b); | |
| } | |
| } | |
| template <typename TVal, typename TPos> | |
| static inline void vecswap2(TVal* s, size_t slen, TVal* s2, TPos i, TPos j, TPos n, TPos begin, TPos end) { | |
| assert_geq(i, begin); | |
| assert_geq(j, begin); | |
| assert_lt(i, end); | |
| assert_lt(j, end); | |
| while(n-- > 0) { | |
| assert_geq(n, 0); | |
| TPos a = i+n; | |
| TPos b = j+n; | |
| assert_geq(a, begin); | |
| assert_geq(b, begin); | |
| assert_lt(a, end); | |
| assert_lt(b, end); | |
| swap(s, slen, a, b); | |
| swap(s2, slen, a, b); | |
| } | |
| } | |
| /// Retrieve an int-ized version of the ath character of string s, or, | |
| /// if a goes off the end of s, return a (user-specified) int greater | |
| /// than any TAlphabet character - 'hi'. | |
| #define CHAR_AT(ss, aa) ((length(s[ss]) > aa) ? (int)(Dna)(s[ss][aa]) : hi) | |
| /// Retrieve an int-ized version of the ath character of string s, or, | |
| /// if a goes off the end of s, return a (user-specified) int greater | |
| /// than any TAlphabet character - 'hi'. | |
| #define CHAR_AT_SUF(si, off) (((off+s[si]) < hlen) ? ((int)(Dna)(host[off+s[si]])) : (hi)) | |
| /// Retrieve an int-ized version of the ath character of string s, or, | |
| /// if a goes off the end of s, return a (user-specified) int greater | |
| /// than any TAlphabet character - 'hi'. | |
| #define CHAR_AT_SUF_U8(si, off) char_at_suf_u8(host, hlen, s, si, off, hi) | |
| // Note that CHOOSE_AND_SWAP_RANDOM_PIVOT is unused | |
| #define CHOOSE_AND_SWAP_RANDOM_PIVOT(sw, ch) { \ | |
| /* Note: rand() didn't really cut it here; it seemed to run out of */ \ | |
| /* randomness and, after a time, returned the same thing over and */ \ | |
| /* over again */ \ | |
| a = (rand() % n) + begin; /* choose pivot between begin and end */ \ | |
| assert_lt(a, end); assert_geq(a, begin); \ | |
| sw(s, s2, begin, a); /* move pivot to beginning */ \ | |
| } | |
| /** | |
| * Ad-hoc DNA-centric way of choose a pretty good pivot without using | |
| * the pseudo-random number generator. We try to get a 1 or 2 if | |
| * possible, since they'll split things more evenly than a 0 or 4. We | |
| * also avoid swapping in the event that we choose the first element. | |
| */ | |
| #define CHOOSE_AND_SWAP_SMART_PIVOT(sw, ch) { \ | |
| a = begin; /* choose first elt */ \ | |
| /* now try to find a better elt */ \ | |
| if(n >= 5) { /* n is the difference between begin and end */ \ | |
| if (ch(begin+1, depth) == 1 || ch(begin+1, depth) == 2) a = begin+1; \ | |
| else if(ch(begin+2, depth) == 1 || ch(begin+2, depth) == 2) a = begin+2; \ | |
| else if(ch(begin+3, depth) == 1 || ch(begin+3, depth) == 2) a = begin+3; \ | |
| else if(ch(begin+4, depth) == 1 || ch(begin+4, depth) == 2) a = begin+4; \ | |
| if(a != begin) sw(s, s2, begin, a); /* move pivot to beginning */ \ | |
| } \ | |
| /* the element at [begin] now holds the pivot value */ \ | |
| } | |
| #define CHOOSE_AND_SWAP_PIVOT CHOOSE_AND_SWAP_SMART_PIVOT | |
| #ifndef NDEBUG | |
| /** | |
| * Assert that the range of chars at depth 'depth' in strings 'begin' | |
| * to 'end' in string-of-suffix-offsets s is parititioned properly | |
| * according to the ternary paritioning strategy of Bentley and McIlroy | |
| * (*prior to* swapping the = regions to the center) | |
| */ | |
| template<typename THost> | |
| bool assertPartitionedSuf( | |
| const THost& host, | |
| TIndexOffU *s, | |
| size_t slen, | |
| int hi, | |
| int pivot, | |
| size_t begin, | |
| size_t end, | |
| size_t depth) | |
| { | |
| size_t hlen = length(host); | |
| int state = 0; // 0 -> 1st = section, 1 -> < section, 2 -> > section, 3 -> 2nd = section | |
| for(size_t i = begin; i < end; i++) { | |
| switch(state) { | |
| case 0: | |
| if (CHAR_AT_SUF(i, depth) < pivot) { state = 1; break; } | |
| else if (CHAR_AT_SUF(i, depth) > pivot) { state = 2; break; } | |
| assert_eq(CHAR_AT_SUF(i, depth), pivot); break; | |
| case 1: | |
| if (CHAR_AT_SUF(i, depth) > pivot) { state = 2; break; } | |
| else if (CHAR_AT_SUF(i, depth) == pivot) { state = 3; break; } | |
| assert_lt(CHAR_AT_SUF(i, depth), pivot); break; | |
| case 2: | |
| if (CHAR_AT_SUF(i, depth) == pivot) { state = 3; break; } | |
| assert_gt(CHAR_AT_SUF(i, depth), pivot); break; | |
| case 3: | |
| assert_eq(CHAR_AT_SUF(i, depth), pivot); break; | |
| } | |
| } | |
| return true; | |
| } | |
| /** | |
| * Assert that the range of chars at depth 'depth' in strings 'begin' | |
| * to 'end' in string-of-suffix-offsets s is parititioned properly | |
| * according to the ternary paritioning strategy of Bentley and McIlroy | |
| * (*after* swapping the = regions to the center) | |
| */ | |
| template<typename THost> | |
| bool assertPartitionedSuf2( | |
| const THost& host, | |
| TIndexOffU *s, | |
| size_t slen, | |
| int hi, | |
| int pivot, | |
| size_t begin, | |
| size_t end, | |
| size_t depth) | |
| { | |
| size_t hlen = length(host); | |
| int state = 0; // 0 -> < section, 1 -> = section, 2 -> > section | |
| for(size_t i = begin; i < end; i++) { | |
| switch(state) { | |
| case 0: | |
| if (CHAR_AT_SUF(i, depth) == pivot) { state = 1; break; } | |
| else if (CHAR_AT_SUF(i, depth) > pivot) { state = 2; break; } | |
| assert_lt(CHAR_AT_SUF(i, depth), pivot); break; | |
| case 1: | |
| if (CHAR_AT_SUF(i, depth) > pivot) { state = 2; break; } | |
| assert_eq(CHAR_AT_SUF(i, depth), pivot); break; | |
| case 2: | |
| assert_gt(CHAR_AT_SUF(i, depth), pivot); break; | |
| } | |
| } | |
| return true; | |
| } | |
| #endif | |
| /** | |
| * Assert that the seqan::String s of suffix offsets into seqan::String | |
| * 'host' is a seemingly legitimate suffix-offset list (at this time, | |
| * we just check that it doesn't list any suffix twice). | |
| */ | |
| static inline void sanityCheckInputSufs(TIndexOffU *s, size_t slen) { | |
| assert_gt(slen, 0); | |
| for(size_t i = 0; i < slen; i++) { | |
| // Actually, it's convenient to allow the caller to provide | |
| // suffix offsets thare are off the end of the host string. | |
| // See, e.g., build() in diff_sample.cpp. | |
| //assert_lt(s[i], length(host)); | |
| for(size_t j = i+1; j < slen; j++) { | |
| assert_neq(s[i], s[j]); | |
| } | |
| } | |
| } | |
| /** | |
| * Assert that the seqan::String s of suffix offsets into seqan::String | |
| * 'host' really are in lexicographical order up to depth 'upto'. | |
| */ | |
| template <typename T> | |
| void sanityCheckOrderedSufs(const T& host, | |
| size_t hlen, | |
| TIndexOffU *s, | |
| size_t slen, | |
| size_t upto, | |
| size_t lower = 0, | |
| size_t upper = OFF_MASK) | |
| { | |
| assert_lt(s[0], hlen); | |
| upper = min<size_t>(upper, slen-1); | |
| for(size_t i = lower; i < upper; i++) { | |
| // Allow s[i+t] to point off the end of the string; this is | |
| // convenient for some callers | |
| if(s[i+1] >= hlen) continue; | |
| #ifndef NDEBUG | |
| if(upto == OFF_MASK) { | |
| assert(dollarLt(suffix(host, s[i]), suffix(host, s[i+1]))); | |
| } else { | |
| if(prefix(suffix(host, s[i]), upto) > prefix(suffix(host, s[i+1]), upto)) { | |
| // operator > treats shorter strings as | |
| // lexicographically smaller, but we want to opposite | |
| //assert(isPrefix(suffix(host, s[i+1]), suffix(host, s[i]))); | |
| } | |
| } | |
| #endif | |
| } | |
| } | |
| /** | |
| * Main multikey quicksort function for suffixes. Based on Bentley & | |
| * Sedgewick's algorithm on p.5 of their paper "Fast Algorithms for | |
| * Sorting and Searching Strings". That algorithm has been extended in | |
| * three ways: | |
| * | |
| * 1. Deal with keys of different lengths by checking bounds and | |
| * considering off-the-end values to be 'hi' (b/c our goal is the | |
| * BWT transform, we're biased toward considring prefixes as | |
| * lexicographically *greater* than their extensions). | |
| * 2. The multikey_qsort_suffixes version takes a single host string | |
| * and a list of suffix offsets as input. This reduces memory | |
| * footprint compared to an approach that treats its input | |
| * generically as a set of strings (not necessarily suffixes), thus | |
| * requiring that we store at least two integers worth of | |
| * information for each string. | |
| * 3. Sorting functions take an extra "upto" parameter that upper- | |
| * bounds the depth to which the function sorts. | |
| * | |
| * TODO: Consult a tie-breaker (like a difference cover sample) if two | |
| * keys share a long prefix. | |
| */ | |
| template<typename T> | |
| void mkeyQSortSuf( | |
| const T& host, | |
| size_t hlen, | |
| TIndexOffU *s, | |
| size_t slen, | |
| int hi, | |
| size_t begin, | |
| size_t end, | |
| size_t depth, | |
| size_t upto = OFF_MASK) | |
| { | |
| // Helper for making the recursive call; sanity-checks arguments to | |
| // make sure that the problem actually got smaller. | |
| #define MQS_RECURSE_SUF(nbegin, nend, ndepth) { \ | |
| assert(nbegin > begin || nend < end || ndepth > depth); \ | |
| if(ndepth < upto) { /* don't exceed depth of 'upto' */ \ | |
| mkeyQSortSuf(host, hlen, s, slen, hi, nbegin, nend, ndepth, upto); \ | |
| } \ | |
| } | |
| assert_leq(begin, slen); | |
| assert_leq(end, slen); | |
| size_t a, b, c, d, /*e,*/ r; | |
| size_t n = end - begin; | |
| if(n <= 1) return; // 1-element list already sorted | |
| CHOOSE_AND_SWAP_PIVOT(SWAP1, CHAR_AT_SUF); // pick pivot, swap it into [begin] | |
| int v = CHAR_AT_SUF(begin, depth); // v <- randomly-selected pivot value | |
| #ifndef NDEBUG | |
| { | |
| bool stillInBounds = false; | |
| for(size_t i = begin; i < end; i++) { | |
| if(depth < (hlen-s[i])) { | |
| stillInBounds = true; | |
| break; | |
| } else { /* already fell off this suffix */ } | |
| } | |
| assert(stillInBounds); // >=1 suffix must still be in bounds | |
| } | |
| #endif | |
| a = b = begin; | |
| c = d = end-1; | |
| while(true) { | |
| // Invariant: everything before a is = pivot, everything | |
| // between a and b is < | |
| int bc = 0; // shouldn't have to init but gcc on Mac complains | |
| while(b <= c && v >= (bc = CHAR_AT_SUF(b, depth))) { | |
| if(v == bc) { | |
| SWAP(s, a, b); a++; | |
| } | |
| b++; | |
| } | |
| // Invariant: everything after d is = pivot, everything | |
| // between c and d is > | |
| int cc = 0; // shouldn't have to init but gcc on Mac complains | |
| while(b <= c && v <= (cc = CHAR_AT_SUF(c, depth))) { | |
| if(v == cc) { | |
| SWAP(s, c, d); d--; | |
| } | |
| c--; | |
| } | |
| if(b > c) break; | |
| SWAP(s, b, c); | |
| b++; | |
| c--; | |
| } | |
| assert(a > begin || c < end-1); // there was at least one =s | |
| assert_lt(d-c, n); // they can't all have been > pivot | |
| assert_lt(b-a, n); // they can't all have been < pivot | |
| assert(assertPartitionedSuf(host, s, slen, hi, v, begin, end, depth)); // check pre-=-swap invariant | |
| r = min(a-begin, b-a); VECSWAP(s, begin, b-r, r); // swap left = to center | |
| r = min(d-c, end-d-1); VECSWAP(s, b, end-r, r); // swap right = to center | |
| assert(assertPartitionedSuf2(host, s, slen, hi, v, begin, end, depth)); // check post-=-swap invariant | |
| r = b-a; // r <- # of <'s | |
| if(r > 0) { | |
| MQS_RECURSE_SUF(begin, begin + r, depth); // recurse on <'s | |
| } | |
| // Do not recurse on ='s if the pivot was the off-the-end value; | |
| // they're already fully sorted | |
| if(v != hi) { | |
| MQS_RECURSE_SUF(begin + r, begin + r + (a-begin) + (end-d-1), depth+1); // recurse on ='s | |
| } | |
| r = d-c; // r <- # of >'s excluding those exhausted | |
| if(r > 0 && v < hi-1) { | |
| MQS_RECURSE_SUF(end-r, end, depth); // recurse on >'s | |
| } | |
| } | |
| /** | |
| * Toplevel function for multikey quicksort over suffixes. | |
| */ | |
| template<typename T> | |
| void mkeyQSortSuf( | |
| const T& host, | |
| TIndexOffU *s, | |
| size_t slen, | |
| int hi, | |
| bool verbose = false, | |
| bool sanityCheck = false, | |
| size_t upto = OFF_MASK) | |
| { | |
| size_t hlen = length(host); | |
| assert(!empty(s)); | |
| if(sanityCheck) sanityCheckInputSufs(s, slen); | |
| mkeyQSortSuf(host, hlen, s, slen, hi, (size_t)0, slen, (size_t)0, upto); | |
| if(sanityCheck) sanityCheckOrderedSufs(host, hlen, s, slen, upto); | |
| } | |
| struct QSortRange { | |
| size_t begin; | |
| size_t end; | |
| size_t depth; | |
| }; | |
| template<typename T> | |
| void mkeyQSortSuf2( | |
| const T& host, | |
| size_t hlen, | |
| TIndexOffU *s, | |
| size_t slen, | |
| TIndexOffU *s2, | |
| int hi, | |
| size_t _begin, | |
| size_t _end, | |
| size_t _depth, | |
| size_t upto = OFF_MASK, | |
| std::vector<size_t>* boundaries = NULL) | |
| { | |
| std::vector<std::vector<QSortRange> > block_list; | |
| while(true) { | |
| size_t begin = 0, end = 0, depth = 0; | |
| if(block_list.size() == 0) { | |
| begin = _begin; | |
| end = _end; | |
| depth = _depth; | |
| } else { | |
| if(block_list.back().size() > 0) { | |
| begin = block_list.back()[0].begin; | |
| end = block_list.back()[0].end; | |
| depth = block_list.back()[0].depth; | |
| block_list.back().erase((block_list.back()).begin()); | |
| } else { | |
| block_list.resize(block_list.size() - 1); | |
| if(block_list.size() == 0) { | |
| break; | |
| } | |
| } | |
| } | |
| if(depth == upto) { | |
| if(boundaries != NULL) { | |
| (*boundaries).push_back(end); | |
| } | |
| continue; | |
| } | |
| assert_leq(begin, slen); | |
| assert_leq(end, slen); | |
| size_t a, b, c, d, /*e,*/ r; | |
| size_t n = end - begin; | |
| if(n <= 1) { // 1-element list already sorted | |
| if(n == 1 && boundaries != NULL) { | |
| boundaries->push_back(end); | |
| } | |
| continue; | |
| } | |
| CHOOSE_AND_SWAP_PIVOT(SWAP2, CHAR_AT_SUF); // pick pivot, swap it into [begin] | |
| int v = CHAR_AT_SUF(begin, depth); // v <- randomly-selected pivot value | |
| #ifndef NDEBUG | |
| { | |
| bool stillInBounds = false; | |
| for(size_t i = begin; i < end; i++) { | |
| if(depth < (hlen-s[i])) { | |
| stillInBounds = true; | |
| break; | |
| } else { /* already fell off this suffix */ } | |
| } | |
| assert(stillInBounds); // >=1 suffix must still be in bounds | |
| } | |
| #endif | |
| a = b = begin; | |
| c = d = /*e =*/ end-1; | |
| while(true) { | |
| // Invariant: everything before a is = pivot, everything | |
| // between a and b is < | |
| int bc = 0; // shouldn't have to init but gcc on Mac complains | |
| while(b <= c && v >= (bc = CHAR_AT_SUF(b, depth))) { | |
| if(v == bc) { | |
| SWAP2(s, s2, a, b); a++; | |
| } | |
| b++; | |
| } | |
| // Invariant: everything after d is = pivot, everything | |
| // between c and d is > | |
| int cc = 0; // shouldn't have to init but gcc on Mac complains | |
| while(b <= c && v <= (cc = CHAR_AT_SUF(c, depth))) { | |
| if(v == cc) { | |
| SWAP2(s, s2, c, d); d--; /*e--;*/ | |
| } | |
| //else if(c == e && v == hi) e--; | |
| c--; | |
| } | |
| if(b > c) break; | |
| SWAP2(s, s2, b, c); | |
| b++; | |
| c--; | |
| } | |
| assert(a > begin || c < end-1); // there was at least one =s | |
| assert_lt(/*e*/d-c, n); // they can't all have been > pivot | |
| assert_lt(b-a, n); // they can't all have been < pivot | |
| assert(assertPartitionedSuf(host, s, slen, hi, v, begin, end, depth)); // check pre-=-swap invariant | |
| r = min(a-begin, b-a); VECSWAP2(s, s2, begin, b-r, r); // swap left = to center | |
| r = min(d-c, end-d-1); VECSWAP2(s, s2, b, end-r, r); // swap right = to center | |
| assert(assertPartitionedSuf2(host, s, slen, hi, v, begin, end, depth)); // check post-=-swap invariant | |
| r = b-a; // r <- # of <'s | |
| std::vector<QSortRange> tmp1; | |
| block_list.push_back(tmp1); | |
| block_list.back().clear(); | |
| if(r > 0) { // recurse on <'s | |
| QSortRange tmp2; | |
| block_list.back().push_back(tmp2); | |
| block_list.back().back().begin = begin; | |
| block_list.back().back().end = begin + r; | |
| block_list.back().back().depth = depth; | |
| } | |
| // Do not recurse on ='s if the pivot was the off-the-end value; | |
| // they're already fully sorted | |
| //if(v != hi) { // recurse on ='s | |
| QSortRange tmp3; | |
| block_list.back().push_back(tmp3); | |
| block_list.back().back().begin = begin + r; | |
| block_list.back().back().end = begin + r + (a-begin) + (end-d-1); | |
| block_list.back().back().depth = depth + 1; | |
| //} | |
| r = d-c; // r <- # of >'s excluding those exhausted | |
| if(r > 0 /*&& v < hi-1*/) { // recurse on >'s | |
| QSortRange tmp4; | |
| block_list.back().push_back(tmp4); | |
| block_list.back().back().begin = end - r; | |
| block_list.back().back().end = end; | |
| block_list.back().back().depth = depth; | |
| } | |
| } | |
| } | |
| /** | |
| * Toplevel function for multikey quicksort over suffixes with double | |
| * swapping. | |
| */ | |
| template<typename T> | |
| void mkeyQSortSuf2( | |
| const T& host, | |
| TIndexOffU *s, | |
| size_t slen, | |
| TIndexOffU *s2, | |
| int hi, | |
| bool verbose = false, | |
| bool sanityCheck = false, | |
| size_t upto = OFF_MASK, | |
| std::vector<size_t>* boundaries = NULL) | |
| { | |
| size_t hlen = length(host); | |
| if(sanityCheck) sanityCheckInputSufs(s, slen); | |
| TIndexOffU *sOrig = NULL; | |
| if(sanityCheck) { | |
| sOrig = new TIndexOffU[slen]; | |
| memcpy(sOrig, s, OFF_SIZE * slen); | |
| } | |
| mkeyQSortSuf2(host, hlen, s, slen, s2, hi, (size_t)0, slen, (size_t)0, upto, boundaries); | |
| if(sanityCheck) { | |
| sanityCheckOrderedSufs(host, hlen, s, slen, upto); | |
| for(size_t i = 0; i < slen; i++) { | |
| assert_eq(s[i], sOrig[s2[i]]); | |
| } | |
| delete[] sOrig; | |
| } | |
| } | |
| // Ugly but necessary; otherwise the compiler chokes dramatically on | |
| // the DifferenceCoverSample<> template args to the next few functions | |
| template <typename T> | |
| class DifferenceCoverSample; | |
| /** | |
| * Constant time | |
| */ | |
| template<typename T1, typename T2> inline | |
| bool sufDcLt(const T1& host, | |
| const T2& s1, | |
| const T2& s2, | |
| const DifferenceCoverSample<T1>& dc, | |
| bool sanityCheck = false) | |
| { | |
| size_t diff = dc.tieBreakOff(s1, s2); | |
| assert_lt(diff, dc.v()); | |
| assert_lt(diff, length(host)-s1); | |
| assert_lt(diff, length(host)-s2); | |
| if(sanityCheck) { | |
| for(size_t i = 0; i < diff; i++) { | |
| assert_eq(host[s1+i], host[s2+i]); | |
| } | |
| } | |
| bool ret = dc.breakTie(s1+diff, s2+diff) < 0; | |
| #ifndef NDEBUG | |
| if(sanityCheck && ret != dollarLt(suffix(host, s1), suffix(host, s2))) { | |
| assert(false); | |
| } | |
| #endif | |
| return ret; | |
| } | |
| /** | |
| * k log(k) | |
| */ | |
| template<typename T> inline | |
| void qsortSufDc(const T& host, | |
| size_t hlen, | |
| TIndexOffU* s, | |
| size_t slen, | |
| const DifferenceCoverSample<T>& dc, | |
| size_t begin, | |
| size_t end, | |
| bool sanityCheck = false) | |
| { | |
| assert_leq(end, slen); | |
| assert_lt(begin, slen); | |
| assert_gt(end, begin); | |
| size_t n = end - begin; | |
| if(n <= 1) return; // 1-element list already sorted | |
| // Note: rand() didn't really cut it here; it seemed to run out of | |
| // randomness and, after a time, returned the same thing over and | |
| // over again | |
| size_t a = (rand() % n) + begin; // choose pivot between begin and end | |
| assert_lt(a, end); | |
| assert_geq(a, begin); | |
| SWAP(s, end-1, a); // move pivot to end | |
| size_t cur = 0; | |
| for(size_t i = begin; i < end-1; i++) { | |
| if(sufDcLt(host, s[i], s[end-1], dc, sanityCheck)) { | |
| if(sanityCheck) | |
| assert(dollarLt(suffix(host, s[i]), suffix(host, s[end-1]))); | |
| assert_lt(begin + cur, end-1); | |
| SWAP(s, i, begin + cur); | |
| cur++; | |
| } | |
| } | |
| // Put pivot into place | |
| assert_lt(cur, end-begin); | |
| SWAP(s, end-1, begin+cur); | |
| if(begin+cur > begin) qsortSufDc(host, hlen, s, slen, dc, begin, begin+cur); | |
| if(end > begin+cur+1) qsortSufDc(host, hlen, s, slen, dc, begin+cur+1, end); | |
| } | |
| /** | |
| * Toplevel function for multikey quicksort over suffixes. | |
| */ | |
| template<typename T1, typename T2> | |
| void mkeyQSortSufDcU8(const T1& host1, | |
| const T2& host, | |
| size_t hlen, | |
| TIndexOffU* s, | |
| size_t slen, | |
| const DifferenceCoverSample<T1>& dc, | |
| int hi, | |
| bool verbose = false, | |
| bool sanityCheck = false) | |
| { | |
| if(sanityCheck) sanityCheckInputSufs(s, slen); | |
| mkeyQSortSufDcU8(host1, host, hlen, s, slen, dc, hi, 0, slen, 0, sanityCheck); | |
| if(sanityCheck) sanityCheckOrderedSufs(host1, hlen, s, slen, OFF_MASK); | |
| } | |
| /** | |
| * Return a boolean indicating whether s1 < s2 using the difference | |
| * cover to break the tie. | |
| */ | |
| template<typename T1, typename T2> inline | |
| bool sufDcLtU8(const T1& host1, | |
| const T2& host, | |
| size_t hlen, | |
| size_t s1, | |
| size_t s2, | |
| const DifferenceCoverSample<T1>& dc, | |
| bool sanityCheck = false) | |
| { | |
| hlen += 0; | |
| size_t diff = dc.tieBreakOff((TIndexOffU)s1, (TIndexOffU)s2); | |
| assert_lt(diff, dc.v()); | |
| assert_lt(diff, hlen-s1); | |
| assert_lt(diff, hlen-s2); | |
| if(sanityCheck) { | |
| for(size_t i = 0; i < diff; i++) { | |
| assert_eq(host[s1+i], host1[s2+i]); | |
| } | |
| } | |
| bool ret = dc.breakTie((TIndexOffU)(s1+diff), (TIndexOffU)(s2+diff)) < 0; | |
| // Sanity-check return value using dollarLt | |
| #ifndef NDEBUG | |
| if(sanityCheck && | |
| ret != dollarLt(suffix(host1, s1), suffix(host1, s2))) | |
| { | |
| assert(false); | |
| } | |
| #endif | |
| return ret; | |
| } | |
| /** | |
| * k log(k) | |
| */ | |
| template<typename T1, typename T2> inline | |
| void qsortSufDcU8(const T1& host1, | |
| const T2& host, | |
| size_t hlen, | |
| TIndexOffU* s, | |
| size_t slen, | |
| const DifferenceCoverSample<T1>& dc, | |
| size_t begin, | |
| size_t end, | |
| bool sanityCheck = false) | |
| { | |
| assert_leq(end, slen); | |
| assert_lt(begin, slen); | |
| assert_gt(end, begin); | |
| size_t n = end - begin; | |
| if(n <= 1) return; // 1-element list already sorted | |
| // Note: rand() didn't really cut it here; it seemed to run out of | |
| // randomness and, after a time, returned the same thing over and | |
| // over again | |
| size_t a = (rand() % n) + begin; // choose pivot between begin and end | |
| assert_lt(a, end); | |
| assert_geq(a, begin); | |
| SWAP(s, end-1, a); // move pivot to end | |
| size_t cur = 0; | |
| for(size_t i = begin; i < end-1; i++) { | |
| if(sufDcLtU8(host1, host, hlen, s[i], s[end-1], dc, sanityCheck)) { | |
| #ifndef NDEBUG | |
| if(sanityCheck) | |
| assert(dollarLt(suffix(host1, s[i]), suffix(host1, s[end-1]))); | |
| assert_lt(begin + cur, end-1); | |
| #endif | |
| SWAP(s, i, begin + cur); | |
| cur++; | |
| } | |
| } | |
| // Put pivot into place | |
| assert_lt(cur, end-begin); | |
| SWAP(s, end-1, begin+cur); | |
| if(begin+cur > begin) qsortSufDcU8(host1, host, hlen, s, slen, dc, begin, begin+cur); | |
| if(end > begin+cur+1) qsortSufDcU8(host1, host, hlen, s, slen, dc, begin+cur+1, end); | |
| } | |
| #define BUCKET_SORT_CUTOFF (4 * 1024 * 1024) | |
| #define SELECTION_SORT_CUTOFF 6 | |
| // 5 64-element buckets for bucket-sorting A, C, G, T, $ | |
| static TIndexOffU bkts[4][4 * 1024 * 1024]; | |
| /** | |
| * Straightforwardly obtain a uint8_t-ized version of t[off]. This | |
| * works fine as long as TStr is not packed. | |
| */ | |
| template<typename TStr> | |
| inline uint8_t get_uint8(const TStr& t, size_t off) { | |
| return t[off]; | |
| } | |
| /** | |
| * For incomprehensible generic-programming reasons, getting a uint8_t | |
| * version of a character in a packed String<> requires casting first | |
| * to Dna then to uint8_t. | |
| */ | |
| template<> | |
| inline uint8_t get_uint8(const String<Dna, Packed<> >& t, size_t off) { | |
| return (uint8_t)(Dna)t[off]; | |
| } | |
| /** | |
| * Return character at offset 'off' from the 'si'th suffix in the array | |
| * 's' of suffixes. If the character is out-of-bounds, return hi. | |
| */ | |
| template<typename TStr> | |
| static inline int char_at_suf_u8(const TStr& host, | |
| size_t hlen, | |
| TIndexOffU* s, | |
| size_t si, | |
| size_t off, | |
| uint8_t hi) | |
| { | |
| return ((off+s[si]) < hlen) ? get_uint8(host, off+s[si]) : (hi); | |
| } | |
| template<typename T1, typename T2> | |
| static void selectionSortSufDcU8( | |
| const T1& host1, | |
| const T2& host, | |
| size_t hlen, | |
| TIndexOffU* s, | |
| size_t slen, | |
| const DifferenceCoverSample<T1>& dc, | |
| uint8_t hi, | |
| size_t begin, | |
| size_t end, | |
| size_t depth, | |
| bool sanityCheck = false) | |
| { | |
| #define ASSERT_SUF_LT(l, r) \ | |
| if(sanityCheck && \ | |
| !dollarLt(suffix(host1, s[l]), \ | |
| suffix(host1, s[r]))) { \ | |
| cout << "l: " << suffixStr(host1, s[l]) << endl; \ | |
| cout << "r: " << suffixStr(host1, s[r]) << endl; \ | |
| assert(false); \ | |
| } | |
| assert_gt(end, begin+1); | |
| assert_leq(end-begin, SELECTION_SORT_CUTOFF); | |
| assert_eq(hi, 4); | |
| size_t v = dc.v(); | |
| if(end == begin+2) { | |
| size_t off = dc.tieBreakOff(s[begin], s[begin+1]); | |
| if(off + s[begin] >= hlen || | |
| off + s[begin+1] >= hlen) | |
| { | |
| off = OFF_MASK; | |
| } | |
| if(off != OFF_MASK) { | |
| if(off < depth) { | |
| qsortSufDcU8<T1,T2>(host1, host, hlen, s, slen, dc, | |
| begin, end, sanityCheck); | |
| // It's helpful for debugging if we call this here | |
| if(sanityCheck) { | |
| sanityCheckOrderedSufs(host1, hlen, s, slen, | |
| OFF_MASK, begin, end); | |
| } | |
| return; | |
| } | |
| v = off - depth + 1; | |
| } | |
| } | |
| assert_leq(v, dc.v()); | |
| size_t lim = v; | |
| assert_geq(lim, 0); | |
| for(size_t i = begin; i < end-1; i++) { | |
| size_t targ = i; | |
| size_t targoff = depth + s[i]; | |
| for(size_t j = i+1; j < end; j++) { | |
| assert_neq(j, targ); | |
| size_t joff = depth + s[j]; | |
| size_t k; | |
| for(k = 0; k <= lim; k++) { | |
| assert_neq(j, targ); | |
| uint8_t jc = (k + joff < hlen) ? get_uint8(host, k + joff) : hi; | |
| uint8_t tc = (k + targoff < hlen) ? get_uint8(host, k + targoff) : hi; | |
| assert(jc != hi || tc != hi); | |
| if(jc > tc) { | |
| // the jth suffix is greater than the current | |
| // smallest suffix | |
| ASSERT_SUF_LT(targ, j); | |
| break; | |
| } else if(jc < tc) { | |
| // the jth suffix is less than the current smallest | |
| // suffix, so update smallest to be j | |
| ASSERT_SUF_LT(j, targ); | |
| targ = j; | |
| targoff = joff; | |
| break; | |
| } else if(k == lim) { | |
| // Check whether either string ends immediately | |
| // after this character | |
| assert_leq(k + joff + 1, hlen); | |
| assert_leq(k + targoff + 1, hlen); | |
| if(k + joff + 1 == hlen) { | |
| // targ < j | |
| assert_neq(k + targoff + 1, hlen); | |
| ASSERT_SUF_LT(targ, j); | |
| break; | |
| } else if(k + targoff + 1 == hlen) { | |
| // j < targ | |
| ASSERT_SUF_LT(j, targ); | |
| targ = j; | |
| targoff = joff; | |
| break; | |
| } | |
| } else { | |
| // They're equal so far, keep going | |
| } | |
| } | |
| // The jth suffix was equal to the current smallest suffix | |
| // up to the difference-cover period, so disambiguate with | |
| // difference cover | |
| if(k == lim+1) { | |
| assert_neq(j, targ); | |
| if(sufDcLtU8(host1, host, hlen, s[j], s[targ], dc, sanityCheck)) { | |
| // j < targ | |
| assert(!sufDcLtU8(host1, host, hlen, s[targ], s[j], dc, sanityCheck)); | |
| ASSERT_SUF_LT(j, targ); | |
| targ = j; | |
| targoff = joff; | |
| } else { | |
| assert(sufDcLtU8(host1, host, hlen, s[targ], s[j], dc, sanityCheck)); | |
| ASSERT_SUF_LT(targ, j); // ! | |
| } | |
| } | |
| } | |
| if(i != targ) { | |
| ASSERT_SUF_LT(targ, i); | |
| // swap i and targ | |
| TIndexOffU tmp = s[i]; | |
| s[i] = s[targ]; | |
| s[targ] = tmp; | |
| } | |
| for(size_t j = i+1; j < end; j++) { | |
| ASSERT_SUF_LT(i, j); | |
| } | |
| } | |
| if(sanityCheck) { | |
| sanityCheckOrderedSufs(host1, hlen, s, slen, OFF_MASK, begin, end); | |
| } | |
| } | |
| template<typename T1, typename T2> | |
| static void bucketSortSufDcU8( | |
| const T1& host1, | |
| const T2& host, | |
| size_t hlen, | |
| TIndexOffU* s, | |
| size_t slen, | |
| const DifferenceCoverSample<T1>& dc, | |
| uint8_t hi, | |
| size_t _begin, | |
| size_t _end, | |
| size_t _depth, | |
| bool sanityCheck = false) | |
| { | |
| // 5 64-element buckets for bucket-sorting A, C, G, T, $ | |
| TIndexOffU* bkts[4]; | |
| for(size_t i = 0; i < 4; i++) { | |
| bkts[i] = new TIndexOffU[4 * 1024 * 1024]; | |
| } | |
| std::vector<std::vector<size_t> > block_list; | |
| bool first = true; | |
| while(true) { | |
| size_t begin = 0, end = 0; | |
| if(first) { | |
| begin = _begin; | |
| end = _end; | |
| first = false; | |
| } else { | |
| if(block_list.size() == 0) { | |
| break; | |
| } | |
| if(block_list.back().size() > 1) { | |
| end = block_list.back().back(); | |
| block_list.back().pop_back(); | |
| begin = block_list.back().back(); | |
| } else { | |
| block_list.resize(block_list.size() - 1); | |
| if(block_list.size() == 0) { | |
| break; | |
| } | |
| } | |
| } | |
| size_t depth = block_list.size() + _depth; | |
| assert_leq(end-begin, BUCKET_SORT_CUTOFF); | |
| assert_eq(hi, 4); | |
| if(end <= begin + 1) { // 1-element list already sorted | |
| continue; | |
| } | |
| if(depth > dc.v()) { | |
| // Quicksort the remaining suffixes using difference cover | |
| // for constant-time comparisons; this is O(k*log(k)) where | |
| // k=(end-begin) | |
| qsortSufDcU8<T1,T2>(host1, host, hlen, s, slen, dc, begin, end, sanityCheck); | |
| continue; | |
| } | |
| if(end-begin <= SELECTION_SORT_CUTOFF) { | |
| // Bucket sort remaining items | |
| selectionSortSufDcU8(host1, host, hlen, s, slen, dc, hi, | |
| begin, end, depth, sanityCheck); | |
| if(sanityCheck) { | |
| sanityCheckOrderedSufs(host1, hlen, s, slen, | |
| OFF_MASK, begin, end); | |
| } | |
| continue; | |
| } | |
| size_t cnts[] = { 0, 0, 0, 0, 0 }; | |
| for(size_t i = begin; i < end; i++) { | |
| size_t off = depth + s[i]; | |
| uint8_t c = (off < hlen) ? get_uint8(host, off) : hi; | |
| assert_leq(c, 4); | |
| if(c == 0) { | |
| s[begin + cnts[0]++] = s[i]; | |
| } else { | |
| bkts[c-1][cnts[c]++] = s[i]; | |
| } | |
| } | |
| assert_eq(cnts[0] + cnts[1] + cnts[2] + cnts[3] + cnts[4], end - begin); | |
| size_t cur = begin + cnts[0]; | |
| if(cnts[1] > 0) { memcpy(&s[cur], bkts[0], cnts[1] << (OFF_SIZE/4 + 1)); cur += cnts[1]; } | |
| if(cnts[2] > 0) { memcpy(&s[cur], bkts[1], cnts[2] << (OFF_SIZE/4 + 1)); cur += cnts[2]; } | |
| if(cnts[3] > 0) { memcpy(&s[cur], bkts[2], cnts[3] << (OFF_SIZE/4 + 1)); cur += cnts[3]; } | |
| if(cnts[4] > 0) { memcpy(&s[cur], bkts[3], cnts[4] << (OFF_SIZE/4 + 1)); } | |
| // This frame is now totally finished with bkts[][], so recursive | |
| // callees can safely clobber it; we're not done with cnts[], but | |
| // that's local to the stack frame. | |
| std::vector<size_t> tmp; | |
| block_list.push_back(tmp); | |
| block_list.back().clear(); | |
| block_list.back().push_back(begin); | |
| for(size_t i = 0; i < 4; i++) { | |
| if(cnts[i] > 0) { | |
| block_list.back().push_back(block_list.back().back() + cnts[i]); | |
| } | |
| } | |
| } | |
| // Done | |
| for(size_t i = 0; i < 4; i++) { | |
| delete [] bkts[i]; | |
| } | |
| } | |
| /** | |
| * Main multikey quicksort function for suffixes. Based on Bentley & | |
| * Sedgewick's algorithm on p.5 of their paper "Fast Algorithms for | |
| * Sorting and Searching Strings". That algorithm has been extended in | |
| * three ways: | |
| * | |
| * 1. Deal with keys of different lengths by checking bounds and | |
| * considering off-the-end values to be 'hi' (b/c our goal is the | |
| * BWT transform, we're biased toward considring prefixes as | |
| * lexicographically *greater* than their extensions). | |
| * 2. The multikey_qsort_suffixes version takes a single host string | |
| * and a list of suffix offsets as input. This reduces memory | |
| * footprint compared to an approach that treats its input | |
| * generically as a set of strings (not necessarily suffixes), thus | |
| * requiring that we store at least two integers worth of | |
| * information for each string. | |
| * 3. Sorting functions take an extra "upto" parameter that upper- | |
| * bounds the depth to which the function sorts. | |
| */ | |
| template<typename T1, typename T2> | |
| void mkeyQSortSufDcU8(const T1& host1, | |
| const T2& host, | |
| size_t hlen, | |
| TIndexOffU* s, | |
| size_t slen, | |
| const DifferenceCoverSample<T1>& dc, | |
| int hi, | |
| size_t begin, | |
| size_t end, | |
| size_t depth, | |
| bool sanityCheck = false) | |
| { | |
| // Helper for making the recursive call; sanity-checks arguments to | |
| // make sure that the problem actually got smaller. | |
| #define MQS_RECURSE_SUF_DC_U8(nbegin, nend, ndepth) { \ | |
| assert(nbegin > begin || nend < end || ndepth > depth); \ | |
| mkeyQSortSufDcU8(host1, host, hlen, s, slen, dc, hi, nbegin, nend, ndepth, sanityCheck); \ | |
| } | |
| assert_leq(begin, slen); | |
| assert_leq(end, slen); | |
| size_t n = end - begin; | |
| if(n <= 1) return; // 1-element list already sorted | |
| if(depth > dc.v()) { | |
| // Quicksort the remaining suffixes using difference cover | |
| // for constant-time comparisons; this is O(k*log(k)) where | |
| // k=(end-begin) | |
| qsortSufDcU8<T1,T2>(host1, host, hlen, s, slen, dc, begin, end, sanityCheck); | |
| if(sanityCheck) { | |
| sanityCheckOrderedSufs(host1, hlen, s, slen, OFF_MASK, begin, end); | |
| } | |
| return; | |
| } | |
| if(n <= BUCKET_SORT_CUTOFF) { | |
| // Bucket sort remaining items | |
| bucketSortSufDcU8(host1, host, hlen, s, slen, dc, | |
| (uint8_t)hi, begin, end, depth, sanityCheck); | |
| if(sanityCheck) { | |
| sanityCheckOrderedSufs(host1, hlen, s, slen, OFF_MASK, begin, end); | |
| } | |
| return; | |
| } | |
| size_t a, b, c, d, r; | |
| CHOOSE_AND_SWAP_PIVOT(SWAP1, CHAR_AT_SUF_U8); // choose pivot, swap to begin | |
| int v = CHAR_AT_SUF_U8(begin, depth); // v <- pivot value | |
| #ifndef NDEBUG | |
| { | |
| bool stillInBounds = false; | |
| for(size_t i = begin; i < end; i++) { | |
| if(depth < (hlen-s[i])) { | |
| stillInBounds = true; | |
| break; | |
| } else { /* already fell off this suffix */ } | |
| } | |
| assert(stillInBounds); // >=1 suffix must still be in bounds | |
| } | |
| #endif | |
| a = b = begin; | |
| c = d = end-1; | |
| while(true) { | |
| // Invariant: everything before a is = pivot, everything | |
| // between a and b is < | |
| int bc = 0; // shouldn't have to init but gcc on Mac complains | |
| while(b <= c && v >= (bc = CHAR_AT_SUF_U8(b, depth))) { | |
| if(v == bc) { | |
| SWAP(s, a, b); a++; | |
| } | |
| b++; | |
| } | |
| // Invariant: everything after d is = pivot, everything | |
| // between c and d is > | |
| int cc = 0; // shouldn't have to init but gcc on Mac complains | |
| //bool hiLatch = true; | |
| while(b <= c && v <= (cc = CHAR_AT_SUF_U8(c, depth))) { | |
| if(v == cc) { | |
| SWAP(s, c, d); d--; | |
| } | |
| //else if(hiLatch && cc == hi) { } | |
| c--; | |
| } | |
| if(b > c) break; | |
| SWAP(s, b, c); | |
| b++; | |
| c--; | |
| } | |
| assert(a > begin || c < end-1); // there was at least one =s | |
| assert_lt(d-c, n); // they can't all have been > pivot | |
| assert_lt(b-a, n); // they can't all have been < pivot | |
| r = min(a-begin, b-a); VECSWAP(s, begin, b-r, r); // swap left = to center | |
| r = min(d-c, end-d-1); VECSWAP(s, b, end-r, r); // swap right = to center | |
| r = b-a; // r <- # of <'s | |
| if(r > 0) { | |
| MQS_RECURSE_SUF_DC_U8(begin, begin + r, depth); // recurse on <'s | |
| } | |
| // Do not recurse on ='s if the pivot was the off-the-end value; | |
| // they're already fully sorted | |
| if(v != hi) { | |
| MQS_RECURSE_SUF_DC_U8(begin + r, begin + r + (a-begin) + (end-d-1), depth+1); // recurse on ='s | |
| } | |
| r = d-c; // r <- # of >'s excluding those exhausted | |
| if(r > 0 && v < hi-1) { | |
| MQS_RECURSE_SUF_DC_U8(end-r, end, depth); // recurse on >'s | |
| } | |
| } | |
| #endif /*MULTIKEY_QSORT_H_*/ |