Permalink
Browse files

fix sPrimeSz indexing

  • Loading branch information...
1 parent c5b8fb7 commit f0b83ad0a102c8fbd3ae023022e7bca2d04b006b @BenLangmead committed Jun 7, 2017
Showing with 49 additions and 46 deletions.
  1. +49 −46 diff_sample.h
View
@@ -600,7 +600,7 @@ class DifferenceCoverSample {
private:
void doBuiltSanityCheck() const;
- void buildSPrime(String<TIndexOffU>& sPrime);
+ void buildSPrime(String<TIndexOffU>& sPrime, size_t padding);
bool built() const {
return length(_isaPrime) > 0;
@@ -687,7 +687,10 @@ void DifferenceCoverSample<TStr>::doBuiltSanityCheck() const {
* Also builds _doffs map.
*/
template <typename TStr>
-void DifferenceCoverSample<TStr>::buildSPrime(String<TIndexOffU>& sPrime) {
+void DifferenceCoverSample<TStr>::buildSPrime(
+ String<TIndexOffU>& sPrime,
+ size_t padding)
+{
const TStr& t = this->text();
const String<uint32_t>& ds = this->ds();
TIndexOffU tlen = (TIndexOffU)length(t);
@@ -721,8 +724,8 @@ void DifferenceCoverSample<TStr>::buildSPrime(String<TIndexOffU>& sPrime) {
#endif
assert_eq(length(_doffs), d+1);
// Size sPrime appropriately
- reserve(sPrime, sPrimeSz+1, Exact()); // reserve extra slot for LS
- fill(sPrime, sPrimeSz, OFF_MASK, Exact());
+ reserve(sPrime, sPrimeSz + padding, Exact()); // reserve extra slot for LS
+ fill(sPrime, sPrimeSz + padding, OFF_MASK, Exact());
// Slot suffixes from text into sPrime according to the mu
// mapping; where the mapping would leave a blank, insert a 0
TIndexOffU added = 0;
@@ -771,7 +774,7 @@ struct VSortingParam {
size_t sPrimeSz;
TIndexOffU* sPrimeOrderArr;
size_t depth;
- const std::vector<size_t>* boundaries;
+ const std::vector<size_t>* boundaries;
size_t* cur;
MUTEX_T* mutex;
};
@@ -839,10 +842,11 @@ void DifferenceCoverSample<TStr>::build(int nthreads) {
uint32_t v = this->v();
assert_gt(v, 2);
// Build s'
+ size_t padding = 1;
String<TIndexOffU> sPrime;
VMSG_NL(" Building sPrime");
- buildSPrime(sPrime);
- size_t sPrimeSz = length(sPrime) - 1;
+ buildSPrime(sPrime, padding);
+ size_t sPrimeSz = length(sPrime) - padding;
assert_gt(length(sPrime), 0);
assert_leq(length(sPrime), length(t)+1); // +1 is because of the end-cap
TIndexOffU nextRank = 0;
@@ -862,12 +866,16 @@ void DifferenceCoverSample<TStr>::build(int nthreads) {
// the mkeyQSortSuf2 routine works on the array for maximum
// efficiency
TIndexOffU *sPrimeArr = (TIndexOffU*)begin(sPrime);
- size_t slen = length(sPrime);
assert_eq(sPrimeArr[0], sPrime[0]);
- assert_eq(sPrimeArr[slen-1], sPrime[slen-1]);
+ assert_eq(sPrimeArr[sPrimeSz-1], sPrime[sPrimeSz-1]);
TIndexOffU *sPrimeOrderArr = (TIndexOffU*)begin(sPrimeOrder);
assert_eq(sPrimeOrderArr[0], sPrimeOrder[0]);
- assert_eq(sPrimeOrderArr[slen-1], sPrimeOrder[slen-1]);
+ assert_eq(sPrimeOrderArr[sPrimeSz-1], sPrimeOrder[sPrimeSz-1]);
+ TIndexOffU *sOrig = NULL;
+ if(this->sanityCheck()) {
+ sOrig = new TIndexOffU[sPrimeSz];
+ memcpy(sOrig, sPrimeArr, OFF_SIZE * sPrimeSz);
+ }
// Sort sample suffixes up to the vth character using a
// multikey quicksort. Sort time is proportional to the
// number of samples times v. It isn't quadratic.
@@ -877,26 +885,21 @@ void DifferenceCoverSample<TStr>::build(int nthreads) {
// sPrimeOrder too. This allows us to easily reconstruct
// what the sort did.
if(nthreads == 1) {
- mkeyQSortSuf2(t, sPrimeArr, slen, sPrimeOrderArr, 4,
- this->verbose(), this->sanityCheck(), v);
+ mkeyQSortSuf2(t, sPrimeArr, sPrimeSz, sPrimeOrderArr, 4,
+ this->verbose(), this->sanityCheck(), v);
} else {
- int query_depth = 0;
+ int query_depth = 0;
int tmp_nthreads = nthreads;
while(tmp_nthreads > 0) {
- query_depth++;
+ query_depth++;
tmp_nthreads >>= 1;
}
std::vector<size_t> boundaries; // bucket boundaries for parallelization
- TIndexOffU *sOrig = NULL;
- if(this->sanityCheck()) {
- sOrig = new TIndexOffU[sPrimeSz];
- memcpy(sOrig, sPrimeArr, OFF_SIZE * sPrimeSz);
- }
- mkeyQSortSuf2(t, sPrimeArr, slen, sPrimeOrderArr, 4,
- this->verbose(), false, query_depth, &boundaries);
+ mkeyQSortSuf2(t, sPrimeArr, sPrimeSz, sPrimeOrderArr, 4,
+ this->verbose(), false, query_depth, &boundaries);
if(boundaries.size() > 0) {
#ifdef WITH_TBB
- tbb::task_group tbb_grp;
+ tbb::task_group tbb_grp;
#else
AutoArray<tthread::thread*> threads(nthreads);
#endif
@@ -905,9 +908,9 @@ void DifferenceCoverSample<TStr>::build(int nthreads) {
size_t cur = 0;
MUTEX_T mutex;
for(int tid = 0; tid < nthreads; tid++) {
- // Calculate bucket sizes by doing a binary search for each
- // suffix and noting where it lands
- tparams[tid].dcs = this;
+ // Calculate bucket sizes by doing a binary search for each
+ // suffix and noting where it lands
+ tparams[tid].dcs = this;
tparams[tid].sPrimeArr = sPrimeArr;
tparams[tid].sPrimeSz = sPrimeSz;
tparams[tid].sPrimeOrderArr = sPrimeOrderArr;
@@ -921,74 +924,74 @@ void DifferenceCoverSample<TStr>::build(int nthreads) {
tbb_grp.wait();
#else
threads[tid] = new tthread::thread(VSorting_worker<TStr>, (void*)&tparams[tid]);
- }
- for (int tid = 0; tid < nthreads; tid++) {
- threads[tid]->join();
- }
+ }
+ for (int tid = 0; tid < nthreads; tid++) {
+ threads[tid]->join();
+ }
#endif
+ }
}
if(this->sanityCheck()) {
- sanityCheckOrderedSufs(t, length(t), sPrimeArr, sPrimeSz, v);
+ sanityCheckOrderedSufs(t, length(t), sPrimeArr, sPrimeSz, v);
for(size_t i = 0; i < sPrimeSz; i++) {
- assert_eq(sPrimeArr[i], sOrig[sPrimeOrderArr[i]]);
+ assert_eq(sPrimeArr[i], sOrig[sPrimeOrderArr[i]]);
}
delete[] sOrig;
}
- }
// Make sure sPrime and sPrimeOrder are consistent with
// their respective backing-store arrays
assert_eq(sPrimeArr[0], sPrime[0]);
- assert_eq(sPrimeArr[slen-1], sPrime[slen-1]);
+ assert_eq(sPrimeArr[sPrimeSz-1], sPrime[sPrimeSz-1]);
assert_eq(sPrimeOrderArr[0], sPrimeOrder[0]);
- assert_eq(sPrimeOrderArr[slen-1], sPrimeOrder[slen-1]);
+ assert_eq(sPrimeOrderArr[sPrimeSz-1], sPrimeOrder[sPrimeSz-1]);
}
// Now assign the ranking implied by the sorted sPrime/sPrimeOrder
// arrays back into sPrime.
VMSG_NL(" Allocating rank array");
- reserve(_isaPrime, length(sPrime)+1, Exact());
+ reserve(_isaPrime, length(sPrime), Exact());
fill(_isaPrime, length(sPrime), OFF_MASK, Exact());
assert_gt(length(_isaPrime), 0);
{
Timer timer(cout, " Ranking v-sort output time: ", this->verbose());
VMSG_NL(" Ranking v-sort output");
- for(size_t i = 0; i < length(sPrime)-1; i++) {
+ for(size_t i = 0; i < sPrimeSz-1; i++) {
// Place the appropriate ranking
_isaPrime[sPrimeOrder[i]] = nextRank;
// If sPrime[i] and sPrime[i+1] are identical up to v, then we
// should give the next suffix the same rank
if(!suffixSameUpTo(t, sPrime[i], sPrime[i+1], v)) nextRank++;
}
- _isaPrime[sPrimeOrder[length(sPrime)-1]] = nextRank; // finish off
+ _isaPrime[sPrimeOrder[sPrimeSz-1]] = nextRank; // finish off
}
// sPrimeOrder is destroyed
// All the information we need is now in _isaPrime
}
- #ifndef NDEBUG
+#ifndef NDEBUG
// Check that all ranks are sane
- for(size_t i = 0; i < length(_isaPrime); i++) {
+ for(size_t i = 0; i < sPrimeSz; i++) {
assert_neq(_isaPrime[i], OFF_MASK);
- assert_lt(_isaPrime[i], length(_isaPrime));
+ assert_lt(_isaPrime[i], sPrimeSz);
}
- #endif
+#endif
// Now pass the sPrimeRanks[] array to LarssonSadakane (in SeqAn).
- append(_isaPrime, length(_isaPrime));
- append(sPrime, length(sPrime));
+ _isaPrime[length(_isaPrime) - 1] = sPrimeSz;
+ sPrime[length(sPrime) - 1] = sPrimeSz;
{
Timer timer(cout, " Invoking Larsson-Sadakane on ranks time: ", this->verbose());
VMSG_NL(" Invoking Larsson-Sadakane on ranks");
_Context_LSS<TIndexOff> c;
c.suffixsort(
(TIndexOff*)begin(_isaPrime, Standard()),
(TIndexOff*)begin(sPrime, Standard()),
- length(sPrime) - 1,
- (unsigned)length(_isaPrime),
+ (TIndexOff)sPrimeSz,
+ (TIndexOff)length(sPrime),
0);
}
// sPrime now contains the suffix array (which we ignore)
assert_eq(length(_isaPrime), length(sPrime));
assert_gt(length(_isaPrime), 0);
// chop off final character of _isaPrime
- resize(_isaPrime, length(_isaPrime)-1);
+ resize(_isaPrime, sPrimeSz);
// Subtract 1 from each isaPrime (to adjust for LarssonSadakane
// always ranking the final suffix as lexicographically first)
for(size_t i = 0; i < length(_isaPrime); i++) {

0 comments on commit f0b83ad

Please sign in to comment.