From b1dab64425dcc4d4aebcd1fc5b0b9e460d615c72 Mon Sep 17 00:00:00 2001 From: Valentin Antonescu Date: Mon, 9 Jun 2014 18:56:34 -0400 Subject: [PATCH] These still needs review. --- aligner.h | 110 +++---- ebwt.h | 20 +- ebwt_search_backtrack.h | 86 +++--- random_source.h | 7 + range.h | 12 +- ref_aligner.h | 750 ++++++++++++++++++++++++------------------------ 6 files changed, 496 insertions(+), 489 deletions(-) diff --git a/aligner.h b/aligner.h index b3bbabc..93903e2 100644 --- a/aligner.h +++ b/aligner.h @@ -408,8 +408,8 @@ class UnpairedAlignerV2 : public Aligner { * Helper for reporting an alignment. */ inline bool report(const Range& ra, - uint32_t first, - uint32_t second, + TIndexOffU first, + TIndexOffU second, uint32_t tlen) { bool ebwtFw = ra.ebwt->fw(); @@ -433,7 +433,7 @@ class UnpairedAlignerV2 : public Aligner { ra.refcs, // reference characters for mms ra.numMms, // # mismatches make_pair(first, second), // position - make_pair(0, 0), // (bogus) mate position + make_pair(0, 0), // (bogus) mate position true, // (bogus) mate orientation 0, // (bogus) mate length make_pair(ra.top, ra.bot),// arrows @@ -808,23 +808,23 @@ class PairedBWAlignerV1 : public Aligner { */ bool report(const Range& rL, // range for upstream mate const Range& rR, // range for downstream mate - uint32_t first, // ref idx - uint32_t upstreamOff, // offset for upstream mate - uint32_t dnstreamOff, // offset for downstream mate - uint32_t tlen, // length of ref + TIndexOffU first, // ref idx + TIndexOffU upstreamOff, // offset for upstream mate + TIndexOffU dnstreamOff, // offset for downstream mate + TIndexOffU tlen, // length of ref bool pairFw, // whether the pair is being mapped to fw strand bool ebwtFwL, bool ebwtFwR, const ReferenceMap* rmap) { assert(gAllowMateContainment || upstreamOff < dnstreamOff); - uint32_t spreadL = rL.bot - rL.top; - uint32_t spreadR = rR.bot - rR.top; - uint32_t oms = min(spreadL, spreadR) - 1; + TIndexOffU spreadL = rL.bot - rL.top; + TIndexOffU spreadR = rR.bot - rR.top; + TIndexOffU oms = min(spreadL, spreadR) - 1; ReadBuf* bufL = pairFw ? bufa_ : bufb_; ReadBuf* bufR = pairFw ? bufb_ : bufa_; - uint32_t lenL = pairFw ? alen_ : blen_; - uint32_t lenR = pairFw ? blen_ : alen_; + TIndexOffU lenL = pairFw ? alen_ : blen_; + TIndexOffU lenR = pairFw ? blen_ : alen_; bool ret; assert(!params_->sink().exceededOverThresh()); params_->setFw(rL.fw); @@ -900,10 +900,10 @@ class PairedBWAlignerV1 : public Aligner { bool report(const Range& rL, // range for upstream mate const Range& rR, // range for downstream mate - uint32_t first, // ref idx - uint32_t upstreamOff, // offset for upstream mate - uint32_t dnstreamOff, // offset for downstream mate - uint32_t tlen, // length of ref + TIndexOffU first, // ref idx + TIndexOffU upstreamOff, // offset for upstream mate + TIndexOffU dnstreamOff, // offset for downstream mate + TIndexOffU tlen, // length of ref bool pairFw, // whether the pair is being mapped to fw strand const ReferenceMap* rmap) { @@ -920,7 +920,7 @@ class PairedBWAlignerV1 : public Aligner { */ bool resolveOutstandingInRef(const bool off1, const UPair& off, - const uint32_t tlen, + const TIndexOffU tlen, const Range& range) { assert(refs_->loaded()); @@ -976,8 +976,8 @@ class PairedBWAlignerV1 : public Aligner { if((uint32_t)maxins <= max(qlen, alen)) { return false; } - const uint32_t tidx = off.first; - const uint32_t toff = off.second; + const TIndexOffU tidx = off.first; + const TIndexOffU toff = off.second; // Set begin/end to be a range of all reference // positions that are legally permitted to be involved in // the alignment of the outstanding mate. @@ -985,8 +985,8 @@ class PairedBWAlignerV1 : public Aligner { // Note that one of the constraints imposed on which positions // go into this range is that the opposite mate cannot be // contained entirely within the anchor mate, or vice versa. - uint32_t begin, end; - uint32_t insDiff = maxins - minins; + TIndexOffU begin, end; + TIndexOffU insDiff = maxins - minins; if(matchRight) { end = toff + maxins; // Adding 1 disallows the opposite from starting at the @@ -999,12 +999,12 @@ class PairedBWAlignerV1 : public Aligner { begin += alen-qlen; } if(end > insDiff + qlen) { - begin = max(begin, end - insDiff - qlen); + begin = max(begin, end - insDiff - qlen); } - end = min(refs_->approxLen(tidx), end); - begin = min(refs_->approxLen(tidx), begin); + end = min(refs_->approxLen(tidx), end); + begin = min(refs_->approxLen(tidx), begin); } else { - if(toff + alen < (uint32_t)maxins) { + if(toff + alen < (TIndexOffU)maxins) { begin = 0; } else { begin = toff + alen - maxins; @@ -1014,7 +1014,7 @@ class PairedBWAlignerV1 : public Aligner { end = toff + alen; } else { end = toff + mi - 1; - end = min(end, toff + alen - minins + qlen - 1); + end = min(end, toff + alen - minins + qlen - 1); if(toff + alen + qlen < (uint32_t)minins + 1) end = 0; } } @@ -1022,7 +1022,7 @@ class PairedBWAlignerV1 : public Aligner { // alignment for the outstanding mate. if(end - begin < qlen) return false; std::vector ranges; - std::vector offs; + std::vector offs; refAligner_->find(1, tidx, refs_, seq, qual, begin, end, ranges, offs, doneFw_ ? &pairs_rc_ : &pairs_fw_, toff, fw); @@ -1033,7 +1033,7 @@ class PairedBWAlignerV1 : public Aligner { r.fw = fw; r.cost |= (r.stratum << 14); r.mate1 = !off1; - const uint32_t result = offs[i]; + const TIndexOffU result = offs[i]; // NOTE: We have no idea what the BW range delimiting the // opposite hit is, because we were operating entirely in // reference space when we found it. For now, we just copy @@ -1102,8 +1102,8 @@ class PairedBWAlignerV1 : public Aligner { assert(drR_->foundRange); const Range& r = drR_->range(); assert(r.repOk()); - uint32_t top = r.top; - uint32_t bot = r.bot; + TIndexOffU top = r.top; + TIndexOffU bot = r.bot; uint32_t qlen = doneFw_? qlen1_ : qlen2_; rchase_->setTopBot(top, bot, qlen, rand_, r.ebwt); *chaseR_ = true; @@ -1147,8 +1147,8 @@ class PairedBWAlignerV1 : public Aligner { assert(drL_->foundRange); const Range& r = drL_->range(); assert(r.repOk()); - uint32_t top = r.top; - uint32_t bot = r.bot; + TIndexOffU top = r.top; + TIndexOffU bot = r.bot; uint32_t qlen = doneFw_? qlen2_ : qlen1_; rchase_->setTopBot(top, bot, qlen, rand_, r.ebwt); *chaseL_ = true; @@ -1687,23 +1687,23 @@ class PairedBWAlignerV2 : public Aligner { */ bool report(const Range& rL, // range for upstream mate const Range& rR, // range for downstream mate - uint32_t first, // ref idx - uint32_t upstreamOff, // offset for upstream mate - uint32_t dnstreamOff, // offset for downstream mate - uint32_t tlen, // length of ref + TIndexOffU first, // ref idx + TIndexOffU upstreamOff, // offset for upstream mate + TIndexOffU dnstreamOff, // offset for downstream mate + TIndexOffU tlen, // length of ref bool pairFw, // whether the pair is being mapped to fw strand bool ebwtFwL, bool ebwtFwR, const ReferenceMap *rmap) { assert(gAllowMateContainment || upstreamOff < dnstreamOff); - uint32_t spreadL = rL.bot - rL.top; - uint32_t spreadR = rR.bot - rR.top; - uint32_t oms = min(spreadL, spreadR) - 1; + TIndexOffU spreadL = rL.bot - rL.top; + TIndexOffU spreadR = rR.bot - rR.top; + TIndexOffU oms = min(spreadL, spreadR) - 1; ReadBuf* bufL = pairFw ? bufa_ : bufb_; ReadBuf* bufR = pairFw ? bufb_ : bufa_; - uint32_t lenL = pairFw ? alen_ : blen_; - uint32_t lenR = pairFw ? blen_ : alen_; + TIndexOffU lenL = pairFw ? alen_ : blen_; + TIndexOffU lenR = pairFw ? blen_ : alen_; bool ret; assert(!params_->sink().exceededOverThresh()); params_->setFw(rL.fw); @@ -1829,7 +1829,7 @@ class PairedBWAlignerV2 : public Aligner { } void resolveOutstanding(const UPair& off, - const uint32_t tlen, + const TIndexOffU tlen, const Range& range) { assert(!this->done); @@ -1866,7 +1866,7 @@ class PairedBWAlignerV2 : public Aligner { * 'offs' array. It returns the number that it actually picked. */ bool resolveOutstandingInRef(const UPair& off, - const uint32_t tlen, + const TIndexOffU tlen, const Range& range) { assert(!donePe_); @@ -1920,14 +1920,14 @@ class PairedBWAlignerV2 : public Aligner { if((uint32_t)maxins <= max(qlen, alen)) { return false; } - const uint32_t tidx = off.first; // text id where anchor mate hit - const uint32_t toff = off.second; // offset where anchor mate hit + const TIndexOffU tidx = off.first; // text id where anchor mate hit + const TIndexOffU toff = off.second; // offset where anchor mate hit // Set begin/end to the range of reference positions where // outstanding mate may align while fulfilling insert-length // constraints. - uint32_t begin, end; + TIndexOffU begin, end; assert_geq(maxins, minins); - uint32_t insDiff = maxins - minins; + TIndexOffU insDiff = maxins - minins; if(matchRight) { end = toff + maxins; // Adding 1 disallows the opposite from starting at the @@ -1940,12 +1940,12 @@ class PairedBWAlignerV2 : public Aligner { begin += alen-qlen; } if(end > insDiff + qlen) { - begin = max(begin, end - insDiff - qlen); + begin = max(begin, end - insDiff - qlen); } - end = min(refs_->approxLen(tidx), end); - begin = min(refs_->approxLen(tidx), begin); + end = min(refs_->approxLen(tidx), end); + begin = min(refs_->approxLen(tidx), begin); } else { - if(toff + alen < (uint32_t)maxins) { + if(toff + alen < (TIndexOffU)maxins) { begin = 0; } else { begin = toff + alen - maxins; @@ -1955,15 +1955,15 @@ class PairedBWAlignerV2 : public Aligner { end = toff + alen - 1; } else { end = toff + mi - 1; - end = min(end, toff + alen - minins + qlen - 1); - if(toff + alen + qlen < (uint32_t)minins + 1) end = 0; + end = min(end, toff + alen - minins + qlen - 1); + if(toff + alen + qlen < (TIndexOffU)minins + 1) end = 0; } } // Check if there's not enough space in the range to fit an // alignment for the outstanding mate. if(end - begin < qlen) return false; std::vector ranges; - std::vector offs; + std::vector offs; refAligner_->find(1, tidx, refs_, seq, qual, begin, end, ranges, offs, pairFw ? &pairs_fw_ : &pairs_rc_, toff, fw); @@ -1973,7 +1973,7 @@ class PairedBWAlignerV2 : public Aligner { r.fw = fw; r.cost |= (r.stratum << 14); r.mate1 = !range.mate1; - const uint32_t result = offs[i]; + const TIndexOffU result = offs[i]; // Just copy the known range's top and bot for now r.top = range.top; r.bot = range.bot; diff --git a/ebwt.h b/ebwt.h index 541d031..5fc3430 100644 --- a/ebwt.h +++ b/ebwt.h @@ -1131,8 +1131,8 @@ class Ebwt { // Searching and reporting void joinedToTextOff(TIndexOffU qlen, TIndexOffU off, TIndexOffU& tidx, TIndexOffU& textoff, TIndexOffU& tlen) const; - inline bool report(const String& query, String* quals, String* name, bool color, char primer, char trimc, bool colExEnds, int snpPhred, const BitPairReference* ref, const std::vector& mmui32, const std::vector& refcs, size_t numMms, TIndexOffU off, uint32_t top, uint32_t bot, uint32_t qlen, int stratum, uint16_t cost, uint32_t patid, uint32_t seed, const EbwtSearchParams& params) const; - inline bool reportChaseOne(const String& query, String* quals, String* name, bool color, char primer, char trimc, bool colExEnds, int snpPhred, const BitPairReference* ref, const std::vector& mmui32, const std::vector& refcs, size_t numMms, TIndexOffU i, uint32_t top, uint32_t bot, uint32_t qlen, int stratum, uint16_t cost, uint32_t patid, uint32_t seed, const EbwtSearchParams& params, SideLocus *l = NULL) const; + inline bool report(const String& query, String* quals, String* name, bool color, char primer, char trimc, bool colExEnds, int snpPhred, const BitPairReference* ref, const std::vector& mmui32, const std::vector& refcs, size_t numMms, TIndexOffU off, TIndexOffU top, TIndexOffU bot, uint32_t qlen, int stratum, uint16_t cost, uint32_t patid, uint32_t seed, const EbwtSearchParams& params) const; + inline bool reportChaseOne(const String& query, String* quals, String* name, bool color, char primer, char trimc, bool colExEnds, int snpPhred, const BitPairReference* ref, const std::vector& mmui32, const std::vector& refcs, size_t numMms, TIndexOffU i, TIndexOffU top, TIndexOffU bot, uint32_t qlen, int stratum, uint16_t cost, uint32_t patid, uint32_t seed, const EbwtSearchParams& params, SideLocus *l = NULL) const; inline bool reportReconstruct(const String& query, String* quals, String* name, String& lbuf, String& rbuf, const uint32_t *mmui32, const char* refcs, size_t numMms, uint32_t i, uint32_t top, uint32_t bot, uint32_t qlen, int stratum, const EbwtSearchParams& params, SideLocus *l = NULL) const; inline int rowL(const SideLocus& l) const; inline TIndexOffU countUpTo(const SideLocus& l, int c) const; @@ -1297,7 +1297,7 @@ class EbwtSearchParams { const BitPairReference* ref, // reference (= NULL if not necessary) const ReferenceMap* rmap, // map to another reference coordinate system bool ebwtFw, // whether index is forward (true) or mirror (false) - const std::vector& mmui32, // mismatch list + const std::vector& mmui32, // mismatch list const std::vector& refcs, // reference characters size_t numMms, // # mismatches UPair h, // ref coords @@ -2650,12 +2650,12 @@ inline bool Ebwt::report(const String& query, bool colExEnds, int snpPhred, const BitPairReference* ref, - const std::vector& mmui32, + const std::vector& mmui32, const std::vector& refcs, size_t numMms, TIndexOffU off, - uint32_t top, - uint32_t bot, + TIndexOffU top, + TIndexOffU bot, uint32_t qlen, int stratum, uint16_t cost, @@ -2689,7 +2689,7 @@ inline bool Ebwt::report(const String& query, refcs, // reference characters for mms numMms, // # mismatches make_pair(tidx, textoff), // position - make_pair(0, 0), // (bogus) mate position + make_pair(0, 0), // (bogus) mate position true, // (bogus) mate orientation 0, // (bogus) mate length make_pair(top, bot), // arrows @@ -2722,12 +2722,12 @@ inline bool Ebwt::reportChaseOne(const String& query, bool colExEnds, int snpPhred, const BitPairReference* ref, - const std::vector& mmui32, + const std::vector& mmui32, const std::vector& refcs, size_t numMms, TIndexOffU i, - uint32_t top, - uint32_t bot, + TIndexOffU top, + TIndexOffU bot, uint32_t qlen, int stratum, uint16_t cost, diff --git a/ebwt_search_backtrack.h b/ebwt_search_backtrack.h index fc31492..c2945d4 100644 --- a/ebwt_search_backtrack.h +++ b/ebwt_search_backtrack.h @@ -105,7 +105,7 @@ class GreedyDFSRangeSource { _qlen = length(*_qry); // Resize _pairs if(_pairs != NULL) { delete[] _pairs; } - _pairs = new uint32_t[_qlen*_qlen*8]; + _pairs = new TIndexOffU[_qlen*_qlen*8]; // Resize _elims if(_elims != NULL) { delete[] _elims; } _elims = new uint8_t[_qlen*_qlen]; @@ -257,9 +257,9 @@ class GreedyDFSRangeSource { uint32_t m = min(_unrevOff, _qlen); if(nsInFtab == 0 && m >= (uint32_t)ftabChars) { uint32_t ftabOff = calcFtabOff(); - uint32_t top = ebwt.ftabHi(ftabOff); - uint32_t bot = ebwt.ftabLo(ftabOff+1); - if(_qlen == (uint32_t)ftabChars && bot > top) { + TIndexOffU top = ebwt.ftabHi(ftabOff); + TIndexOffU bot = ebwt.ftabLo(ftabOff+1); + if(_qlen == (TIndexOffU)ftabChars && bot > top) { // We have a match! if(_reportPartials > 0) { // Oops - we're trying to find seedlings, so we've @@ -335,8 +335,8 @@ class GreedyDFSRangeSource { * aware backtracking. */ bool backtrack(uint32_t depth, - uint32_t top, - uint32_t bot, + TIndexOffU top, + TIndexOffU bot, uint32_t iham = 0, bool disableFtab = false) { @@ -370,11 +370,11 @@ class GreedyDFSRangeSource { uint32_t oneRevOff,// depths < oneRevOff are 1-revisitable uint32_t twoRevOff,// depths < twoRevOff are 2-revisitable uint32_t threeRevOff,// depths < threeRevOff are 3-revisitable - uint32_t top, // top arrow in pair prior to 'depth' - uint32_t bot, // bottom arrow in pair prior to 'depth' + TIndexOffU top, // top arrow in pair prior to 'depth' + TIndexOffU bot, // bottom arrow in pair prior to 'depth' uint32_t ham, // weighted hamming distance so far uint32_t iham, // initial weighted hamming distance - uint32_t* pairs, // portion of pairs array to be used for this backtrack frame + TIndexOffU* pairs, // portion of pairs array to be used for this backtrack frame uint8_t* elims, // portion of elims array to be used for this backtrack frame bool disableFtab = false) { @@ -441,13 +441,13 @@ class GreedyDFSRangeSource { // # positions tied for "best" outgoing qual uint32_t eligibleNum = 0; // total range-size for all eligibles - uint32_t eligibleSz = 0; + TIndexOffU eligibleSz = 0; // If there is just one eligible slot at the moment (a common // case), these are its parameters uint32_t eli = 0; bool elignore = true; // ignore the el values because they didn't come from a recent override - uint32_t eltop = 0; - uint32_t elbot = 0; + TIndexOffU eltop = 0; + TIndexOffU elbot = 0; uint32_t elham = ham; char elchar = 0; int elcint = 0; @@ -547,9 +547,9 @@ class GreedyDFSRangeSource { } } else if(curIsAlternative) { // Clear pairs - memset(&pairs[d*8], 0, 8 * 4); + memset(&pairs[d*8], 0, 8 * OFF_SIZE); // Calculate next quartet of ranges - ebwt.mapLFEx(ltop, lbot, (TIndexOffU*)&pairs[d*8], (TIndexOffU*)&pairs[(d*8)+4]); + ebwt.mapLFEx(ltop, lbot, &pairs[d*8], &pairs[(d*8)+4]); // Update top and bot if(c < 4) { top = pairTop(pairs, d, c); bot = pairBot(pairs, d, c); @@ -585,7 +585,7 @@ class GreedyDFSRangeSource { for(int i = 0; i < 4; i++) { if(i == c) continue; assert_leq(pairTop(pairs, d, i), pairBot(pairs, d, i)); - uint32_t spread = pairSpread(pairs, d, i); + TIndexOffU spread = pairSpread(pairs, d, i); if(spread == 0) { // Indicate this char at this position is // eliminated as far as this backtracking frame is @@ -761,8 +761,8 @@ class GreedyDFSRangeSource { ASSERT_ONLY(uint32_t eligiblesVisited = 0); size_t i = d, j = 0; assert_geq(i, depth); - uint32_t bttop = 0; - uint32_t btbot = 0; + TIndexOffU bttop = 0; + TIndexOffU btbot = 0; uint32_t btham = ham; char btchar = 0; int btcint = 0; @@ -779,7 +779,7 @@ class GreedyDFSRangeSource { if((qi == lowAltQual || !_considerQuals) && elims[i] != 15) { // This is the leftmost eligible position with at // least one remaining backtrack target - uint32_t posSz = 0; + TIndexOffU posSz = 0; // Add up the spreads for A, C, G, T for(j = 0; j < 4; j++) { if((elims[i] & (1 << j)) == 0) { @@ -845,7 +845,7 @@ class GreedyDFSRangeSource { // Slide over to the next backtacking frame within // pairs and elims; won't interfere with our frame or // any of our parents' frames - uint32_t *newPairs = pairs + (_qlen*8); + TIndexOffU *newPairs = pairs + (_qlen*8); uint8_t *newElims = elims + (_qlen); // If we've selected a backtracking target that's in // the 1-revisitable region, then we ask the recursive @@ -919,7 +919,7 @@ class GreedyDFSRangeSource { // so we can go ahead and use it // Rightmost char gets least significant bit-pairs int ftabChars = ebwt._eh._ftabChars; - uint32_t ftabOff = (*_qry)[_qlen - ftabChars]; + TIndexOffU ftabOff = (*_qry)[_qlen - ftabChars]; assert_lt(ftabOff, 4); assert_lt(ftabOff, ebwt._eh._ftabLen-1); for(int j = ftabChars - 1; j > 0; j--) { @@ -927,14 +927,14 @@ class GreedyDFSRangeSource { if(_qlen-j == icur) { ftabOff |= btcint; } else { - assert_lt((uint32_t)(*_qry)[_qlen-j], 4); - ftabOff |= (uint32_t)(*_qry)[_qlen-j]; + assert_lt((TIndexOffU)(*_qry)[_qlen-j], 4); + ftabOff |= (TIndexOffU)(*_qry)[_qlen-j]; } assert_lt(ftabOff, ebwt._eh._ftabLen-1); } assert_lt(ftabOff, ebwt._eh._ftabLen-1); - uint32_t ftabTop = ebwt.ftabHi(ftabOff); - uint32_t ftabBot = ebwt.ftabLo(ftabOff+1); + TIndexOffU ftabTop = ebwt.ftabHi(ftabOff); + TIndexOffU ftabBot = ebwt.ftabLo(ftabOff+1); assert_geq(ftabBot, ftabTop); if(ftabTop == ftabBot) { ret = false; @@ -1011,7 +1011,7 @@ class GreedyDFSRangeSource { // 'depth' up to 'd') lowAltQual = 0xff; for(size_t k = d; k >= depth && k <= _qlen; k--) { - uint32_t kcur = _qlen - k - 1; // current offset into _qry + size_t kcur = _qlen - k - 1; // current offset into _qry uint8_t kq = qualAt(kcur); if(k < unrevOff) break; // already visited all revisitable positions bool kCurIsAlternative = (ham + mmPenalty(_maqPenalty, kq) <= _qualThresh); @@ -1028,7 +1028,7 @@ class GreedyDFSRangeSource { for(int l = 0; l < 4; l++) { if((elims[k] & (1 << l)) == 0) { // Not yet eliminated - uint32_t spread = pairSpread(pairs, k, l); + TIndexOffU spread = pairSpread(pairs, k, l); if(kCurOverridesEligible) { // Clear previous eligible results; // this one's better @@ -1165,7 +1165,7 @@ class GreedyDFSRangeSource { * currently under consideration. Stratum is equal to the number * of mismatches in the seed portion of the alignment. */ - int calcStratum(const std::vector& mms, uint32_t stackDepth) { + int calcStratum(const std::vector& mms, uint32_t stackDepth) { int stratum = 0; for(size_t i = 0; i < stackDepth; i++) { if(mms[i] >= (_qlen - _3revOff)) { @@ -1204,7 +1204,7 @@ class GreedyDFSRangeSource { bool hhCheckTop(uint32_t stackDepth, uint32_t d, uint32_t iham, - const std::vector& mms, + const std::vector& mms, uint64_t prehits = 0xffffffffffffffffllu) { assert_eq(0, _reportPartials); @@ -1249,7 +1249,7 @@ class GreedyDFSRangeSource { int loHalfMms = 0, hiHalfMms = 0; assert_geq(mms.size(), stackDepth); for(size_t i = 0; i < stackDepth; i++) { - uint32_t d = _qlen - mms[i] - 1; + TIndexOffU d = _qlen - mms[i] - 1; if (d < _5depth) hiHalfMms++; else if(d < _3depth) loHalfMms++; else assert(false); @@ -1287,18 +1287,18 @@ class GreedyDFSRangeSource { } /// Get the top offset for character c at depth d - inline uint32_t pairTop(uint32_t* pairs, size_t d, size_t c) { + inline TIndexOffU pairTop(TIndexOffU* pairs, size_t d, size_t c) { return pairs[d*8 + c + 0]; } /// Get the bot offset for character c at depth d - inline uint32_t pairBot(uint32_t* pairs, size_t d, size_t c) { + inline TIndexOffU pairBot(TIndexOffU* pairs, size_t d, size_t c) { return pairs[d*8 + c + 4]; } /// Get the spread between the bot and top offsets for character c /// at depth d - inline uint32_t pairSpread(uint32_t* pairs, size_t d, size_t c) { + inline TIndexOffU pairSpread(TIndexOffU* pairs, size_t d, size_t c) { assert_geq(pairBot(pairs, d, c), pairTop(pairs, d, c)); return pairBot(pairs, d, c) - pairTop(pairs, d, c); } @@ -1456,8 +1456,8 @@ class GreedyDFSRangeSource { * full alignments were successfully reported and the caller can * stop searching. */ - bool reportAlignment(uint32_t stackDepth, uint32_t top, - uint32_t bot, uint16_t cost) + bool reportAlignment(uint32_t stackDepth, TIndexOffU top, + TIndexOffU bot, uint16_t cost) { #ifndef NDEBUG // No two elements of _mms[] should be the same @@ -1524,8 +1524,8 @@ class GreedyDFSRangeSource { * caller can stop searching. */ bool reportFullAlignment(uint32_t stackDepth, - uint32_t top, - uint32_t bot, + TIndexOffU top, + TIndexOffU bot, int stratum, uint16_t cost) { @@ -1537,11 +1537,11 @@ class GreedyDFSRangeSource { return false; } assert(!_reportRanges); - uint32_t spread = bot - top; + TIndexOffU spread = bot - top; // Pick a random spot in the range to begin report - uint32_t r = top + (_rand.nextU32() % spread); - for(uint32_t i = 0; i < spread; i++) { - uint32_t ri = r + i; + TIndexOffU r = top + (_rand.nextU() % spread); + for(TIndexOffU i = 0; i < spread; i++) { + TIndexOffU ri = r + i; if(ri >= bot) ri -= spread; // reportChaseOne takes the _mms[] list in terms of // their indices into the query string; not in terms @@ -1670,7 +1670,7 @@ class GreedyDFSRangeSource { uint32_t lowAltQual, uint32_t eligibleSz, uint32_t eligibleNum, - uint32_t* pairs, + TIndexOffU* pairs, uint8_t* elims) { // Sanity check that the lay of the land is as we @@ -1716,12 +1716,12 @@ class GreedyDFSRangeSource { bool _maqPenalty; uint32_t _qualThresh; // only accept hits with weighted // hamming distance <= _qualThresh - uint32_t *_pairs; // ranges, leveled in parallel + TIndexOffU *_pairs; // ranges, leveled in parallel // with decision stack uint8_t *_elims; // which ranges have been // eliminated, leveled in parallel // with decision stack - std::vector _mms; // array for holding mismatches + std::vector _mms; // array for holding mismatches std::vector _refcs; // array for holding mismatches // Entries in _mms[] are in terms of offset into // _qry - not in terms of offset from 3' or 5' end diff --git a/random_source.h b/random_source.h index fd99927..540e5e7 100644 --- a/random_source.h +++ b/random_source.h @@ -50,6 +50,13 @@ class RandomSource { } } + template + T nextU() { + if(sizeof(T)>4) + return nextU64(); + return nextU32(); + } + uint32_t nextU2() { assert(inited_); if(lastOff > 30) { diff --git a/range.h b/range.h index 7ab11df..8fca78f 100644 --- a/range.h +++ b/range.h @@ -13,7 +13,7 @@ */ struct Range { Range() : - top(0xffffffff), bot(0), cost(0), stratum(0), numMms(0), + top(OFF_MASK), bot(0), cost(0), stratum(0), numMms(0), fw(true), mate1(true), ebwt(NULL) { mms.clear(); @@ -21,21 +21,21 @@ struct Range { } bool valid() const { - return top < 0xffffffff; + return top < OFF_MASK; } void invalidate() { - top = 0xffffffff; + top = OFF_MASK; } - uint32_t top; // top of range - uint32_t bot; // bottom of range + TIndexOffU top; // top of range + TIndexOffU bot; // bottom of range uint16_t cost; // cost uint32_t stratum; // stratum uint32_t numMms; // # mismatches bool fw; // the forward orientation of read aligned? bool mate1; // read aligned is #1 mate/single? - std::vector mms; // list of positions with mismatches + std::vector mms; // list of positions with mismatches std::vector refcs; // reference characters at mismatch positions const Ebwt > *ebwt; diff --git a/ref_aligner.h b/ref_aligner.h index c85beec..4eee21a 100644 --- a/ref_aligner.h +++ b/ref_aligner.h @@ -62,22 +62,22 @@ class RefAligner { * reference string ref. Store the alignment details in range. */ virtual void find(uint32_t numToFind, - const uint32_t tidx, + const size_t tidx, const BitPairReference *refs, const TDna5Str& qry, const TCharStr& quals, - uint32_t begin, - uint32_t end, + TIndexOffU begin, + TIndexOffU end, TRangeVec& ranges, - TU32Vec& results, + std::vector& results, TSetPairs* pairs = NULL, - uint32_t aoff = 0xffffffff, + TIndexOffU aoff = OFF_MASK, bool seedOnLeft = false) { assert_gt(numToFind, 0); assert_gt(end, begin); - uint32_t spread = end - begin + (color_ ? 1 : 0); - uint32_t spreadPlus = spread + 12; + TIndexOffU spread = end - begin + (color_ ? 1 : 0); + TIndexOffU spreadPlus = spread + 12; // Make sure the buffer is large enough to accommodate the spread if(spreadPlus > this->refbufSz_) { this->newBuf(spreadPlus); @@ -111,16 +111,16 @@ class RefAligner { * find anchors quickly. */ virtual void anchor64Find(uint32_t numToFind, - uint32_t tidx, + size_t tidx, uint8_t* ref, const TDna5Str& qry, const TCharStr& quals, - uint32_t begin, - uint32_t end, + TIndexOffU begin, + TIndexOffU end, TRangeVec& ranges, - TU32Vec& results, + std::vector& results, TSetPairs* pairs = NULL, - uint32_t aoff = 0xffffffff, + TIndexOffU aoff = OFF_MASK, bool seedOnLeft = false) const = 0; /** @@ -192,16 +192,16 @@ class ExactRefAligner : public RefAligner { * 'qry' is the 5' end. */ void naiveFind(uint32_t numToFind, - uint32_t tidx, + size_t tidx, uint8_t* ref, const TDna5Str& qry, const TCharStr& quals, - uint32_t begin, - uint32_t end, + TIndexOffU begin, + TIndexOffU end, TRangeVec& ranges, - TU32Vec& results, + std::vector& results, TSetPairs* pairs, - uint32_t aoff, + TIndexOffU aoff, bool seedOnLeft) const { assert_gt(numToFind, 0); @@ -209,13 +209,13 @@ class ExactRefAligner : public RefAligner { assert_geq(end - begin, qlen); // caller should have checked this assert_gt(end, begin); assert_gt(qlen, 0); - uint32_t qend = end - qlen; - uint32_t lim = qend - begin; - uint32_t halfway = begin + (lim >> 1); + size_t qend = end - qlen; + size_t lim = qend - begin; + size_t halfway = begin + (lim >> 1); bool hi = false; - for(uint32_t i = 1; i <= lim+1; i++) { - uint32_t ri; // leftmost position in candidate alignment - uint32_t rir; // same, minus begin; for indexing into ref[] + for(size_t i = 1; i <= lim+1; i++) { + size_t ri; // leftmost position in candidate alignment + size_t rir; // same, minus begin; for indexing into ref[] if(hi) { ri = halfway + (i >> 1); rir = ri - begin; assert_leq(ri, qend); @@ -226,7 +226,7 @@ class ExactRefAligner : public RefAligner { hi = !hi; // Do the naive comparison bool match = true; - for(uint32_t j = 0; j < qlen; j++) { + for(size_t j = 0; j < qlen; j++) { #if 0 // Count Ns in the reference as mismatches const int q = (int)qry[j]; @@ -277,16 +277,16 @@ class ExactRefAligner : public RefAligner { * 'qry' is the 5' end. */ virtual void anchor64Find(uint32_t numToFind, - uint32_t tidx, + size_t tidx, uint8_t *ref, const TDna5Str& qry, const TCharStr& quals, - uint32_t begin, - uint32_t end, + TIndexOffU begin, + TIndexOffU end, TRangeVec& ranges, - TU32Vec& results, + std::vector& results, TSetPairs* pairs, - uint32_t aoff, // offset of anchor mate + TIndexOffU aoff, // offset of anchor mate bool seedOnLeft) const { assert_gt(numToFind, 0); @@ -305,9 +305,9 @@ class ExactRefAligner : public RefAligner { #endif const uint32_t anchorBitPairs = min(qlen, 32); // anchorOverhang = # read bases not included in the anchor - const uint32_t anchorOverhang = qlen <= 32 ? 0 : qlen - 32; - const uint32_t lim = end - qlen - begin; - const uint32_t halfway = begin + (lim >> 1); + const size_t anchorOverhang = qlen <= 32 ? 0 : qlen - 32; + const size_t lim = end - qlen - begin; + const size_t halfway = begin + (lim >> 1); uint64_t anchor = 0llu; uint64_t buffw = 0llu; // Set up a mask that we'll apply to the two bufs every round @@ -323,9 +323,9 @@ class ExactRefAligner : public RefAligner { // contents of the 'buffw' dword. If there are fewer than 32 // anchorBitPairs, the content will be packed into the least // significant bits of the word. - uint32_t skipLeftToRights = 0; - uint32_t skipRightToLefts = 0; - for(uint32_t i = 0; i < anchorBitPairs; i++) { + size_t skipLeftToRights = 0; + size_t skipRightToLefts = 0; + for(size_t i = 0; i < anchorBitPairs; i++) { int c = (int)qry[i]; // next query character assert_leq(c, 4); if(c & 4) { @@ -361,12 +361,12 @@ class ExactRefAligner : public RefAligner { // were, we might need to make the 'anchorOverhang' adjustment on // the left end of the range rather than the right. bool hi = false; - uint32_t riHi = halfway; - uint32_t rirHi = halfway - begin; - uint32_t rirHiAnchor = rirHi + anchorBitPairs - 1; - uint32_t riLo = halfway + 1; - uint32_t rirLo = halfway - begin + 1; - for(uint32_t i = 1; i <= lim + 1; i++) { + size_t riHi = halfway; + size_t rirHi = halfway - begin; + size_t rirHiAnchor = rirHi + anchorBitPairs - 1; + size_t riLo = halfway + 1; + size_t rirLo = halfway - begin + 1; + for(size_t i = 1; i <= lim + 1; i++) { int r; // new reference char assert_lt(skipLeftToRights, qlen); assert_leq(skipRightToLefts, qlen); @@ -417,13 +417,13 @@ class ExactRefAligner : public RefAligner { } // Seed hit! bool foundHit = true; - uint32_t ri = hi ? riLo : riHi; - uint32_t rir = hi ? rirLo : rirHi; + size_t ri = hi ? riLo : riHi; + size_t rir = hi ? rirLo : rirHi; if(anchorOverhang > 0) { // Does the non-anchor part of the alignment (the // "overhang") ruin it? bool skipCandidate = false; - for(uint32_t j = 0; j < anchorOverhang; j++) { + for(size_t j = 0; j < anchorOverhang; j++) { assert_lt(ri + anchorBitPairs + j, end); int rc = (int)ref[rir + anchorBitPairs + j]; if(rc == 4) { @@ -526,16 +526,16 @@ class OneMMRefAligner : public RefAligner { * 'qry' is the 5' end. */ void naiveFind(uint32_t numToFind, - uint32_t tidx, + size_t tidx, uint8_t* ref, const TDna5Str& qry, const TCharStr& quals, - uint32_t begin, - uint32_t end, + TIndexOffU begin, + TIndexOffU end, TRangeVec& ranges, TU32Vec& results, TSetPairs* pairs, - uint32_t aoff, + TIndexOffU aoff, bool seedOnLeft) const { assert_gt(numToFind, 0); @@ -543,13 +543,13 @@ class OneMMRefAligner : public RefAligner { assert_geq(end - begin, qlen); // caller should have checked this assert_gt(end, begin); assert_gt(qlen, 0); - uint32_t qend = end - qlen; - uint32_t lim = qend - begin; - uint32_t halfway = begin + (lim >> 1); + size_t qend = end - qlen; + size_t lim = qend - begin; + size_t halfway = begin + (lim >> 1); bool hi = false; - for(uint32_t i = 1; i <= lim+1; i++) { - uint32_t ri; // leftmost position in candidate alignment - uint32_t rir; // same, minus begin; for indexing into ref[] + for(size_t i = 1; i <= lim+1; i++) { + size_t ri; // leftmost position in candidate alignment + size_t rir; // same, minus begin; for indexing into ref[] if(hi) { ri = halfway + (i >> 1); rir = ri - begin; assert_leq(ri, qend); @@ -561,9 +561,9 @@ class OneMMRefAligner : public RefAligner { // Do the naive comparison bool match = true; int refc = -1; - uint32_t mmOff = 0xffffffff; + TIndexOffU mmOff = OFF_MASK; int mms = 0; - for(uint32_t j = 0; j < qlen; j++) { + for(size_t j = 0; j < qlen; j++) { #if 0 // Count Ns in the reference as mismatches const int q = (int)qry[j]; @@ -621,16 +621,16 @@ class OneMMRefAligner : public RefAligner { * 'qry' is the 5' end. */ virtual void anchor64Find(uint32_t numToFind, - uint32_t tidx, + size_t tidx, uint8_t* ref, const TDna5Str& qry, const TCharStr& quals, - uint32_t begin, - uint32_t end, + TIndexOffU begin, + TIndexOffU end, TRangeVec& ranges, - TU32Vec& results, + std::vector& results, TSetPairs* pairs = NULL, - uint32_t aoff = 0xffffffff, + TIndexOffU aoff = OFF_MASK, bool seedOnLeft = false) const { assert_gt(numToFind, 0); @@ -651,9 +651,9 @@ class OneMMRefAligner : public RefAligner { const int lhsShift = ((anchorBitPairs - 1) << 1); const uint32_t anchorCushion = 32 - anchorBitPairs; // anchorOverhang = # read bases not included in the anchor - const uint32_t anchorOverhang = (qlen <= 32 ? 0 : (qlen - 32)); - const uint32_t lim = end - qlen - begin; - const uint32_t halfway = begin + (lim >> 1); + const size_t anchorOverhang = (qlen <= 32 ? 0 : (qlen - 32)); + const size_t lim = end - qlen - begin; + const size_t halfway = begin + (lim >> 1); uint64_t anchor = 0llu; uint64_t buffw = 0llu; // rotating ref sequence buffer // OR the 'diff' buffer with this so that we can always count @@ -669,11 +669,11 @@ class OneMMRefAligner : public RefAligner { } int nsInAnchor = 0; int nPos = -1; - uint32_t skipLeftToRights = 0; - uint32_t skipRightToLefts = 0; + size_t skipLeftToRights = 0; + size_t skipRightToLefts = 0; // Construct the 'anchor' 64-bit buffer so that it holds all of // the first 'anchorBitPairs' bit pairs of the query. - for(uint32_t i = 0; i < anchorBitPairs; i++) { + for(size_t i = 0; i < anchorBitPairs; i++) { int c = (int)qry[i]; // next query character int r = (int)ref[halfway - begin + i]; // next reference character if(r & 4) { @@ -706,7 +706,7 @@ class OneMMRefAligner : public RefAligner { } // Check whether read is disqualified by Ns outside of the anchor // region - for(uint32_t i = anchorBitPairs; i < qlen; i++) { + for(size_t i = anchorBitPairs; i < qlen; i++) { if((int)qry[i] == 4) { if(++nsInAnchor > 1) { assert_eq(r2.size(), ranges.size() - rangesInitSz); @@ -721,12 +721,12 @@ class OneMMRefAligner : public RefAligner { // were, we might need to make the 'anchorOverhang' adjustment on // the left end of the range rather than the right. bool hi = false; - uint32_t riHi = halfway; - uint32_t rirHi = halfway - begin; - uint32_t rirHiAnchor = rirHi + anchorBitPairs - 1; - uint32_t riLo = halfway + 1; - uint32_t rirLo = halfway - begin + 1; - for(uint32_t i = 1; i <= lim + 1; i++) { + size_t riHi = halfway; + size_t rirHi = halfway - begin; + size_t rirHiAnchor = rirHi + anchorBitPairs - 1; + size_t riLo = halfway + 1; + size_t rirLo = halfway - begin + 1; + for(size_t i = 1; i <= lim + 1; i++) { int r; // new reference char uint64_t diff; assert_lt(skipLeftToRights, qlen); @@ -777,8 +777,8 @@ class OneMMRefAligner : public RefAligner { } if((diff & 0xffffffff00000000llu) && (diff & 0x00000000ffffffffllu)) continue; - uint32_t ri = hi ? riLo : riHi; - uint32_t rir = hi ? rirLo : rirHi; + size_t ri = hi ? riLo : riHi; + size_t rir = hi ? rirLo : rirHi; // Could use pop count uint8_t *diff8 = reinterpret_cast(&diff); // As a first cut, see if there are too many mismatches in @@ -824,7 +824,7 @@ class OneMMRefAligner : public RefAligner { if(anchorOverhang > 0) { assert_leq(ri + anchorBitPairs + anchorOverhang, end); bool skipCandidate = false; - for(uint32_t j = 0; j < anchorOverhang; j++) { + for(size_t j = 0; j < anchorOverhang; j++) { int rc = (int)ref[rir + 32 + j]; if(rc == 4) { // Oops, encountered an N in the reference in @@ -930,16 +930,16 @@ class TwoMMRefAligner : public RefAligner { * 'qry' is the 5' end. */ void naiveFind(uint32_t numToFind, - uint32_t tidx, + size_t tidx, uint8_t* ref, const TDna5Str& qry, const TCharStr& quals, - uint32_t begin, - uint32_t end, + TIndexOffU begin, + TIndexOffU end, TRangeVec& ranges, TU32Vec& results, TSetPairs* pairs, - uint32_t aoff, + TIndexOffU aoff, bool seedOnLeft) const { assert_gt(numToFind, 0); @@ -947,13 +947,13 @@ class TwoMMRefAligner : public RefAligner { assert_geq(end - begin, qlen); // caller should have checked this assert_gt(end, begin); assert_gt(qlen, 0); - uint32_t qend = end - qlen; - uint32_t lim = qend - begin; - uint32_t halfway = begin + (lim >> 1); + size_t qend = end - qlen; + size_t lim = qend - begin; + size_t halfway = begin + (lim >> 1); bool hi = false; - for(uint32_t i = 1; i <= lim+1; i++) { - uint32_t ri; // leftmost position in candidate alignment - uint32_t rir; // same, minus begin; for indexing into ref[] + for(size_t i = 1; i <= lim+1; i++) { + TIndexOffU ri; // leftmost position in candidate alignment + TIndexOffU rir; // same, minus begin; for indexing into ref[] if(hi) { ri = halfway + (i >> 1); rir = ri - begin; assert_leq(ri, qend); @@ -965,11 +965,11 @@ class TwoMMRefAligner : public RefAligner { // Do the naive comparison bool match = true; int refc1 = -1; - uint32_t mmOff1 = 0xffffffff; + TIndexOffU mmOff1 = OFF_MASK; int refc2 = -1; - uint32_t mmOff2 = 0xffffffff; + TIndexOffU mmOff2 = OFF_MASK; int mms = 0; - for(uint32_t j = 0; j < qlen; j++) { + for(size_t j = 0; j < qlen; j++) { #if 0 // Count Ns in the reference as mismatches const int q = (int)qry[j]; @@ -1040,23 +1040,23 @@ class TwoMMRefAligner : public RefAligner { * 'qry' is the 5' end. */ virtual void anchor64Find(uint32_t numToFind, - uint32_t tidx, + size_t tidx, uint8_t* ref, const TDna5Str& qry, const TCharStr& quals, - uint32_t begin, - uint32_t end, + TIndexOffU begin, + TIndexOffU end, TRangeVec& ranges, - TU32Vec& results, + std::vector& results, TSetPairs* pairs = NULL, - uint32_t aoff = 0xffffffff, + TIndexOffU aoff = OFF_MASK, bool seedOnLeft = false) const { assert_gt(numToFind, 0); ASSERT_ONLY(const uint32_t rangesInitSz = ranges.size()); ASSERT_ONLY(uint32_t duplicates = 0); ASSERT_ONLY(uint32_t r2i = 0); - const uint32_t qlen = seqan::length(qry); + const size_t qlen = seqan::length(qry); assert_geq(end - begin, qlen); // caller should have checked this assert_gt(end, begin); assert_gt(qlen, 0); @@ -1071,8 +1071,8 @@ class TwoMMRefAligner : public RefAligner { const uint32_t anchorCushion = 32 - anchorBitPairs; // anchorOverhang = # read bases not included in the anchor const uint32_t anchorOverhang = (qlen <= 32 ? 0 : (qlen - 32)); - const uint32_t lim = end - qlen - begin; - const uint32_t halfway = begin + (lim >> 1); + const size_t lim = end - qlen - begin; + const size_t halfway = begin + (lim >> 1); uint64_t anchor = 0llu; uint64_t buffw = 0llu; // rotating ref sequence buffer // OR the 'diff' buffer with this so that we can always count @@ -1090,11 +1090,11 @@ class TwoMMRefAligner : public RefAligner { uint32_t nPoss = 0; int nPos1 = -1; int nPos2 = -1; - uint32_t skipLeftToRights = 0; - uint32_t skipRightToLefts = 0; + size_t skipLeftToRights = 0; + size_t skipRightToLefts = 0; // Construct the 'anchor' 64-bit buffer so that it holds all of // the first 'anchorBitPairs' bit pairs of the query. - for(uint32_t i = 0; i < anchorBitPairs; i++) { + for(size_t i = 0; i < anchorBitPairs; i++) { int c = (int)qry[i]; // next query character int r = (int)ref[halfway - begin + i]; // next reference character if(r & 4) { @@ -1133,7 +1133,7 @@ class TwoMMRefAligner : public RefAligner { assert_leq(nPoss, 2); // Check whether read is disqualified by Ns outside of the anchor // region - for(uint32_t i = anchorBitPairs; i < qlen; i++) { + for(size_t i = anchorBitPairs; i < qlen; i++) { if((int)qry[i] == 4) { if(++nsInAnchor > 2) { return; // can't match if query has Ns @@ -1147,12 +1147,12 @@ class TwoMMRefAligner : public RefAligner { // were, we might need to make the 'anchorOverhang' adjustment on // the left end of the range rather than the right. bool hi = false; - uint32_t riHi = halfway; - uint32_t rirHi = halfway - begin; - uint32_t rirHiAnchor = rirHi + anchorBitPairs - 1; - uint32_t riLo = halfway + 1; - uint32_t rirLo = halfway - begin + 1; - uint32_t i; + size_t riHi = halfway; + size_t rirHi = halfway - begin; + size_t rirHiAnchor = rirHi + anchorBitPairs - 1; + size_t riLo = halfway + 1; + size_t rirLo = halfway - begin + 1; + size_t i; for(i = 1; i <= lim + 1; i++) { int r; // new reference char uint64_t diff; @@ -1205,8 +1205,8 @@ class TwoMMRefAligner : public RefAligner { if((diff & 0xfffff00000000000llu) && (diff & 0x00000ffffff00000llu) && (diff & 0x00000000000fffffllu)) continue; - uint32_t ri = hi ? riLo : riHi; - uint32_t rir = hi ? rirLo : rirHi; + size_t ri = hi ? riLo : riHi; + size_t rir = hi ? rirLo : rirHi; // Could use pop count uint8_t *diff8 = reinterpret_cast(&diff); // As a first cut, see if there are too many mismatches in @@ -1219,9 +1219,9 @@ class TwoMMRefAligner : public RefAligner { u8toMms[(int)diff8[4]] + u8toMms[(int)diff8[5]] + u8toMms[(int)diff8[6]]; - uint32_t mmpos1 = 0xffffffff; + TIndexOffU mmpos1 = OFF_MASK; int refc1 = -1; - uint32_t mmpos2 = 0xffffffff; + TIndexOffU mmpos2 = OFF_MASK; int refc2 = -1; if(diffs > 2) { // Too many differences @@ -1356,14 +1356,14 @@ class TwoMMRefAligner : public RefAligner { assert_eq(0, range.mms.size()); assert_eq(0, range.refcs.size()); if(diffs > 0) { - assert_neq(mmpos1, 0xffffffff); + assert_neq(mmpos1, OFF_MASK); assert_eq(mmpos1, r2[r2i].mms[0]); assert_neq(-1, refc1); assert_eq(refc1, r2[r2i].refcs[0]); range.mms.push_back(mmpos1); range.refcs.push_back(refc1); if(diffs > 1) { - assert_neq(mmpos2, 0xffffffff); + assert_neq(mmpos2, OFF_MASK); assert_eq(mmpos2, r2[r2i].mms[1]); assert_neq(-1, refc2); assert_eq(refc2, r2[r2i].refcs[1]); @@ -1408,16 +1408,16 @@ class ThreeMMRefAligner : public RefAligner { * 'qry' is the 5' end. */ void naiveFind(uint32_t numToFind, - uint32_t tidx, + size_t tidx, uint8_t* ref, const TDna5Str& qry, const TCharStr& quals, - uint32_t begin, - uint32_t end, + TIndexOffU begin, + TIndexOffU end, TRangeVec& ranges, TU32Vec& results, TSetPairs* pairs, - uint32_t aoff, + TIndexOffU aoff, bool seedOnLeft) const { assert_gt(numToFind, 0); @@ -1425,13 +1425,13 @@ class ThreeMMRefAligner : public RefAligner { assert_geq(end - begin, qlen); // caller should have checked this assert_gt(end, begin); assert_gt(qlen, 0); - uint32_t qend = end - qlen; - uint32_t lim = qend - begin; - uint32_t halfway = begin + (lim >> 1); + TIndexOffU qend = end - qlen; + TIndexOffU lim = qend - begin; + TIndexOffU halfway = begin + (lim >> 1); bool hi = false; - for(uint32_t i = 1; i <= lim+1; i++) { - uint32_t ri; // leftmost position in candidate alignment - uint32_t rir; // same, minus begin; for indexing into ref[] + for(size_t i = 1; i <= lim+1; i++) { + size_t ri; // leftmost position in candidate alignment + size_t rir; // same, minus begin; for indexing into ref[] if(hi) { ri = halfway + (i >> 1); rir = ri - begin; assert_leq(ri, qend); @@ -1443,13 +1443,13 @@ class ThreeMMRefAligner : public RefAligner { // Do the naive comparison bool match = true; int refc1 = -1; - uint32_t mmOff1 = 0xffffffff; + TIndexOffU mmOff1 = OFF_MASK; int refc2 = -1; - uint32_t mmOff2 = 0xffffffff; + TIndexOffU mmOff2 = OFF_MASK; int refc3 = -1; - uint32_t mmOff3 = 0xffffffff; + TIndexOffU mmOff3 = OFF_MASK; int mms = 0; - for(uint32_t j = 0; j < qlen; j++) { + for(size_t j = 0; j < qlen; j++) { #if 0 // Count Ns in the reference as mismatches const int q = (int)qry[j]; @@ -1530,23 +1530,23 @@ class ThreeMMRefAligner : public RefAligner { * 'qry' is the 5' end. */ virtual void anchor64Find(uint32_t numToFind, - uint32_t tidx, + size_t tidx, uint8_t* ref, const TDna5Str& qry, const TCharStr& quals, - uint32_t begin, - uint32_t end, + TIndexOffU begin, + TIndexOffU end, TRangeVec& ranges, - TU32Vec& results, + std::vector& results, TSetPairs* pairs = NULL, - uint32_t aoff = 0xffffffff, + TIndexOffU aoff = OFF_MASK, bool seedOnLeft = false) const { assert_gt(numToFind, 0); ASSERT_ONLY(const uint32_t rangesInitSz = ranges.size()); ASSERT_ONLY(uint32_t duplicates = 0); ASSERT_ONLY(uint32_t r2i = 0); - const uint32_t qlen = seqan::length(qry); + const size_t qlen = seqan::length(qry); assert_geq(end - begin, qlen); // caller should have checked this assert_gt(end, begin); assert_gt(qlen, 0); @@ -1561,8 +1561,8 @@ class ThreeMMRefAligner : public RefAligner { const uint32_t anchorCushion = 32 - anchorBitPairs; // anchorOverhang = # read bases not included in the anchor const uint32_t anchorOverhang = (qlen <= 32 ? 0 : (qlen - 32)); - const uint32_t lim = end - qlen - begin; - const uint32_t halfway = begin + (lim >> 1); + const size_t lim = end - qlen - begin; + const size_t halfway = begin + (lim >> 1); uint64_t anchor = 0llu; uint64_t buffw = 0llu; // rotating ref sequence buffer // OR the 'diff' buffer with this so that we can always count @@ -1581,11 +1581,11 @@ class ThreeMMRefAligner : public RefAligner { int nPos1 = -1; int nPos2 = -1; int nPos3 = -1; - uint32_t skipLeftToRights = 0; - uint32_t skipRightToLefts = 0; + size_t skipLeftToRights = 0; + size_t skipRightToLefts = 0; // Construct the 'anchor' 64-bit buffer so that it holds all of // the first 'anchorBitPairs' bit pairs of the query. - for(uint32_t i = 0; i < anchorBitPairs; i++) { + for(size_t i = 0; i < anchorBitPairs; i++) { int c = (int)qry[i]; // next query character int r = (int)ref[halfway - begin + i]; // next reference character if(r & 4) { @@ -1628,7 +1628,7 @@ class ThreeMMRefAligner : public RefAligner { assert_leq(nPoss, 3); // Check whether read is disqualified by Ns outside of the anchor // region - for(uint32_t i = anchorBitPairs; i < qlen; i++) { + for(size_t i = anchorBitPairs; i < qlen; i++) { if((int)qry[i] == 4) { if(++nsInAnchor > 3) { assert_eq(r2.size(), ranges.size() - rangesInitSz); @@ -1643,12 +1643,12 @@ class ThreeMMRefAligner : public RefAligner { // were, we might need to make the 'anchorOverhang' adjustment on // the left end of the range rather than the right. bool hi = false; - uint32_t riHi = halfway; - uint32_t rirHi = halfway - begin; - uint32_t rirHiAnchor = rirHi + anchorBitPairs - 1; - uint32_t riLo = halfway + 1; - uint32_t rirLo = halfway - begin + 1; - for(uint32_t i = 1; i <= lim + 1; i++) { + size_t riHi = halfway; + size_t rirHi = halfway - begin; + size_t rirHiAnchor = rirHi + anchorBitPairs - 1; + size_t riLo = halfway + 1; + size_t rirLo = halfway - begin + 1; + for(size_t i = 1; i <= lim + 1; i++) { int r; // new reference char uint64_t diff; assert_lt(skipLeftToRights, qlen); @@ -1701,8 +1701,8 @@ class ThreeMMRefAligner : public RefAligner { (diff & 0x0000ffff00000000llu) && (diff & 0x00000000ffff0000llu) && (diff & 0x000000000000ffffllu)) continue; - uint32_t ri = hi ? riLo : riHi; - uint32_t rir = hi ? rirLo : rirHi; + size_t ri = hi ? riLo : riHi; + size_t rir = hi ? rirLo : rirHi; // Could use pop count uint8_t *diff8 = reinterpret_cast(&diff); // As a first cut, see if there are too many mismatches in @@ -1715,11 +1715,11 @@ class ThreeMMRefAligner : public RefAligner { u8toMms[(int)diff8[4]] + u8toMms[(int)diff8[5]] + u8toMms[(int)diff8[6]]; - uint32_t mmpos1 = 0xffffffff; + TIndexOffU mmpos1 = OFF_MASK; int refc1 = -1; - uint32_t mmpos2 = 0xffffffff; + TIndexOffU mmpos2 = OFF_MASK; int refc2 = -1; - uint32_t mmpos3 = 0xffffffff; + TIndexOffU mmpos3 = OFF_MASK; int refc3 = -1; if(diffs > 3) { // Too many differences @@ -1901,21 +1901,21 @@ class ThreeMMRefAligner : public RefAligner { assert_eq(0, range.mms.size()); assert_eq(0, range.refcs.size()); if(diffs > 0) { - assert_neq(mmpos1, 0xffffffff); + assert_neq(mmpos1, OFF_MASK); assert_eq(mmpos1, r2[r2i].mms[0]); assert_neq(-1, refc1); assert_eq(refc1, r2[r2i].refcs[0]); range.mms.push_back(mmpos1); range.refcs.push_back(refc1); if(diffs > 1) { - assert_neq(mmpos2, 0xffffffff); + assert_neq(mmpos2, OFF_MASK); assert_eq(mmpos2, r2[r2i].mms[1]); assert_neq(-1, refc2); assert_eq(refc2, r2[r2i].refcs[1]); range.mms.push_back(mmpos2); range.refcs.push_back(refc2); if(diffs > 2) { - assert_neq(mmpos3, 0xffffffff); + assert_neq(mmpos3, OFF_MASK); assert_eq(mmpos3, r2[r2i].mms[2]); assert_neq(-1, refc3); assert_eq(refc3, r2[r2i].refcs[2]); @@ -1983,16 +1983,16 @@ class Seed0RefAligner : public RefAligner { * length > 0. */ void naiveFind(uint32_t numToFind, - uint32_t tidx, + size_t tidx, uint8_t* ref, const TDna5Str& qry, const TCharStr& quals, - uint32_t begin, - uint32_t end, + TIndexOffU begin, + TIndexOffU end, TRangeVec& ranges, TU32Vec& results, TSetPairs* pairs, - uint32_t aoff, + TIndexOffU aoff, bool seedOnLeft) const { assert_gt(numToFind, 0); @@ -2002,8 +2002,8 @@ class Seed0RefAligner : public RefAligner { assert_geq(end - begin, qlen); // caller should have checked this assert_gt(this->seedLen_, 0); const uint32_t slen = min(qlen, this->seedLen_); - uint32_t qend = end; - uint32_t qbegin = begin; + TIndexOffU qend = end; + TIndexOffU qbegin = begin; // If the seed is on the left-hand side of the alignment, then // leave a gap at the right-hand side of the interval; // otherwise, do the opposite @@ -2015,17 +2015,17 @@ class Seed0RefAligner : public RefAligner { qbegin += qlen; } // lim = number of alignments to try - const uint32_t lim = qend - qbegin; + const TIndexOffU lim = qend - qbegin; // halfway = position in the reference to start at (and then // we work our way out to the right and to the left). - const uint32_t halfway = qbegin + (lim >> 1); + const size_t halfway = qbegin + (lim >> 1); // Vectors for holding edit information std::vector nonSeedMms; std::vector nonSeedRefcs; bool hi = false; - for(uint32_t i = 1; i <= lim+1; i++) { - uint32_t ri; // leftmost position in candidate alignment - uint32_t rir; // same, minus begin; for indexing into ref[] + for(size_t i = 1; i <= lim+1; i++) { + size_t ri; // leftmost position in candidate alignment + size_t rir; // same, minus begin; for indexing into ref[] if(hi) { ri = halfway + (i >> 1); rir = ri - begin; assert_leq(ri, qend); @@ -2041,15 +2041,15 @@ class Seed0RefAligner : public RefAligner { nonSeedMms.clear(); nonSeedRefcs.clear(); // Walk through each position of the alignment - for(uint32_t jj = 0; jj < qlen; jj++) { - uint32_t j = jj; + for(size_t jj = 0; jj < qlen; jj++) { + size_t j = jj; if(!seedOnLeft) { // If seed is on the right, scan right-to-left j = qlen - jj - 1; } else { // Go left-to-right } - uint32_t rirj = rir + j; + size_t rirj = rir + j; if(!seedOnLeft) { assert_geq(rir, jj); rirj = rir - jj - 1; @@ -2160,16 +2160,16 @@ class Seed0RefAligner : public RefAligner { * length > 0. */ virtual void anchor64Find(uint32_t numToFind, - uint32_t tidx, + size_t tidx, uint8_t* ref, const TDna5Str& qry, const TCharStr& quals, - uint32_t begin, - uint32_t end, + TIndexOffU begin, + TIndexOffU end, TRangeVec& ranges, - TU32Vec& results, + std::vector& results, TSetPairs* pairs = NULL, - uint32_t aoff = 0xffffffff, + TIndexOffU aoff = OFF_MASK, bool seedOnLeft = false) const { assert_gt(numToFind, 0); @@ -2181,7 +2181,7 @@ class Seed0RefAligner : public RefAligner { assert_gt(end, begin); assert_geq(end - begin, qlen); // caller should have checked this assert_gt(this->seedLen_, 0); - uint32_t slen = min(qlen, this->seedLen_); + size_t slen = min(qlen, this->seedLen_); #ifndef NDEBUG // Get results from the naive matcher for sanity-checking TRangeVec r2; TU32Vec re2; @@ -2197,8 +2197,8 @@ class Seed0RefAligner : public RefAligner { const uint32_t readSeedOverhang = (slen == qlen ? 0 : (qlen - slen)); assert(anchorCushion == 0 || seedAnchorOverhang == 0); assert_eq(qlen, readSeedOverhang + slen); - uint32_t qend = end; - uint32_t qbegin = begin; + TIndexOffU qend = end; + TIndexOffU qbegin = begin; if(seedOnLeft) { // Leave read-sized gap on right-hand side of the interval qend -= qlen; @@ -2209,9 +2209,9 @@ class Seed0RefAligner : public RefAligner { qend -= slen; } // lim = # possible alignments in the range - const uint32_t lim = qend - qbegin; + const size_t lim = qend - qbegin; // halfway = point on the genome to radiate out from - const uint32_t halfway = qbegin + (lim >> 1); + const size_t halfway = qbegin + (lim >> 1); uint64_t anchor = 0llu; uint64_t buffw = 0llu; // rotating ref sequence buffer // Set up a mask that we'll apply to the two bufs every round @@ -2222,13 +2222,13 @@ class Seed0RefAligner : public RefAligner { clearMask >>= ((32-anchorBitPairs) << 1); useMask = true; } - uint32_t skipLeftToRights = 0; - uint32_t skipRightToLefts = 0; + size_t skipLeftToRights = 0; + size_t skipRightToLefts = 0; const uint32_t halfwayRi = halfway - begin; // Construct the 'anchor' 64-bit buffer so that it holds all of // the first 'anchorBitPairs' bit pairs of the query. - for(uint32_t ii = 0; ii < anchorBitPairs; ii++) { - uint32_t i = ii; + for(size_t ii = 0; ii < anchorBitPairs; ii++) { + size_t i = ii; if(!seedOnLeft) { // Fill in the anchor using characters from the right- // hand side of the query (but take the characters in @@ -2243,8 +2243,8 @@ class Seed0RefAligner : public RefAligner { // alignments that involve an N in the reference. Set // the skip* variables accordingly. r = 0; - uint32_t lrSkips = ii; - uint32_t rlSkips = qlen - ii; + size_t lrSkips = ii; + size_t rlSkips = qlen - ii; if(!seedOnLeft && readSeedOverhang) { lrSkips += readSeedOverhang; assert_geq(rlSkips, readSeedOverhang); @@ -2273,8 +2273,8 @@ class Seed0RefAligner : public RefAligner { // region but outside the anchor region if(seedAnchorOverhang) { assert_lt(anchorBitPairs, slen); - for(uint32_t ii = anchorBitPairs; ii < slen; ii++) { - uint32_t i = ii; + for(size_t ii = anchorBitPairs; ii < slen; ii++) { + size_t i = ii; if(!seedOnLeft) { i = qlen - slen + ii; } @@ -2291,19 +2291,19 @@ class Seed0RefAligner : public RefAligner { // between right-to-left and left-to-right shifts, until all of // the positions from qbegin to qend have been covered. bool hi = false; - uint32_t riHi = halfway; - uint32_t rirHi = halfway - begin; - uint32_t rirHiAnchor = rirHi + anchorBitPairs - 1; - uint32_t riLo = halfway + 1; - uint32_t rirLo = halfway - begin + 1; - uint32_t lrSkips = anchorBitPairs; - uint32_t rlSkips = qlen; + size_t riHi = halfway; + size_t rirHi = halfway - begin; + size_t rirHiAnchor = rirHi + anchorBitPairs - 1; + size_t riLo = halfway + 1; + size_t rirLo = halfway - begin + 1; + size_t lrSkips = anchorBitPairs; + size_t rlSkips = qlen; if(!seedOnLeft && readSeedOverhang) { lrSkips += readSeedOverhang; assert_geq(rlSkips, readSeedOverhang); rlSkips -= readSeedOverhang; } - for(uint32_t i = 1; i <= lim + 1; i++) { + for(size_t i = 1; i <= lim + 1; i++) { int r; // new reference char uint64_t diff; assert_leq(skipLeftToRights, qlen); @@ -2353,14 +2353,14 @@ class Seed0RefAligner : public RefAligner { diff = bufbw ^ anchor; } if(diff) continue; - uint32_t ri = hi ? riLo : riHi; - uint32_t rir = hi ? rirLo : rirHi; + size_t ri = hi ? riLo : riHi; + size_t rir = hi ? rirLo : rirHi; unsigned int ham = 0; // If the seed is longer than the anchor, then scan the // rest of the seed characters bool foundHit = true; if(seedAnchorOverhang) { - for(uint32_t j = 0; j < seedAnchorOverhang; j++) { + for(size_t j = 0; j < seedAnchorOverhang; j++) { int rc = (int)ref[rir + anchorBitPairs + j]; if(rc == 4) { // Oops, encountered an N in the reference in @@ -2389,7 +2389,7 @@ class Seed0RefAligner : public RefAligner { foundHit = false; // Skip this candidate break; } - uint32_t qoff = anchorBitPairs + j; + TIndexOffU qoff = anchorBitPairs + j; if(!seedOnLeft) { qoff += readSeedOverhang; } @@ -2410,9 +2410,9 @@ class Seed0RefAligner : public RefAligner { int mms = 0; // start counting total mismatches if((qlen - slen) > 0) { // Going left-to-right - for(uint32_t j = 0; j < readSeedOverhang; j++) { - uint32_t roff = rir + slen + j; - uint32_t qoff = slen + j; + for(size_t j = 0; j < readSeedOverhang; j++) { + TIndexOffU roff = rir + slen + j; + TIndexOffU qoff = slen + j; if(!seedOnLeft) { assert_geq(roff, qlen); roff -= qlen; @@ -2586,16 +2586,16 @@ class Seed1RefAligner : public RefAligner { * length > 0. */ void naiveFind(uint32_t numToFind, - uint32_t tidx, + size_t tidx, uint8_t* ref, const TDna5Str& qry, const TCharStr& quals, - uint32_t begin, - uint32_t end, + TIndexOffU begin, + TIndexOffU end, TRangeVec& ranges, TU32Vec& results, TSetPairs* pairs, - uint32_t aoff, + TIndexOffU aoff, bool seedOnLeft) const { assert_gt(numToFind, 0); @@ -2605,8 +2605,8 @@ class Seed1RefAligner : public RefAligner { assert_geq(end - begin, qlen); // caller should have checked this assert_gt(this->seedLen_, 0); const uint32_t slen = min(qlen, this->seedLen_); - uint32_t qend = end; - uint32_t qbegin = begin; + TIndexOffU qend = end; + TIndexOffU qbegin = begin; // If the seed is on the left-hand side of the alignment, then // leave a gap at the right-hand side of the interval; // otherwise, do the opposite @@ -2618,19 +2618,19 @@ class Seed1RefAligner : public RefAligner { qbegin += qlen; } // lim = number of alignments to try - const uint32_t lim = qend - qbegin; + const size_t lim = qend - qbegin; // halfway = position in the reference to start at (and then // we work our way out to the right and to the left). - const uint32_t halfway = qbegin + (lim >> 1); + const size_t halfway = qbegin + (lim >> 1); // Vectors for holding edit information std::vector nonSeedMms; assert_eq(0, nonSeedMms.size()); std::vector nonSeedRefcs; assert_eq(0, nonSeedRefcs.size()); bool hi = false; - for(uint32_t i = 1; i <= lim+1; i++) { - uint32_t ri; // leftmost position in candidate alignment - uint32_t rir; // same, minus begin; for indexing into ref[] + for(size_t i = 1; i <= lim+1; i++) { + size_t ri; // leftmost position in candidate alignment + size_t rir; // same, minus begin; for indexing into ref[] if(hi) { ri = halfway + (i >> 1); rir = ri - begin; assert_leq(ri, qend); @@ -2642,22 +2642,22 @@ class Seed1RefAligner : public RefAligner { // Do the naive comparison bool match = true; int refc = -1; - uint32_t mmOff = 0xffffffff; + TIndexOffU mmOff = OFF_MASK; int mms = 0; int seedMms = 0; unsigned int ham = 0; nonSeedMms.clear(); nonSeedRefcs.clear(); // Walk through each position of the alignment - for(uint32_t jj = 0; jj < qlen; jj++) { - uint32_t j = jj; + for(size_t jj = 0; jj < qlen; jj++) { + size_t j = jj; if(!seedOnLeft) { // If seed is on the right, scan right-to-left j = qlen - jj - 1; } else { // Go left-to-right } - uint32_t rirj = rir + j; + size_t rirj = rir + j; if(!seedOnLeft) { assert_geq(rir, jj); rirj = rir - jj - 1; @@ -2785,16 +2785,16 @@ class Seed1RefAligner : public RefAligner { * length > 0. */ virtual void anchor64Find(uint32_t numToFind, - uint32_t tidx, + size_t tidx, uint8_t* ref, const TDna5Str& qry, const TCharStr& quals, - uint32_t begin, - uint32_t end, + TIndexOffU begin, + TIndexOffU end, TRangeVec& ranges, - TU32Vec& results, + std::vector& results, TSetPairs* pairs = NULL, - uint32_t aoff = 0xffffffff, + TIndexOffU aoff = OFF_MASK, bool seedOnLeft = false) const { assert_gt(numToFind, 0); @@ -2806,7 +2806,7 @@ class Seed1RefAligner : public RefAligner { assert_gt(end, begin); assert_geq(end - begin, qlen); // caller should have checked this assert_gt(this->seedLen_, 0); - uint32_t slen = min(qlen, this->seedLen_); + size_t slen = min(qlen, this->seedLen_); #ifndef NDEBUG // Get results from the naive matcher for sanity-checking TRangeVec r2; TU32Vec re2; @@ -2822,8 +2822,8 @@ class Seed1RefAligner : public RefAligner { const uint32_t readSeedOverhang = (slen == qlen ? 0 : (qlen - slen)); assert(anchorCushion == 0 || seedAnchorOverhang == 0); assert_eq(qlen, readSeedOverhang + slen); - uint32_t qend = end; - uint32_t qbegin = begin; + TIndexOffU qend = end; + TIndexOffU qbegin = begin; if(seedOnLeft) { // Leave read-sized gap on right-hand side of the interval qend -= qlen; @@ -2834,9 +2834,9 @@ class Seed1RefAligner : public RefAligner { qend -= slen; } // lim = # possible alignments in the range - const uint32_t lim = qend - qbegin; + const size_t lim = qend - qbegin; // halfway = point on the genome to radiate out from - const uint32_t halfway = qbegin + (lim >> 1); + const size_t halfway = qbegin + (lim >> 1); uint64_t anchor = 0llu; uint64_t buffw = 0llu; // rotating ref sequence buffer // OR the 'diff' buffer with this so that we can always count @@ -2852,13 +2852,13 @@ class Seed1RefAligner : public RefAligner { } int nsInSeed = 0; int nPos = -1; - uint32_t skipLeftToRights = 0; - uint32_t skipRightToLefts = 0; - const uint32_t halfwayRi = halfway - begin; + size_t skipLeftToRights = 0; + size_t skipRightToLefts = 0; + const size_t halfwayRi = halfway - begin; // Construct the 'anchor' 64-bit buffer so that it holds all of // the first 'anchorBitPairs' bit pairs of the query. - for(uint32_t ii = 0; ii < anchorBitPairs; ii++) { - uint32_t i = ii; + for(size_t ii = 0; ii < anchorBitPairs; ii++) { + size_t i = ii; if(!seedOnLeft) { // Fill in the anchor using characters from the right- // hand side of the query (but take the characters in @@ -2873,8 +2873,8 @@ class Seed1RefAligner : public RefAligner { // alignments that involve an N in the reference. Set // the skip* variables accordingly. r = 0; - uint32_t lrSkips = ii; - uint32_t rlSkips = qlen - ii; + size_t lrSkips = ii; + size_t rlSkips = qlen - ii; if(!seedOnLeft && readSeedOverhang) { lrSkips += readSeedOverhang; assert_geq(rlSkips, readSeedOverhang); @@ -2911,8 +2911,8 @@ class Seed1RefAligner : public RefAligner { // region but outside the anchor region if(seedAnchorOverhang) { assert_lt(anchorBitPairs, slen); - for(uint32_t ii = anchorBitPairs; ii < slen; ii++) { - uint32_t i = ii; + for(size_t ii = anchorBitPairs; ii < slen; ii++) { + size_t i = ii; if(!seedOnLeft) { i = qlen - slen + ii; } @@ -2931,13 +2931,13 @@ class Seed1RefAligner : public RefAligner { // between right-to-left and left-to-right shifts, until all of // the positions from qbegin to qend have been covered. bool hi = false; - uint32_t riHi = halfway; - uint32_t rirHi = halfway - begin; - uint32_t rirHiAnchor = rirHi + anchorBitPairs - 1; - uint32_t riLo = halfway + 1; - uint32_t rirLo = halfway - begin + 1; - uint32_t lrSkips = anchorBitPairs; - uint32_t rlSkips = qlen; + size_t riHi = halfway; + size_t rirHi = halfway - begin; + size_t rirHiAnchor = rirHi + anchorBitPairs - 1; + size_t riLo = halfway + 1; + size_t rirLo = halfway - begin + 1; + size_t lrSkips = anchorBitPairs; + size_t rlSkips = qlen; if(!seedOnLeft && readSeedOverhang) { lrSkips += readSeedOverhang; assert_geq(rlSkips, readSeedOverhang); @@ -2994,8 +2994,8 @@ class Seed1RefAligner : public RefAligner { } if((diff & 0xffffffff00000000llu) && (diff & 0x00000000ffffffffllu)) continue; - uint32_t ri = hi ? riLo : riHi; - uint32_t rir = hi ? rirLo : rirHi; + size_t ri = hi ? riLo : riHi; + size_t rir = hi ? rirLo : rirHi; // Could use pop count uint8_t *diff8 = reinterpret_cast(&diff); // As a first cut, see if there are too many mismatches in @@ -3008,7 +3008,7 @@ class Seed1RefAligner : public RefAligner { u8toMms[(int)diff8[4]] + u8toMms[(int)diff8[5]] + u8toMms[(int)diff8[6]]; - uint32_t mmpos = 0xffffffff; + TIndexOffU mmpos = OFF_MASK; int refc = -1; unsigned int ham = 0; if(diffs > 1) { @@ -3061,7 +3061,7 @@ class Seed1RefAligner : public RefAligner { // rest of the seed characters bool foundHit = true; if(seedAnchorOverhang) { - for(uint32_t j = 0; j < seedAnchorOverhang; j++) { + for(size_t j = 0; j < seedAnchorOverhang; j++) { int rc = (int)ref[rir + anchorBitPairs + j]; if(rc == 4) { // Oops, encountered an N in the reference in @@ -3090,7 +3090,7 @@ class Seed1RefAligner : public RefAligner { foundHit = false; // Skip this candidate break; } - uint32_t qoff = anchorBitPairs + j; + TIndexOffU qoff = anchorBitPairs + j; if(!seedOnLeft) { qoff += readSeedOverhang; } @@ -3100,7 +3100,7 @@ class Seed1RefAligner : public RefAligner { foundHit = false; break; } else { - assert_eq(0xffffffff, mmpos); + assert_eq(OFF_MASK, mmpos); mmpos = qoff; assert_eq(-1, refc); refc = "ACGT"[(int)ref[rir + anchorBitPairs + j]]; @@ -3125,9 +3125,9 @@ class Seed1RefAligner : public RefAligner { int mms = diffs; // start counting total mismatches if((qlen - slen) > 0) { // Going left-to-right - for(uint32_t j = 0; j < readSeedOverhang; j++) { - uint32_t roff = rir + slen + j; - uint32_t qoff = slen + j; + for(size_t j = 0; j < readSeedOverhang; j++) { + TIndexOffU roff = rir + slen + j; + TIndexOffU qoff = slen + j; if(!seedOnLeft) { assert_geq(roff, qlen); roff -= qlen; @@ -3231,7 +3231,7 @@ class Seed1RefAligner : public RefAligner { if(mms > 0) { ASSERT_ONLY(size_t mmcur = 0); if(seedOnLeft && diffs > 0) { - assert_neq(mmpos, 0xffffffff); + assert_neq(mmpos, OFF_MASK); assert_lt(mmpos, qlen); assert_lt(mmcur, (size_t)mms); assert_eq(mmpos, r2[r2i].mms[mmcur]); @@ -3252,7 +3252,7 @@ class Seed1RefAligner : public RefAligner { range.refcs.push_back(nonSeedRefcs[i]); } if(!seedOnLeft && diffs > 0) { - assert_neq(mmpos, 0xffffffff); + assert_neq(mmpos, OFF_MASK); assert_lt(mmpos, qlen); assert_lt(mmcur, (size_t)mms); assert_eq(mmpos, r2[r2i].mms[mmcur]); @@ -3325,16 +3325,16 @@ class Seed2RefAligner : public RefAligner { * length > 0. */ void naiveFind(uint32_t numToFind, - uint32_t tidx, + size_t tidx, uint8_t* ref, const TDna5Str& qry, const TCharStr& quals, - uint32_t begin, - uint32_t end, + TIndexOffU begin, + TIndexOffU end, TRangeVec& ranges, TU32Vec& results, TSetPairs* pairs, - uint32_t aoff, + TIndexOffU aoff, bool seedOnLeft) const { assert_gt(numToFind, 0); @@ -3344,8 +3344,8 @@ class Seed2RefAligner : public RefAligner { assert_geq(end - begin, qlen); // caller should have checked this assert_gt(this->seedLen_, 0); const uint32_t slen = min(qlen, this->seedLen_); - uint32_t qend = end; - uint32_t qbegin = begin; + TIndexOffU qend = end; + TIndexOffU qbegin = begin; // If the seed is on the left-hand side of the alignment, then // leave a gap at the right-hand side of the interval; // otherwise, do the opposite @@ -3357,17 +3357,17 @@ class Seed2RefAligner : public RefAligner { qbegin += qlen; } // lim = number of alignments to try - const uint32_t lim = qend - qbegin; + const size_t lim = qend - qbegin; // halfway = position in the reference to start at (and then // we work our way out to the right and to the left). - const uint32_t halfway = qbegin + (lim >> 1); + const size_t halfway = qbegin + (lim >> 1); // Vectors for holding edit information std::vector nonSeedMms; std::vector nonSeedRefcs; bool hi = false; - for(uint32_t i = 1; i <= lim+1; i++) { - uint32_t ri; // leftmost position in candidate alignment - uint32_t rir; // same, minus begin; for indexing into ref[] + for(size_t i = 1; i <= lim+1; i++) { + size_t ri; // leftmost position in candidate alignment + size_t rir; // same, minus begin; for indexing into ref[] if(hi) { ri = halfway + (i >> 1); rir = ri - begin; assert_leq(ri, qend); @@ -3379,24 +3379,24 @@ class Seed2RefAligner : public RefAligner { // Do the naive comparison bool match = true; int refc1 = -1; - uint32_t mmOff1 = 0xffffffff; + TIndexOffU mmOff1 = OFF_MASK; int refc2 = -1; - uint32_t mmOff2 = 0xffffffff; + TIndexOffU mmOff2 = OFF_MASK; int mms = 0; int seedMms = 0; unsigned int ham = 0; nonSeedMms.clear(); nonSeedRefcs.clear(); // Walk through each position of the alignment - for(uint32_t jj = 0; jj < qlen; jj++) { - uint32_t j = jj; + for(size_t jj = 0; jj < qlen; jj++) { + size_t j = jj; if(!seedOnLeft) { // If seed is on the right, scan right-to-left j = qlen - jj - 1; } else { // Go left-to-right } - uint32_t rirj = rir + j; + size_t rirj = rir + j; if(!seedOnLeft) { assert_geq(rir, jj); rirj = rir - jj - 1; @@ -3542,16 +3542,16 @@ class Seed2RefAligner : public RefAligner { * length > 0. */ virtual void anchor64Find(uint32_t numToFind, - uint32_t tidx, + size_t tidx, uint8_t* ref, const TDna5Str& qry, const TCharStr& quals, - uint32_t begin, - uint32_t end, + TIndexOffU begin, + TIndexOffU end, TRangeVec& ranges, - TU32Vec& results, + std::vector& results, TSetPairs* pairs = NULL, - uint32_t aoff = 0xffffffff, + TIndexOffU aoff = OFF_MASK, bool seedOnLeft = false) const { assert_gt(numToFind, 0); @@ -3563,7 +3563,7 @@ class Seed2RefAligner : public RefAligner { assert_gt(end, begin); assert_geq(end - begin, qlen); // caller should have checked this assert_gt(this->seedLen_, 0); - uint32_t slen = min(qlen, this->seedLen_); + size_t slen = min(qlen, this->seedLen_); #ifndef NDEBUG // Get results from the naive matcher for sanity-checking TRangeVec r2; TU32Vec re2; @@ -3579,8 +3579,8 @@ class Seed2RefAligner : public RefAligner { const uint32_t readSeedOverhang = (slen == qlen ? 0 : (qlen - slen)); assert(anchorCushion == 0 || seedAnchorOverhang == 0); assert_eq(qlen, readSeedOverhang + slen); - uint32_t qend = end; - uint32_t qbegin = begin; + TIndexOffU qend = end; + TIndexOffU qbegin = begin; if(seedOnLeft) { // Leave read-sized gap on right-hand side of the interval qend -= qlen; @@ -3591,9 +3591,9 @@ class Seed2RefAligner : public RefAligner { qend -= slen; } // lim = # possible alignments in the range - const uint32_t lim = qend - qbegin; + const size_t lim = qend - qbegin; // halfway = point on the genome to radiate out from - const uint32_t halfway = qbegin + (lim >> 1); + const size_t halfway = qbegin + (lim >> 1); uint64_t anchor = 0llu; uint64_t buffw = 0llu; // rotating ref sequence buffer // OR the 'diff' buffer with this so that we can always count @@ -3611,14 +3611,14 @@ class Seed2RefAligner : public RefAligner { uint32_t nPoss = 0; int nPos1 = -1; int nPos2 = -1; - uint32_t skipLeftToRights = 0; - uint32_t skipRightToLefts = 0; - const uint32_t halfwayRi = halfway - begin; + size_t skipLeftToRights = 0; + size_t skipRightToLefts = 0; + const size_t halfwayRi = halfway - begin; assert_leq(anchorBitPairs, slen); // Construct the 'anchor' 64-bit buffer so that it holds all of // the first 'anchorBitPairs' bit pairs of the query. - for(uint32_t ii = 0; ii < anchorBitPairs; ii++) { - uint32_t i = ii; + for(size_t ii = 0; ii < anchorBitPairs; ii++) { + size_t i = ii; if(!seedOnLeft) { // Fill in the anchor using characters from the seed // portion of the read, starting at the left. Note @@ -3635,8 +3635,8 @@ class Seed2RefAligner : public RefAligner { // alignments that involve an N in the reference. Set // the skip* variables accordingly. r = 0; - uint32_t lrSkips = ii; - uint32_t rlSkips = qlen - ii; + size_t lrSkips = ii; + size_t rlSkips = qlen - ii; if(!seedOnLeft && readSeedOverhang) { lrSkips += readSeedOverhang; assert_geq(rlSkips, readSeedOverhang); @@ -3679,8 +3679,8 @@ class Seed2RefAligner : public RefAligner { // region but outside the anchor region if(seedAnchorOverhang) { assert_lt(anchorBitPairs, slen); - for(uint32_t ii = anchorBitPairs; ii < slen; ii++) { - uint32_t i = ii; + for(size_t ii = anchorBitPairs; ii < slen; ii++) { + size_t i = ii; if(!seedOnLeft) { i = qlen - slen + ii; } @@ -3699,19 +3699,19 @@ class Seed2RefAligner : public RefAligner { // between right-to-left and left-to-right shifts, until all of // the positions from qbegin to qend have been covered. bool hi = false; - uint32_t riHi = halfway; - uint32_t rirHi = halfway - begin; - uint32_t rirHiAnchor = rirHi + anchorBitPairs - 1; - uint32_t riLo = halfway + 1; - uint32_t rirLo = halfway - begin + 1; - uint32_t lrSkips = anchorBitPairs; - uint32_t rlSkips = qlen; + size_t riHi = halfway; + size_t rirHi = halfway - begin; + size_t rirHiAnchor = rirHi + anchorBitPairs - 1; + size_t riLo = halfway + 1; + size_t rirLo = halfway - begin + 1; + size_t lrSkips = anchorBitPairs; + size_t rlSkips = qlen; if(!seedOnLeft && readSeedOverhang) { lrSkips += readSeedOverhang; assert_geq(rlSkips, readSeedOverhang); rlSkips -= readSeedOverhang; } - for(uint32_t i = 1; i <= lim + 1; i++) { + for(size_t i = 1; i <= lim + 1; i++) { int r; // new reference char uint64_t diff; assert_leq(skipLeftToRights, qlen); @@ -3780,9 +3780,9 @@ class Seed2RefAligner : public RefAligner { u8toMms[(int)diff8[4]] + u8toMms[(int)diff8[5]] + u8toMms[(int)diff8[6]]; - uint32_t mmpos1 = 0xffffffff; + TIndexOffU mmpos1 = OFF_MASK; int refc1 = -1; - uint32_t mmpos2 = 0xffffffff; + TIndexOffU mmpos2 = OFF_MASK; int refc2 = -1; unsigned int ham = 0; if(diffs > 2) { @@ -3876,7 +3876,7 @@ class Seed2RefAligner : public RefAligner { continue; } if(mmpos2 < mmpos1) { - uint32_t mmtmp = mmpos1; + TIndexOffU mmtmp = mmpos1; mmpos1 = mmpos2; mmpos2 = mmtmp; int refctmp = refc1; @@ -3890,7 +3890,7 @@ class Seed2RefAligner : public RefAligner { // rest of the seed characters bool foundHit = true; if(seedAnchorOverhang) { - for(uint32_t j = 0; j < seedAnchorOverhang; j++) { + for(size_t j = 0; j < seedAnchorOverhang; j++) { int rc = (int)ref[rir + anchorBitPairs + j]; if(rc == 4) { // Oops, encountered an N in the reference in @@ -3919,7 +3919,7 @@ class Seed2RefAligner : public RefAligner { foundHit = false; // Skip this candidate break; } - uint32_t qoff = anchorBitPairs + j; + TIndexOffU qoff = anchorBitPairs + j; if(!seedOnLeft) { qoff += readSeedOverhang; } @@ -3930,13 +3930,13 @@ class Seed2RefAligner : public RefAligner { foundHit = false; break; } else if(diffs == 2) { - assert_eq(0xffffffff, mmpos2); + assert_eq(OFF_MASK, mmpos2); mmpos2 = qoff; assert_eq(-1, refc2); refc2 = "ACGT"[(int)ref[rir + anchorBitPairs + j]]; } else { assert_eq(1, diffs); - assert_eq(0xffffffff, mmpos1); + assert_eq(OFF_MASK, mmpos1); mmpos1 = qoff; assert_eq(-1, refc1); refc1 = "ACGT"[(int)ref[rir + anchorBitPairs + j]]; @@ -3961,9 +3961,9 @@ class Seed2RefAligner : public RefAligner { int mms = diffs; // start counting total mismatches if((qlen - slen) > 0) { // Going left-to-right - for(uint32_t j = 0; j < readSeedOverhang; j++) { - uint32_t roff = rir + slen + j; - uint32_t qoff = slen + j; + for(size_t j = 0; j < readSeedOverhang; j++) { + TIndexOffU roff = rir + slen + j; + TIndexOffU qoff = slen + j; if(!seedOnLeft) { assert_geq(roff, qlen); roff -= qlen; @@ -4067,7 +4067,7 @@ class Seed2RefAligner : public RefAligner { if(mms > 0) { ASSERT_ONLY(size_t mmcur = 0); if(seedOnLeft && diffs > 0) { - assert_neq(mmpos1, 0xffffffff); + assert_neq(mmpos1, OFF_MASK); assert_lt(mmpos1, qlen); assert_lt(mmcur, (size_t)mms); assert_eq(mmpos1, r2[r2i].mms[mmcur]); @@ -4078,7 +4078,7 @@ class Seed2RefAligner : public RefAligner { range.refcs.push_back(refc1); if(diffs > 1) { assert_eq(2, diffs); - assert_neq(mmpos2, 0xffffffff); + assert_neq(mmpos2, OFF_MASK); assert_lt(mmpos2, qlen); assert_lt(mmcur, (size_t)mms); assert_eq(mmpos2, r2[r2i].mms[mmcur]); @@ -4100,7 +4100,7 @@ class Seed2RefAligner : public RefAligner { range.refcs.push_back(nonSeedRefcs[i]); } if(!seedOnLeft && diffs > 0) { - assert_neq(mmpos1, 0xffffffff); + assert_neq(mmpos1, OFF_MASK); assert_lt(mmpos1, qlen); assert_lt(mmcur, (size_t)mms); assert_eq(mmpos1, r2[r2i].mms[mmcur]); @@ -4111,7 +4111,7 @@ class Seed2RefAligner : public RefAligner { range.refcs.push_back(refc1); if(diffs > 1) { assert_eq(2, diffs); - assert_neq(mmpos2, 0xffffffff); + assert_neq(mmpos2, OFF_MASK); assert_lt(mmpos2, qlen); assert_lt(mmcur, (size_t)mms); assert_eq(mmpos2, r2[r2i].mms[mmcur]); @@ -4185,16 +4185,16 @@ class Seed3RefAligner : public RefAligner { * length > 0. */ void naiveFind(uint32_t numToFind, - uint32_t tidx, + size_t tidx, uint8_t* ref, const TDna5Str& qry, const TCharStr& quals, - uint32_t begin, - uint32_t end, + TIndexOffU begin, + TIndexOffU end, TRangeVec& ranges, TU32Vec& results, TSetPairs* pairs, - uint32_t aoff, + TIndexOffU aoff, bool seedOnLeft) const { assert_gt(numToFind, 0); @@ -4204,8 +4204,8 @@ class Seed3RefAligner : public RefAligner { assert_geq(end - begin, qlen); // caller should have checked this assert_gt(this->seedLen_, 0); const uint32_t slen = min(qlen, this->seedLen_); - uint32_t qend = end; - uint32_t qbegin = begin; + TIndexOffU qend = end; + TIndexOffU qbegin = begin; // If the seed is on the left-hand side of the alignment, then // leave a gap at the right-hand side of the interval; // otherwise, do the opposite @@ -4217,17 +4217,17 @@ class Seed3RefAligner : public RefAligner { qbegin += qlen; } // lim = number of alignments to try - const uint32_t lim = qend - qbegin; + const size_t lim = qend - qbegin; // halfway = position in the reference to start at (and then // we work our way out to the right and to the left). - const uint32_t halfway = qbegin + (lim >> 1); + const size_t halfway = qbegin + (lim >> 1); // Vectors for holding edit information std::vector nonSeedMms; std::vector nonSeedRefcs; bool hi = false; - for(uint32_t i = 1; i <= lim+1; i++) { - uint32_t ri; // leftmost position in candidate alignment - uint32_t rir; // same, minus begin; for indexing into ref[] + for(size_t i = 1; i <= lim+1; i++) { + TIndexOffU ri; // leftmost position in candidate alignment + TIndexOffU rir; // same, minus begin; for indexing into ref[] if(hi) { ri = halfway + (i >> 1); rir = ri - begin; assert_leq(ri, qend); @@ -4239,26 +4239,26 @@ class Seed3RefAligner : public RefAligner { // Do the naive comparison bool match = true; int refc1 = -1; - uint32_t mmOff1 = 0xffffffff; + TIndexOffU mmOff1 = OFF_MASK; int refc2 = -1; - uint32_t mmOff2 = 0xffffffff; + TIndexOffU mmOff2 = OFF_MASK; int refc3 = -1; - uint32_t mmOff3 = 0xffffffff; + TIndexOffU mmOff3 = OFF_MASK; int mms = 0; int seedMms = 0; unsigned int ham = 0; nonSeedMms.clear(); nonSeedRefcs.clear(); // Walk through each position of the alignment - for(uint32_t jj = 0; jj < qlen; jj++) { - uint32_t j = jj; + for(size_t jj = 0; jj < qlen; jj++) { + size_t j = jj; if(!seedOnLeft) { // If seed is on the right, scan right-to-left j = qlen - jj - 1; } else { // Go left-to-right } - uint32_t rirj = rir + j; + TIndexOffU rirj = rir + j; if(!seedOnLeft) { assert_geq(rir, jj); rirj = rir - jj - 1; @@ -4422,16 +4422,16 @@ class Seed3RefAligner : public RefAligner { * length > 0. */ virtual void anchor64Find(uint32_t numToFind, - uint32_t tidx, + size_t tidx, uint8_t* ref, const TDna5Str& qry, const TCharStr& quals, - uint32_t begin, - uint32_t end, + TIndexOffU begin, + TIndexOffU end, TRangeVec& ranges, - TU32Vec& results, + std::vector& results, TSetPairs* pairs = NULL, - uint32_t aoff = 0xffffffff, + TIndexOffU aoff = OFF_MASK, bool seedOnLeft = false) const { assert_gt(numToFind, 0); @@ -4443,7 +4443,7 @@ class Seed3RefAligner : public RefAligner { assert_gt(end, begin); assert_geq(end - begin, qlen); // caller should have checked this assert_gt(this->seedLen_, 0); - uint32_t slen = min(qlen, this->seedLen_); + size_t slen = min(qlen, this->seedLen_); #ifndef NDEBUG // Get results from the naive matcher for sanity-checking TRangeVec r2; TU32Vec re2; @@ -4459,8 +4459,8 @@ class Seed3RefAligner : public RefAligner { const uint32_t readSeedOverhang = (slen == qlen ? 0 : (qlen - slen)); assert(anchorCushion == 0 || seedAnchorOverhang == 0); assert_eq(qlen, readSeedOverhang + slen); - uint32_t qend = end; - uint32_t qbegin = begin; + TIndexOffU qend = end; + TIndexOffU qbegin = begin; if(seedOnLeft) { // Leave read-sized gap on right-hand side of the interval qend -= qlen; @@ -4471,9 +4471,9 @@ class Seed3RefAligner : public RefAligner { qend -= slen; } // lim = # possible alignments in the range - const uint32_t lim = qend - qbegin; + const size_t lim = qend - qbegin; // halfway = point on the genome to radiate out from - const uint32_t halfway = qbegin + (lim >> 1); + const TIndexOffU halfway = qbegin + (lim >> 1); uint64_t anchor = 0llu; uint64_t buffw = 0llu; // rotating ref sequence buffer // OR the 'diff' buffer with this so that we can always count @@ -4492,13 +4492,13 @@ class Seed3RefAligner : public RefAligner { int nPos1 = -1; int nPos2 = -1; int nPos3 = -1; - uint32_t skipLeftToRights = 0; - uint32_t skipRightToLefts = 0; + size_t skipLeftToRights = 0; + size_t skipRightToLefts = 0; const uint32_t halfwayRi = halfway - begin; // Construct the 'anchor' 64-bit buffer so that it holds all of // the first 'anchorBitPairs' bit pairs of the query. - for(uint32_t ii = 0; ii < anchorBitPairs; ii++) { - uint32_t i = ii; + for(size_t ii = 0; ii < anchorBitPairs; ii++) { + size_t i = ii; if(!seedOnLeft) { // Fill in the anchor using characters from the right- // hand side of the query (but take the characters in @@ -4516,8 +4516,8 @@ class Seed3RefAligner : public RefAligner { // alignments that involve an N in the reference. Set // the skip* variables accordingly. r = 0; - uint32_t lrSkips = ii; - uint32_t rlSkips = qlen - ii; + size_t lrSkips = ii; + size_t rlSkips = qlen - ii; if(!seedOnLeft && readSeedOverhang) { lrSkips += readSeedOverhang; assert_geq(rlSkips, readSeedOverhang); @@ -4583,19 +4583,19 @@ class Seed3RefAligner : public RefAligner { // between right-to-left and left-to-right shifts, until all of // the positions from qbegin to qend have been covered. bool hi = false; - uint32_t riHi = halfway; - uint32_t rirHi = halfway - begin; - uint32_t rirHiAnchor = rirHi + anchorBitPairs - 1; - uint32_t riLo = halfway + 1; - uint32_t rirLo = halfway - begin + 1; - uint32_t lrSkips = anchorBitPairs; - uint32_t rlSkips = qlen; + size_t riHi = halfway; + size_t rirHi = halfway - begin; + size_t rirHiAnchor = rirHi + anchorBitPairs - 1; + size_t riLo = halfway + 1; + size_t rirLo = halfway - begin + 1; + size_t lrSkips = anchorBitPairs; + size_t rlSkips = qlen; if(!seedOnLeft && readSeedOverhang) { lrSkips += readSeedOverhang; assert_geq(rlSkips, readSeedOverhang); rlSkips -= readSeedOverhang; } - for(uint32_t i = 1; i <= lim + 1; i++) { + for(size_t i = 1; i <= lim + 1; i++) { int r; // new reference char uint64_t diff; assert_leq(skipLeftToRights, qlen); @@ -4652,8 +4652,8 @@ class Seed3RefAligner : public RefAligner { (diff & 0x3c003c003c003c00llu) && (diff & 0x03c003c003c003c0llu) && (diff & 0x003c003c003c003cllu)) continue; - uint32_t ri = hi ? riLo : riHi; - uint32_t rir = hi ? rirLo : rirHi; + size_t ri = hi ? riLo : riHi; + size_t rir = hi ? rirLo : rirHi; // Could use pop count uint8_t *diff8 = reinterpret_cast(&diff); // As a first cut, see if there are too many mismatches in @@ -4666,11 +4666,11 @@ class Seed3RefAligner : public RefAligner { u8toMms[(int)diff8[4]] + u8toMms[(int)diff8[5]] + u8toMms[(int)diff8[6]]; - uint32_t mmpos1 = 0xffffffff; + TIndexOffU mmpos1 = OFF_MASK; int refc1 = -1; - uint32_t mmpos2 = 0xffffffff; + TIndexOffU mmpos2 = OFF_MASK; int refc2 = -1; - uint32_t mmpos3 = 0xffffffff; + TIndexOffU mmpos3 = OFF_MASK; int refc3 = -1; unsigned int ham = 0; if(diffs > 3) { @@ -4732,7 +4732,7 @@ class Seed3RefAligner : public RefAligner { assert_neq(0, diff); assert_geq(mmpos1, 0); assert_lt(mmpos1, 32); - uint32_t savedMmpos1 = mmpos1; + TIndexOffU savedMmpos1 = mmpos1; mmpos1 -= anchorCushion; assert_lt(mmpos1, anchorBitPairs); refc1 = "ACGT"[(int)ref[rir + mmpos1]]; @@ -4764,7 +4764,7 @@ class Seed3RefAligner : public RefAligner { assert_neq(0, diff2); assert_geq(mmpos2, 0); assert_lt(mmpos2, 32); - uint32_t savedMmpos2 = mmpos2; + TIndexOffU savedMmpos2 = mmpos2; mmpos2 -= anchorCushion; assert_neq(mmpos1, mmpos2); refc2 = "ACGT"[(int)ref[rir + mmpos2]]; @@ -4778,7 +4778,7 @@ class Seed3RefAligner : public RefAligner { continue; } if(mmpos2 < mmpos1) { - uint32_t mmtmp = mmpos1; + TIndexOffU mmtmp = mmpos1; mmpos1 = mmpos2; mmpos2 = mmtmp; int refctmp = refc1; @@ -4818,7 +4818,7 @@ class Seed3RefAligner : public RefAligner { continue; } if(mmpos3 < mmpos1) { - uint32_t mmtmp = mmpos1; + TIndexOffU mmtmp = mmpos1; mmpos1 = mmpos3; mmpos3 = mmpos2; mmpos2 = mmtmp; @@ -4827,7 +4827,7 @@ class Seed3RefAligner : public RefAligner { refc3 = refc2; refc2 = refctmp; } else if(mmpos3 < mmpos2) { - uint32_t mmtmp = mmpos2; + TIndexOffU mmtmp = mmpos2; mmpos2 = mmpos3; mmpos3 = mmtmp; int refctmp = refc2; @@ -4843,7 +4843,7 @@ class Seed3RefAligner : public RefAligner { // rest of the seed characters bool foundHit = true; if(seedAnchorOverhang) { - for(uint32_t j = 0; j < seedAnchorOverhang; j++) { + for(size_t j = 0; j < seedAnchorOverhang; j++) { int rc = (int)ref[rir + anchorBitPairs + j]; if(rc == 4) { // Oops, encountered an N in the reference in @@ -4872,7 +4872,7 @@ class Seed3RefAligner : public RefAligner { foundHit = false; // Skip this candidate break; } - uint32_t qoff = anchorBitPairs + j; + TIndexOffU qoff = anchorBitPairs + j; if(!seedOnLeft) { qoff += readSeedOverhang; } @@ -4883,18 +4883,18 @@ class Seed3RefAligner : public RefAligner { foundHit = false; break; } else if(diffs == 3) { - assert_eq(0xffffffff, mmpos3); + assert_eq(OFF_MASK, mmpos3); mmpos3 = qoff; assert_eq(-1, refc3); refc3 = "ACGT"[(int)ref[rir + anchorBitPairs + j]]; } else if(diffs == 2) { - assert_eq(0xffffffff, mmpos2); + assert_eq(OFF_MASK, mmpos2); mmpos2 = qoff; assert_eq(-1, refc2); refc2 = "ACGT"[(int)ref[rir + anchorBitPairs + j]]; } else { assert_eq(1, diffs); - assert_eq(0xffffffff, mmpos1); + assert_eq(OFF_MASK, mmpos1); mmpos1 = qoff; assert_eq(-1, refc1); refc1 = "ACGT"[(int)ref[rir + anchorBitPairs + j]]; @@ -4919,9 +4919,9 @@ class Seed3RefAligner : public RefAligner { int mms = diffs; // start counting total mismatches if((qlen - slen) > 0) { // Going left-to-right - for(uint32_t j = 0; j < readSeedOverhang; j++) { - uint32_t roff = rir + slen + j; - uint32_t qoff = slen + j; + for(size_t j = 0; j < readSeedOverhang; j++) { + TIndexOffU roff = rir + slen + j; + TIndexOffU qoff = slen + j; if(!seedOnLeft) { assert_geq(roff, qlen); roff -= qlen; @@ -5025,7 +5025,7 @@ class Seed3RefAligner : public RefAligner { if(mms > 0) { ASSERT_ONLY(size_t mmcur = 0); if(seedOnLeft && diffs > 0) { - assert_neq(mmpos1, 0xffffffff); + assert_neq(mmpos1, OFF_MASK); assert_lt(mmpos1, qlen); assert_lt(mmcur, (size_t)mms); assert_eq(mmpos1, r2[r2i].mms[mmcur]); @@ -5035,7 +5035,7 @@ class Seed3RefAligner : public RefAligner { range.mms.push_back(mmpos1); range.refcs.push_back(refc1); if(diffs > 1) { - assert_neq(mmpos2, 0xffffffff); + assert_neq(mmpos2, OFF_MASK); assert_lt(mmpos2, qlen); assert_lt(mmcur, (size_t)mms); assert_eq(mmpos2, r2[r2i].mms[mmcur]); @@ -5046,7 +5046,7 @@ class Seed3RefAligner : public RefAligner { range.refcs.push_back(refc2); if(diffs > 2) { assert_eq(3, diffs); - assert_neq(mmpos3, 0xffffffff); + assert_neq(mmpos3, OFF_MASK); assert_lt(mmpos3, qlen); assert_lt(mmcur, (size_t)mms); assert_eq(mmpos3, r2[r2i].mms[mmcur]); @@ -5069,7 +5069,7 @@ class Seed3RefAligner : public RefAligner { range.refcs.push_back(nonSeedRefcs[i]); } if(!seedOnLeft && diffs > 0) { - assert_neq(mmpos1, 0xffffffff); + assert_neq(mmpos1, OFF_MASK); assert_lt(mmpos1, qlen); assert_lt(mmcur, (size_t)mms); assert_eq(mmpos1, r2[r2i].mms[mmcur]); @@ -5079,7 +5079,7 @@ class Seed3RefAligner : public RefAligner { range.mms.push_back(mmpos1); range.refcs.push_back(refc1); if(diffs > 1) { - assert_neq(mmpos2, 0xffffffff); + assert_neq(mmpos2, OFF_MASK); assert_lt(mmpos2, qlen); assert_lt(mmcur, (size_t)mms); assert_eq(mmpos2, r2[r2i].mms[mmcur]); @@ -5090,7 +5090,7 @@ class Seed3RefAligner : public RefAligner { range.refcs.push_back(refc2); if(diffs > 2) { assert_eq(3, diffs); - assert_neq(mmpos3, 0xffffffff); + assert_neq(mmpos3, OFF_MASK); assert_lt(mmpos3, qlen); assert_lt(mmcur, (size_t)mms); assert_eq(mmpos3, r2[r2i].mms[mmcur]);