Skip to content

Commit

Permalink
Verify that the suffix array sample interval is a power of 2
Browse files Browse the repository at this point in the history
  • Loading branch information
rob-p committed May 15, 2015
1 parent 0e46903 commit 26bb03a
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 4 deletions.
17 changes: 16 additions & 1 deletion src/BuildSalmonIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,12 @@ bool buildAuxKmerIndex(boost::filesystem::path& outputPrefix, uin32_t k,
}
*/

// Cool way to do this from
// http://stackoverflow.com/questions/108318/whats-the-simplest-way-to-test-whether-a-number-is-a-power-of-2-in-c
bool isPowerOfTwo(uint32_t n) {
return (n > 0 and (n & (n-1)) == 0);
}

int salmonIndex(int argc, char* argv[]) {

using std::string;
Expand All @@ -160,7 +166,8 @@ int salmonIndex(int argc, char* argv[]) {
("sasamp,s", po::value<uint32_t>(&saSampInterval)->default_value(1)->required(),
"The interval at which the suffix array should be sampled. "
"Smaller values are faster, but produce a larger index. "
"The default should be OK, unless your transcriptome is huge.")
"The default should be OK, unless your transcriptome is huge. "
"This value should be a power of 2.")
;

po::variables_map vm;
Expand All @@ -181,6 +188,14 @@ Creates a salmon index.
}
po::notify(vm);

uint32_t sasamp = vm["sasamp"].as<uint32_t>();
if (!isPowerOfTwo(sasamp)) {
fmt::MemoryWriter errWriter;
errWriter << "Error: The suffix array sampling interval must be "
"a power of 2. The value provided, " << sasamp << ", is not.";
throw(std::logic_error(errWriter.str()));
}

fmt::MemoryWriter optWriter;
optWriter << vm["sasamp"].as<uint32_t>();

Expand Down
7 changes: 6 additions & 1 deletion src/FragmentStartPositionDistribution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,14 +98,19 @@ double FragmentStartPositionDistribution::operator()(
int32_t hitPos,
uint32_t txpLen,
double logEffLen) {

if (hitPos < 0) { hitPos = 0; }
assert(hitPos < txpLen);
if (hitPos >= txpLen) {
std::cerr << "\n\nhitPos = " << hitPos << ", txpLen = " << txpLen << "!!\n\n\n";
return salmon::math::LOG_0;
}
// If we haven't updated the CDF yet, then
// just return log(1 / effLen);
if (!isUpdated_) {
return -logEffLen;
}

if (hitPos < 0) { hitPos = 0; }
double a = hitPos * (1.0 / txpLen);

double effLen = std::exp(logEffLen);
Expand Down
4 changes: 2 additions & 2 deletions src/SalmonQuantify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1250,7 +1250,7 @@ template <typename CoverageCalculator>
inline bool nearEndOfTranscript(
CoverageCalculator& hit,
Transcript& txp,
uint32_t cutoff=200) {
int32_t cutoff=std::numeric_limits<int32_t>::max()) {
// check if hit appears close to the end of the given transcript
auto hitPos = hit.bestHitPos;
return (hitPos < cutoff or std::abs(txp.RefLength - hitPos) < cutoff);
Expand Down Expand Up @@ -1380,7 +1380,7 @@ void getHitsForFragment(std::pair<header_sequence_qual, header_sequence_qual>& f
uint32_t firstTranscriptID = std::numeric_limits<uint32_t>::max();
double bestScore = -std::numeric_limits<double>::max();
if (BOOST_UNLIKELY(isOrphan)) {
return;
//return;
bool foundValidHit{false};
// search for a hit on the left
for (auto& tHitList : leftHits) {
Expand Down

0 comments on commit 26bb03a

Please sign in to comment.