Skip to content

Commit

Permalink
Some consts and code cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
rob-p committed Jun 3, 2015
1 parent f7ab62d commit 76db4a7
Show file tree
Hide file tree
Showing 7 changed files with 114 additions and 72 deletions.
69 changes: 36 additions & 33 deletions include/EquivalenceClassBuilder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,19 +53,10 @@ class EquivalenceClassBuilder {

bool finish() {
active_ = false;
/*
// unordered_map implementation
for (auto& kv : countMap_) {
kv.second.normalizeAux();
countVec_.push_back(kv);
}
*/

for (auto kv = countMap_.begin(); !kv.is_end(); ++kv) {
kv->second.normalizeAux();
countVec_.push_back(*kv);
}
//countVec_ = countMap_.snapshot_table();

logger_->info("Computed {} weighted equivalence classes "
"for further processing", countVec_.size());
Expand All @@ -75,37 +66,18 @@ class EquivalenceClassBuilder {
inline void addGroup(TranscriptGroup&& g,
std::vector<double>& weights) {

/*
// unordered_map implementation
std::lock_guard<std::mutex> lock(mapMut_);
auto it = countMap_.find(g);
if (it == countMap_.end()) {
TGValue v(weights, 1);
countMap_.emplace(g, v);
} else {
auto& x = it->second;
x.count++;
for (size_t i = 0; i < x.weights.size(); ++i) {
x.weights[i] =
salmon::math::logAdd(x.weights[i], weights[i]);
}
}
*/

auto upfn = [&weights](TGValue& x) -> TGValue& {

// update the count
x.count++;

// update the weights
for (size_t i = 0; i < x.weights.size(); ++i) {
x.weights[i] =
salmon::math::logAdd(x.weights[i], weights[i]);
}

return x;
};
TGValue v(weights, 1);
countMap_.upsert(g, upfn, v);

}

std::vector<std::pair<const TranscriptGroup, TGValue>>& eqVec() {
Expand All @@ -115,12 +87,43 @@ class EquivalenceClassBuilder {
private:
std::atomic<bool> active_;
cuckoohash_map<TranscriptGroup, TGValue, TranscriptGroupHasher> countMap_;
//std::unordered_map<TranscriptGroup, TGValue, TranscriptGroupHasher> countMap_;

std::vector<std::pair<const TranscriptGroup, TGValue>> countVec_;
std::shared_ptr<spdlog::logger> logger_;
std::mutex mapMut_;
};

#endif // EQUIVALENCE_CLASS_BUILDER_HPP

/** Unordered map implementation */
//std::unordered_map<TranscriptGroup, TGValue, TranscriptGroupHasher> countMap_;
//std::mutex mapMut_;
/*
bool finish() {
// unordered_map implementation
for (auto& kv : countMap_) {
kv.second.normalizeAux();
countVec_.push_back(kv);
}
return true;
}
*/

/*
inline void addGroup(TranscriptGroup&& g,
std::vector<double>& weights) {
// unordered_map implementation
std::lock_guard<std::mutex> lock(mapMut_);
auto it = countMap_.find(g);
if (it == countMap_.end()) {
TGValue v(weights, 1);
countMap_.emplace(g, v);
} else {
auto& x = it->second;
x.count++;
for (size_t i = 0; i < x.weights.size(); ++i) {
x.weights[i] =
salmon::math::logAdd(x.weights[i], weights[i]);
}
}
}
*/
12 changes: 6 additions & 6 deletions include/ReadPair.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ struct ReadPair {
return l;//bam_name_len(read1);
}

inline uint32_t fragLen() {
inline uint32_t fragLen() const {
if (!isPaired()) { return 0; }
auto leftmost1 = bam_pos(read1);
auto leftmost2 = bam_pos(read2);
Expand All @@ -99,18 +99,18 @@ struct ReadPair {
//return std::abs(read1->core.isize) + std::abs(read1->core.l_qseq) + std::abs(read2->core.l_qseq);
}

inline bool isRight() { return isPaired() ? false : (isRightOrphan() ? true : false) ; }
inline bool isLeft() { return isPaired() ? false : (isLeftOrphan() ? true : false); }
inline bool isRight() const { return isPaired() ? false : (isRightOrphan() ? true : false) ; }
inline bool isLeft() const { return isPaired() ? false : (isLeftOrphan() ? true : false); }

inline int32_t left() {
inline int32_t left() const {
if (isPaired()) {
return std::min(bam_pos(read1), bam_pos(read2));
} else {
return bam_pos(read1);
}
}

inline int32_t right() {
inline int32_t right() const {
if (isPaired()) {
return std::max(bam_pos(read1) + bam_seq_len(read1),
bam_pos(read2) + bam_seq_len(read2));
Expand All @@ -119,7 +119,7 @@ struct ReadPair {
}
}

inline ReadType fragType() { return ReadType::PAIRED_END; }
inline ReadType fragType() const { return ReadType::PAIRED_END; }
inline int32_t transcriptID() const { return bam_ref(read1); }

inline double logQualProb() {
Expand Down
4 changes: 4 additions & 0 deletions include/SalmonOpts.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ struct SalmonOpts {

bool noSeqBiasModel; // Don't learn and use a sequence-specific bias model.

bool noRichEqClasses; // Don't use rich equivalence classes --- forget the
// aggregate weights for each transcript to each
// equivalence class of fragments.

bool useReadCompat; // Give a fragment assignment a likelihood based on the compatibility
// between the manner in which it mapped and the expected read
// library format.
Expand Down
14 changes: 7 additions & 7 deletions include/UnpairedRead.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,15 @@ struct UnpairedRead {
return bam_name_len(read);
}

inline bool isRight() { return bam_flag(read) & BAM_FREVERSE; }
inline bool isLeft() { return !isRight(); }
inline int32_t left() { return bam_pos(read); }
inline int32_t right() { return left() + bam_seq_len(read); }
inline uint32_t fragLen() { return 0; }
inline ReadType fragType() { return ReadType::SINGLE_END; }
inline bool isRight() const { return bam_flag(read) & BAM_FREVERSE; }
inline bool isLeft() const { return !isRight(); }
inline int32_t left() const { return bam_pos(read); }
inline int32_t right() const { return left() + bam_seq_len(read); }
inline uint32_t fragLen() const { return 0; }
inline ReadType fragType() const { return ReadType::SINGLE_END; }
inline int32_t transcriptID() const { return bam_ref(read); }

inline double logQualProb() {
inline double logQualProb() const {
return salmon::math::LOG_1;
/*
int q = bam_map_qual(read);
Expand Down
58 changes: 42 additions & 16 deletions src/CollapsedEMOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ double normalize(std::vector<tbb::atomic<double>>& vec) {
}

/*
* Use atomic compare-and-swap to update val to
* Use atomic compare-and-swap to update val to
* val + inc. Update occurs in a loop in case other
* threads update in the meantime.
*/
Expand All @@ -71,7 +71,7 @@ void incLoop(tbb::atomic<double>& val, double inc) {
}

/*
* Use the "standard" EM algorithm over equivalence
* Use the "standard" EM algorithm over equivalence
* classes to estimate the latent variables (alphaOut)
* given the current estimates (alphaIn).
*/
Expand Down Expand Up @@ -127,7 +127,7 @@ void EMUpdate_(
}

/*
* Use the Variational Bayesian EM algorithm over equivalence
* Use the Variational Bayesian EM algorithm over equivalence
* classes to estimate the latent variables (alphaOut)
* given the current estimates (alphaIn).
*/
Expand Down Expand Up @@ -176,13 +176,10 @@ void VBEMUpdate_(
const std::vector<uint32_t>& txps = tgroup.txps;
const std::vector<double>& auxs = kv.second.weights;


double denom = 0.0;
for (size_t i = 0; i < txps.size(); ++i) {
auto tid = txps[i];
auto aux = auxs[i];
//double el = effLens(tid);
//if (el <= 0) { el = 1.0; }
if (expTheta[tid] > 0.0) {
double v = expTheta[tid] * aux;
denom += v;
Expand Down Expand Up @@ -279,15 +276,6 @@ bool CollapsedEMOptimizer::optimize(ExpT& readExp,
std::vector<Transcript>& transcripts = readExp.transcripts();

using VecT = CollapsedEMOptimizer::VecType;
// With Eigen
/*
Eigen::VectorXd alphas(transcripts.size());
Eigen::VectorXd alphasPrime(transcripts.size());
alphasPrime.setZero();
Eigen::VectorXd effLens(transcripts.size());
Eigen::VectorXd expTheta(transcripts.size());
*/
// With atomics
VecType alphas(transcripts.size(), 0.0);
VecType alphasPrime(transcripts.size(), 0.0);
Expand All @@ -300,7 +288,6 @@ bool CollapsedEMOptimizer::optimize(ExpT& readExp,
bool useVBEM{sopt.useVBOpt};
auto jointLog = sopt.jointLog;

double logTotalMass = salmon::math::LOG_0;
double totalLen{0.0};

for (size_t i = 0; i < transcripts.size(); ++i) {
Expand All @@ -313,6 +300,34 @@ bool CollapsedEMOptimizer::optimize(ExpT& readExp,
totalLen += effLens(i);
}

// If the user requested *not* to use "rich" equivalence classes,
// then wipe out all of the weight information here and simply replace
// the weights with the effective length terms (here, the *inverse* of
// the effective length).
if (sopt.noRichEqClasses) {
tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(eqVec.size())),
[&eqVec, &effLens]( const BlockedIndexRange& range) -> void {
// For each index in the equivalence class vector
for (auto eqID : boost::irange(range.begin(), range.end())) {
// The vector entry
auto& kv = eqVec[eqID];
// The label of the equivalence class
const TranscriptGroup& k = kv.first;
// The size of the label
size_t classSize = k.txps.size();
// The weights of the label
TGValue& v = kv.second;

// Iterate over each weight and set it equal to
// 1 / effLen of the corresponding transcript
for (size_t i = 0; i < classSize; ++i) {
double el = effLens(k.txps[i]);
v.weights[i] = (el <= 0.0) ? 1.0 : (1.0 / el);
}
}
});
}

auto numRemoved = markDegenerateClasses(eqVec, alphas, sopt.jointLog);
sopt.jointLog->info("Marked {} weighted equivalence classes as degenerate",
numRemoved);
Expand Down Expand Up @@ -403,6 +418,17 @@ bool CollapsedEMOptimizer::optimize<AlignmentLibrary<ReadPair>>(

// Unused / old
//

// With Eigen
/*
Eigen::VectorXd alphas(transcripts.size());
Eigen::VectorXd alphasPrime(transcripts.size());
alphasPrime.setZero();
Eigen::VectorXd effLens(transcripts.size());
Eigen::VectorXd expTheta(transcripts.size());
*/

/*
* Serial implementation of EM update
*
Expand Down
20 changes: 10 additions & 10 deletions src/SalmonQuantify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -506,21 +506,12 @@ void processMiniBatch(
hitPos,
aln.libFormat());
}
/*
if ( i % 1000 == 0 ) {
std::cerr << "biasProb = " << std::exp(logBiasProb) << "\n";
}
*/

}

// Increment the count of this type of read that we've seen
++libTypeCounts[aln.libFormat().formatID()];

// pre FSPD
/*
aln.logProb = (transcriptLogCount - logRefLength) +
logFragProb + logAlignCompatProb;
*/
double auxProb = startPosProb + logFragProb +
logAlignCompatProb + logBiasProb;

Expand Down Expand Up @@ -2899,6 +2890,7 @@ int salmonQuantify(int argc, char *argv[]) {
sopt.disableMappingCache = true;
// no sequence bias for now
sopt.noSeqBiasModel = true;
sopt.noRichEqClasses = false;

po::options_description advanced("\n"
"advanced options");
Expand Down Expand Up @@ -2937,6 +2929,14 @@ int salmonQuantify(int argc, char *argv[]) {
("noFragStartPosDist", po::bool_switch(&(sopt.noFragStartPosDist))->default_value(false), "[Currently Experimental] : "
"Don't consider / model non-uniformity in the fragment start positions "
"across the transcript.")
/*
// Don't expose this yet
("noRichEqClasses", po::bool_switch(&(sopt.noRichEqClasses))->default_value(false),
"Disable \"rich\" equivalent classes. If this flag is passed, then "
"all information about the relative weights for each transcript in the "
"label of an equivalence class will be ignored, and only the relative "
"abundance and effective length of each transcript will be considered.")
*/
//("noSeqBiasModel", po::bool_switch(&(sopt.noSeqBiasModel))->default_value(false),
// "Don't learn and apply a model of sequence-specific bias")
("numAuxModelSamples", po::value<uint32_t>(&(sopt.numBurninFrags))->default_value(5000000), "The first <numAuxModelSamples> are used to train the "
Expand Down
9 changes: 9 additions & 0 deletions src/SalmonQuantifyAlignments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,7 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {

// no sequence bias for now
sopt.noSeqBiasModel = true;
sopt.noRichEqClasses = false;

po::options_description advanced("\nadvanced options");
advanced.add_options()
Expand Down Expand Up @@ -863,6 +864,14 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
("noFragStartPosDist", po::bool_switch(&(sopt.noFragStartPosDist))->default_value(false), "[Currently Experimental] : "
"Don't consider / model non-uniformity in the fragment start positions "
"across the transcript.")
/*
// Don't expose this yet
("noRichEqClasses", po::bool_switch(&(sopt.noRichEqClasses))->default_value(false),
"Disable \"rich\" equivalent classes. If this flag is passed, then "
"all information about the relative weights for each transcript in the "
"label of an equivalence class will be ignored, and only the relative "
"abundance and effective length of each transcript will be considered.")
*/
("numErrorBins", po::value<uint32_t>(&(sopt.numErrorBins))->default_value(6), "The number of bins into which to divide "
"each read when learning and applying the error model. For example, a value of 10 would mean that "
"effectively, a separate error model is leared and applied to each 10th of the read, while a value of "
Expand Down

0 comments on commit 76db4a7

Please sign in to comment.