minor updates and efficiency improvements
rob-p committed Jun 8, 2015
1 parent 49716ce commit 9152cca
Showing 5 changed files with 105 additions and 42 deletions.
14 changes: 14 additions & 0 deletions include/EquivalenceClassBuilder.hpp
@@ -26,13 +26,23 @@ struct TGValue {
 
     // const is a lie
    void normalizeAux() const {
+        double sumOfAux{0.0};
+        for (size_t i = 0; i < weights.size(); ++i) {
+            sumOfAux += weights[i];
+        }
+        double norm = 1.0 / sumOfAux;
+        for (size_t i = 0; i < weights.size(); ++i) {
+            weights[i] *= norm;
+        }
+        /* LOG SPACE
        double sumOfAux = salmon::math::LOG_0;
        for (size_t i = 0; i < weights.size(); ++i) {
            sumOfAux = salmon::math::logAdd(sumOfAux, weights[i]);
        }
        for (size_t i = 0; i < weights.size(); ++i) {
            weights[i] = std::exp(weights[i] - sumOfAux);
        }
+        */
    }
 
    // forget synchronizing this for the time being
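
For reference (this note and sketch are not part of the commit): the new normalizeAux rescales the weights in place so they sum to one, while the commented-out LOG SPACE block is the same computation on log-scale weights, i.e. a log-sum-exp followed by exponentiation. A minimal standalone comparison of the two, with all names invented here:

// Illustrative sketch only -- not from the repository.
// Shows that dividing linear weights by their sum and exponentiating
// log weights relative to their log-sum-exp yield the same values.
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
    std::vector<double> w{0.5, 1.5, 2.0};
    std::vector<double> lw;
    for (double x : w) { lw.push_back(std::log(x)); }

    // Linear space, as in the new normalizeAux: multiply by 1/sum.
    double sum = 0.0;
    for (double x : w) { sum += x; }
    double norm = 1.0 / sum;
    for (double& x : w) { x *= norm; }

    // Log space, as in the commented-out block: subtract the
    // log-sum-exp, then exponentiate (max-shifted for stability).
    double m = lw[0];
    for (double x : lw) { if (x > m) { m = x; } }
    double s = 0.0;
    for (double x : lw) { s += std::exp(x - m); }
    double lse = m + std::log(s);
    for (double& x : lw) { x = std::exp(x - lse); }

    for (std::size_t i = 0; i < w.size(); ++i) {
        std::printf("%g %g\n", w[i], lw[i]);  // rows agree up to rounding
    }
    return 0;
}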
@@ -71,8 +81,12 @@ class EquivalenceClassBuilder {
                 x.count++;
                 // update the weights
                 for (size_t i = 0; i < x.weights.size(); ++i) {
+                    // Possibly atomicized in the future
+                    weights[i] += x.weights[i];
+                    /* LOG SPACE
                    x.weights[i] =
                        salmon::math::logAdd(x.weights[i], weights[i]);
+                    */
                 }
                 return x;
             };
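
The hunk above is the update half of a map upsert: when an equivalence class is seen again, its count is bumped and the new weights are folded together with the stored ones (formerly via log-space logAdd, now by plain addition). A single-threaded sketch of that pattern follows; the key and TGVal types are stand-ins invented here, not the real TranscriptGroup/TGValue, and which vector accumulates into which is a detail of the surrounding builder:

// Illustrative upsert sketch -- not from the repository.
#include <cstddef>
#include <unordered_map>
#include <utility>
#include <vector>

struct TGVal {                       // stand-in for TGValue
    std::vector<double> weights;
    std::size_t count = 0;
};

void addGroup(std::unordered_map<int, TGVal>& m, int key,
              const std::vector<double>& weights) {
    auto it = m.find(key);
    if (it == m.end()) {             // first sighting: insert
        TGVal v;
        v.weights = weights;
        v.count = 1;
        m.emplace(key, std::move(v));
    } else {                         // seen before: update in place
        TGVal& x = it->second;
        x.count++;
        for (std::size_t i = 0; i < x.weights.size(); ++i) {
            x.weights[i] += weights[i];
        }
    }
}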
2 changes: 2 additions & 0 deletions include/SalmonUtils.hpp
@@ -73,6 +73,8 @@ TranscriptGeneMap readTranscriptToGeneMap( std::ifstream &ifile );
 
 TranscriptGeneMap transcriptToGeneMapFromFasta( const std::string& transcriptsFile );
 
+void incLoop(tbb::atomic<double>& val, double inc);
+
 void aggregateEstimatesToGeneLevel(TranscriptGeneMap& tgm, boost::filesystem::path& inputPath);
 
 // NOTE: Throws an invalid_argument exception if the quant or quant_bias_corrected files do
104 changes: 64 additions & 40 deletions src/CollapsedEMOptimizer.cpp
@@ -97,28 +97,36 @@ void EMUpdate_(
             const std::vector<double>& auxs = kv.second.weights;
 
             double denom = 0.0;
-            for (size_t i = 0; i < txps.size(); ++i) {
-                auto tid = txps[i];
-                auto aux = auxs[i];
-                //double el = effLens(tid);
-                //if (el <= 0) { el = 1.0; }
-                double v = alphaIn[tid] * aux;
-                denom += v;
-            }
-
-            if (denom <= ::minEQClassWeight) {
-                // tgroup.setValid(false);
-            } else {
+            size_t groupSize = txps.size();
+            // If this is a single-transcript group,
+            // then it gets the full count. Otherwise,
+            // update according to our VBEM rule.
+            if (BOOST_LIKELY(groupSize > 1)) {
+                for (size_t i = 0; i < groupSize; ++i) {
+                    auto tid = txps[i];
+                    auto aux = auxs[i];
+                    //double el = effLens(tid);
+                    //if (el <= 0) { el = 1.0; }
+                    double v = alphaIn[tid] * aux;
+                    denom += v;
+                }
 
-                double invDenom = 1.0 / denom;
-                for (size_t i = 0; i < txps.size(); ++i) {
-                    auto tid = txps[i];
-                    auto aux = auxs[i];
-                    double v = alphaIn[tid] * aux;
-                    if (!std::isnan(v)) {
-                        incLoop(alphaOut[tid], count * v * invDenom);
+                if (denom <= ::minEQClassWeight) {
+                    // tgroup.setValid(false);
+                } else {
+
+                    double invDenom = 1.0 / denom;
+                    for (size_t i = 0; i < groupSize; ++i) {
+                        auto tid = txps[i];
+                        auto aux = auxs[i];
+                        double v = alphaIn[tid] * aux;
+                        if (!std::isnan(v)) {
+                            incLoop(alphaOut[tid], count * v * invDenom);
                        }
                    }
                }
+            } else {
+                incLoop(alphaOut[txps.front()], count);
+            }
         }
     }
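
For orientation (notation introduced here, not in the diff): writing c_j for the count of equivalence class j and a_{ij} for the auxiliary weight of transcript i in class j, the loop above is the standard EM update, now with a fast path that hands a single-transcript class its whole count directly:

% EM update per equivalence class; \alpha_i is the current estimate,
% and the fraction is what count * v * invDenom computes in the code.
\alpha_i' \;=\; \sum_{j} c_j \,
    \frac{\alpha_i\, a_{ij}}{\sum_{k \in j} \alpha_k\, a_{kj}}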
@@ -135,6 +143,7 @@ void VBEMUpdate_(
         std::vector<std::pair<const TranscriptGroup, TGValue>>& eqVec,
         std::vector<Transcript>& transcripts,
         Eigen::VectorXd& effLens,
+        double priorAlpha,
         double totLen,
         const CollapsedEMOptimizer::VecType& alphaIn,
         CollapsedEMOptimizer::VecType& alphaOut,
@@ -147,10 +156,10 @@
     double logNorm = boost::math::digamma(alphaSum);
 
     tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(transcripts.size())),
-                      [logNorm, totLen, &effLens, &alphaIn,
+                      [logNorm, priorAlpha, totLen, &effLens, &alphaIn,
                        &alphaOut, &expTheta]( const BlockedIndexRange& range) -> void {
 
-        double prior = 0.1;
+        double prior = priorAlpha;
         double priorNorm = prior * totLen;
 
         for (auto i : boost::irange(range.begin(), range.end())) {
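
The digamma calls implement the usual mean-field identity for a Dirichlet factor; the per-transcript line falls outside this hunk, so the following is the standard form presumably being applied, with logNorm = psi(alphaSum) supplying the subtracted term:

% Standard variational expectation under a Dirichlet; \psi is digamma.
% Assumes alphaSum aggregates the prior-augmented alphas.
\tilde{\theta}_i \;=\; \exp\big(\mathbb{E}[\log\theta_i]\big)
    \;=\; \exp\Big(\psi\big(\alpha^0_i + \alpha_i\big)
          - \psi\Big(\textstyle\sum_j \big(\alpha^0_j + \alpha_j\big)\Big)\Big)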
@@ -177,26 +186,35 @@
             const std::vector<double>& auxs = kv.second.weights;
 
             double denom = 0.0;
-            for (size_t i = 0; i < txps.size(); ++i) {
-                auto tid = txps[i];
-                auto aux = auxs[i];
-                if (expTheta[tid] > 0.0) {
-                    double v = expTheta[tid] * aux;
-                    denom += v;
-                }
-            }
-            if (denom <= ::minEQClassWeight) {
-                // tgroup.setValid(false);
-            } else {
-                double invDenom = 1.0 / denom;
-                for (size_t i = 0; i < txps.size(); ++i) {
+            size_t groupSize = txps.size();
+            // If this is a single-transcript group,
+            // then it gets the full count. Otherwise,
+            // update according to our VBEM rule.
+            if (BOOST_LIKELY(groupSize > 1)) {
+                for (size_t i = 0; i < groupSize; ++i) {
                     auto tid = txps[i];
                     auto aux = auxs[i];
-                    if (expTheta[tid] > 0.0) {
-                        double v = expTheta[tid] * aux;
-                        incLoop(alphaOut[tid], count * v * invDenom);
-                    }
+                    if (expTheta[tid] > 0.0) {
+                        double v = expTheta[tid] * aux;
+                        denom += v;
+                    }
                 }
+                if (denom <= ::minEQClassWeight) {
+                    // tgroup.setValid(false);
+                } else {
+                    double invDenom = 1.0 / denom;
+                    for (size_t i = 0; i < groupSize; ++i) {
+                        auto tid = txps[i];
+                        auto aux = auxs[i];
+                        if (expTheta[tid] > 0.0) {
+                            double v = expTheta[tid] * aux;
+                            incLoop(alphaOut[tid], count * v * invDenom);
+                        }
+                    }
+                }
+
+            } else {
+                incLoop(alphaOut[txps.front()], count);
             }
         }
     }});
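
With expTheta in hand, the per-class VBEM update mirrors the EM rule, substituting the expected-log weights for the raw alphas (same notation as in the note above):

% VBEM analogue of the EM rule, using \tilde{\theta} in place of \alpha.
\alpha_i' \;=\; \sum_{j} c_j \,
    \frac{\tilde{\theta}_i\, a_{ij}}{\sum_{k \in j} \tilde{\theta}_k\, a_{kj}}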
@@ -286,6 +304,9 @@ bool CollapsedEMOptimizer::optimize(ExpT& readExp,
         readExp.equivalenceClassBuilder().eqVec();
 
     bool useVBEM{sopt.useVBOpt};
+    // If we use VBEM, we'll need the prior parameters
+    double priorAlpha = 0.1;
+
     auto jointLog = sopt.jointLog;
 
     double totalNumFrags{readExp.numMappedReads()};
@@ -341,7 +362,8 @@ bool CollapsedEMOptimizer::optimize(ExpT& readExp,
     while (itNum < maxIter and !converged) {
 
         if (useVBEM) {
-            VBEMUpdate_(eqVec, transcripts, effLens, totalLen, alphas, alphasPrime, expTheta);
+            VBEMUpdate_(eqVec, transcripts, effLens,
+                        priorAlpha, totalLen, alphas, alphasPrime, expTheta);
         } else {
             EMUpdate_(eqVec, transcripts, effLens, alphas, alphasPrime);
         }
@@ -372,7 +394,9 @@ bool CollapsedEMOptimizer::optimize(ExpT& readExp,
                       itNum, maxRelDiff);
 
     if (useVBEM) {
-        double priorAlpha = 0.1;
+        // The expected value of the posterior is
+        // E[\eta_{i}] = (\alpha^{0}_{i} + \alpha_{i}) /
+        //              (\sum_{j} \alpha^{0}_{j} + \alpha_{j})
         double totPrior = priorAlpha * transcripts.size();
         double denom = totPrior + totalNumFrags;
         for (size_t i = 0; i < transcripts.size(); ++i) {
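
The formula in the new comment is the posterior mean of a Dirichlet-multinomial model: with a symmetric Dir(priorAlpha) prior over N transcripts and accumulated counts alpha, conjugacy gives the expression below, whose denominator is the code's totPrior + totalNumFrags:

% Dirichlet posterior mean, with \alpha^0_i = priorAlpha for all i.
\mathbb{E}[\eta_i] \;=\;
    \frac{\alpha^0_i + \alpha_i}{\sum_j \big(\alpha^0_j + \alpha_j\big)}
  \;=\; \frac{\text{priorAlpha} + \alpha_i}{N \cdot \text{priorAlpha} + \sum_j \alpha_j}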
10 changes: 8 additions & 2 deletions src/SalmonQuantify.cpp
@@ -567,9 +567,15 @@ void processMiniBatch(
 
                 // EQCLASS
                 TranscriptGroup tg(txpIDs, txpIDsHash);
+                double auxProbSum{0.0};
                 for (auto& p : auxProbs) {
-                    p -= auxDenom;
-                    //p = std::exp(p - auxDenom);
+                    //p -= auxDenom;
+                    p = std::exp(p - auxDenom);
+                    auxProbSum += p;
                 }
+                if (std::abs(auxProbSum - 1.0) > 0.01) {
+                    std::cerr << "weights had sum of " << auxProbSum
+                              << " but it should be 1!!\n\n";
+                }
                 eqBuilder.addGroup(std::move(tg), auxProbs);
 
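
The change above moves the per-class auxiliary weights from log space to linear space: each entry becomes exp(p - auxDenom), so if auxDenom is the log-sum-exp of the entries the vector sums to one, which is exactly what the new sanity check verifies. A self-contained sketch of that conversion (function names invented here):

// Illustrative sketch only -- not from the repository. Assumes the
// input vector is non-empty and that its log-sum-exp plays the role
// of auxDenom in the hunk above.
#include <cmath>
#include <vector>

double logSumExp(const std::vector<double>& lp) {
    double m = lp.front();                        // max-shift for stability
    for (double x : lp) { if (x > m) { m = x; } }
    double s = 0.0;
    for (double x : lp) { s += std::exp(x - m); }
    return m + std::log(s);
}

void toLinear(std::vector<double>& lp) {
    double denom = logSumExp(lp);
    double sum = 0.0;
    for (double& p : lp) {
        p = std::exp(p - denom);                  // as in the hunk
        sum += p;
    }
    // sum is now 1.0 up to floating-point error, matching the
    // 0.01-tolerance check added in the commit.
}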
17 changes: 17 additions & 0 deletions src/SalmonUtils.cpp
@@ -986,6 +986,23 @@ std::vector<std::string> split(const std::string& str, int delimiter(int) = ::is
     return result;
 }
 
+/*
+ * Use atomic compare-and-swap to update val to
+ * val + inc. Update occurs in a loop in case other
+ * threads update in the meantime.
+ */
+void incLoop(tbb::atomic<double>& val, double inc) {
+    double oldMass = val.load();
+    double returnedMass = oldMass;
+    double newMass{oldMass + inc};
+    do {
+        oldMass = returnedMass;
+        newMass = oldMass + inc;
+        returnedMass = val.compare_and_swap(newMass, oldMass);
+    } while (returnedMass != oldMass);
+}
+
+
 void aggregateEstimatesToGeneLevel(TranscriptGeneMap& tgm, boost::filesystem::path& inputPath) {
 
     using std::vector;
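
For comparison (not part of the commit): the same read-modify-CAS loop expressed with std::atomic. tbb::atomic's compare_and_swap returns the value observed before the swap, while std::atomic's compare_exchange_weak refreshes its 'expected' argument in place on failure, which shortens the retry loop:

// Illustrative standard-library equivalent of incLoop above.
#include <atomic>

void incLoopStd(std::atomic<double>& val, double inc) {
    double expected = val.load();
    // On failure, 'expected' is reloaded with the current value,
    // so each retry adds 'inc' to a fresh snapshot.
    while (!val.compare_exchange_weak(expected, expected + inc)) {
    }
}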
