Skip to content

Commit

Permalink
Actually *use* the non-uniform start distribution
Browse files Browse the repository at this point in the history
Also, implemented the non-uniform start distribution
for alignment-based salmon.
  • Loading branch information
rob-p committed May 15, 2015
1 parent a3f294b commit 0e46903
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 3 deletions.
32 changes: 32 additions & 0 deletions include/AlignmentLibrary.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ extern "C" {
#include "LibraryFormat.hpp"
#include "SalmonOpts.hpp"
#include "FragmentLengthDistribution.hpp"
#include "FragmentStartPositionDistribution.hpp"
#include "AlignmentGroup.hpp"
#include "ErrorModel.hpp"
#include "AlignmentModel.hpp"
Expand Down Expand Up @@ -53,6 +54,7 @@ class AlignmentLibrary {
transcriptFile_(transcriptFile),
libFmt_(libFmt),
transcripts_(std::vector<Transcript>()),
fragStartDists_(5),
quantificationPasses_(0) {
namespace bfs = boost::filesystem;

Expand Down Expand Up @@ -104,6 +106,26 @@ class AlignmentLibrary {
fmt::print(stderr, "Populating targets from aln = {}, fasta = {} . . .",
alnFiles.front(), transcriptFile_);
fp.populateTargets(transcripts_);
for (auto& txp : transcripts_) {
// Length classes taken from
// ======
// Roberts, Adam, et al.
// "Improving RNA-Seq expression estimates by correcting for fragment bias."
// Genome Biol 12.3 (2011): R22.
// ======
// perhaps, define these in a more data-driven way
if (txp.RefLength <= 1334) {
txp.lengthClassIndex(0);
} else if (txp.RefLength <= 2104) {
txp.lengthClassIndex(0);
} else if (txp.RefLength <= 2988) {
txp.lengthClassIndex(0);
} else if (txp.RefLength <= 4389) {
txp.lengthClassIndex(0);
} else {
txp.lengthClassIndex(0);
}
}
fmt::print(stderr, "done\n");

// Create the cluster forest for this set of transcripts
Expand Down Expand Up @@ -140,6 +162,10 @@ class AlignmentLibrary {

inline SAM_hdr* header() { return bq->header(); }

inline std::vector<FragmentStartPositionDistribution>& fragmentStartPositionDistributions() {
return fragStartDists_;
}

inline FragmentLengthDistribution& fragmentLengthDistribution() {
return *flDist_.get();
}
Expand Down Expand Up @@ -224,6 +250,12 @@ class AlignmentLibrary {
* in the same cluster.
*/
std::unique_ptr<ClusterForest> clusters_;

/**
* The emperical fragment start position distribution
*/
std::vector<FragmentStartPositionDistribution> fragStartDists_;

/**
* The emperical fragment length distribution.
*
Expand Down
5 changes: 3 additions & 2 deletions src/SalmonQuantify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -477,12 +477,13 @@ void processMiniBatch(
++libTypeCounts[aln.libFormat().formatID()];

// pre FSPD
/*
aln.logProb = (transcriptLogCount - logRefLength) +
logFragProb + logAlignCompatProb;
/*
*/
aln.logProb = (transcriptLogCount + startPosProb) +
logFragProb + logAlignCompatProb;
*/


if (std::abs(aln.logProb) == LOG_0) { continue; }

Expand Down
37 changes: 36 additions & 1 deletion src/SalmonQuantifyAlignments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,15 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
auto& fragmentQueue = alnLib.fragmentQueue();
auto& alignmentGroupQueue = alnLib.alignmentGroupQueue();

std::vector<FragmentStartPositionDistribution>& fragStartDists =
alnLib.fragmentStartPositionDistributions();

auto& fragLengthDist = alnLib.fragmentLengthDistribution();
auto& alnMod = alnLib.alignmentModel();

bool useFSPD{!salmonOpts.noFragStartPosDist};
bool useFragLengthDist{!salmonOpts.noFragLengthDist};

double startingCumulativeMass = fmCalc.cumulativeLogMassAt(firstTimestepOfRound);
const auto expectedLibraryFormat = alnLib.format();
uint32_t numBurninFrags{salmonOpts.numBurninFrags};
Expand Down Expand Up @@ -318,10 +324,25 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
errLike = alnMod.logLikelihood(*aln, transcript);
}

// Allow for a non-uniform fragment start position distribution
double startPosProb = -logRefLength;
auto hitPos = aln->left();
if (useFSPD and burnedIn and hitPos < refLength) {
auto& fragStartDist =
fragStartDists[transcript.lengthClassIndex()];
startPosProb = fragStartDist(hitPos, refLength, logRefLength);
}

// Pre FSPD
/*
double qualProb = -logRefLength + logFragProb +
aln->logQualProb() +
errLike + logAlignCompatProb;

*/
double qualProb = startPosProb + logFragProb +
aln->logQualProb() +
errLike + logAlignCompatProb;



// The overall mass of this transcript, which is used to
Expand Down Expand Up @@ -380,13 +401,24 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,

double r = uni(eng);
if (!burnedIn and r < std::exp(aln->logProb)) {
// Update the error model
if (salmonOpts.useErrorModel) {
alnMod.update(*aln, transcript, LOG_1, logForgettingMass);
}
// Update the fragment length distribution
if (aln->isPaired() and !salmonOpts.noFragLengthDist) {
double fragLength = aln->fragLen();
fragLengthDist.addVal(fragLength, logForgettingMass);
}
// Update the fragment start position distribution
if (useFSPD) {
auto hitPos = aln->left();
auto& fragStartDist =
fragStartDists[transcript.lengthClassIndex()];
fragStartDist.addVal(hitPos,
transcript.RefLength,
logForgettingMass);
}
}
}

Expand Down Expand Up @@ -792,6 +824,9 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
"unlikely lengths will be assigned a smaller relative probability than those with more likely "
"lengths. When this flag is passed in, the observed fragment length has no effect on that fragment's "
"a priori probability.")
("noFragStartPosDist", po::bool_switch(&(sopt.noFragStartPosDist))->default_value(false), "[Currently Experimental] : "
"Don't consider / model non-uniformity in the fragment start positions "
"across the transcript.")
("numErrorBins", po::value<uint32_t>(&(sopt.numErrorBins))->default_value(6), "The number of bins into which to divide "
"each read when learning and applying the error model. For example, a value of 10 would mean that "
"effectively, a separate error model is leared and applied to each 10th of the read, while a value of "
Expand Down

0 comments on commit 0e46903

Please sign in to comment.