Skip to content

Commit

Permalink
make allowOrphans bool_switch
Browse files Browse the repository at this point in the history
  • Loading branch information
geetduggal committed Jun 27, 2015
1 parent 21bf89a commit 2173085
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 24 deletions.
14 changes: 14 additions & 0 deletions include/Transcript.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class Transcript {
logPerBasePrior_ = other.logPerBasePrior_;
priorMass_ = other.priorMass_;
avgMassBias_.store(other.avgMassBias_.load());
hasAnchorFragment_.store(other.hasAnchorFragment_.load());
}

Transcript& operator=(Transcript&& other) {
Expand All @@ -61,6 +62,7 @@ class Transcript {
logPerBasePrior_ = other.logPerBasePrior_;
priorMass_ = other.priorMass_;
avgMassBias_.store(other.avgMassBias_.load());
hasAnchorFragment_.store(other.hasAnchorFragment_.load());
return *this;
}

Expand Down Expand Up @@ -210,6 +212,14 @@ class Transcript {
void lengthClassIndex(uint32_t ind) { lengthClassIndex_ = ind; }
uint32_t lengthClassIndex() { return lengthClassIndex_; }

void setAnchorFragment() {
hasAnchorFragment_.store(true);
}

bool hasAnchorFragment() {
return hasAnchorFragment_.load();
}

std::string RefName;
uint32_t RefLength;
uint32_t id;
Expand All @@ -234,6 +244,10 @@ class Transcript {
tbb::atomic<double> avgMassBias_;
uint32_t lengthClassIndex_;
double logPerBasePrior_;
// In a paired-end protocol, a transcript has
// an "anchor" fragment if it has a proper
// pair of reads mapping to it.
std::atomic<bool> hasAnchorFragment_{false};
};

#endif //TRANSCRIPT
54 changes: 30 additions & 24 deletions src/SalmonQuantify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1494,18 +1494,18 @@ void getHitsForFragment(std::pair<header_sequence_qual, header_sequence_qual>& f

// vector-based code -- June 23
// Sort the left and right hits
std::sort(leftHits.begin(), leftHits.end(),
std::sort(leftHits.begin(), leftHits.end(),
[](CoverageCalculator& c1, CoverageCalculator& c2) -> bool {
return c1.targetID < c2.targetID;
});
std::sort(rightHits.begin(), rightHits.end(),
std::sort(rightHits.begin(), rightHits.end(),
[](CoverageCalculator& c1, CoverageCalculator& c2) -> bool {
return c1.targetID < c2.targetID;
});
// Take the intersection of these two hit lists
// Adopted from : http://en.cppreference.com/w/cpp/algorithm/set_intersection
{
auto leftIt = leftHits.begin();
auto leftIt = leftHits.begin();
auto leftEnd = leftHits.end();
auto rightIt = rightHits.begin();
auto rightEnd = rightHits.end();
Expand All @@ -1514,17 +1514,17 @@ void getHitsForFragment(std::pair<header_sequence_qual, header_sequence_qual>& f
++leftIt;
} else {
if (!(rightIt->targetID < leftIt->targetID)) {
jointHits.push_back({leftIt->targetID,
std::distance(leftHits.begin(), leftIt),
jointHits.push_back({leftIt->targetID,
std::distance(leftHits.begin(), leftIt),
std::distance(rightHits.begin(), rightIt)});
++leftIt;
}
++rightIt;
}
}
}
// End June 23
// End June 23

/* map based code
{
auto notFound = maxHitList.end();
Expand All @@ -1550,12 +1550,14 @@ void getHitsForFragment(std::pair<header_sequence_qual, header_sequence_qual>& f
//std::vector<CoverageCalculator> allHits;
//allHits.reserve(totalHits);
bool foundValidHit{false};

// search for a hit on the left
for (auto& tHitList : leftHits) {
auto transcriptID = tHitList.targetID;
auto& covChain = tHitList;
Transcript& t = transcripts[transcriptID];
if (!t.hasAnchorFragment()) { continue; }

covChain.computeBestChain(t, frag.first.seq);
double score = covChain.bestHitScore;

Expand Down Expand Up @@ -1584,17 +1586,19 @@ void getHitsForFragment(std::pair<header_sequence_qual, header_sequence_qual>& f
readHits += score;
++hitListCount;
++leftHitCount;
}
}
}

// search for a hit on the right
for (auto& tHitList : rightHits) {
// Prior
// auto transcriptID = tHitList.first;
// June 23
// June 23
auto transcriptID = tHitList.targetID;
auto& covChain = tHitList;
Transcript& t = transcripts[transcriptID];
if (!t.hasAnchorFragment()) { continue; }

covChain.computeBestChain(t, frag.second.seq);
double score = covChain.bestHitScore;

Expand Down Expand Up @@ -1651,11 +1655,11 @@ void getHitsForFragment(std::pair<header_sequence_qual, header_sequence_qual>& f
return c1.targetID < c2.targetID;
});
double totProb{0.0};
for (auto& h : allHits) {
boost::hash_combine(txpIDsHash, h.targetID);
for (auto& h : allHits) {
boost::hash_combine(txpIDsHash, h.targetID);
txpIDs.push_back(h.targetID);
double refLen = std::max(1.0, static_cast<double>(transcripts[h.targetID].RefLength));
double startProb = 1.0 / refLen;
double startProb = 1.0 / refLen;
auxProbs.push_back(startProb);
totProb += startProb;
}
Expand All @@ -1666,7 +1670,7 @@ void getHitsForFragment(std::pair<header_sequence_qual, header_sequence_qual>& f
TranscriptGroup tg(txpIDs, txpIDsHash);
eqBuilder.addGroup(std::move(tg), auxProbs);
} else {
salmonOpts.jointLog->warn("Unexpected empty hit group [orphaned]");
salmonOpts.jointLog->warn("Unexpected empty hit group [orphaned]");
}
}
} else { // Not an orphan
Expand Down Expand Up @@ -1713,6 +1717,8 @@ void getHitsForFragment(std::pair<header_sequence_qual, header_sequence_qual>& f
if (transcriptID < lastTranscriptId) {
sortedByTranscript = false;
}
// ANCHOR TEST
t.setAnchorFragment();
alnList.emplace_back(transcriptID, fmt, score, minHitPos, fragLength);
++readHits;
++hitListCount;
Expand Down Expand Up @@ -1740,11 +1746,11 @@ void getHitsForFragment(std::pair<header_sequence_qual, header_sequence_qual>& f

size_t txpIDsHash{0};
double totProb{0.0};
for (auto& h : jointHits) {
boost::hash_combine(txpIDsHash, h.transcriptID);
for (auto& h : jointHits) {
boost::hash_combine(txpIDsHash, h.transcriptID);
txpIDs.push_back(h.transcriptID);
double refLen = std::max(1.0, static_cast<double>(transcripts[h.transcriptID].RefLength));
double startProb = 1.0 / refLen;
double startProb = 1.0 / refLen;
auxProbs.push_back(startProb);
totProb += startProb;
}
Expand All @@ -1755,8 +1761,8 @@ void getHitsForFragment(std::pair<header_sequence_qual, header_sequence_qual>& f
TranscriptGroup tg(txpIDs, txpIDsHash);
eqBuilder.addGroup(std::move(tg), auxProbs);
} else {
salmonOpts.jointLog->warn("Unexpected empty hit group [paired]");
}
salmonOpts.jointLog->warn("Unexpected empty hit group [paired]");
}
}

} // end else
Expand Down Expand Up @@ -1937,8 +1943,8 @@ void getHitsForFragment(jellyfish::header_sequence_qual& frag,
std::vector<double> auxProbs(hits.size(), uniProb);

size_t txpIDsHash{0};
for (auto& h : hits) {
boost::hash_combine(txpIDsHash, h.targetID);
for (auto& h : hits) {
boost::hash_combine(txpIDsHash, h.targetID);
txpIDs.push_back(h.targetID);
}

Expand Down Expand Up @@ -3036,10 +3042,10 @@ int salmonQuantify(int argc, char *argv[]) {
"File containing the #1 mates")
("mates2,2", po::value<vector<string>>(&mate2ReadFiles)->multitoken(),
"File containing the #2 mates")
("allowOrphans", po::value<bool>(&(sopt.allowOrphans))->default_value(false), "Consider orphaned reads as valid hits when "
("allowOrphans", po::bool_switch(&(sopt.allowOrphans))->default_value(false), "Consider orphaned reads as valid hits when "
"performing lightweight-alignment. This option will increase sensitivity (allow more reads to map and "
"more transcripts to be detected), but may decrease specificity as orphaned alignments are more likely "
"to be spurious.")
"to be spurious.")
("threads,p", po::value<uint32_t>(&(sopt.numThreads))->default_value(sopt.numThreads), "The number of threads to use concurrently.")
("incompatPrior", po::value<double>(&(sopt.incompatPrior))->default_value(1e-20), "This option "
"sets the prior probability that an alignment that disagrees with the specified "
Expand Down Expand Up @@ -3269,7 +3275,7 @@ transcript abundance from RNA-seq reads

// Parameter validation
// If we're allowing orphans, make sure that the read libraries are paired-end.
// Otherwise, this option makes no sense.
// Otherwise, this option makes no sense.
if (sopt.allowOrphans) {
for (auto& rl : readLibraries) {
if (!rl.isPairedEnd()) {
Expand Down

0 comments on commit 2173085

Please sign in to comment.