Skip to content

Commit

Permalink
Add command to optionally write quasi-mappings
Browse files Browse the repository at this point in the history
The --writeMappings command will write the quasi-mappings
computed by Salmon to stdout in SAM format.  If given the
name of a file, it will write the results to that file
instead.
  • Loading branch information
rob-p committed Aug 28, 2016
1 parent 7bbaae8 commit 25d0a48
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 6 deletions.
9 changes: 9 additions & 0 deletions include/SalmonOpts.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
// Logger includes
#include "spdlog/spdlog.h"

#include <fstream>
#include <ostream>
#include <memory> // for shared_ptr


Expand Down Expand Up @@ -124,6 +126,13 @@ struct SalmonOpts {
bool useVBOpt; // Use Variational Bayesian EM instead of "regular" EM in the batch passes

bool useQuasi; // Are we using the quasi-mapping based index or not.

// For writing quasi-mappings
std::string qmFileName;
std::ofstream qmFile;
std::unique_ptr<std::ostream> qmStream{nullptr};
std::shared_ptr<spdlog::logger> qmLog{nullptr};


std::unique_ptr<std::ofstream> unmappedFile{nullptr};
bool writeUnmappedNames; // write the names of unmapped reads
Expand Down
89 changes: 88 additions & 1 deletion src/SalmonQuantify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@ extern "C" {
#include "SACollector.hpp"
#include "SASearcher.hpp"
#include "SalmonOpts.hpp"
#include "PairAlignmentFormatter.hpp"
#include "SingleAlignmentFormatter.hpp"
#include "RapMapUtils.hpp"
//#include "TextBootstrapWriter.hpp"

/****** QUASI MAPPING DECLARATIONS *********/
Expand Down Expand Up @@ -766,7 +769,13 @@ void processReadsQuasi(
std::vector<QuasiAlignment> rightHits;
rapmap::utils::HitCounters hctr;
salmon::utils::MappingType mapType{salmon::utils::MappingType::UNMAPPED};


PairAlignmentFormatter<RapMapIndexT*> formatter(qidx);
fmt::MemoryWriter sstream;
auto* qmLog = salmonOpts.qmLog.get();
bool writeQuasimappings = (qmLog != nullptr);

auto rg = parser->getReadGroup();
while (parser->refill(rg)) {
rangeSize = rg.size();
Expand Down Expand Up @@ -993,6 +1002,12 @@ void processReadsQuasi(
} break;
}
}

if (writeQuasimappings) {
rapmap::utils::writeAlignmentsToStream(rp, formatter,
hctr, jointHits, sstream);
}

} else {
// This read was completely unmapped.
mapType = salmon::utils::MappingType::UNMAPPED;
Expand All @@ -1004,6 +1019,7 @@ void processReadsQuasi(
unmappedNames << rp.first.name << ' ' << salmon::utils::str(mapType) << '\n';
}


validHits += jointHits.size();
localNumAssignedFragments += (jointHits.size() > 0);
locRead++;
Expand Down Expand Up @@ -1039,6 +1055,15 @@ void processReadsQuasi(
unmappedNames.clear();
}

if (writeQuasimappings) {
std::string outStr(sstream.str());
// Get rid of last newline
if (!outStr.empty()) {
outStr.pop_back();
qmLog->info(std::move(outStr));
}
sstream.clear();
}

prevObservedFrags = numObservedFragments;
AlnGroupVecRange<QuasiAlignment> hitLists = boost::make_iterator_range(
Expand Down Expand Up @@ -1118,6 +1143,12 @@ void processReadsQuasi(
SACollector<RapMapIndexT> hitCollector(qidx);
SASearcher<RapMapIndexT> saSearcher(qidx);
rapmap::utils::HitCounters hctr;

SingleAlignmentFormatter<RapMapIndexT*> formatter(qidx);
fmt::MemoryWriter sstream;
auto* qmLog = salmonOpts.qmLog.get();
bool writeQuasimappings = (qmLog != nullptr);

auto rg = parser->getReadGroup();
while (parser->refill(rg)) {
rangeSize = rg.size();
Expand Down Expand Up @@ -1211,6 +1242,11 @@ void processReadsQuasi(
} break;
}
}

if (writeQuasimappings) {
rapmap::utils::writeAlignmentsToStream(rp, formatter,
hctr, jointHits, sstream);
}

if (writeUnmapped and jointHits.empty()) {
// If we have no mappings --- then there's nothing to do
Expand Down Expand Up @@ -1252,6 +1288,16 @@ void processReadsQuasi(
unmappedNames.clear();
}

if (writeQuasimappings) {
std::string outStr(sstream.str());
// Get rid of last newline
if (!outStr.empty()) {
outStr.pop_back();
qmLog->info(std::move(outStr));
}
sstream.clear();
}

prevObservedFrags = numObservedFragments;
AlnGroupVecRange<QuasiAlignment> hitLists = boost::make_iterator_range(
structureVec.begin(), structureVec.begin() + rangeSize);
Expand Down Expand Up @@ -1343,6 +1389,9 @@ void processReadLibrary(
// change value before the lambda below is evaluated --- crazy!
if (largeIndex) {
if (perfectHashIndex) { // Perfect Hash
if (salmonOpts.qmFileName != "" and i == 0) {
rapmap::utils::writeSAMHeader(*(sidx->quasiIndexPerfectHash64()), salmonOpts.qmLog);
}
auto threadFun = [&, i]() -> void {
processReadsQuasi<RapMapSAIndex<int64_t, PerfectHash<int64_t>>>(
pairedParserPtr.get(), readExp, rl, structureVec[i],
Expand All @@ -1354,6 +1403,9 @@ void processReadLibrary(
};
threads.emplace_back(threadFun);
} else { // Dense Hash
if (salmonOpts.qmFileName != "" and i == 0) {
rapmap::utils::writeSAMHeader(*(sidx->quasiIndex64()), salmonOpts.qmLog);
}
auto threadFun = [&, i]() -> void {
processReadsQuasi<RapMapSAIndex<int64_t, DenseHash<int64_t>>>(
pairedParserPtr.get(), readExp, rl, structureVec[i],
Expand All @@ -1367,6 +1419,9 @@ void processReadLibrary(
}
} else {
if (perfectHashIndex) { // Perfect Hash
if (salmonOpts.qmFileName != "" and i == 0) {
rapmap::utils::writeSAMHeader(*(sidx->quasiIndexPerfectHash32()), salmonOpts.qmLog);
}
auto threadFun = [&, i]() -> void {
processReadsQuasi<RapMapSAIndex<int32_t, PerfectHash<int32_t>>>(
pairedParserPtr.get(), readExp, rl, structureVec[i],
Expand All @@ -1378,6 +1433,9 @@ void processReadLibrary(
};
threads.emplace_back(threadFun);
} else { // Dense Hash
if (salmonOpts.qmFileName != "" and i == 0) {
rapmap::utils::writeSAMHeader(*(sidx->quasiIndex32()), salmonOpts.qmLog);
}
auto threadFun = [&, i]() -> void {
processReadsQuasi<RapMapSAIndex<int32_t, DenseHash<int32_t>>>(
pairedParserPtr.get(), readExp, rl, structureVec[i],
Expand Down Expand Up @@ -1494,10 +1552,14 @@ void processReadLibrary(
bool largeIndex = sidx->is64BitQuasi();
bool perfectHashIndex = sidx->isPerfectHashQuasi();
for (int i = 0; i < numThreads; ++i) {

// NOTE: we *must* capture i by value here, b/c it can (sometimes, does)
// change value before the lambda below is evaluated --- crazy!
if (largeIndex) {
if (perfectHashIndex) { // Perfect Hash
if (salmonOpts.qmFileName != "" and i == 0) {
rapmap::utils::writeSAMHeader(*(sidx->quasiIndexPerfectHash64()), salmonOpts.qmLog);
}
auto threadFun = [&, i]() -> void {
processReadsQuasi<RapMapSAIndex<int64_t, PerfectHash<int64_t>>>(
pairedParserPtr.get(), readExp, rl, structureVec[i],
Expand All @@ -1509,6 +1571,10 @@ void processReadLibrary(
};
threads.emplace_back(threadFun);
} else { // Dense Hash
if (salmonOpts.qmFileName != "" and i == 0) {
rapmap::utils::writeSAMHeader(*(sidx->quasiIndex64()), salmonOpts.qmLog);
}

auto threadFun = [&, i]() -> void {
processReadsQuasi<RapMapSAIndex<int64_t, DenseHash<int64_t>>>(
singleParserPtr.get(), readExp, rl, structureVec[i],
Expand All @@ -1522,6 +1588,10 @@ void processReadLibrary(
}
} else {
if (perfectHashIndex) { // Perfect Hash
if (salmonOpts.qmFileName != "" and i == 0) {
rapmap::utils::writeSAMHeader(*(sidx->quasiIndexPerfectHash32()), salmonOpts.qmLog);
}

auto threadFun = [&, i]() -> void {
processReadsQuasi<RapMapSAIndex<int32_t, PerfectHash<int32_t>>>(
singleParserPtr.get(), readExp, rl, structureVec[i],
Expand All @@ -1533,6 +1603,10 @@ void processReadLibrary(
};
threads.emplace_back(threadFun);
} else { // Dense Hash
if (salmonOpts.qmFileName != "" and i == 0) {
rapmap::utils::writeSAMHeader(*(sidx->quasiIndex32()), salmonOpts.qmLog);
}

auto threadFun = [&, i]() -> void {
processReadsQuasi<RapMapSAIndex<int32_t, DenseHash<int32_t>>>(
singleParserPtr.get(), readExp, rl, structureVec[i],
Expand Down Expand Up @@ -1923,7 +1997,12 @@ int salmonQuantify(int argc, char* argv[]) {
"should be parsed. Files ending in \'.gtf\' or \'.gff\' are assumed to "
"be in GTF "
"format; files with any other extension are assumed to be in the simple "
"format.");
"format.")
(
"writeMappings", po::value<string>(&sopt.qmFileName)->default_value("")->implicit_value("-"),
"If this option is provided, then the quasi-mapping results will be written out in SAM-compatible "
"format. By default, output will be directed to stdout, but an alternative file name can be "
"provided instead.");

sopt.noRichEqClasses = false;
// mapping cache has been deprecated
Expand Down Expand Up @@ -2467,6 +2546,14 @@ transcript abundance from RNA-seq reads
if (sopt.unmappedFile) { sopt.unmappedFile->close(); }
}
}

// if we wrote quasimappings, flush that buffer
if (sopt.qmFileName != "" ){
sopt.qmLog->flush();
// if we wrote to a buffer other than stdout, close
// the file
if (sopt.qmFileName != "-") { sopt.qmFile.close(); }
}
} catch (po::error& e) {
std::cerr << "Exception : [" << e.what() << "]. Exiting.\n";
std::exit(1);
Expand Down
53 changes: 48 additions & 5 deletions src/SalmonUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1325,14 +1325,24 @@ bool processQuantOptions(SalmonOpts& sopt,
std::cout << "Logs will be written to " << logDirectory.string() << "\n";
}

// Determine what we'll do with quasi-mapping results
bool writeQuasimappings = (sopt.qmFileName != "");

bfs::path logPath = logDirectory / "salmon_quant.log";
// must be a power-of-two

size_t max_q_size = 2097152;

// make it larger if we're writing mappings or
// unmapped names.
std::streambuf* qmBuf;
if (writeQuasimappings or sopt.writeUnmappedNames) {
max_q_size = 16777216;
}

spdlog::set_async_mode(max_q_size);

auto fileSink = std::make_shared<spdlog::sinks::simple_file_sink_mt>(
logPath.string(), true);
logPath.string());
auto rawConsoleSink = std::make_shared<spdlog::sinks::stdout_sink_mt>();
auto consoleSink =
std::make_shared<spdlog::sinks::ansicolor_sink>(rawConsoleSink);
Expand Down Expand Up @@ -1361,9 +1371,8 @@ bool processQuantOptions(SalmonOpts& sopt,
std::ofstream* outFile = new std::ofstream(unmappedNameFile.string());

// Must be a power of 2
size_t queueSize{268435456};

spdlog::set_async_mode(queueSize);
//size_t queueSize{268435456};
//spdlog::set_async_mode(queueSize);
auto outputSink =
std::make_shared<spdlog::sinks::ostream_sink_mt>(*outFile);

Expand All @@ -1375,9 +1384,43 @@ bool processQuantOptions(SalmonOpts& sopt,
} else {
jointLog->error("Couldn't create auxiliary directory in which to place "
"\"unmapped_names.txt\"");
return false;
}
}

if (writeQuasimappings) {
// output to stdout
if (sopt.qmFileName == "-") {
qmBuf = std::cout.rdbuf();
} else { // output to the requested path, making the directory if it doesn't exist
// get the parent directory
bfs::path qmDir = boost::filesystem::path(sopt.qmFileName).parent_path();
// if it's not already a directory that exists
bool qmDirSuccess = boost::filesystem::is_directory(qmDir);
// try to create it
if (!qmDirSuccess) {
qmDirSuccess = boost::filesystem::create_directories(qmDir);
}
// if the directory already existed, or we created it successfully, open the file
if (qmDirSuccess) {
sopt.qmFile.open(sopt.qmFileName);
qmBuf = sopt.qmFile.rdbuf();
} else {
bfs::path qmFileName = boost::filesystem::path(sopt.qmFileName).filename();
jointLog->error("Couldn't create requested directory {} in which "
"to place the mapping output {}", qmDir.string(), qmFileName.string());
return false;
}
}
// Now set the output stream to the buffer, which is
// either std::cout, or a file.
sopt.qmStream.reset(new std::ostream(qmBuf));

auto outputSink = std::make_shared<spdlog::sinks::ostream_sink_mt>(*(sopt.qmStream.get()));
sopt.qmLog = std::make_shared<spdlog::logger>("qmStream", outputSink);
sopt.qmLog->set_pattern("%v");
}

// Verify that no inconsistent options were provided
if (sopt.numGibbsSamples > 0 and sopt.numBootstraps > 0) {
jointLog->error("You cannot perform both Gibbs sampling and bootstrapping. "
Expand Down

0 comments on commit 25d0a48

Please sign in to comment.