Skip to content

Commit

Permalink
Add "peeking" to determine SE/PE from BAM
Browse files Browse the repository at this point in the history
Rather than making the user provide AS or AP to determine
the library type automatically in alignment-based mode,
peek the first record in the file and check if it is paired.

Bump the version of Staden's IOLib we are using.
  • Loading branch information
rob-p committed Aug 25, 2016
1 parent 774082c commit 6116b2a
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 10 deletions.
8 changes: 4 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -506,11 +506,11 @@ message("Build system will compile Staden IOLib")
message("==================================================================")
ExternalProject_Add(libstadenio
DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/staden-io_lib/releases/download/v1.13.10/io_lib-1.13.10.tar.gz -o staden-io_lib-v1.13.10.tar.gz &&
mkdir -p staden-io_lib-1.13.10 &&
tar -xzf staden-io_lib-v1.13.10.tar.gz --strip-components=1 -C staden-io_lib-1.13.10 &&
DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/staden-io_lib/archive/v1.14.8.tar.gz -o staden-io_lib-v1.14.8.tar.gz &&
mkdir -p staden-io_lib-1.14.8 &&
tar -xzf staden-io_lib-v1.14.8.tar.gz --strip-components=1 -C staden-io_lib-1.14.8 &&
rm -fr staden-io_lib &&
mv -f staden-io_lib-1.13.10 staden-io_lib
mv -f staden-io_lib-1.14.8 staden-io_lib
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/staden-io_lib
INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
CONFIGURE_COMMAND ./configure --enable-shared=no --without-libcurl --prefix=<INSTALL_DIR> LDFLAGS=${LIBSTADEN_LDFLAGS} CFLAGS=${LIBSTADEN_CFLAGS} CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER}
Expand Down
3 changes: 3 additions & 0 deletions include/SalmonUtils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ std::vector<ReadLibrary> extractReadLibraries(boost::program_options::parsed_opt

LibraryFormat parseLibraryFormatString(std::string& fmt);

bool peekBAMIsPaired(const boost::filesystem::path& fname);


size_t numberOfReadsInFastaFile(const std::string& fname);

bool readKmerOrder( const std::string& fname, std::vector<uint64_t>& kmers );
Expand Down
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ SalmonQuantify.cpp
FragmentLengthDistribution.cpp
FragmentStartPositionDistribution.cpp
SequenceBiasModel.cpp
StadenUtils.cpp
TranscriptGroup.cpp
GZipWriter.cpp
#${GAT_SOURCE_DIR}/external/install/src/rapmap/sais.c
Expand All @@ -61,6 +60,7 @@ GenomicFeature.cpp
VersionChecker.cpp
SBModel.cpp
FastxParser.cpp
StadenUtils.cpp
SalmonUtils.cpp
DistributionUtils.cpp
SalmonStringUtils.cpp
Expand Down
12 changes: 7 additions & 5 deletions src/SalmonQuantifyAlignments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1428,13 +1428,15 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
std::string libFmtStr = vm["libType"].as<std::string>();

// If we're auto-detecting, set things up appropriately
std::set<std::string> autoTypes = {"AS", "as", "AP", "ap"};
bool autoDetectFmt = (autoTypes.find(libFmtStr) != autoTypes.end());
bool autoDetectFmt = (libFmtStr == "a" or libFmtStr == "A");//(autoTypes.find(libFmtStr) != autoTypes.end());
if (autoDetectFmt) {
if (libFmtStr == "as" or libFmtStr == "AS") {
libFmt = LibraryFormat(ReadType::SINGLE_END, ReadOrientation::NONE, ReadStrandedness::U);
} else if (libFmtStr == "ap" or libFmtStr == "AP") {

bool isPairedEnd = salmon::utils::peekBAMIsPaired(alignmentFiles.front());

if (isPairedEnd) {
libFmt = LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD, ReadStrandedness::U);
} else {
libFmt = LibraryFormat(ReadType::SINGLE_END, ReadOrientation::NONE, ReadStrandedness::U);
}
} else { // Parse the provided type
libFmt = salmon::utils::parseLibraryFormatStringNew(libFmtStr);
Expand Down
51 changes: 51 additions & 0 deletions src/SalmonUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
#include "SGSmooth.hpp"
#include "TranscriptGeneMap.hpp"

#include "StadenUtils.hpp"

namespace salmon {
namespace utils {

Expand Down Expand Up @@ -850,6 +852,55 @@ LibraryFormat parseLibraryFormatString(std::string& fmt) {
return lf;
}

bool peekBAMIsPaired(const boost::filesystem::path& file) {
namespace bfs = boost::filesystem;
std::string readMode = "r";

if (bfs::is_regular_file(file)) {
if (bfs::is_empty(file)) {
fmt::MemoryWriter errstr;
errstr << "file [" << file.string() << "] appears to be empty "
"(i.e. it has size 0). This is likely an error. "
"Please re-run salmon with a corrected input file.\n\n";
throw std::invalid_argument(errstr.str());
return false;
}
}
if (file.extension() == ".bam") {
readMode = "rb";
}

auto* fp = scram_open(file.c_str(), readMode.c_str());

// If we couldn't open the file, then report this and exit.
if (fp == NULL) {
fmt::MemoryWriter errstr;
errstr << "ERROR: Failed to open file " << file.string() << ", exiting!\n";
throw std::invalid_argument(errstr.str());
return false;
}

bam_seq_t* read = nullptr;
read = staden::utils::bam_init();

bool didRead = (scram_get_seq(fp, &read) >= 0);
bool isPaired{false};

if (didRead) {
isPaired = bam_flag(read) & BAM_FPAIRED;
} else {
fmt::MemoryWriter errstr;
errstr << "ERROR: Failed to read alignment from " << file.string() << ", exiting!\n";
staden::utils::bam_destroy(read);
throw std::invalid_argument(errstr.str());
return false;
}

scram_close(fp);
staden::utils::bam_destroy(read);
return isPaired;
}

uint64_t encode(uint64_t tid, uint64_t offset) {
uint64_t res = (((tid & 0xFFFFFFFF) << 32) | (offset & 0xFFFFFFFF));
return res;
Expand Down

0 comments on commit 6116b2a

Please sign in to comment.