Skip to content

Commit

Permalink
working on rad mode
Browse files Browse the repository at this point in the history
  • Loading branch information
Rob Patro committed Jul 16, 2020
1 parent cf4c5e4 commit 723727e
Show file tree
Hide file tree
Showing 8 changed files with 1,011 additions and 2 deletions.
3 changes: 3 additions & 0 deletions include/AlevinOpts.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ struct AlevinOpts {
bool dumpCellEq;
//Stop progress sumps
bool quiet;
// just perform alignment and produce
// an output directory with a PAM file.
bool just_align;
//flag for deduplication
bool noDedup;
//Number of generator threads
Expand Down
1 change: 1 addition & 0 deletions include/SalmonDefaults.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ namespace defaults {
constexpr const bool dumpMtx{false};
constexpr const bool noEM{false};
constexpr const bool debug{true};
constexpr const bool just_align{false};
constexpr const uint32_t trimRight{0};
constexpr const uint32_t numBootstraps{0};
constexpr const uint32_t numGibbsSamples{0};
Expand Down
139 changes: 139 additions & 0 deletions include/radicl/BasicBinWriter.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#ifndef __BASIC_BIN_WRITER__
#define __BASIC_BIN_WRITER__

#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <iterator>
#include <limits>
#include <string>
#include <utility>
#include <vector>

/**
 * Low-level interface for writing information directly into a binary
 * stream (vector of char).
 *
 * Every value is appended as its raw in-memory bytes (sizeof(T) bytes, in the
 * byte order of the host). Strings are written as a uint16_t length followed
 * by the characters, without a terminating NUL.
 */
class BasicBinWriter {
private:
  std::vector<char> _bin_data;
  static constexpr const uint16_t max_str_len{std::numeric_limits<uint16_t>::max()};

  // Append the object representation of `v` (sizeof(T) bytes) to the buffer.
  template <typename T>
  void append_raw(const T& v) {
    const char* first = reinterpret_cast<const char*>(&v);
    _bin_data.insert(_bin_data.end(), first, first + sizeof(T));
  }

public:
  BasicBinWriter() = default;
  // start with an empty buffer that has room for `reserve_size` bytes
  BasicBinWriter(size_t reserve_size) { _bin_data.reserve(reserve_size); }
  // copy bin data to this record
  BasicBinWriter(const std::vector<char>& bin_data) : _bin_data(bin_data) {}
  // or just move if possible
  BasicBinWriter(std::vector<char>&& bin_data) : _bin_data(std::move(bin_data)) {}

  // Discard everything written so far.
  void clear() { _bin_data.clear(); }

  /**
   * Overwrite sizeof(T) bytes that were already written, starting at byte
   * `offset`, with the raw bytes of `v`. The buffer is never grown.
   *
   * @param offset byte position of the first byte to overwrite
   * @param v      value whose bytes are copied into the buffer
   * @return true if the bytes [offset, offset + sizeof(T)) lie entirely
   *         inside the buffer and were overwritten; false otherwise (the
   *         buffer is left untouched).
   */
  template <typename T>
  bool write_integer_at_offset(size_t offset, const T& v) {
    const size_t total = _bin_data.size();
    // Compare against `total - sizeof(v)` instead of computing
    // `offset + sizeof(v)`: the sum can wrap around for a huge offset and
    // would then slip past the check. A write that ends exactly at the end
    // of the buffer is valid.
    if (sizeof(v) > total || offset > total - sizeof(v)) {
      return false;
    }
    std::memcpy(_bin_data.data() + offset, &v, sizeof(v));
    return true;
  }

  // Fixed-size values: each overload appends the raw bytes of its argument.
  BasicBinWriter& operator<<(const bool& inval) {
    append_raw(inval);
    return *this;
  }
  BasicBinWriter& operator<<(const uint8_t& inval) {
    append_raw(inval);
    return *this;
  }
  BasicBinWriter& operator<<(const uint16_t& inval) {
    append_raw(inval);
    return *this;
  }
  BasicBinWriter& operator<<(const uint32_t& inval) {
    append_raw(inval);
    return *this;
  }
  BasicBinWriter& operator<<(const uint64_t& inval) {
    append_raw(inval);
    return *this;
  }
  BasicBinWriter& operator<<(const int32_t& inval) {
    append_raw(inval);
    return *this;
  }

  BasicBinWriter& operator<<(const double& inval) {
    append_raw(inval);
    return *this;
  }

  /**
   * Append a string as a uint16_t length prefix followed by its characters
   * (no terminating NUL). Prints an error and terminates the process with
   * exit code 1 if the string is too long for the length prefix.
   */
  BasicBinWriter& operator<<(const std::string& inval) {
    // NOTE(review): `>=` also rejects a string of exactly max_str_len
    // characters, although that length is representable in the uint16_t
    // prefix. Kept as-is; confirm whether any reader depends on it.
    if (inval.size() >= max_str_len) {
      std::cerr << "ERROR!! DOESN'T SUPPORT STRING LENGTH LONGER THAN "
                << max_str_len << ". String length: "
                << inval.size() << "\n";
      std::exit(1);
    }
    (*this) << static_cast<uint16_t>(inval.size());
    _bin_data.insert(_bin_data.end(), inval.begin(), inval.end());
    return *this;
  }

#ifdef STX_NO_STD_STRING_VIEW
  /*
  BasicBinWriter& operator<<(const stx::string_view& inval) {
    if (inval.size() >= max_str_len) {
      std::cerr << "ERROR!! DOESN'T SUPPORT STRING LENGTH LONGER THAN 255. "
                   "String length: "
                << inval.size() << "\n";
      std::exit(1);
    }
    (*this) << static_cast<uint16_t>(inval.size());
    //(*this) << inval.size();
    // std::cout << inval.size() << " " << inval.data() << " " << inval <<
    // "\n";
    char* inCharPtr = const_cast<char*>(inval.data());
    std::copy(inCharPtr, inCharPtr + inval.size(),
              std::back_inserter(_bin_data));
    return *this;
  }
  */
#endif

  // Number of bytes currently held in the buffer.
  uint64_t num_bytes() const { return _bin_data.size(); }

  // support for logging directly from spdlog:
  // copies the buffered bytes, one char at a time, into the output stream.
  template <typename OStream>
  friend OStream& operator<<(OStream& os, const BasicBinWriter& bin_record) {
    std::ostream_iterator<char> out_iter(os);
    std::copy(bin_record._bin_data.begin(), bin_record._bin_data.end(),
              out_iter);
    return os;
  }
};

// Explicit instantiations of BasicBinWriter::write_integer_at_offset for the
// unsigned integer widths uint16_t, uint32_t and uint64_t.
// NOTE(review): these are explicit instantiation *definitions* that live in a
// header. If the header is included from more than one translation unit the
// same instantiation is defined several times; consider moving them into a
// single .cpp file, or dropping them since the template body is visible in
// the class — confirm against the build.
template
bool BasicBinWriter::write_integer_at_offset<uint16_t>(size_t offset, const uint16_t& v);

template
bool BasicBinWriter::write_integer_at_offset<uint32_t>(size_t offset, const uint32_t& v);

template
bool BasicBinWriter::write_integer_at_offset<uint64_t>(size_t offset, const uint64_t& v);

#endif //__BASIC_BIN_WRITER__
42 changes: 42 additions & 0 deletions include/radicl/RADHeader.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#ifndef __RAD_HEADER__
#define __RAD_HEADER__

#include "radicl/BasicBinWriter.hpp"
#include <cinttypes>

// In-memory header record: a paired-end flag, the list of reference names
// (with a running count) and a chunk counter, plus a routine that serializes
// these fields into a BasicBinWriter.
class RADHeader {
private:
  bool is_paired_{false};
  uint64_t ref_count_{0};
  std::vector<std::string> ref_names;
  uint64_t num_chunks_{0};

public:
  // bool from_file();

  // Appends `n` to the stored reference names and returns how many names
  // have been added so far (including this one).
  uint64_t add_refname(const std::string& n) {
    ref_names.push_back(n);
    return ++ref_count_;
  }

  // Getter / setter pair for the paired-end flag.
  bool is_paired() const { return is_paired_; }
  void is_paired(bool ip) { is_paired_ = ip; }

  // Writes the header fields to `bw` in this order: the paired flag as one
  // uint8_t (1 or 0), the reference count, every reference name in insertion
  // order, and finally the chunk count. Always returns true.
  bool dump_to_bin(BasicBinWriter& bw) {
    const uint8_t paired_flag = static_cast<uint8_t>(is_paired_);
    bw << paired_flag;
    bw << ref_count_;
    for (auto name_it = ref_names.begin(); name_it != ref_names.end(); ++name_it) {
      bw << *name_it;
    }
    bw << num_chunks_;
    return true;
  }
};

#endif // __RAD_HEADER__
21 changes: 19 additions & 2 deletions src/Alevin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ using bcEnd = BarcodeEnd;
namespace apt = alevin::protocols;
namespace aut = alevin::utils;

template <typename ProtocolT>
int alevin_sc_align(AlevinOpts<ProtocolT>& aopt,
SalmonOpts& sopt,
boost::program_options::parsed_options& orderedOptions);

template <typename ProtocolT>
int alevinQuant(AlevinOpts<ProtocolT>& aopt,
SalmonOpts& sopt,
Expand Down Expand Up @@ -627,8 +632,8 @@ bool writeFastq(AlevinOpts<ProtocolT>& aopt,
}

/*
function to Rapidly parse through the barcode file, generate density
of each Unique barcode, use knee method to select true barcodes and
function to rapidly parse through the barcode file, generate the density
of each Unique barcode, use the knee method to select true barcodes and
use our model to generate mapping of each 16M barcodes to true/null
barcode.
*/
Expand Down Expand Up @@ -869,6 +874,18 @@ void initiatePipeline(AlevinOpts<ProtocolT>& aopt,
fmt::print(stderr, "{}\n\n", commentString);
}

if (aopt.just_align) {
// if we are just aligning
auto rc = alevin_sc_align(aopt, sopt, orderedOptions);
if (rc == 0) {
aopt.jointLog->info("sc-align successful.");
std::exit(0);
} else {
aopt.jointLog->error("sc-align exited with return code {}", rc);
std::exit(rc);
}
}

/*
Barcode Knee generation
*/
Expand Down
7 changes: 7 additions & 0 deletions src/AlevinUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,7 @@ namespace alevin {
std::vector<spdlog::sink_ptr> sinks{consoleSink, fileSink};
aopt.jointLog = spdlog::create("alevinLog", std::begin(sinks), std::end(sinks));

aopt.just_align = vm["justAlign"].as<bool>();
aopt.quiet = vm["quiet"].as<bool>();
aopt.noEM = vm["noem"].as<bool>();
aopt.noDedup = vm["noDedup"].as<bool>();
Expand All @@ -590,6 +591,12 @@ namespace alevin {
aopt.forceCells = vm["forceCells"].as<uint32_t>();
aopt.expectCells = vm["expectCells"].as<uint32_t>();

if (aopt.just_align) {
aopt.jointLog->info("The --justAlign flag was passed to alevin. The "
"reads will be selectively aligned and the output written to a PAM file."
"Arguments passed that correspond to other processing steps will be ignored");
}

if (aopt.umiEditDistance > 4 ) {
aopt.jointLog->error("Too high edit distance collapsing {}, expected <= 4",
aopt.umiEditDistance);
Expand Down
3 changes: 3 additions & 0 deletions src/ProgramOptionsGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,9 @@ namespace salmon {
("version,v", "print version string")
("help,h", "produce help message")
("output,o", po::value<std::string>()->required(), "Output quantification directory.")
("justAlign,j", po::bool_switch()->default_value(alevin::defaults::just_align),
"just selectively align the data and write the results to a PAM file. Do not perform "
"the rest of the quantification procedure.")
("threads,p",
po::value<uint32_t>(&(sopt.numThreads))->default_value(sopt.numThreads),
"The number of threads to use concurrently.")
Expand Down

0 comments on commit 723727e

Please sign in to comment.