Skip to content

Commit

Permalink
Merge branch 'develop' of github.com:COMBINE-lab/salmon into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
k3yavi committed Nov 3, 2020
2 parents 54b3181 + 12ea3a7 commit b349ec3
Show file tree
Hide file tree
Showing 30 changed files with 5,573 additions and 208 deletions.
2 changes: 1 addition & 1 deletion doc/source/salmon.rst
Original file line number Diff line number Diff line change
Expand Up @@ -742,7 +742,7 @@ that the boost options parser that we use works, and the fact that
``--writeMappings`` has an implicit argument of ``stdout``, if you
provide an explicit argument to ``--writeMappings``, you must do so
with the syntax ``--writeMappings=<outfile>`` rather than the synatx
``--writeMappings <outfile>``. This is a due to a limitation of the
``--writeMappings <outfile>``. This is due to a limitation of the
parser in how the latter could be interpreted.

.. note:: Compatible mappings
Expand Down
3 changes: 2 additions & 1 deletion include/AlevinOpts.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,9 @@ struct AlevinOpts {
//Stop progress sumps
bool quiet;
// just perform alignment and produce
// an output directory with a PAM file.
// an output directory with a RAD file.
bool just_align;
bool sketch_mode;
//flag for deduplication
bool noDedup;
//flag for not performing external whitelisting
Expand Down
6 changes: 4 additions & 2 deletions include/AlevinUtils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,16 +79,18 @@ namespace alevin{

template <typename ProtocolT>
bool extractUMI(std::string& read,
std::string& read2,
ProtocolT& pt,
std::string& umi);

template <typename ProtocolT>
void getReadSequence(ProtocolT& pt,
std::string* getReadSequence(ProtocolT& pt,
std::string& seq,
std::string& seq2,
std::string& subseq);

template <typename ProtocolT>
nonstd::optional<std::string> extractBarcode(std::string& read, ProtocolT& pt);
bool extractBarcode(std::string& read, std::string& read2, ProtocolT& pt, std::string& bc);

template <typename OrderedOptionsT>
bool writeCmdInfo(boost::filesystem::path cmdInfoPath,
Expand Down
8 changes: 4 additions & 4 deletions include/BAMQueue.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ BAMQueue<FragT>::~BAMQueue() {
AlignmentGroup<FragT*>* grp;
//while(!alnGroupPool_.empty()) { alnGroupPool_.pop(grp); delete grp; grp = nullptr; }
while(alnGroupPool_.try_dequeue(grp)) { delete grp; grp = nullptr; }
fmt::print(stderr, "\nEmptied Alignemnt Group Pool. . ");
fmt::print(stderr, "\nEmptied Alignment Group Pool. . ");
while(alnGroupQueue_.try_dequeue(grp)) { delete grp; grp = nullptr; }
fmt::print(stderr, "\nEmptied Alignment Group Queue. . . ");
fmt::print(stderr, "done\n");
Expand Down Expand Up @@ -337,7 +337,7 @@ inline AlignmentType getPairedAlignmentType_(bam_seq_t* aln) {
if (!mateIsMapped and !readIsMapped) {
return AlignmentType::UnmappedPair;
}
std::cerr << "\n\n\nEncountered unknown alignemnt type; this should not happen!\n"
std::cerr << "\n\n\nEncountered unknown alignment type; this should not happen!\n"
<< "Please file a bug report on GitHub. Exiting.\n";
std::exit(1);
}
Expand Down Expand Up @@ -410,7 +410,7 @@ inline bool BAMQueue<FragT>::getFrag_(ReadPair& rpair, FilterT filt) {
break;
// === end if MappedDiscordantPair case
default:
logger_->error("\n\n\nEncountered unknown alignemnt type; this should not happen!\n"
logger_->error("\n\n\nEncountered unknown alignment type; this should not happen!\n"
"Please file a bug report on GitHub. Exiting.\n");
std::exit(1);
break;
Expand Down Expand Up @@ -531,7 +531,7 @@ inline bool BAMQueue<FragT>::getFrag_(ReadPair& rpair, FilterT filt) {
}
break;
default:
logger_->error("\n\n\nEncountered unknown alignemnt type; this should not happen!\n"
logger_->error("\n\n\nEncountered unknown alignment type; this should not happen!\n"
"Please file a bug report on GitHub. Exiting.\n");
std::exit(1);
break;
Expand Down
10 changes: 9 additions & 1 deletion include/FastxParserThreadUtils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,15 @@ ALWAYS_INLINE void yieldSleep() {

ALWAYS_INLINE void backoffExp(size_t& curMaxIters) {
thread_local std::uniform_int_distribution<size_t> dist;
thread_local std::minstd_rand gen(std::random_device{}());

// see : https://github.com/coryan/google-cloud-cpp-common/blob/a6e7b6b362d72451d6dc1fec5bc7643693dbea96/google/cloud/internal/random.cc
#if defined(__linux) && defined(__GLIBCXX__) && __GLIBCXX__ >= 20200128
thread_local std::random_device rd("/dev/urandom");
#else
thread_local std::random_device rd;
#endif // defined(__GLIBCXX__) && __GLIBCXX__ >= 20200128

thread_local std::minstd_rand gen(rd());
const size_t spinIters =
dist(gen, decltype(dist)::param_type{0, curMaxIters});
curMaxIters = std::min(2 * curMaxIters, MAX_BACKOFF_ITERS);
Expand Down
1 change: 1 addition & 0 deletions include/SalmonDefaults.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ namespace defaults {
constexpr const bool noEM{false};
constexpr const bool debug{true};
constexpr const bool just_align{false};
constexpr const bool sketch_mode{false};
constexpr const uint32_t trimRight{0};
constexpr const uint32_t numBootstraps{0};
constexpr const uint32_t numGibbsSamples{0};
Expand Down
2 changes: 1 addition & 1 deletion include/SalmonOpts.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ struct SalmonOpts {
// the optimization.

bool allowOrphans; // Consider orphaned reads when performing lightweight
// alignemnt.
// alignment.

std::string auxDir; // The directory where auxiliary files will be written.

Expand Down
3 changes: 3 additions & 0 deletions include/SalmonUtils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ extern "C" {
#include <boost/program_options.hpp>
#include <iostream>
#include <memory>
#include <random>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
Expand Down Expand Up @@ -315,6 +316,8 @@ LibraryFormat hitType(int32_t readStart, bool isForward);

double compute_1_edit_threshold(int32_t l, const SalmonOpts& sopt);

//std::random_device get_random_device();

/**
* Cache the mappings provided in an efficient binary format
* to the provided file handle.
Expand Down
7 changes: 6 additions & 1 deletion include/Sampler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,12 @@ void sampleMiniBatch(AlignmentLibraryT<FragT>& alnLib,
OutputQueue<FragT>& outputQueue) {

// Seed with a real random value, if available
std::random_device rd;
#if defined(__linux) && defined(__GLIBCXX__) && __GLIBCXX__ >= 20200128
std::random_device rd("/dev/urandom");
#else
std::random_device rd;
#endif // defined(__GLIBCXX__) && __GLIBCXX__ >= 2020012

auto log = spdlog::get("jointLog");

// Create a random uniform distribution
Expand Down
110 changes: 110 additions & 0 deletions include/SingleCellProtocols.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,93 @@
#define __SINGLE_CELL_PROTOCOLS_HPP__

#include <string>
#include <ostream>

#include "AlevinOpts.hpp"
#include "AlevinTypes.hpp"
#include "pufferfish/chobo/static_vector.hpp"

namespace alevin{
namespace protocols {

static constexpr size_t num_tag_pieces{16};
struct TagGeometry {
// uint32_t read_num{0};
// tuples are read_num, start_pos, length
chobo::static_vector<std::pair<uint32_t, size_t>, num_tag_pieces> substr_locs1{};
chobo::static_vector<std::pair<uint32_t, size_t>, num_tag_pieces> substr_locs2{};
// the total length of the tag on read 1
size_t length1{0};
// the total length of the tag on read 2
size_t length2{0};
// the largest index on read 1
size_t largest_index1{0};
// the largest index on read 2
size_t largest_index2{0};

inline bool unbounded1() const { return length1 == std::string::npos; }
inline bool unbounded2() const { return length2 == std::string::npos; }

inline bool uses_r1() const { return !substr_locs1.empty(); }
inline bool uses_r2() const { return !substr_locs2.empty(); }

size_t length() const { return length1 + length2; }

// Given the geometry of the tag, fill in the tag from
// read 1 (`from1`) and read 2 (`from2`), placing the constructed
// tag in `to`.
//
// *assumption*: `to` is large enough to hold the tag
// *returns*: true if the tag was written completely, and false otherwise
inline bool extract_tag(std::string& from1, std::string& from2, std::string&to) {
// if anything is too short, just ignore the whole thing
if (uses_r1() and (from1.length() < largest_index1)) { return false; }
if (uses_r2() and (from2.length() < largest_index2)) { return false; }

// will point to the next place to
// begin filling the output string
auto fill_it = to.begin();

// grab anything from read 1
auto f1b = from1.begin();
for (auto& st_len : substr_locs1) {
auto f1 = f1b + st_len.first;
fill_it = std::copy(f1, f1 + st_len.second, fill_it);
}

// grab anything from read 2
auto f2b = from2.begin();
for (auto& st_len : substr_locs2) {
auto f2 = f2b + st_len.first;
fill_it = std::copy(f2, f2 + st_len.second, fill_it);
}
return true;
}

inline bool extract_read(std::string& from1, std::string& from2, std::string&to) {
// if anything is too short, just ignore the whole thing
if (uses_r1() and !unbounded1() and (from1.length() < largest_index1)) { return false; }
if (uses_r2() and !unbounded2() and (from2.length() < largest_index2)) { return false; }

// since the read extraction doesn't have a
// fixed size, we'll append rather than
// overwrite.
to.clear();
// grab anything from read 1
for (auto& st_len : substr_locs1) {
to.append(from1, st_len.first, st_len.second);
}
// grab anything from read 2
for (auto& st_len : substr_locs2) {
to.append(from2, st_len.first, st_len.second);
}
return true;
}

};

std::ostream& operator<<(std::ostream& os, const TagGeometry& tg);

struct Rule{
Rule(){}
Rule(uint32_t barcodeLength_,
Expand All @@ -21,10 +101,19 @@ namespace alevin{
maxValue(maxValue_){
alevin::types::AlevinUMIKmer::k(umiLength);
}
// NOTE: these do nothing but satisfy
// template requirements right now
void set_umi_geo(TagGeometry& g) { (void)g; };
void set_bc_geo(TagGeometry& g) { (void)g; };
void set_read_geo(TagGeometry& g) { (void)g; };
uint32_t barcode_length() const { return barcodeLength; }
uint32_t umi_length() const { return umiLength; }

uint32_t barcodeLength, umiLength, maxValue;
BarcodeEnd end;
};


struct DropSeq : Rule{
//Drop-Seq starts from 5 end with 12 length
//barcode and 8 length umi & iupac can be
Expand Down Expand Up @@ -87,6 +176,27 @@ namespace alevin{
struct Custom : Rule{
Custom() : Rule(0,0,BarcodeEnd::FIVE,0){}
};


// for the new type of specification
struct CustomGeometry {
// vector of offset, length pairs
TagGeometry umi_geo;
TagGeometry bc_geo;
TagGeometry read_geo;

void set_umi_geo(TagGeometry& g) { umi_geo = g; umiLength = umi_geo.length1 + umi_geo.length2; }
void set_bc_geo(TagGeometry& g) { bc_geo = g; barcodeLength = bc_geo.length1 + bc_geo.length2; }
void set_read_geo(TagGeometry& g) { read_geo = g; }
uint32_t barcode_length() const { return barcodeLength; }
uint32_t umi_length() const { return umiLength; }

// These values do nothing in this class except
// maintain template compat ... fix this design later.
uint32_t barcodeLength, umiLength, maxValue;
BarcodeEnd end;
};

}
}

Expand Down

0 comments on commit b349ec3

Please sign in to comment.