Skip to content

Commit

Permalink
change interface for extractBarcode
Browse files Browse the repository at this point in the history
  • Loading branch information
Rob Patro committed Oct 9, 2020
1 parent a27954b commit 19d66ca
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 80 deletions.
2 changes: 1 addition & 1 deletion include/AlevinUtils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ namespace alevin{
std::string& subseq);

template <typename ProtocolT>
nonstd::optional<std::string> extractBarcode(std::string& read, ProtocolT& pt);
bool extractBarcode(std::string& read, ProtocolT& pt, std::string& bc);

template <typename OrderedOptionsT>
bool writeCmdInfo(boost::filesystem::path cmdInfoPath,
Expand Down
19 changes: 10 additions & 9 deletions src/Alevin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ void densityCalculator(single_parser* parser,

uint64_t usedNumBarcodesLocal{0};
uint64_t totNumBarcodesLocal{0};
std::string extractedBarcode;
while (parser->refill(rg)) {
rangeSize = rg.size();
for (size_t i = 0; i < rangeSize; ++i) { // For all the read in this batch
Expand All @@ -131,14 +132,14 @@ void densityCalculator(single_parser* parser,
//if (aopt.protocol.end == bcEnd::THREE) {
// std::reverse(seq.begin(), seq.end());
//}

nonstd::optional<std::string> extractedBarcode = aut::extractBarcode(seq, aopt.protocol);
bool seqOk = (extractedBarcode.has_value()) ? aut::sequenceCheck(*extractedBarcode) : false;
extractedBarcode.clear();
bool extraction_ok = aut::extractBarcode(seq, aopt.protocol, extractedBarcode);
bool seqOk = (extraction_ok) ? aut::sequenceCheck(extractedBarcode) : false;
if (not seqOk){
bool recovered = aut::recoverBarcode(*extractedBarcode);
bool recovered = aut::recoverBarcode(extractedBarcode);
if (not recovered) { continue; }
}
freqCounter[*extractedBarcode] += 1;
freqCounter[extractedBarcode] += 1;
++usedNumBarcodesLocal;
}//end-for
}//end-while
Expand Down Expand Up @@ -561,15 +562,15 @@ bool writeFastq(AlevinOpts<ProtocolT>& aopt,
rangeSize = rg.size();
for (size_t i = 0; i < rangeSize; ++i) { // For all the read in this batch
auto& rp = rg[i];
barcode.clear();
if(aopt.protocol.end == bcEnd::FIVE){
{
nonstd::optional<std::string> extractedSeq = aut::extractBarcode(rp.first.seq, aopt.protocol);
bool seqOk = (extractedSeq.has_value()) ? aut::sequenceCheck(*extractedSeq) : false;
bool extracted_barcode = aut::extractBarcode(rp.first.seq, aopt.protocol, barcode);
bool seqOk = (extracted_barcode) ? aut::sequenceCheck(barcode) : false;
if (not seqOk){
bool recovered = aut::recoverBarcode(*extractedSeq);
bool recovered = aut::recoverBarcode(barcode);
if (not recovered) { continue; }
}
barcode = *extractedSeq;
}

{
Expand Down
87 changes: 47 additions & 40 deletions src/AlevinUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,87 +194,94 @@ namespace alevin {
}

template <>
nonstd::optional<std::string> extractBarcode<apt::DropSeq>(std::string& read,
apt::DropSeq& pt){
bool extractBarcode<apt::DropSeq>(std::string& read,
apt::DropSeq& pt,
std::string& bc){
return (read.length() >= pt.barcodeLength) ?
nonstd::optional<std::string>(read.substr(0, pt.barcodeLength)) : nonstd::nullopt;
(bc = read.substr(0, pt.barcodeLength), true) : false;
}
template <>
nonstd::optional<std::string> extractBarcode<apt::CITESeq>(std::string& read,
apt::CITESeq& pt){
bool extractBarcode<apt::CITESeq>(std::string& read,
apt::CITESeq& pt,
std::string& bc){
return (read.length() >= pt.barcodeLength) ?
nonstd::optional<std::string>(read.substr(0, pt.barcodeLength)) : nonstd::nullopt;
(bc = read.substr(0, pt.barcodeLength), true) : false;
}
template <>
nonstd::optional<std::string> extractBarcode<apt::ChromiumV3>(std::string& read,
apt::ChromiumV3& pt){
bool extractBarcode<apt::ChromiumV3>(std::string& read,
apt::ChromiumV3& pt,
std::string& bc){
return (read.length() >= pt.barcodeLength) ?
nonstd::optional<std::string>(read.substr(0, pt.barcodeLength)) : nonstd::nullopt;
(bc = read.substr(0, pt.barcodeLength), true) : false;
}
template <>
nonstd::optional<std::string> extractBarcode<apt::Chromium>(std::string& read,
apt::Chromium& pt){
bool extractBarcode<apt::Chromium>(std::string& read,
apt::Chromium& pt,
std::string& bc){
return (read.length() >= pt.barcodeLength) ?
nonstd::optional<std::string>(read.substr(0, pt.barcodeLength)) : nonstd::nullopt;
(bc = read.substr(0, pt.barcodeLength), true) : false;
}
template <>
nonstd::optional<std::string> extractBarcode<apt::Gemcode>(std::string& read,
apt::Gemcode& pt){
bool extractBarcode<apt::Gemcode>(std::string& read,
apt::Gemcode& pt,
std::string& bc){
return (read.length() >= pt.barcodeLength) ?
nonstd::optional<std::string>(read.substr(0, pt.barcodeLength)) : nonstd::nullopt;
(bc=read.substr(0, pt.barcodeLength), true) : false;
}
template <>
nonstd::optional<std::string> extractBarcode<apt::Custom>(std::string& read,
apt::Custom& pt){
bool extractBarcode<apt::Custom>(std::string& read,
apt::Custom& pt,
std::string& bc){
if (pt.end == BarcodeEnd::FIVE) {
return (read.length() >= pt.barcodeLength) ?
nonstd::optional<std::string>(read.substr(0, pt.barcodeLength)) : nonstd::nullopt;
(bc = read.substr(0, pt.barcodeLength), true) : false;
} else if (pt.end == BarcodeEnd::THREE) {
return (read.length() >= (pt.umiLength + pt.barcodeLength)) ?
nonstd::optional<std::string>(read.substr(pt.umiLength, pt.barcodeLength)) : nonstd::nullopt;
(bc = read.substr(pt.umiLength, pt.barcodeLength), true) : false;
} else {
return nonstd::nullopt;
return false;
}
}
template <>
nonstd::optional<std::string> extractBarcode<apt::CustomGeometry>(std::string& read,
apt::CustomGeometry& pt){
std::string bc;
bool ok = pt.bc_geo.extract(read, bc);
return ok ? nonstd::optional<std::string>(read) : nonstd::nullopt;
bool extractBarcode<apt::CustomGeometry>(std::string& read,
apt::CustomGeometry& pt,
std::string& bc){
return pt.bc_geo.extract(read, bc);
}
template <>
nonstd::optional<std::string> extractBarcode<apt::QuartzSeq2>(std::string& read,
apt::QuartzSeq2& pt){
bool extractBarcode<apt::QuartzSeq2>(std::string& read,
apt::QuartzSeq2& pt,
std::string& bc){
return (read.length() >= pt.barcodeLength) ?
nonstd::optional<std::string>(read.substr(0, pt.barcodeLength)) : nonstd::nullopt;
(bc = read.substr(0, pt.barcodeLength), true) : false;
}
template <>
nonstd::optional<std::string> extractBarcode<apt::CELSeq2>(std::string& read,
apt::CELSeq2& pt){
bool extractBarcode<apt::CELSeq2>(std::string& read,
apt::CELSeq2& pt,
std::string& bc){
return (read.length() >= (pt.umiLength + pt.barcodeLength)) ?
nonstd::optional<std::string>(read.substr(pt.umiLength, pt.barcodeLength)) : nonstd::nullopt;

(bc = read.substr(pt.umiLength, pt.barcodeLength), true) : false;
}
template <>
nonstd::optional<std::string> extractBarcode<apt::CELSeq>(std::string& read,
apt::CELSeq& pt){
bool extractBarcode<apt::CELSeq>(std::string& read,
apt::CELSeq& pt,
std::string& bc){
return (read.length() >= (pt.umiLength + pt.barcodeLength)) ?
nonstd::optional<std::string>(read.substr(pt.umiLength, pt.barcodeLength)) : nonstd::nullopt;
(bc = read.substr(pt.umiLength, pt.barcodeLength), true) : false;
}
template <>
nonstd::optional<std::string> extractBarcode<apt::InDrop>(std::string& read, apt::InDrop& pt){
bool extractBarcode<apt::InDrop>(std::string& read, apt::InDrop& pt, std::string& bc){
std::string::size_type index = read.find(pt.w1);
if (index == std::string::npos){
return nonstd::nullopt;
return false;
}
auto bc = read.substr(0, index);
bc = read.substr(0, index);
if(bc.size()<8 or bc.size()>12){
return nonstd::nullopt;
return false;
}
uint32_t offset = bc.size()+pt.w1.size();
bc += read.substr(offset, offset+8);
return nonstd::optional<std::string>(bc);
return true;
}

void getIndelNeighbors(
Expand Down
60 changes: 30 additions & 30 deletions src/SalmonAlevin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,7 @@ void process_reads_sc_sketch(paired_parser* parser, ReadExperimentT& readExp, Re

std::string readSubSeq;
std::string umi;
std::string barcode;
//////////////////////

bool tryAlign{salmonOpts.validateMappings};
Expand Down Expand Up @@ -632,28 +633,26 @@ void process_reads_sc_sketch(paired_parser* parser, ReadExperimentT& readExp, Re
size_t barcodeLength = alevinOpts.protocol.barcodeLength;
size_t umiLength = alevinOpts.protocol.umiLength;
umi.clear();
nonstd::optional<std::string> barcode;
barcode.clear();
nonstd::optional<uint32_t> barcodeIdx;
extraBAMtags.clear();
bool seqOk;

if (alevinOpts.protocol.end == bcEnd::FIVE ||
alevinOpts.protocol.end == bcEnd::THREE){
barcode = aut::extractBarcode(rp.first.seq, alevinOpts.protocol);
seqOk = (barcode.has_value()) ?
aut::sequenceCheck(*barcode, Sequence::BARCODE) : false;
bool extracted_bc = aut::extractBarcode(rp.first.seq, alevinOpts.protocol, barcode);
seqOk = (extracted_bc) ?
aut::sequenceCheck(barcode, Sequence::BARCODE) : false;

if (not seqOk){
bool recovered = aut::recoverBarcode(*barcode);
bool recovered = aut::recoverBarcode(barcode);
if (recovered) { seqOk = true; }
}

// If we have a valid barcode
if (seqOk) {
// corrBarcodeIndex = barcodeMap[barcodeIndex];
// jointHitGroup.setBarcode(*barcodeIdx);
aut::extractUMI(rp.first.seq, alevinOpts.protocol, umi);
//aopt.jointLog->info("BC : {}, UMI : {}". *barcode, umi);
//aopt.jointLog->info("BC : {}, UMI : {}". barcode, umi);
if (umiLength != umi.size()) {
smallSeqs += 1;
} else {
Expand Down Expand Up @@ -796,7 +795,7 @@ void process_reads_sc_sketch(paired_parser* parser, ReadExperimentT& readExp, Re
}

alevin::types::AlevinCellBarcodeKmer bck;
bool barcode_ok = bck.fromChars(*barcode);
bool barcode_ok = bck.fromChars(barcode);

// NOTE: Think if we should put decoy mappings in the RAD file
if (mapType == salmon::utils::MappingType::SINGLE_MAPPED) {
Expand All @@ -815,7 +814,7 @@ void process_reads_sc_sketch(paired_parser* parser, ReadExperimentT& readExp, Re
}
}
} else { // must use a string for the barcode
bw << *barcode;
bw << barcode;
}

// umi
Expand Down Expand Up @@ -1046,6 +1045,7 @@ void process_reads_sc_align(paired_parser* parser, ReadExperimentT& readExp, Rea

std::string readSubSeq;
std::string umi;
std::string barcode;
//////////////////////

bool tryAlign{salmonOpts.validateMappings};
Expand Down Expand Up @@ -1092,26 +1092,24 @@ void process_reads_sc_align(paired_parser* parser, ReadExperimentT& readExp, Rea
size_t barcodeLength = alevinOpts.protocol.barcodeLength;
size_t umiLength = alevinOpts.protocol.umiLength;
umi.clear();
nonstd::optional<std::string> barcode;
barcode.clear();
nonstd::optional<uint32_t> barcodeIdx;
extraBAMtags.clear();
bool seqOk;

if (alevinOpts.protocol.end == bcEnd::FIVE ||
alevinOpts.protocol.end == bcEnd::THREE){
barcode = aut::extractBarcode(rp.first.seq, alevinOpts.protocol);
seqOk = (barcode.has_value()) ?
aut::sequenceCheck(*barcode, Sequence::BARCODE) : false;
bool extracted_barcode = aut::extractBarcode(rp.first.seq, alevinOpts.protocol, barcode);
seqOk = (extracted_barcode) ?
aut::sequenceCheck(barcode, Sequence::BARCODE) : false;

if (not seqOk){
bool recovered = aut::recoverBarcode(*barcode);
bool recovered = aut::recoverBarcode(barcode);
if (recovered) { seqOk = true; }
}

// If we have a valid barcode
if (seqOk) {
// corrBarcodeIndex = barcodeMap[barcodeIndex];
// jointHitGroup.setBarcode(*barcodeIdx);
aut::extractUMI(rp.first.seq, alevinOpts.protocol, umi);

if (umiLength != umi.size()) {
Expand Down Expand Up @@ -1240,7 +1238,7 @@ void process_reads_sc_align(paired_parser* parser, ReadExperimentT& readExp, Rea
} //end-if validate mapping

alevin::types::AlevinCellBarcodeKmer bck;
bool barcode_ok = bck.fromChars(*barcode);
bool barcode_ok = bck.fromChars(barcode);

// NOTE: Think if we should put decoy mappings in the RAD file
if (mapType == salmon::utils::MappingType::SINGLE_MAPPED) {
Expand All @@ -1254,7 +1252,7 @@ void process_reads_sc_align(paired_parser* parser, ReadExperimentT& readExp, Rea
if ( alevinOpts.protocol.barcodeLength <= 32 ) {

if (barcode_ok) {
//alevinOpts.jointLog->info("BARCODE : {} \t ENC : {} ", *barcode, bck.word(0));
//alevinOpts.jointLog->info("BARCODE : {} \t ENC : {} ", barcode, bck.word(0));
if (alevinOpts.protocol.barcodeLength <= 16) { // can use 32-bit int
uint32_t shortbck = static_cast<uint32_t>(0x00000000FFFFFFFF & bck.word(0));
//alevinOpts.jointLog->info("shortbck : {} ", shortbck);
Expand All @@ -1264,7 +1262,7 @@ void process_reads_sc_align(paired_parser* parser, ReadExperimentT& readExp, Rea
}
}
} else { // must use a string for the barcode
bw << *barcode;
bw << barcode;
}

// umi
Expand Down Expand Up @@ -1500,6 +1498,8 @@ void processReadsQuasi(
msi.collect_decoys(writeQuasimappings);

std::string readSubSeq;
std::string umi;
std::string barcode;
//////////////////////

bool tryAlign{salmonOpts.validateMappings};
Expand Down Expand Up @@ -1545,39 +1545,39 @@ void processReadsQuasi(
// extracting barcodes
size_t barcodeLength = alevinOpts.protocol.barcodeLength;
size_t umiLength = alevinOpts.protocol.umiLength;
std::string umi;//, barcode;
nonstd::optional<std::string> barcode;
umi.clear();
barcode.clear();
nonstd::optional<uint32_t> barcodeIdx;
extraBAMtags.clear();
bool seqOk;

if (alevinOpts.protocol.end == bcEnd::FIVE ||
alevinOpts.protocol.end == bcEnd::THREE){
barcode = aut::extractBarcode(rp.first.seq, alevinOpts.protocol);
seqOk = (barcode.has_value()) ?
aut::sequenceCheck(*barcode, Sequence::BARCODE) : false;
bool extracted_barcode = aut::extractBarcode(rp.first.seq, alevinOpts.protocol, barcode);
seqOk = (extracted_barcode) ?
aut::sequenceCheck(barcode, Sequence::BARCODE) : false;

if (not seqOk){
bool recovered = aut::recoverBarcode(*barcode);
bool recovered = aut::recoverBarcode(barcode);
if (recovered) { seqOk = true; }
}

// If we have a barcode sequence, but not yet an index
if (seqOk and (not barcodeIdx)) {
// If we get here, we have a sequence-valid barcode.
// Check if it is in the trBcs map.
auto trIt = trBcs.find(*barcode);
auto trIt = trBcs.find(barcode);

// If it is, use that index
if(trIt != trBcs.end()){
barcodeIdx = trIt->second;
} else{
// If it's not, see if it's in the barcode map
auto indIt = barcodeMap.find(*barcode);
auto indIt = barcodeMap.find(barcode);
// If so grab the representative and get its index
if (indIt != barcodeMap.end()){
barcode = indIt->second.front().first;
auto trItLoc = trBcs.find(*barcode);
auto trItLoc = trBcs.find(barcode);
if(trItLoc == trBcs.end()){
salmonOpts.jointLog->error("Wrong entry in barcode softmap.\n"
"Please Report this on github");
Expand Down Expand Up @@ -1609,7 +1609,7 @@ void processReadsQuasi(
jointHitGroup.setUMI(umiIdx.word(0));
if (writeQuasimappings) {
extraBAMtags += "\tCB:Z:";
extraBAMtags += *barcode;
extraBAMtags += barcode;
extraBAMtags += "\tUR:Z:";
extraBAMtags += umi;
}
Expand Down

0 comments on commit 19d66ca

Please sign in to comment.