Skip to content

Commit

Permalink
Merge pull request #726 from COMBINE-lab/split-seq
Browse files Browse the repository at this point in the history
Add split-seq V1 and V2 to alevin
  • Loading branch information
rob-p committed Dec 8, 2021
2 parents e2b92cb + efc2ce9 commit 0056f8d
Show file tree
Hide file tree
Showing 10 changed files with 180 additions and 4 deletions.
2 changes: 2 additions & 0 deletions include/SalmonDefaults.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ namespace defaults {
constexpr const bool isCITESeq{false};
constexpr const bool isCELSeq{false};
constexpr const bool isCELSeq2{false};
// Split-seq protocol switches. These are the default values of the
// `splitseqV1` / `splitseqV2` program options, so both protocols are off
// unless explicitly requested.
constexpr const bool isSplitSeqV1{false};
constexpr const bool isSplitSeqV2{false};
constexpr const bool isQuartzSeq2{false};
constexpr const bool isSciSeq3{false};
constexpr const bool noQuant{false};
Expand Down
10 changes: 10 additions & 0 deletions include/SingleCellProtocols.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,16 @@ namespace alevin{
CELSeq2(): Rule(6, 6, BarcodeEnd::FIVE, 4096){}
};

// Split-seq V2 protocol description.
// The cell barcode is assembled from three fixed-length segments of the
// second read (see extractBarcode<apt::SplitSeqV2>); the fields below give
// the segment length and the offset of each segment within that read.
// NOTE(review): the Rule arguments are presumably (barcode length = 24,
// i.e. 3 * bcLen, UMI length = 10, barcode end, max value) -- confirm
// against Rule's constructor.
struct SplitSeqV2 : Rule {
  const std::size_t bcLen = 8;    // length of one barcode segment
  const std::size_t bc1Pos = 10;  // offset of the first segment
  const std::size_t bc2Pos = 48;  // offset of the second segment
  const std::size_t bc3Pos = 78;  // offset of the third segment

  SplitSeqV2() : Rule(24, 10, BarcodeEnd::FIVE, 4294967295) {}
};

// Split-seq V1 protocol description.
// The cell barcode is assembled from three fixed-length segments of the
// second read (see extractBarcode<apt::SplitSeqV1>); the fields below give
// the segment length and the offset of each segment within that read.
// NOTE(review): the Rule arguments are presumably (barcode length = 24,
// i.e. 3 * bcLen, UMI length = 10, barcode end, max value) -- confirm
// against Rule's constructor.
struct SplitSeqV1 : Rule {
  const std::size_t bcLen = 8;    // length of one barcode segment
  const std::size_t bc1Pos = 10;  // offset of the first segment
  const std::size_t bc2Pos = 48;  // offset of the second segment
  const std::size_t bc3Pos = 86;  // offset of the third segment

  SplitSeqV1() : Rule(24, 10, BarcodeEnd::FIVE, 4294967295) {}
};

//dummy class
struct Custom : Rule{
Custom() : Rule(0,0,BarcodeEnd::FIVE,0){}
Expand Down
18 changes: 18 additions & 0 deletions src/Alevin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1029,6 +1029,8 @@ salmon-based processing of single-cell RNA-seq data.
bool gemcode = vm["gemcode"].as<bool>();
bool celseq = vm["celseq"].as<bool>();
bool celseq2 = vm["celseq2"].as<bool>();
bool splitseqV1 = vm["splitseqV1"].as<bool>();
bool splitseqV2 = vm["splitseqV2"].as<bool>();
bool quartzseq2 = vm["quartzseq2"].as<bool>();
bool sciseq3 = vm["sciseq3"].as<bool>();
bool custom_old = vm.count("barcodeLength") and
Expand All @@ -1047,6 +1049,8 @@ salmon-based processing of single-cell RNA-seq data.
if (gemcode) validate_num_protocols += 1;
if (celseq) validate_num_protocols += 1;
if (celseq2) validate_num_protocols += 1;
if (splitseqV1) validate_num_protocols += 1;
if (splitseqV2) validate_num_protocols += 1;
if (quartzseq2) validate_num_protocols += 1;
if (sciseq3) validate_num_protocols += 1;
if (custom) validate_num_protocols += 1;
Expand Down Expand Up @@ -1147,6 +1151,20 @@ salmon-based processing of single-cell RNA-seq data.
vm, commentString, noTgMap,
barcodeFiles, readFiles, salmonIndex);
}
else if(splitseqV1){
AlevinOpts<apt::SplitSeqV1> aopt;
//aopt.jointLog->warn("Using Split-SeqV1 Setting for Alevin");
initiatePipeline(aopt, sopt, orderedOptions,
vm, commentString, noTgMap,
barcodeFiles, readFiles, salmonIndex);
}
else if(splitseqV2){
AlevinOpts<apt::SplitSeqV2> aopt;
//aopt.jointLog->warn("Using Split-SeqV2 Setting for Alevin");
initiatePipeline(aopt, sopt, orderedOptions,
vm, commentString, noTgMap,
barcodeFiles, readFiles, salmonIndex);
}
else if(quartzseq2){
AlevinOpts<apt::QuartzSeq2> aopt;
//aopt.jointLog->warn("Using Quartz-Seq2 Setting for Alevin");
Expand Down
8 changes: 8 additions & 0 deletions src/AlevinHash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,14 @@ int salmonHashQuantify(AlevinOpts<apt::CELSeq2>& aopt,
bfs::path& outputDirectory,
CFreqMapT& freqCounter);
// Explicit instantiation of salmonHashQuantify for the Split-seq V1 protocol.
template
int salmonHashQuantify(AlevinOpts<apt::SplitSeqV1>& aopt,
bfs::path& outputDirectory,
CFreqMapT& freqCounter);
// Explicit instantiation of salmonHashQuantify for the Split-seq V2 protocol.
template
int salmonHashQuantify(AlevinOpts<apt::SplitSeqV2>& aopt,
bfs::path& outputDirectory,
CFreqMapT& freqCounter);
template
int salmonHashQuantify(AlevinOpts<apt::QuartzSeq2>& aopt,
bfs::path& outputDirectory,
CFreqMapT& freqCounter);
Expand Down
72 changes: 68 additions & 4 deletions src/AlevinUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,22 @@ namespace alevin {
return &seq2;
}
// Split-seq V1: select which mate carries the sequence to be used.
// Always returns a pointer to `seq`; none of the arguments are modified.
template <>
std::string* getReadSequence(apt::SplitSeqV1& protocol,
                             std::string& seq,
                             std::string& seq2,
                             std::string& subseq) {
  // fastq2 contains barcode and umi, so it is never handed back here
  (void)seq2;
  (void)protocol;
  (void)subseq;

  std::string* const chosen = &seq;
  return chosen;
}
// Split-seq V2: select which mate carries the sequence to be used.
// Always returns a pointer to `seq`; none of the arguments are modified.
template <>
std::string* getReadSequence(apt::SplitSeqV2& protocol,
                             std::string& seq,
                             std::string& seq2,
                             std::string& subseq) {
  // fastq2 contains barcode and umi, so it is never handed back here
  (void)seq2;
  (void)protocol;
  (void)subseq;

  std::string* const chosen = &seq;
  return chosen;
}
template <>
std::string* getReadSequence(apt::QuartzSeq2& protocol,
std::string& seq,
std::string& seq2,
Expand Down Expand Up @@ -180,6 +196,24 @@ namespace alevin {
(umi.assign(read, pt.barcodeLength, pt.umiLength), true) : false;
}
// Split-seq V1: copy the UMI out of the second read.
// The UMI is the first pt.umiLength characters of `read2`. Returns false
// (leaving `umi` untouched) when `read2` is shorter than
// pt.barcodeLength + pt.umiLength; otherwise fills `umi` and returns true.
template <>
bool extractUMI<apt::SplitSeqV1>(std::string& read,
                                 std::string& read2,
                                 apt::SplitSeqV1& pt,
                                 std::string& umi) {
  (void)read;  // the first read is not consulted

  const bool longEnough = read2.length() >= pt.barcodeLength + pt.umiLength;
  if (!longEnough) {
    return false;
  }

  umi.assign(read2, 0, pt.umiLength);
  return true;
}
// Split-seq V2: copy the UMI out of the second read.
// The UMI is the first pt.umiLength characters of `read2`. Returns false
// (leaving `umi` untouched) when `read2` is shorter than
// pt.barcodeLength + pt.umiLength; otherwise fills `umi` and returns true.
template <>
bool extractUMI<apt::SplitSeqV2>(std::string& read,
                                 std::string& read2,
                                 apt::SplitSeqV2& pt,
                                 std::string& umi) {
  (void)read;  // the first read is not consulted

  const bool longEnough = read2.length() >= pt.barcodeLength + pt.umiLength;
  if (!longEnough) {
    return false;
  }

  umi.assign(read2, 0, pt.umiLength);
  return true;
}
template <>
bool extractUMI<apt::Gemcode>(std::string& read,
std::string& read2,
apt::Gemcode& pt,
Expand Down Expand Up @@ -273,17 +307,17 @@ namespace alevin {
template <>
bool extractBarcode<apt::CITESeq>(std::string& read,
std::string& read2,
apt::CITESeq& pt,
std::string& bc){
apt::CITESeq& pt,
std::string& bc){
(void)read2;
return (read.length() >= pt.barcodeLength) ?
(bc.assign(read, 0, pt.barcodeLength), true) : false;
}
template <>
bool extractBarcode<apt::ChromiumV3>(std::string& read,
std::string& read2,
apt::ChromiumV3& pt,
std::string& bc){
apt::ChromiumV3& pt,
std::string& bc){
(void)read2;
return (read.length() >= pt.barcodeLength) ?
(bc.assign(read,0, pt.barcodeLength), true) : false;
Expand Down Expand Up @@ -328,6 +362,28 @@ namespace alevin {
}
}
// Split-seq V1: build the cell barcode from three segments of the second read.
// Each segment is pt.bcLen characters long and starts at pt.bc1Pos,
// pt.bc2Pos and pt.bc3Pos respectively. The segments are concatenated in
// that order and at most pt.barcodeLength characters of the result are
// stored in `bc`. Returns false (leaving `bc` untouched) when `read2` is too
// short to contain the third segment.
template <>
bool extractBarcode<apt::SplitSeqV1>(std::string& read,
                                     std::string& read2,
                                     apt::SplitSeqV1& pt,
                                     std::string& bc) {
  (void)read;  // the first read is not consulted

  if (read2.length() < pt.bc3Pos + pt.bcLen) {
    return false;
  }

  // Assemble into a separate buffer first so `bc` is only written once the
  // full concatenation is available.
  std::string joined;
  joined.reserve(3 * pt.bcLen);
  joined.append(read2, pt.bc1Pos, pt.bcLen);
  joined.append(read2, pt.bc2Pos, pt.bcLen);
  joined.append(read2, pt.bc3Pos, pt.bcLen);

  bc.assign(joined, 0, pt.barcodeLength);
  return true;
}
// Split-seq V2: build the cell barcode from three segments of the second read.
// Each segment is pt.bcLen characters long and starts at pt.bc1Pos,
// pt.bc2Pos and pt.bc3Pos respectively. The segments are concatenated in
// that order and at most pt.barcodeLength characters of the result are
// stored in `bc`. Returns false (leaving `bc` untouched) when `read2` is too
// short to contain the third segment.
template <>
bool extractBarcode<apt::SplitSeqV2>(std::string& read,
                                     std::string& read2,
                                     apt::SplitSeqV2& pt,
                                     std::string& bc) {
  (void)read;  // the first read is not consulted

  if (read2.length() < pt.bc3Pos + pt.bcLen) {
    return false;
  }

  // Assemble into a separate buffer first so `bc` is only written once the
  // full concatenation is available.
  std::string joined;
  joined.reserve(3 * pt.bcLen);
  joined.append(read2, pt.bc1Pos, pt.bcLen);
  joined.append(read2, pt.bc2Pos, pt.bcLen);
  joined.append(read2, pt.bc3Pos, pt.bcLen);

  bc.assign(joined, 0, pt.barcodeLength);
  return true;
}
template <>
bool extractBarcode<apt::Custom>(std::string& read,
std::string& read2,
apt::Custom& pt,
Expand Down Expand Up @@ -1387,6 +1443,14 @@ namespace alevin {
SalmonOpts& sopt, bool noTgMap,
boost::program_options::variables_map& vm);
// Explicit instantiation of processAlevinOpts for the Split-seq V1 protocol.
template
bool processAlevinOpts(AlevinOpts<apt::SplitSeqV1>& aopt,
SalmonOpts& sopt, bool noTgMap,
boost::program_options::variables_map& vm);
// Explicit instantiation of processAlevinOpts for the Split-seq V2 protocol.
template
bool processAlevinOpts(AlevinOpts<apt::SplitSeqV2>& aopt,
SalmonOpts& sopt, bool noTgMap,
boost::program_options::variables_map& vm);
template
bool processAlevinOpts(AlevinOpts<apt::QuartzSeq2>& aopt,
SalmonOpts& sopt, bool noTgMap,
boost::program_options::variables_map& vm);
Expand Down
20 changes: 20 additions & 0 deletions src/CollapsedCellOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1495,6 +1495,26 @@ bool CollapsedCellOptimizer::optimize(EqMapT& fullEqMap,
CFreqMapT& freqCounter,
size_t numLowConfidentBarcode);
// Explicit instantiation of CollapsedCellOptimizer::optimize for the
// Split-seq V1 protocol.
template
bool CollapsedCellOptimizer::optimize(EqMapT& fullEqMap,
spp::sparse_hash_map<uint32_t, uint32_t>& txpToGeneMap,
spp::sparse_hash_map<std::string, uint32_t>& geneIdxMap,
AlevinOpts<apt::SplitSeqV1>& aopt,
GZipWriter& gzw,
std::vector<std::string>& trueBarcodes,
std::vector<uint32_t>& umiCount,
CFreqMapT& freqCounter,
size_t numLowConfidentBarcode);
// Explicit instantiation of CollapsedCellOptimizer::optimize for the
// Split-seq V2 protocol.
template
bool CollapsedCellOptimizer::optimize(EqMapT& fullEqMap,
spp::sparse_hash_map<uint32_t, uint32_t>& txpToGeneMap,
spp::sparse_hash_map<std::string, uint32_t>& geneIdxMap,
AlevinOpts<apt::SplitSeqV2>& aopt,
GZipWriter& gzw,
std::vector<std::string>& trueBarcodes,
std::vector<uint32_t>& umiCount,
CFreqMapT& freqCounter,
size_t numLowConfidentBarcode);
template
bool CollapsedCellOptimizer::optimize(EqMapT& fullEqMap,
spp::sparse_hash_map<uint32_t, uint32_t>& txpToGeneMap,
spp::sparse_hash_map<std::string, uint32_t>& geneIdxMap,
Expand Down
14 changes: 14 additions & 0 deletions src/GZipWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1889,6 +1889,14 @@ bool GZipWriter::writeEquivCounts<SCExpT, apt::CELSeq2>(
const AlevinOpts<apt::CELSeq2>& aopts,
SCExpT& readExp);
// Explicit instantiation of GZipWriter::writeEquivCounts for the
// Split-seq V1 protocol.
template
bool GZipWriter::writeEquivCounts<SCExpT, apt::SplitSeqV1>(
const AlevinOpts<apt::SplitSeqV1>& aopts,
SCExpT& readExp);
// Explicit instantiation of GZipWriter::writeEquivCounts for the
// Split-seq V2 protocol.
template
bool GZipWriter::writeEquivCounts<SCExpT, apt::SplitSeqV2>(
const AlevinOpts<apt::SplitSeqV2>& aopts,
SCExpT& readExp);
template
bool GZipWriter::writeEquivCounts<SCExpT, apt::QuartzSeq2>(
const AlevinOpts<apt::QuartzSeq2>& aopts,
SCExpT& readExp);
Expand Down Expand Up @@ -1926,6 +1934,12 @@ template bool
GZipWriter::writeMetaAlevin<apt::CELSeq2>(const AlevinOpts<apt::CELSeq2>& opts,
boost::filesystem::path aux_dir);
// Explicit instantiation of GZipWriter::writeMetaAlevin for the
// Split-seq V1 protocol.
template bool
GZipWriter::writeMetaAlevin<apt::SplitSeqV1>(const AlevinOpts<apt::SplitSeqV1>& opts,
boost::filesystem::path aux_dir);
// Explicit instantiation of GZipWriter::writeMetaAlevin for the
// Split-seq V2 protocol.
template bool
GZipWriter::writeMetaAlevin<apt::SplitSeqV2>(const AlevinOpts<apt::SplitSeqV2>& opts,
boost::filesystem::path aux_dir);
template bool
GZipWriter::writeMetaAlevin<apt::QuartzSeq2>(const AlevinOpts<apt::QuartzSeq2>& opts,
boost::filesystem::path aux_dir);
template bool
Expand Down
6 changes: 6 additions & 0 deletions src/ProgramOptionsGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,12 @@ namespace salmon {
(
"celseq2", po::bool_switch()->default_value(alevin::defaults::isCELSeq2),
"Use CEL-Seq2 Single Cell protocol for the library.")
(
"splitseqV1", po::bool_switch()->default_value(alevin::defaults::isSplitSeqV1),
"Use Split-SeqV1 Single Cell protocol for the library.")
(
"splitseqV2", po::bool_switch()->default_value(alevin::defaults::isSplitSeqV2),
"Use Split-SeqV2 Single Cell protocol for the library.")
(
"quartzseq2", po::bool_switch()->default_value(alevin::defaults::isQuartzSeq2),
"Use Quartz-Seq2 v3.2 Single Cell protocol for the library assumes 15 length barcode and 8 length UMI.")
Expand Down
26 changes: 26 additions & 0 deletions src/SalmonAlevin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3138,6 +3138,32 @@ alevinQuant(AlevinOpts<apt::CELSeq2>& aopt, SalmonOpts& sopt,
CFreqMapT& freqCounter, size_t numLowConfidentBarcode,
std::unique_ptr<SalmonIndex>& salmonIndex);

// Explicit instantiations of alevin_sc_align and alevinQuant for the
// Split-seq V1 protocol.
template int
alevin_sc_align(AlevinOpts<apt::SplitSeqV1>& aopt, SalmonOpts& sopt,
boost::program_options::parsed_options& orderedOptions,
std::unique_ptr<SalmonIndex>& salmonIndex);
template int
alevinQuant(AlevinOpts<apt::SplitSeqV1>& aopt, SalmonOpts& sopt,
SoftMapT& barcodeMap, TrueBcsT& trueBarcodes,
spp::sparse_hash_map<uint32_t, uint32_t>& txpToGeneMap,
spp::sparse_hash_map<std::string, uint32_t>& geneIdxMap,
boost::program_options::parsed_options& orderedOptions,
CFreqMapT& freqCounter, size_t numLowConfidentBarcode,
std::unique_ptr<SalmonIndex>& salmonIndex);

// Explicit instantiations of alevin_sc_align and alevinQuant for the
// Split-seq V2 protocol.
template int
alevin_sc_align(AlevinOpts<apt::SplitSeqV2>& aopt, SalmonOpts& sopt,
boost::program_options::parsed_options& orderedOptions,
std::unique_ptr<SalmonIndex>& salmonIndex);
template int
alevinQuant(AlevinOpts<apt::SplitSeqV2>& aopt, SalmonOpts& sopt,
SoftMapT& barcodeMap, TrueBcsT& trueBarcodes,
spp::sparse_hash_map<uint32_t, uint32_t>& txpToGeneMap,
spp::sparse_hash_map<std::string, uint32_t>& geneIdxMap,
boost::program_options::parsed_options& orderedOptions,
CFreqMapT& freqCounter, size_t numLowConfidentBarcode,
std::unique_ptr<SalmonIndex>& salmonIndex);

template int
alevin_sc_align(AlevinOpts<apt::QuartzSeq2>& aopt, SalmonOpts& sopt,
boost::program_options::parsed_options& orderedOptions,
Expand Down
8 changes: 8 additions & 0 deletions src/WhiteList.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,14 @@ namespace alevin {
std::vector<std::string>& trueBarcodes,
bool useRibo, bool useMito,
size_t numLowConfidentBarcode);
// Explicit instantiation of performWhitelisting for the Split-seq V1 protocol.
template bool performWhitelisting(AlevinOpts<alevin::protocols::SplitSeqV1>& aopt,
std::vector<std::string>& trueBarcodes,
bool useRibo, bool useMito,
size_t numLowConfidentBarcode);
// Explicit instantiation of performWhitelisting for the Split-seq V2 protocol.
template bool performWhitelisting(AlevinOpts<alevin::protocols::SplitSeqV2>& aopt,
std::vector<std::string>& trueBarcodes,
bool useRibo, bool useMito,
size_t numLowConfidentBarcode);
template bool performWhitelisting(AlevinOpts<alevin::protocols::QuartzSeq2>& aopt,
std::vector<std::string>& trueBarcodes,
bool useRibo, bool useMito,
Expand Down

0 comments on commit 0056f8d

Please sign in to comment.