Skip to content

Commit

Permalink
guard: check record type
Browse files Browse the repository at this point in the history
If the user passed the `--writeQualities` flag and we are writing
the mappings to an output file, check to make sure the records actually
contain quality values (i.e. the input is not in FASTA format). If
they do not, then print a warning and disable writing of the quality
scores.
  • Loading branch information
Rob Patro committed Jun 22, 2022
1 parent 0657c4e commit ddf2443
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 2 deletions.
36 changes: 34 additions & 2 deletions src/SalmonAlevin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1571,9 +1571,19 @@ void processReadsQuasi(
std::vector<pufferfish::util::MemCluster> recoveredHits;
std::vector<pufferfish::util::JointMems> jointHits;
PairedAlignmentFormatter<IndexT*> formatter(qidx);

// Tracks whether we still need to verify that quality values
// exist when the user requested `--writeQualities`, because
// they may have accidentally passed in a FASTA file.
bool check_qualities = true;
if (salmonOpts.writeQualities) {
formatter.enable_qualities();
} else {
// we don't have to worry about
// checking qualities because
// we aren't writing them.
check_qualities = false;
formatter.disable_qualities();
}
pufferfish::util::QueryCache qc;
Expand All @@ -1590,6 +1600,9 @@ void processReadsQuasi(
fmt::MemoryWriter sstream;
auto* qmLog = salmonOpts.qmLog.get();
bool writeQuasimappings = (qmLog != nullptr);
// if we aren't writing output at all, don't bother
// checking for quality scores either.
if (!writeQuasimappings) { check_qualities = false; }

//////////////////////
// NOTE: validation mapping based new parameters
Expand Down Expand Up @@ -1641,6 +1654,25 @@ void processReadsQuasi(
extraBAMtags.reserve(reserveSize);
}

// if we need to disable writing quality values
// because the user passed in a FASTA file, do that
// check here.
if (check_qualities and (rangeSize > 0)) {
auto& rp = rg[0];
// a valid FASTQ record can't have an
// empty quality string, so then we will
// treat this as a FASTA.
if (rp.first.qual.empty() or rp.second.qual.empty()) {
formatter.disable_qualities();
salmonOpts.jointLog->warn("The flag --writeQualities was provided,\n"
"but read records (e.g. {}/{}) appear not to have quality strings!\n"
"The input is being interpreted as a FASTA file, and the writing\n"
"of quality scores is being disabled.\n", rp.first.name, rp.second.name);
}
// we won't bother to perform this check more than once.
check_qualities = false;
}

auto localProtocol = alevinOpts.protocol;
for (size_t i = 0; i < rangeSize; ++i) { // For all the read in this batch
auto& rp = rg[i];
Expand Down Expand Up @@ -2457,8 +2489,8 @@ bool do_sc_align(ReadExperimentT& experiment,

rad_file.close();

// we want to check if the RAD file stream was written to properly
// while we likely would have caught this earlier, it is possible the
// We want to check if the RAD file stream was written to properly.
// While we likely would have caught this earlier, it is possible the
// badbit may not be set until the stream actually flushes (perhaps even
// at close), so we check here one final time that the status of the
// stream is as expected.
Expand Down
64 changes: 64 additions & 0 deletions src/SalmonQuantify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -853,9 +853,19 @@ void processReads(
std::vector<pufferfish::util::MemCluster> recoveredHits;
std::vector<pufferfish::util::JointMems> jointHits;
PairedAlignmentFormatter<IndexT*> formatter(qidx);

// Tracks whether we still need to verify that quality values
// exist when the user requested `--writeQualities`, because
// they may have accidentally passed in a FASTA file.
bool check_qualities = true;
if (salmonOpts.writeQualities) {
formatter.enable_qualities();
} else {
// we don't have to worry about
// checking qualities because
// we aren't writing them.
check_qualities = false;
formatter.disable_qualities();
}
pufferfish::util::QueryCache qc;
Expand All @@ -876,6 +886,9 @@ void processReads(
fmt::MemoryWriter sstream;
auto* qmLog = salmonOpts.qmLog.get();
bool writeQuasimappings = (qmLog != nullptr);
// if we aren't writing output at all, don't bother
// checking for quality scores either.
if (!writeQuasimappings) { check_qualities = false; }

/*
auto ap{selective_alignment::utils::AlignmentPolicy::DEFAULT};
Expand Down Expand Up @@ -913,6 +926,25 @@ void processReads(

LibraryFormat expectedLibraryFormat = rl.format();

// if we need to disable writing quality values
// because the user passed in a FASTA file, do that
// check here.
if (check_qualities and (rangeSize > 0)) {
auto& rp = rg[0];
// a valid FASTQ record can't have an
// empty quality string, so then we will
// treat this as a FASTA.
if (rp.first.qual.empty() or rp.second.qual.empty()) {
formatter.disable_qualities();
salmonOpts.jointLog->warn("The flag --writeQualities was provided,\n"
"but read records (e.g. {}/{}) appear not to have quality strings!\n"
"The input is being interpreted as a FASTA file, and the writing\n"
"of quality scores is being disabled.\n", rp.first.name, rp.second.name);
}
// we won't bother to perform this check more than once.
check_qualities = false;
}

bool tryAlign{salmonOpts.validateMappings};
for (size_t i = 0; i < rangeSize; ++i) { // For all the reads in this batch
auto& rp = rg[i];
Expand Down Expand Up @@ -1649,9 +1681,19 @@ void processReads(
std::vector<pufferfish::util::MemCluster> recoveredHits;
std::vector<pufferfish::util::JointMems> jointHits;
PairedAlignmentFormatter<IndexT*> formatter(qidx);

// Tracks whether we still need to verify that quality values
// exist when the user requested `--writeQualities`, because
// they may have accidentally passed in a FASTA file.
bool check_qualities = true;
if (salmonOpts.writeQualities) {
formatter.enable_qualities();
} else {
// we don't have to worry about
// checking qualities because
// we aren't writing them.
check_qualities = false;
formatter.disable_qualities();
}
pufferfish::util::QueryCache qc;
Expand All @@ -1671,6 +1713,9 @@ void processReads(
fmt::MemoryWriter sstream;
auto* qmLog = salmonOpts.qmLog.get();
bool writeQuasimappings = (qmLog != nullptr);
// if we aren't writing output at all, don't bother
// checking for quality scores either.
if (!writeQuasimappings) { check_qualities = false; }

std::string rc1;
rc1.reserve(300);
Expand Down Expand Up @@ -1704,6 +1749,25 @@ void processReads(

LibraryFormat expectedLibraryFormat = rl.format();

// if we need to disable writing quality values
// because the user passed in a FASTA file, do that
// check here.
if (check_qualities and (rangeSize > 0)) {
auto& rp = rg[0];
// a valid FASTQ record can't have an
// empty quality string, so then we will
// treat this as a FASTA.
if (rp.qual.empty()) {
formatter.disable_qualities();
salmonOpts.jointLog->warn("The flag --writeQualities was provided,\n"
"but read records (e.g. {}) appear not to have quality strings!\n"
"The input is being interpreted as a FASTA file, and the writing\n"
"of quality scores is being disabled.\n", rp.name);
}
// we won't bother to perform this check more than once.
check_qualities = false;
}

bool tryAlign{salmonOpts.validateMappings};
for (size_t i = 0; i < rangeSize; ++i) { // For all the read in this batch
auto& rp = rg[i];
Expand Down

0 comments on commit ddf2443

Please sign in to comment.