Skip to content

Commit

Permalink
Fixed segfault when no markers are retained in depth computations
Browse files Browse the repository at this point in the history
- Added a parameter to the depth command to specify the minimum frequency
- Added a check for number of retained markers with an error message + exit when no markers are retained
  • Loading branch information
RomainFeron committed Apr 21, 2021
1 parent bad6866 commit 425430d
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 9 deletions.
27 changes: 22 additions & 5 deletions src/depth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
Depth::Depth(Parameters& parameters, bool compare_groups, bool store_groups, bool store_sequence) : Analysis(parameters, compare_groups, store_groups, store_sequence) {

this->results = DepthResults(this->markers_table.header.n_individuals);
this->min_individuals = static_cast<uint>(this->parameters.depth_min_frequency * this->markers_table.header.n_individuals);

}

Expand All @@ -38,12 +39,11 @@ void Depth::process_marker(Marker& marker) {

for (uint i = 0; i < marker.individual_depths.size(); ++i) {

if (marker.n_individuals >= 0.75 * this->markers_table.header.n_individuals) this->results.depths[i].push_back(marker.individual_depths[i]); // Only consider markers present in at least 75% of individuals
if (marker.individual_depths[i] > 0) {
if (marker.n_individuals >= this->min_individuals) this->results.depths[i].push_back(marker.individual_depths[i]); // Only consider markers present in a fraction of individuals for computations

if (marker.individual_depths[i] > 0) {
++this->results.individual_markers_count[i]; // Increment total number of markers for this individual
this->results.individual_reads_count[i] += marker.individual_depths[i];

this->results.individual_reads_count[i] += marker.individual_depths[i]; // Add marker depth to total reads count for this individual
}

}
Expand All @@ -56,7 +56,24 @@ void Depth::process_marker(Marker& marker) {

void Depth::generate_output() {

// Check that there are actually markers retained for the analysis
bool fail_exit = false;
for (uint i = 0; i < this->results.depths.size(); ++i) {
if (this->results.depths[i].size() == 0) fail_exit = true;
}
if (fail_exit) {
std::string msg = "No markers were present in at least <" +
std::to_string(static_cast<uint>(this->parameters.depth_min_frequency * 100)) +
">% of all individuals (" +
std::to_string(this->min_individuals) +
"/" + std::to_string(this->markers_table.header.n_individuals) +
" individuals)";
log(msg, LOG_ERROR);
exit(1);
}

// Generate output file
log("Writing depth results to output file <" + this->parameters.output_file_path + ">");
std::ofstream output_file = open_output(this->parameters.output_file_path);
output_file << "Sample\tGroup\tReads\tMarkers\tRetained\tMin_depth\tMax_depth\tMedian_depth\tAverage_depth\n";

Expand All @@ -65,9 +82,9 @@ void Depth::generate_output() {

std::sort(this->results.depths[i].begin(), this->results.depths[i].end()); // Sort depth vector to easily compute all metrics (necessary for median anyway)

const auto size = this->results.depths[i].size();
const auto start = this->results.depths[i].begin();
const auto end = this->results.depths[i].end();
const auto size = this->results.depths[i].size();

const uint16_t min_depth = *start; // Sorted vector: minimum depth is the first element
const uint16_t max_depth = *(end - 1); // Sorted vector: maximum depth is the last element
Expand Down
5 changes: 3 additions & 2 deletions src/depth.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ struct DepthResults {
DepthResults(const uint16_t& n_individuals) {

this->depths = std::vector<std::vector<uint16_t>>(n_individuals);
this->individual_markers_count = std::vector<uint32_t>(n_individuals);
this->individual_reads_count = std::vector<uint32_t>(n_individuals);
this->individual_markers_count = std::vector<uint32_t>(n_individuals, 0);
this->individual_reads_count = std::vector<uint32_t>(n_individuals, 0);

};

Expand Down Expand Up @@ -129,4 +129,5 @@ class Depth: public Analysis {
private:

DepthResults results; ///< DepthResults instance to store individual marker depths and total number of retained markers
uint min_individuals; ///< Minimum number of individuals with a marker to retain this marker in computations
};
1 change: 1 addition & 0 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ inline Parameters parse_args(int& argc, char** argv) {
depth->add_option("-t,--markers-table", parameters.markers_table_path, "Path to a marker depths table generated by \"process\"")->required()->check(CLI::ExistingFile);
depth->add_option("-p,--popmap", parameters.popmap_file_path, "Path to a tabulated map specifying groups for all individuals (population map)")->required()->check(CLI::ExistingFile);
depth->add_option("-o,--output-file", parameters.output_file_path, "Path to the output file (table of depth for each individual)")->required();
depth->add_option("-f,--min-frequency", parameters.depth_min_frequency, "Minimum frequency of a marker in all individuals to retain this marker in depth computations")->check(CLI::Range(0.0, 1.0));

// Distrib subcommand parser
CLI::App* distrib = parser.add_subcommand("distrib", "Compute the distribution of markers between group1 and group2");
Expand Down
2 changes: 2 additions & 0 deletions src/parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,6 @@ struct Parameters {
unsigned int map_min_quality = 20; ///< Minimum mapping quality to retained an aligned marker
float map_min_frequency = static_cast<float>(0.1); ///< Minimum frequency of a marker in the population to retain the marker

// "depth" specific parameters
float depth_min_frequency = static_cast<float>(0.75); ///< Minimum frequency of a marker in the population to retain this marker when computing depths
};
5 changes: 3 additions & 2 deletions src/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <chrono>
#include <cmath>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
Expand Down Expand Up @@ -124,12 +125,12 @@ template<typename T>
* \param flushline If true, std::endl is added at the end of the log line
*/

inline void log(T line, const std::string level = LOG_INFO, bool flushline = true) {
inline void log(T line, const std::string level = LOG_INFO, bool flushline = true, uint float_precision = 2) {

char logtime[MAX_TIME_SIZE];
std::string indent((8 - level.size()), ' '); // Nicely align messages for all log levels

std::cerr << "[" << print_time(logtime) << "]::" << level << indent << std::boolalpha << line;
std::cerr << "[" << print_time(logtime) << "]::" << level << indent << std::boolalpha << std::fixed << std::setprecision(float_precision) << line;
if (flushline) std::cerr << std::endl;
}

Expand Down

0 comments on commit 425430d

Please sign in to comment.