Skip to content

Commit

Permalink
Metrics no longer counting duplicate reads by default
Browse files Browse the repository at this point in the history
Fixes bug with low RP qual scores for libraries with high duplication rates.
  • Loading branch information
d-cameron committed Feb 20, 2018
1 parent fa80c18 commit 3af5da9
Show file tree
Hide file tree
Showing 12 changed files with 36 additions and 57 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<groupId>au.edu.wehi</groupId>
<artifactId>gridss</artifactId>
<packaging>jar</packaging>
<version>1.5.0</version>
<version>1.5.1</version>
<name>gridss</name>
<url>https://github.com/PapenfussLab/gridss</url>
<properties>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public double scoreReadPair(IdsvSamFileMetrics metrics, int fragmentSize, int ma
public double scoreUnmappedMate(IdsvSamFileMetrics metrics, int mapq) {
IdsvMetrics im = metrics.getIdsvMetrics();
// completely unmapped read pairs are excluded for consistency with sc and dp calculation
double score = MathUtil.prToPhred((double)im.READ_PAIRS_ONE_MAPPED / (double)(im.READ_PAIRS - im.READ_PAIRS_ZERO_MAPPED));
double score = MathUtil.prToPhred((double)im.READ_PAIRS_ONE_MAPPED / (double)(im.MAPPED_READS));
score = Math.min(score, mapq);
return score;
}
Expand Down
1 change: 1 addition & 0 deletions src/main/java/gridss/ComputeCoverage.java
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ private IntervalCoverageAccumulator initIntervalCoverageAccumulator() {
}
@Override
protected void acceptRead(SAMRecord record, ReferenceSequence refSeq) {
if (record.getDuplicateReadFlag() && !INCLUDE_DUPLICATES) return;
ReadGcSummary gc = new ReadGcSummary(record, refSeq, UNPAIRED_FRAGMENT_SIZE, getReadPairConcordanceCalculator());
if (ica_gc != null) {
ica_gc.add(record, gc, gcAdjust.adjustmentMultiplier((int)gc.gcPercentage));
Expand Down
1 change: 1 addition & 0 deletions src/main/java/gridss/ExtractSVReads.java
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ public boolean[] shouldExtract(List<SAMRecord> records, ReferenceLookup lookup)
extract[i] = !hasConsistentReadAlignment[SAMRecordUtil.getSegmentIndex(r)] && !readfilter.filterOut(r);
// supp records should use the primary alignment when considering concordance
extract[i] |= !hasConsistentReadPair && !pairfilter.filterOut(primaryAlignmentForSupplementary(r));
extract[i] &= (!r.getDuplicateReadFlag() || INCLUDE_DUPLICATES);
}
return extract;
}
Expand Down
4 changes: 4 additions & 0 deletions src/main/java/gridss/analysis/CollectCigarMetrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ public class CollectCigarMetrics extends SinglePassSamProgram {
@Argument(shortName="Z", doc="If set to true include a zero length operator for each operator not included in the alignment CIGAR.")
public boolean INCLUDE_OMITTED_OPERATORS = true;

@Argument(doc="If true, also include reads marked as duplicates.")
public boolean INCLUDE_DUPLICATES = false;

private EnumMap<CigarOperator, List<CigarDetailMetrics>> cigar;

/** Required main method. */
Expand All @@ -79,6 +82,7 @@ protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) {
// Skip unwanted records
if (rec.getReadUnmappedFlag()) return;
if (rec.getCigar() == null) return;
if (rec.getDuplicateReadFlag() && !INCLUDE_DUPLICATES) return;
List<CigarElement> list = rec.getCigar().getCigarElements();
if (list == null || list.size() == 0) return;
for (CigarElement ce : list) {
Expand Down
7 changes: 2 additions & 5 deletions src/main/java/gridss/analysis/CollectFragmentGCMetrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,8 @@ public class CollectFragmentGCMetrics extends GcSinglePassSamProgram {
}

@Override protected void acceptRead(final SAMRecord record, final ReferenceSequence ref) {
if (record.getDuplicateReadFlag() && IGNORE_DUPLICATES) {
// ignore duplicates
} else {
multiCollector.acceptRecord(record, ref);
}
if (record.getDuplicateReadFlag() && !INCLUDE_DUPLICATES) return;
multiCollector.acceptRecord(record, ref);
}

@Override protected void finish() {
Expand Down
29 changes: 5 additions & 24 deletions src/main/java/gridss/analysis/CollectIdsvMetrics.java
Original file line number Diff line number Diff line change
@@ -1,31 +1,8 @@
/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

package gridss.analysis;

import java.io.File;

import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;

import au.edu.wehi.idsv.sam.SAMRecordUtil;
Expand All @@ -48,6 +25,9 @@
public class CollectIdsvMetrics extends SinglePassSamProgram {
public static final String METRICS_SUFFIX = ".idsv_metrics";

@Argument(doc="If true, also include reads marked as duplicates.")
public boolean INCLUDE_DUPLICATES = false;

private IdsvMetrics idsv;

/** Required main method. */
Expand All @@ -63,6 +43,7 @@ public void setup(final SAMFileHeader header, final File samFile) {

@Override
public void acceptRead(final SAMRecord record, final ReferenceSequence ref) {
if (record.getDuplicateReadFlag() && !INCLUDE_DUPLICATES) return;
idsv.MAX_READ_LENGTH = Math.max(idsv.MAX_READ_LENGTH, record.getReadLength());
if (!record.getReadUnmappedFlag()) {
idsv.MAX_READ_MAPPED_LENGTH = Math.max(idsv.MAX_READ_MAPPED_LENGTH, record.getAlignmentEnd() - record.getAlignmentStart() + 1);
Expand Down
28 changes: 4 additions & 24 deletions src/main/java/gridss/analysis/CollectMapqMetrics.java
Original file line number Diff line number Diff line change
@@ -1,27 +1,3 @@
/*
* The MIT License
*
* Copyright (c) 2016 Daniel Cameron
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

package gridss.analysis;

import java.io.File;
Expand Down Expand Up @@ -59,6 +35,9 @@ public class CollectMapqMetrics extends SinglePassSamProgram {
public static final String METRICS_SUFFIX = ".mapq_metrics";
public static final String HISTOGRAM_SUFFIX = ".mapq_histogram.pdf";
private static final String Histogram_R_SCRIPT = "gridss/analysis/mapqHistogram.R";

@Argument(doc="If true, also include reads marked as duplicates.")
public boolean INCLUDE_DUPLICATES = false;

@Argument(shortName="H", doc="File to write insert size Histogram chart to.")
public File Histogram_FILE = null;
Expand Down Expand Up @@ -98,6 +77,7 @@ protected String[] customCommandLineValidation() {
}

@Override protected void acceptRead(final SAMRecord record, final ReferenceSequence ref) {
if (record.getDuplicateReadFlag() && !INCLUDE_DUPLICATES) return;
multiCollector.acceptRecord(record, ref);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
)
public class CollectStructuralVariantReadMetrics extends ProcessStructuralVariantReadsCommandLineProgram {
public static final String METRICS_SUFFIX = ".sv_metrics";

//private static final Log log = Log.getInstance(CollectStructuralVariantReadMetrics.class);
public static void main(String[] argv) {
System.exit(new CollectStructuralVariantReadMetrics().instanceMain(argv));
Expand All @@ -50,6 +51,13 @@ public void setup(SAMFileHeader header, File samFile) {
}
@Override
public void acceptFragment(List<SAMRecord> records, ReferenceLookup lookup) {
if (!INCLUDE_DUPLICATES) {
for (int i = records.size() - 1; i >= 0; i--) {
if (records.get(i).getDuplicateReadFlag()) {
records.remove(i);
}
}
}
boolean hasConsistentReadPair = ExtractSVReads.hasReadPairingConsistentWithReference(getReadPairConcordanceCalculator(), records);
boolean[] hasConsistentReadAlignment = ExtractSVReads.hasReadAlignmentConsistentWithReference(records);
boolean hasOeaAnchor = false;
Expand Down
5 changes: 5 additions & 0 deletions src/main/java/gridss/analysis/CollectTagMetrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import java.util.HashMap;
import java.util.Map;

import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;

import htsjdk.samtools.SAMFileHeader;
Expand All @@ -49,6 +50,9 @@
public class CollectTagMetrics extends SinglePassSamProgram {
public static final String METRICS_SUFFIX = ".tag_metrics";

@Argument(doc="If true, also include reads marked as duplicates.")
public boolean INCLUDE_DUPLICATES = false;

private Map<String, TagSummaryMetrics> tags = new HashMap<>();

/** Required main method. */
Expand All @@ -64,6 +68,7 @@ protected void setup(final SAMFileHeader header, final File samFile) {

@Override
protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) {
if (rec.getDuplicateReadFlag() && !INCLUDE_DUPLICATES) return;
for (SAMTagAndValue attr : rec.getAttributes()) {
String tag = attr.tag;
TagSummaryMetrics metric = tags.get(tag);
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/gridss/cmdline/GcSinglePassSamProgram.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ public ReadPairConcordanceCalculator getReadPairConcordanceCalculator() {
}
// --------- end chunk from ProcessStructuralVariantReadsCommandLineProgram ---------
// --------- start chunk from ReferenceCommandLineProgram ---------
@Argument(doc = "Ignore reads marked as duplicates.", optional = true)
public boolean IGNORE_DUPLICATES = true;
@Argument(doc="If true, also include reads marked as duplicates.")
public boolean INCLUDE_DUPLICATES = false;
private ReferenceLookup reference;
public ReferenceLookup getReference() {
IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ public abstract class ProcessStructuralVariantReadsCommandLineProgram extends By
public File INSERT_SIZE_METRICS = null;
@Argument(doc="Include unmapped reads", optional=true)
public boolean UNMAPPED_READS = true;
@Argument(doc="If true, also include reads marked as duplicates.")
public boolean INCLUDE_DUPLICATES = false;
@Override
protected String[] customCommandLineValidation() {
if (READ_PAIR_CONCORDANCE_METHOD == ReadPairConcordanceMethod.PERCENTAGE && INSERT_SIZE_METRICS == null) {
Expand Down

0 comments on commit 3af5da9

Please sign in to comment.