From a69d90a7b698b785d4a825b8b56e99f7ff23eed3 Mon Sep 17 00:00:00 2001 From: Olivia Wen-Mei Lang Date: Thu, 23 Dec 2021 14:22:46 -0500 Subject: [PATCH 1/4] fix TP Anti 1bp shift Bug discovered with test data showing 1bp shift downstream in ticket #75 The getUnclippedEnd() function returns a 1-indexed inclusive coordinate which needs to be decremented to be 0-indexed when assigning the FivePrime mark variable's value. This is assigned in two places (paired end and not-paired end data code blocks). --- src/scripts/Read_Analysis/PileupScripts/PileupExtract.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/scripts/Read_Analysis/PileupScripts/PileupExtract.java b/src/scripts/Read_Analysis/PileupScripts/PileupExtract.java index 3d0f33d26..ccc537081 100644 --- a/src/scripts/Read_Analysis/PileupScripts/PileupExtract.java +++ b/src/scripts/Read_Analysis/PileupScripts/PileupExtract.java @@ -81,7 +81,7 @@ else if(param.getTrans() == 2) { if((sr.getFirstOfPairFlag() && param.getRead() == 0) || (!sr.getFirstOfPairFlag() && param.getRead() == 1) || param.getRead() == 2 || (sr.getFirstOfPairFlag() && param.getRead() == 3)) { int FivePrime = sr.getUnclippedStart() - 1; if(sr.getReadNegativeStrandFlag()) { - FivePrime = sr.getUnclippedEnd(); + FivePrime = sr.getUnclippedEnd() - 1; FivePrime -= SHIFT; //SHIFT DATA HERE IF NECCESSARY } else { FivePrime += SHIFT; } @@ -119,7 +119,7 @@ else if(param.getTrans() == 2) { } else if(param.getRead() == 0 || param.getRead() == 2) { //Also outputs if not paired-end since by default it is read-1 int FivePrime = sr.getUnclippedStart() - 1; if(sr.getReadNegativeStrandFlag()) { - FivePrime = sr.getUnclippedEnd(); + FivePrime = sr.getUnclippedEnd() - 1; FivePrime -= SHIFT; //SHIFT DATA HERE IF NECCESSARY } else { FivePrime += SHIFT; } FivePrime -= (BEDSTART - QUERYWINDOW); From c3109d56483c9cc41e0116bd377a6f224adeee09 Mon Sep 17 00:00:00 2001 From: Olivia Wen-Mei Lang Date: Thu, 23 Dec 2021 14:49:37 -0500 Subject: [PATCH 2/4] fix TP BEDcoord-strand shift By removing the code block shifting the BED coordinates that appears to be some sort of strand-specific correction, we restore the data to the correct composite (checking for BEDcoord with different strands and checking both sense and antisense composites). Suspect the correction code block that is removed in this commit was for correction of code that is no longer in place. Relates to issue #75 --- src/scripts/Read_Analysis/PileupScripts/PileupExtract.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/scripts/Read_Analysis/PileupScripts/PileupExtract.java b/src/scripts/Read_Analysis/PileupScripts/PileupExtract.java index ccc537081..7b2a15d2e 100644 --- a/src/scripts/Read_Analysis/PileupScripts/PileupExtract.java +++ b/src/scripts/Read_Analysis/PileupScripts/PileupExtract.java @@ -48,11 +48,6 @@ public void extract(BEDCoord read) { int BEDSTART = (int)read.getStart(); int BEDSTOP = (int)read.getStop(); - //Correct for '-' strand BED coord so they align with '+' strand - if(read.getDir().equals("-")) { - BEDSTART++; - BEDSTOP++; - } //Correct Window Size for proper transformations int WINDOW = (BEDSTOP - BEDSTART) + ((param.getBin() / 2) * 2); From b54b347ce9a3f9129195a236a397df6599463152 Mon Sep 17 00:00:00 2001 From: Olivia Wen-Mei Lang Date: Fri, 24 Dec 2021 07:48:48 -0500 Subject: [PATCH 3/4] TP change insert size calculation The rationale is outlined in issue #76. PileupExtract is updated for pileups that require proper pairs. The midpoint is changed to get the leftmost coordinate of the insert (getAlignmentStart or getMateAlignmentStart depending if R1 or R2 is the leftmost read) and add on half of the insert size. Note the correction for when the BED interval is even and on the negative strand. This ensures that even though we perform a floor integer division calculation, the distance from the 5' end of the BED interval is consistent between BED intervals. Odd intervals have consistent 5' distances regardless of direction. The filter for insert size is changed to use the built-in SamRecord function getInferredInsertSize() by checking if the absolute value is more or less than the limits specified by the PileupParameters object. I also switched the filter to use a continue statement instead of setting FivePrime to an invalid position in order to save a little on downstream computation. --- .../PileupScripts/PileupExtract.java | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/src/scripts/Read_Analysis/PileupScripts/PileupExtract.java b/src/scripts/Read_Analysis/PileupScripts/PileupExtract.java index 7b2a15d2e..aab7a5265 100644 --- a/src/scripts/Read_Analysis/PileupScripts/PileupExtract.java +++ b/src/scripts/Read_Analysis/PileupScripts/PileupExtract.java @@ -81,19 +81,24 @@ else if(param.getTrans() == 2) { } else { FivePrime += SHIFT; } if(sr.getProperPairFlag()) { //prevent cases where non-properly paired Read1 gets to this point - int recordStart = sr.getUnclippedStart() - 1; - int recordStop = sr.getMateAlignmentStart() + sr.getReadLength() - 1; - if(sr.getMateAlignmentStart() - 1 < recordStart) { - recordStart = sr.getMateAlignmentStart() - 1; - recordStop = sr.getUnclippedEnd(); + //Find midpoint if read flag == 3 + if(param.getRead() == 3) { + if(sr.getInferredInsertSize()>0) { + FivePrime = sr.getAlignmentStart() - 1 + (sr.getInferredInsertSize() / 2); + } else if(sr.getInferredInsertSize()<0) { + FivePrime = sr.getMateAlignmentStart() - 1 - (sr.getInferredInsertSize() / 2); + } else { + //Most aligners will flag records with an insert size of zero as improper pairs + System.err.println("This statement should never print (insert size=0 when finding midpoint in PileupExtract)"); + continue; + } + // Correction to ensure that even insert size mark reoriented for negative strands + if(sr.getInferredInsertSize() % 2 == 0 && read.getDir().equals("-") ) { FivePrime--; } } - - //Find midpoint is read flag == 3 - if(param.getRead() == 3) { FivePrime = (recordStart + recordStop) / 2; } - - if(recordStop - recordStart < param.getMinInsert() && param.getMinInsert() != -9999) { FivePrime = -1; } //Test for MIN insert size cutoff here - if(recordStop - recordStart > param.getMaxInsert() && param.getMaxInsert() != -9999) { FivePrime = -1; } //Test for MAX insert size cutoff here - } else if(param.getRead() == 3) { FivePrime = -1; } // Make sure that midpoint pileup must come from properly paired read + // Apply insert size filters + if(Math.abs(sr.getInferredInsertSize()) < param.getMinInsert() && param.getMinInsert() != -9999) { continue; } //Test for MIN insert size cutoff here + if(Math.abs(sr.getInferredInsertSize()) > param.getMaxInsert() && param.getMaxInsert() != -9999) { continue; } //Test for MAX insert size cutoff here + } else if(param.getRead() == 3) { continue; } // Make sure that midpoint pileup must come from properly paired read //Adjust tag start to be within array reference FivePrime -= (BEDSTART - QUERYWINDOW); From 0d300ece39709f54db80147da4d272d0dde4a8f3 Mon Sep 17 00:00:00 2001 From: Olivia Wen-Mei Lang Date: Fri, 24 Dec 2021 08:09:42 -0500 Subject: [PATCH 4/4] reset default BED coord strand parsing Invert if statement int TagPileup so that it parses BED coordinates such that unexpected strand characters default to the positive strand ("+"). --- src/scripts/Read_Analysis/TagPileup.java | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/scripts/Read_Analysis/TagPileup.java b/src/scripts/Read_Analysis/TagPileup.java index c6e3bcd19..2477792ff 100644 --- a/src/scripts/Read_Analysis/TagPileup.java +++ b/src/scripts/Read_Analysis/TagPileup.java @@ -378,16 +378,13 @@ public Vector loadCoord(File INPUT) throws FileNotFoundException { } if (Integer.parseInt(temp[1]) >= 0) { if (temp.length > 4) { - if (temp[5].equals("+")) { - COORD.add(new BEDCoord(temp[0], Integer.parseInt(temp[1]), Integer.parseInt(temp[2]), - "+", name)); + if (temp[5].equals("-")) { + COORD.add(new BEDCoord(temp[0], Integer.parseInt(temp[1]), Integer.parseInt(temp[2]), "-", name)); } else { - COORD.add(new BEDCoord(temp[0], Integer.parseInt(temp[1]), Integer.parseInt(temp[2]), - "-", name)); + COORD.add(new BEDCoord(temp[0], Integer.parseInt(temp[1]), Integer.parseInt(temp[2]), "+", name)); } } else { - COORD.add(new BEDCoord(temp[0], Integer.parseInt(temp[1]), Integer.parseInt(temp[2]), "+", - name)); + COORD.add(new BEDCoord(temp[0], Integer.parseInt(temp[1]), Integer.parseInt(temp[2]), "+", name)); } } else {