Skip to content

Commit

Permalink
Aligning using native SSE2 striped smith-watermann through JNI when p…
Browse files Browse the repository at this point in the history
…ossible

Decoupled alignment from jaligner
  • Loading branch information
d-cameron committed Feb 23, 2015
1 parent b22c794 commit 666defd
Show file tree
Hide file tree
Showing 16 changed files with 198 additions and 78 deletions.
Binary file added lib/libsswjni.so
Binary file not shown.
Binary file added repo/ssw/ssw/1.0/ssw-1.0.jar
Binary file not shown.
1 change: 1 addition & 0 deletions repo/ssw/ssw/1.0/ssw-1.0.jar.md5
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
d00d0e5c5a3b5a1dbc6a5a9b5566bdca
1 change: 1 addition & 0 deletions repo/ssw/ssw/1.0/ssw-1.0.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3ce9cfbb055f1b91f6720dbd3325930f6995c20b
8 changes: 8 additions & 0 deletions repo/ssw/ssw/1.0/ssw-1.0.pom
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<modelVersion>4.0.0</modelVersion>
<groupId>ssw</groupId>
<artifactId>ssw</artifactId>
<version>1.0</version>
</project>
1 change: 1 addition & 0 deletions repo/ssw/ssw/1.0/ssw-1.0.pom.md5
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3a18003c5d37f085a61043040df5adc1
1 change: 1 addition & 0 deletions repo/ssw/ssw/1.0/ssw-1.0.pom.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
6520d4a19af74e977b1bf07834c10618deca41c4
12 changes: 12 additions & 0 deletions repo/ssw/ssw/maven-metadata.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<metadata>
<groupId>ssw</groupId>
<artifactId>ssw</artifactId>
<versioning>
<release>1.0</release>
<versions>
<version>1.0</version>
</versions>
<lastUpdated>20150223004749</lastUpdated>
</versioning>
</metadata>
1 change: 1 addition & 0 deletions repo/ssw/ssw/maven-metadata.xml.md5
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2c99ce3336f85f1d50d5b7eac707ebbf
1 change: 1 addition & 0 deletions repo/ssw/ssw/maven-metadata.xml.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
a42b905a2ae1c0cf17e96afbe6b68a55d33d5efd
11 changes: 11 additions & 0 deletions src/main/java/au/edu/wehi/idsv/alignment/Aligner.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package au.edu.wehi.idsv.alignment;

public interface Aligner {
/**
* Performs Smith-Waterman alignment of the given sequence against the given reference
* @param seq sequence to align
* @param ref reference sequence
* @return Alignment of sequence relative to reference
*/
public Alignment align_smith_waterman(byte[] seq, byte[] ref);
}
29 changes: 29 additions & 0 deletions src/main/java/au/edu/wehi/idsv/alignment/AlignerFactory.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package au.edu.wehi.idsv.alignment;

import htsjdk.samtools.util.Log;
import au.edu.wehi.idsv.Idsv;

public class AlignerFactory {
private static final Log log = Log.getInstance(Idsv.class);
private static boolean sswjniLoaded;
static {
try {
System.loadLibrary("sswjni");
sswjniLoaded = true;
} catch (UnsatisfiedLinkError e) {
sswjniLoaded = false;
log.warn("Unable to load sswjni library - assembly will be 10x slower than normal. Please ensure libsswjni.so can be found by setting LD_LIBRARY_PATH or java.library.path");
}
}
public static Aligner create(int match, int mismatch, int ambiguous, int gapOpen, int gapExtend) {
if (sswjniLoaded) {
return new SswJniAligner(match, mismatch, ambiguous, gapOpen, gapExtend);
} else {
return new JAlignerAligner(match, mismatch, ambiguous, gapOpen, gapExtend);
}
}
public static Aligner create() {
return create(1, -4, -4, 6, 1); // bwa mem
//return create(2, -6, -1, 5, 3); // bowtie2
}
}
34 changes: 34 additions & 0 deletions src/main/java/au/edu/wehi/idsv/alignment/Alignment.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package au.edu.wehi.idsv.alignment;

/**
* Alignment of a sequence relative to a reference
* @author cameron.d
*
*/
public class Alignment {
private final int startPosition;
private final String cigar;
/**
* Sequence pair alignment outcome
* @param startPosition Zero-based start position of sequence relative to reference
* @param cigar sequence alignment CIGAR
*/
public Alignment(int startPosition, String cigar) {
this.startPosition = startPosition;
this.cigar = cigar;
}
/**
* Zero-based start position of sequence relative to reference
* @return
*/
public int getStartPosition() {
return startPosition;
}
/**
* sequence alignment CIGAR
* @return CIGAR string of sequence alignment
*/
public String getCigar() {
return cigar;
}
}
Original file line number Diff line number Diff line change
@@ -1,31 +1,62 @@
package au.edu.wehi.idsv;
package au.edu.wehi.idsv.alignment;

import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;

import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import jaligner.Alignment;
import htsjdk.samtools.util.SequenceUtil;
import jaligner.Sequence;
import jaligner.SmithWatermanGotoh;
import jaligner.matrix.Matrix;

import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;

// TODO: add JNI interface to https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library
public class AlignmentHelper {
private AlignmentHelper() { }
public class JAlignerAligner implements Aligner {
private final Matrix matrix;
private final int gapOpen;
private final int gapExtend;
public JAlignerAligner(int match, int mismatch, int ambiguous, int gapOpen, int gapExtend) {
this.matrix = createMatrix(match, mismatch, ambiguous);
this.gapOpen = gapOpen;
this.gapExtend = gapExtend;
}
static {
// quieten down the JAligner logging spam
Logger jalignerLogger = Logger.getLogger(SmithWatermanGotoh.class.getName());
jalignerLogger.setLevel(java.util.logging.Level.OFF);
}
@Override
public Alignment align_smith_waterman(byte[] seq, byte[] ref) {
Sequence sSeq = new Sequence("seq", new String(seq));
Sequence sRef = new Sequence("ref", new String(ref));
jaligner.Alignment jaln = SmithWatermanGotoh.align(sRef, sSeq, matrix, gapOpen, gapExtend);

Alignment alignment = new Alignment(jaln.getStart1(), alignmentToCigar(jaln).toString());
return alignment;
}
private static Matrix createMatrix(int match, int mismatch, int ambiguous) {
float[][] scores = new float[Matrix.SIZE][Matrix.SIZE];
// Fill the matrix with the scores
for (int i = 0; i < Matrix.SIZE; i++) {
for (int j = 0; j < Matrix.SIZE; j++) {
if (Character.toUpperCase(i) == Character.toUpperCase(j)) {
scores[i][j] = match;
} else if (SequenceUtil.isValidBase((byte) i) && SequenceUtil.isValidBase((byte) j)) {
scores[i][j] = mismatch;
} else {
scores[i][j] = ambiguous;
}
}
}
return new Matrix("matrix", scores);
}
/**
* Converts an alignment of a read (seq2) against a reference sequence (seq1) to a read cigar
* @param alignment computed alignment
* @return CIGAR of seq2 relative to reference seq1
*/
public static Cigar alignmentToCigar(Alignment alignment) {
private static Cigar alignmentToCigar(jaligner.Alignment alignment) {
List<CigarElement> cigar = new ArrayList<CigarElement>();
if (alignment.getStart2() > 0) {
cigar.add(new CigarElement(alignment.getStart2(), CigarOperator.SOFT_CLIP));
Expand All @@ -36,9 +67,9 @@ public static Cigar alignmentToCigar(Alignment alignment) {
CigarOperator op = CigarOperator.MATCH_OR_MISMATCH;
for (int i = 0; i < seq1.length; i++) {
CigarOperator currentOp;
if (seq1[i] == Alignment.GAP) {
if (seq1[i] == jaligner.Alignment.GAP) {
currentOp = CigarOperator.INSERTION;
} else if (seq2[i] == Alignment.GAP) {
} else if (seq2[i] == jaligner.Alignment.GAP) {
currentOp = CigarOperator.DELETION;
} else {
currentOp = CigarOperator.MATCH_OR_MISMATCH;
Expand All @@ -60,60 +91,4 @@ public static Cigar alignmentToCigar(Alignment alignment) {
}
return new Cigar(cigar);
}
public static Alignment align_local(Sequence ref, Sequence seq) {
return align_bwamem_local(ref, seq);
}
/**
* Align using alignment algorithm similar to bowtie2
* @param ref reference sequence. Must be a small subsequence as full Smith Waterman Gotoh is performed
* @param seq sequence
* @return local sequence alignment
*/
public static Alignment align_bowtie2_local(Sequence ref, Sequence seq) {
return SmithWatermanGotoh.align(ref, seq, bowtie2Matrix, bowtie2GapOpen, bowtie2GapExtend);
}
// bowtie2 scoring scheme for high-quality bases (http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml#scores-higher-more-similar)
private static final Matrix bowtie2Matrix = createMatrix(2, -6, -1);
private static final float bowtie2GapOpen = 5;
private static final float bowtie2GapExtend = 3;
/**
* Align using alignment algorithm similar to bwa mem
* @param ref reference sequence. Must be a small subsequence as full Smith Waterman Gotoh is performed
* @param seq sequence
* @return local sequence alignment
*/
public static Alignment align_bwamem_local(Sequence ref, Sequence seq) {
return SmithWatermanGotoh.align(ref, seq, bwamemMatrix, bwamemGapOpen, bwamemGapExtend);
}
private static final Matrix bwamemMatrix = createMatrix(1, -4, -4);
private static final float bwamemGapOpen = 6;
private static final float bwamemGapExtend = 1;

private static Matrix createMatrix(int match, int mismatch, int ambiguous) {
float[][] scores = new float[Matrix.SIZE][Matrix.SIZE];
// Fill the matrix with the scores
for (int i = 0; i < Matrix.SIZE; i++) {
for (int j = 0; j < Matrix.SIZE; j++) {
if (Character.toUpperCase(i) == Character.toUpperCase(j)) {
scores[i][j] = match;
} else if (isUnambiguousBase(i) && isUnambiguousBase(j)) {
scores[i][j] = mismatch;
} else {
scores[i][j] = ambiguous;
}
}
}
return new Matrix("bowtie2", scores);
}
private static boolean isUnambiguousBase(int base) {
switch (base) {
case 'A':
case 'C':
case 'G':
case 'T':
return false;
default:
return true;
}
}
}
44 changes: 44 additions & 0 deletions src/main/java/au/edu/wehi/idsv/alignment/SswJniAligner.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package au.edu.wehi.idsv.alignment;

import htsjdk.samtools.util.SequenceUtil;

public class SswJniAligner implements Aligner {
private static final int MATRIX_SIZE = 128;
private final int gapOpen;
private final int gapExtend;
private final int[][] matrix;
public SswJniAligner(int match, int mismatch, int ambiguous, int gapOpen, int gapExtend) {
this.gapOpen = gapOpen;
this.gapExtend = gapExtend;
this.matrix = createMatrix(match, mismatch, ambiguous);
}
private static int[][] createMatrix(int match, int mismatch, int ambiguous) {
int[][] scores = new int[MATRIX_SIZE][MATRIX_SIZE];
// Fill the matrix with the scores
for (int i = 0; i < MATRIX_SIZE; i++) {
for (int j = 0; j < MATRIX_SIZE; j++) {
if (Character.toUpperCase(i) == Character.toUpperCase(j)) {
scores[i][j] = match;
} else if (SequenceUtil.isValidBase((byte) i) && SequenceUtil.isValidBase((byte) j)) {
scores[i][j] = mismatch;
} else {
scores[i][j] = ambiguous;
}
}
}
return scores;
}
@Override
public Alignment align_smith_waterman(byte[] seq, byte[] ref) {
ssw.Alignment result = ssw.Aligner.align(seq, ref, matrix, gapOpen, gapExtend, true);
String cigar = result.cigar;
if (result.read_begin1 != 0) {
cigar = Integer.toString(result.read_begin1) + "S" + cigar;
}
int endOffset = seq.length - result.read_end1 - 1;
if (endOffset != 0) {
cigar += Integer.toString(endOffset) + "S";
}
return new Alignment(result.ref_begin1, cigar);
}
}
Original file line number Diff line number Diff line change
@@ -1,26 +1,27 @@
package au.edu.wehi.idsv;
package au.edu.wehi.idsv.alignment;

import static org.junit.Assert.assertEquals;
import jaligner.Alignment;
import jaligner.Sequence;

import org.junit.Test;

import au.edu.wehi.idsv.TestHelper;

public class AlignmentHelperTest {


public class AlignmentHelperTest extends TestHelper {
@Test
public void alignment_should_ignore_case() {
String ref = "gtacC";
String seq = "GTACC";
Alignment alignment = AlignmentHelper.align_local(new Sequence(ref), new Sequence(seq));
assertEquals("5M", AlignmentHelper.alignmentToCigar(alignment).toString());
Alignment alignment = AlignerFactory.create().align_smith_waterman(B(seq), B(ref));
assertEquals("5M", alignment.getCigar());
}
@Test
public void test778_gridss_chr15_67104459_67104459f_108952510026_r() {
String chr15_67104400_67104500 = "tgcagtttcttcctagcattgatggtctttacaatttggcatgtttttgcagtggctgggaccagttgttcctttccatgtttagtgcttccttcaggagc";
String assembly = "TTTGGCATGTTTTTGCAGTGGCTGGGGGGGGGTGGTTTTT";
Alignment alignment = AlignmentHelper.align_local(new Sequence(chr15_67104400_67104500), new Sequence(assembly));
assertEquals("26M14S", AlignmentHelper.alignmentToCigar(alignment).toString());
Alignment alignment = AlignerFactory.create().align_smith_waterman(B(assembly), B(chr15_67104400_67104500));
assertEquals("26M14S", alignment.getCigar());
}
@Test
public void test778_chr9_gl000198_random_60298() {
Expand All @@ -35,8 +36,8 @@ public void test778_chr9_gl000198_random_60298() {
"tactaaatcaatgtgcaaagatcactaacatttctctacaccaatgataaccacactgac" +
"agcaaaatcagaaaggccatc";
String assembly = "TAGCAAGAGAGAAAGCCAAACTATTTCTGTTTGCAGATAACATAATTCTATATCTAGAAAACCCTGTAGTTTCAGGTACAAAACCCTGTAGTTTCAGGTACAATGTGCAAAGATCACTAACATTTCT";
Alignment alignment = AlignmentHelper.align_local(new Sequence(chr9_gl000198_random_60000_60500), new Sequence(assembly));
Alignment alignment = AlignerFactory.create().align_smith_waterman(B(assembly), B(chr9_gl000198_random_60000_60500));
// Should match blastn result
assertEquals("4S78M45S", AlignmentHelper.alignmentToCigar(alignment).toString());
assertEquals("4S78M45S", alignment.getCigar());
}
}

0 comments on commit 666defd

Please sign in to comment.