Skip to content
This repository has been archived by the owner on Aug 26, 2023. It is now read-only.

Commit

Permalink
add position mapping between seq and ref
Browse files Browse the repository at this point in the history
  • Loading branch information
bicycle1885 committed Dec 2, 2015
1 parent 7c40798 commit 2a67e3b
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 9 deletions.
2 changes: 2 additions & 0 deletions src/align/Align.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ export Operation,
hasop,
Alignment,
AlignedSequence,
seq2ref,
ref2seq,
ismatchop,
isinsertop,
isdeleteop,
Expand Down
77 changes: 76 additions & 1 deletion src/align/anchors.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

# Anchor definition
# ------------------
"""
Expand Down Expand Up @@ -181,6 +180,75 @@ function show(io::IO, aln::Alignment)
end
end

# find the index of the first anchor that satisfies `i ≤ pos`
@generated function findanchor{isseq}(aln::Alignment, i::Integer, ::Type{Val{isseq}})
pos = isseq ? :seqpos : :refpos
quote
anchors = aln.anchors
lo = 1
hi = endof(anchors)
if !(anchors[lo].$pos < i anchors[hi].$pos)
return 0
end
# binary search
while hi - lo > 4
m = div(lo + hi, 2)
if anchors[m].$pos < i
lo = m
else
hi = m
end
# invariant
@assert anchors[lo].$pos < i anchors[hi].$pos
end
# linear search
for j in lo+1:hi
if i aln.anchors[j].$pos
return j
end
end
# do not reach here
@assert false
return 0
end
end

"""
seq2ref(i, aln)
Map a position from sequence to reference.
"""
function seq2ref(i::Integer, aln::Alignment)
idx = findanchor(aln, i, Val{true})
if idx == 0
throw(ArgumentError("invalid sequence position: $i"))
end
anchor = aln.anchors[idx]
refpos = anchor.refpos
if ismatchop(anchor.op)
refpos += i - anchor.seqpos
end
return refpos, anchor.op
end

"""
ref2seq(i, aln)
Map a position from reference to sequence.
"""
function ref2seq(i::Integer, aln::Alignment)
idx = findanchor(aln, i, Val{false})
if idx == 0
throw(ArgumentError("invalid reference position: $i"))
end
anchor = aln.anchors[idx]
seqpos = anchor.seqpos
if ismatchop(anchor.op)
seqpos += i - anchor.refpos
end
return seqpos, anchor.op
end


immutable AlignedSequence{S}
seq::S
Expand Down Expand Up @@ -214,6 +282,13 @@ function last(alnseq::AlignedSequence)
return alnseq.aln.lastref
end

function seq2ref(i::Integer, alnseq::AlignedSequence)
return seq2ref(i, alnseq.aln)
end

function ref2seq(i::Integer, alnseq::AlignedSequence)
return ref2seq(i, alnseq.aln)
end

# simple letters and dashes representation of an alignment
function show(io::IO, alnseq::AlignedSequence)
Expand Down
2 changes: 0 additions & 2 deletions src/align/operations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,11 @@ const OP_INVALID = convert(Operation, 0xff)

const OP_MAX_VALID = OP_START


# classify operations
function ismatchop(op::Operation)
return op == OP_MATCH || op == OP_SEQ_MATCH || op == OP_SEQ_MISMATCH
end


function isinsertop(op::Operation)
return op == OP_INSERT || op == OP_SOFT_CLIP || op == OP_HARD_CLIP
end
Expand Down
24 changes: 18 additions & 6 deletions test/align/TestAlign.jl
Original file line number Diff line number Diff line change
Expand Up @@ -164,19 +164,31 @@ facts("Alignments") do
# | | | | | | |
# 4 8 12 17 20 23 27
anchors = [
AlignmentAnchor(0, 4, OP_START),
AlignmentAnchor(4, 8, OP_MATCH),
AlignmentAnchor(4, 12, OP_DELETE),
AlignmentAnchor(9, 17, OP_MATCH),
AlignmentAnchor( 0, 4, OP_START),
AlignmentAnchor( 4, 8, OP_MATCH),
AlignmentAnchor( 4, 12, OP_DELETE),
AlignmentAnchor( 9, 17, OP_MATCH),
AlignmentAnchor(12, 17, OP_INSERT),
AlignmentAnchor(15, 20, OP_MATCH),
AlignmentAnchor(15, 23, OP_DELETE),
AlignmentAnchor(19, 27, OP_MATCH)
]
query = "TGGCATCATTTAACGCAAG"
alnseq = AlignedSequence(query, anchors)
@fact Bio.Align.first(alnseq) --> 5
@fact Bio.Align.last(alnseq) --> 27
@fact Bio.Align.first(alnseq) --> 5
@fact Bio.Align.last(alnseq) --> 27
# OP_MATCH
for (seqpos, refpos) in [(1, 5), (2, 6), (4, 8), (13, 18), (19, 27)]
@fact seq2ref(seqpos, alnseq) --> (refpos, OP_MATCH)
@fact ref2seq(refpos, alnseq) --> (seqpos, OP_MATCH)
end
# OP_INSERT
@fact seq2ref(10, alnseq) --> (17, OP_INSERT)
@fact seq2ref(11, alnseq) --> (17, OP_INSERT)
# OP_DELETE
@fact ref2seq( 9, alnseq) --> ( 4, OP_DELETE)
@fact ref2seq(10, alnseq) --> ( 4, OP_DELETE)
@fact ref2seq(23, alnseq) --> (15, OP_DELETE)
end
end

Expand Down

0 comments on commit 2a67e3b

Please sign in to comment.