Skip to content

Commit

Permalink
Merge cc8237c into 3e3910c
Browse files Browse the repository at this point in the history
  • Loading branch information
alyst committed Mar 3, 2021
2 parents 3e3910c + cc8237c commit c8e434c
Show file tree
Hide file tree
Showing 9 changed files with 236 additions and 157 deletions.
6 changes: 6 additions & 0 deletions src/BioAlignments.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,14 @@ export
AlignmentAnchor,
Alignment,
AlignedSequence,

seq2ref,
ref2seq,
seq2aln,
ref2aln,
aln2seq,
aln2ref,

ismatchop,
isinsertop,
isdeleteop,
Expand Down
19 changes: 10 additions & 9 deletions src/alignedseq.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ function AlignedSequence(seq::BioSequences.BioSequence, seqpos::Integer,
end
seqpos -= 1
refpos -= 1
alnpos = 0
op = OP_START
newseq = similar(seq, 0) # sequence without gap symbols
anchors = AlignmentAnchor[]
Expand All @@ -44,7 +45,7 @@ function AlignedSequence(seq::BioSequences.BioSequence, seqpos::Integer,
end

if op′ != op
push!(anchors, AlignmentAnchor(seqpos, refpos, op))
push!(anchors, AlignmentAnchor(seqpos, refpos, alnpos, op))
op = op′
end

Expand All @@ -55,8 +56,9 @@ function AlignedSequence(seq::BioSequences.BioSequence, seqpos::Integer,
if y != BioSequences.gap(eltype(ref))
refpos += 1
end
alnpos += 1 # one or another don't have gap
end
push!(anchors, AlignmentAnchor(seqpos, refpos, op))
push!(anchors, AlignmentAnchor(seqpos, refpos, alnpos, op))
return AlignedSequence(newseq, anchors)
end

Expand All @@ -70,13 +72,12 @@ function IntervalTrees.last(alnseq::AlignedSequence)
return alnseq.aln.lastref
end

function seq2ref(alnseq::AlignedSequence, i::Integer)
return seq2ref(alnseq.aln, i)
end

function ref2seq(alnseq::AlignedSequence, i::Integer)
return ref2seq(alnseq.aln, i)
end
seq2ref(alnseq::AlignedSequence, i) = seq2ref(alnseq.aln, i)
ref2seq(alnseq::AlignedSequence, i) = ref2seq(alnseq.aln, i)
seq2aln(alnseq::AlignedSequence, i) = seq2aln(alnseq.aln, i)
ref2aln(alnseq::AlignedSequence, i) = ref2aln(alnseq.aln, i)
aln2seq(alnseq::AlignedSequence, i) = aln2seq(alnseq.aln, i)
aln2ref(alnseq::AlignedSequence, i) = aln2ref(alnseq.aln, i)

# simple letters and dashes representation of an alignment
function Base.show(io::IO, alnseq::AlignedSequence)
Expand Down
174 changes: 107 additions & 67 deletions src/alignment.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
# License is MIT: https://github.com/BioJulia/Bio.jl/blob/master/LICENSE.md

"""
Alignment of two sequences.
Defines how to align a given sequence onto a reference sequence.
The alignment is represented as a sequence of elementary operations (match, insertion, deletion etc)
anchored to specific positions of the input and reference sequence.
"""
struct Alignment
anchors::Vector{AlignmentAnchor}
Expand Down Expand Up @@ -58,9 +60,10 @@ function Alignment(cigar::AbstractString, seqpos::Int=1, refpos::Int=1)
# path starts prior to the first aligned position pair
seqpos -= 1
refpos -= 1
alnpos = 0

n = 0
anchors = AlignmentAnchor[AlignmentAnchor(seqpos, refpos, OP_START)]
anchors = AlignmentAnchor[AlignmentAnchor(seqpos, refpos, alnpos, OP_START)]
for c in cigar
if isdigit(c)
n = n * 10 + convert(Int, c - '0')
Expand All @@ -79,8 +82,9 @@ function Alignment(cigar::AbstractString, seqpos::Int=1, refpos::Int=1)
else
error("The $(op) CIGAR operation is not yet supported.")
end
alnpos += n

push!(anchors, AlignmentAnchor(seqpos, refpos, op))
push!(anchors, AlignmentAnchor(seqpos, refpos, alnpos, op))
n = 0
end
end
Expand All @@ -100,41 +104,74 @@ function Base.show(io::IO, aln::Alignment)
print(io, " CIGAR string: ", cigar(aln))
end

"""
seq2ref(aln::Alignment, i::Integer)::Tuple{Int,Operation}
Map a position `i` from sequence to reference.
"""
function seq2ref(aln::Alignment, i::Integer)::Tuple{Int,Operation}
idx = findanchor(aln, i, Val{true})
# generic function for mapping between sequence, reference and alignment positions
# getsrc specifies anchor source position getter
# getdest specifies anchor destination position getter
function pos2pos(aln::Alignment, i::Integer,
srcpos::Function, destpos::Function)::Tuple{Int,Operation}
idx = findanchor(aln, i, srcpos)
if idx == 0
throw(ArgumentError("invalid sequence position: $i"))
if srcpos === seqpos
throw(ArgumentError("invalid sequence position: $i"))
elseif srcpos === refpos
throw(ArgumentError("invalid reference position: $i"))
elseif srcpos === alnpos
throw(ArgumentError("invalid alignment position: $i"))
else
throw(ArgumentError("Unknown position getter: $srcpos"))
end
end
anchor = aln.anchors[idx]
refpos = anchor.refpos
if ismatchop(anchor.op)
refpos += i - anchor.seqpos
pos = destpos(anchor)
if ismatchop(anchor.op) ||
((srcpos === alnpos) && ((destpos === seqpos) && isinsertop(anchor.op) || (destpos === refpos) && isdeleteop(anchor.op))) ||
((destpos === alnpos) && ((srcpos === seqpos) && isinsertop(anchor.op) || (srcpos === refpos) && isdeleteop(anchor.op)))
pos += i - srcpos(anchor)
end
return refpos, anchor.op
return pos, anchor.op
end

"""
ref2seq(aln::Alignment, i::Integer)::Tuple{Int,Operation}
seq2ref(aln::Union{Alignment, AlignedSequence, PairwiseAlignment}, i::Integer)::Tuple{Int,Operation}
Map a position `i` from sequence to reference.
"""
seq2ref(aln::Alignment, i::Integer) = pos2pos(aln, i, seqpos, refpos)

"""
ref2seq(aln::Union{Alignment, AlignedSequence, PairwiseAlignment}, i::Integer)::Tuple{Int,Operation}
Map a position `i` from reference to sequence.
"""
function ref2seq(aln::Alignment, i::Integer)::Tuple{Int,Operation}
idx = findanchor(aln, i, Val{false})
if idx == 0
throw(ArgumentError("invalid reference position: $i"))
end
anchor = aln.anchors[idx]
seqpos = anchor.seqpos
if ismatchop(anchor.op)
seqpos += i - anchor.refpos
end
return seqpos, anchor.op
end
ref2seq(aln::Alignment, i::Integer) = pos2pos(aln, i, refpos, seqpos)

"""
seq2aln(aln::Union{Alignment, AlignedSequence, PairwiseAlignment}, i::Integer)::Tuple{Int,Operation}
Map a position `i` from the input sequence to the alignment sequence.
"""
seq2aln(aln::Alignment, i::Integer) = pos2pos(aln, i, seqpos, alnpos)

"""
ref2aln(aln::Union{Alignment, AlignedSequence, PairwiseAlignment}, i::Integer)::Tuple{Int,Operation}
Map a position `i` from the reference sequence to the alignment sequence.
"""
ref2aln(aln::Alignment, i::Integer) = pos2pos(aln, i, refpos, alnpos)

"""
aln2seq(aln::Union{Alignment, AlignedSequence, PairwiseAlignment}, i::Integer)::Tuple{Int,Operation}
Map a position `i` from the alignment sequence to the input sequence.
"""
aln2seq(aln::Alignment, i::Integer) = pos2pos(aln, i, alnpos, seqpos)

"""
aln2ref(aln::Union{Alignment, AlignedSequence, PairwiseAlignment}, i::Integer)::Tuple{Int,Operation}
Map a position `i` from the alignment sequence to the reference sequence.
"""
aln2ref(aln::Alignment, i::Integer) = pos2pos(aln, i, alnpos, refpos)

"""
cigar(aln::Alignment)
Expand Down Expand Up @@ -172,65 +209,68 @@ function check_alignment_anchors(anchors)
end

for i in 2:lastindex(anchors)
if anchors[i].refpos < anchors[i-1].refpos ||
anchors[i].seqpos < anchors[i-1].seqpos
@inbounds acur, aprev = anchors[i], anchors[i-1]
if acur.refpos < aprev.refpos || acur.seqpos < aprev.seqpos || acur.alnpos < aprev.alnpos
error("Alignment anchors must be sorted.")
end

op = anchors[i].op
if convert(UInt8, op) > convert(UInt8, OP_MAX_VALID)
op = acur.op
if !isvalid(op)
error("Anchor at index $(i) has an invalid operation.")
end

# reference skip/delete operations
if isdeleteop(op)
if anchors[i].seqpos != anchors[i-1].seqpos
error("Invalid anchor positions for reference deletion.")
if acur.seqpos != aprev.seqpos
error("Invalid anchor sequence positions for reference deletion.")
end
if acur.alnpos - aprev.alnpos != acur.refpos - aprev.refpos
error("Invalid anchor reference positions for reference deletion.")
end
# reference insertion operations
elseif isinsertop(op)
if anchors[i].refpos != anchors[i-1].refpos
error("Invalid anchor positions for reference insertion.")
if acur.refpos != aprev.refpos
error("Invalid anchor reference positions for reference insertion.")
end
if acur.alnpos - aprev.alnpos != acur.seqpos - aprev.seqpos
error("Invalid anchor sequence positions for reference deletion.")
end
# match operations
elseif ismatchop(op)
if anchors[i].refpos - anchors[i-1].refpos !=
anchors[i].seqpos - anchors[i-1].seqpos
error("Invalid anchor positions for match operation.")
if (acur.refpos - aprev.refpos != acur.seqpos - aprev.seqpos) ||
(acur.alnpos - aprev.alnpos != acur.seqpos - aprev.seqpos)
error("Invalid anchor positions for match operation.")
end
end
end
end

# find the index of the first anchor that satisfies `i ≤ pos`
@generated function findanchor(aln::Alignment, i::Integer, ::Type{Val{isseq}}) where isseq
pos = isseq ? :seqpos : :refpos
quote
anchors = aln.anchors
lo = 1
hi = lastindex(anchors)
if !(anchors[lo].$pos < i anchors[hi].$pos)
return 0
end
# binary search
@inbounds while hi - lo > 2
m = (lo + hi) >> 1
if anchors[m].$pos < i
lo = m
else # i ≤ anchors[m].$pos
hi = m
end
# invariant (activate this for debugging)
#@assert anchors[lo].$pos < i ≤ anchors[hi].$pos
# find the index of the first anchor that satisfies `i ≤ pos(anchor)`
function findanchor(aln::Alignment, i::Integer, pos::Function)
anchors = aln.anchors
lo = 1
hi = lastindex(anchors)
@inbounds if !(pos(anchors[lo]) < i pos(anchors[hi]))
return 0
end
# binary search
@inbounds while hi - lo > 2
m = (lo + hi) >> 1
if pos(anchors[m]) < i
lo = m
else # i ≤ pos(anchors[m])
hi = m
end
# linear search
@inbounds for j in lo+1:hi
if i aln.anchors[j].$pos
return j
end
# invariant (activate this for debugging)
#@assert pos(anchors[lo]) < i ≤ pos(anchors[hi])
end
# linear search
@inbounds for j in lo+1:hi
if i pos(aln.anchors[j])
return j
end
# do not reach here
@assert false
return 0
end
# do not reach here
@assert false
return 0
end
10 changes: 6 additions & 4 deletions src/anchors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@ Alignment operation with anchoring positions.
struct AlignmentAnchor
seqpos::Int
refpos::Int
alnpos::Int
op::Operation
end

function AlignmentAnchor(pos::Tuple{Int,Int}, op)
return AlignmentAnchor(pos[1], pos[2], op)
end
seqpos(anc::AlignmentAnchor) = anc.seqpos
refpos(anc::AlignmentAnchor) = anc.refpos
alnpos(anc::AlignmentAnchor) = anc.alnpos

function Base.show(io::IO, anc::AlignmentAnchor)
print(io, "AlignmentAnchor(", anc.seqpos, ", ", anc.refpos, ", '", anc.op, "')")
print(io, "AlignmentAnchor(", anc.seqpos, ", ", anc.refpos,
", ", anc.alnpos, ", '", anc.op, "')")
end
2 changes: 1 addition & 1 deletion src/operations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ function Base.convert(::Type{Char}, op::Operation)
end

function Base.print(io::IO, op::Operation)
write(io, convert(Char, op))
write(io, isvalid(op) ? convert(Char, op) : '?')
return
end

Expand Down
29 changes: 23 additions & 6 deletions src/pairwise/algorithms/common.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,27 +32,44 @@ const TRACE_EXTINS = 0b10000

macro start_traceback()
esc(quote
anchor_point = (i, j)
__alnpos = 0
anchor_point = (i, j, __alnpos)
op = OP_INVALID
end)
end

# reverses the anchors sequence at the end of the traceback
# and offsets the alignment positions, so that (by default) it starts from 0
function reverse_anchors!(v::AbstractVector{AlignmentAnchor},
alignment_offset=!isempty(v) ? -v[end].alnpos : 0)
r = n = length(v)
@inbounds for i in 1:fld1(n, 2)
vr = v[r]
vi = v[i]
v[i] = AlignmentAnchor(vr.seqpos, vr.refpos, vr.alnpos + alignment_offset, vr.op)
(i != r) && (v[r] = AlignmentAnchor(vi.seqpos, vi.refpos, vi.alnpos + alignment_offset, vi.op))
r -= 1
end
return v
end

macro finish_traceback()
esc(quote
push!(anchors, AlignmentAnchor(anchor_point, op))
push!(anchors, AlignmentAnchor((i, j), OP_START))
reverse!(anchors)
push!(anchors, AlignmentAnchor(anchor_point..., op))
push!(anchors, AlignmentAnchor(i, j, __alnpos, OP_START))
reverse_anchors!(anchors)
pop!(anchors) # remove OP_INVALID
end)
end

macro anchor(ex)
esc(quote
if op != $ex
push!(anchors, AlignmentAnchor(anchor_point, op))
push!(anchors, AlignmentAnchor(anchor_point..., op))
op = $ex
anchor_point = (i, j)
anchor_point = (i, j, __alnpos)
end
__alnpos -= 1
if ismatchop(op)
i -= 1
j -= 1
Expand Down
Loading

0 comments on commit c8e434c

Please sign in to comment.