Skip to content

Commit

Permalink
alignment position support (seq2aln, ref2aln etc)
Browse files Browse the repository at this point in the history
- add alnpos to each anchor
- add mapping to/from alnpos
- add alnpos support to traceback
  • Loading branch information
alyst committed Mar 3, 2021
1 parent 90513fb commit 0156de4
Show file tree
Hide file tree
Showing 8 changed files with 168 additions and 58 deletions.
6 changes: 6 additions & 0 deletions src/BioAlignments.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,14 @@ export
AlignmentAnchor,
Alignment,
AlignedSequence,

seq2ref,
ref2seq,
seq2aln,
ref2aln,
aln2seq,
aln2ref,

ismatchop,
isinsertop,
isdeleteop,
Expand Down
10 changes: 8 additions & 2 deletions src/alignedseq.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ function AlignedSequence(seq::BioSequences.BioSequence, seqpos::Integer,
end
seqpos -= 1
refpos -= 1
alnpos = 0
op = OP_START
newseq = similar(seq, 0) # sequence without gap symbols
anchors = AlignmentAnchor[]
Expand All @@ -44,7 +45,7 @@ function AlignedSequence(seq::BioSequences.BioSequence, seqpos::Integer,
end

if op′ != op
push!(anchors, AlignmentAnchor(seqpos, refpos, op))
push!(anchors, AlignmentAnchor(seqpos, refpos, alnpos, op))
op = op′
end

Expand All @@ -55,8 +56,9 @@ function AlignedSequence(seq::BioSequences.BioSequence, seqpos::Integer,
if y != BioSequences.gap(eltype(ref))
refpos += 1
end
alnpos += 1 # one or another don't have gap
end
push!(anchors, AlignmentAnchor(seqpos, refpos, op))
push!(anchors, AlignmentAnchor(seqpos, refpos, alnpos, op))
return AlignedSequence(newseq, anchors)
end

Expand All @@ -72,6 +74,10 @@ end

seq2ref(alnseq::AlignedSequence, i) = seq2ref(alnseq.aln, i)
ref2seq(alnseq::AlignedSequence, i) = ref2seq(alnseq.aln, i)
seq2aln(alnseq::AlignedSequence, i) = seq2aln(alnseq.aln, i)
ref2aln(alnseq::AlignedSequence, i) = ref2aln(alnseq.aln, i)
aln2seq(alnseq::AlignedSequence, i) = aln2seq(alnseq.aln, i)
aln2ref(alnseq::AlignedSequence, i) = aln2ref(alnseq.aln, i)

# simple letters and dashes representation of an alignment
function Base.show(io::IO, alnseq::AlignedSequence)
Expand Down
53 changes: 47 additions & 6 deletions src/alignment.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,10 @@ function Alignment(cigar::AbstractString, seqpos::Int=1, refpos::Int=1)
# path starts prior to the first aligned position pair
seqpos -= 1
refpos -= 1
alnpos = 0

n = 0
anchors = AlignmentAnchor[AlignmentAnchor(seqpos, refpos, OP_START)]
anchors = AlignmentAnchor[AlignmentAnchor(seqpos, refpos, alnpos, OP_START)]
for c in cigar
if isdigit(c)
n = n * 10 + convert(Int, c - '0')
Expand All @@ -81,8 +82,9 @@ function Alignment(cigar::AbstractString, seqpos::Int=1, refpos::Int=1)
else
error("The $(op) CIGAR operation is not yet supported.")
end
alnpos += n

push!(anchors, AlignmentAnchor(seqpos, refpos, op))
push!(anchors, AlignmentAnchor(seqpos, refpos, alnpos, op))
n = 0
end
end
Expand All @@ -102,7 +104,7 @@ function Base.show(io::IO, aln::Alignment)
print(io, " CIGAR string: ", cigar(aln))
end

# generic function for mapping between seq and ref positions
# generic function for mapping between sequence, reference and alignment positions
# getsrc specifies anchor source position getter
# getdest specifies anchor destination position getter
function pos2pos(aln::Alignment, i::Integer,
Expand All @@ -113,13 +115,17 @@ function pos2pos(aln::Alignment, i::Integer,
throw(ArgumentError("invalid sequence position: $i"))
elseif srcpos === refpos
throw(ArgumentError("invalid reference position: $i"))
elseif srcpos === alnpos
throw(ArgumentError("invalid alignment position: $i"))
else
throw(ArgumentError("Unknown position getter: $srcpos"))
end
end
anchor = aln.anchors[idx]
pos = destpos(anchor)
if ismatchop(anchor.op)
if ismatchop(anchor.op) ||
((srcpos === alnpos) && ((destpos === seqpos) && isinsertop(anchor.op) || (destpos === refpos) && isdeleteop(anchor.op))) ||
((destpos === alnpos) && ((srcpos === seqpos) && isinsertop(anchor.op) || (srcpos === refpos) && isdeleteop(anchor.op)))
pos += i - srcpos(anchor)
end
return pos, anchor.op
Expand All @@ -139,6 +145,34 @@ Map a position `i` from reference to sequence.
"""
ref2seq(aln::Alignment, i::Integer) = pos2pos(aln, i, refpos, seqpos)

"""
seq2aln(aln::Union{Alignment, AlignedSequence, PairwiseAlignment}, i::Integer)::Tuple{Int,Operation}
Map a position `i` from the input sequence to the alignment sequence.
"""
seq2aln(aln::Alignment, i::Integer) = pos2pos(aln, i, seqpos, alnpos)

"""
ref2aln(aln::Union{Alignment, AlignedSequence, PairwiseAlignment}, i::Integer)::Tuple{Int,Operation}
Map a position `i` from the reference sequence to the alignment sequence.
"""
ref2aln(aln::Alignment, i::Integer) = pos2pos(aln, i, refpos, alnpos)

"""
aln2seq(aln::Union{Alignment, AlignedSequence, PairwiseAlignment}, i::Integer)::Tuple{Int,Operation}
Map a position `i` from the alignment sequence to the input sequence.
"""
aln2seq(aln::Alignment, i::Integer) = pos2pos(aln, i, alnpos, seqpos)

"""
aln2ref(aln::Union{Alignment, AlignedSequence, PairwiseAlignment}, i::Integer)::Tuple{Int,Operation}
Map a position `i` from the alignment sequence to the reference sequence.
"""
aln2ref(aln::Alignment, i::Integer) = pos2pos(aln, i, alnpos, refpos)

"""
cigar(aln::Alignment)
Expand Down Expand Up @@ -176,7 +210,7 @@ function check_alignment_anchors(anchors)

for i in 2:lastindex(anchors)
@inbounds acur, aprev = anchors[i], anchors[i-1]
if acur.refpos < aprev.refpos || acur.seqpos < aprev.seqpos
if acur.refpos < aprev.refpos || acur.seqpos < aprev.seqpos || acur.alnpos < aprev.alnpos
error("Alignment anchors must be sorted.")
end

Expand All @@ -190,14 +224,21 @@ function check_alignment_anchors(anchors)
if acur.seqpos != aprev.seqpos
error("Invalid anchor sequence positions for reference deletion.")
end
if acur.alnpos - aprev.alnpos != acur.refpos - aprev.refpos
error("Invalid anchor reference positions for reference deletion.")
end
# reference insertion operations
elseif isinsertop(op)
if acur.refpos != aprev.refpos
error("Invalid anchor reference positions for reference insertion.")
end
if acur.alnpos - aprev.alnpos != acur.seqpos - aprev.seqpos
error("Invalid anchor sequence positions for reference deletion.")
end
# match operations
elseif ismatchop(op)
if acur.refpos - aprev.refpos != acur.seqpos - aprev.seqpos
if (acur.refpos - aprev.refpos != acur.seqpos - aprev.seqpos) ||
(acur.alnpos - aprev.alnpos != acur.seqpos - aprev.seqpos)
error("Invalid anchor positions for match operation.")
end
end
Expand Down
11 changes: 7 additions & 4 deletions src/anchors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,15 @@ Alignment operation with anchoring positions.
struct AlignmentAnchor
seqpos::Int
refpos::Int
alnpos::Int
op::Operation
end

function Base.show(io::IO, anc::AlignmentAnchor)
print(io, "AlignmentAnchor(", anc.seqpos, ", ", anc.refpos, ", '", anc.op, "')")
end

seqpos(anc::AlignmentAnchor) = anc.seqpos
refpos(anc::AlignmentAnchor) = anc.refpos
alnpos(anc::AlignmentAnchor) = anc.alnpos

function Base.show(io::IO, anc::AlignmentAnchor)
print(io, "AlignmentAnchor(", anc.seqpos, ", ", anc.refpos,
", ", anc.alnpos, ", '", anc.op, "')")
end
25 changes: 21 additions & 4 deletions src/pairwise/algorithms/common.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,32 @@ const TRACE_EXTINS = 0b10000

macro start_traceback()
esc(quote
anchor_point = (i, j)
__alnpos = 0
anchor_point = (i, j, __alnpos)
op = OP_INVALID
end)
end

# reverses the anchors sequence at the end of the traceback
# and offsets the alignment positions, so that (by default) it starts from 0
function reverse_anchors!(v::AbstractVector{AlignmentAnchor},
alignment_offset=!isempty(v) ? -v[end].alnpos : 0)
r = n = length(v)
@inbounds for i in 1:fld1(n, 2)
vr = v[r]
vi = v[i]
v[i] = AlignmentAnchor(vr.seqpos, vr.refpos, vr.alnpos + alignment_offset, vr.op)
(i != r) && (v[r] = AlignmentAnchor(vi.seqpos, vi.refpos, vi.alnpos + alignment_offset, vi.op))
r -= 1
end
return v
end

macro finish_traceback()
esc(quote
push!(anchors, AlignmentAnchor(anchor_point..., op))
push!(anchors, AlignmentAnchor(i, j, OP_START))
reverse!(anchors)
push!(anchors, AlignmentAnchor(i, j, __alnpos, OP_START))
reverse_anchors!(anchors)
pop!(anchors) # remove OP_INVALID
end)
end
Expand All @@ -51,8 +67,9 @@ macro anchor(ex)
if op != $ex
push!(anchors, AlignmentAnchor(anchor_point..., op))
op = $ex
anchor_point = (i, j)
anchor_point = (i, j, __alnpos)
end
__alnpos -= 1
if ismatchop(op)
i -= 1
j -= 1
Expand Down
8 changes: 4 additions & 4 deletions src/pairwise/algorithms/hamming_distance.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
function hamming_distance(::Type{T}, a, b) where T
m = length(a)
@assert m == length(b)
anchors = [AlignmentAnchor(0, 0, OP_START)]
anchors = [AlignmentAnchor(0, 0, 0, OP_START)]
if m == 0
# empty string
return 0, anchors
Expand All @@ -19,17 +19,17 @@ function hamming_distance(::Type{T}, a, b) where T
for i in 2:m
if a[i] == b[i]
if !was_match
push!(anchors, AlignmentAnchor(i - 1, i - 1, OP_SEQ_MISMATCH))
push!(anchors, AlignmentAnchor(i - 1, i - 1, i - 1, OP_SEQ_MISMATCH))
end
was_match = true
else
if was_match
push!(anchors, AlignmentAnchor(i - 1, i - 1, OP_SEQ_MATCH))
push!(anchors, AlignmentAnchor(i - 1, i - 1, i - 1, OP_SEQ_MATCH))
end
d += T(1)
was_match = false
end
end
push!(anchors, AlignmentAnchor(m, m, was_match ? OP_SEQ_MATCH : OP_SEQ_MISMATCH))
push!(anchors, AlignmentAnchor(m, m, m, was_match ? OP_SEQ_MATCH : OP_SEQ_MISMATCH))
return d, anchors
end
4 changes: 4 additions & 0 deletions src/pairwise/alignment.jl
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ end

seq2ref(aln::PairwiseAlignment, i::Integer) = seq2ref(aln.a, i)
ref2seq(aln::PairwiseAlignment, i::Integer) = ref2seq(aln.a, i)
seq2aln(aln::PairwiseAlignment, i::Integer) = seq2aln(aln.a, i)
ref2aln(aln::PairwiseAlignment, i::Integer) = ref2aln(aln.a, i)
aln2seq(aln::PairwiseAlignment, i::Integer) = aln2seq(aln.a, i)
aln2ref(aln::PairwiseAlignment, i::Integer) = aln2ref(aln.a, i)


# Printers
Expand Down
Loading

0 comments on commit 0156de4

Please sign in to comment.