Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix interval indexing with offset axes #90

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 51 additions & 14 deletions src/indexing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -184,30 +184,67 @@ axisindexes{T}(::Type{Dimensional}, ax::AbstractVector{T}, idx::ClosedInterval)

# Or repeated intervals, which only work if the axis is a range since otherwise
# there will be a non-constant number of indices in each repetition.
# Two small tricks are used here:
# * Compute the resulting interval axis with unsafe indexing without any offset
# - Since it's a range, we can do this, and it makes the resulting axis useful
# * Snap the offsets to the nearest datapoint to avoid fencepost problems
# Adds a dimension to the result; rows represent the interval and columns are offsets.
#
# There are a number of challenges here:
# * This operation adds a dimension to the result; rows represent the interval
# (or subset) and columns are offsets (or repetition). A RepeatedRangeMatrix
# represents the resulting matrix of indices very nicely.
# * We also want the returned matrix to keep track of its axes; the axis
# subset (ax_sub) is the relative location of the interval with respect to
# each offset, and the repetitions (ax_rep) is the array of offsets.
# * We are interested in the resulting *addition* of the interval against the
# offsets. Either the offsets or the interval may independently be out of
# bounds prior to this addition. Even worse: the interval may have different
# units than the axis (e.g., `(Day(-1)..Day(1)) + dates` for a three-day
# span around dates of interest over a Date axis).
# * It is possible (and likely!) that neither the interval endpoints nor the
# offsets fall exactly upon an axis value. Or even worse: the some offsets
# when added to the interval could span more elements than others (the
# fencepost problem). As such, we need to be careful about how and when we
# snap the provided intervals and offsets to exact axis values (and indices).
#
# To avoid the fencepost problems and to define the axes, we convert the
# interval to a UnitRange of relative indices and the array of offsets to an
# array of absolute indices (independently of each other). Exactly how we do so
# must be carefully considered.
#
# Note that this is fundamentally different than indexing by a single interval;
# whereas those intervals are specified in the same units as the elements of the
# axis itself, these intervals are specified in terms of _offsets_. At the same
# time, we want `A[interval] == vec(A[interval + [0]])`. To make these
# computations as similar as possible, we use a phony range of the form
# `step(ax):step(ax):step(ax)` in order to search for the interval.
phony_range(r::Range) = step(r):step(r):step(r)
phony_range(r::AbstractUnitRange) = step(r):step(r)
if VERSION < v"0.6-pre"
phony_range(r::FloatRange) = FloatRange(r.step, r.step, one(r.len), r.divisor)
else
phony_range(r::StepRangeLen) = StepRangeLen(r.step, r.step, 1)
end
function relativewindow(r::Range, x::ClosedInterval)
pr = phony_range(r)
idxs = Extrapolated.searchsorted(pr, x)
vals = Extrapolated.getindex(pr, idxs)
return (idxs, vals)
end

axisindexes(::Type{Dimensional}, ax::AbstractVector, idx::RepeatedInterval) = error("repeated intervals might select a varying number of elements for non-range axes; use a repeated Range of indices instead")
function axisindexes(::Type{Dimensional}, ax::Range, idx::RepeatedInterval)
n = length(idx.offsets)
idxs = unsafe_searchsorted(ax, idx.window)
offsets = [searchsortednearest(ax, idx.offsets[i]) for i=1:n]
AxisArray(RepeatedRangeMatrix(idxs, offsets), Axis{:sub}(inbounds_getindex(ax, idxs)), Axis{:rep}(ax[offsets]))
idxs, vals = relativewindow(ax, idx.window)
offsets = [Extrapolated.searchsortednearest(ax, offset) for offset in idx.offsets]
AxisArray(RepeatedRangeMatrix(idxs, offsets), Axis{:sub}(vals), Axis{:rep}(Extrapolated.getindex(ax, offsets)))
end

# We also have special datatypes to represent intervals about indices
axisindexes(::Type{Dimensional}, ax::AbstractVector, idx::IntervalAtIndex) = searchsorted(ax, idx.window + ax[idx.index])
function axisindexes(::Type{Dimensional}, ax::Range, idx::IntervalAtIndex)
idxs = unsafe_searchsorted(ax, idx.window)
AxisArray(idxs + idx.index, Axis{:sub}(inbounds_getindex(ax, idxs)))
idxs, vals = relativewindow(ax, idx.window)
AxisArray(idxs + idx.index, Axis{:sub}(vals))
end
axisindexes(::Type{Dimensional}, ax::AbstractVector, idx::RepeatedIntervalAtIndexes) = error("repeated intervals might select a varying number of elements for non-range axes; use a repeated Range of indices instead")
function axisindexes(::Type{Dimensional}, ax::Range, idx::RepeatedIntervalAtIndexes)
n = length(idx.indexes)
idxs = unsafe_searchsorted(ax, idx.window)
AxisArray(RepeatedRangeMatrix(idxs, idx.indexes), Axis{:sub}(inbounds_getindex(ax, idxs)), Axis{:rep}(ax[idx.indexes]))
idxs, vals = relativewindow(ax, idx.window)
AxisArray(RepeatedRangeMatrix(idxs, idx.indexes), Axis{:sub}(vals), Axis{:rep}(ax[idx.indexes]))
end

# Categorical axes may be indexed by their elements
Expand Down
78 changes: 46 additions & 32 deletions src/search.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,78 +15,92 @@ function searchsortednearest(vec::AbstractVector, x)
end
return idx
end
# Base only specializes searching ranges by Numbers; so optimize for Intervals
function Base.searchsorted(a::Range, I::ClosedInterval)
searchsortedfirst(a, I.left):searchsortedlast(a, I.right)
end

# We depend upon extrapolative behaviors in searching ranges to shift axes.
# This can be done by stealing Base's implementations and removing the bounds-
# correcting min/max.
"""
The internal `Extrapolated` module contains implementations for indexing and
searching into ranges beyond their bounds. The `@inbounds` macro is not
sufficient since it can be turned off by `--check-bounds=yes`.
"""
module Extrapolated
using ..ClosedInterval

function searchsortednearest(vec::Range, x)
idx = searchsortedfirst(vec, x) # Returns the first idx | vec[idx] >= x
if (getindex(vec, idx) - x) > (x - getindex(vec, idx-1))
idx -= 1 # The previous element is closer
end
return idx
end

# TODO: This could plug into the sorting system better, but it's fine for now
# TODO: This needs to support Dates.
"""
unsafe_searchsorted(a::Range, I::ClosedInterval)
searchsorted(a::Range, I::ClosedInterval)

Return the indices of the range that fall within an interval without checking
bounds, possibly extrapolating outside the range if needed.
"""
function unsafe_searchsorted(a::Range, I::ClosedInterval)
unsafe_searchsortedfirst(a, I.left):unsafe_searchsortedlast(a, I.right)
end
# Base only specializes searching ranges by Numbers; so optimize for Intervals
function Base.searchsorted(a::Range, I::ClosedInterval)
function searchsorted(a::Range, I::ClosedInterval)
searchsortedfirst(a, I.left):searchsortedlast(a, I.right)
end

# When running with "--check-bounds=yes" (like on Travis), the bounds-check isn't elided
@inline function inbounds_getindex{T}(v::Range{T}, i::Integer)
# When running with "--check-bounds=yes`(like on Travis), the bounds-check isn't elided
@inline function getindex{T}(v::Range{T}, i::Integer)
convert(T, first(v) + (i-1)*step(v))
end
@inline function inbounds_getindex{T<:Integer}(r::Range, s::Range{T})
@inline function getindex{T<:Integer}(r::Range, s::Range{T})
f = first(r)
st = oftype(f, f + (first(s)-1)*step(r))
range(st, step(r)*step(s), length(s))
end
getindex(r::Range, I::Array) = [getindex(r, i) for i in I]
if VERSION < v"0.6.0-dev.2390"
include_string("""
@inline function inbounds_getindex{T}(r::FloatRange{T}, i::Integer)
@inline function getindex{T}(r::FloatRange{T}, i::Integer)
convert(T, (r.start + (i-1)*r.step)/r.divisor)
end
@inline function inbounds_getindex(r::FloatRange, s::OrdinalRange)
@inline function getindex(r::FloatRange, s::OrdinalRange)
FloatRange(r.start + (first(s)-1)*r.step, step(s)*r.step, length(s), r.divisor)
end
""")
else
include_string("""
@inline inbounds_getindex(r::StepRangeLen, i::Integer) = Base.unsafe_getindex(r, i)
@inline function inbounds_getindex(r::StepRangeLen, s::OrdinalRange)
vfirst = Base.unsafe_getindex(r, first(s))
StepRangeLen(vfirst, step(r)*step(s), length(s))
@inline getindex(r::StepRangeLen, i::Integer) = Base.unsafe_getindex(r, i)
@inline function getindex(r::StepRangeLen, s::AbstractUnitRange)
soffset = 1 + (r.offset - first(s))
soffset = clamp(soffset, 1, length(s))
ioffset = first(s) + (soffset-1)
if ioffset == r.offset
StepRangeLen(r.ref, r.step, length(s), max(1,soffset))
else
StepRangeLen(r.ref + (ioffset-r.offset)*r.step, r.step, length(s), max(1,soffset))
end
end
""")
end

function unsafe_searchsortedlast{T<:Number}(a::Range{T}, x::Number)
function searchsortedlast(a::Range, x)
step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported"))
n = round(Integer,(x-first(a))/step(a))+1
isless(x, inbounds_getindex(a, n)) ? n-1 : n
isless(x, getindex(a, n)) ? n-1 : n
end
function unsafe_searchsortedfirst{T<:Number}(a::Range{T}, x::Number)
function searchsortedfirst(a::Range, x)
step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported"))
n = round(Integer,(x-first(a))/step(a))+1
isless(inbounds_getindex(a, n), x) ? n+1 : n
isless(getindex(a, n), x) ? n+1 : n
end
function unsafe_searchsortedlast{T<:Integer}(a::Range{T}, x::Number)
function searchsortedlast{T<:Integer}(a::Range{T}, x)
step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported"))
fld(floor(Integer,x)-first(a),step(a))+1
end
function unsafe_searchsortedfirst{T<:Integer}(a::Range{T}, x::Number)
function searchsortedfirst{T<:Integer}(a::Range{T}, x)
step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported"))
-fld(floor(Integer,-x)+first(a),step(a))+1
end
function unsafe_searchsortedfirst{T<:Integer}(a::Range{T}, x::Unsigned)
function searchsortedfirst{T<:Integer}(a::Range{T}, x::Unsigned)
step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported"))
-fld(first(a)-signed(x),step(a))+1
end
function unsafe_searchsortedlast{T<:Integer}(a::Range{T}, x::Unsigned)
function searchsortedlast{T<:Integer}(a::Range{T}, x::Unsigned)
step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported"))
fld(signed(x)-first(a),step(a))+1
end
end
22 changes: 22 additions & 0 deletions test/indexing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,19 @@ B = AxisArray(reshape(1:15, 5,3), .1:.1:0.5, [:a, :b, :c])

@test B[Axis{:row}(ClosedInterval(0.15, 0.3))] == @view(B[Axis{:row}(ClosedInterval(0.15, 0.3))]) == B[2:3,:]

# Test indexing by Intervals that aren't of the form step:step:last
B = AxisArray(reshape(1:15, 5,3), 1.1:0.1:1.5, [:a, :b, :c])
@test B[ClosedInterval(1.0, 1.5), :] == B[ClosedInterval(1.0, 1.5)] == B[:,:]
@test B[ClosedInterval(1.0, 1.3), :] == B[ClosedInterval(1.0, 1.3)] == B[1:3,:]
@test B[ClosedInterval(1.15, 1.3), :] == B[ClosedInterval(1.15, 1.3)] == B[2:3,:]
@test B[ClosedInterval(1.2, 1.5), :] == B[ClosedInterval(1.2, 1.5)] == B[2:end,:]
@test B[ClosedInterval(1.2, 1.6), :] == B[ClosedInterval(1.2, 1.6)] == B[2:end,:]
@test @view(B[ClosedInterval(1.0, 1.5), :]) == @view(B[ClosedInterval(1.0, 1.5)]) == B[:,:]
@test @view(B[ClosedInterval(1.0, 1.3), :]) == @view(B[ClosedInterval(1.0, 1.3)]) == B[1:3,:]
@test @view(B[ClosedInterval(1.15, 1.3), :]) == @view(B[ClosedInterval(1.15, 1.3)]) == B[2:3,:]
@test @view(B[ClosedInterval(1.2, 1.5), :]) == @view(B[ClosedInterval(1.2, 1.5)]) == B[2:end,:]
@test @view(B[ClosedInterval(1.2, 1.6), :]) == @view(B[ClosedInterval(1.2, 1.6)]) == B[2:end,:]

A = AxisArray(reshape(1:256, 4,4,4,4), Axis{:d1}(.1:.1:.4), Axis{:d2}(1//10:1//10:4//10), Axis{:d3}(["1","2","3","4"]), Axis{:d4}([:a, :b, :c, :d]))
ax1 = axes(A)[1]
@test A[Axis{:d1}(2)] == A[ax1(2)]
Expand Down Expand Up @@ -115,6 +128,7 @@ AxisArrays.axistrait(::AbstractVector{IntLike}) = AxisArrays.Dimensional
end

for (r, Irel) in ((0.1:0.1:10.0, -0.5..0.5), # FloatRange
(22.1:0.1:32.0, -0.5..0.5),
(linspace(0.1, 10.0, 100), -0.51..0.51), # LinSpace
(IL.IntLike(1):IL.IntLike(1):IL.IntLike(100),
IL.IntLike(-5)..IL.IntLike(5))) # StepRange
Expand Down Expand Up @@ -165,3 +179,11 @@ A = AxisArray(rand(2,2), :x, :y)
acc = zeros(Int, 4, 1, 2)
Base.mapreducedim!(x->x>5, +, acc, A3)
@test acc == reshape([1 3; 2 3; 2 3; 2 3], 4, 1, 2)

# Test using dates
using Base.Dates: Day, Month
A = AxisArray(1:365, Date(2017,1,1):Date(2017,12,31))
@test A[Date(2017,2,1) .. Date(2017,2,28)] == collect(31 + (1:28)) # February
@test A[(-Day(13)..Day(14)) + Date(2017,2,14)] == collect(31 + (1:28))
@test A[(-Day(14)..Day(14)) + DateTime(2017,2,14,12)] == collect(31 + (1:28))
@test A[(Day(0)..Day(6)) + (Date(2017,1,1):Month(1):Date(2017,4,12))] == [1:7 32:38 60:66 91:97]
40 changes: 20 additions & 20 deletions test/search.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,25 +10,25 @@ import AxisArrays: searchsortednearest
@test searchsortednearest([1,1,2,2,3,3], Inf) === 6
@test searchsortednearest([1,1,2,2,3,3], -Inf) === 1

# unsafe searchsorted for ranges
import AxisArrays: unsafe_searchsorted
@test unsafe_searchsorted(1:10, -1 .. 1) === -1:1
@test unsafe_searchsorted(1:10, 12 .. 15) === 12:15
@test unsafe_searchsorted(0:2:10, -3 .. -1) === 0:0
@test unsafe_searchsorted(0:2:10, -5 .. 3) === -1:2
# Extrapolated searching for ranges
import AxisArrays: Extrapolated
@test Extrapolated.searchsorted(1:10, -1 .. 1) === -1:1
@test Extrapolated.searchsorted(1:10, 12 .. 15) === 12:15
@test Extrapolated.searchsorted(0:2:10, -3 .. -1) === 0:0
@test Extrapolated.searchsorted(0:2:10, -5 .. 3) === -1:2

@test unsafe_searchsorted(1:2, 4.5 .. 4.5) === 5:4
@test unsafe_searchsorted(1:2, 3.5 .. 3.5) === 4:3
@test unsafe_searchsorted(1:2, 2.5 .. 2.5) === 3:2 === searchsorted(1:2, 2.5 .. 2.5)
@test unsafe_searchsorted(1:2, 1.5 .. 1.5) === 2:1 === searchsorted(1:2, 1.5 .. 1.5)
@test unsafe_searchsorted(1:2, 0.5 .. 0.5) === 1:0 === searchsorted(1:2, 0.5 .. 0.5)
@test unsafe_searchsorted(1:2, -0.5 .. -0.5) === 0:-1
@test unsafe_searchsorted(1:2, -1.5 .. -1.5) === -1:-2
@test Extrapolated.searchsorted(1:2, 4.5 .. 4.5) === 5:4
@test Extrapolated.searchsorted(1:2, 3.5 .. 3.5) === 4:3
@test Extrapolated.searchsorted(1:2, 2.5 .. 2.5) === 3:2 === searchsorted(1:2, 2.5 .. 2.5)
@test Extrapolated.searchsorted(1:2, 1.5 .. 1.5) === 2:1 === searchsorted(1:2, 1.5 .. 1.5)
@test Extrapolated.searchsorted(1:2, 0.5 .. 0.5) === 1:0 === searchsorted(1:2, 0.5 .. 0.5)
@test Extrapolated.searchsorted(1:2, -0.5 .. -0.5) === 0:-1
@test Extrapolated.searchsorted(1:2, -1.5 .. -1.5) === -1:-2

@test unsafe_searchsorted(2:2:4, 0x6 .. 0x6) === 3:3
@test unsafe_searchsorted(2:2:4, 0x5 .. 0x5) === searchsorted(2:2:4, 0x5 .. 0x5) === 3:2
@test unsafe_searchsorted(2:2:4, 0x4 .. 0x4) === searchsorted(2:2:4, 0x4 .. 0x4) === 2:2
@test unsafe_searchsorted(2:2:4, 0x3 .. 0x3) === searchsorted(2:2:4, 0x3 .. 0x3) === 2:1
@test unsafe_searchsorted(2:2:4, 0x2 .. 0x2) === searchsorted(2:2:4, 0x2 .. 0x2) === 1:1
@test unsafe_searchsorted(2:2:4, 0x1 .. 0x1) === searchsorted(2:2:4, 0x1 .. 0x1) === 1:0
@test unsafe_searchsorted(2:2:4, 0x0 .. 0x0) === 0:0
@test Extrapolated.searchsorted(2:2:4, 0x6 .. 0x6) === 3:3
@test Extrapolated.searchsorted(2:2:4, 0x5 .. 0x5) === searchsorted(2:2:4, 0x5 .. 0x5) === 3:2
@test Extrapolated.searchsorted(2:2:4, 0x4 .. 0x4) === searchsorted(2:2:4, 0x4 .. 0x4) === 2:2
@test Extrapolated.searchsorted(2:2:4, 0x3 .. 0x3) === searchsorted(2:2:4, 0x3 .. 0x3) === 2:1
@test Extrapolated.searchsorted(2:2:4, 0x2 .. 0x2) === searchsorted(2:2:4, 0x2 .. 0x2) === 1:1
@test Extrapolated.searchsorted(2:2:4, 0x1 .. 0x1) === searchsorted(2:2:4, 0x1 .. 0x1) === 1:0
@test Extrapolated.searchsorted(2:2:4, 0x0 .. 0x0) === 0:0