JuliaMath · timholy · May 16, 2015 · May 16, 2015 · May 16, 2015
diff --git a/src/Interpolations.jl b/src/Interpolations.jl
@@ -12,6 +12,7 @@ import Base:
 
 export
     Interpolation,
+    Interpolation!,
     Constant,
     Linear,
     Quadratic,
@@ -28,6 +29,7 @@ export
     Free,
     Periodic,
     Reflect,
+    Interior,
 
     degree,
     boundarycondition,
@@ -50,6 +52,7 @@ typealias Natural Line
 immutable Free <: BoundaryCondition end
 immutable Periodic <: BoundaryCondition end
 immutable Reflect <: BoundaryCondition end
+immutable Interior <: BoundaryCondition end
 
 abstract InterpolationType{D<:Degree,BC<:BoundaryCondition,GR<:GridRepresentation}
 
@@ -76,6 +79,14 @@ Interpolation(A::AbstractArray, it::InterpolationType, eb::ExtrapolationBehavior
 Interpolation(A::AbstractArray{Float32}, it::InterpolationType, eb::ExtrapolationBehavior) = Interpolation(Float32, A, it, eb)
 Interpolation(A::AbstractArray{Rational{Int}}, it::InterpolationType, eb::ExtrapolationBehavior) = Interpolation(Rational{Int}, A, it, eb)
 
+# In-place (destructive) interpolation
+function Interpolation!{T<:FloatingPoint,N,D<:Degree,G<:GridRepresentation,EB<:ExtrapolationBehavior}(A::AbstractArray{T,N}, it::InterpolationType{D,Interior,G}, ::EB)
+    IT = typeof(it)
+    isleaftype(typeof(IT)) || error("The interpolation type must be a leaf type (was $IT)")
+    isleaftype(T) || error("Must be an array of a concrete type T (eltype(A) == $(eltype(A)))")
+    Interpolation{T,N,T,IT,EB}(prefilter!(T,A,0,it))
+end
+
 # Unless otherwise specified, use coefficients as they are, i.e. without prefiltering
 # However, all prefilters copy the array, so do that here as well
 prefilter{TWeights,T,N,IT<:InterpolationType}(::Type{TWeights}, A::AbstractArray{T,N}, ::IT) = copy(A)
@@ -105,6 +116,7 @@ end
 include("constant.jl")
 include("linear.jl")
 include("quadratic.jl")
+include("filter1d.jl")
 
 function padded_index(sz::Tuple, pad)
     sz = Int[s+2pad for s in sz]
@@ -149,7 +161,7 @@ function getindex_impl(N, IT::Type, EB::Type)
         # Handle extrapolation, by either throwing an error,
         # returning a value, or shifting x to somewhere inside
         # the domain
-        $(extrap_transform_x(gr,eb,N))
+        $(extrap_transform_x(gr,eb,N,it))
 
         # Calculate the indices of all coefficients that will be used
         # and define fx = x - xi in each dimension
@@ -167,12 +179,12 @@ end
 # Resolve ambiguity with linear indexing
 getindex{T,N,TCoefs,IT,EB}(itp::Interpolation{T,N,TCoefs,IT,EB}, x::Real) =
     error("Linear indexing is not supported for interpolation objects")
-stagedfunction getindex{T,TCoefs,IT,EB}(itp::Interpolation{T,1,TCoefs,IT,EB}, x::Real)
+@generated function getindex{T,TCoefs,IT,EB}(itp::Interpolation{T,1,TCoefs,IT,EB}, x::Real)
     getindex_impl(1, IT, EB)
 end
 
 # Resolve ambiguity with real multilinear indexing
-stagedfunction getindex{T,N,TCoefs,IT,EB}(itp::Interpolation{T,N,TCoefs,IT,EB}, x::Real...)
+@generated function getindex{T,N,TCoefs,IT,EB}(itp::Interpolation{T,N,TCoefs,IT,EB}, x::Real...)
     n = length(x)
     n == N || return :(error("Must index ", $N, "-dimensional interpolation objects with ", $(nindexes(N))))
     getindex_impl(N, IT, EB)
@@ -191,15 +203,15 @@ function getindex{T,TCoefs,IT,EB}(itp::Interpolation{T,1,TCoefs,IT,EB}, c::Colon
 end
 
 # Allow multilinear indexing with any types
-stagedfunction getindex{T,N,TCoefs,TIndex,IT,EB}(itp::Interpolation{T,N,TCoefs,IT,EB}, x::TIndex...)
+@generated function getindex{T,N,TCoefs,TIndex,IT,EB}(itp::Interpolation{T,N,TCoefs,IT,EB}, x::TIndex...)
     n = length(x)
     n == N || return :(error("Must index ", $N, "-dimensional interpolation objects with ", $(nindexes(N))))
     getindex_impl(N, IT, EB)
 end
 
 # Define in-place gradient calculation
 #   gradient!(g::Vector, itp::Interpolation, NTuple{N}...)
-stagedfunction gradient!{T,N,TCoefs,TOut,IT,EB}(g::Vector{TOut}, itp::Interpolation{T,N,TCoefs,IT,EB}, x...)
+@generated function gradient!{T,N,TCoefs,TOut,IT,EB}(g::Vector{TOut}, itp::Interpolation{T,N,TCoefs,IT,EB}, x...)
     n = length(x)
     n == N || return :(error("Must index ", $N, "-dimensional interpolation objects with ", $(nindexes(N))))
     it = IT()
@@ -208,7 +220,7 @@ stagedfunction gradient!{T,N,TCoefs,TOut,IT,EB}(g::Vector{TOut}, itp::Interpolat
     quote
         length(g) == $N || error("g must be an array with exactly N elements (length(g) == "*string(length(g))*", N == "*string($N)*")")
         @nexprs $N d->(x_d = x[d])
-        $(extrap_transform_x(gr,eb,N))
+        $(extrap_transform_x(gr,eb,N,it))
         $(define_indices(it,N))
         @nexprs $N dim->begin
             @nexprs $N d->begin
@@ -224,36 +236,33 @@ end
 
 gradient{T,N}(itp::Interpolation{T,N}, x...) = gradient!(Array(T,N), itp, x...)
 
-# This creates prefilter specializations for all interpolation types that need them
-stagedfunction prefilter{TWeights,TCoefs,N,IT<:Quadratic}(::Type{TWeights}, A::Array{TCoefs,N}, it::IT)
-    quote
-        ret, pad = copy_with_padding(A, it)
-
-        szs = collect(size(ret))
-        strds = collect(strides(ret))
-
-        for dim in 1:$N
-            n = szs[dim]
-            szs[dim] = 1
-
-            M, b = prefiltering_system(TWeights, TCoefs, n, it)
-
-            @nloops $N i d->1:szs[d] begin
-                cc = @ntuple $N i
-
-                strt_diff = sum([(cc[i]-1)*strds[i] for i in 1:length(cc)])
-                strt = 1 + strt_diff
-                rng = range(strt, strds[dim], n)
+function prefilter{TWeights,TCoefs,N,IT<:Quadratic}(::Type{TWeights}, A::Array{TCoefs,N}, it::IT)
+    ret, pad = copy_with_padding(A, it)
+    prefilter!(TWeights, ret, pad, it)
+end
 
-                bdiff = ret[rng]
-                b += bdiff
-                ret[rng] = M \ b
-                b -= bdiff
+function prefilter!{TWeights,TCoefs,N,IT<:Quadratic}(::Type{TWeights}, ret::Array{TCoefs,N}, pad::Integer, it::IT)
+    sz = size(ret)
+    first = true
+    for dim in 1:N
+        M, b = prefiltering_system(TWeights, TCoefs, sz[dim], it)
+        if !isa(M, Woodbury)
+            A_ldiv_B_md!(ret, M, ret, dim, b)
+        else
+            if first
+                buf = Array(eltype(ret), length(ret,))
+                shape = sz
+                retrs = reshape(ret, shape)  # for type-stability against a future julia #10507
+                first = false
             end
-            szs[dim] = n
+            bufrs = reshape(buf, shape)
+            filter_dim1!(bufrs, M, retrs, b)
+            shape = (sz[dim+1:end]..., sz[1:dim]...)
+            retrs = reshape(ret, shape)
+            permutedims!(retrs, bufrs, ((2:N)..., 1))
         end
-        ret
     end
+    ret
 end
 
 nindexes(N::Int) = N == 1 ? "1 index" : "$N indexes"

diff --git a/src/constant.jl b/src/constant.jl
@@ -3,7 +3,7 @@ immutable Constant{GR<:GridRepresentation} <: InterpolationType{ConstantDegree,N
 Constant{GR<:GridRepresentation}(::GR) = Constant{GR}()
 
 function define_indices(::Constant, N)
-    :(@nexprs $N d->(ix_d = clamp(round(real(x_d)), 1, size(itp,d))))
+    :(@nexprs $N d->(ix_d = clamp(round(Int, real(x_d)), 1, size(itp,d))))
 end
 
 function coefficients(c::Constant, N)

diff --git a/src/extrapolation.jl b/src/extrapolation.jl
@@ -1,43 +1,43 @@
 abstract ExtrapolationBehavior
 
 immutable ExtrapError <: ExtrapolationBehavior end
-function extrap_transform_x(::OnGrid, ::ExtrapError, N)
+function extrap_transform_x(::OnGrid, ::ExtrapError, N, ::InterpolationType)
     quote
         @nexprs $N d->(1 <= real(x_d) <= size(itp,d) || throw(BoundsError()))
     end
 end
-function extrap_transform_x(::OnCell, ::ExtrapError, N)
+function extrap_transform_x(::OnCell, ::ExtrapError, N, ::InterpolationType)
     quote
         @nexprs $N d->(1//2 <= real(x_d) <= size(itp,d) + 1//2 || throw(BoundsError()))
     end
 end
 
 immutable ExtrapNaN <: ExtrapolationBehavior end
-function extrap_transform_x(::OnGrid, ::ExtrapNaN, N)
+function extrap_transform_x(::OnGrid, ::ExtrapNaN, N, ::InterpolationType)
     quote
         @nexprs $N d->(1 <= real(x_d) <= size(itp,d) || return convert(T, NaN))
     end
 end
-function extrap_transform_x(::OnCell, ::ExtrapNaN, N)
+function extrap_transform_x(::OnCell, ::ExtrapNaN, N, ::InterpolationType)
     quote
         @nexprs $N d->(1//2 <= real(x_d) <= size(itp,d) + 1//2 || return convert(T, NaN))
     end
 end
 
 immutable ExtrapConstant <: ExtrapolationBehavior end
-function extrap_transform_x(::OnGrid, ::ExtrapConstant, N)
+function extrap_transform_x(::OnGrid, ::ExtrapConstant, N, ::InterpolationType)
     quote
         @nexprs $N d->(x_d = clamp(x_d, 1, size(itp,d)))
     end
 end
-function extrap_transform_x(::OnCell, ::ExtrapConstant, N)
+function extrap_transform_x(::OnCell, ::ExtrapConstant, N, ::InterpolationType)
     quote
         @nexprs $N d->(x_d = clamp(x_d, 1//2, size(itp,d)+1//2))
     end
 end
 
 immutable ExtrapReflect <: ExtrapolationBehavior end
-function extrap_transform_x(::OnGrid, ::ExtrapReflect, N)
+function extrap_transform_x(::OnGrid, ::ExtrapReflect, N, ::InterpolationType)
     quote
         @nexprs $N d->begin
             # translate x_d to inside the domain, and count the translations
@@ -58,7 +58,7 @@ function extrap_transform_x(::OnGrid, ::ExtrapReflect, N)
         end
     end
 end
-function extrap_transform_x(::OnCell, ::ExtrapReflect, N)
+function extrap_transform_x(::OnCell, ::ExtrapReflect, N, ::InterpolationType)
     quote
         @nexprs $N d->begin
             # translate x_d to inside the domain, and count the translations
@@ -81,12 +81,12 @@ function extrap_transform_x(::OnCell, ::ExtrapReflect, N)
 end
 
 immutable ExtrapPeriodic <: ExtrapolationBehavior end
-function extrap_transform_x(::GridRepresentation, ::ExtrapPeriodic, N)
+function extrap_transform_x(::GridRepresentation, ::ExtrapPeriodic, N, ::InterpolationType)
     :(@nexprs $N d->(x_d = mod1(x_d, size(itp,d))))
 end
 
 immutable ExtrapLinear <: ExtrapolationBehavior end
-function extrap_transform_x(::OnGrid, ::ExtrapLinear, N)
+function extrap_transform_x(::OnGrid, ::ExtrapLinear, N, ::InterpolationType)
     quote
         @nexprs $N d->begin
             if x_d < 1
@@ -111,4 +111,4 @@ function extrap_transform_x(::OnGrid, ::ExtrapLinear, N)
         end
     end
 end
-extrap_transform_x(::OnCell, e::ExtrapLinear, N) = extrap_transform_x(OnGrid(), e, N)
+extrap_transform_x(::OnCell, e::ExtrapLinear, N, it::InterpolationType) = extrap_transform_x(OnGrid(), e, N, it)
diff --git a/src/filter1d.jl b/src/filter1d.jl
@@ -0,0 +1,140 @@
+import Base.LinAlg.LU, Base.getindex
+
+### Tridiagonal inversion along a particular dimension, first offsetting the values by b
+
+function A_ldiv_B_md!{T}(dest, F::LU{T,Tridiagonal{T}}, src, dim::Integer, b::AbstractVector)
+    1 <= dim <= max(ndims(dest),ndims(src)) || throw(DimensionMismatch("The chosen dimension $dim is larger than $(ndims(src)) and $(ndims(dest))"))
+    n = size(F, 1)
+    n == size(src, dim) && n == size(dest, dim) || throw(DimensionMismatch("Sizes $n, $(size(src,dim)), and $(size(dest,dim)) do not match"))
+    size(dest) == size(src) || throw(DimensionMismatch("Sizes $(size(dest)), $(size(src)) do not match"))
+    check_matrix(F)
+    length(b) == size(src,dim) || throw(DimensionMismatch("length(b) = $(length(b)), which does not match $(size(src,dim))"))
+    R1 = CartesianRange(size(dest)[1:dim-1])  # these are not type-stable, so let's use a function barrier
+    R2 = CartesianRange(size(dest)[dim+1:end])
+    _A_ldiv_B_md!(dest, F, src, R1, R2, b)
+end
+
+# Filtering along the first dimension
+function _A_ldiv_B_md!{T,CI<:CartesianIndex{0}}(dest, F::LU{T,Tridiagonal{T}}, src,  R1::CartesianRange{CI}, R2, b)
+    dinv = 1./F.factors.d  # might not want to do this for small R2
+    for I2 in R2
+        invert_column!(dest, F, src, I2, b, dinv)
+    end
+    dest
+end
+
+function invert_column!{T}(dest, F::LU{T,Tridiagonal{T}}, src, I2, b, dinv)
+    n = size(F, 1)
+    if n == 0
+        return nothing
+    end
+    dl = F.factors.dl
+    d  = F.factors.d
+    du = F.factors.du
+    @inbounds begin
+        # Forward substitution
+        dest[1, I2] = src[1, I2] + b[1]
+        for i = 2:n  # Can't use @simd here
+            dest[i, I2] = src[i, I2] + b[i] - dl[i-1]*dest[i-1, I2]
+        end
+        # Backward substitution
+        dest[n, I2] *= dinv[n]
+        for i = n-1:-1:1   # Can't use @simd here
+            dest[i, I2] = (dest[i, I2] - du[i]*dest[i+1, I2])*dinv[i]
+        end
+    end
+    nothing
+end
+
+# Filtering along any other dimension
+function _A_ldiv_B_md!{T}(dest, F::LU{T,Tridiagonal{T}}, src, R1, R2, b)
+    n = size(F, 1)
+    dl = F.factors.dl
+    d  = F.factors.d
+    du = F.factors.du
+    dinv = 1./d  # might not want to do this for small R1 and R2
+    # Forward substitution
+    @inbounds for I2 in R2
+        @simd for I1 in R1
+            dest[I1, 1, I2] = src[I1, 1, I2] + b[1]
+        end
+        for i = 2:n
+            @simd for I1 in R1
+                dest[I1, i, I2] = src[I1, i, I2] + b[i] - dl[i-1]*dest[I1, i-1, I2]
+            end
+        end
+        # Backward substitution
+        @simd for I1 in R1
+            dest[I1, n, I2] *= dinv[n]
+        end
+        for i = n-1:-1:1
+            @simd for I1 in R1
+                dest[I1, i, I2] = (dest[I1, i, I2] - du[i]*dest[I1, i+1, I2])*dinv[i]
+            end
+        end
+    end
+    dest
+end
+
+### Woodbury inversion along dimension 1
+# Here we support only dimension 1 because the matrix multiplications in Woodbury
+# inversion cannot be done in a cache-friendly way for any other dimension.
+# It's therefore better to permutedims.
+function filter_dim1!(dest, W::Woodbury, src, b)
+    size(src,1) == size(W,1) == length(b) || throw(DimensionMismatch("Sizes $(size(src,1)), $(size(W,1)), and $(length(b)) do not match"))
+    size(src) == size(dest) || throw(DimensionMismatch("Sizes $(size(dest)), $(size(src)) do not match"))
+    check_matrix(W.A)
+    R = CartesianRange(size(dest)[2:end])
+    _filter_dim1!(dest, W, src, b, R)
+end
+
+function _filter_dim1!(dest, W::Woodbury, src, b, R)
+    dinv = 1./W.A.factors.d
+    n = length(dinv)
+    tmp = W.tmpN2
+    dl, du = W.A.factors.dl, W.A.factors.du
+    for I in R   # iterate over "columns"
+        invert_column!(dest, W.A, src, I, b, dinv)
+        # TODO? Manually fuse the tridiagonal inversions with their "adjacent" matrix multiplications
+        A_mul_b!(W.tmpk1, W.V, dest, I)
+        A_mul_B!(W.tmpk2, W.Cp, W.tmpk1)
+        A_mul_B!(tmp, W.U, W.tmpk2)
+        # Fuses the final tridiagonal solve with the offset
+        @inbounds begin
+            # Forward substitution
+            for i = 2:n  # Can't use @simd here
+                tmp[i] = tmp[i] - dl[i-1]*tmp[i-1]
+            end
+            # Backward substitution
+            t = tmp[n]*dinv[n]
+            dest[n, I] -= t
+            for i = n-1:-1:1   # Can't use @simd here
+                t = (tmp[i] - du[i]*t)*dinv[i]
+                dest[i, I] -= t
+            end
+        end
+    end
+    dest
+end
+
+function check_matrix{T}(F::LU{T,Tridiagonal{T}})
+    for i = 1:size(F,1)
+        F.ipiv[i] == i || error("For efficiency, pivoting is not supported")
+    end
+    du2 = F.factors.du2
+    for i = 1:length(du2)
+        du2[i] == 0 || error("For efficiency, du2 must be all zeros")
+    end
+end
+
+check_matrix(M) = error("unsupported matrix type $(typeof(M))")
+
+function A_mul_b!(dest, A, B, I)
+    fill!(dest, 0)
+    for j = 1:size(A,2)
+        b = B[j,I]
+        @simd for i = 1:size(A,1)
+            dest[i] += A[i,j]*b
+        end
+    end 
+end
diff --git a/src/linear.jl b/src/linear.jl
@@ -5,7 +5,7 @@ Linear{GR<:GridRepresentation}(::GR) = Linear{GR}()
 function define_indices(::Linear, N)
     quote
         @nexprs $N d->begin
-            ix_d = clamp(floor(real(x_d)), 1, size(itp,d)-1)
+            ix_d = clamp(floor(Int, real(x_d)), 1, size(itp,d)-1)
             ixp_d = ix_d + 1
             fx_d = x_d - ix_d
         end