Skip to content

Commit

Permalink
Merge pull request #72 from PALEOtoolkit/simd_iteratorutils
Browse files Browse the repository at this point in the history
Update doc for SIMDutils, IteratorUtils
  • Loading branch information
sjdaines committed Apr 20, 2023
2 parents 28affad + 6a2a2f0 commit 0c89818
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 24 deletions.
69 changes: 63 additions & 6 deletions src/utils/IteratorUtils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,9 @@ Call `f(t1[n], t2[n])` for each element `n` of `t1::Tuple`, `t2::Tuple`.
# Implementation
Recursively generates inline code for speed and type stability.
NB: as of Julia 1.6, slows down and allocates if `length(t1) > 32` due to Julia limitation.
See [`foreach_longtuple`](@ref) for this.
# Limitations
As of Julia 1.6, slows down and allocates if `length(t1) > 32` due to Julia limitation.
See [`foreach_longtuple`](@ref).
See https://github.com/JuliaLang/julia/issues/31869
https://github.com/JuliaLang/julia/blob/master/base/tuple.jl (map implementation)
Expand All @@ -77,6 +78,62 @@ foreach_tuple_unchecked(f, t1::Tuple, t2::Tuple) =
Call `f(t1[n], t2[n], ... tm[n])` or `f(t1[n], t2[n], ... tm[n], p)` for each element
`n` of `t1::Tuple`, `t2`, ... `tm`.
# Examples
Here `input_concs_cell` and `input_concs` are `NamedTuple`s.
PB.IteratorUtils.foreach_longtuple(values(input_concs_cell), values(input_concs)) do ic_cell, ic
ic_cell[] = r_rhofac*ic[i] # generates a closure with r_rhofac as a captured variable
end
# closure with r_rhofac as a captured variable
function f(ic_cell, ic)
ic_cell[] = r_rhofac*ic[i]
end
PB.IteratorUtils.foreach_longtuple(f, values(input_concs_cell), values(input_concs))
PB.IteratorUtils.foreach_longtuple_p(values(input_concs_cell), values(input_concs), r_rhofac) do ic_cell, ic, _r_rhofac
ic_cell[] = _r_rhofac*ic[i] # no closure with _r_rhofac as an argument
end
# no closure with _r_rhofac as an argument
function f(ic_cell, ic, _r_rhofac)
ic_cell[] = _r_rhofac*ic[i]
end
PB.IteratorUtils.foreach_longtuple_p(f, values(input_concs_cell), values(input_concs), r_rhofac)
# Limitations
See Julia performance tips:
- There are potential problems with captured variables when using a closure as `f`, either explicitly or using do block syntax.
It is usually safer to explicitly write out `f` with all arguments, and pass them in using `foreach_longtuple_p`, using `p::Tuple` for multiple arguments.
- Passing type parameters to `f` is prone to creating additional problems if `f` is not specialized, which happens:
- if `f` is either an explicit closure or is a closure created using do block syntax
- if a type parameter is passed as a member of `p::Tuple`
- if a type parameter is passed as `p`, but `f` is not explicitly specialized using `p::Type{MyType}` and `f` is not inlined (do block syntax does appear to work)
It is safest to either explicitly specialize `f` on a type parameter using `p::Type{MyType}`, or avoid passing type parameters and eg use eltype to derive them.
# Allocates: generates a closure with BufType as a captured variable, with no specialization
PB.IteratorUtils.foreach_longtuple(values(input_concs_cell), values(input_concs)) do ic_cell, ic
ic_cell[] = convert(BufType, ic[i]) # allocates
end
# OK: no closure, with explicit specialization on BufType argument
function f(ic_cell, ic, ::Type{BufType}) where {BufType} # force specialization
ic_cell[] = convert(BufType, ic[i])
end
PB.IteratorUtils.foreach_longtuple_p(f, values(input_concs_cell), values(input_concs), BufType)
# Allocates: no closure, but BufType is a Tuple member so is passed as a `DataType` hence no specialization
function f(ic_cell, ic, (BufType, r_rhofac))
ic_cell[] = r_rhofac*convert(BufType, ic[i])
end
PB.IteratorUtils.foreach_longtuple_p(f, values(input_concs_cell), values(input_concs), (r_rhofac, BufType))
# Implementation
Uses `@generated` to generate unrolled code for speed and type stability without length restrictions on `tm`.
Expand Down Expand Up @@ -148,7 +205,7 @@ end
end

@inline foreach_longtuple_p(f::F, t1, t2, p; errmsg="iterables lengths differ") where{F} =
foreach_longtuple_unchecked_p(f, t1, t2, p)
(check_lengths_equal(t1, t2; errmsg=errmsg); foreach_longtuple_unchecked_p(f, t1, t2, p))
@generated function foreach_longtuple_unchecked_p(f, t1::Tuple, t2, p)
ex = quote ; end # empty expression
for j=1:fieldcount(t1)
Expand All @@ -160,7 +217,7 @@ end
end

@inline foreach_longtuple_p(f::F, t1, t2, t3, p; errmsg="iterables lengths differ") where{F} =
foreach_longtuple_unchecked_p(f, t1, t2, t3, p)
(check_lengths_equal(t1, t2, t3; errmsg=errmsg); foreach_longtuple_unchecked_p(f, t1, t2, t3, p))
@generated function foreach_longtuple_unchecked_p(f, t1::Tuple, t2, t3, p)
ex = quote ; end # empty expression
for j=1:fieldcount(t1)
Expand All @@ -172,7 +229,7 @@ end
end

@inline foreach_longtuple_p(f::F, t1, t2, t3, t4, p; errmsg="iterables lengths differ") where{F} =
foreach_longtuple_unchecked_p(f, t1, t2, t3, t4, p)
(check_lengths_equal(t1, t2, t3, t4; errmsg=errmsg); foreach_longtuple_unchecked_p(f, t1, t2, t3, t4, p))
@generated function foreach_longtuple_unchecked_p(f, t1::Tuple, t2, t3, t4, p)
ex = quote ; end # empty expression
for j=1:fieldcount(t1)
Expand All @@ -184,7 +241,7 @@ end
end

@inline foreach_longtuple_p(f::F, t1, t2, t3, t4, t5, p; errmsg="iterables lengths differ") where{F} =
foreach_longtuple_unchecked_p(f, t1, t2, t3, t4, t5, p)
(check_lengths_equal(t1, t2, t3, t4, t5; errmsg=errmsg); foreach_longtuple_unchecked_p(f, t1, t2, t3, t4, t5, p))
@generated function foreach_longtuple_unchecked_p(f, t1::Tuple, t2, t3, t4, t5, p)
ex = quote ; end # empty expression
for j=1:fieldcount(t1)
Expand Down
68 changes: 50 additions & 18 deletions src/utils/SIMDutils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ indices into a Vector). See Julia package [SIMD.jl](https://github.com/eschnett/
If `baseiter` contained 1 or more but less then `N` elements, then `indices` is filled with
repeats of the last available element.
Returns Tuple `indices` (length `N`).
Returns Tuple of indices (length `N`).
# Examples
Expand All @@ -48,22 +48,22 @@ Returns Tuple `indices` (length `N`).
# simplest version - Float64 x 4, ie type of v_a x 4
for indvec in SIMDIter(iter, Val(4))
x = v_a[indvec] # x is a packed SIMD vector
v_b[indvec] = x
for i in SIMDIter(iter, Val(4))
x = v_a[i] # x is a packed SIMD vector
v_b[i] = x
end
# with type conversion - Float32 x 8, ie explicitly change Type of SIMD vector
ST = SIMD.Vec{8, Float32}}
for indvec in SIMDIter(iter, ST)
# v = vec[indvec] <--> vgatherind(ST, vec, indvec)
# vec[indvec] = v <--> vscatterind!(v, vec, indvec)
# vec[indvec] += v <--> vaddind!(v, vec, indvec)
for i in SIMDIter(iter, ST)
# v = vec[i] <--> vgatherind(ST, vec, i)
# vec[i] = v <--> vscatterind!(v, vec, i)
# vec[i] += v <--> vaddind!(v, vec, i)
x = vgatherind(ST, v_a, indices) # x is a packed SIMD vector with type conversion to Float32
vscatterind!(x, v_b, indices)
x = vgatherind(ST, v_a, i) # x is a packed SIMD vector with type conversion to Float32
vscatterind!(x, v_b, i)
end
"""
Expand Down Expand Up @@ -145,7 +145,14 @@ end
# Fill SIMD type from scalar Vector
#####################################

"scalar fallback - just element in a vector"
"""
vgatherind(vec, indices::Integer, mask) -> v # v[i], scalar fallback
vgatherind(vec, indices::SIMD.Vec{N, <:Integer}, mask) -> v::SIMD.Vec{N, eltype(vec)}
Fill SIMD type from scalar Vector, without type conversion
See [`SIMDIter`](@ref) for example usage.
"""
@Base.propagate_inbounds function vgatherind(vec, indices::Integer, mask)
return vec[indices]
end
Expand All @@ -154,22 +161,28 @@ end
return SIMD.vgather(vec, indices, mask)
end

"scalar fallback - just element in a vector"
"""
vgatherind(::Type{T}, vec, indices::Integer, mask) -> v::T # v[i], scalar fallback
vgatherind(::Type{SIMD.Vec{N, T}}, vec, indices::SIMD.Vec{N, <:Integer}) -> v::SIMD.Vec{N, T}
Fill SIMD type from scalar Vector, with explicit type conversion to scalar type `T`
NB: assumes all `indices` are valid
See [`SIMDIter`](@ref) for example usage.
"""
@Base.propagate_inbounds function vgatherind(::Type{T}, vec, indices::Integer) where {T}
return convert(T, vec[indices])
end

"gather with type conversion, assumes indices all valid"
@Base.propagate_inbounds function vgatherind(::Type{SIMD.Vec{2, T}}, vec, indices::SIMD.Vec{2, <:Integer}) where T
return SIMD.Vec{2, T}((vec[indices[1]], vec[indices[2]]))
end

"gather with type conversion, assumes indices all valid"
@Base.propagate_inbounds function vgatherind(::Type{SIMD.Vec{4, T}}, vec, indices::SIMD.Vec{4, <:Integer}) where T
return SIMD.Vec{4, T}((vec[indices[1]], vec[indices[2]], vec[indices[3]], vec[indices[4]]))
end

"gather with type conversion, assumes indices all valid"
@Base.propagate_inbounds function vgatherind(::Type{SIMD.Vec{8, T}}, vec, indices::SIMD.Vec{8, <:Integer}) where T
return SIMD.Vec{8, T}(
(
Expand All @@ -183,13 +196,23 @@ end
# Unpack SIMD type and write to scalar Vector
#############################################

"scalar fallback - just set element in a vector"
"""
vscatterind!(v::T, vec::Array{T, N}, indices::Integer, mask) # scalar fallback, no type conversion
vscatterind!(v::T, vec::Array{T, N}, indices::Integer) # scalar fallback, no type conversion
vscatterind!(v::SIMD.Vec{N, T}, vec, indices::SIMD.Vec{N, <:Integer}, mask) # converts to eltype{vec}
vscatterind!(v::SIMD.Vec{N, T}, vec, indices::SIMD.Vec{N, <:Integer}) # converts to eltype{vec}
Unpack SIMD type and write to scalar Vector, with type conversion to `eltype(vec)`.
Versions without `mask` assume all indices are valid (but may be repeated).
See [`SIMDIter`](@ref) for example usage.
"""
@Base.propagate_inbounds function vscatterind!(v::T, vec::Array{T, N}, indices::Integer, mask) where {T,N}
vec[indices] = v
return nothing
end

"scalar fallback - just set element in a vector"
@Base.propagate_inbounds function vscatterind!(v::T, vec::Array{T,N}, indices::Integer) where {T,N}
vec[indices] = v
return nothing
Expand All @@ -209,7 +232,16 @@ end
# Unpack SIMD type and add to scalar Vector
#############################################

"scalar fallback - just add to element in a vector `vec`"
"""
vaddind!(v::T, vec::Array{T, N}, indices::Integer) # scalar fallback, no type conversion
vaddind!(v::SIMD.Vec{N, T}, vec, indices::SIMD.Vec{N, <:Integer})
Unpack SIMD type and add to scalar Vector, with type conversion to `eltype(vec)`.
Assumes all indices are valid (but may be repeated, which will still have the effect of one add).
See [`SIMDIter`](@ref) for example usage.
"""
@Base.propagate_inbounds function vaddind!(v::T, vec::Array{T, N}, indices::Integer) where {T, N}
vec[indices] += v
return nothing
Expand Down

0 comments on commit 0c89818

Please sign in to comment.