/
filter.jl
70 lines (64 loc) · 1.83 KB
/
filter.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# SIMD kernel behind `vfilter` and the in-place `vfilter!`.
#
# Copies the elements of `y` for which `f` holds to the front of `x`, in order, then
# shrinks `x` to the number of elements kept and returns it.
#
# - `f` is called on a whole SIMD vector loaded from `y`; its result is used as the store
#   mask, so it is not called once per scalar element.
# - `y` is read through `pointer(y)` in steps of `W` consecutive elements, i.e. the code
#   treats its elements as contiguous in memory.
# - `x` may be the same array as `y` (the two-argument `vfilter!` relies on this): each
#   vector is loaded before anything is stored, and the write offset never runs ahead of
#   the read position.
# - Throws `DimensionMismatch` when `x` has fewer elements than `y`.
function vfilter!(
    f::F,
    x::Vector{T},
    y::AbstractArray{T}
) where {F,T<:NativeTypes}
    N = length(y)
    # Kept elements are written through a raw pointer, so `x` must have room for every
    # element of `y`; reject a short destination instead of writing past its end.
    length(x) >= N || throw(
        DimensionMismatch(
            "destination has length $(length(x)), but the source has $N elements"
        )
    )
    W, Wshift = VectorizationBase.pick_vector_width_shift(T)
    Nrep = N >>> Wshift   # number of full-width loop iterations
    Nrem = N & (W - 1)    # number of elements left for the masked tail
    j = 0                 # count of elements written to `x` so far
    st = VectorizationBase.static_sizeof(T)
    zero_index = MM(W, StaticInt(0), st)
    incr = W * st         # byte distance between consecutive vectors of `y`
    GC.@preserve x y begin
        ptr_x = pointer(x)
        ptr_y = pointer(y)
        for _ ∈ 1:Nrep
            vy = VectorizationBase.__vload(ptr_y, zero_index, False(), register_size())
            keep = f(vy)
            # Pack the selected lanes contiguously, starting at element offset `j` of `x`.
            VectorizationBase.compressstore!(
                gep(ptr_x, VectorizationBase.lazymul(st, j)),
                vy,
                keep
            )
            ptr_y = gep(ptr_y, incr)
            j = vadd_nw(j, count_ones(keep))
        end
        # Tail: fewer than `W` elements remain. It is skipped when `N` is an exact multiple
        # of `W`, so the result never depends on what `VectorizationBase.mask` yields for a
        # zero remainder and no load or store is issued at the end of `y`.
        if Nrem != 0
            rem_mask = VectorizationBase.mask(T, Nrem)
            vtail = VectorizationBase.__vload(
                ptr_y,
                zero_index,
                rem_mask,
                False(),
                register_size()
            )
            # Lanes outside the remainder are never kept, whatever `f` returns for them.
            keep_tail = rem_mask & f(vtail)
            VectorizationBase.compressstore!(
                gep(ptr_x, VectorizationBase.lazymul(st, j)),
                vtail,
                keep_tail
            )
            j = vadd_nw(j, count_ones(keep_tail))
        end
    end
    # Drop everything after the `j` kept elements. `resize!` is the public equivalent of
    # the internal `Base._deleteend!` and is correct for any `length(x) >= N`.
    resize!(x, j)
    return x
end
# In-place entry point for vectors of a `NativeTypes` element type: `x` is passed to the
# three-argument method as both the destination and the source.
function vfilter!(f::F, x::Vector{T}) where {F,T<:NativeTypes}
    return vfilter!(f, x, x)
end
# Out-of-place entry point for arrays of a `NativeTypes` element type: allocates an
# uninitialized `Vector{T}` with one slot per element of `y` and hands it to the
# three-argument `vfilter!` as the destination.
function vfilter(f::F, y::AbstractArray{T}) where {F,T<:NativeTypes}
    dest = Vector{T}(undef, length(y))
    return vfilter!(f, dest, y)
end
# Generic fallback: any argument not matched by a more specific method is forwarded to
# `Base.filter` unchanged.
function vfilter(f::F, y) where {F}
    return filter(f, y)
end
# Generic in-place fallback: any argument not matched by a more specific method is
# forwarded to `Base.filter!` unchanged.
function vfilter!(f::F, y) where {F}
    return filter!(f, y)
end
"""
    vfilter(f, a::AbstractArray)

SIMD-vectorized `filter`, returning an array containing the elements of `a` for which `f` returns `true`.
This function requires AVX512 to be faster than `Base.filter`, because it relies on the compress-store instructions that AVX512 adds.
"""
vfilter
"""
    vfilter!(f, a::AbstractArray)

SIMD-vectorized `filter!`, removing the elements of `a` for which `f` returns `false`.
"""
vfilter!