Skip to content

Commit

Permalink
Change trim and winsor to return iterators (#546)
Browse files Browse the repository at this point in the history
`trim` and `winsor` now return iterators rather than arrays to avoid unnecessary copies,
and maintain original order of the data.
  • Loading branch information
AsafManela committed Feb 17, 2020
1 parent d5e6fbc commit 3762c78
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 70 deletions.
107 changes: 57 additions & 50 deletions src/robust.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,96 +7,103 @@
#############################

# Trimmed set
"Return the upper and lower bound elements used by `trim` and `winsor`"
function uplo(x::AbstractVector; prop::Real=0.0, count::Integer=0)
n = length(x)
n > 0 || throw(ArgumentError("x can not be empty."))

if count == 0
0 <= prop < 0.5 || throw(ArgumentError("prop must satisfy 0 ≤ prop < 0.5."))
count = floor(Int, n * prop)
else
prop == 0 || throw(ArgumentError("prop and count can not both be > 0."))
0 <= count < n/2 || throw(ArgumentError("count must satisfy 0 ≤ count < length(x)/2."))
end

# indices for lowest count values
x2 = copy(x)
lo = partialsort!(x2, 1:count+1)[end]
# indices for largest count values
up = partialsort!(x2, n-count:n)[1]

up, lo
end

"""
trim(x; prop=0.0, count=0)
trim(x::AbstractVector; prop=0.0, count=0)
Return an iterator of all elements of `x` that omits either `count` or proportion
`prop` of the highest and lowest elements.
Return a copy of `x` with either `count` or proportion `prop` of the highest
and lowest elements removed. To compute the trimmed mean of `x` use
`mean(trim(x))`; to compute the variance use `trimvar(x)` (see [`trimvar`](@ref)).
The number of trimmed elements could be smaller than specified if several
elements equal the lower or upper bound.
To compute the trimmed mean of `x` use `mean(trim(x))`;
to compute the variance use `trimvar(x)` (see [`trimvar`](@ref)).
# Example
```julia
julia> trim([1,2,3,4,5], prop=0.2)
julia> collect(trim([5,2,4,3,1], prop=0.2))
3-element Array{Int64,1}:
2
3
4
3
```
"""
function trim(x::AbstractVector; prop::Real=0.0, count::Integer=0)
trim!(copy(x); prop=prop, count=count)
up, lo = uplo(x; prop=prop, count=count)

(xi for xi in x if lo <= xi <= up)
end

"""
trim!(x; prop=0.0, count=0)
trim!(x::AbstractVector; prop=0.0, count=0)
A variant of [`trim`](@ref) that modifies `x` in place.
"""
function trim!(x::AbstractVector; prop::Real=0.0, count::Integer=0)
n = length(x)
n > 0 || throw(ArgumentError("x can not be empty."))

if count == 0
0 <= prop < 0.5 || throw(ArgumentError("prop must satisfy 0 ≤ prop < 0.5."))
count = floor(Int, n * prop)
else
prop == 0 || throw(ArgumentError("prop and count can not both be > 0."))
0 <= count < n/2 || throw(ArgumentError("count must satisfy 0 ≤ count < length(x)/2."))
end

partialsort!(x, (n-count+1):n)
partialsort!(x, 1:count)
deleteat!(x, (n-count+1):n)
deleteat!(x, 1:count)

up, lo = uplo(x; prop=prop, count=count)
ix = (i for (i,xi) in enumerate(x) if lo > xi || xi > up)
deleteat!(x, ix)
return x
end

"""
winsor(x; prop=0.0, count=0)
winsor(x::AbstractVector; prop=0.0, count=0)
Return a copy of `x` with either `count` or proportion `prop` of the lowest
elements of `x` replaced with the next-lowest, and an equal number of the
highest elements replaced with the previous-highest. To compute the Winsorized
mean of `x` use `mean(winsor(x))`.
Return an iterator of all elements of `x` that replaces either `count` or
proportion `prop` of the highest elements with the previous-highest element
and an equal number of the lowest elements with the next-lowest element.
The number of replaced elements could be smaller than specified if several
elements equal the lower or upper bound.
To compute the Winsorized mean of `x` use `mean(winsor(x))`.
# Example
```julia
julia> winsor([1,2,3,4,5], prop=0.2)
julia> collect(winsor([5,2,3,4,1], prop=0.2))
5-element Array{Int64,1}:
2
4
2
3
4
4
2
```
"""
function winsor(x::AbstractVector; prop::Real=0.0, count::Integer=0)
winsor!(copy(x); prop=prop, count=count)
up, lo = uplo(x; prop=prop, count=count)

(clamp(xi, lo, up) for xi in x)
end

"""
winsor!(x; prop=0.0, count=0)
winsor!(x::AbstractVector; prop=0.0, count=0)
A variant of [`winsor`](@ref) that modifies vector `x` in place.
"""
function winsor!(x::AbstractVector; prop::Real=0.0, count::Integer=0)
n = length(x)
n > 0 || throw(ArgumentError("x can not be empty."))

if count == 0
0 <= prop < 0.5 || throw(ArgumentError("prop must satisfy 0 ≤ prop < 0.5."))
count = floor(Int, n * prop)
else
prop == 0 || throw(ArgumentError("prop and count can not both be > 0."))
0 <= count < n/2 || throw(ArgumentError("count must satisfy 0 ≤ count < length(x)/2."))
end

partialsort!(x, (n-count+1):n)
partialsort!(x, 1:count)
x[1:count] .= x[count+1]
x[n-count+1:end] .= x[n-count]

copyto!(x, winsor(x; prop=prop, count=count))
return x
end

Expand Down
41 changes: 21 additions & 20 deletions test/robust.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,39 +3,40 @@ using Test

### Trimming outliers

@test trim([1,2,3,4,5,6,7,8], prop=0.1) == [1,2,3,4,5,6,7,8]
@test trim([1,2,3,4,5,6,7,8], prop=0.2) == [2,3,4,5,6,7]
@test trim([1,2,3,4,5,6,7,8,9], prop=0.4) == [4,5,6]
@test trim([1,2,3,4,5,6,7,8], count=1) == [2,3,4,5,6,7]
@test trim([1,2,3,4,5,6,7,8,9], count=3) == [4,5,6]
@test collect(trim([8,2,3,4,5,6,7,1], prop=0.1)) == [8,2,3,4,5,6,7,1]
@test collect(trim([8,2,3,4,5,6,7,1], prop=0.2)) == [2,3,4,5,6,7]
@test collect(trim([1,2,3,4,5,6,7,8,9], prop=0.4)) == [4,5,6]
@test collect(trim([8,7,6,5,4,3,2,1], count=1)) == [7,6,5,4,3,2]
@test collect(trim([1,2,3,4,5,6,7,8,9], count=3)) == [4,5,6]


@test_throws ArgumentError trim([])
@test_throws ArgumentError trim([1,2,3,4,5], prop=0.5)

@test trim!([1,2,3,4,5,6,7,8], prop=0.1) == [1,2,3,4,5,6,7,8]
@test trim!([1,2,3,4,5,6,7,8], prop=0.2) == [2,3,4,5,6,7]
@test trim!([1,2,3,4,5,6,7,8,9], prop=0.4) == [4,5,6]
@test trim!([1,2,3,4,5,6,7,8], count=1) == [2,3,4,5,6,7]
@test trim!([1,2,3,4,5,6,7,8,9], count=3) == [4,5,6]
@test collect(trim!([8,2,3,4,5,6,7,1], prop=0.1)) == [8,2,3,4,5,6,7,1]
@test collect(trim!([8,2,3,4,5,6,7,1], prop=0.2)) == [2,3,4,5,6,7]
@test collect(trim!([1,2,3,4,5,6,7,8,9], prop=0.4)) == [4,5,6]
@test collect(trim!([8,7,6,5,4,3,2,1], count=1)) == [7,6,5,4,3,2]
@test collect(trim!([1,2,3,4,5,6,7,8,9], count=3)) == [4,5,6]

@test_throws ArgumentError trim!([])
@test_throws ArgumentError trim!([1,2,3,4,5], prop=0.5)

@test winsor([1,2,3,4,5,6,7,8], prop=0.1) == [1,2,3,4,5,6,7,8]
@test winsor([1,2,3,4,5,6,7,8], prop=0.2) == [2,2,3,4,5,6,7,7]
@test winsor([1,2,3,4,5,6,7,8,9], prop=0.4) == [4,4,4,4,5,6,6,6,6]
@test winsor([1,2,3,4,5,6,7,8], count=1) == [2,2,3,4,5,6,7,7]
@test winsor([1,2,3,4,5,6,7,8,9], count=3) == [4,4,4,4,5,6,6,6,6]
@test collect(winsor([8,2,3,4,5,6,7,1], prop=0.1)) == [8,2,3,4,5,6,7,1]
@test collect(winsor([8,2,3,4,5,6,7,1], prop=0.2)) == [7,2,3,4,5,6,7,2]
@test collect(winsor([1,2,3,4,5,6,7,8,9], prop=0.4)) == [4,4,4,4,5,6,6,6,6]
@test collect(winsor([1,2,3,4,5,6,7,8], count=1)) == [2,2,3,4,5,6,7,7]
@test collect(winsor([8,7,6,5,4,3,2,1], count=1)) == [7,7,6,5,4,3,2,2]
@test collect(winsor([1,2,3,4,5,6,7,8,9], count=3)) == [4,4,4,4,5,6,6,6,6]

@test_throws ArgumentError winsor([])
@test_throws ArgumentError winsor([1,2,3,4,5], prop=0.5)

@test winsor!([1,2,3,4,5,6,7,8], prop=0.1) == [1,2,3,4,5,6,7,8]
@test winsor!([1,2,3,4,5,6,7,8], prop=0.2) == [2,2,3,4,5,6,7,7]
@test winsor!([1,2,3,4,5,6,7,8,9], prop=0.4) == [4,4,4,4,5,6,6,6,6]
@test winsor!([1,2,3,4,5,6,7,8], count=1) == [2,2,3,4,5,6,7,7]
@test winsor!([1,2,3,4,5,6,7,8,9], count=3) == [4,4,4,4,5,6,6,6,6]
@test collect(winsor!([8,2,3,4,5,6,7,1], prop=0.1)) == [8,2,3,4,5,6,7,1]
@test collect(winsor!([8,2,3,4,5,6,7,1], prop=0.2)) == [7,2,3,4,5,6,7,2]
@test collect(winsor!([1,2,3,4,5,6,7,8,9], prop=0.4)) == [4,4,4,4,5,6,6,6,6]
@test collect(winsor!([8,7,6,5,4,3,2,1], count=1)) == [7,7,6,5,4,3,2,2]
@test collect(winsor!([1,2,3,4,5,6,7,8,9], count=3)) == [4,4,4,4,5,6,6,6,6]

@test_throws ArgumentError winsor!([])
@test_throws ArgumentError winsor!([1,2,3,4,5], prop=0.5)
Expand Down

0 comments on commit 3762c78

Please sign in to comment.