Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 46 additions & 3 deletions src/scalarstats.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,11 @@ end
# compute mode, given the range of integer values
"""
mode(a, [r])
mode(a::AbstractVector, wv::AbstractWeights)

Return the mode (most common number) of an array, optionally
over a specified range `r`. If several modes exist, the first
one (in order of appearance) is returned.
over a specified range `r` or weighted via a vector `wv`.
If several modes exist, the first one (in order of appearance) is returned.
"""
function mode(a::AbstractArray{T}, r::UnitRange{T}) where T<:Integer
isempty(a) && throw(ArgumentError("mode is not defined for empty collections"))
Expand All @@ -75,9 +76,10 @@ end

"""
modes(a, [r])::Vector
modes(a::AbstractVector, wv::AbstractWeights)::Vector

Return all modes (most common numbers) of an array, optionally over a
specified range `r`.
specified range `r` or weighted via vector `wv`.
"""
function modes(a::AbstractArray{T}, r::UnitRange{T}) where T<:Integer
r0 = r[1]
Expand Down Expand Up @@ -158,6 +160,47 @@ function modes(a)
return [x for (x, c) in cnts if c == mc]
end

# Weighted mode of arbitrary vectors of values.
#
# Accumulates, per distinct value of `a`, the sum of the weights paired with it
# in `wv`, and returns the value whose running total was the last to strictly
# exceed the best total seen so far. For non-negative weights this is the value
# with the largest total weight; on ties, the value that reached that total
# first during iteration wins.
#
# Throws `ArgumentError` when `a` is empty, when `a` and `wv` differ in length,
# or when any weight is NaN or infinite.
function mode(a::AbstractVector, wv::AbstractWeights{T}) where T <: Real
    isempty(a) && throw(ArgumentError("mode is not defined for empty collections"))
    length(a) == length(wv) ||
        throw(ArgumentError("data and weight vectors must be the same size, got $(length(a)) and $(length(wv))"))
    # NaN compares false against everything, so a single NaN weight would make
    # the `>` test below silently pick an arbitrary value; reject it up front.
    all(isfinite, wv) || throw(ArgumentError("only finite weights are supported"))

    # Seed the running best with the first (value, weight) pair.
    mv = first(a)
    mw = first(wv)
    # Named `totals` rather than `weights` to avoid shadowing the `weights` function.
    totals = Dict{eltype(a), T}()
    for (x, w) in zip(a, wv)
        # `get` (not `get!`) avoids inserting a zero that is overwritten right away.
        _w = get(totals, x, zero(T)) + w
        if _w > mw
            mv = x
            mw = _w
        end
        totals[x] = _w
    end

    return mv
end

# Weighted modes of arbitrary vectors of values.
#
# Sums, per distinct value of `a`, the weights paired with it in `wv`, and
# returns every value whose total equals the largest total. The order of the
# returned vector follows the iteration order of the internal `Dict` and is
# therefore unspecified; sort the result if a stable order is needed.
#
# Throws `ArgumentError` when `a` is empty, when `a` and `wv` differ in length,
# or when any weight is NaN or infinite.
function modes(a::AbstractVector, wv::AbstractWeights{T}) where T <: Real
    isempty(a) && throw(ArgumentError("mode is not defined for empty collections"))
    length(a) == length(wv) ||
        throw(ArgumentError("data and weight vectors must be the same size, got $(length(a)) and $(length(wv))"))
    # A NaN total never compares equal to the maximum, which would yield an
    # empty result for non-empty input; reject non-finite weights up front.
    all(isfinite, wv) || throw(ArgumentError("only finite weights are supported"))

    # Accumulate the total weight carried by each distinct value.
    totals = Dict{eltype(a), T}()
    for (x, w) in zip(a, wv)
        totals[x] = get(totals, x, zero(T)) + w
    end

    # Take the maximum over the *final* totals rather than over running partial
    # sums, so the comparison below always matches at least one entry.
    mw = maximum(values(totals))

    # find values corresponding to the maximum total weight
    return [x for (x, w) in totals if w == mw]
end

#############################
#
Expand Down
14 changes: 14 additions & 0 deletions test/scalarstats.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,24 @@ using Statistics
@test modes(skipmissing([1, missing, missing, 3, 2, 2, missing])) == [2]
@test sort(modes(skipmissing([1, missing, 3, 3, 2, 2, missing]))) == [2, 3]

# Fixtures for weighted mode: `d1` and `d2` share the same repetition pattern
# (one value occurring three times, another occurring twice).
d1 = [1, 2, 3, 3, 4, 5, 5, 3]
d2 = ['a', 'b', 'c', 'c', 'd', 'e', 'e', 'c']
# Weights 0.1, 0.2, ..., 0.7 for the first seven elements, then 0.1 for the last.
wv = weights([0.1:0.1:0.7; 0.1])
# Unweighted: the value that occurs three times wins.
@test mode(d1) == 3
@test mode(d2) == 'c'
# Weighted: 5/'e' totals 0.6 + 0.7, beating 3/'c' at 0.3 + 0.4 + 0.1.
@test mode(d1, wv) == 5
@test mode(d2, wv) == 'e'
# Unit weights on the first seven elements: 3 and 5 each occur twice, so both are modes.
@test sort(modes(d1[1:end-1], weights(ones(7)))) == [3, 5]
# 1 and 4 each carry a single 0.9 weight, more than 3 (three 0.1 weights) or 5 (two 0.1 weights).
@test sort(modes(d1, weights([.9, .1, .1, .1, .9, .1, .1, .1]))) == [1, 4]

# Empty input is rejected for both `mode` and `modes`, whatever the element type.
@test_throws ArgumentError mode(Int[])
@test_throws ArgumentError modes(Int[])
@test_throws ArgumentError mode(Any[])
@test_throws ArgumentError modes(Any[])
# The weighted methods reject empty input as well.
@test_throws ArgumentError mode([], weights(Float64[]))
@test_throws ArgumentError modes([], weights(Float64[]))
# Data and weight vectors of different lengths are rejected.
@test_throws ArgumentError mode([1, 2, 3], weights([0.1, 0.3]))
@test_throws ArgumentError modes([1, 2, 3], weights([0.1, 0.3]))

## zscores

Expand Down