Skip to content

Commit

Permalink
Add entropy functions and friends
Browse files Browse the repository at this point in the history
  • Loading branch information
lindahua committed May 21, 2014
1 parent 075908c commit 1c964a9
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 0 deletions.
16 changes: 16 additions & 0 deletions docs/source/scalarstats.rst
Expand Up @@ -39,6 +39,22 @@ Measurements of Variation
Compute the middle between ``a`` and ``b``, *i.e.* ``(a + b) / 2``.


Entropy and Friends
---------------------

- **entropy**(p)
Compute the entropy of the probability vector ``p``.

- **crossentropy**(p, q)
Compute the cross entropy between two probability vectors ``p`` and ``q``.

- **kldivergence**(p, q)
Compute the Kullback-Leibler divergence between ``p`` and ``q``.


Quantile and Friends
---------------------

Expand Down
3 changes: 3 additions & 0 deletions src/StatsBase.jl
Expand Up @@ -21,6 +21,9 @@ module StatsBase
sem, # standard error of the mean, i.e. sqrt(var / n)
mad, # median absolute deviation
middle, # the mean of two real numbers
entropy, # the entropy of a probability vector
crossentropy, # cross entropy between two probability vectors
kldivergence, # K-L divergence between two probability vectors
percentile, # quantile using percentage (instead of fraction) as argument
iqr, # interquartile range
nquantile, # quantiles at [0:n]/n
Expand Down
48 changes: 48 additions & 0 deletions src/scalarstats.jl
Expand Up @@ -131,6 +131,54 @@ middle{T<:FloatingPoint}(a1::T, a2::T) = (a1 + a2) / convert(T, 2)
# Middle of two integers of the same type, returned as a floating-point value.
# Each operand is halved before the addition: summing first would evaluate
# `a1 + a2` in the integer type, which wraps around silently for values near
# `typemax(T)` / `typemin(T)` and yields a wrong (even sign-flipped) result.
middle{T<:Integer}(a1::T, a2::T) = a1 / 2 + a2 / 2


#############################
#
# entropy and friends
#
#############################

# Compute the entropy of the probability array `p`: the negated sum of
# `x * log(x)` (natural logarithm) over the entries `x` of `p`.
#
# Entries that are not strictly positive are skipped, so a zero probability
# contributes nothing instead of turning the sum into NaN via `0 * log(0)`.
function entropy{T<:Real}(p::AbstractArray{T})
    acc = 0.
    for x in p
        # only strictly positive entries carry a term
        x > zero(T) || continue
        acc += x * log(x)
    end
    return -acc
end

# Compute the cross entropy between the probability arrays `p` and `q`: the
# negated sum of `p[i] * log(q[i])` (natural logarithm).
#
# `p` and `q` may have different real element types (e.g. an integer 0/1
# indicator array against a floating-point array, or Float32 against Float64);
# they only need to have the same length.
#
# Terms where `p[i]` is not strictly positive are skipped, so the value of
# `q[i]` is only consulted where `p` has mass.
#
# Throws `DimensionMismatch` if `p` and `q` differ in length.
function crossentropy{Tp<:Real,Tq<:Real}(p::AbstractArray{Tp}, q::AbstractArray{Tq})
    length(p) == length(q) || throw(DimensionMismatch("Inconsistent array length."))
    s = 0.
    z = zero(Tp)
    for i = 1:length(p)
        # indices are within bounds for both arrays thanks to the length check
        @inbounds px = p[i]
        @inbounds qx = q[i]
        if px > z
            s += px * log(qx)
        end
    end
    return -s
end

# Compute the Kullback-Leibler divergence between the probability arrays `p`
# and `q`: the sum of `p[i] * log(p[i] / q[i])` (natural logarithm).
#
# `p` and `q` may have different real element types (e.g. an integer 0/1
# indicator array against a floating-point array, or Float32 against Float64);
# they only need to have the same length.
#
# Terms where `p[i]` is not strictly positive are skipped, so the value of
# `q[i]` is only consulted where `p` has mass.
#
# Throws `DimensionMismatch` if `p` and `q` differ in length.
function kldivergence{Tp<:Real,Tq<:Real}(p::AbstractArray{Tp}, q::AbstractArray{Tq})
    length(p) == length(q) || throw(DimensionMismatch("Inconsistent array length."))
    s = 0.
    z = zero(Tp)
    for i = 1:length(p)
        # indices are within bounds for both arrays thanks to the length check
        @inbounds px = p[i]
        @inbounds qx = q[i]
        if px > z
            s += px * log(px / qx)
        end
    end
    return s
end



#############################
#
# quantile and friends
Expand Down
9 changes: 9 additions & 0 deletions test/scalarstats.jl
Expand Up @@ -28,6 +28,15 @@ wv = weights(ones(5) * 2.0)
@test_approx_eq mad([1:5]) 1.4826
@test_approx_eq mad(1:5) 1.4826

## entropy

# Expected values use the natural logarithm; entropy([0.5, 0.5]) is log(2).
@test_approx_eq entropy([0.5, 0.5]) 0.6931471805599453
@test_approx_eq entropy([0.2, 0.3, 0.5]) 1.0296530140645737

# Cross entropy of p = [0.2, 0.3, 0.5] against q = [0.3, 0.4, 0.3].
@test_approx_eq crossentropy([0.2, 0.3, 0.5], [0.3, 0.4, 0.3]) 1.1176681825904018

# For the same p and q, this value equals crossentropy(p, q) - entropy(p).
@test_approx_eq kldivergence([0.2, 0.3, 0.5], [0.3, 0.4, 0.3]) 0.08801516852582819

# quantile & friends

@test_approx_eq quantile(1:5) [1:5]
Expand Down

0 comments on commit 1c964a9

Please sign in to comment.