From 85d8fe092307cbec807a850cf99284f030507379 Mon Sep 17 00:00:00 2001 From: Johnny Chen Date: Tue, 6 Aug 2019 11:20:24 +0800 Subject: [PATCH 01/16] make evaluate a general API --- src/generic.jl | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/generic.jl b/src/generic.jl index 3580faf..8b21fb1 100644 --- a/src/generic.jl +++ b/src/generic.jl @@ -21,6 +21,7 @@ abstract type SemiMetric <: PreMetric end # abstract type Metric <: SemiMetric end +evaluate(dist::PreMetric, a, b) = dist(a, b) # Generic functions @@ -82,7 +83,7 @@ end # Generic pairwise evaluation function _pairwise!(r::AbstractMatrix, metric::PreMetric, - a::AbstractMatrix, b::AbstractMatrix=a) + a::AbstractMatrix, b::AbstractMatrix = a) na = size(a, 2) nb = size(b, 2) size(r) == (na, nb) || throw(DimensionMismatch("Incorrect size of r.")) @@ -135,7 +136,7 @@ If a single matrix `a` is provided, compute distances between its rows or column """ function pairwise!(r::AbstractMatrix, metric::PreMetric, a::AbstractMatrix, b::AbstractMatrix; - dims::Union{Nothing,Integer}=nothing) + dims::Union{Nothing,Integer} = nothing) dims = deprecated_dims(dims) dims in (1, 2) || throw(ArgumentError("dims should be 1 or 2 (got $dims)")) if dims == 1 @@ -159,7 +160,7 @@ function pairwise!(r::AbstractMatrix, metric::PreMetric, end function pairwise!(r::AbstractMatrix, metric::PreMetric, a::AbstractMatrix; - dims::Union{Nothing,Integer}=nothing) + dims::Union{Nothing,Integer} = nothing) dims = deprecated_dims(dims) dims in (1, 2) || throw(ArgumentError("dims should be 1 or 2 (got $dims)")) if dims == 1 @@ -186,20 +187,20 @@ compute distances between its rows or columns. `a` and `b` must have the same numbers of columns if `dims=1`, or of rows if `dims=2`. 
""" function pairwise(metric::PreMetric, a::AbstractMatrix, b::AbstractMatrix; - dims::Union{Nothing,Integer}=nothing) + dims::Union{Nothing,Integer} = nothing) dims = deprecated_dims(dims) dims in (1, 2) || throw(ArgumentError("dims should be 1 or 2 (got $dims)")) m = size(a, dims) n = size(b, dims) r = Matrix{result_type(metric, a, b)}(undef, m, n) - pairwise!(r, metric, a, b, dims=dims) + pairwise!(r, metric, a, b, dims = dims) end function pairwise(metric::PreMetric, a::AbstractMatrix; - dims::Union{Nothing,Integer}=nothing) + dims::Union{Nothing,Integer} = nothing) dims = deprecated_dims(dims) dims in (1, 2) || throw(ArgumentError("dims should be 1 or 2 (got $dims)")) n = size(a, dims) r = Matrix{result_type(metric, a, a)}(undef, n, n) - pairwise!(r, metric, a, dims=dims) + pairwise!(r, metric, a, dims = dims) end From d7962c327b5d58995a5aac4e6d189fbcad91d8ea Mon Sep 17 00:00:00 2001 From: Johnny Chen Date: Tue, 6 Aug 2019 11:24:05 +0800 Subject: [PATCH 02/16] BhattacharyyaDist and HellingerDist --- src/bhattacharyya.jl | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/bhattacharyya.jl b/src/bhattacharyya.jl index 06ec9ba..26705c9 100644 --- a/src/bhattacharyya.jl +++ b/src/bhattacharyya.jl @@ -6,7 +6,6 @@ struct BhattacharyyaDist <: SemiMetric end struct HellingerDist <: Metric end - # Bhattacharyya coefficient function bhattacharyya_coeff(a::AbstractVector{T}, b::AbstractVector{T}) where {T <: Number} @@ -37,13 +36,11 @@ bhattacharyya_coeff(a::T, b::T) where {T <: Number} = throw("Bhattacharyya coeff # Bhattacharyya distance -evaluate(dist::BhattacharyyaDist, a::AbstractVector{T}, b::AbstractVector{T}) where {T <: Number} = -log(bhattacharyya_coeff(a, b)) -bhattacharyya(a::AbstractVector, b::AbstractVector) = evaluate(BhattacharyyaDist(), a, b) -evaluate(dist::BhattacharyyaDist, a::T, b::T) where {T <: Number} = throw("Bhattacharyya distance cannot be calculated for scalars") -bhattacharyya(a::T, b::T) where {T <: Number} 
= evaluate(BhattacharyyaDist(), a, b) +(::BhattacharyyaDist)(a::AbstractVector{T}, b::AbstractVector{T}) where {T <: Number} = -log(bhattacharyya_coeff(a, b)) +(::BhattacharyyaDist)(a::T, b::T) where {T <: Number} = throw("Bhattacharyya distance cannot be calculated for scalars") +bhattacharyya(a, b) = BhattacharyyaDist()(a, b) # Hellinger distance -evaluate(dist::HellingerDist, a::AbstractVector{T}, b::AbstractVector{T}) where {T <: Number} = sqrt(1 - bhattacharyya_coeff(a, b)) -hellinger(a::AbstractVector, b::AbstractVector) = evaluate(HellingerDist(), a, b) -evaluate(dist::HellingerDist, a::T, b::T) where {T <: Number} = throw("Hellinger distance cannot be calculated for scalars") -hellinger(a::T, b::T) where {T <: Number} = evaluate(HellingerDist(), a, b) +(::HellingerDist)(a::AbstractVector{T}, b::AbstractVector{T}) where {T <: Number} = sqrt(1 - bhattacharyya_coeff(a, b)) +(::HellingerDist)(a::T, b::T) where {T <: Number} = throw("Hellinger distance cannot be calculated for scalars") +hellinger(a, b) = HellingerDist()(a, b) From 200203c00e34be04dbea3b066b2d4d4283319770 Mon Sep 17 00:00:00 2001 From: Johnny Chen Date: Tue, 6 Aug 2019 11:28:27 +0800 Subject: [PATCH 03/16] Bregman --- src/bregman.jl | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/bregman.jl b/src/bregman.jl index c4ae26e..808326e 100644 --- a/src/bregman.jl +++ b/src/bregman.jl @@ -1,48 +1,48 @@ -# Bregman divergence +# Bregman divergence """ Implements the Bregman divergence, a friendly introduction to which can be found -[here](http://mark.reid.name/blog/meet-the-bregman-divergences.html). -Bregman divergences are a minimal implementation of the "mean-minimizer" property. +[here](http://mark.reid.name/blog/meet-the-bregman-divergences.html). +Bregman divergences are a minimal implementation of the "mean-minimizer" property. 
-It is assumed that the (convex differentiable) function F maps vectors (of any type or size) to real numbers. -The inner product used is `Base.dot`, but one can be passed in either by defining `inner` or by -passing in a keyword argument. If an analytic gradient isn't available, Julia offers a suite -of good automatic differentiation packages. +It is assumed that the (convex differentiable) function F maps vectors (of any type or size) to real numbers. +The inner product used is `LinearAlgebra.dot`, but one can be passed in either by defining `inner` or by +passing in a keyword argument. If an analytic gradient isn't available, Julia offers a suite +of good automatic differentiation packages. function evaluate(dist::Bregman, p::AbstractVector, q::AbstractVector) """ -struct Bregman{T1 <: Function, T2 <: Function, T3 <: Function} <: PreMetric +struct Bregman{T1 <: Function,T2 <: Function,T3 <: Function} <: PreMetric F::T1 ∇::T2 inner::T3 end -# Default costructor. +# Default constructor. Bregman(F, ∇) = Bregman(F, ∇, LinearAlgebra.dot) -# Evaluation fuction -function evaluate(dist::Bregman, p::AbstractVector, q::AbstractVector) +# Evaluation function +function (dist::Bregman)(p::AbstractVector, q::AbstractVector) # Create cache vals. FP_val = dist.F(p); - FQ_val = dist.F(q); + FQ_val = dist.F(q); DQ_val = dist.∇(q); p_size = size(p); - # Check F codomain. + # Check F codomain. if !(isa(FP_val, Real) && isa(FQ_val, Real)) throw(ArgumentError("F Codomain Error: F doesn't map the vectors to real numbers")) - end - # Check vector size. + end + # Check vector size. if !(p_size == size(q)) throw(DimensionMismatch("The vector p ($(size(p))) and q ($(size(q))) are different sizes.")) end - # Check gradient size. + # Check gradient size. if !(size(DQ_val) == p_size) throw(DimensionMismatch("The gradient result is not the same size as p and q")) - end - # Return the Bregman divergence. + end + # Return the Bregman divergence. 
+ return FP_val - FQ_val - dist.inner(DQ_val, p - q); +end -# Convenience function. -bregman(F, ∇, x, y; inner = LinearAlgebra.dot) = evaluate(Bregman(F, ∇, inner), x, y) +# Convenience function. +bregman(F, ∇, x, y; inner = LinearAlgebra.dot) = Bregman(F, ∇, inner)(x, y) From a4d78ea7bd29e57fdbe355111b281f631724b5c7 Mon Sep 17 00:00:00 2001 From: Johnny Chen Date: Tue, 6 Aug 2019 11:30:21 +0800 Subject: [PATCH 04/16] Haversine --- src/haversine.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/haversine.jl b/src/haversine.jl index 018924e..711b902 100644 --- a/src/haversine.jl +++ b/src/haversine.jl @@ -6,13 +6,13 @@ The haversine distance between two locations on a sphere of given `radius`. Locations are described with longitude and latitude in degrees. The computed distance has the same units as that of the radius. """ -struct Haversine{T<:Real} <: Metric +struct Haversine{T <: Real} <: Metric radius::T end -const VecOrLengthTwoTuple{T} = Union{AbstractVector{T}, NTuple{2, T}} +const VecOrLengthTwoTuple{T} = Union{AbstractVector{T},NTuple{2,T}} -function evaluate(dist::Haversine, x::VecOrLengthTwoTuple, y::VecOrLengthTwoTuple) +function (dist::Haversine)(x::VecOrLengthTwoTuple, y::VecOrLengthTwoTuple) length(x) == length(y) == 2 || haversine_error() @inbounds begin @@ -27,12 +27,12 @@ function evaluate(dist::Haversine, x::VecOrLengthTwoTuple, y::VecOrLengthTwoTupl Δφ = φ₂ - φ₁ # haversine formula - a = sin(Δφ/2)^2 + cos(φ₁)*cos(φ₂)*sin(Δλ/2)^2 + a = sin(Δφ / 2)^2 + cos(φ₁) * cos(φ₂) * sin(Δλ / 2)^2 # distance on the sphere - 2 * dist.radius * asin( min(√a, one(a)) ) # take care of floating point errors + 2 * dist.radius * asin(min(√a, one(a))) # take care of floating point errors end -haversine(x::VecOrLengthTwoTuple, y::VecOrLengthTwoTuple, radius::Real) = evaluate(Haversine(radius), x, y) +haversine(x::VecOrLengthTwoTuple, y::VecOrLengthTwoTuple, radius::Real) = Haversine(radius)(x, y) @noinline haversine_error() = 
throw(ArgumentError("expected both inputs to have length 2 in Haversine distance")) From 5484e2557eddbf41c86dd0916d77b97584cbcd96 Mon Sep 17 00:00:00 2001 From: Johnny Chen Date: Tue, 6 Aug 2019 11:33:22 +0800 Subject: [PATCH 05/16] SqMahalanobis and Mahalanobis --- src/mahalanobis.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/mahalanobis.jl b/src/mahalanobis.jl index ed188b5..2b30adc 100644 --- a/src/mahalanobis.jl +++ b/src/mahalanobis.jl @@ -13,7 +13,7 @@ result_type(::SqMahalanobis{T}, ::AbstractArray, ::AbstractArray) where {T} = T # SqMahalanobis -function evaluate(dist::SqMahalanobis{T}, a::AbstractVector, b::AbstractVector) where {T <: Real} +function (dist::SqMahalanobis{T})(a::AbstractVector, b::AbstractVector) where {T <: Real} if length(a) != length(b) throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b)).")) end @@ -23,7 +23,7 @@ function evaluate(dist::SqMahalanobis{T}, a::AbstractVector, b::AbstractVector) return dot(z, Q * z) end -sqmahalanobis(a::AbstractVector, b::AbstractVector, Q::AbstractMatrix) = evaluate(SqMahalanobis(Q), a, b) +sqmahalanobis(a::AbstractVector, b::AbstractVector, Q::AbstractMatrix) = SqMahalanobis(Q)(a, b) function colwise!(r::AbstractArray, dist::SqMahalanobis{T}, a::AbstractMatrix, b::AbstractMatrix) where {T <: Real} Q = dist.qmat @@ -83,11 +83,11 @@ end # Mahalanobis -function evaluate(dist::Mahalanobis{T}, a::AbstractVector, b::AbstractVector) where {T <: Real} - sqrt(evaluate(SqMahalanobis(dist.qmat), a, b)) +function (dist::Mahalanobis{T})(a::AbstractVector, b::AbstractVector) where {T <: Real} + sqrt(SqMahalanobis(dist.qmat)(a, b)) end -mahalanobis(a::AbstractVector, b::AbstractVector, Q::AbstractMatrix) = evaluate(Mahalanobis(Q), a, b) +mahalanobis(a::AbstractVector, b::AbstractVector, Q::AbstractMatrix) = Mahalanobis(Q)(a, b) function colwise!(r::AbstractArray, dist::Mahalanobis{T}, a::AbstractMatrix, 
b::AbstractMatrix) where {T <: Real} sqrt!(colwise!(r, SqMahalanobis(dist.qmat), a, b)) From 83a2f3abb66555da22355d1395a211b275ced8fa Mon Sep 17 00:00:00 2001 From: Johnny Chen Date: Tue, 6 Aug 2019 14:48:14 +0800 Subject: [PATCH 06/16] metrics in metrics.jl CorrDist is excluded from `UnionMetrics` since it's a simple wrap on CosineDist --- src/metrics.jl | 96 ++++++++++++++++++++++++++------------------------ 1 file changed, 50 insertions(+), 46 deletions(-) diff --git a/src/metrics.jl b/src/metrics.jl index 93b17d9..cbe07f8 100644 --- a/src/metrics.jl +++ b/src/metrics.jl @@ -25,6 +25,7 @@ end struct Hamming <: Metric end struct CosineDist <: SemiMetric end +# CorrDist is excluded from `UnionMetrics` struct CorrDist <: SemiMetric end struct BrayCurtis <: SemiMetric end @@ -103,7 +104,8 @@ struct PeriodicEuclidean{W <: AbstractArray{<: Real}} <: Metric periods::W end -const UnionMetrics = Union{Euclidean,SqEuclidean,PeriodicEuclidean,Chebyshev,Cityblock,TotalVariation,Minkowski,Hamming,Jaccard,RogersTanimoto,CosineDist,CorrDist,ChiSqDist,KLDivergence,RenyiDivergence,BrayCurtis,JSDivergence,SpanNormDist,GenKLDivergence} +const metric_list = (Euclidean,SqEuclidean,PeriodicEuclidean,Chebyshev,Cityblock,TotalVariation,Minkowski,Hamming,Jaccard,RogersTanimoto,CosineDist,ChiSqDist,KLDivergence,RenyiDivergence,BrayCurtis,JSDivergence,SpanNormDist,GenKLDivergence) +const UnionMetrics = Union{metric_list...} """ Euclidean([thresh]) @@ -165,7 +167,7 @@ PeriodicEuclidean() = PeriodicEuclidean(Int[]) ########################################################### # -# Define Evaluate +# Implementations # ########################################################### @@ -173,8 +175,10 @@ const ArraySlice{T} = SubArray{T,1,Array{T,2},Tuple{Base.Slice{Base.OneTo{Int}}, @inline parameters(::UnionMetrics) = nothing +# breaks the implementation into eval_start, eval_op, eval_reduce and eval_end + # Specialized for Arrays and avoids a branch on the size -@inline Base.@propagate_inbounds 
function evaluate(d::UnionMetrics, a::Union{Array, ArraySlice}, b::Union{Array, ArraySlice}) +@inline Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::Union{Array, ArraySlice}, b::Union{Array, ArraySlice}) @boundscheck if length(a) != length(b) throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b)).")) end @@ -205,7 +209,7 @@ const ArraySlice{T} = SubArray{T,1,Array{T,2},Tuple{Base.Slice{Base.OneTo{Int}}, end end -@inline function evaluate(d::UnionMetrics, a::AbstractArray, b::AbstractArray) +@inline function _evaluate(d::UnionMetrics, a::AbstractArray, b::AbstractArray) @boundscheck if length(a) != length(b) throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b)).")) end @@ -253,27 +257,29 @@ end return eval_end(d, s) end result_type(dist::UnionMetrics, a::AbstractArray, b::AbstractArray) = - typeof(evaluate(dist, oneunit(eltype(a)), oneunit(eltype(b)))) - + typeof(dist(oneunit(eltype(a)), oneunit(eltype(b)))) eval_start(d::UnionMetrics, a::AbstractArray, b::AbstractArray) = zero(result_type(d, a, b)) eval_end(d::UnionMetrics, s) = s -evaluate(dist::UnionMetrics, a::Number, b::Number) = eval_end(dist, eval_op(dist, a, b)) +for M in metric_list + @eval @inline (dist::$M)(a::AbstractArray, b::AbstractArray) = _evaluate(dist, a, b) + @eval @inline (dist::$M)(a::Number, b::Number) = eval_end(dist, eval_op(dist, a, b)) +end # SqEuclidean @inline eval_op(::SqEuclidean, ai, bi) = abs2(ai - bi) @inline eval_reduce(::SqEuclidean, s1, s2) = s1 + s2 -sqeuclidean(a::AbstractArray, b::AbstractArray) = evaluate(SqEuclidean(), a, b) -sqeuclidean(a::Number, b::Number) = evaluate(SqEuclidean(), a, b) +sqeuclidean(a::AbstractArray, b::AbstractArray) = SqEuclidean()(a, b) +sqeuclidean(a::Number, b::Number) = SqEuclidean()(a, b) # Euclidean @inline eval_op(::Euclidean, ai, bi) = abs2(ai - bi) @inline eval_reduce(::Euclidean, s1, s2) = s1 + 
s2 eval_end(::Euclidean, s) = sqrt(s) -euclidean(a::AbstractArray, b::AbstractArray) = evaluate(Euclidean(), a, b) -euclidean(a::Number, b::Number) = evaluate(Euclidean(), a, b) +euclidean(a::AbstractArray, b::AbstractArray) = Euclidean()(a, b) +euclidean(a::Number, b::Number) = Euclidean()(a, b) # PeriodicEuclidean Base.eltype(d::PeriodicEuclidean) = eltype(d.periods) @@ -292,42 +298,42 @@ end @inline eval_reduce(::PeriodicEuclidean, s1, s2) = s1 + s2 @inline eval_end(::PeriodicEuclidean, s) = sqrt(s) peuclidean(a::AbstractArray, b::AbstractArray, p::AbstractArray{<: Real}) = - evaluate(PeriodicEuclidean(p), a, b) -peuclidean(a::Number, b::Number, p::Real) = evaluate(PeriodicEuclidean([p]), a, b) + PeriodicEuclidean(p)(a, b) +peuclidean(a::Number, b::Number, p::Real) = PeriodicEuclidean([p])(a, b) # Cityblock @inline eval_op(::Cityblock, ai, bi) = abs(ai - bi) @inline eval_reduce(::Cityblock, s1, s2) = s1 + s2 -cityblock(a::AbstractArray, b::AbstractArray) = evaluate(Cityblock(), a, b) -cityblock(a::Number, b::Number) = evaluate(Cityblock(), a, b) +cityblock(a::AbstractArray, b::AbstractArray) = Cityblock()(a, b) +cityblock(a::Number, b::Number) = Cityblock()(a, b) # Total variation @inline eval_op(::TotalVariation, ai, bi) = abs(ai - bi) @inline eval_reduce(::TotalVariation, s1, s2) = s1 + s2 eval_end(::TotalVariation, s) = s / 2 -totalvariation(a::AbstractArray, b::AbstractArray) = evaluate(TotalVariation(), a, b) -totalvariation(a::Number, b::Number) = evaluate(TotalVariation(), a, b) +totalvariation(a::AbstractArray, b::AbstractArray) = TotalVariation()(a, b) +totalvariation(a::Number, b::Number) = TotalVariation()(a, b) # Chebyshev @inline eval_op(::Chebyshev, ai, bi) = abs(ai - bi) @inline eval_reduce(::Chebyshev, s1, s2) = max(s1, s2) # if only NaN, will output NaN @inline Base.@propagate_inbounds eval_start(::Chebyshev, a::AbstractArray, b::AbstractArray) = abs(a[1] - b[1]) -chebyshev(a::AbstractArray, b::AbstractArray) = evaluate(Chebyshev(), a, b) 
-chebyshev(a::Number, b::Number) = evaluate(Chebyshev(), a, b) +chebyshev(a::AbstractArray, b::AbstractArray) = Chebyshev()(a, b) +chebyshev(a::Number, b::Number) = Chebyshev()(a, b) # Minkowski @inline eval_op(dist::Minkowski, ai, bi) = abs(ai - bi).^dist.p @inline eval_reduce(::Minkowski, s1, s2) = s1 + s2 eval_end(dist::Minkowski, s) = s.^(1 / dist.p) -minkowski(a::AbstractArray, b::AbstractArray, p::Real) = evaluate(Minkowski(p), a, b) -minkowski(a::Number, b::Number, p::Real) = evaluate(Minkowski(p), a, b) +minkowski(a::AbstractArray, b::AbstractArray, p::Real) = Minkowski(p)(a, b) +minkowski(a::Number, b::Number, p::Real) = Minkowski(p)(a, b) # Hamming @inline eval_op(::Hamming, ai, bi) = ai != bi ? 1 : 0 @inline eval_reduce(::Hamming, s1, s2) = s1 + s2 -hamming(a::AbstractArray, b::AbstractArray) = evaluate(Hamming(), a, b) -hamming(a::Number, b::Number) = evaluate(Hamming(), a, b) +hamming(a::AbstractArray, b::AbstractArray) = Hamming()(a, b) +hamming(a::Number, b::Number) = Hamming()(a, b) # Cosine dist @inline function eval_start(dist::CosineDist, a::AbstractArray, b::AbstractArray) @@ -344,32 +350,30 @@ function eval_end(::CosineDist, s) ab, a2, b2 = s max(1 - ab / (sqrt(a2) * sqrt(b2)), zero(eltype(ab))) end -cosine_dist(a::AbstractArray, b::AbstractArray) = evaluate(CosineDist(), a, b) +cosine_dist(a::AbstractArray, b::AbstractArray) = CosineDist()(a, b) # Correlation Dist _centralize(x::AbstractArray) = x .- mean(x) -evaluate(::CorrDist, a::AbstractArray, b::AbstractArray) = cosine_dist(_centralize(a), _centralize(b)) -# Ambiguity resolution -evaluate(::CorrDist, a::Array, b::Array) = cosine_dist(_centralize(a), _centralize(b)) -corr_dist(a::AbstractArray, b::AbstractArray) = evaluate(CorrDist(), a, b) +(dist::CorrDist)(a::AbstractArray, b::AbstractArray) = CosineDist()(_centralize(a), _centralize(b)) +corr_dist(a::AbstractArray, b::AbstractArray) = CorrDist()(a, b) result_type(::CorrDist, a::AbstractArray, b::AbstractArray) = 
result_type(CosineDist(), a, b) # ChiSqDist @inline eval_op(::ChiSqDist, ai, bi) = (d = abs2(ai - bi) / (ai + bi); ifelse(ai != bi, d, zero(d))) @inline eval_reduce(::ChiSqDist, s1, s2) = s1 + s2 -chisq_dist(a::AbstractArray, b::AbstractArray) = evaluate(ChiSqDist(), a, b) +chisq_dist(a::AbstractArray, b::AbstractArray) = ChiSqDist()(a, b) # KLDivergence @inline eval_op(dist::KLDivergence, ai, bi) = ai > 0 ? ai * log(ai / bi) : zero(eval_op(dist, oneunit(ai), bi)) @inline eval_reduce(::KLDivergence, s1, s2) = s1 + s2 -kl_divergence(a::AbstractArray, b::AbstractArray) = evaluate(KLDivergence(), a, b) +kl_divergence(a::AbstractArray, b::AbstractArray) = KLDivergence()(a, b) # GenKLDivergence @inline eval_op(dist::GenKLDivergence, ai, bi) = ai > 0 ? ai * log(ai / bi) - ai + bi : oftype(eval_op(dist, oneunit(ai), bi), bi) @inline eval_reduce(::GenKLDivergence, s1, s2) = s1 + s2 -gkl_divergence(a::AbstractArray, b::AbstractArray) = evaluate(GenKLDivergence(), a, b) +gkl_divergence(a::AbstractArray, b::AbstractArray) = GenKLDivergence()(a, b) # RenyiDivergence @inline Base.@propagate_inbounds function eval_start(::RenyiDivergence, a::AbstractArray{T}, b::AbstractArray{T}) where {T <: Real} @@ -416,7 +420,7 @@ function eval_end(dist::RenyiDivergence, s::Tuple{T,T,T,T}) where {T <: Real} end end -renyi_divergence(a::AbstractArray, b::AbstractArray, q::Real) = evaluate(RenyiDivergence(q), a, b) +renyi_divergence(a::AbstractArray, b::AbstractArray, q::Real) = RenyiDivergence(q)(a, b) # Combine docs with RenyiDivergence. 
Fetching the docstring with @doc causes # problems during package compilation; see # https://github.com/JuliaLang/julia/issues/31640 @@ -433,7 +437,7 @@ end ta + tb - tu end @inline eval_reduce(::JSDivergence, s1, s2) = s1 + s2 -js_divergence(a::AbstractArray, b::AbstractArray) = evaluate(JSDivergence(), a, b) +js_divergence(a::AbstractArray, b::AbstractArray) = JSDivergence()(a, b) # SpanNormDist @inline Base.@propagate_inbounds function eval_start(::SpanNormDist, a::AbstractArray, b::AbstractArray) @@ -451,7 +455,7 @@ end end eval_end(::SpanNormDist, s) = s[2] - s[1] -spannorm_dist(a::AbstractArray, b::AbstractArray) = evaluate(SpanNormDist(), a, b) +spannorm_dist(a::AbstractArray, b::AbstractArray) = SpanNormDist()(a, b) result_type(dist::SpanNormDist, a::AbstractArray, b::AbstractArray) = typeof(eval_op(dist, oneunit(eltype(a)), oneunit(eltype(b)))) @@ -477,7 +481,7 @@ end @inbounds v = 1 - (a[1] / a[2]) return v end -jaccard(a::AbstractArray, b::AbstractArray) = evaluate(Jaccard(), a, b) +jaccard(a::AbstractArray, b::AbstractArray) = Jaccard()(a, b) # BrayCurtis @@ -500,7 +504,7 @@ end @inbounds v = a[1] / a[2] return v end -braycurtis(a::AbstractArray, b::AbstractArray) = evaluate(BrayCurtis(), a, b) +braycurtis(a::AbstractArray, b::AbstractArray) = BrayCurtis()(a, b) # Tanimoto @@ -527,24 +531,24 @@ end @inbounds denominator = a[1] + a[4] + 2(a[2] + a[3]) numerator / denominator end -rogerstanimoto(a::AbstractArray{T}, b::AbstractArray{T}) where {T <: Bool} = evaluate(RogersTanimoto(), a, b) +rogerstanimoto(a::AbstractArray{T}, b::AbstractArray{T}) where {T <: Bool} = RogersTanimoto()(a, b) # Deviations -evaluate(::MeanAbsDeviation, a, b) = cityblock(a, b) / length(a) -meanad(a, b) = evaluate(MeanAbsDeviation(), a, b) +(dist::MeanAbsDeviation)(a, b) = cityblock(a, b) / length(a) +meanad(a, b) = MeanAbsDeviation()(a, b) -evaluate(::MeanSqDeviation, a, b) = sqeuclidean(a, b) / length(a) -msd(a, b) = evaluate(MeanSqDeviation(), a, b) +(dist::MeanSqDeviation)(a, 
b) = sqeuclidean(a, b) / length(a) +msd(a, b) = MeanSqDeviation()(a, b) -evaluate(::RMSDeviation, a, b) = sqrt(evaluate(MeanSqDeviation(), a, b)) -rmsd(a, b) = evaluate(RMSDeviation(), a, b) +(dist::RMSDeviation)(a, b) = sqrt(MeanSqDeviation()(a, b)) +rmsd(a, b) = RMSDeviation()(a, b) -function evaluate(::NormRMSDeviation, a, b) +function (dist::NormRMSDeviation)(a, b) amin, amax = extrema(a) - return evaluate(RMSDeviation(), a, b) / (amax - amin) + return RMSDeviation()(a, b) / (amax - amin) end -nrmsd(a, b) = evaluate(NormRMSDeviation(), a, b) +nrmsd(a, b) = NormRMSDeviation()(a, b) ########################################################### From 53307b4686dfc94e2a6e4de75e694516836b13e6 Mon Sep 17 00:00:00 2001 From: Johnny Chen Date: Tue, 6 Aug 2019 15:01:02 +0800 Subject: [PATCH 07/16] remove specialization of pairwise and colwise on CorrDist The previous specializations were needed to pass centralized input; they are no longer needed --- src/metrics.jl | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/metrics.jl b/src/metrics.jl index cbe07f8..c80da11 100644 --- a/src/metrics.jl +++ b/src/metrics.jl @@ -712,20 +712,3 @@ function _pairwise!(r::AbstractMatrix, dist::CosineDist, a::AbstractMatrix) end r end - -# CorrDist -_centralize_colwise(x::AbstractVector) = x .- mean(x) -_centralize_colwise(x::AbstractMatrix) = x .- mean(x, dims=1) -function colwise!(r::AbstractVector, dist::CorrDist, a::AbstractMatrix, b::AbstractMatrix) - colwise!(r, CosineDist(), _centralize_colwise(a), _centralize_colwise(b)) -end -function colwise!(r::AbstractVector, dist::CorrDist, a::AbstractVector, b::AbstractMatrix) - colwise!(r, CosineDist(), _centralize_colwise(a), _centralize_colwise(b)) -end -function _pairwise!(r::AbstractMatrix, dist::CorrDist, - a::AbstractMatrix, b::AbstractMatrix) - _pairwise!(r, CosineDist(), _centralize_colwise(a), _centralize_colwise(b)) -end -function _pairwise!(r::AbstractMatrix, dist::CorrDist, a::AbstractMatrix) - _pairwise!(r, 
CosineDist(), _centralize_colwise(a)) -end From f1214a1950bea87377067851d102fd4a7a598851 Mon Sep 17 00:00:00 2001 From: Johnny Chen Date: Tue, 6 Aug 2019 15:20:25 +0800 Subject: [PATCH 08/16] metrics in wmetrics.jl --- src/wmetrics.jl | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/wmetrics.jl b/src/wmetrics.jl index acb342f..f881827 100644 --- a/src/wmetrics.jl +++ b/src/wmetrics.jl @@ -30,20 +30,17 @@ struct WeightedHamming{W <: RealAbstractArray} <: Metric weights::W end - -const UnionWeightedMetrics{W} = Union{WeightedEuclidean{W},WeightedSqEuclidean{W},WeightedCityblock{W},WeightedMinkowski{W},WeightedHamming{W}} +const weightedmetrics_list = (WeightedEuclidean,WeightedSqEuclidean,WeightedCityblock,WeightedMinkowski,WeightedHamming) +const UnionWeightedMetrics{W} = Union{map(M->M{W}, weightedmetrics_list)...} Base.eltype(x::UnionWeightedMetrics) = eltype(x.weights) ########################################################### # -# Evaluate +# Implementations # ########################################################### -function evaluate(dist::UnionWeightedMetrics, a::Number, b::Number) - eval_end(dist, eval_op(dist, a, b, oneunit(eltype(dist)))) -end result_type(dist::UnionWeightedMetrics, a::AbstractArray, b::AbstractArray) = - typeof(evaluate(dist, oneunit(eltype(a)), oneunit(eltype(b)))) + typeof(dist(oneunit(eltype(a)), oneunit(eltype(b)))) @inline function eval_start(d::UnionWeightedMetrics, a::AbstractArray, b::AbstractArray) zero(result_type(d, a, b)) @@ -52,7 +49,7 @@ eval_end(d::UnionWeightedMetrics, s) = s -@inline function evaluate(d::UnionWeightedMetrics, a::AbstractArray, b::AbstractArray) +@inline function _evaluate(d::UnionWeightedMetrics, a::AbstractArray, b::AbstractArray) @boundscheck if length(a) != length(b) throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b)).")) end @@ -83,32 +80,37 @@ eval_end(d::UnionWeightedMetrics, 
s) = s return eval_end(d, s) end +for M in weightedmetrics_list + @eval (dist::$M)(a::AbstractArray, b::AbstractArray) = _evaluate(dist, a, b) + @eval (dist::$M)(a::Number, b::Number) = eval_end(dist, eval_op(dist, a, b, oneunit(eltype(dist)))) +end + # Squared Euclidean @inline eval_op(::WeightedSqEuclidean, ai, bi, wi) = abs2(ai - bi) * wi @inline eval_reduce(::WeightedSqEuclidean, s1, s2) = s1 + s2 -wsqeuclidean(a::AbstractArray, b::AbstractArray, w::AbstractArray) = evaluate(WeightedSqEuclidean(w), a, b) +wsqeuclidean(a::AbstractArray, b::AbstractArray, w::AbstractArray) = WeightedSqEuclidean(w)(a, b) # Weighted Euclidean @inline eval_op(::WeightedEuclidean, ai, bi, wi) = abs2(ai - bi) * wi @inline eval_reduce(::WeightedEuclidean, s1, s2) = s1 + s2 @inline eval_end(::WeightedEuclidean, s) = sqrt(s) -weuclidean(a::AbstractArray, b::AbstractArray, w::AbstractArray) = evaluate(WeightedEuclidean(w), a, b) +weuclidean(a::AbstractArray, b::AbstractArray, w::AbstractArray) = WeightedEuclidean(w)(a, b) # City Block @inline eval_op(::WeightedCityblock, ai, bi, wi) = abs((ai - bi) * wi) @inline eval_reduce(::WeightedCityblock, s1, s2) = s1 + s2 -wcityblock(a::AbstractArray, b::AbstractArray, w::AbstractArray) = evaluate(WeightedCityblock(w), a, b) +wcityblock(a::AbstractArray, b::AbstractArray, w::AbstractArray) = WeightedCityblock(w)(a, b) # Minkowski @inline eval_op(dist::WeightedMinkowski, ai, bi, wi) = abs(ai - bi).^dist.p * wi @inline eval_reduce(::WeightedMinkowski, s1, s2) = s1 + s2 eval_end(dist::WeightedMinkowski, s) = s.^(1 / dist.p) -wminkowski(a::AbstractArray, b::AbstractArray, w::AbstractArray, p::Real) = evaluate(WeightedMinkowski(w, p), a, b) +wminkowski(a::AbstractArray, b::AbstractArray, w::AbstractArray, p::Real) = WeightedMinkowski(w, p)(a, b) # WeightedHamming @inline eval_op(::WeightedHamming, ai, bi, wi) = ai != bi ? 
wi : zero(eltype(wi)) @inline eval_reduce(::WeightedHamming, s1, s2) = s1 + s2 -whamming(a::AbstractArray, b::AbstractArray, w::AbstractArray) = evaluate(WeightedHamming(w), a, b) +whamming(a::AbstractArray, b::AbstractArray, w::AbstractArray) = WeightedHamming(w)(a, b) ########################################################### # From c79f914eaf78e1e311be8402acfdbac29e9bea25 Mon Sep 17 00:00:00 2001 From: Johnny Chen Date: Tue, 6 Aug 2019 15:27:02 +0800 Subject: [PATCH 09/16] colwise and pairwise --- src/generic.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/generic.jl b/src/generic.jl index 8b21fb1..c18d4c3 100644 --- a/src/generic.jl +++ b/src/generic.jl @@ -34,7 +34,7 @@ function colwise!(r::AbstractArray, metric::PreMetric, a::AbstractVector, b::Abs n = size(b, 2) length(r) == n || throw(DimensionMismatch("Incorrect size of r.")) @inbounds for j = 1:n - r[j] = evaluate(metric, a, view(b, :, j)) + r[j] = metric(a, view(b, :, j)) end r end @@ -43,7 +43,7 @@ function colwise!(r::AbstractArray, metric::PreMetric, a::AbstractMatrix, b::Abs n = size(a, 2) length(r) == n || throw(DimensionMismatch("Incorrect size of r.")) @inbounds for j = 1:n - r[j] = evaluate(metric, view(a, :, j), b) + r[j] = metric(view(a, :, j), b) end r end @@ -52,7 +52,7 @@ function colwise!(r::AbstractArray, metric::PreMetric, a::AbstractMatrix, b::Abs n = get_common_ncols(a, b) length(r) == n || throw(DimensionMismatch("Incorrect size of r.")) @inbounds for j = 1:n - r[j] = evaluate(metric, view(a, :, j), view(b, :, j)) + r[j] = metric(view(a, :, j), view(b, :, j)) end r end @@ -90,7 +90,7 @@ function _pairwise!(r::AbstractMatrix, metric::PreMetric, @inbounds for j = 1:size(b, 2) bj = view(b, :, j) for i = 1:size(a, 2) - r[i, j] = evaluate(metric, view(a, :, i), bj) + r[i, j] = metric(view(a, :, i), bj) end end r @@ -102,7 +102,7 @@ function _pairwise!(r::AbstractMatrix, metric::SemiMetric, a::AbstractMatrix) @inbounds for j = 1:n aj = view(a, :, j) for i 
= (j + 1):n - r[i, j] = evaluate(metric, view(a, :, i), aj) + r[i, j] = metric(view(a, :, i), aj) end r[j, j] = 0 for i = 1:(j - 1) From c31007e398398b76449051288baad26562573dbc Mon Sep 17 00:00:00 2001 From: Johnny Chen Date: Tue, 6 Aug 2019 15:29:00 +0800 Subject: [PATCH 10/16] update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 97f2762..4069190 100644 --- a/README.md +++ b/README.md @@ -54,12 +54,14 @@ Each distance corresponds to a *distance type*. You can always compute a certain ```julia r = evaluate(dist, x, y) +r = dist(x, y) ``` Here, dist is an instance of a distance type. For example, the type for Euclidean distance is ``Euclidean`` (more distance types will be introduced in the next section), then you can compute the Euclidean distance between ``x`` and ``y`` as ```julia r = evaluate(Euclidean(), x, y) +r = Euclidean()(x, y) ``` Common distances also come with convenient functions for distance evaluation. For example, you may also compute Euclidean distance between two vectors as below From 089243a62c86bcd91c97c6a81e3ff1d0a9e230f0 Mon Sep 17 00:00:00 2001 From: Johnny Chen Date: Tue, 6 Aug 2019 15:47:12 +0800 Subject: [PATCH 11/16] update test --- test/test_dists.jl | 52 ++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/test/test_dists.jl b/test/test_dists.jl index 1a31e60..06d2bc1 100644 --- a/test/test_dists.jl +++ b/test/test_dists.jl @@ -2,36 +2,38 @@ function test_metricity(dist, x, y, z) @testset "Test metricity of $(typeof(dist))" begin - dxy = evaluate(dist, x, y) - dxz = evaluate(dist, x, z) - dyz = evaluate(dist, y, z) + @test dist(x, y) == evaluate(dist, x, y) + + dxy = dist(x, y) + dxz = dist(x, z) + dyz = dist(y, z) if isa(dist, PreMetric) # Unfortunately small non-zero numbers (~10^-16) are appearing # in our tests due to accumulating floating point rounding errors. 
# We either need to allow small errors in our tests or change the # way we do accumulations... - @test evaluate(dist, x, x) + one(eltype(x)) ≈ one(eltype(x)) - @test evaluate(dist, y, y) + one(eltype(y)) ≈ one(eltype(y)) - @test evaluate(dist, z, z) + one(eltype(z)) ≈ one(eltype(z)) + @test dist(x, x) + one(eltype(x)) ≈ one(eltype(x)) + @test dist(y, y) + one(eltype(y)) ≈ one(eltype(y)) + @test dist(z, z) + one(eltype(z)) ≈ one(eltype(z)) @test dxy ≥ zero(eltype(x)) @test dxz ≥ zero(eltype(x)) @test dyz ≥ zero(eltype(x)) end if isa(dist, SemiMetric) - @test dxy ≈ evaluate(dist, y, x) - @test dxz ≈ evaluate(dist, z, x) - @test dyz ≈ evaluate(dist, y, z) + @test dxy ≈ dist(y, x) + @test dxz ≈ dist(z, x) + @test dyz ≈ dist(y, z) else # Not symmetric, so more PreMetric tests - @test evaluate(dist, y, x) ≥ zero(eltype(x)) - @test evaluate(dist, z, x) ≥ zero(eltype(x)) - @test evaluate(dist, z, y) ≥ zero(eltype(x)) + @test dist(y, x) ≥ zero(eltype(x)) + @test dist(z, x) ≥ zero(eltype(x)) + @test dist(z, y) ≥ zero(eltype(x)) end if isa(dist, Metric) # Again we have small rounding errors in accumulations @test dxz ≤ dxy + dyz || dxz ≈ dxy + dyz - dyx = evaluate(dist, y, x) + dyx = dist(y, x) @test dyz ≤ dyx + dxz || dyz ≈ dyx + dxz - dzy = evaluate(dist, z, y) + dzy = dist(z, y) @test dxy ≤ dxz + dzy || dxy ≈ dxz + dzy end end @@ -189,9 +191,9 @@ end @test whamming(a, b, w) === sum((a .!= b) .* w) # Minimal test of Jaccard - test return type stability. - @inferred evaluate(Jaccard(), rand(T, 3), rand(T, 3)) - @inferred evaluate(Jaccard(), [1, 2, 3], [1, 2, 3]) - @inferred evaluate(Jaccard(), [true, false, true], [false, true, true]) + @inferred Jaccard()(rand(T, 3), rand(T, 3)) + @inferred Jaccard()([1, 2, 3], [1, 2, 3]) + @inferred Jaccard()([true, false, true], [false, true, true]) # Test Bray-Curtis. 
Should be 1 if no elements are shared, 0 if all are the same @test braycurtis([1,0,3],[0,1,0]) == 1.0 @@ -295,8 +297,8 @@ end # testset @test_throws DimensionMismatch colwise!(mat23, Euclidean(), mat23, q) @test_throws DimensionMismatch colwise!(mat23, Euclidean(), mat23, mat22) @test_throws DimensionMismatch colwise!(mat23, Bregman(x -> sqeuclidean(x, zero(x)), x -> 2*x), mat23, mat22) - @test_throws DimensionMismatch evaluate(Bregman(x -> sqeuclidean(x, zero(x)), x -> 2*x), [1, 2, 3], [1, 2]) - @test_throws DimensionMismatch evaluate(Bregman(x -> sqeuclidean(x, zero(x)), x -> [1, 2]), [1, 2, 3], [1, 2, 3]) + @test_throws DimensionMismatch Bregman(x -> sqeuclidean(x, zero(x)), x -> 2*x)([1, 2, 3], [1, 2]) + @test_throws DimensionMismatch Bregman(x -> sqeuclidean(x, zero(x)), x -> [1, 2])([1, 2, 3], [1, 2, 3]) end # testset @testset "Different input types" begin @@ -409,9 +411,9 @@ function test_colwise(dist, x, y, T) r2 = zeros(T, n) r3 = zeros(T, n) for j = 1:n - r1[j] = evaluate(dist, x[:, j], y[:, j]) - r2[j] = evaluate(dist, x[:, 1], y[:, j]) - r3[j] = evaluate(dist, x[:, j], y[:, 1]) + r1[j] = dist(x[:, j], y[:, j]) + r2[j] = dist(x[:, 1], y[:, j]) + r3[j] = dist(x[:, j], y[:, 1]) end # ≈ and all( .≈ ) seem to behave slightly differently for F64 @test all(colwise(dist, x, y) .≈ r1) @@ -485,10 +487,10 @@ function test_pairwise(dist, x, y, T) rxy = zeros(T, nx, ny) rxx = zeros(T, nx, nx) for j = 1:ny, i = 1:nx - rxy[i, j] = evaluate(dist, x[:, i], y[:, j]) + rxy[i, j] = dist(x[:, i], y[:, j]) end for j = 1:nx, i = 1:nx - rxx[i, j] = evaluate(dist, x[:, i], x[:, j]) + rxx[i, j] = dist(x[:, i], x[:, j]) end # As earlier, we have small rounding errors in accumulations @test pairwise(dist, x, y) ≈ rxy @@ -582,7 +584,7 @@ end q = rand(4) p = p/sum(p); q = q/sum(q); - @test evaluate(testDist, p, q) ≈ gkl_divergence(p, q) + @test testDist(p, q) ≈ gkl_divergence(p, q) # Test if Bregman() correctly implements the squared euclidean dist. between them. 
@test bregman(x -> norm(x)^2, x -> 2*x, p, q) ≈ sqeuclidean(p, q) # Test if Bregman() correctly implements the IS distance. From b0f114825514088ea3f06db9a85cc5dfb7381285 Mon Sep 17 00:00:00 2001 From: Johnny Chen Date: Wed, 7 Aug 2019 10:08:04 +0800 Subject: [PATCH 12/16] rename metric_list to metrics --- src/metrics.jl | 6 +++--- src/wmetrics.jl | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/metrics.jl b/src/metrics.jl index c80da11..3e0f4d4 100644 --- a/src/metrics.jl +++ b/src/metrics.jl @@ -104,8 +104,8 @@ struct PeriodicEuclidean{W <: AbstractArray{<: Real}} <: Metric periods::W end -const metric_list = (Euclidean,SqEuclidean,PeriodicEuclidean,Chebyshev,Cityblock,TotalVariation,Minkowski,Hamming,Jaccard,RogersTanimoto,CosineDist,ChiSqDist,KLDivergence,RenyiDivergence,BrayCurtis,JSDivergence,SpanNormDist,GenKLDivergence) -const UnionMetrics = Union{metric_list...} +const metrics = (Euclidean, SqEuclidean, PeriodicEuclidean, Chebyshev, Cityblock, TotalVariation, Minkowski, Hamming, Jaccard, RogersTanimoto, CosineDist, ChiSqDist, KLDivergence, RenyiDivergence, BrayCurtis, JSDivergence, SpanNormDist, GenKLDivergence) +const UnionMetrics = Union{metrics...} """ Euclidean([thresh]) @@ -262,7 +262,7 @@ eval_start(d::UnionMetrics, a::AbstractArray, b::AbstractArray) = zero(result_type(d, a, b)) eval_end(d::UnionMetrics, s) = s -for M in metric_list +for M in metrics @eval @inline (dist::$M)(a::AbstractArray, b::AbstractArray) = _evaluate(dist, a, b) @eval @inline (dist::$M)(a::Number, b::Number) = eval_end(dist, eval_op(dist, a, b)) end diff --git a/src/wmetrics.jl b/src/wmetrics.jl index f881827..c898b22 100644 --- a/src/wmetrics.jl +++ b/src/wmetrics.jl @@ -30,8 +30,8 @@ struct WeightedHamming{W <: RealAbstractArray} <: Metric weights::W end -const weightedmetrics_list = (WeightedEuclidean,WeightedSqEuclidean,WeightedCityblock,WeightedMinkowski,WeightedHamming) -const UnionWeightedMetrics{W} = Union{map(M->M{W}, 
weightedmetrics_list)...} +const weightedmetrics = (WeightedEuclidean, WeightedSqEuclidean, WeightedCityblock, WeightedMinkowski, WeightedHamming) +const UnionWeightedMetrics{W} = Union{map(M->M{W}, weightedmetrics)...} Base.eltype(x::UnionWeightedMetrics) = eltype(x.weights) ########################################################### # @@ -80,7 +80,7 @@ eval_end(d::UnionWeightedMetrics, s) = s return eval_end(d, s) end -for M in weightedmetrics_list +for M in weightedmetrics @eval (dist::$M)(a::AbstractArray, b::AbstractArray) = _evaluate(dist, a, b) @eval (dist::$M)(a::Number, b::Number) = eval_end(dist, eval_op(dist, a, b, oneunit(eltype(dist)))) end From 86f5d2243f3e2d95690f3f3884ff20e561831f90 Mon Sep 17 00:00:00 2001 From: Johnny Chen Date: Wed, 7 Aug 2019 10:13:10 +0800 Subject: [PATCH 13/16] revert auto-formatted spaces changed parts: * spaces after type annotation, e.g, `b::AbstractMatrix = a` not changed parts: * additional spaces at the end of line * spaces between operations and comma, e.g., `a + b` and `(a, b)` --- src/generic.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/generic.jl b/src/generic.jl index c18d4c3..4bad394 100644 --- a/src/generic.jl +++ b/src/generic.jl @@ -83,7 +83,7 @@ end # Generic pairwise evaluation function _pairwise!(r::AbstractMatrix, metric::PreMetric, - a::AbstractMatrix, b::AbstractMatrix = a) + a::AbstractMatrix, b::AbstractMatrix=a) na = size(a, 2) nb = size(b, 2) size(r) == (na, nb) || throw(DimensionMismatch("Incorrect size of r.")) @@ -136,7 +136,7 @@ If a single matrix `a` is provided, compute distances between its rows or column """ function pairwise!(r::AbstractMatrix, metric::PreMetric, a::AbstractMatrix, b::AbstractMatrix; - dims::Union{Nothing,Integer} = nothing) + dims::Union{Nothing,Integer}=nothing) dims = deprecated_dims(dims) dims in (1, 2) || throw(ArgumentError("dims should be 1 or 2 (got $dims)")) if dims == 1 @@ -160,7 +160,7 @@ function pairwise!(r::AbstractMatrix, 
metric::PreMetric, end function pairwise!(r::AbstractMatrix, metric::PreMetric, a::AbstractMatrix; - dims::Union{Nothing,Integer} = nothing) + dims::Union{Nothing,Integer}=nothing) dims = deprecated_dims(dims) dims in (1, 2) || throw(ArgumentError("dims should be 1 or 2 (got $dims)")) if dims == 1 @@ -187,7 +187,7 @@ compute distances between its rows or columns. `a` and `b` must have the same numbers of columns if `dims=1`, or of rows if `dims=2`. """ function pairwise(metric::PreMetric, a::AbstractMatrix, b::AbstractMatrix; - dims::Union{Nothing,Integer} = nothing) + dims::Union{Nothing,Integer}=nothing) dims = deprecated_dims(dims) dims in (1, 2) || throw(ArgumentError("dims should be 1 or 2 (got $dims)")) m = size(a, dims) @@ -197,7 +197,7 @@ function pairwise(metric::PreMetric, a::AbstractMatrix, b::AbstractMatrix; end function pairwise(metric::PreMetric, a::AbstractMatrix; - dims::Union{Nothing,Integer} = nothing) + dims::Union{Nothing,Integer}=nothing) dims = deprecated_dims(dims) dims in (1, 2) || throw(ArgumentError("dims should be 1 or 2 (got $dims)")) n = size(a, dims) From 498647fffa6a2a9379ccf1e73664e9fc6c76cb44 Mon Sep 17 00:00:00 2001 From: Johnny Chen Date: Fri, 9 Aug 2019 23:34:26 +0800 Subject: [PATCH 14/16] update format --- src/bregman.jl | 4 ++-- src/generic.jl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/bregman.jl b/src/bregman.jl index 808326e..1f1f159 100644 --- a/src/bregman.jl +++ b/src/bregman.jl @@ -12,7 +12,7 @@ of good automatic differentiation packages. function evaluate(dist::Bregman, p::AbstractVector, q::AbstractVector) """ -struct Bregman{T1 <: Function,T2 <: Function,T3 <: Function} <: PreMetric +struct Bregman{T1 <: Function, T2 <: Function, T3 <: Function} <: PreMetric F::T1 ∇::T2 inner::T3 @@ -41,7 +41,7 @@ function (dist::Bregman)(p::AbstractVector, q::AbstractVector) throw(DimensionMismatch("The gradient result is not the same size as p and q")) end # Return the Bregman divergence. 
- return FP_val - FQ_val - dist.inner(DQ_val, p - q); + return FP_val - FQ_val - dist.inner(DQ_val, p-q); end # Convenience function. diff --git a/src/generic.jl b/src/generic.jl index 4bad394..2af476b 100644 --- a/src/generic.jl +++ b/src/generic.jl @@ -193,7 +193,7 @@ function pairwise(metric::PreMetric, a::AbstractMatrix, b::AbstractMatrix; m = size(a, dims) n = size(b, dims) r = Matrix{result_type(metric, a, b)}(undef, m, n) - pairwise!(r, metric, a, b, dims = dims) + pairwise!(r, metric, a, b, dims=dims) end function pairwise(metric::PreMetric, a::AbstractMatrix; @@ -202,5 +202,5 @@ function pairwise(metric::PreMetric, a::AbstractMatrix; dims in (1, 2) || throw(ArgumentError("dims should be 1 or 2 (got $dims)")) n = size(a, dims) r = Matrix{result_type(metric, a, a)}(undef, n, n) - pairwise!(r, metric, a, dims = dims) + pairwise!(r, metric, a, dims=dims) end From acd1095542b81dd20b21395248efe9e7e041e0e7 Mon Sep 17 00:00:00 2001 From: JohnnyChen Date: Thu, 15 Aug 2019 19:25:38 +0800 Subject: [PATCH 15/16] rollback auto-format on whitespaces --- src/bhattacharyya.jl | 1 + src/bregman.jl | 36 ++++++++++++++++++------------------ src/haversine.jl | 8 ++++---- src/metrics.jl | 2 +- src/wmetrics.jl | 2 +- 5 files changed, 25 insertions(+), 24 deletions(-) diff --git a/src/bhattacharyya.jl b/src/bhattacharyya.jl index 26705c9..60a7ae1 100644 --- a/src/bhattacharyya.jl +++ b/src/bhattacharyya.jl @@ -6,6 +6,7 @@ struct BhattacharyyaDist <: SemiMetric end struct HellingerDist <: Metric end + # Bhattacharyya coefficient function bhattacharyya_coeff(a::AbstractVector{T}, b::AbstractVector{T}) where {T <: Number} diff --git a/src/bregman.jl b/src/bregman.jl index 1f1f159..afaa2ef 100644 --- a/src/bregman.jl +++ b/src/bregman.jl @@ -1,14 +1,14 @@ -# Bregman divergence +# Bregman divergence """ Implements the Bregman divergence, a friendly introduction to which can be found -[here](http://mark.reid.name/blog/meet-the-bregman-divergences.html). 
-Bregman divergences are a minimal implementation of the "mean-minimizer" property. +[here](http://mark.reid.name/blog/meet-the-bregman-divergences.html). +Bregman divergences are a minimal implementation of the "mean-minimizer" property. -It is assumed that the (convex differentiable) function F maps vectors (of any type or size) to real numbers. -The inner product used is `Base.dot`, but one can be passed in either by defining `inner` or by -passing in a keyword argument. If an analytic gradient isn't available, Julia offers a suite -of good automatic differentiation packages. +It is assumed that the (convex differentiable) function F maps vectors (of any type or size) to real numbers. +The inner product used is `Base.dot`, but one can be passed in either by defining `inner` or by +passing in a keyword argument. If an analytic gradient isn't available, Julia offers a suite +of good automatic differentiation packages. function evaluate(dist::Bregman, p::AbstractVector, q::AbstractVector) """ @@ -18,31 +18,31 @@ struct Bregman{T1 <: Function, T2 <: Function, T3 <: Function} <: PreMetric inner::T3 end -# Default costructor. +# Default costructor. Bregman(F, ∇) = Bregman(F, ∇, LinearAlgebra.dot) -# Evaluation fuction +# Evaluation fuction function (dist::Bregman)(p::AbstractVector, q::AbstractVector) # Create cache vals. FP_val = dist.F(p); - FQ_val = dist.F(q); + FQ_val = dist.F(q); DQ_val = dist.∇(q); p_size = size(p); - # Check F codomain. + # Check F codomain. if !(isa(FP_val, Real) && isa(FQ_val, Real)) throw(ArgumentError("F Codomain Error: F doesn't map the vectors to real numbers")) - end - # Check vector size. + end + # Check vector size. if !(p_size == size(q)) throw(DimensionMismatch("The vector p ($(size(p))) and q ($(size(q))) are different sizes.")) end - # Check gradient size. + # Check gradient size. if !(size(DQ_val) == p_size) throw(DimensionMismatch("The gradient result is not the same size as p and q")) - end - # Return the Bregman divergence. 
+ end + # Return the Bregman divergence. return FP_val - FQ_val - dist.inner(DQ_val, p-q); -end +end -# Convenience function. +# Convenience function. bregman(F, ∇, x, y; inner = LinearAlgebra.dot) = Bregman(F, ∇, inner)(x, y) diff --git a/src/haversine.jl b/src/haversine.jl index 711b902..8a08cff 100644 --- a/src/haversine.jl +++ b/src/haversine.jl @@ -6,11 +6,11 @@ The haversine distance between two locations on a sphere of given `radius`. Locations are described with longitude and latitude in degrees. The computed distance has the same units as that of the radius. """ -struct Haversine{T <: Real} <: Metric +struct Haversine{T<:Real} <: Metric radius::T end -const VecOrLengthTwoTuple{T} = Union{AbstractVector{T},NTuple{2,T}} +const VecOrLengthTwoTuple{T} = Union{AbstractVector{T}, NTuple{2, T}} function (dist::Haversine)(x::VecOrLengthTwoTuple, y::VecOrLengthTwoTuple) length(x) == length(y) == 2 || haversine_error() @@ -27,10 +27,10 @@ function (dist::Haversine)(x::VecOrLengthTwoTuple, y::VecOrLengthTwoTuple) Δφ = φ₂ - φ₁ # haversine formula - a = sin(Δφ / 2)^2 + cos(φ₁) * cos(φ₂) * sin(Δλ / 2)^2 + a = sin(Δφ/2)^2 + cos(φ₁)*cos(φ₂)*sin(Δλ/2)^2 # distance on the sphere - 2 * dist.radius * asin(min(√a, one(a))) # take care of floating point errors + 2 * dist.radius * asin( min(√a, one(a)) ) # take care of floating point errors end haversine(x::VecOrLengthTwoTuple, y::VecOrLengthTwoTuple, radius::Real) = Haversine(radius)(x, y) diff --git a/src/metrics.jl b/src/metrics.jl index aa6d9e2..6047b27 100644 --- a/src/metrics.jl +++ b/src/metrics.jl @@ -104,7 +104,7 @@ struct PeriodicEuclidean{W <: AbstractArray{<: Real}} <: Metric periods::W end -const metrics = (Euclidean, SqEuclidean, PeriodicEuclidean, Chebyshev, Cityblock, TotalVariation, Minkowski, Hamming, Jaccard, RogersTanimoto, CosineDist, ChiSqDist, KLDivergence, RenyiDivergence, BrayCurtis, JSDivergence, SpanNormDist, GenKLDivergence) +const metrics = 
(Euclidean,SqEuclidean,PeriodicEuclidean,Chebyshev,Cityblock,TotalVariation,Minkowski,Hamming,Jaccard,RogersTanimoto,CosineDist,ChiSqDist,KLDivergence,RenyiDivergence,BrayCurtis,JSDivergence,SpanNormDist,GenKLDivergence) const UnionMetrics = Union{metrics...} """ diff --git a/src/wmetrics.jl b/src/wmetrics.jl index d426c7d..9e41bdc 100644 --- a/src/wmetrics.jl +++ b/src/wmetrics.jl @@ -30,7 +30,7 @@ struct WeightedHamming{W <: RealAbstractArray} <: Metric weights::W end -const weightedmetrics = (WeightedEuclidean, WeightedSqEuclidean, WeightedCityblock, WeightedMinkowski, WeightedHamming) +const weightedmetrics = (WeightedEuclidean,WeightedSqEuclidean,WeightedCityblock,WeightedMinkowski,WeightedHamming) const UnionWeightedMetrics{W} = Union{map(M->M{W}, weightedmetrics)...} Base.eltype(x::UnionWeightedMetrics) = eltype(x.weights) ########################################################### From b885201ac3b7c0b0719e97055058516ba7994874 Mon Sep 17 00:00:00 2001 From: JohnnyChen Date: Thu, 15 Aug 2019 19:36:08 +0800 Subject: [PATCH 16/16] test if there are any ambiguities This PR fixes all the ambiguities as a good start; future PRs must not break this. --- test/runtests.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index eb9b147..6afb39a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,5 +5,7 @@ using LinearAlgebra using Random using Statistics +@test isempty(detect_ambiguities(Distances)) + include("F64.jl") include("test_dists.jl")