diff --git a/docs/src/introduction/gettingstarted.md b/docs/src/introduction/gettingstarted.md
index 5199ee4..839ece6 100644
--- a/docs/src/introduction/gettingstarted.md
+++ b/docs/src/introduction/gettingstarted.md
@@ -113,10 +113,10 @@ avoid allocating a temporary array and directly compute the
 result.

 ```julia-repl
-julia> value(L2DistLoss(), true_targets, pred_outputs, AvgMode.Sum())
+julia> value(L2DistLoss(), true_targets, pred_outputs, AggMode.Sum())
 5.25

-julia> value(L2DistLoss(), true_targets, pred_outputs, AvgMode.Mean())
+julia> value(L2DistLoss(), true_targets, pred_outputs, AggMode.Mean())
 1.75
 ```

@@ -126,10 +126,10 @@ each observation in the predicted outputs and so allow to give
 certain observations a stronger influence over the result.

 ```julia-repl
-julia> value(L2DistLoss(), true_targets, pred_outputs, AvgMode.WeightedSum([2,1,1]))
+julia> value(L2DistLoss(), true_targets, pred_outputs, AggMode.WeightedSum([2,1,1]))
 5.5

-julia> value(L2DistLoss(), true_targets, pred_outputs, AvgMode.WeightedMean([2,1,1]))
+julia> value(L2DistLoss(), true_targets, pred_outputs, AggMode.WeightedMean([2,1,1]))
 1.375
 ```

@@ -157,7 +157,7 @@ julia> value(L2DistLoss(), A, B)
  0.00161395  0.0423701  0.183882
  0.172286    0.0180639  0.00252607

-julia> value(L2DistLoss(), A, B, AvgMode.Sum())
+julia> value(L2DistLoss(), A, B, AggMode.Sum())
 0.420741920634
 ```

@@ -172,7 +172,7 @@ julia> value(L2DistLoss(), rand(2), rand(2,2))
  0.228077  0.597212
  0.789808  0.311914

-julia> value(L2DistLoss(), rand(2), rand(2,2), AvgMode.Sum())
+julia> value(L2DistLoss(), rand(2), rand(2,2), AggMode.Sum())
 0.0860658081865589
 ```

@@ -182,18 +182,18 @@ multivariate regression where one could want to accumulate
 the loss per individual observation.

 ```julia-repl
-julia> value(L2DistLoss(), A, B, AvgMode.Sum(), ObsDim.First())
+julia> value(L2DistLoss(), A, B, AggMode.Sum(), ObsDim.First())
 2-element Array{Float64,1}:
  0.227866
  0.192876

-julia> value(L2DistLoss(), A, B, AvgMode.Sum(), ObsDim.Last())
+julia> value(L2DistLoss(), A, B, AggMode.Sum(), ObsDim.Last())
 3-element Array{Float64,1}:
  0.1739
  0.060434
  0.186408

-julia> value(L2DistLoss(), A, B, AvgMode.WeightedSum([2,1]), ObsDim.First())
+julia> value(L2DistLoss(), A, B, AggMode.WeightedSum([2,1]), ObsDim.First())
 0.648608280735
 ```

@@ -287,4 +287,3 @@ If you encounter a bug or would like to participate in the
 further development of this package come find us on Github.

 - [JuliaML/LossFunctions.jl](https://github.com/JuliaML/LossFunctions.jl)
-
diff --git a/docs/src/user/aggregate.md b/docs/src/user/aggregate.md
index ef17884..0c24dbf 100644
--- a/docs/src/user/aggregate.md
+++ b/docs/src/user/aggregate.md
@@ -1,3 +1,9 @@
+```@meta
+DocTestSetup = quote
+    using LossFunctions
+end
+```
+
 # Efficient Sum and Mean

 In many situations we are not really that interested in the
@@ -50,14 +56,14 @@ common accumulations efficiently without allocating temporary
 arrays. These methods can be invoked using an additional
 parameter which specifies how the values should be accumulated /
 averaged. The type of this parameter has to be a subtype of
-`AverageMode`.
+`AggregateMode`.

 ## Average Modes

 Before we discuss these memory-efficient methods, let us briefly
 introduce the available average mode types. We provide a number
 of different averages modes, all of which are contained within
-the namespace `AvgMode`. An instance of such type can then be
+the namespace `AggMode`. An instance of such type can then be
 used as additional parameter to [`value`](@ref), [`deriv`](@ref),
 and [`deriv2`](@ref), as we will see further down.

@@ -66,11 +72,11 @@ a short description of what their effect would be when used as
 an additional parameter to the functions mentioned above.

 ```@docs
-AvgMode.None
-AvgMode.Sum
-AvgMode.Mean
-AvgMode.WeightedSum
-AvgMode.WeightedMean
+AggMode.None
+AggMode.Sum
+AggMode.Mean
+AggMode.WeightedSum
+AggMode.WeightedMean
 ```

 ## Unweighted Sum and Mean
@@ -82,15 +88,15 @@ broadcasted) results of [`value`](@ref), [`deriv`](@ref), and
 temporary array and instead compute the result directly.

 ```@docs
-value(::Loss, ::AbstractArray, ::AbstractArray, ::LossFunctions.AverageMode)
+value(::Loss, ::AbstractArray, ::AbstractArray, ::LossFunctions.AggregateMode)
 ```

 The exact same method signature is also implemented for
 [`deriv`](@ref) and [`deriv2`](@ref) respectively.

 ```@docs
-deriv(::Loss, ::AbstractArray, ::AbstractArray, ::LossFunctions.AverageMode)
-deriv2(::Loss, ::AbstractArray, ::AbstractArray, ::LossFunctions.AverageMode)
+deriv(::Loss, ::AbstractArray, ::AbstractArray, ::LossFunctions.AggregateMode)
+deriv2(::Loss, ::AbstractArray, ::AbstractArray, ::LossFunctions.AggregateMode)
 ```

 ## Sum and Mean per Observation
@@ -110,7 +116,7 @@ that denotes the observations. For that purpose we provide the
 types contained in the namespace `ObsDim`.

 ```@docs
-value(::Loss, ::AbstractArray, ::AbstractArray, ::LossFunctions.AverageMode, ::LearnBase.ObsDimension)
+value(::Loss, ::AbstractArray, ::AbstractArray, ::LossFunctions.AggregateMode, ::LearnBase.ObsDimension)
 ```

 Consider the following two matrices, `targets` and `outputs`.
@@ -136,12 +142,12 @@ the observations. Thus this data would consist of two
 observations with four variables each.

 ```jldoctest obsdim
-julia> value(L1DistLoss(), targets, outputs, AvgMode.Sum(), ObsDim.First())
+julia> value(L1DistLoss(), targets, outputs, AggMode.Sum(), ObsDim.First())
 2-element Array{Float64,1}:
  1.5
  2.0

-julia> value(L1DistLoss(), targets, outputs, AvgMode.Mean(), ObsDim.First())
+julia> value(L1DistLoss(), targets, outputs, AggMode.Mean(), ObsDim.First())
 2-element Array{Float64,1}:
  0.375
  0.5
@@ -152,14 +158,14 @@ second/last dimension denotes the observations. In that case
 our data consists of four observations with two variables each.

 ```jldoctest obsdim
-julia> value(L1DistLoss(), targets, outputs, AvgMode.Sum(), ObsDim.Last())
+julia> value(L1DistLoss(), targets, outputs, AggMode.Sum(), ObsDim.Last())
 4-element Array{Float64,1}:
  0.125
  0.625
  1.125
  1.625

-julia> value(L1DistLoss(), targets, outputs, AvgMode.Mean(), ObsDim.Last())
+julia> value(L1DistLoss(), targets, outputs, AggMode.Mean(), ObsDim.Last())
 4-element Array{Float64,1}:
  0.0625
  0.3125
@@ -172,17 +178,17 @@ mutating version that can make use a preallocated vector to
 write the results into.

 ```@docs
-value!(::AbstractArray, ::Loss, ::AbstractArray, ::AbstractArray, ::LossFunctions.AverageMode, ::LearnBase.ObsDimension)
+value!(::AbstractArray, ::Loss, ::AbstractArray, ::AbstractArray, ::LossFunctions.AggregateMode, ::LearnBase.ObsDimension)
 ```

 Naturally we also provide both of these methods for
 [`deriv`](@ref) and [`deriv2`](@ref) respectively.

 ```@docs
-deriv(::Loss, ::AbstractArray, ::AbstractArray, ::LossFunctions.AverageMode, ::LearnBase.ObsDimension)
-deriv!(::AbstractArray, ::Loss, ::AbstractArray, ::AbstractArray, ::LossFunctions.AverageMode, ::LearnBase.ObsDimension)
-deriv2(::Loss, ::AbstractArray, ::AbstractArray, ::LossFunctions.AverageMode, ::LearnBase.ObsDimension)
-deriv2!(::AbstractArray, ::Loss, ::AbstractArray, ::AbstractArray, ::LossFunctions.AverageMode, ::LearnBase.ObsDimension)
+deriv(::Loss, ::AbstractArray, ::AbstractArray, ::LossFunctions.AggregateMode, ::LearnBase.ObsDimension)
+deriv!(::AbstractArray, ::Loss, ::AbstractArray, ::AbstractArray, ::LossFunctions.AggregateMode, ::LearnBase.ObsDimension)
+deriv2(::Loss, ::AbstractArray, ::AbstractArray, ::LossFunctions.AggregateMode, ::LearnBase.ObsDimension)
+deriv2!(::AbstractArray, ::Loss, ::AbstractArray, ::AbstractArray, ::LossFunctions.AggregateMode, ::LearnBase.ObsDimension)
 ```

 ## Weighted Sum and Mean
@@ -233,7 +239,7 @@ each observation (which results in a vector), and then we
 compute the weighted sum of all observations.

 The following code snipped demonstrates how to compute the
-`AvgMode.WeightedSum([2,1])` manually. This is **not** meant as
+`AggMode.WeightedSum([2,1])` manually. This is **not** meant as
 an example of how to do it, but simply to show what is happening
 qualitatively. In this example we assume that we are working in a
 multi-variable regression setting, in which our data set has four
@@ -261,7 +267,7 @@ julia> sum(tmp .* [2, 1]) # weigh 1st observation twice as high
 5.0
 ```

-To manually compute the result for `AvgMode.WeightedMean([2,1])`
+To manually compute the result for `AggMode.WeightedMean([2,1])`
 we follow a similar approach, but use the normalized weight
 vector in the last step.

@@ -282,8 +288,8 @@ julia> sum(tmp .* [0.6666, 0.3333]) # weigh 1st observation twice as high
 Note that you can specify explicitly if you want to normalize
 the weight vector. That option is supported for computing the
 weighted sum, as well as for computing the weighted mean. See the
-documentation for [`AvgMode.WeightedSum`](@ref) and
-[`AvgMode.WeightedMean`](@ref) for more information.
+documentation for [`AggMode.WeightedSum`](@ref) and
+[`AggMode.WeightedMean`](@ref) for more information.

 The code-snippets above are of course very inefficient, because
 they allocate (multiple) temporary arrays. We only included them
@@ -293,16 +299,16 @@ special methods for [`value`](@ref), [`deriv`](@ref),
 [`deriv2`](@ref) and their mutating counterparts.

 ```jldoctest weight
-julia> value(L1DistLoss(), [1.,2,3], [2,5,-2], AvgMode.WeightedSum([1,2,1]))
+julia> value(L1DistLoss(), [1.,2,3], [2,5,-2], AggMode.WeightedSum([1,2,1]))
 12.0

-julia> value(L1DistLoss(), [1.,2,3], [2,5,-2], AvgMode.WeightedMean([1,2,1]))
+julia> value(L1DistLoss(), [1.,2,3], [2,5,-2], AggMode.WeightedMean([1,2,1]))
 3.0

-julia> value(L1DistLoss(), targets, outputs, AvgMode.WeightedSum([2,1]), ObsDim.First())
+julia> value(L1DistLoss(), targets, outputs, AggMode.WeightedSum([2,1]), ObsDim.First())
 5.0

-julia> value(L1DistLoss(), targets, outputs, AvgMode.WeightedMean([2,1]), ObsDim.First())
+julia> value(L1DistLoss(), targets, outputs, AggMode.WeightedMean([2,1]), ObsDim.First())
 0.4166666666666667
 ```

@@ -310,15 +316,15 @@ We also provide this functionality for [`deriv`](@ref) and
 [`deriv2`](@ref) respectively.

 ```jldoctest weight
-julia> deriv(L2DistLoss(), [1.,2,3], [2,5,-2], AvgMode.WeightedSum([1,2,1]))
+julia> deriv(L2DistLoss(), [1.,2,3], [2,5,-2], AggMode.WeightedSum([1,2,1]))
 4.0

-julia> deriv(L2DistLoss(), [1.,2,3], [2,5,-2], AvgMode.WeightedMean([1,2,1]))
+julia> deriv(L2DistLoss(), [1.,2,3], [2,5,-2], AggMode.WeightedMean([1,2,1]))
 1.0

-julia> deriv(L2DistLoss(), targets, outputs, AvgMode.WeightedSum([2,1]), ObsDim.First())
+julia> deriv(L2DistLoss(), targets, outputs, AggMode.WeightedSum([2,1]), ObsDim.First())
 10.0

-julia> deriv(L2DistLoss(), targets, outputs, AvgMode.WeightedMean([2,1]), ObsDim.First())
+julia> deriv(L2DistLoss(), targets, outputs, AggMode.WeightedMean([2,1]), ObsDim.First())
 0.8333333333333334
 ```
diff --git a/docs/src/user/interface.md b/docs/src/user/interface.md
index 2b72286..af0a00a 100644
--- a/docs/src/user/interface.md
+++ b/docs/src/user/interface.md
@@ -1,3 +1,9 @@
+```@meta
+DocTestSetup = quote
+    using LossFunctions
+end
+```
+
 # Working with Losses

 Even though they are called loss "functions", this package
diff --git a/src/LossFunctions.jl b/src/LossFunctions.jl
index e43b6e1..ef25e56 100644
--- a/src/LossFunctions.jl
+++ b/src/LossFunctions.jl
@@ -82,7 +82,7 @@ export

     AggMode

-include("common.jl")
+include("devutils.jl")
 include("aggregatemode.jl")
 include("supervised/supervised.jl")

diff --git a/src/common.jl b/src/common.jl
deleted file mode 100644
index 9e8812b..0000000
--- a/src/common.jl
+++ /dev/null
@@ -1,9 +0,0 @@
-macro _not_implemented()
-    quote
-        throw(ArgumentError("Not implemented for the given type"))
-    end
-end
-
-macro _dimcheck(condition)
-    :(($(esc(condition))) || throw(DimensionMismatch("Dimensions of the parameters don't match: $($(string(condition)))")))
-end
diff --git a/src/devutils.jl b/src/devutils.jl
new file mode 100644
index 0000000..49a42fe
--- /dev/null
+++ b/src/devutils.jl
@@ -0,0 +1,3 @@
+macro dimcheck(condition)
+    :(($(esc(condition))) || throw(DimensionMismatch("Dimensions of the parameters don't match: $($(string(condition)))")))
+end
diff --git a/src/supervised/io.jl b/src/supervised/io.jl
index c7dfdc2..76aa3ce 100644
--- a/src/supervised/io.jl
+++ b/src/supervised/io.jl
@@ -33,33 +33,12 @@ Base.print(io::IO, loss::WeightedBinaryLoss{T,W}, args...) where {T,W} = print(i

 _loss_xguide(loss::MarginLoss) = "y * h(x)"
 _loss_xguide(loss::DistanceLoss) = "h(x) - y"
+_loss_yguide(loss::SupervisedLoss) = "L("*_loss_xguide(loss)*")"

-@recipe function plot(drv::Deriv, rng = -2:0.05:2)
-    xguide --> _loss_xguide(drv.loss)
-    yguide --> "L'(y, h(x))"
-    label --> string(drv.loss)
-    deriv_fun(drv.loss), rng
-end
-
-@recipe function plot(loss::SupervisedLoss, rng = -2:0.05:2)
+@recipe function plot(loss::SupervisedLoss, range=-2:0.05:2; fun=value)
     xguide --> _loss_xguide(loss)
-    yguide --> "L(y, h(x))"
+    yguide --> _loss_yguide(loss)
     label --> string(loss)
-    value_fun(loss), rng
-end
-
-@recipe function plot(derivs::AbstractVector{T}, rng = -2:0.05:2) where T<:Deriv
-    for drv in derivs
-        @series begin
-            drv, rng
-        end
-    end
-end
-
-@recipe function plot(losses::AbstractVector{T}, rng = -2:0.05:2) where T<:SupervisedLoss
-    for loss in losses
-        @series begin
-            loss, rng
-        end
-    end
+    l(a) = fun(loss, a)
+    l, range
 end
diff --git a/src/supervised/sparse.jl b/src/supervised/sparse.jl
index b0aec2b..c94136b 100644
--- a/src/supervised/sparse.jl
+++ b/src/supervised/sparse.jl
@@ -19,8 +19,8 @@ end
     ) where {T,N,Q,Ti,M}
     M > N && throw(ArgumentError("target has more dimensions than output; broadcasting not supported in this direction."))
     quote
-        @_dimcheck size(buffer) == size(output)
-        @nexprs $M (n)->@_dimcheck(size(target,n) == size(output,n))
+        @dimcheck size(buffer) == size(output)
+        @nexprs $M (n)->@dimcheck(size(target,n) == size(output,n))
         zeroQ = zero(Q)
         negQ = Q(-1)
         @simd for I in CartesianIndices(size(output))
diff --git a/src/supervised/supervised.jl b/src/supervised/supervised.jl
index 051bf9d..a59bd4a 100644
--- a/src/supervised/supervised.jl
+++ b/src/supervised/supervised.jl
@@ -1,20 +1,7 @@
 Base.Broadcast.broadcastable(l::SupervisedLoss) = Ref(l)

 # --------------------------------------------------------------
-# non-exported types
-
-struct Deriv{L<:SupervisedLoss}
-    loss::L
-end
-@inline (d::Deriv)(args...) = deriv(d.loss, args...)
-
-struct Deriv2{L<:SupervisedLoss}
-    loss::L
-end
-@inline (d::Deriv2)(args...) = deriv2(d.loss, args...)
-
-# --------------------------------------------------------------
-# avgmode support
+# aggmode support

 for (FUN, DESC, EXAMPLE) in (
     (:value,
@@ -194,7 +181,7 @@ for (FUN, DESC, EXAMPLE) in (
         S, B = min(M,N), max(M,N)
         P = promote_type(Q,T)
         quote
-            @nexprs $S (n)->@_dimcheck(size(target, n) == size(output, n))
+            @nexprs $S (n)->@dimcheck(size(target, n) == size(output, n))
             nrm = 1 / $P(length($bigger))
             out = zero(($($FUN))(loss, one(Q), one(T)) * nrm)
             @inbounds @simd for I in CartesianIndices(size($bigger))
@@ -214,7 +201,7 @@ for (FUN, DESC, EXAMPLE) in (
         bigger = M > N ? :target : :output
         S, B = min(M,N), max(M,N)
         quote
-            @nexprs $S (n)->@_dimcheck(size(target, n) == size(output, n))
+            @nexprs $S (n)->@dimcheck(size(target, n) == size(output, n))
             out = zero(($($FUN))(loss, one(Q), one(T)))
             @inbounds @simd for I in CartesianIndices(size($bigger))
                 @nexprs $B n->(i_n = I[n])
@@ -235,8 +222,8 @@ for (FUN, DESC, EXAMPLE) in (
             avg::AggMode.WeightedMean,
             ::ObsDim.Constant{O}) where {Q,T,N,O}
         O > N && throw(ArgumentError("The specified obsdim is larger as the available dimensions."))
-        @_dimcheck size(target) == size(output)
-        @_dimcheck size(output, O) == length(avg.weights)
+        @dimcheck size(target) == size(output)
+        @dimcheck size(output, O) == length(avg.weights)
         k = prod(n != O ? size(output,n) : 1 for n in 1:N)::Int
         nrm = avg.normalize ? inv(k * sum(avg.weights)) : inv(k * one(sum(avg.weights)))
         out = zero(($FUN)(loss, one(Q), one(T)) * (avg.weights[1] * nrm))
@@ -254,8 +241,8 @@ for (FUN, DESC, EXAMPLE) in (
             avg::AggMode.WeightedSum,
             ::ObsDim.Constant{O}) where {Q,T,N,O}
         O > N && throw(ArgumentError("The specified obsdim is larger as the available dimensions."))
-        @_dimcheck size(target) == size(output)
-        @_dimcheck size(output, O) == length(avg.weights)
+        @dimcheck size(target) == size(output)
+        @dimcheck size(output, O) == length(avg.weights)
         nrm = avg.normalize ? inv(sum(avg.weights)) : inv(one(sum(avg.weights)))
         out = zero(($FUN)(loss, one(Q), one(T)) * (avg.weights[1] * nrm))
         @inbounds @simd for I in CartesianIndices(size(output))
@@ -289,8 +276,8 @@ for (FUN, DESC, EXAMPLE) in (
             ::ObsDim.Constant{O}) where {B,Q,T,N,O}
         N == 1 && throw(ArgumentError("Mean per observation non sensible for two Vectors. Try omitting the obsdim"))
         O > N && throw(ArgumentError("The specified obsdim is larger as the available dimensions."))
-        @_dimcheck size(target) == size(output)
-        @_dimcheck length(buffer) == size(output, O)
+        @dimcheck size(target) == size(output)
+        @dimcheck length(buffer) == size(output, O)
         fill!(buffer, zero(B))
         P = promote_type(Q,T)
         k = P(prod(size(output,n) for n in 1:N if n != O))
@@ -323,8 +310,8 @@ for (FUN, DESC, EXAMPLE) in (
             ::ObsDim.Constant{O}) where {B,Q,T,N,O}
         N == 1 && throw(ArgumentError("Sum per observation non sensible for two Vectors. Try omitting the obsdim"))
         O > N && throw(ArgumentError("The specified obsdim is larger as the available dimensions."))
-        @_dimcheck size(target) == size(output)
-        @_dimcheck length(buffer) == size(output, O)
+        @dimcheck size(target) == size(output)
+        @dimcheck length(buffer) == size(output, O)
         fill!(buffer, zero(B))
         @inbounds @simd for I in CartesianIndices(size(output))
             buffer[I[O]] += ($FUN)(loss, target[I], output[I])
@@ -419,7 +406,7 @@ for (FUN, DESC, EXAMPLE) in (
             numbers::AbstractArray{T,N},
             avg::AggMode.WeightedMean,
             ::ObsDim.Constant{O}) where {T,N,O}
-        @_dimcheck size(numbers, O) == length(avg.weights)
+        @dimcheck size(numbers, O) == length(avg.weights)
         k = prod(n != O ? size(numbers,n) : 1 for n in 1:N)::Int
         nrm = avg.normalize ? inv(k * sum(avg.weights)) : inv(k * one(sum(avg.weights)))
         out = zero(($FUN)(loss, one(T)) * (avg.weights[1] * nrm))
@@ -435,7 +422,7 @@ for (FUN, DESC, EXAMPLE) in (
             numbers::AbstractArray{T,N},
             avg::AggMode.WeightedSum,
             ::ObsDim.Constant{O}) where {T,N,O}
-        @_dimcheck size(numbers, O) == length(avg.weights)
+        @dimcheck size(numbers, O) == length(avg.weights)
         nrm = avg.normalize ? inv(sum(avg.weights)) : inv(one(sum(avg.weights)))
         out = zero(($FUN)(loss, one(T)) * (avg.weights[1] * nrm))
         @inbounds @simd for I in CartesianIndices(size(numbers))
@@ -467,7 +454,7 @@ for (FUN, DESC, EXAMPLE) in (
             ::ObsDim.Constant{O}) where {B,T,N,O}
         N == 1 && throw(ArgumentError("Mean per observation non sensible for two Vectors. Try omitting the obsdim"))
         O > N && throw(ArgumentError("The specified obsdim is larger as the available dimensions."))
-        @_dimcheck length(buffer) == size(numbers, O)
+        @dimcheck length(buffer) == size(numbers, O)
         fill!(buffer, zero(B))
         k = prod(size(numbers,n) for n in 1:N if n != O)::Int
         nrm = 1 / k
@@ -497,7 +484,7 @@ for (FUN, DESC, EXAMPLE) in (
             ::ObsDim.Constant{O}) where {B,T,N,O}
         N == 1 && throw(ArgumentError("Sum per observation non sensible for two Vectors. Try omitting the obsdim"))
         O > N && throw(ArgumentError("The specified obsdim is larger as the available dimensions."))
-        @_dimcheck length(buffer) == size(numbers, O)
+        @dimcheck length(buffer) == size(numbers, O)
         fill!(buffer, zero(B))
         @inbounds @simd for I in CartesianIndices(size(numbers))
            buffer[I[O]] += ($FUN)(loss, numbers[I])
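---

A quick smoke test of the renamed API, for reviewers. This is a minimal sketch adapted from the doctests in `docs/src/user/aggregate.md` above; it assumes this branch is active, and the expected results in the comments are taken from those doctests or worked out by hand for the unweighted case.

```julia
using LossFunctions

targets = [1.0, 2.0, 3.0]
outputs = [2.0, 5.0, -2.0]

# Elementwise L1 losses are |output - target| = [1, 3, 5].
value(L1DistLoss(), targets, outputs, AggMode.Sum())                  # 9.0  (by hand)
value(L1DistLoss(), targets, outputs, AggMode.WeightedSum([1,2,1]))   # 12.0 (from the doctest)
deriv(L2DistLoss(), targets, outputs, AggMode.WeightedMean([1,2,1]))  # 1.0  (from the doctest)
```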
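The per-observation modes compose with `ObsDim` the same way. The matrices below are hypothetical (they are not the `targets`/`outputs` fixtures used by the doctests), and the expected vectors are computed by hand from the elementwise L1 distances:

```julia
using LossFunctions

targets = [1.0 2.0;
           3.0 4.0]
outputs = fill(2.0, 2, 2)

# Elementwise L1 losses: [1.0 0.0; 1.0 2.0]
value(L1DistLoss(), targets, outputs, AggMode.Sum(), ObsDim.First())
# -> [1.0, 3.0], one sum per row (observations along dimension 1)
value(L1DistLoss(), targets, outputs, AggMode.Sum(), ObsDim.Last())
# -> [2.0, 2.0], one sum per column (observations along dimension 2)
```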
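Finally, the consolidated recipe in `src/supervised/io.jl` replaces the `Deriv`/`Deriv2` wrapper types with a `fun` keyword. A sketch of the intended usage under this diff (requires Plots.jl; the loss names are just examples from the package):

```julia
using Plots, LossFunctions

plot(L1HingeLoss())                           # value(loss, a) over the default -2:0.05:2
plot(L1HingeLoss(), fun = deriv)              # first derivative instead of the value
plot(L2MarginLoss(), -1:0.01:1, fun = deriv2) # custom range, second derivative
```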