Skip to content

Commit

Permalink
Fixes for type instabilities in histogram functions (#253)
Browse files Browse the repository at this point in the history
* Add type stability tests for histogram functions

* Fix type instabilities in histogram functions

* Remove unused isdensity arguments from fit(::Histogram...)

isdensity arguments don't belong there in the first place.

* Add more elegant Julia v0.6 implementation in _nbins_tuple.

Suggested by @nalimilan.
  • Loading branch information
oschulz authored and andreasnoack committed May 3, 2017
1 parent 38c62a1 commit e200a72
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 31 deletions.
55 changes: 41 additions & 14 deletions src/hist.jl
Expand Up @@ -27,6 +27,14 @@ end
end


# Need a generated function to promote edge types, because a simple
# promote_type(map(eltype, h.edges)...) isn't type stable (tested
# with Julia v0.5).
#
# Returns the common promoted element type of all edge vectors in `edges`.
# Inside the body of a generated function `edges` is bound to the *type* of
# the argument (a tuple type), so `edges.parameters` holds the type of each
# edge vector. The promoted type is thus computed from types alone and is
# returned as the generated method body.
@generated function _promote_edge_types{N}(edges::NTuple{N,AbstractVector})
promote_type(map(eltype, edges.parameters)...)
end


## nice-valued ranges for histograms
function histrange{T}(v::AbstractArray{T}, n::Integer, closed::Symbol=:default_left)
closed = _check_closed_arg(closed,:histrange)
Expand Down Expand Up @@ -134,7 +142,7 @@ type Histogram{T<:Real,N,E} <: AbstractHistogram{T,N,E}
weights::Array{T,N}, closed::Symbol, isdensity::Bool=false)
closed == :right || closed == :left || error("closed must :left or :right")
isdensity && !(T <: AbstractFloat) && error("Density histogram must have float-type weights")
map(x -> length(x)-1,edges) == size(weights) || error("Histogram edge vectors must be 1 longer than corresponding weight dimensions")
_edges_nbins(edges) == size(weights) || error("Histogram edge vectors must be 1 longer than corresponding weight dimensions")
new{T,N,E}(edges,weights,closed,isdensity)
end
end
Expand All @@ -143,7 +151,7 @@ Histogram{T,N}(edges::NTuple{N,AbstractVector},weights::AbstractArray{T,N},close
Histogram{T,N,typeof(edges)}(edges,weights,_check_closed_arg(closed,:Histogram),isdensity)

Histogram{T,N}(edges::NTuple{N,AbstractVector},::Type{T},closed::Symbol=:default_left, isdensity::Bool=false) =
Histogram(edges,zeros(T,map(x -> length(x)-1,edges)...),_check_closed_arg(closed,:Histogram),isdensity)
Histogram(edges,zeros(T,_edges_nbins(edges)...),_check_closed_arg(closed,:Histogram),isdensity)

# N-dimensional constructor without an explicit weight type: validates
# `closed` and forwards to `Histogram(edges, Int, closed, isdensity)`.
function Histogram{N}(edges::NTuple{N,AbstractVector}, closed::Symbol=:default_left, isdensity::Bool=false)
    checked_closed = _check_closed_arg(closed, :Histogram)
    return Histogram(edges, Int, checked_closed, isdensity)
end
Expand Down Expand Up @@ -180,7 +188,7 @@ binvolume{T,E}(h::AbstractHistogram{T,1,E}, binidx::Integer) = binvolume(h, (bin
# 1-dimensional convenience method: wraps the scalar bin index in a 1-tuple
# and forwards to the tuple-index `binvolume(V, h, idx)` method.
function binvolume{V,T,E}(::Type{V}, h::AbstractHistogram{T,1,E}, binidx::Integer)
    idx_tuple = (binidx,)
    return binvolume(V, h, idx_tuple)
end

binvolume{T,N,E}(h::Histogram{T,N,E}, binidx::NTuple{N,Integer}) =
binvolume(promote_type(map(eltype, h.edges)...), h, binidx)
binvolume(_promote_edge_types(h.edges), h, binidx)

# Volume of the bin addressed by `binidx`, computed in type `V`: the product
# of the per-dimension `_edge_binvolume` values, pairing each edge vector in
# `h.edges` with the matching entry of `binidx`.
function binvolume{V,T,N,E}(::Type{V}, h::Histogram{T,N,E}, binidx::NTuple{N,Integer})
    per_axis = map((axis_edges, k) -> _edge_binvolume(V, axis_edges, k), h.edges, binidx)
    return prod(per_axis)
end
Expand All @@ -190,6 +198,11 @@ binvolume{V,T,N,E}(::Type{V}, h::Histogram{T,N,E}, binidx::NTuple{N,Integer}) =
# Bin volume along a single edge vector, using the edge vector's own element
# type as the result type (forwards to the three-argument method).
@inline function _edge_binvolume(edge::AbstractVector, i::Integer)
    return _edge_binvolume(eltype(edge), edge, i)
end


# Tuple with the number of bins per dimension, obtained by applying
# `_edge_nbins` to every edge vector in `edges`.
@inline function _edges_nbins{N}(edges::NTuple{N,AbstractVector})
    return map(_edge_nbins, edges)
end

# Number of bins described by one edge vector: one less than the number of
# edge values.
@inline function _edge_nbins(edge::AbstractVector)
    return length(edge) - 1
end


# 1-dimensional

Histogram{T}(edge::AbstractVector, weights::AbstractVector{T}, closed::Symbol=:default_left, isdensity::Bool=false) =
Expand Down Expand Up @@ -259,34 +272,48 @@ end
append!{T,N}(h::AbstractHistogram{T,N}, vs::NTuple{N,AbstractVector}, wv::WeightVec) = append!(h, vs, values(wv))


# Turn kwargs nbins into a type-stable tuple of integers:
#
# `vs` is the tuple of data vectors, one per histogram dimension. `nbins` is
# the untyped keyword value supplied by the caller. The result always has the
# same type as `map(length, vs)`, which the final type assertion enforces.
function _nbins_tuple{N}(vs::NTuple{N,AbstractVector}, nbins)
# Only the element types (and the overall tuple type) of `template` are used;
# the length values themselves are never read.
template = map(length, vs)

# The branch is selected at parse/lowering time via `@static`, so only the
# code path matching the running Julia version is compiled.
@static if VERSION < v"0.6.0-dev.695"
result = if isa(nbins, Integer)
# Scalar nbins: use the same bin count for every dimension.
map(t -> typeof(t)(nbins), template)
elseif isa(nbins, NTuple{N, Integer})
# One bin count per dimension, converted element-wise.
map((t, x) -> typeof(t)(x), template, nbins)
else
throw(ArgumentError("nbins must be an Integer or NTuple{N, Integer}"))
end
else
# Newer Julia: a single broadcast is relied upon to cover both the scalar and
# the tuple form of `nbins`. NOTE(review): no explicit ArgumentError is raised
# on this path; unsupported `nbins` values fail inside broadcast or at the
# type assertion below.
result = broadcast((t, x) -> typeof(t)(x), template, nbins)
end

# Pin the return type so callers can be inferred regardless of `nbins`.
result::typeof(template)
end

# Fit an unweighted histogram with weight type `T` on explicitly given
# `edges`: build an empty, non-density histogram and append the data `vs`.
function fit{T,N}(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}, edges::NTuple{N,AbstractVector}; closed::Symbol=:default_left)
    empty_hist = Histogram(edges, T, _check_closed_arg(closed, :fit), false)
    return append!(empty_hist, vs)
end

fit{T,N}(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}; closed::Symbol=:default_left, isdensity::Bool=false, nbins=sturges(length(vs[1]))) = begin
fit{T,N}(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}; closed::Symbol=:default_left, nbins=sturges(length(vs[1]))) = begin
closed = _check_closed_arg(closed,:fit)
fit(Histogram{T}, vs, histrange(vs,nbins,closed); closed=closed)
fit(Histogram{T}, vs, histrange(vs,_nbins_tuple(vs, nbins),closed); closed=closed)
end

# Fit a weighted histogram with weight type `T` on explicitly given `edges`:
# build an empty, non-density histogram and append the data `vs` together
# with the weights `wv`.
function fit{T,N,W}(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}, wv::WeightVec{W}, edges::NTuple{N,AbstractVector}; closed::Symbol=:default_left)
    empty_hist = Histogram(edges, T, _check_closed_arg(closed, :fit), false)
    return append!(empty_hist, vs, wv)
end

fit{T,N}(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}, wv::WeightVec; closed::Symbol=:default_left, isdensity::Bool=false, nbins=sturges(length(vs[1]))) = begin
fit{T,N}(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}, wv::WeightVec; closed::Symbol=:default_left, nbins=sturges(length(vs[1]))) = begin
closed = _check_closed_arg(closed,:fit)
fit(Histogram{T}, vs, wv, histrange(vs,nbins,closed); closed=closed)
fit(Histogram{T}, vs, wv, histrange(vs,_nbins_tuple(vs, nbins),closed); closed=closed)
end

# Without an explicit weight type, fit a histogram with `Int` weights; all
# positional and keyword arguments are forwarded unchanged.
function fit(::Type{Histogram}, args...; kwargs...)
    return fit(Histogram{Int}, args...; kwargs...)
end
# Without an explicit weight type but with a weight vector, use the weight
# vector's type parameter `W` as the histogram weight type; everything else
# is forwarded unchanged.
function fit{N,W}(::Type{Histogram}, vs::NTuple{N,AbstractVector}, wv::WeightVec{W}, args...; kwargs...)
    return fit(Histogram{W}, vs, wv, args...; kwargs...)
end


# Get a suitable high-precision type for the norm of a histogram.
@generated function norm_type{T, N, E}(h::Histogram{T, N, E})
args = [:( eltype(edges[$d]) ) for d = 1:N]
quote
edges = h.edges
norm_type(promote_type(T, $(args...)))
end
end
# Type used for the norm of histogram `h`: the weight type `T` promoted with
# the common element type of all edge vectors.
function norm_type{T, N, E}(h::Histogram{T, N, E})
    edge_type = _promote_edge_types(h.edges)
    return promote_type(T, edge_type)
end

# Integer types are promoted with `Int64`.
function norm_type{T<:Integer}(::Type{T})
    return promote_type(T, Int64)
end
# Floating-point types are promoted with `Float64`.
function norm_type{T<:AbstractFloat}(::Type{T})
    return promote_type(T, Float64)
end
Expand Down
39 changes: 22 additions & 17 deletions test/hist.jl
Expand Up @@ -13,20 +13,22 @@ using Base.Test
h1 = Histogram(edg1, :left)
h2 = Histogram((edg1, edg2), :left)
h3 = Histogram((edg1f0, edg2), :left)
@test StatsBase.binindex(h1, -0.5) == 4
@test StatsBase.binindex(h2, (1.5, 2)) == (8, 3)
@test @inferred StatsBase.binindex(h1, -0.5) == 4
@test @inferred StatsBase.binindex(h2, (1.5, 2)) == (8, 3)

@test [StatsBase.binvolume(h1, i) for i in indices(h1.weights, 1)] ≈ diff(edg1)
@test [StatsBase.binvolume(h2, (i,j)) for i in indices(h2.weights, 1), j in indices(h2.weights, 2)] ≈ diff(edg1) * diff(edg2)'

@test typeof(StatsBase.binvolume(h2, (1,1))) == Float64
@test typeof(StatsBase.binvolume(h3, (1,1))) == Float32
@test typeof(StatsBase.binvolume(Float64, h3, (1,1))) == Float64
@test typeof(@inferred(StatsBase.binvolume(h2, (1,1)))) == Float64
@test typeof(@inferred(StatsBase.binvolume(h3, (1,1)))) == Float32
@test typeof(@inferred(StatsBase.binvolume(Float64, h3, (1,1)))) == Float64
end


@testset "Histogram append" begin
# FIXME: closed (all lines in this block):
h = Histogram(0:20:100, Float64, :left, false)
@test @inferred(append!(h, 0:0.5:99.99)) == h
@test append!(Histogram(0:20:100, Float64, :left, false), 0:0.5:99.99).weights ≈ [40,40,40,40,40]
@test append!(Histogram(0:20:100, Float64, :left, true), 0:0.5:99.99).weights ≈ [2,2,2,2,2]
@test append!(Histogram(0:20:100, Float64, :left, false), 0:0.5:99.99, fill(2, 200)).weights ≈ [80,80,80,80,80]
Expand Down Expand Up @@ -64,16 +66,16 @@ end

@testset "Histogram element type" begin
# FIXME: closed (all lines in this block):
@test eltype(fit(Histogram,1:100,weights(ones(Int,100)),nbins=5, closed=:left).weights) == Int
@test eltype(fit(Histogram{Float32},1:100,weights(ones(Int,100)),nbins=5, closed=:left).weights) == Float32
@test eltype(fit(Histogram,1:100,weights(ones(Float64,100)),nbins=5, closed=:left).weights) == Float64
@test eltype(fit(Histogram{Float32},1:100,weights(ones(Float64,100)),nbins=5, closed=:left).weights) == Float32
@test eltype(@inferred(fit(Histogram,1:100,weights(ones(Int,100)),nbins=5, closed=:left)).weights) == Int
@test eltype(@inferred(fit(Histogram{Float32},1:100,weights(ones(Int,100)),nbins=5, closed=:left)).weights) == Float32
@test eltype(@inferred(fit(Histogram,1:100,weights(ones(Float64,100)),nbins=5, closed=:left)).weights) == Float64
@test eltype(@inferred(fit(Histogram{Float32},1:100,weights(ones(Float64,100)),nbins=5, closed=:left)).weights) == Float32
end


@testset "histrange" begin
# Note: atm histrange must be qualified
@test StatsBase.histrange(Float64[], 0, :left) == 0.0:1.0:0.0
@test @inferred(StatsBase.histrange(Float64[], 0, :left)) == 0.0:1.0:0.0
@test StatsBase.histrange(Float64[1:5;], 1, :left) == 0.0:5.0:10.0
@test StatsBase.histrange(Float64[1:10;], 1, :left) == 0.0:10.0:20.0
@test StatsBase.histrange(1.0, 10.0, 1, :left) == 0.0:10.0:20.0
Expand All @@ -90,7 +92,7 @@ end
@test StatsBase.histrange([200.0,300.0], 10, :left) == 200.0:10.0:310.0
@test StatsBase.histrange([200.0,300.0], 10, :right) == 190.0:10.0:300.0

@test StatsBase.histrange(Int64[1:5;], 1, :left) == 0:5:10
@test @inferred(StatsBase.histrange(Int64[1:5;], 1, :left)) == 0:5:10
@test StatsBase.histrange(Int64[1:10;], 1, :left) == 0:10:20

# FIXME: closed (all lines in this block):
Expand Down Expand Up @@ -149,25 +151,28 @@ end

@test norm(h) ≈ sum(h.weights .* bin_vols)

@test normalize(h, mode = :none) == h
@test @inferred(normalize(h, mode = :none)) == h


h_pdf = normalize(h, mode = :pdf)
@test h_pdf.weights ≈ h.weights ./ bin_vols ./ weight_sum
@test h_pdf.isdensity == true
@test norm(h_pdf) ≈ 1
@test normalize(h_pdf, mode = :pdf) == h_pdf
@test normalize(h_pdf, mode = :density) == h_pdf
@test @inferred(norm(h_pdf)) ≈ 1
@test @inferred(normalize(h_pdf, mode = :pdf)) == h_pdf
@test @inferred(normalize(h_pdf, mode = :density)) == h_pdf

h_density = normalize(h, mode = :density)
@test h_density.weights ≈ h.weights ./ bin_vols
@test h_density.isdensity == true
@test norm(h_density) ≈ weight_sum
@test normalize(h_density, mode = :pdf) ==
@test @inferred(norm(h_density)) ≈ weight_sum
@test @inferred(normalize(h_density, mode = :pdf)) ==
Histogram(h_density.edges, h_density.weights .* (1/norm(h_density)), h_density.closed, true)
@test normalize(h_density, mode = :pdf).weights ≈ h_pdf.weights
@test normalize(h_density, mode = :density) == h_density

h_copy = deepcopy(float(h))
@test @inferred(normalize!(h_copy, mode = :density)) == h_copy

h2 = deepcopy(float(h))
mod_h2 = normalize!(h2, mode = :density)
@test mod_h2 === h2 && mod_h2.weights === h2.weights
Expand Down

0 comments on commit e200a72

Please sign in to comment.