diff --git a/README.md b/README.md index 02fef541..36e062b7 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Kernel density estimators for Julia. ### Univariate The main accessor function is `kde`: -``` +```julia U = kde(data) ``` @@ -33,7 +33,9 @@ The `UnivariateKDE` object `U` contains gridded coordinates (`U.x`) and the dens estimate (`U.density`). These are typically sufficient for plotting. A related function -``` kde_lscv(data) ``` +```julia +kde_lscv(data) +``` will construct a `UnivariateKDE` object, with the bandwidth selected by least-squares cross validation. It accepts the above keyword arguments, except @@ -41,39 +43,51 @@ least-squares cross validation. It accepts the above keyword arguments, except There are also some slightly more advanced interfaces: -``` +```julia kde(data, midpoints::R) where R<:AbstractRange ``` allows specifying the internal grid to use. Optional keyword arguments are `kernel` and `bandwidth`. -``` +```julia kde(data, dist::Distribution) ``` allows specifying the exact distribution to use as the kernel. Optional keyword arguments are `boundary` and `npoints`. -``` +```julia kde(data, midpoints::R, dist::Distribution) where R<:AbstractRange ``` allows specifying both the distribution and grid. -### Bivariate +### Multivariate The usage mirrors that of the univariate case, except that `data` is now -either a tuple of vectors -``` -B = kde((xdata, ydata)) +either an `N`-tuple of vectors +```julia +M = kde((xdata, ydata, zdata)) ``` -or a matrix with two columns +or a matrix with `N` columns +```julia +M = kde(datamatrix) ``` -B = kde(datamatrix) +Similarly, the optional arguments all now take `N`-tuple arguments: +e.g. `boundary` now takes a `N`-tuple of tuples `((xlo, xhi), (ylo, yhi), (zlo, zhi))`. + +The `MultivariateKDE` object `M` contains gridded coordinates (`M.ranges`, an +`N`-tuple of `AbstractRange`s) and the multivariate density estimate (`M.density`). + +### Bi- and Trivariate + +Special type definitions exist for the bi- and trivariate case: +```julia +const BivariateKDE = MultivariateKDE{2, ...} +const TrivariateKDE = MultivariateKDE{3, ...} ``` -Similarly, the optional arguments all now take tuple arguments: -e.g. `boundary` now takes a tuple of tuples `((xlo,xhi),(ylo,yhi))`. -The `BivariateKDE` object `B` contains gridded coordinates (`B.x` and `B.y`) and the bivariate density -estimate (`B.density`). +Their contained gridded coordinates can be accessed as `B.x` and `B.y` for +`B isa BivariateKDE` and `T.x`, `T.y`, and `T.z` for `T isa TrivariateKDE`, +respectively. ### Interpolation @@ -83,17 +97,19 @@ intermediate values can be interpolated using the [Interpolations.jl](https://github.com/tlycken/Interpolations.jl) package via the `pdf` method (extended from Distributions.jl). -``` +```julia pdf(k::UnivariateKDE, x) pdf(k::BivariateKDE, x, y) +pdf(k::TrivariateKDE, x, y, z) +pdf(k::MultivariateKDE, x...) ``` -where `x` and `y` are real numbers or arrays. +where `x`, `y`, and `z` are real numbers or arrays. If you are making multiple calls to `pdf`, it will be more efficient to construct an intermediate `InterpKDE` to store the interpolation structure: -``` +```julia ik = InterpKDE(k) pdf(ik, x) ``` diff --git a/src/KernelDensity.jl b/src/KernelDensity.jl index 15de0601..678dd776 100644 --- a/src/KernelDensity.jl +++ b/src/KernelDensity.jl @@ -9,12 +9,14 @@ import StatsBase: RealVector, RealMatrix import Distributions: twoπ, pdf import FFTW: rfft, irfft -export kde, kde_lscv, UnivariateKDE, BivariateKDE, InterpKDE, pdf +export kde, kde_lscv, UnivariateKDE, BivariateKDE, MultivariateKDE, InterpKDE, pdf abstract type AbstractKDE end include("univariate.jl") +include("multivariate.jl") include("bivariate.jl") +include("trivariate.jl") include("interp.jl") end # module diff --git a/src/bivariate.jl b/src/bivariate.jl index d54c41ae..f457bcb2 100644 --- a/src/bivariate.jl +++ b/src/bivariate.jl @@ -1,153 +1,15 @@ -""" -$(TYPEDEF) +const BivariateKDE{Rx <: AbstractRange, Ry <: AbstractRange} = + MultivariateKDE{2, Tuple{Rx, Ry}} -Store both grid and density for KDE over the real line. -Reading the fields directly is part of the API, and +const BivariateDistribution = NTuple{2, UnivariateDistribution} -```julia -sum(density) * step(x) * step(y) ≈ 1 -``` - -# Fields - -$(FIELDS) -""" -mutable struct BivariateKDE{Rx<:AbstractRange,Ry<:AbstractRange} <: AbstractKDE - "First coordinate of gridpoints for evaluating the density." - x::Rx - "Second coordinate of gridpoints for evaluating the density." - y::Ry - "Kernel density at corresponding gridpoints `Tuple.(x, permutedims(y))`." - density::Matrix{Float64} -end - -function kernel_dist(::Type{D},w::Tuple{Real,Real}) where D<:UnivariateDistribution - kernel_dist(D,w[1]), kernel_dist(D,w[2]) -end -function kernel_dist(::Type{Tuple{Dx, Dy}},w::Tuple{Real,Real}) where {Dx<:UnivariateDistribution,Dy<:UnivariateDistribution} - kernel_dist(Dx,w[1]), kernel_dist(Dy,w[2]) -end - -const DataTypeOrUnionAll = Union{DataType, UnionAll} - -# this function provided for backwards compatibility, though it doesn't have the type restrictions -# to ensure that the given tuple only contains univariate distributions -function kernel_dist(d::Tuple{DataTypeOrUnionAll, DataTypeOrUnionAll}, w::Tuple{Real,Real}) - kernel_dist(d[1],w[1]), kernel_dist(d[2],w[2]) -end - -# TODO: there are probably better choices. -function default_bandwidth(data::Tuple{RealVector,RealVector}) - default_bandwidth(data[1]), default_bandwidth(data[2]) -end - -# tabulate data for kde -function tabulate(data::Tuple{RealVector, RealVector}, midpoints::Tuple{Rx, Ry}, - weights::Weights = default_weights(data)) where {Rx<:AbstractRange,Ry<:AbstractRange} - xdata, ydata = data - ndata = length(xdata) - length(ydata) == ndata || error("data vectors must be of same length") - - xmid, ymid = midpoints - nx, ny = length(xmid), length(ymid) - sx, sy = step(xmid), step(ymid) - - # Set up a grid for discretized data - grid = zeros(Float64, nx, ny) - ainc = 1.0 / (sum(weights)*(sx*sy)^2) - - # weighted discretization (cf. Jones and Lotwick) - for i in 1:length(xdata) - x = xdata[i] - y = ydata[i] - kx, ky = searchsortedfirst(xmid,x), searchsortedfirst(ymid,y) - jx, jy = kx-1, ky-1 - if 1 <= jx <= nx-1 && 1 <= jy <= ny-1 - grid[jx,jy] += (xmid[kx]-x)*(ymid[ky]-y)*ainc*weights[i] - grid[kx,jy] += (x-xmid[jx])*(ymid[ky]-y)*ainc*weights[i] - grid[jx,ky] += (xmid[kx]-x)*(y-ymid[jy])*ainc*weights[i] - grid[kx,ky] += (x-xmid[jx])*(y-ymid[jy])*ainc*weights[i] - end - end - - # returns an un-convolved KDE - BivariateKDE(xmid, ymid, grid) -end - -# convolution with product distribution of two univariates distributions -function conv(k::BivariateKDE, dist::Tuple{UnivariateDistribution,UnivariateDistribution}) - # Transform to Fourier basis - Kx, Ky = size(k.density) - ft = rfft(k.density) - - distx, disty = dist - - # Convolve fft with characteristic function of kernel - cx = -twoπ/(step(k.x)*Kx) - cy = -twoπ/(step(k.y)*Ky) - for j = 0:size(ft,2)-1 - for i = 0:size(ft,1)-1 - ft[i+1,j+1] *= cf(distx,i*cx)*cf(disty,min(j,Ky-j)*cy) - end +function Base.getproperty(k::BivariateKDE, s::Symbol) + if s === :x + k.ranges[1] + elseif s === :y + k.ranges[2] + else + getfield(k, s) end - dens = irfft(ft, Kx) - - for i = 1:length(dens) - dens[i] = max(0.0,dens[i]) - end - - # Invert the Fourier transform to get the KDE - BivariateKDE(k.x, k.y, dens) -end - -const BivariateDistribution = Union{MultivariateDistribution,Tuple{UnivariateDistribution,UnivariateDistribution}} - -default_weights(data::Tuple{RealVector, RealVector}) = UniformWeights(length(data[1])) - -function kde(data::Tuple{RealVector, RealVector}, weights::Weights, midpoints::Tuple{Rx, Ry}, - dist::BivariateDistribution) where {Rx<:AbstractRange,Ry<:AbstractRange} - k = tabulate(data, midpoints, weights) - conv(k,dist) -end - -function kde(data::Tuple{RealVector, RealVector}, dist::BivariateDistribution; - boundary::Tuple{Tuple{Real,Real}, Tuple{Real,Real}} = (kde_boundary(data[1],std(dist[1])), - kde_boundary(data[2],std(dist[2]))), - npoints::Tuple{Int,Int}=(256,256), - weights::Weights = default_weights(data)) - - xmid = kde_range(boundary[1],npoints[1]) - ymid = kde_range(boundary[2],npoints[2]) - - kde(data,weights,(xmid,ymid),dist) -end - -function kde(data::Tuple{RealVector, RealVector}, midpoints::Tuple{Rx, Ry}; - bandwidth=default_bandwidth(data), kernel=Normal, - weights::Weights = default_weights(data)) where {Rx<:AbstractRange,Ry<:AbstractRange} - - dist = kernel_dist(kernel,bandwidth) - kde(data,weights,midpoints,dist) -end - -function kde(data::Tuple{RealVector, RealVector}; - bandwidth=default_bandwidth(data), - kernel=Normal, - boundary::Tuple{Tuple{Real,Real}, Tuple{Real,Real}} = (kde_boundary(data[1],bandwidth[1]), - kde_boundary(data[2],bandwidth[2])), - npoints::Tuple{Int,Int}=(256,256), - weights::Weights = default_weights(data)) - - dist = kernel_dist(kernel,bandwidth) - xmid = kde_range(boundary[1],npoints[1]) - ymid = kde_range(boundary[2],npoints[2]) - - kde(data,weights,(xmid,ymid),dist) -end - -# matrix data -function kde(data::RealMatrix,args...;kwargs...) - size(data,2) == 2 || error("Can only construct KDE from matrices with 2 columns.") - kde((data[:,1],data[:,2]),args...;kwargs...) end diff --git a/src/interp.jl b/src/interp.jl index 05e648e6..5b9e0a75 100644 --- a/src/interp.jl +++ b/src/interp.jl @@ -16,17 +16,21 @@ end InterpKDE(kde::UnivariateKDE) = InterpKDE(kde, BSpline(Quadratic(Line(OnGrid())))) -function InterpKDE(kde::BivariateKDE, opts...) +function InterpKDE(kde::MultivariateKDE, opts...) itp_u = interpolate(kde.density,opts...) - itp = scale(itp_u, kde.x, kde.y) + itp = scale(itp_u, kde.ranges...) etp = extrapolate(itp, zero(eltype(kde.density))) InterpKDE{typeof(kde),typeof(etp)}(kde, etp) end -InterpKDE(kde::BivariateKDE) = InterpKDE(kde::BivariateKDE, BSpline(Quadratic(Line(OnGrid())))) +InterpKDE(kde::MultivariateKDE) = InterpKDE(kde, BSpline(Quadratic(Line(OnGrid())))) pdf(ik::InterpKDE,x::Real...) = ik.itp(x...) pdf(ik::InterpKDE,xs::AbstractVector) = [ik.itp(x) for x in xs] -pdf(ik::InterpKDE,xs::AbstractVector,ys::AbstractVector) = [ik.itp(x,y) for x in xs, y in ys] +function pdf(ik::InterpKDE{K, I}, xs::Vararg{AbstractVector, N}) where + {N, R, K <: MultivariateKDE{N, R}, I} + + [ik.itp(x...) for x in Iterators.product(xs...)] +end pdf(k::UnivariateKDE,x) = pdf(InterpKDE(k),x) -pdf(k::BivariateKDE,x,y) = pdf(InterpKDE(k),x,y) +pdf(k::MultivariateKDE,x...) = pdf(InterpKDE(k),x...) diff --git a/src/multivariate.jl b/src/multivariate.jl new file mode 100644 index 00000000..3fb58ffe --- /dev/null +++ b/src/multivariate.jl @@ -0,0 +1,220 @@ +""" +$(TYPEDEF) + +Store both grid and density for KDE over the real line. + +Reading the fields directly is part of the API, and + +```julia +sum(density) * prod(step.(ranges)) ≈ 1 +``` + +# Fields + +$(FIELDS) +""" +struct MultivariateKDE{N, R <: NTuple{N, AbstractRange}} <: AbstractKDE + "Coordinates of gridpoints for evaluating density" + ranges::R + "Kernel density at corresponding gridpoints." + density::Array{Float64, N} +end + +function kernel_dist(::Type{D}, w::NTuple{N, Real}) where {N, D <: UnivariateDistribution} + kernel_dist.(D, w) +end + +@generated function kernel_dist(::Type{Ds}, w::NTuple{N, Real}) where {N, Ds <: NTuple{N, UnivariateDistribution}} + quote + kernel_dist.( + # Ds.parameters is of type svec which does not have a compile time + # known length. By splatting it into a tuple at compile time, proper + # type inference is possible. + ($(Ds.parameters...),), + w + ) + end +end + +const DataTypeOrUnionAll = Union{DataType, UnionAll} + +# this function provided for backwards compatibility, though it doesn't have the type restrictions +# to ensure that the given tuple only contains univariate distributions +function kernel_dist(d::NTuple{N, DataTypeOrUnionAll}, w::NTuple{N, Real}) where N + kernel_dist.(d, w) +end + +# TODO: there are probably better choices. +function default_bandwidth(data::NTuple{N, RealVector}) where N + default_bandwidth.(data) +end + +@generated function interpolate_in_hypercube!( + grid::Array{Float64, N}, + midpoints::NTuple{N, AbstractRange}, + coords::NTuple{N, Real}, + high_idcs::NTuple{N, Int}, + low_idcs::NTuple{N, Int}, + weight::Real +) where N + + interpolate_in_hypercube!_impl(Val(N)) + +end + +function interpolate_in_hypercube!_impl(::Val{N}) where N + # We need to distribute the `weight` of one data point to all vertices of + # the hypercube that surrounds it in the grid. + # These vertices can be described using all N-bit bitstrings. + # A zero bit then means that we are looking at the lower index in a specific + # dimension and a one means we look at the higher one. + side_to_symbol(side) = side == 0 ? :low_idcs : :high_idcs + updates = quote end + sides = zeros(Int, N) + + for vertex_number in 0 : 2^N - 1 + # Get the bitstring representation of the vertex. + digits!(sides, vertex_number, base = 2) + # Reverse it such that we will iterate the last dimension the fastest + # to adhere to Julia's "column"-major memory layout of arrays. + reverse!(sides) + + indices = [:($(side_to_symbol(side))[$idx]) for (side, idx) in zip(sides, 1:N)] + + # The interpolation coefficients are chosen such that the partial weight + # on one vertex is proportional to the distance of the data point to the + # opposite vertex for each dimension respectively. + factors = [ + :(midpoints[$idx][$(side_to_symbol(1-side))[$idx]] - coords[$idx]) + for (side, idx) in zip(sides, 1:N) + ] + # The previous formula is only almost correct. + # Whenever we calculate the distance to a vertex with a lower index, we + # introduce a multiplicative error of -1. + # This is mitigated by the following correction: + sign_correction = (-1)^sum(sides) + + updates = quote + $updates + + @inbounds grid[$(indices...)] += *($sign_correction, weight, $(factors...)) + end + end + + updates +end + +# tabulate data for kde +function tabulate( + data::NTuple{N, RealVector}, + midpoints::NTuple{N, AbstractRange}, + weights::Weights = default_weights(data) +) where N + + ndata = length(first(data)) + all(==(ndata) ∘ length, data) || error("data vectors must be of same length") + + range_lengths = length.(midpoints) + range_steps = step.(midpoints) + + # Set up a grid for discretized data + grid = zeros(Float64, range_lengths...) + ainc = 1.0 / (sum(weights) * prod(range_steps)^2) + + # weighted discretization (cf. Jones and Lotwick) + for i in 1:ndata + coords = getindex.(data, i) + high_idcs = searchsortedfirst.(midpoints, coords) + low_idcs = high_idcs .- 1 + if all(1 .<= low_idcs .<= range_lengths .- 1) + interpolate_in_hypercube!(grid, midpoints, coords, high_idcs, low_idcs, weights[i]) + end + end + # Normalize for interpolation coefficients and weights + grid .*= ainc + + # returns an un-convolved KDE + MultivariateKDE(midpoints, grid) +end + +# convolution with product distribution of N univariate distributions +function conv(k::MultivariateKDE{N, R}, dists::NTuple{N, UnivariateDistribution}) where {N, R} + # Transform to Fourier basis + ft = rfft(k.density) + + # Convolve fft with characteristic function of kernel + cs = -twoπ ./ (maximum.(k.ranges) .- minimum.(k.ranges)) + for idx in CartesianIndices(ft) + pos = Tuple(idx) .- 1 + pos = min.(pos, size(k.density) .- pos) + ft[idx] *= prod(cf.(dists, pos .* cs)) + end + + # Invert the Fourier transform to get the KDE + density = irfft(ft, size(k.density, 1)) + + density .= max.(0., density) + + MultivariateKDE(k.ranges, density) +end + +default_weights(data::NTuple{N, RealVector}) where N = UniformWeights(length(data[1])) + +function kde( + data::NTuple{N, RealVector}, + weights::Weights, + midpoints::NTuple{N, AbstractRange}, + dist::NTuple{N, UnivariateDistribution} +) where N + + k = tabulate(data, midpoints, weights) + conv(k, dist) +end + +function kde( + data::NTuple{N, RealVector}, + dist::NTuple{N, UnivariateDistribution}; + boundary::NTuple{N, Tuple{Real, Real}} = + kde_boundary.(data, std.(dist)), + npoints::NTuple{N, Int} = ntuple(_ -> 256, Val(N)), + weights::Weights = default_weights(data) +) where N + + midpoints = kde_range.(boundary, npoints) + kde(data, weights, midpoints, dist) +end + +function kde( + data::NTuple{N, RealVector}, + midpoints::NTuple{N, AbstractRange}; + bandwidth = default_bandwidth(data), + kernel = Normal, + weights::Weights = default_weights(data) +) where N + + dist = kernel_dist(kernel, bandwidth) + kde(data, weights, midpoints, dist) +end + +function kde( + data::NTuple{N, RealVector}; + bandwidth = default_bandwidth(data), + kernel = Normal, + boundary::NTuple{N, Tuple{Real, Real}} = kde_boundary.(data, bandwidth), + npoints::NTuple{N, Int} = ntuple(_ -> 256, Val(N)), + weights::Weights = default_weights(data) +) where N + + dist = kernel_dist(kernel, bandwidth) + midpoints = kde_range.(boundary, npoints) + + kde(data, weights, midpoints, dist) +end + +# matrix data +function kde(data::RealMatrix, args...; kwargs...) + kde( + ntuple(i -> view(data, :, i), size(data, 2)), + args...; kwargs... + ) +end diff --git a/src/trivariate.jl b/src/trivariate.jl new file mode 100644 index 00000000..c4801084 --- /dev/null +++ b/src/trivariate.jl @@ -0,0 +1,17 @@ +const TrivariateKDE{Rx <: AbstractRange, Ry <: AbstractRange, Rz <: AbstractRange} = + MultivariateKDE{3, Tuple{Rx, Ry, Rz}} + + +const TrivariateDistribution = NTuple{3, UnivariateDistribution} + +function Base.getproperty(k::TrivariateKDE, s::Symbol) + if s === :x + k.ranges[1] + elseif s === :y + k.ranges[2] + elseif s === :z + k.ranges[3] + else + getfield(k, s) + end +end diff --git a/test/interp.jl b/test/interp.jl index ed93d914..8b66b065 100644 --- a/test/interp.jl +++ b/test/interp.jl @@ -3,6 +3,7 @@ using KernelDensity X = randn(100) Y = randn(100) +Z = randn(100) k = kde(X) @test pdf(k, k.x) ≈ k.density @@ -10,6 +11,9 @@ k = kde(X) k = kde((X,Y)) @test pdf(k, k.x, k.y) ≈ k.density +k = kde((X,Y,Z)) +@test pdf(k, k.x, k.y, k.z) ≈ k.density + # Try to evaluate the KDE outside the interpolation domain # The KDE is allowed to be zero, but it should not be greater than the exact solution k = kde([0.0], bandwidth=1.0) @@ -23,3 +27,12 @@ k = kde(([0.0],[0.0]), bandwidth=(1.0, 1.0)) @test pdf(k, +10.0, 0.0) ≤ pdf(MvNormal(2, 1.0), [+10.0, 0.0]) @test pdf(k, 0.0, -10.0) ≤ pdf(MvNormal(2, 1.0), [0.0, -10.0]) @test pdf(k, 0.0, +10.0) ≤ pdf(MvNormal(2, 1.0), [0.0, +10.0]) + +k = kde(([0.0],[0.0],[0.0]), bandwidth=(1.0, 1.0, 1.0)) +@test pdf(k, k.x, k.y, k.z) ≈ k.density +@test pdf(k, -10.0, 0.0, 0.0) ≤ pdf(MvNormal(3, 1.0), [-10.0, 0.0, 0.0]) +@test pdf(k, +10.0, 0.0, 0.0) ≤ pdf(MvNormal(3, 1.0), [+10.0, 0.0, 0.0]) +@test pdf(k, 0.0, -10.0, 0.0) ≤ pdf(MvNormal(3, 1.0), [0.0, -10.0, 0.0]) +@test pdf(k, 0.0, +10.0, 0.0) ≤ pdf(MvNormal(3, 1.0), [0.0, +10.0, 0.0]) +@test pdf(k, 0.0, 0.0, +10.0) ≤ pdf(MvNormal(3, 1.0), [0.0, 0.0, +10.0]) +@test pdf(k, 0.0, 0.0, -10.0) ≤ pdf(MvNormal(3, 1.0), [0.0, 0.0, -10.0]) diff --git a/test/multivariate.jl b/test/multivariate.jl new file mode 100644 index 00000000..d775ac9d --- /dev/null +++ b/test/multivariate.jl @@ -0,0 +1,72 @@ +using Test +using Distributions +using KernelDensity + +import KernelDensity: kernel_dist, default_bandwidth, kde_boundary, kde_range, tabulate + +# for D in [Tuple{Normal,Normal}, Tuple{Uniform,Uniform}, Tuple{Logistic,Logistic}, +# (Normal, Normal), (Uniform, Uniform), (Logistic, Logistic)] +# d = KernelDensity.kernel_dist(D,(0.5,0.5)) +# dx,dy = d +# @test mean(dx) == 0.0 +# @test mean(dy) == 0.0 +# @test std(dx) ≈ 0.5 +# @test std(dy) ≈ 0.5 +# end + +r = kde_range((-2.0,2.0), 128) +@test step(r) > 0 + +for X in ([0.0], [0.0,0.0], [0.0,0.5], [-0.5:0.1:0.5;]) + w = default_bandwidth(X) + @test w > 0 + lo, hi = kde_boundary(X, w) + @test lo < hi + kr = kde_range((lo, hi), 512) + @test step(kr) > 0 + + for D in (Normal, ) + k1 = tabulate((X, X, X), (kr, kr, kr)) + @test isa(k1, MultivariateKDE{3, R} where R) + @test size(k1.density) == length.(k1.ranges) + @test all(>=(0.0), k1.density) + @test sum(k1.density) * prod(step.(k1.ranges)) ≈ 1.0 + + k2 = KernelDensity.conv(k1,kernel_dist(Tuple{D, D, D}, (0.1, 0.1, 0.1))) + @test isa(k2, MultivariateKDE{3, R} where R) + @test size(k2.density) == length.(k2.ranges) + @test all(>=(0.0), k2.density) + @test sum(k2.density) * prod(step.(k2.ranges)) ≈ 1.0 + + k3 = kde((X, X, X); kernel = D) + @test isa(k3, MultivariateKDE{3, R} where R) + @test size(k3.density) == length.(k3.ranges) + @test all(>=(0.0), k3.density) + @test sum(k3.density) * prod(step.(k3.ranges)) ≈ 1.0 + + k4 = kde((X, X, X), (kr, kr, kr); kernel = D) + @test isa(k4, MultivariateKDE{3, R} where R) + @test size(k4.density) == length.(k4.ranges) + @test all(>=(0.0), k4.density) + @test sum(k4.density) * prod(step.(k4.ranges)) ≈ 1.0 + + k5 = kde([X X X]; kernel = D) + @test isa(k5, MultivariateKDE{3, R} where R) + @test size(k5.density) == length.(k5.ranges) + @test all(>=(0.0), k5.density) + @test sum(k5.density) * prod(step.(k5.ranges)) ≈ 1.0 + + k6 = kde([X X X], (kr, kr, kr); kernel = D, weights = fill(1.0 / length(X), length(X))) + @test k4.density ≈ k6.density + end +end + +for d in 1:5 + data = zeros(10, d) + k = kde(data, npoints = ntuple(_ -> 16, d)) + @test k isa (MultivariateKDE{d, R} where R) +end + +k11 = kde([0.0 0.0 0.0; 1.0 1.0 1.0], (r, r, r), bandwidth = (1, 1, 1), weights = [0, 1]) +k12 = kde([1.0 1.0 1.0], (r, r, r), bandwidth = (1, 1, 1)) +@test k11.density ≈ k12.density diff --git a/test/runtests.jl b/test/runtests.jl index 3b667928..42b8f0aa 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,7 @@ tests = [ "univariate", "bivariate", + "multivariate", "interp", ]