-
Notifications
You must be signed in to change notification settings - Fork 409
/
jointorderstatistics.jl
168 lines (152 loc) · 6.04 KB
/
jointorderstatistics.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# Implementation based on chapters 2-4 of
# Arnold, Barry C., Narayanaswamy Balakrishnan, and Haikady Navada Nagaraja.
# A first course in order statistics. Society for Industrial and Applied Mathematics, 2008.
"""
JointOrderStatistics <: ContinuousMultivariateDistribution
The joint distribution of a subset of order statistics from a sample from a continuous
univariate distribution.
JointOrderStatistics(
dist::ContinuousUnivariateDistribution,
n::Int,
ranks=Base.OneTo(n);
check_args::Bool=true,
)
Construct the joint distribution of order statistics for the specified `ranks` from an IID
sample of size `n` from `dist`.
The ``i``th order statistic of a sample is the ``i``th element of the sorted sample.
For example, the 1st order statistic is the sample minimum, while the ``n``th order
statistic is the sample maximum.
`ranks` must be a sorted vector or tuple of unique `Int`s between 1 and `n`.
For a single order statistic, use [`OrderStatistic`](@ref) instead.
## Examples
```julia
JointOrderStatistics(Normal(), 10) # Product(fill(Normal(), 10)) restricted to ordered vectors
JointOrderStatistics(Cauchy(), 10, 2:9) # joint distribution of all but the extrema
JointOrderStatistics(Cauchy(), 10, (1, 10)) # joint distribution of only the extrema
```
"""
struct JointOrderStatistics{
D<:ContinuousUnivariateDistribution,R<:Union{AbstractVector{Int},Tuple{Int,Vararg{Int}}}
} <: ContinuousMultivariateDistribution
dist::D
n::Int
ranks::R
function JointOrderStatistics(
dist::ContinuousUnivariateDistribution,
n::Int,
ranks::Union{AbstractVector{Int},Tuple{Int,Vararg{Int}}}=Base.OneTo(n);
check_args::Bool=true,
)
@check_args(
JointOrderStatistics,
(n, n ≥ 1, "`n` must be a positive integer."),
(
ranks,
_are_ranks_valid(ranks, n),
"`ranks` must be a sorted vector or tuple of unique integers between 1 and `n`.",
),
)
return new{typeof(dist),typeof(ranks)}(dist, n, ranks)
end
end
_islesseq(x, y) = isless(x, y) || isequal(x, y)
function _are_ranks_valid(ranks, n)
# this is equivalent to but faster than
# issorted(ranks) && allunique(ranks)
!isempty(ranks) && first(ranks) ≥ 1 && last(ranks) ≤ n && issorted(ranks; lt=_islesseq)
end
function _are_ranks_valid(ranks::AbstractRange, n)
!isempty(ranks) && first(ranks) ≥ 1 && last(ranks) ≤ n && step(ranks) > 0
end
length(d::JointOrderStatistics) = length(d.ranks)
function insupport(d::JointOrderStatistics, x::AbstractVector)
length(d) == length(x) || return false
xi, state = iterate(x) # at least one element!
dist = d.dist
insupport(dist, xi) || return false
while (xj_state = iterate(x, state)) !== nothing
xj, state = xj_state
xj ≥ xi && insupport(dist, xj) || return false
xi = xj
end
return true
end
minimum(d::JointOrderStatistics) = Fill(minimum(d.dist), length(d))
maximum(d::JointOrderStatistics) = Fill(maximum(d.dist), length(d))
params(d::JointOrderStatistics) = tuple(params(d.dist)..., d.n, d.ranks)
partype(d::JointOrderStatistics) = partype(d.dist)
Base.eltype(::Type{<:JointOrderStatistics{D}}) where {D} = Base.eltype(D)
Base.eltype(d::JointOrderStatistics) = eltype(d.dist)
function logpdf(d::JointOrderStatistics, x::AbstractVector{<:Real})
n = d.n
ranks = d.ranks
lp = loglikelihood(d.dist, x)
T = typeof(lp)
lp += loggamma(T(n + 1))
if length(ranks) == n
issorted(x) && return lp
return oftype(lp, -Inf)
end
i = first(ranks)
xᵢ = first(x)
if i > 1 # _marginalize_range(d.dist, 0, i, -Inf, xᵢ, T)
lp += (i - 1) * logcdf(d.dist, xᵢ) - loggamma(T(i))
end
for (j, xⱼ) in Iterators.drop(zip(ranks, x), 1)
xⱼ < xᵢ && return oftype(lp, -Inf)
lp += _marginalize_range(d.dist, i, j, xᵢ, xⱼ, T)
i = j
xᵢ = xⱼ
end
if i < n # _marginalize_range(d.dist, i, n + 1, xᵢ, Inf, T)
lp += (n - i) * logccdf(d.dist, xᵢ) - loggamma(T(n - i + 1))
end
return lp
end
# given ∏ₖf(xₖ), marginalize all xₖ for i < k < j
function _marginalize_range(dist, i, j, xᵢ, xⱼ, T)
k = j - i - 1
k == 0 && return zero(T)
return k * T(logdiffcdf(dist, xⱼ, xᵢ)) - loggamma(T(k + 1))
end
function _rand!(rng::AbstractRNG, d::JointOrderStatistics, x::AbstractVector{<:Real})
n = d.n
if n == length(d.ranks) # ranks == 1:n
# direct method, slower than inversion method for large `n` and distributions with
# fast quantile function or that use inversion sampling
rand!(rng, d.dist, x)
sort!(x)
else
# use exponential generation method with inversion, where for gaps in the ranks, we
# use the fact that the sum Y of k IID variables xₘ ~ Exp(1) is Y ~ Gamma(k, 1).
# Lurie, D., and H. O. Hartley. "Machine-generation of order statistics for Monte
# Carlo computations." The American Statistician 26.1 (1972): 26-27.
# this is slow if length(d.ranks) is close to n and quantile for d.dist is expensive,
# but this branch is probably taken when length(d.ranks) is small or much smaller than n.
T = typeof(one(eltype(x)))
s = zero(eltype(x))
i = 0
for (m, j) in zip(eachindex(x), d.ranks)
k = j - i
if k > 1
# specify GammaMTSampler directly to avoid unnecessarily checking the shape
# parameter again and because it has been benchmarked to be the fastest for
# shape k ≥ 1 and scale 1
s += T(rand(rng, GammaMTSampler(Gamma{T}(T(k), T(1)))))
else
s += randexp(rng, T)
end
i = j
x[m] = s
end
j = n + 1
k = j - i
if k > 1
s += T(rand(rng, GammaMTSampler(Gamma{T}(T(k), T(1)))))
else
s += randexp(rng, T)
end
x .= Base.Fix1(quantile, d.dist).(x ./ s)
end
return x
end