-
Notifications
You must be signed in to change notification settings - Fork 0
/
twosampletest.jl
212 lines (183 loc) · 7.22 KB
/
twosampletest.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
struct TwoSampleTest <: AbstractMCMCTest
n_control ::Int
n_treatment ::Int
n_mcmc_steps::Int
n_mcmc_thin ::Int
end
"""
TwoSampleTest(n_samples, n_mcmc_steps; n_control, n_treatment, n_mcmc_thin)
Two-sample hypothesis testing strategy. Algorithm 1 in Gandy & Scott (2021).
# Arguments
- `n_samples::Int`: Number of samples from the joint `p(θ, y)` used for the computing the p-values.
- `n_mcmc_steps::Int`: Number of times the MCMC kernel is applied to initial sample from the joint. (Increasing this value improves the power of the test.)
# Keyword Arguments
- `n_control::Int`: Number of pure samples from the joint (control group). (Default: `n_samples`)
- `n_treatment::Int`: Number of samples from the MCMC kernel (treatment group). (Default: `n_samples`)
- `n_mcmc_thin::Int`: Number of thinning applied to the MCMC chain. The effect of this argument is the same as `n_mcmc_steps`.
# Returns
- `pvalues`: P-value computed for each dimension of the statistic returned from `statistics`.
# Requirements
This test requires the following functions for `model` and `kernel` to be implemented:
- `markovchain_transition`
- `sample_joint`
# Keyword Arguments for Tests
When calling `mcmctest` or `seqmcmctest`, this tests has an additional keyword argument:
- `two_sample_test_pvalue`: The p-value calculation strategy.
The default strategy is an approximate two-sample Kolmogorov-Smirnov test. Any function returning a single p-value from a two-sample hypothesis test will work. The format is as follows:
```julia
two_sample_test_pvalue(x::AbstractVector, y::AbstractVector)::Real
```
"""
function TwoSampleTest(
n_samples ::Int,
n_mcmc_steps::Int;
n_control ::Int = n_samples,
n_treatment ::Int = n_samples,
n_mcmc_thin ::Int = 1,
)
@assert n_control > 1
@assert n_treatment > 1
@assert n_mcmc_steps ≥ 1
@assert n_mcmc_thin ≥ 1
TwoSampleTest(n_control, n_treatment, n_mcmc_steps, n_mcmc_thin)
end
struct TwoSampleGibbsTest <: AbstractMCMCTest
n_control ::Int
n_treatment ::Int
n_mcmc_steps::Int
n_mcmc_thin ::Int
end
"""
TwoSampleTest(n_samples, n_mcmc_steps; n_control, n_treatment, n_mcmc_thin)
Two-sample hypothesis testing strategy with an additional Gibbs sampling step. Modified version of Algorithm 1 in Gandy & Scott 2021 for increased power.
# Arguments
- `n_samples::Int`: Number of samples from the joint `p(θ, y)` used for the computing the p-values.
- `n_mcmc_steps::Int`: Number of times the MCMC kernel is applied to initial sample from the joint. (Increasing this value improves the power of the test.)
# Keyword Arguments
- `n_control::Int`: Number of pure samples from the joint (control group). (Default: `n_samples`)
- `n_treatment::Int`: Number of samples from the MCMC kernel (treatment group). (Default: `n_samples`)
- `n_mcmc_thin::Int`: Number of thinning applied to the MCMC chain. The effect of this argument is the same as `n_mcmc_steps`.
# Returns
- `pvalues`: P-value computed for each dimension of the statistic returned from `statistics`.
# Requirements
This test requires the following functions for `model` and `kernel` to be implemented:
- `markovchain_transition`
- `sample_joint`
- `sample_predictive`
# Keyword Arguments for Tests
When calling `mcmctest` or `seqmcmctest`, this tests has an additional keyword argument:
- `two_sample_test_pvalue`: The p-value calculation strategy.
The default strategy is an approximate two-sample Kolmogorov-Smirnov test. Any function returning a p-value to two sample groups will work. The format is as follows:
```julia
two_sample_test_pvalue(x::AbstractVector, y::AbstractVector)::Real
```
# References
"""
function TwoSampleGibbsTest(
n_samples ::Int,
n_mcmc_steps::Int;
n_control ::Int = n_samples,
n_treatment ::Int = n_samples,
n_mcmc_thin ::Int = 1,
)
@assert n_control > 1
@assert n_treatment > 1
@assert n_mcmc_steps ≥ 1
@assert n_mcmc_thin ≥ 1
TwoSampleGibbsTest(n_control, n_treatment, n_mcmc_steps, n_mcmc_thin)
end
function simulate_two_sample_groups(
rng ::Random.AbstractRNG,
test ::TwoSampleTest,
subject ::TestSubject,
show_progress::Bool
)
model = subject.model
kernel = subject.kernel
n_trtm = test.n_treatment
n_ctrl = test.n_control
n_mcmc_steps = test.n_mcmc_steps
n_mcmc_thin = test.n_mcmc_thin
prog = ProgressMeter.Progress(
n_trtm; barlen = 31, showspeed = true, enabled = show_progress
)
# Treatment Group
trtm = map(1:n_trtm) do n
θ, y = sample_joint(rng, model)
θ_trtm = markovchain_multiple_transition(
rng, model, kernel, n_mcmc_steps*n_mcmc_thin, θ, y
)
next!(prog, showvalues=[(:treatment_group, "$(n)/$(n_trtm)")])
vcat(θ_trtm, y)
end
prog = ProgressMeter.Progress(
n_ctrl; barlen = 31, showspeed = true, enabled = show_progress
)
# Control Group
ctrl = map(1:n_ctrl) do n
θ_ctrl, y = sample_joint(rng, model)
next!(prog, showvalues=[(:control_group, "$(n)/$(n_ctrl)")])
vcat(θ_ctrl, y)
end
trtm, ctrl
end
function simulate_two_sample_groups(
rng ::Random.AbstractRNG,
test ::TwoSampleGibbsTest,
subject ::TestSubject,
show_progress::Bool
)
model = subject.model
kernel = subject.kernel
n_trtm = test.n_treatment
n_ctrl = test.n_control
prog = ProgressMeter.Progress(
n_ctrl + n_trtm; barlen = 31, showspeed = true, enabled = show_progress
)
n_mcmc_steps = test.n_mcmc_steps
n_mcmc_thin = test.n_mcmc_thin
# Treatment Group
θs_trtm = map(1:n_trtm) do n
θ, y = sample_joint(rng, model)
θ_trtm = markovchain_multiple_transition(
rng, model, kernel, n_mcmc_steps*n_mcmc_thin, θ, y
)
y_trtm = sample_predictive(rng, model, θ_trtm)
next!(prog, showvalues=[(:treatment_group, "$(n)/$(n_trtm)")])
vcat(θ_trtm, y_trtm)
end
# Control Group
θs_ctrl = map(1:n_ctrl) do n
θ_ctrl, y_ctrl = sample_joint(rng, model)
next!(prog, showvalues=[(:control_group, "$(n)/$(n_ctrl)")])
vcat(θ_ctrl, y_ctrl)
end
θs_trtm, θs_ctrl
end
function mcmctest(
rng ::Random.AbstractRNG,
test ::Union{TwoSampleTest, TwoSampleGibbsTest},
subject::TestSubject;
two_sample_test_pvalue = default_two_sample_test_pvalue,
statistics = default_statistics,
show_progress::Bool = true,
)
θs_trtm, θs_ctrl = simulate_two_sample_groups(rng, test, subject, show_progress)
stats_trtm = map(statistics, θs_trtm)
stats_ctrl = map(statistics, θs_ctrl)
stats_trtm_cat = hcat(stats_trtm...)
stats_ctrl_cat = hcat(stats_ctrl...)
map(eachrow(stats_trtm_cat), eachrow(stats_ctrl_cat)) do param_trtm, param_ctrl
two_sample_test_pvalue(param_trtm, param_ctrl)
end
end
function mcmctest(
test ::Union{TwoSampleTest, TwoSampleGibbsTest},
subject::TestSubject;
two_sample_test_pvalue = default_two_sample_test_pvalue,
statistics = default_statistics,
show_progress::Bool = true,
)
mcmctest(Random.default_rng(), test, subject;
two_sample_test_pvalue, statistics, show_progress)
end