This repository has been archived by the owner on Jun 10, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
/
quantile_measures.py
383 lines (313 loc) · 11.8 KB
/
quantile_measures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
"""Capabilities for quantile-based sensitivity analysis.
This module contains functions to calculate the global sensitivity measures based on
quantiles of the output introduced by Kucherenko et al.(2019). Both the brute force
and double loop reordering MC estimators are included.
"""
import chaospy as cp
import numpy as np
import pandas as pd
from scipy.stats import expon
from scipy.stats import norm
from scipy.stats import uniform
def mc_quantile_measures(
estimator,
func,
n_params,
loc,
scale,
dist_type,
n_draws,
sampling_scheme="sobol",
seed=0,
skip=0,
):
r"""Compute the MC/QMC estimators of quantile-based global sensitivity measures.
The algorithm is described in Section 4 of Kucherenko et al.(2019).
Parameters
----------
estimator : string
Specify the Monte Carlo estimator. One of ["brute force", "DLR"], where "DLR" denotes
to the double loop reordering approach.
func : callable
Objective function to estimate the quantile-based measures. Must be broadcastable.
n_params : int
Number of parameters of objective function.
loc : float or np.ndarray
The location(`loc`) keyword passed to `scipy.stats.norm`_ function to shift the
location of "standardized" distribution. Specifically, for normal distribution
it specifies the mean array with the length of `n_params`.
.. _scipy.stats.norm: https://docs.scipy.org/doc/scipy/reference/generated/
_scipy.stats.norm.html
scale : float or np.ndarray
The `scale` keyword passed to `scipy.stats.norm`_ function to adjust the scale of
"standardized" distribution. Specifically, for normal distribution it specifies
the covariance matrix of shape (n_params, n_params).
dist_type : str
The distribution type of inputs. Options are "Normal", "Exponential" and "Uniform".
n_draws : int
Number of Monte Carlo draws. For double loop reordering estimator,
S. Kucherenko and S. Song(2017). suggests that `n_draws` should always be equal
to :math:`2^p` to preserve the uniformity properties , where :math:`p`
is an integer. In this function :math:`p` should be integers between 6 and 15 if
`estimator` is "DLR".
sampling_scheme : str, optional
One of ["random", "sobol"], default "sobol".
seed : int, optional
Random number generator seed.
skip : int, optional
Number of values to skip of Sobol sequence. Default is `0`.
Returns
-------
df_measures : pd.DataFrame
DataFrame containing quantile-based sensitivity measures.
"""
# range of alpha
dalp = (0.98 - 0.02) / 30
alpha_grid = np.arange(0.02, 0.98 + dalp, dalp) # len(alpha_grid) = 31
# get the two independent groups of sample points
x, x_prime = _unconditional_samples(
n_draws,
n_params,
dist_type,
loc,
scale,
sampling_scheme,
seed=0,
skip=0,
)
# get the conditional sample sets
if estimator == "brute force":
x_mix = _bf_conditional_samples(x, x_prime)
elif estimator == "DLR":
x_mix = _dlr_conditional_samples(x)
else:
raise ValueError("Argument 'estimator' is not in {'brute force', 'DLR'}.")
# quantiles of output with unconditional input
quantile_y_x = _unconditional_quantile_y(x, alpha_grid, func)
# quantiles of output with conditional input
quantile_y_x_mix = _conditional_quantile_y(x_mix, func, alpha_grid)
# Get quantile based measures
q_1, q_2 = _quantile_measures(quantile_y_x, quantile_y_x_mix)
# Get normalized quantile based measures
norm_q_1, norm_q_2 = _normalized_quantile_measures(q_1, q_2)
# store results
dict_measures = {
"q_1": pd.DataFrame(q_1),
"q_2": pd.DataFrame(q_2),
"Q_1": pd.DataFrame(norm_q_1),
"Q_2": pd.DataFrame(norm_q_2),
}
df_measures = pd.concat(dict_measures.values(), axis=0)
df_measures.index = pd.MultiIndex.from_product(
[dict_measures.keys(), alpha_grid],
names=["Measures", "alpha"],
)
df_measures.columns = [f"x_{i + 1}" for i in range(n_params)]
return df_measures
def _unconditional_samples(
n_draws,
n_params,
dist_type,
loc,
scale,
sampling_scheme,
seed=0,
skip=0,
):
"""Generate two independent groups of sample points.
Parameters
----------
n_draws : int
Number of Monte Carlo draws.
n_params : int
Number of parameters of objective function.
dist_type : str
The distribution type of input. Options are "Normal", "Exponential" and "Uniform".
loc : float or np.ndarray
The location(`loc`) keyword passed to `scipy.stats.norm`_ function to shift the
location of "standardized" distribution.
scale : float or np.ndarray
The `scale` keyword passed to `scipy.stats.norm`_ function to adjust the scale of
"standardized" distribution.
sampling_scheme : str, optional
One of ["sobol", "random"]
seed : int, optional
Random number generator seed. Default is 0.
skip : int, optional
Number of values to skip of Sobol sequence. Default is `0`.
Returns
-------
x, x_prime : np.ndarray
Two arrays of shape (n_draws, n_params) with i.i.d draws from a joint distribution.
"""
# Generate uniform distributed samples
np.random.seed(seed)
if sampling_scheme == "sobol":
u = cp.generate_samples(
order=n_draws + skip,
domain=n_params,
rule="S",
).T
elif sampling_scheme == "random":
u = np.random.uniform(size=(n_draws, n_params))
else:
raise ValueError("Argument 'sampling_scheme' is not in {'sobol', 'random'}.")
skip = skip if sampling_scheme == "sobol" else 0
u = cp.generate_samples(order=n_draws, domain=2 * n_params, rule="S").T
u_1 = u[skip:, :n_params]
u_2 = u[skip:, n_params:]
# Transform uniform draws into a joint PDF
if dist_type == "Normal":
z = norm.ppf(u_1)
z_prime = norm.ppf(u_2)
cholesky = np.linalg.cholesky(scale)
x = loc + cholesky.dot(z.T).T
x_prime = loc + cholesky.dot(z_prime.T).T
elif dist_type == "Exponential":
x = expon.ppf(u_1, loc, scale)
x_prime = expon.ppf(u_2, loc, scale)
elif dist_type == "Uniform":
x = uniform.ppf(u_1, loc, scale)
x_prime = uniform.ppf(u_2, loc, scale)
else:
raise NotImplementedError
return x, x_prime
def _bf_conditional_samples(x, x_prime):
"""Generate mixed sample sets distributed accroding to a conditional PDF.
Parameters
----------
x : np.ndarray
Array with shape (n_draws, n_params).
x_prime : np.ndarray
Array with shape (n_draws, n_params).
Returns
-------
x_mix : np.ndarray
Mixed sample sets. Shape has the form (n_draws, n_params, n_draws, n_params).
"""
n_draws, n_params = x.shape
x_mix = np.zeros((n_draws, n_params, n_draws, n_params))
for i in range(n_params):
for j in range(n_draws):
x_mix[j, i] = x
x_mix[j, i, :, i] = x_prime[j, i]
return x_mix
def _dlr_conditional_samples(x):
"""Generate conditional sample sets.
Parameters
----------
x : np.ndarray
Draws from a joint distribution. Shape has the form (n_draws, n_params).
Returns
-------
x_mix : np.ndarray
Mixed sample sets. Shape has the form (m, n_params, n_draws, n_params), where m
is the number of conditional bins.
"""
n_draws, n_params = x.shape
# The dependence of m versus n_draws accroding to S. Kucherenko and S. Song(2017).
if n_draws == 2**6:
m = 2**3
elif n_draws in [2**7, 2**8, 2**9]:
m = 2**4
elif n_draws == 2**10:
m = 2**5
elif n_draws in [2**11, 2**12, 2**13]:
m = 2**6
elif n_draws in [2**14, 2**15]:
m = 2**7
else:
raise NotImplementedError
conditional_bin = x[:m]
x_mix = np.zeros((m, n_params, n_draws, n_params))
# subdivide unconditional samples into M eaually bins, within each bin x_i being fixed.
for i in range(n_params):
for j in range(m):
x_mix[j, i] = x
x_mix[j, i, :, i] = conditional_bin[j, i]
return x_mix
def _unconditional_quantile_y(x, alpha_grid, func):
"""Return quantiles of outputs with unconditional input.
Parameters
----------
x : np.ndarray
Draws from a joint distribution. Shape has the form (n_draws, n_params).
alpha_grid : np.ndarray
A sequence of evenly spaced values on the interval (0, 1).
func : callable
Objective function to calculate the quantile-based measures. Must be broadcastable.
Returns
-------
quantile_y_x : np.ndarray
Quantiles of outputs corresponding to alpha with unconditional inputs.
Shape has the form (len(alpha_grid),).
"""
n_draws = x.shape[0]
# Equation 21a
y_x = func(x)
y_x_asc = np.sort(y_x)
q_index = (np.floor(alpha_grid * n_draws)).astype(int)
quantile_y_x = y_x_asc[q_index]
return quantile_y_x
def _conditional_quantile_y(x_mix, func, alpha_grid):
"""Return quantiles of outputs with conditional input.
Parameters
----------
x_mix : np.ndarray
Mixed draws. Shape has the form (m, n_params, n_draws, n_params).
func : callable
Objective function to calculate the quantile-based measures. Must be broadcastable.
alpha_grid : np.ndarray
A sequence of evenly spaced values on the interval (0, 1).
Returns
-------
quantile_y_x_mix : np.ndarray
Quantiles of output corresponding to alpha with conditional inputs. Shape has the form
(m, n_params, len(alpha_grid), 1), where m is the number of conditional bins.
"""
m, n_params, n_draws = x_mix.shape[:3]
y_x_mix = np.zeros((m, n_params, n_draws, 1))
y_x_mix_asc = np.zeros((m, n_params, n_draws, 1))
quantile_y_x_mix = np.zeros((m, n_params, len(alpha_grid), 1))
# Equation 21b/26. Get quantiles within each bin.
for i in range(n_params):
for j in range(m):
# values of conditional outputs
y_x_mix[j, i] = np.vstack(func(x_mix[j, i]))
y_x_mix_asc[j, i] = np.sort(y_x_mix[j, i], axis=0)
for pp, a in enumerate(alpha_grid):
quantile_y_x_mix[j, i, pp] = y_x_mix_asc[j, i][
(np.floor(a * n_draws)).astype(int)
] # quantiles corresponding to alpha
return quantile_y_x_mix
def _quantile_measures(quantile_y_x, quantile_y_x_mix):
"""Estimate the values of quantile based measures."""
m, n_params, len_alp = quantile_y_x_mix.shape[:3]
# initialization
q_1 = np.zeros((len_alp, n_params))
q_2 = np.zeros((len_alp, n_params))
delt = np.zeros((m, n_params, len_alp, 1))
# Equation 24&25&27&28
for j in range(m):
for i in range(n_params):
for pp in range(len_alp):
delt[j, i, pp] = quantile_y_x_mix[j, i, pp] - quantile_y_x[pp]
q_1[pp, i] = np.mean(np.absolute(delt[:, i, pp]))
q_2[pp, i] = np.mean(delt[:, i, pp] ** 2)
return q_1, q_2
def _normalized_quantile_measures(q_1, q_2):
"""Estimate the values of normalized quantile based measures."""
len_alp, n_params = q_1.shape
# initialization
sum_q_1 = np.zeros(len_alp)
sum_q_2 = np.zeros(len_alp)
norm_q_1 = np.zeros((len_alp, n_params))
norm_q_2 = np.zeros((len_alp, n_params))
# Equation 13 & 14
for pp in range(len_alp):
sum_q_1[pp] = np.sum(q_1[pp, :])
sum_q_2[pp] = np.sum(q_2[pp, :])
for i in range(n_params):
norm_q_1[pp, i] = q_1[pp, i] / sum_q_1[pp]
norm_q_2[pp, i] = q_2[pp, i] / sum_q_2[pp]
return norm_q_1, norm_q_2