from __future__ import ( # Necessary for self-type annotations until Python >3.10
annotations,
)
import copy
import warnings
import numpy as np
from scipy.special import expit, logit
from scipy.stats import chi2, lognorm, ncx2, norm, rv_continuous
from scipy.stats._multivariate import multivariate_normal_frozen
from uncertainties import unumpy as unp
from .aggregation import Standardizer
from .utils import skip, assert_in
__all__ = [
"LayeredArray",
"ParameterArray",
"UncertainArray",
"UncertainParameterArray",
"MVUncertainParameterArray",
]
class LogitNormal(rv_continuous):
r"""A logit-normal continuous random variable.
The probability density function for LogitNormal is:
.. math::
f \left( x, \sigma \right) = \frac{1}{\sigma \sqrt{2 \pi}} e^{- \frac{ \left( \text{logit} (x) - \mu \right)^2}{2 \sigma^2}} \frac{1}{x \left( 1-x \right)} # noqa: E501
for :math:`0<x<1`, :math:`\sigma>0`
A logit-normal random variable `Y` can be parameterized in terms of the mean, :math:`\mu`, and standard deviation,
:math:`\sigma`, of the unique normally distributed random variable `X` such that `expit(X) = Y`. This
parametrization corresponds to setting `scale = σ` and `loc = expit(μ)`.
"""
def __init__(self, loc=0.5, scale=1):
super().__init__()
self.scale = scale
self.loc = logit(loc)
def _pdf(self, x):
return norm(loc=self.loc, scale=self.scale).pdf(logit(x)) / (x * (1 - x))
def _cdf(self, x):
return norm(loc=self.loc, scale=self.scale).cdf(logit(x))
def ppf(self, q):
return expit(norm(loc=self.loc, scale=self.scale).ppf(q))
def rvs(self, size=None, random_state=None):
return expit(norm(loc=self.loc, scale=self.scale).rvs(size=size, random_state=random_state))
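# Illustrative usage sketch (not part of the library API; values are arbitrary): LogitNormal
# simply wraps a Normal distribution on the logit scale, so its samples and quantiles can be
# checked against scipy.stats.norm combined with expit/logit.
#
#     dist = LogitNormal(loc=0.7, scale=0.5)
#     samples = dist.rvs(size=1000, random_state=0)
#     assert np.all((samples > 0) & (samples < 1))    # support is (0, 1)
#     np.testing.assert_allclose(dist.ppf(0.5), 0.7)  # median maps back to loc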
class MultivariateNormalish(multivariate_normal_frozen):
r"""A multivariate Normal distribution built from and callable on ParameterArrays.
In particular, this class takes care of transforming variables to/from "natural" space as necessary.
Parameters
----------
mean : ParameterArray
A ParameterArray containing the distribution mean; must be a single point
cov : float or np.ndarray
Covariance matrix of the distribution as a scalar or standard array
**kwargs
Additional keyword arguments passed to `scipy.stats.multivariate_normal`
"""
def __init__(self, mean: ParameterArray, cov: int | float | np.ndarray, **kwargs):
assert isinstance(mean, ParameterArray), "Mean must be a ParameterArray"
if mean.ndim != 0:
raise NotImplementedError("Multidimensional multivariate distributions are not yet supported.")
self._names = mean.names
self._stdzr = mean.stdzr
self._log_vars = mean.stdzr.log_vars
self._logit_vars = mean.stdzr.logit_vars
self._islog = [1 if var in self._log_vars else 0 for var in self._names]
self._islogit = [1 if var in self._logit_vars else 0 for var in self._names]
super(MultivariateNormalish, self).__init__(mean=mean.z.values(), cov=cov, **kwargs)
def pdf(self, x: ParameterArray) -> float:
r"""Probability density function.
Parameters
----------
x : ParameterArray
Quantiles, with the last axis of x denoting the components.
Returns
-------
array-like
"""
return super(MultivariateNormalish, self).pdf(x.z.dstack())
def logpdf(self, x: ParameterArray) -> float:
r"""Log of the probability density function.
Parameters
----------
x : ParameterArray
Quantiles, with the last axis of x denoting the components.
Returns
-------
array-like
"""
return super(MultivariateNormalish, self).logpdf(x.z.dstack())
def cdf(self, x: ParameterArray) -> float:
r"""Cumulative distribution function.
Parameters
----------
x : ParameterArray
Quantiles, with the last axis of x denoting the components.
Returns
-------
array-like
"""
return super(MultivariateNormalish, self).cdf(x.z.dstack())
def logcdf(self, x: ParameterArray) -> float:
r"""Log of the cumulative distribution function.
Parameters
----------
x : ParameterArray
Quantiles, with the last axis of x denoting the components.
Returns
-------
array-like
"""
return super(MultivariateNormalish, self).logcdf(x.z.dstack())
def rvs(self, size=1, random_state=None) -> ParameterArray:
r"""Draw random samples from the distribution.
Parameters
----------
size: int or array-like of ints
random_state: int
Returns
-------
ParameterArray
"""
# multivariate_normal raises "RuntimeWarning: covariance is not positive semidefinite" if rvs() is called more
# than once without calling dist().rvs() in between. The covariance is not altered as a side effect of this
# call, so the cause of the warning is unclear; the workaround below is kept for reference.
# with warnings.catch_warnings():
# warnings.filterwarnings("error")
# try:
# samples = super(MultivariateNormalish, self).rvs(size=size, random_state=random_state)
# except:
# multivariate_normal(mean=self.mean, cov=self.cov).rvs()
# samples = super(MultivariateNormalish, self).rvs(size=size, random_state=random_state)
samples = super(MultivariateNormalish, self).rvs(size=size, random_state=random_state)
return ParameterArray(
**{p: samples[..., i] for i, p in enumerate(self._names)},
stdzd=True,
stdzr=self._stdzr,
)
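# Illustrative usage sketch (hypothetical variable names and Standardizer values, not taken
# from the library): a single-point ParameterArray defines the mean, and samples come back as
# a ParameterArray with the same layers, un-standardized via the shared Standardizer.
#
#     stdzr = Standardizer(x={'μ': 0.0, 'σ': 1.0}, y={'μ': 0.0, 'σ': 1.0})
#     mean = ParameterArray(x=1.0, y=2.0, stdzr=stdzr)
#     mvn = MultivariateNormalish(mean, cov=np.eye(2) * 0.1)
#     draws = mvn.rvs(size=100, random_state=0)   # ParameterArray with layers 'x' and 'y'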
class LayeredArray(np.ndarray):
"""An array with one or more named values at every index.
Parameters
----------
stdzr : Standardizer, optional
An instance of :class:`Standardizer`
**arrays
Arrays to store, with their names as keywords
"""
def __new__(cls, stdzr=None, **arrays):
if arrays == {}:
raise ValueError("Must supply at least one array")
arrays = {name: np.asarray(array) for name, array in arrays.items() if array is not None}
narray_dtype = np.dtype([(name, array.dtype) for name, array in arrays.items()])
narray_prototype = np.empty(list(arrays.values())[0].shape, dtype=narray_dtype)
for name, array in arrays.items():
narray_prototype[name] = array
larray = narray_prototype.view(cls)
larray.names = list(narray_dtype.fields.keys())
larray.stdzr = stdzr
return larray
def __array_finalize__(self, larray):
if larray is None:
return
self.names = getattr(larray, "names", None)
self.stdzr = getattr(larray, "stdzr", None)
def __array_ufunc__(self, ufunc, method, *inputs, out=None, **kwargs):
"""https://numpy.org/doc/stable/user/basics.subclassing.html#array-ufunc-for-ufuncs"""
args = []
if len(set([larray.names[0] for larray in inputs if isinstance(larray, LayeredArray)])) > 1:
warnings.warn("Operating on arrays with different layer names; results may be unexpected.")
for input_ in inputs:
if isinstance(input_, LayeredArray):
if len(input_.names) > 1:
raise ValueError("Cannot operate on array with multiple layer names")
args.append(input_.astype(float).view(np.ndarray))
else:
args.append(input_)
outputs = kwargs.get("out")
if outputs:
out_args = []
for output in outputs:
if isinstance(output, LayeredArray):
out_args.append(output.astype(float).view(np.ndarray))
else:
out_args.append(output)
kwargs["out"] = tuple(out_args)
else:
outputs = (None,) * ufunc.nout
results = super().__array_ufunc__(ufunc, method, *args, **kwargs)
if results is NotImplemented:
return NotImplemented
if ufunc.nout == 1:
results = (results,)
results = tuple(
LayeredArray(**{self.names[0]: result}) if output is None else output
for result, output in zip(results, outputs)
)
return results[0] if len(results) == 1 else results
def __getitem__(self, item):
default = super().__getitem__(item)
if isinstance(item, str):
arrays = {item: default}
elif isinstance(item, (int, np.int32, np.int64)) or (
isinstance(item, tuple) and all(isinstance(val, int) for val in item)
):
arrays = {name: value for name, value in zip(default.dtype.names, default)}
elif isinstance(item, slice):
arrays = {layer.names[0]: layer.values() for layer in default.as_list()}
else:
return default
return LayeredArray(**arrays)
def __repr__(self):
return f"{tuple(self.names)}: {np.asarray(self)}"
def __str__(self):
return repr(self)
def get(self, name, default=None):
"""Return value given by `name` if it exists, otherwise return `default`."""
if name in self.names:
return self[name]
elif default is None:
return None
else:
return LayeredArray(**{name: default})
def drop(self, name, missing_ok=True):
if name in self.names:
return LayeredArray(**{p: arr for p, arr in self.as_dict().items() if p != name})
elif missing_ok:
return self
else:
raise KeyError(f"Name {name} not found in array.")
def values(self):
"""Values at each index stacked into regular ndarray."""
stacked = np.stack([self[name].astype(float) for name in self.names])
if len(self.names) > 1:
return stacked
else:
return stacked[0]
def dstack(self):
"""Values at each index as ndarrays stacked in sequence depth wise (along third axis)."""
return np.dstack([la.values() for la in self.as_list()])
def as_list(self, order=None):
order = self.names if order is None else order
assert all(name in order for name in self.names)
return [self[name] for name in order]
def as_dict(self):
"""Values corresponding to each named level as a dictionary."""
return {name: self[name].values() for name in self.names}
def add_layers(self, **arrays):
"""Add additional layers at each index."""
arrays_ = arrays.as_dict() if isinstance(arrays, LayeredArray) else arrays
return LayeredArray(**{**self.as_dict(), **arrays_}) # Fix once Python >= 3.9
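# Illustrative usage sketch (arbitrary values): each layer is addressable by name, and the
# whole structure round-trips through plain ndarrays and dicts.
#
#     la = LayeredArray(a=np.arange(3), b=np.arange(3) * 10)
#     la['a'].values()                     # array([0, 1, 2])
#     la.as_dict()                         # {'a': array([0, 1, 2]), 'b': array([ 0, 10, 20])}
#     la.add_layers(c=np.ones(3)).names    # ['a', 'b', 'c']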
class ParameterArray(LayeredArray):
"""Array of parameter values, allowing simple transformation.
:class:`ParameterArray` stores not only the value of the variable itself but also a :class:`Standardizer` instance.
This makes it simple to switch between the natural scale of the parameter and its transformed and standardized
values through the :attr:`t` and :attr:`z` properties, respectively.
This class can also be accessed through the alias :class:`parray`.
Parameters
----------
**arrays
arrays to store with their names as keywords
stdzr : Standardizer
An instance of :class:`Standardizer`
stdzd : bool, default False
Whether the supplied values are on standardized scale instead of the natural scale
Examples
--------
A parray can be created with a single parameter. In this case, `d` is treated as a `LogNormal` variable by the stdzr.
>>> from gumbi import ParameterArray as parray
>>> stdzr = Standardizer(d={'μ': -0.307, 'σ': 0.158}, log_vars=['d'])
>>> rpa = parray(d=np.arange(5,10)/10, stdzr=stdzr)
>>> rpa
('d',): [(0.5,) (0.6,) (0.7,) (0.8,) (0.9,)]
>>> rpa.t
('d_t',): [(-0.69314718,) (-0.51082562,) (-0.35667494,) (-0.22314355,) (-0.10536052,)]
>>> rpa.z
('d_z',): [(-2.4439695 ,) (-1.29003559,) (-0.31439838,) ( 0.53073702,) ( 1.27619927,)]
If the parameter is completely absent from the stdzr, its natural, :attr:`t`, and :attr:`z` values are identical.
>>> pa = parray(param=np.arange(5), stdzr=stdzr)
>>> pa
('param',): [(0,) (1,) (2,) (3,) (4,)]
>>> pa.t
('param_t',): [(0,) (1,) (2,) (3,) (4,)]
>>> pa.z
('param_z',): [(0.,) (1.,) (2.,) (3.,) (4.,)]
You can even do monstrous compositions like
>>> np.min(np.sqrt(np.mean(np.square(rpa-rpa[0]-0.05)))).t
('d_t',): (-1.5791256,)
If you have to work with an ordinary numpy array, use :meth:`values`.
>>> np.argmax(rpa.values())
4
Attributes
----------
names : list of str
Names of all stored parameters
stdzr : Standardizer
An instance of :class:`Standardizer`
"""
def __new__(cls, stdzr: Standardizer, stdzd=False, **arrays):
if arrays == {}:
raise ValueError("Must supply at least one array")
if stdzd:
arrays = {name: stdzr.unstdz(name, np.array(array)) for name, array in arrays.items()}
parray = LayeredArray.__new__(cls, **arrays)
parray.stdzr = stdzr
return parray
def __array_finalize__(self, parray):
if parray is None:
return
self.stdzr = getattr(parray, "stdzr", None)
self.names = getattr(parray, "names", None)
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
result = super().__array_ufunc__(ufunc, method, *inputs, **kwargs)
if result is NotImplemented:
return NotImplemented
return ParameterArray(**result.as_dict(), stdzr=self.stdzr, stdzd=False)
def __getitem__(self, item):
default = super(LayeredArray, self).__getitem__(item)
if isinstance(item, str):
arrays = {item: default}
elif isinstance(item, (int, np.int32, np.int64)) or (
isinstance(item, tuple) and all(isinstance(val, int) for val in item)
):
arrays = {name: value for name, value in zip(default.dtype.names, default)}
elif isinstance(item, slice):
arrays = {layer.names[0]: layer.values() for layer in default.as_list()}
else:
return default
return ParameterArray(**arrays, stdzr=self.stdzr, stdzd=False)
def get(self, name, default=None):
"""Return value given by `name` if it exists, otherwise return `default`"""
if name in self.names:
return self[name]
elif isinstance(name, (list, tuple)):
return self.parray(**{p: arr for p, arr in self.as_dict().items() if p in name})
elif default is None:
return None
else:
return self.parray(**{name: default})
def drop(self, name, missing_ok=True):
if name in self.names:
return self.parray(**{p: arr for p, arr in self.as_dict().items() if p != name})
elif missing_ok:
return self
else:
raise KeyError(f"Name {name} not found in array.")
@property
def z(self) -> LayeredArray:
"""Standardized values"""
zdct = {name + "_z": self.stdzr.stdz(name, self[name].values()) for name in self.names}
return LayeredArray(**zdct, stdzr=self.stdzr)
@property
def t(self) -> LayeredArray:
"""Transformed values"""
tdct = {name + "_t": self.stdzr.transform(name, self[name].values()) for name in self.names}
return LayeredArray(**tdct, stdzr=self.stdzr)
def add_layers(self, stdzd=False, **arrays):
"""Add additional layers at each index"""
narrays = super().add_layers(**arrays)
if stdzd:
for name in narrays.names:
narrays[name] = self.stdzr.unstdz(name, narrays[name])
return self.parray(**narrays.as_dict(), stdzd=False)
def fill_with(self, **params):
"""Add a single value for a new parameter at all locations."""
assert all([isinstance(value, (float, int)) for value in params.values()])
assert all([isinstance(key, str) for key in params.keys()])
return self.add_layers(**{param: np.full(self.shape, value) for param, value in params.items()})
def parray(self, *args, **kwargs):
"""Create a new ParameterArray using this instance's standardizer"""
return ParameterArray(*args, **kwargs, stdzr=self.stdzr)
@classmethod
def stack(cls, parray_list, axis=0, **kwargs):
all_names = [pa.names for pa in parray_list]
if not all(names == all_names[0] for names in all_names):
raise ValueError("Arrays do not have the same names!")
new = np.stack(parray_list, axis=axis, **kwargs)
stdzr = parray_list[0].stdzr
return cls(**{dim: new[dim] for dim in new.dtype.names}, stdzr=stdzr)
@classmethod
def vstack(cls, parray_list, **kwargs):
all_names = [pa.names for pa in parray_list]
if not all(names == all_names[0] for names in all_names):
raise ValueError("Arrays do not have the same names!")
new = np.vstack(parray_list, **kwargs)
stdzr = parray_list[0].stdzr
return cls(**{dim: new[dim] for dim in new.dtype.names}, stdzr=stdzr)
@classmethod
def hstack(cls, parray_list, **kwargs):
all_names = [pa.names for pa in parray_list]
if not all(names == all_names[0] for names in all_names):
raise ValueError("Arrays do not have the same names!")
new = np.hstack(parray_list, **kwargs)
stdzr = parray_list[0].stdzr
return cls(**{dim: new[dim] for dim in new.dtype.names}, stdzr=stdzr)
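# Illustrative usage sketch (reusing the stdzr from the class docstring above): stack, vstack,
# and hstack require identical layer names and carry over the first array's Standardizer.
#
#     stdzr = Standardizer(d={'μ': -0.307, 'σ': 0.158}, log_vars=['d'])
#     pa1 = ParameterArray(d=np.arange(1, 4) / 10, stdzr=stdzr)
#     pa2 = ParameterArray(d=np.arange(4, 7) / 10, stdzr=stdzr)
#     ParameterArray.stack([pa1, pa2]).shape    # (2, 3)
#     ParameterArray.hstack([pa1, pa2]).shape   # (6,)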
class UncertainArray(np.ndarray):
"""Structured array containing mean and variance of a normal distribution at each point.
The main purpose of this object is to correctly `propagate uncertainty`_ under transformations. Arithmetic
operations between distributions or between distributions and scalars are handled appropriately via the
`uncertainties`_ package.
Additionally, a `scipy Normal distribution`_ object can be created at each point through the :attr:`dist` property,
allowing access to methods of that object such as :meth:`rvs`, :meth:`ppf`, :meth:`pdf`, etc.
This class can also be accessed through the alias :class:`uarray`.
.. _`propagate uncertainty`: https://en.wikipedia.org/wiki/Propagation_of_uncertainty
.. _`uncertainties`: https://pythonhosted.org/uncertainties/
.. _`scipy Normal distribution`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html
Notes
-----
The `name` argument is intended to be the general name of the value held, not unique to this instance. Combining two
:class:`UncertainArray` objects with the same name results in a new object with that name; combining two objects
with different names results in a new name that reflects this combination (so ``'A'+'B'`` becomes ``'(A+B)'``).
Parameters
----------
name : str
Name of variable.
μ : array
Mean at each point
σ2 : array
Variance at each point
**kwargs
Names and values of additional arrays to store
Examples
--------
>>> ua1 = UncertainArray('A', μ=1, σ2=0.1)
>>> ua2 = uarray('A', μ=2, σ2=0.2)  # uarray is an alias for UncertainArray
>>> ua2
A['μ', 'σ2']: (2, 0.2)
Addition of a scalar
>>> ua1+1
A['μ', 'σ2']: (2, 0.1)
Addition and subtraction of two UncertainArrays:
>>> ua2+ua1
A['μ', 'σ2']: (3., 0.3)
>>> ua2-ua1
A['μ', 'σ2']: (1., 0.3)
Note, however, that correlations are not properly accounted for (yet). Subtracting one UncertainArray from itself
should give exactly zero with no uncertainty, but it doesn't:
>>> ua2+ua2
A['μ', 'σ2']: (4., 0.4)
>>> ua2-ua2
A['μ', 'σ2']: (0., 0.4)
Mean of two `uarray` objects:
>>> uarray.stack([ua1, ua2]).mean(axis=0)
A['μ', 'σ2']: (1.5, 0.075)
Mean within a single `uarray` object:
>>> ua3 = uarray('B', np.arange(1,5)/10, np.arange(1,5)/100)
>>> ua3
B['μ', 'σ2']: [(0.1, 0.01) (0.2, 0.02) (0.3, 0.03) (0.4, 0.04)]
>>> ua3.μ
array([0.1, 0.2, 0.3, 0.4])
>>> ua3.mean()
B['μ', 'σ2']: (0.25, 0.00625)
Adding two `uarrays` with different names creates an object with a new name
>>> ua1+ua3.mean()
(A+B)['μ', 'σ2']: (1.25, 0.10625)
Accessing :attr:`dist` methods
>>> ua3.dist.ppf(0.95)
array([0.26448536, 0.43261743, 0.58489701, 0.72897073])
>>> ua3.dist.rvs([3,*ua3.shape])
array([[0.05361942, 0.14164882, 0.14924506, 0.03808633],
[0.05804824, 0.09946732, 0.08727794, 0.28091272],
[0.06291355, 0.47451576, 0.20756356, 0.2108717 ]]) # random
Attributes
----------
name : str
Name of variable.
fields : list of str
Names of each level held in the array
"""
def __new__(cls, name: str, μ: np.ndarray, σ2: np.ndarray, stdzr=None, **kwargs):
μ_ = np.asarray(μ)
σ2_ = np.asarray(σ2)
assert μ_.shape == σ2_.shape
base_dtypes = [("μ", μ_.dtype), ("σ2", σ2_.dtype)]
extra_dtypes = [(dim, np.asarray(arr).dtype) for dim, arr in kwargs.items() if arr is not None]
uarray_dtype = np.dtype(base_dtypes + extra_dtypes)
uarray_prototype = np.empty(μ_.shape, dtype=uarray_dtype)
uarray_prototype["μ"] = μ_
uarray_prototype["σ2"] = σ2_
for dim, arr in kwargs.items():
if arr is not None:
uarray_prototype[dim] = np.asarray(arr)
uarray = uarray_prototype.view(cls)
uarray.name = name
uarray.stdzr = stdzr
uarray.fields = list(uarray_dtype.fields.keys())
return uarray
def __array_finalize__(self, uarray):
if uarray is None:
return
self.name = getattr(uarray, "name", None)
self.stdzr = getattr(uarray, "stdzr", None)
self.fields = getattr(uarray, "fields", None)
@property
def μ(self) -> np.ndarray:
"""Nominal value (mean)"""
return self["μ"]
@μ.setter
def μ(self, val):
self["μ"] = val
@property
def σ2(self) -> np.ndarray:
"""Variance"""
return self["σ2"]
@σ2.setter
def σ2(self, val):
self["σ2"] = val
@property
def σ(self) -> np.ndarray:
"""Standard deviation"""
return np.sqrt(self.σ2)
@σ.setter
def σ(self, val):
self["σ2"] = val**2
@property
def _as_uncarray(self):
return unp.uarray(self.μ, self.σ)
@classmethod
def _from_uncarray(cls, name, uncarray, **extra):
return cls(
name=name,
μ=unp.nominal_values(uncarray),
σ2=unp.std_devs(uncarray) ** 2,
**extra,
)
@property
def dist(self) -> rv_continuous:
"""Array of :func:`scipy.stats.norm` objects"""
return norm(loc=self.μ, scale=self.σ)
@staticmethod
def stack(uarray_list, axis=0) -> UncertainArray:
new = np.stack(uarray_list, axis=axis)
names = [ua.name for ua in uarray_list]
if all(name == names[0] for name in names):
name = uarray_list[0].name
else:
raise ValueError("Arrays do not have the same name!")
# name = '('+', '.join(names)+')'
return UncertainArray(name, **{dim: new[dim] for dim in new.dtype.names})
def nlpd(self, target) -> float:
"""Negative log posterior density"""
return -np.log(self.dist.pdf(target))
def vEI(self, target, best_yet, k=1) -> float:
"""Vector expected improvement
Taken from https://github.com/akuhren/target_vector_estimation
Parameters
----------
target : float
best_yet : float
k : int
Returns
-------
vEI : float
"""
nc = ((target - self.μ) ** 2) / self.σ2
h1_nx = ncx2.cdf((best_yet / self.σ2), k, nc)
h2_nx = ncx2.cdf((best_yet / self.σ2), (k + 2), nc)
h3_nx = ncx2.cdf((best_yet / self.σ2), (k + 4), nc)
t1 = best_yet * h1_nx
t2 = self.σ2 * (k * h2_nx + nc * h3_nx)
return t1 - t2
def KLD(self, other):
"""Kullback–Leibler Divergence
Parameters
----------
other : UncertainArray
Returns
-------
divergence : float
"""
assert isinstance(other, UncertainArray)
return np.log(other.σ / self.σ) + (self.σ2 + (self.μ - other.μ) ** 2) / (2 * other.σ2) - 1 / 2
def BD(self, other):
"""Bhattacharyya Distance
Parameters
----------
other : UncertainArray
Returns
-------
distance : float
"""
assert isinstance(other, UncertainArray)
return 1 / 4 * np.log(1 / 4 * (self.σ2 / other.σ2 + other.σ2 / self.σ2 + 2)) + 1 / 4 * (
(self.μ - other.μ) ** 2 / (self.σ2 + other.σ2)
)
def BC(self, other):
"""Bhattacharyya Coefficient
Parameters
----------
other : UncertainArray
Returns
-------
coefficient : float
"""
return np.exp(-self.BD(other))
def HD(self, other):
"""Hellinger Distance
Parameters
----------
other : UncertainArray
Returns
-------
distance : float
"""
return np.sqrt(1 - self.BC(other))
def __repr__(self):
return f"{self.name}{self.fields}: {np.asarray(self)}"
def __str__(self):
return repr(self)
def __getitem__(self, item):
default = super().__getitem__(item)
if isinstance(item, (int, np.int32, np.int64)) or (
isinstance(item, tuple) and all(isinstance(val, int) for val in item)
):
arrays = {name: value for name, value in zip(default.dtype.names, default)}
elif isinstance(item, slice):
# arrays = {layer.names[0]: layer.values() for layer in default.as_list()}
return default
else:
return default.view(np.ndarray)
return UncertainArray(self.name, **arrays)
def sum(self, axis=None, dtype=None, out=None, keepdims=False, **kwargs) -> UncertainArray:
"""Summation with uncertainty propagation"""
kwargs.update(dict(axis=axis, dtype=dtype, out=out, keepdims=keepdims)) # Fix once Python >= 3.9
new = self._as_uncarray.sum(**kwargs)
extra = {dim: np.sum(self[dim]) for dim in self.fields if dim not in ["μ", "σ2"]}
return self._from_uncarray(self.name, new, **extra)
def mean(self, axis=None, dtype=None, out=None, keepdims=False, **kwargs) -> UncertainArray:
"""Mean with uncertainty propagation"""
kwargs.update(dict(axis=axis, dtype=dtype, out=out, keepdims=keepdims)) # Fix once Python >= 3.9
new = self._as_uncarray.mean(**kwargs)
extra = {dim: np.mean(self[dim]) for dim in self.fields if dim not in ["μ", "σ2"]}
return self._from_uncarray(self.name, new, **extra)
def __add__(self, other):
new = self._as_uncarray
if isinstance(other, UncertainArray):
new += other._as_uncarray
name = self.name if self.name == other.name else f"({self.name}+{other.name})"
else:
new += other
name = self.name
extra = {dim: np.mean(self[dim]) for dim in self.fields if dim not in ["μ", "σ2"]}
return self._from_uncarray(name, new, **extra)
def __radd__(self, other):
return self.__add__(other)
def __sub__(self, other):
new = self._as_uncarray
if isinstance(other, UncertainArray):
new -= other._as_uncarray
name = self.name if self.name == other.name else f"({self.name}-{other.name})"
else:
new -= other
name = self.name
extra = {dim: np.mean(self[dim]) for dim in self.fields if dim not in ["μ", "σ2"]}
return self._from_uncarray(name, new, **extra)
def __rsub__(self, other):
if isinstance(other, UncertainArray):
new = other._as_uncarray
name = self.name if self.name == other.name else f"({other.name}-{self.name})"
else:
new = copy.copy(other)
name = self.name
new -= self._as_uncarray
extra = {dim: np.mean(self[dim]) for dim in self.fields if dim not in ["μ", "σ2"]}
return self._from_uncarray(name, new, **extra)
def __mul__(self, other):
new = self._as_uncarray
if isinstance(other, UncertainArray):
new *= other._as_uncarray
name = self.name if self.name == other.name else f"({self.name}*{other.name})"
else:
new *= other
name = self.name
extra = {dim: np.mean(self[dim]) for dim in self.fields if dim not in ["μ", "σ2"]}
return self._from_uncarray(name, new, **extra)
def __rmul__(self, other):
return self.__mul__(other)
def __truediv__(self, other):
new = self._as_uncarray
if isinstance(other, UncertainArray):
new /= other._as_uncarray
name = self.name if self.name == other.name else f"({self.name}/{other.name})"
else:
new /= other
name = self.name
extra = {dim: np.mean(self[dim]) for dim in self.fields if dim not in ["μ", "σ2"]}
return self._from_uncarray(name, new, **extra)
def __pow__(self, other):
new = self._as_uncarray
if isinstance(other, UncertainArray):
new **= other._as_uncarray
name = self.name if self.name == other.name else f"({self.name}**{other.name})"
else:
new **= other
name = f"({self.name}**{other})"
extra = {dim: np.mean(self[dim]) for dim in self.fields if dim not in ["μ", "σ2"]}
return self._from_uncarray(name, new, **extra)
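# Illustrative sketch (arbitrary values): arithmetic delegates to the `uncertainties` package,
# which applies first-order (linear) error propagation and treats the operands as independent.
#
#     ua = UncertainArray('A', μ=2.0, σ2=0.1)
#     ub = UncertainArray('B', μ=3.0, σ2=0.2)
#     (ua + ub).σ2    # ≈ 0.3                        (variances add)
#     (ua * ub).σ2    # ≈ 3²·0.1 + 2²·0.2 = 1.7      (each variance scaled by the other mean squared)
#     (ua ** 2).name  # '(A**2)'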
class UncertainParameterArray(UncertainArray):
r"""Structured array of parameter means and variances, allowing transformation with uncertainty handling.
The primary role of this class is to compactly store the outputs of our regression models
(e.g., :class:`gumbi.GP`). We typically use these models
to produce parameter predictions or estimates, but under some transformation. For example, reaction `rate` must
clearly be strictly positive, so we fit a GP to the `log` of rate in order to more appropriately conform to the
assumption of normality. For prediction and visualization, however, we often need to switch back and forth between
natural space (:math:`rate`), transformed space (:math:`\text{ln}\; rate`), and standardized space
(:math:`\left( \text{ln}\; rate - \mu_{\text{ln}\; rate} \right)/\sigma_{\text{ln}\; rate}`), meanwhile calculating
summary statistics such as means and percentiles. This class is intended to facilitate switching between those
different contexts.
:class:`UncertainParameterArray`, also accessible through the alias :class:`uparray`, combines the functionality of
:class:`ParameterArray` and :class:`UncertainArray`. A `uparray` stores the mean and variance of the
variable itself as well as a :class:`Standardizer`
instance. This makes it simple to switch between the natural scale of the parameter and its transformed and
standardized values through the :attr:`t` and :attr:`z` properties, respectively, with the accompanying variance
transformed and scaled appropriately. This uncertainty is propagated under transformation, as with
:class:`UncertainArray`, and a scipy distribution object can be created at each point through the :attr:`dist`
property, allowing access to methods of that object such as :meth:`rvs`, :meth:`ppf`, :meth:`pdf`, etc.
Notes
-----
The `name` argument is intended to be the general name of the value held, not unique to this instance. Combining two
:class:`UncertainParameterArray` objects with the same name results in a new object with that name; combining two
objects with different names results in a new name that reflects this combination (so ``'A'+'B'`` becomes
``'(A+B)'``).
The behavior of this object depends on the transformation associated with it, as indicated by its `name` in its
stored :class:`Standardizer` instance. If this transformation is :func:`np.log`, the parameter is treated as a
`LogNormal` variable; otherwise it's treated as a `Normal` variable. This affects which distribution is returned by
:attr:`dist` (`lognorm`_ vs `norm`_) and also the interpretation of :attr:`μ` and :attr:`σ2`.
* For a `Normal` random
variable, these are simply the parameter's mean and variance in unstandardized space, :attr:`t.μ` and :attr:`t.σ2`
are identical to :attr:`μ` and :attr:`σ2`, and :attr:`z.μ` and :attr:`z.σ2` are the parameter's mean and variance
in standardized space.
* For a `LogNormal` random variable ``Y``, however, :attr:`t.μ` and :attr:`t.σ2` are the mean and variance of a
`Normal` variable ``X`` such that ``exp(X)=Y`` (:attr:`z.μ` and :attr:`z.σ2` are this mean and variance in
standardized space). In this case, :attr:`μ` and :attr:`σ2` are the scale and shape descriptors of ``Y``, so
``self.μ = np.exp(self.t.μ)`` and ``self.σ2 = self.t.σ2``. Thus, :attr:`μ` and :attr:`σ2` are not strictly the
mean and variance of the random variable in natural space; these can be obtained from :attr:`dist` instead.
* This behavior is most important, and potentially most confusing, when calculating the :meth:`mean`. Averaging
is performed in `transformed` space, where the random variable exhibits a `Normal` distribution and the mean
also exhibits a `Normal` distribution, allowing error propagation to be applied analytically. The :attr:`μ` and
:attr:`σ2` returned are the descriptors of the `LogNormal` distribution that represents the reverse
transformation of this new `Normal` distribution. Therefore, the result is more akin to marginalizing out the
given dimensions in the underlying model than a true natural-space average.
.. _norm: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html
.. _lognorm: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.lognorm.html
See Also
--------
`norm <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html>`_: \
scipy `Normal` random variable
`lognorm <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.lognorm.html>`_: \
scipy `LogNormal` random variable
:class:`ParameterArray`
:class:`UncertainArray`
:class:`Standardizer`
Parameters
----------
name : str
Name of variable.
μ : array
Mean at each point
σ2 : array
Variance at each point
stdzr : Standardizer
An instance of :class:`Standardizer`
stdzd : bool, default False
Whether the supplied values are on standardized scale instead of the natural scale
Examples
--------
Create a `LogNormal` random variable, as indicated by its :class:`Standardizer`
>>> from gumbi import uparray, Standardizer
>>> import numpy as np
>>> stdzr = Standardizer(m={'μ': -5.30, 'σ': 0.582}, log_vars=['m'])
>>> upa = uparray('m', np.arange(1,5)/10, np.arange(1,5)/100, stdzr)
>>> upa
m['μ', 'σ2']: [(0.1, 0.01) (0.2, 0.02) (0.3, 0.03) (0.4, 0.04)]
>>> stdzr.transforms['m']
[<ufunc 'log'>, <ufunc 'exp'>]
Mean and variance of the parameter in standardized space:
>>> upa.z
m_z['μ', 'σ2']: [(5.15019743, 0.02952256) (6.34117197, 0.05904512)
(7.03784742, 0.08856768) (7.53214651, 0.11809024)]
Verify round-trip transformation:
>>> upa.stdzr.unstdz(upa.name, upa.z.μ, upa.z.σ2)
(array([0.1, 0.2, 0.3, 0.4]), array([0.01, 0.02, 0.03, 0.04]))
Create a `uparray` from already-standardized values and verify round-trip transformation:
>>> uparray('m', np.arange(-2,3), np.arange(1,6)/10, stdzr, stdzd=True).z
m_z['μ', 'σ2']: [(-2., 0.1) (-1., 0.2) ( 0., 0.3) ( 1., 0.4) ( 2., 0.5)]
For `LogNormal` parameters, uparray follows the `scipy.stats` convention of parameterizing a lognormal random
variable in terms of its natural-space median (the exponential of its log-space mean) and its log-space standard deviation. Thus, a LogNormal uparray defined
as `m['μ', 'σ2']: (0.1, 0.01)` represents `exp(Normal(log(0.1), 0.01))`.
Note that the mean returned by :meth:`mean` is not simply the mean of each field; rather, its parameters describe
the `LogNormal` distribution that corresponds to the mean of the underlying `Normal` distributions in `log` (transformed) space.
>>> upa.μ.mean()
0.25
>>> upa.σ2.mean()
0.025
>>> upa.mean()
m['μ', 'σ2']: (0.22133638, 0.00625)
You can verify the mean and variance returned by averaging over the random variable explicitly.
>>> upa.mean().dist.mean()
2.2202914201059437e-01
>>> np.exp(upa.t.mean().dist.rvs(10000, random_state=2021).mean())
2.2133371283050837e-01
>>> upa.mean().dist.var()
3.0907071428047016e-04
>>> np.log(upa.mean().dist.rvs(10000, random_state=2021)).var()
6.304628046829242e-03
Calculate percentiles
>>> upa.dist.ppf(0.025)
array([0.08220152, 0.1515835 , 0.21364308, 0.27028359])
>>> upa.dist.ppf(0.975)