/
mnonr.py
187 lines (144 loc) · 4.71 KB
/
mnonr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
""" A Generator of Multivariate Non-Normal Random Numbers
Reference:
A method of generating multivariate non-normal random numbers
with desired multivariate skewness and kurtosis
https://doi.org/10.3758/s13428-019-01291-5
Code modified from:
https://github.com/wqu-nd/mnonr
"""
import numpy as np
import scipy.optimize as optim
def multouni(d, ms, mk):
beta1 = ms
beta2 = mk
# Fourth moment of ksi
f = beta2 / d - (d - 1)
# third moment of ksi
t = np.sqrt(beta1 / d)
return [t, f]
def fleishtarget(x, a):
b, c, d = x[0], x[1], x[2]
g1, g2 = a[0], a[1]
loss = (
(1 - ((b ** 2) + 2 * (c ** 2) + 6 * b * d + 15 * (d ** 2))) ** 2
+ (g1 - (6 * (b ** 2) * c + 8 * (c ** 3) + 72 * b * c * d + 270 * c * (d ** 2)))
** 2
+ (
g2
- (
3 * (b ** 4)
+ 60 * (b ** 2) * (c ** 2)
+ 60 * (c ** 4)
+ 60 * (b ** 3) * d
+ 936 * b * (c ** 2) * d
+ 630 * (b ** 2) * (d ** 2)
+ 4500 * (c ** 2) * (d ** 2)
+ 3780 * b * (d ** 3)
+ 10395 * (d ** 4)
)
)
** 2
)
return loss
def reltol(xk):
tol = 1e-10
val = fleishtarget(xk, reltol.a)
if not hasattr(reltol, "last"):
reltol.last = val
return False
if reltol.last - val < (tol * abs(val) + tol):
return True
reltol.last = val
return False
def findcoef(tf, coef):
result = optim.minimize(
fleishtarget,
coef,
args=tf,
method="BFGS",
callback=reltol, # TODO
options={
"maxiter": 1e8,
"finite_diff_rel_step": [1e-10, 1e-10, 1e-10],
},
)
return result.x
def mnonr(
n: int,
p: int,
ms: float,
mk: float,
cov: np.ndarray,
coef: np.ndarray = np.array([0.9, 0.4, 0]),
) -> np.ndarray:
"""Multivariate Non-normal Random Number Generator based on Multivariate Measures
Args:
n (int): Sample size
p (int): Number of variables
ms (float): Value of multivariate skewness
mk (float): Value of multivariate kurtosis
cov (np.ndarray): Covariance matrix
coef (np.ndarray, optional): Vector with 3 numbers for initial polynominal coefficients' (b,c,d). Defaults to (0.9,0.4,0).
Return:
Multivariate data matrix
"""
if cov.shape != (p, p):
raise ValueError("Incompatible covariance matrix.")
if (cov != cov.T).any():
raise ValueError("Covariance matrix must be symmetric.")
ev, _ = np.linalg.eig(cov)
if not (ev >= -1e-6 * np.abs(ev[0])).all():
raise ValueError("Covariance matrix is not positive definite.")
# Multivariate skewness and kurtosis range
sug_min_mk = p * (p + 0.774)
sug_mk = 1.641 * ms + sug_min_mk
sug_ms = (mk - sug_min_mk) / 1.641
if ms < 0:
raise ValueError("Multivariate skewness must be non-negtive.")
if mk < sug_min_mk:
raise ValueError(
"The minimun multivariate kurtosis in your setting should be %.3f"
% sug_min_mk
)
if not mk >= sug_mk:
raise ValueError(
"Multivariate skewness and kurtosis must follow the range of: MK>=1.641*MS+d*(d+0.774) and MS cannot be negative, where d is the number of variables.\n For your reference:\n For the given d and multivariate skewness, the kurtosis must be no less than {:.3f}.".format(
sug_mk
),
"\n For the given d and multivariate kurtosis, the skewness must be no more than {:.3f}.".format(
sug_ms
),
)
z = np.random.normal(0, 1, (n, p))
reltol.a = multouni(p, ms, mk)
coef = findcoef(reltol.a, coef)
b, c, d = coef[0], coef[1], coef[2]
a = -1 * c
xi = a + b * z + c * (z ** 2) + d * (z ** 3)
x = np.zeros((n, p))
r = np.linalg.cholesky(cov).T
for j in range(n):
for m in range(p):
for i in range(p):
x[j, m] += r[i, m] * xi[j, i]
return x, coef
if __name__ == "__main__":
data, coef = mnonr(
n=5000,
p=100,
ms=10,
mk=12000,
# cov=np.eye(3) / 2 + np.ones((3, 3)) / 2,
cov=np.eye(100),
)
# for i, row in enumerate(data):
# print("[{:2d}]".format(i), end=" ")
# for x in row:
# print("{:7.3f}".format(x), end=" ")
# print()
max, min = (
data[np.unravel_index(np.argmax(np.abs(data)), data.shape)],
data[np.unravel_index(np.argmin(np.abs(data)), data.shape)],
)
print("max={:.3f} min={:.3f}".format(max, min))
print("coef: {}".format(coef))