-
Notifications
You must be signed in to change notification settings - Fork 0
/
pictures_eigvecs.py
235 lines (188 loc) · 6.89 KB
/
pictures_eigvecs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
"""
Module `picture_eigvecs.py`.
Numerical validation for the eigenvector correlation formulas in the DSBM.
References:
[1]: Submitted paper
"""
from os import path
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import rc
from scipy.sparse.linalg import eigs
from generator import fast_sbm
rc("text", usetex=True)
rc("font", family="serif")
def true_eigenvectors(eta, n_nodes):
"""
Computes the theoretical eigenvectors of the path-DSBM with parameter eta. See [1]
for details and computations.
Input:
- eta: array_like, parameter for the path-DSBM.
- n_nodes: number of nodes in the graph
Output:
- array of shape (2, n_nodes, eta.size), such that result[i, :, j] contains the
i-th eigenvector associated with eta[j].
"""
if not isinstance(eta, np.ndarray):
eta = np.array(eta)
m = len(eta)
eigv = np.zeros((2, n_nodes, m))
half = int(n_nodes / 2)
eigv[0, :half, :] = np.sqrt(eta)
eigv[0, half:, :] = np.sqrt(1 - eta)
eigv[1, :half, :] = np.sqrt(eta)
eigv[1, half:, :] = -np.sqrt(1 - eta)
result = np.sqrt(2 / n_nodes) * eigv
return result
def theoretical_overlaps(eta, d=10):
"""
Returns the theoretical scalar products between the informative eigenvectors of a
path-DSBM random graph and its expectation matrix. See [1] for details.
Input:
- eta: array_like, asymmetry parameters for the DSBM
- d: degree parameter
Output:
- array of shape (2, 2, eta.size) such that result[i, j, k] is the theoretical
scalar product between the i-th eigenvector of the graph and the j-th one of its
expected adjacency matrix, for asymmetry parameter eta[k].
"""
if not isinstance(eta, np.ndarray):
eta = np.array(eta)
theta = 2 * np.sqrt(eta * (1 - eta))
mu = d * (1 + np.array([1, -1])[:, None] * theta) / 4
x = d / (mu * mu)
denominator = 4 - x + theta * theta * x
numerator = 4 - 2 * x + x * x * (1 - theta * theta) / 4
correlations = np.divide(
numerator,
denominator,
out=np.zeros_like(numerator),
where=mu > np.sqrt(mu[0, :]),
)
b = np.array([[0, 1], [1, 0]])
mult = np.eye(2)[:, :, None] + b[:, :, None] * (2 * eta - 1)
return mult * np.sqrt(correlations[:, None, :])
def empirical_overlaps(n_nodes, eta_list, n_samples, d, filename=None):
"""
Computes the scalar products between the eigenvectors a path-DSBM random graph and
their expected values.
Input:
- n_nodes: number of nodes in the graph
- eta_list: list of asymmetry parameters eta to consider.
- n_samples: number of graphs generated for each value of eta
- d: degree parameter for the path-DSBM
- filename: if provided, the data will be saved to disk
Output:
- mean_overlap: array of shape (2, 2, n_eta) containing the mean of the
`n_samples` scalar products between the expected eigenvectors and the empirical
ones.
- std_overlap: idem, but for the standard deviation.
If saved to disk, the saved matrix will be of shape (2, 2, 2, n_eta), with data[0]
containing the mean overlaps and data[1] the standard deviations.
"""
n_eta = eta_list.size
true_vecs = true_eigenvectors(eta_list, n_nodes)
overlap = np.zeros((2, 2, n_eta, n_samples))
for (i, eta) in enumerate(eta_list):
print(f"Step {i+1}/{n_eta} --- eta = {eta:.3f}", end="\r")
for sample in range(n_samples):
F = d * np.array([[1 / 2, eta], [1 - eta, 1 / 2]])
A, _ = fast_sbm(n_nodes, F)
_, eigenvectors = eigs(A, k=2)
overlap[:, :, i, sample] = np.abs(true_vecs[:, :, i].dot(eigenvectors).T)
mean_overlap = np.mean(overlap, axis=3)
std_overlap = np.std(overlap, axis=3)
if filename is not None:
data = np.stack((mean_overlap.ravel(), std_overlap.ravel()))
np.savetxt(filename, data)
return mean_overlap, std_overlap
def gen_plot(
x,
means,
stds,
theories,
bg_colors=None,
fg_colors=None,
xlabel=None,
legends=None,
filename=None,
):
"""
A somewhat generic function to plot the empirical mean/std of a quantity against its
predicted theory. Multiple such plots can be superimposed on the same figure.
Input:
- x: array_like, common x axis
- means: list of arrays, one for each plot
- stds: idem
- theories: idem
- bg_colors: colors to plot theory and stdev ; one for each plot
- fg_colors: colors to plot the mean quantity; one for each plot
- xlabel: label for the x axis
- legends: one legend per plot
- filename: if provided, saves the picture to the disk
Output: None, but shows the generated figure.
"""
fig, ax = plt.subplots(figsize=(6, 6))
ax.set_xlabel(xlabel, fontsize=30)
ax.tick_params(axis="both", labelsize=20)
for (k, (mean, std, theory, bg, fg, legend)) in enumerate(
zip(means, stds, theories, bg_colors, fg_colors, legends)
):
errsup = mean + std
errinf = mean - std
ax.fill_between(x, errinf, errsup, color=bg, alpha=0.2)
ax.plot(
x,
mean,
"-",
markersize=1,
marker="o",
color=fg,
label=legend,
zorder=10,
)
ax.legend(loc=0, fontsize=20)
ax.plot(x, theory, color=bg, linewidth=5)
# if filename is not None:
# plt.savefig(filename)
plt.show()
# ------------
# Actual picture generation
# ------------
if __name__ == "__main__":
n_eta = 50
eta_list = np.linspace(0.5, 1, n_eta)
d = 20
datafile = "data/eigen_overlaps.txt"
if path.exists(datafile):
data = np.genfromtxt(datafile)
mean_overlap = data[0].reshape((2, 2, n_eta))
std_overlap = data[1].reshape((2, 2, n_eta))
else:
mean_overlap, std_overlap = empirical_overlaps(
500, eta_list, 20, d, filename=datafile
)
theoretical_overlap = theoretical_overlaps(eta_list, d)
legend = r"$|\langle u_{}, \varphi_{} \rangle |$"
gen_plot(
eta_list,
mean_overlap[[0, 1], [0, 1], :],
std_overlap[[0, 1], [0, 1], :],
theoretical_overlap[[0, 1], [0, 1], :],
xlabel=r"$\eta$",
bg_colors=["tomato", "cornflowerblue"],
fg_colors=["brown", "mediumblue"],
legends=[legend.format(1, 1), legend.format(2, 2)],
filename="pictures/eigenvectors_corr1.pdf",
)
gen_plot(
eta_list,
mean_overlap[[0, 1], [1, 0], :],
std_overlap[[0, 1], [1, 0], :],
theoretical_overlap[[0, 1], [1, 0], :],
xlabel=r"$\eta$",
bg_colors=["tomato", "cornflowerblue"],
fg_colors=["brown", "mediumblue"],
legends=[legend.format(1, 2), legend.format(2, 1)],
filename="pictures/eigenvectors_corr1.pdf",
)