-
Notifications
You must be signed in to change notification settings - Fork 98
/
Copy pathmultinomialsymmetricfactory.m
163 lines (141 loc) · 5.45 KB
/
multinomialsymmetricfactory.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
function M = multinomialsymmetricfactory(n)
% Manifold of n-by-n symmetric stochastic matrices with positive entries.
%
% function M = multinomialsymmetricfactory(n)
%
% M is a Manopt manifold structure to optimize over the set of n-by-n
% symmetric matrices with (strictly) positive entries and such that the
% entries of each column and each row sum to one.
%
% Points on the manifold and tangent vectors are represented naturally as
% symmetric matrices of size n. The Riemannian metric imposed on the
% manifold is the Fisher metric, that is, if X is a point on the manifold
% and U, V are two tangent vectors:
%
%     M.inner(X, U, V) = <U, V>_X = sum(sum(U.*V./X)).
%
% The retraction here provided is only first order. Consequently, the
% slope test in the checkhessian tool is only valid at points X where the
% gradient is zero. Furthermore, if some entries of X are very close to
% zero, this may cause numerical difficulties that can also lead to a
% failed slope test. More generally, it is important that the solution of
% the optimization problem should have positive entries, sufficiently far
% away from zero to avoid numerical issues.
%
% Link to the paper: https://arxiv.org/abs/1802.02628.
%
% Please cite the Manopt paper as well as the research paper:
%     @Techreport{Douik2018Manifold,
%       Title   = {Manifold Optimization Over the Set of Doubly Stochastic
%                  Matrices: {A} Second-Order Geometry},
%       Author  = {Douik, A. and Hassibi, B.},
%       Journal = {Arxiv preprint ArXiv:1802.02628},
%       Year    = {2018}
%     }
%
% See also: multinomialdoublystochasticfactory multinomialfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Ahmed Douik, March 06, 2018.
% Contributors:
% Change log:
%
%    Sep. 6, 2018 (NB):
%        Removed M.exp() as it was not implemented.
%
%    Jan. 4, 2021 (NB):
%        Compatibility with Octave 6.1.0, at the cost of having duplicated
%        the definition of maxDSiters and of having replaced all occurrences
%        of 'e' with its formerly defined value, namely, ones(n, 1).

    M.name = @() sprintf('%dx%d symmetric doubly-stochastic matrices with positive entries', n, n);

    % Dimension of the manifold: symmetric matrices have n(n+1)/2 free
    % entries; the n (coupled) row-sum constraints remove n of them.
    M.dim = @() n*(n-1)/2;

    % Fisher metric: the Euclidean inner product weighted entrywise by 1./X
    % (see the formula in the header).
    M.inner = @iproduct;
    function ip = iproduct(X, eta, zeta)
        ip = sum((eta(:).*zeta(:))./X(:));
    end

    % Norm induced by the Fisher metric.
    % NOTE: this anonymous function captures the value of M at this point
    % in the code, so M.inner must already be defined above.
    M.norm = @(X, eta) sqrt(M.inner(X, eta, eta));

    % The manifold is not compact as a result of the choice of the metric,
    % thus any choice here is arbitrary. This is notably used to pick
    % default values of initial and maximal trust-region radius in the
    % trustregions solver.
    M.typicaldist = @() n;

    % Pick a random point on the manifold.
    M.rand = @random;
    function X = random()
        % Iteration budget passed to doubly_stochastic (a Manopt helper
        % defined elsewhere; presumably a Sinkhorn-type balancing — see
        % that function for its exact contract). Duplicated here and in
        % retraction() for Octave compatibility (see change log).
        maxDSiters = 100 + 2*n;
        Z = symm(abs(randn(n, n))); % Random point in the ambient space
        X = symm(doubly_stochastic(Z, maxDSiters)); % Projection on the manifold
    end

    % Pick a random vector in the tangent space at X, of norm 1.
    M.randvec = @randomvec;
    function eta = randomvec(X)
        % A random vector in the ambient space (symmetrized).
        Z = symm(randn(n, n)) ;
        % Projection to the tangent space: remove the component normal to
        % the row-sum constraints. alpha solves the linear system
        % (I + X) alpha = Z * ones(n, 1), as derived in the cited paper.
        alpha = sum((eye(n) + X)\(Z),2) ;
        eta = Z - (alpha*ones(n, 1)' + ones(n, 1)*alpha').*X ;
        % Normalizing the vector to unit Fisher norm.
        nrm = M.norm(X, eta);
        eta = eta / nrm;
    end

    % Orthogonal projection of the vector eta in the ambient space to the
    % tangent space (orthogonality in the sense of the Fisher metric).
    M.proj = @projection;
    function etaproj = projection(X, eta)
        % alpha solves (I + X) alpha = eta * ones(n, 1); the correction
        % term (alpha 1' + 1 alpha').*X zeroes out the row sums of etaproj.
        alpha = sum((eye(n) + X)\(eta), 2);
        etaproj = eta - (alpha*ones(n, 1)' + ones(n, 1)*alpha').*X;
    end

    % Tangent vectors are stored as plain symmetric matrices: tangent-space
    % "conversion" is projection, and tangent-to-ambient is the identity.
    M.tangent = M.proj;
    M.tangent2ambient = @(X, eta) eta;

    % Conversion of Euclidean to Riemannian gradient.
    M.egrad2rgrad = @egrad2rgrad;
    function rgrad = egrad2rgrad(X, egrad)
        % The Fisher metric turns the Euclidean gradient into egrad.*X,
        % which is then projected to the tangent space (same alpha system
        % as in projection(), here with right-hand side mu = (X.*egrad)*1).
        mu = sum((X.*egrad), 2);
        alpha = (eye(n) + X)\mu;
        rgrad = (egrad - alpha*ones(n, 1)' - ones(n, 1)*alpha').*X;
    end

    % First-order retraction: entrywise exponential scaling of X, followed
    % by re-projection onto the doubly stochastic matrices. Only first
    % order — see the caveat about checkhessian in the header.
    M.retr = @retraction;
    function Y = retraction(X, eta, t)
        if nargin < 3
            t = 1.0;
        end
        % Duplicated from random() for Octave compatibility (change log).
        maxDSiters = 100 + 2*n;
        Y = X.*exp(t*(eta./X));
        Y = symm(doubly_stochastic(Y, maxDSiters));
        % Clamp at eps so entries remain strictly positive (the metric
        % divides by X, so zeros would be fatal downstream).
        Y = max(Y, eps);
    end

    % Conversion of Euclidean to Riemannian Hessian.
    % Formulas follow the second-order geometry derived in the cited paper
    % (arXiv:1802.02628).
    M.ehess2rhess = @ehess2rhess;
    function rhess = ehess2rhess(X, egrad, ehess, eta)
        % Computing the directional derivative of the Riemannian
        % gradient along eta.
        gamma = egrad.*X ;
        gammadot = ehess.*X + egrad.*eta;
        % alpha and its directional derivative alphadot solve the same
        % (I + X) linear systems that appear in projection().
        alpha = sum((eye(n) + X)\(gamma), 2);
        m = (eye(n)+X)\eta;
        alphadot = sum((eye(n) + X)\(gammadot - m*gamma), 2);
        S = (alpha*ones(n, 1)' + ones(n, 1)*alpha');
        deltadot = gammadot - (alphadot*ones(n, 1)' + ones(n, 1)*alphadot').*X - S.*eta;
        % Projecting gamma (the Riemannian gradient, pre-projection).
        delta = gamma - S.*X;
        % Computing and projecting nabla (the Riemannian connection term).
        nabla = deltadot - 0.5*(delta.*eta)./X;
        w = sum((eye(n) + X)\(nabla), 2);
        rhess = nabla - (w*ones(n, 1)' + ones(n, 1)*w').*X;
    end

    % Miscellaneous manifold functions (standard Manopt plumbing).
    M.hash = @(X) ['z' hashmd5(X(:))]; % hashmd5 is a Manopt helper.
    M.lincomb = @matrixlincomb;        % matrixlincomb is a Manopt helper.
    M.zerovec = @(X) zeros(n, n);
    M.vec = @(X, U) U(:);
    M.mat = @(X, u) reshape(u, n, n);
    M.vecmatareisometries = @() false;

    % Vector transport: project the tangent vector d at X1 onto the
    % tangent space at X2.
    M.transp = @transp;
    function U = transp(X1, X2, d)
        U = projection(X2, d);
    end

end
function A = symm(Z)
% Return the symmetric part of the square matrix Z, i.e. (Z + Z')/2.
% Used above to keep iterates and tangent vectors exactly symmetric.
    A = (Z + Z')/2;
end