Skip to content
Permalink
master
Switch branches/tags
Go to file
 
 
Cannot retrieve contributors at this time
function M = sympositivedefinitesimplexcomplexfactory(n, k)
% Manifold of k product of n-by-n Hermitian positive definite matrices
% with the bi-invariant geometry such that the sum is the identity matrix.
%
% function M = sympositivedefinitesimplexcomplexfactory(n, k)
%
% Given X1, X2, ... Xk Hermitian positive definite matrices, the constraint
% tackled is
% X1 + X2 + ... = I.
%
% The Riemannian structure enforced on the manifold
% M:={(X1, X2,...) : X1 + X2 + ... = I } is a submanifold structure of the
% total space defined as the k Cartesian product of Hermitian positive
% definite Riemannian manifold (of n-by-n matrices) endowed with the bi-invariant metric.
%
% A point X on the manifold is represented as multidimensional array
% of size n-by-n-by-k. Each n-by-n matrix is Hermitian positive definite.
% Tangent vectors are represented as n-by-n-by-k multidimensional arrays, where
% each n-by-n matrix is Hermitian.
%
% The embedding space is the k Cartesian product of complex matrices of size
% n-by-n (Hermitian not required). The Euclidean gradient and Hessian expressions
% needed for egrad2rgrad and ehess2rhess are in the embedding space endowed with the
% usual metric for the complex plane identified with R^2.
%
% E = (C^(nxn))^k is the embedding space: we have the obvious representation of points
% there as 3D arrays of size nxnxk. It is equipped with the standard Euclidean metric.
%
% P = {X in C^(nxn) : X = X' and X positive definite} is a submanifold of C^(nxn).
% We turn it into a Riemannian manifold (but not a Riemannian submanifold) by equipping
% it with the bi-invariant metric.
%
% M = {X in P^k : X_1 + ... + X_k = I} is the manifold we care about here: it is
% a Riemannian submanifold of P^k, hence it is also a submanifold (but not a Riemannian
% submanifold) of E -- our embedding space.
%
%
% Please cite the Manopt paper as well as the research paper:
%
% @techreport{mishra2019riemannian,
% title={Riemannian optimization on the simplex of positive definite matrices},
% author={Mishra, B. and Kasai, H. and Jawanpuria, P.},
% institution={arXiv preprint arXiv:1906.10436},
% year={2019}
% }
%
% See also sympositivedefinitesimplexcomplexfactory multinomialfactory sympositivedefinitefactory
% This file is part of Manopt: www.manopt.org.
% Original author: Bamdev Mishra, September 18, 2019.
% Contributors: NB
% Change log: Comments updated, 16 Dec 2019
% Removed typos in Hessian expression, 01 Nov, 2021
symm = @(X) .5*(X+X');
M.name = @() sprintf('%d complex hemitian positive definite matrices of size %dx%d such that their sum is the identiy matrix.', k, n, n);
M.dim = @() (k-1)*n*(n+1);
% Helpers to avoid computing full matrices simply to extract their trace
vec = @(A) A(:);
trinner = @(A, B) real(vec(A')'*vec(B)); % = trace(A*B)
trnorm = @(A) sqrt((trinner(A, A))); % = sqrt(trace(A^2))
% Choice of the metric on the orthonormal space is motivated by the
% symmetry present in the space. The metric on the positive definite
% cone is its natural bi-invariant metric.
% The result is equal to: trace( (X\eta) * (X\zeta) )
M.inner = @innerproduct;
function iproduct = innerproduct(X, eta, zeta)
iproduct = 0;
for kk = 1 : k
iproduct = iproduct + (trinner(X(:,:,kk)\eta(:,:,kk), X(:,:,kk)\zeta(:,:,kk))); % BM okay
end
end
% Notice that X\eta is *not* symmetric in general.
% The result is equal to: sqrt(trace((X\eta)^2))
% There should be no need to take the real part, but rounding errors
% may cause a small imaginary part to appear, so we discard it.
M.norm = @innernorm;
function inorm = innernorm(X, eta)
inorm = 0;
for kk = 1:k
inorm = inorm + (trnorm(X(:,:,kk)\eta(:,:,kk)))^2; % BM okay
end
inorm = sqrt(inorm);
end
% % Same here: X\Y is not symmetric in general.
% % Same remark about taking the real part.
% M.dist = @innerdistance;
% function idistance = innerdistance(X, Y)
% idistance = 0;
% for kk = 1:k
% idistance = idistance + real(trnorm(real(logm(X(:,:,kk)\Y(:,:,kk))))); % BM okay, but need not be correct.
% end
% end
M.typicaldist = @() sqrt(k*n*(n+1)); % BM: to be looked into.
M.egrad2rgrad = @egrad2rgrad;
function rgrad = egrad2rgrad(X, egrad)
egradscaled = nan(size(egrad));
for kk = 1:k
egradscaled(:,:,kk) = X(:,:,kk)*symm(egrad(:,:,kk))*X(:,:,kk);
end
% Project onto the set X1dot + X2dot + ... = 0.
% That is rgrad = Xk*egradk*Xk + Xk*Lambdasol*Xk
rgrad = M.proj(X, egradscaled);
% % Debug
% norm(sum(rgrad,3), 'fro') % BM: this should be zero.
end
M.ehess2rhess = @ehess2rhess;
function Hess = ehess2rhess(X, egrad, ehess, eta)
Hess = nan(size(X));
egradscaled = nan(size(egrad));
egradscaleddot = nan(size(egrad));
for kk = 1:k
egradk = symm(egrad(:,:,kk));
ehessk = symm(ehess(:,:,kk));
Xk = X(:,:,kk);
etak = eta(:,:,kk);
egradscaled(:,:,kk) = Xk*egradk*Xk;
egradscaleddot(:,:,kk) = Xk*ehessk*Xk + 2*symm(etak*egradk*Xk);
end
% Compute Lambdasol
RHS = - sum(egradscaled,3);
[Lambdasol] = mylinearsolve(X, RHS);
% Compute Lambdasoldot
temp = nan(size(egrad));;
for kk = 1:k
Xk = X(:,:,kk);
etak = eta(:,:,kk);
temp(:,:,kk) = 2*symm(etak*Lambdasol*Xk);
end
RHSdot = - sum(egradscaleddot,3) - sum(temp,3);
[Lambdasoldot] = mylinearsolve(X, RHSdot);
for kk = 1:k
egradk = symm(egrad(:,:,kk));
ehessk = symm(ehess(:,:,kk));
Xk = X(:,:,kk);
etak = eta(:,:,kk);
% Directional derivatives of the Riemannian gradient
% Note that Riemannian grdient is Xk*egradk*Xk + Xk*Lambdasol*Xk.
% rhessk = Xk*(ehessk + Lambdasoldot)*Xk + 2*symm(etak*(egradk + Lambdasol)*Xk);
% rhessk = rhessk - symm(etak*(egradk + Lambdasol)*Xk);
rhessk = Xk*(ehessk + Lambdasoldot)*Xk + symm(etak*(egradk + Lambdasol)*Xk);
Hess(:,:,kk) = rhessk;
end
% Project onto the set X1dot + X2dot + ... = 0.
Hess = M.proj(X, Hess);
% Hess = nan(size(X));
% for kk = 1 : k
% % % Directional derivatives of the Riemannian gradient
% % Hess(:,:,kk) = symm(X(:,:,kk)*symm(ehess(:,:,kk))*X(:,:,kk)) + 2*symm(eta(:,:,kk)*symm(egrad(:,:,kk))*X(:,:,kk));
% % % Correction factor for the non-constant metric
% % Hess(:,:,kk) = Hess(:,:,kk) - symm(eta(:,:,kk)*symm(egrad(:,:,kk))*X(:,:,kk));
% Hess(:,:,kk) = symm(X(:,:,kk)*symm(ehess(:,:,kk))*X(:,:,kk)) + symm(eta(:,:,kk)*symm(egrad(:,:,kk))*X(:,:,kk));
% end
% % Project onto the set X1dot + X2dot + ... = 0.
% Hess = M.proj(X, Hess);
end
% Project onto the set X1dot + X2dot + ... = 0.
M.proj = @innerprojection;
function zeta = innerprojection(X, eta)
% etareal = real(eta);
% etaimag = imag(eta);
% sumetareal = sum(etareal,3);
% sumetaimag = sum(etaimag,3);
RHS = -sum(eta,3);
Lambdasol = mylinearsolve(X, RHS);
zeta = zeros(size(eta));
for jj = 1 : k
zeta(:,:,jj) = eta(:,:,jj) + (X(:,:,jj)*Lambdasol*X(:,:,jj));
end
% % Debug
% eta;
% sum(real(zeta),3)
% sum(imag(zeta),3)
% neta = eta - zeta;
% innerproduct(X, zeta, neta) % This should be zero
end
function Lambdasol = mylinearsolve(X, RHS)
% Solve the linear system.
tol_omegax_pcg = 1e-8;
max_iterations_pcg = 100;
sumetareal = real(RHS);
sumetaimag = imag(RHS);
rhs = [sumetareal(:); sumetaimag(:)];
[lambdasol, ~, ~, ~] = pcg(@compute_matrix_system, rhs, tol_omegax_pcg, max_iterations_pcg);
lambdasolreal = lambdasol(1:n^2);
lambdasolimag = lambdasol(n^2 + 1 : end);
Lambdasol = symm(reshape(lambdasolreal, [n n])) + 1i*reshape(lambdasolimag,n,n);
function lhslambda = compute_matrix_system(lambda)
lambdareal = lambda(1:n^2);
lambdaimag = lambda(n^2 + 1 : end);
Lambda = symm(reshape(lambdareal, [n n])) + 1i*reshape(lambdaimag, n, n);
lhsLambda = zeros(n,n);
for kk = 1 : k
lhsLambda = lhsLambda + ((X(:,:,kk)*Lambda*X(:,:,kk)));
end
lhsLambdareal = real(lhsLambda);
lhsLambdaimag = imag(lhsLambda);
lhslambda = [lhsLambdareal(:); lhsLambdaimag(:)];
end
end
M.tangent = M.proj;
M.tangent2ambient = @(X, eta) eta;
myeps = eps;
M.retr = @retraction;
function Y = retraction(X, eta, t) % BM okay
if nargin < 3
teta = eta;
else
teta = t*eta;
end
% The symm() call is mathematically unnecessary but numerically
% necessary.
Y = zeros(size(X));
for kk=1:k
% Second-order approximation of expm
Y(:,:,kk) = symm(X(:,:,kk) + teta(:,:,kk) + .5*teta(:,:,kk)*((X(:,:,kk) + myeps*eye(n) )\teta(:,:,kk)));
end
Ysum = sum(Y, 3);
Ysumsqrt = sqrtm(Ysum);
for kk=1:kk
Y(:,:,kk) = symm((Ysumsqrt\Y(:,:,kk))/Ysumsqrt);
end
% % Debug
% norm(sum(Y, 3) - eye(n), 'fro') % This should be zero
end
M.exp = @exponential;
function Y = exponential(X, eta, t)
if nargin < 3
t = 1.0;
end
Y = retraction(X, eta, t);
warning('manopt:sympositivedefinitesimplexcomplexfactory:exp', ...
['Exponential for the Simplex' ...
'manifold not implemented yet. Used retraction instead.']);
end
M.hash = @(X) ['z' hashmd5([real(X(:)); imag(X(:))])];% BM okay
% Generate a random symmetric positive definite matrix following a
% certain distribution. The particular choice of a distribution is of
% course arbitrary, and specific applications might require different
% ones.
M.rand = @random;
function X = random()
X = nan(n,n,k);
for kk = 1:k
D = diag(1+rand(n, 1));
[Q, R] = qr(randn(n) +1i*randn(n)); % BM okay
X(:,:,kk) = Q*D*Q';
end
Xsum = sum(X, 3);
Xsumsqrt = sqrtm(Xsum);
for kk = 1 : k
X(:,:,kk) = symm((Xsumsqrt\X(:,:,kk))/Xsumsqrt); % To do
end
end
% Generate a uniformly random unit-norm tangent vector at X.
M.randvec = @randomvec;
function eta = randomvec(X)
eta = nan(size(X));
for kk = 1:k
eta(:,:,kk) = symm(randn(n,n) + 1i*randn(n, n)); % BM okay
end
eta = M.proj(X, eta); % To do
nrm = M.norm(X, eta);
eta = eta / nrm;
end
M.lincomb = @matrixlincomb; % BM okay
M.zerovec = @(X) zeros(n,n,k); % BM okay
% Poor man's vector transport: exploit the fact that all tangent spaces
% are the set of symmetric matrices, so that the identity is a sort of
% vector transport. It may perform poorly if the origin and target (X1
% and X2) are far apart though. This should not be the case for typical
% optimization algorithms, which perform small steps.
M.transp = @(X1, X2, eta) M.proj(X2, eta);% To do
% vec and mat are not isometries, because of the unusual inner metric.
M.vec = @(X, U) [real(U(:)); image(U(:))] ; % BM okay
M.mat = @(X, u) reshape(u(1:(n*n*k)) + 1i*u((n*n*k+1):end), n, n, k); % BM okay
M.vecmatareisometries = @() false;
end