Skip to content

Commit

Permalink
NF: principal component analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
nno committed Sep 1, 2016
1 parent 547ed63 commit cd54520
Show file tree
Hide file tree
Showing 2 changed files with 487 additions and 0 deletions.
223 changes: 223 additions & 0 deletions mvpa/cosmo_pca.m
@@ -0,0 +1,223 @@
function [pca_samples,params]=cosmo_pca(samples,retain)
% Principal Component Analysis
%
% [pca_samples,params]=cosmo_pca(samples[,retain])
%
% Input:
% samples M x N numeric matrix
% retain (optional) number of components to retain;
% must be less than or equal to N. Default: N
%
% Output:
% pca_samples M x retain samples in Principal Component
% space, after samples had been centered
% params struct with fields:
% .coef M x retain Principal Component coefficients
% .mu M x 1 column-wise average of samples
% It holds that:
% samples=bsxfun(@plus,params.mu,...
% pca_samples*params.coef')
% .explained 1 x N Percentage of explained variance
%
%
% Examples:
% samples=[ 2.0317 -0.8918 -0.8258;...
% 0.5838 1.8439 1.1656;...
% -1.4437 -0.2617 -1.9207;...
% -0.5177 2.3387 0.4412;...
% 1.1908 -0.2040 -0.2088;...
% -1.3265 2.7235 0.1476];
% %
% % apply PCA, keeping two dimensions
% [pca_samples,params]=cosmo_pca(samples,2);
% %
% % show samples in PC space
% cosmo_disp(pca_samples);
% > [ -2.64 0.654
% > 0.923 1.43
% > -0.723 -2.48
% > 1.64 0.265
% > -1.46 0.569
% > 2.27 -0.438 ]
% %
% % show parameters
% cosmo_disp(params);
% > .coef
% > [ -0.512 0.744
% > 0.794 0.219
% > 0.328 0.632 ]
% > .mu
% > [ 0.0864 0.925 -0.2 ]
% > .explained
% > [ 66
% > 33.3
% > 0.676 ]
%
% here
%
% # For CoSMoMVPA's copyright information and license terms, #
% # see the COPYING file distributed with CoSMoMVPA. #

if nargin<2
retain=[];
end

verify_parameters(samples,retain)
ndim=get_number_of_components(samples,retain);

% subtract mean
mu=mean(samples,1);
samples_demu=bsxfun(@minus, samples, mu);

% singular value decomposition
[u,s,w]=svd(samples_demu,'econ');

% extract eigen values
[nrow,ncol]=size(samples);
samples_is_vector=nrow==1 || ncol==1;
if samples_is_vector
% single eigen value
eigvals=s(1);
else
% take diagonal
eigvals=diag(s);
end

if ndim==0
% seperate case for zero dimensions
pca_samples=zeros(1,0);
coef=zeros(ncol,0);
else
pca_samples_rand_sign=bsxfun(@times,u(:,1:ndim),eigvals(1:ndim)');
[coef,sgn]=max_abs_positive_columnwise(w(:,1:ndim));
pca_samples=bsxfun(@times,pca_samples_rand_sign,sgn);
end

% store coefficients
params=struct();
params.coef=coef;
params.mu=mu;


nexpl=min([nrow-1,ncol]);
if nrow==1 || ncol==0
% special case for empty vecto with explained variance
params.explained=zeros(0,1);
else
explained_ratio=eigvals.^2;
params.explained=100*explained_ratio(1:nexpl)/sum(explained_ratio);
end


function ncomp=get_number_of_components(samples,retain)
max_retain=size(samples,2);

if isempty(retain)
retain=max_retain;
end

if retain>max_retain
error('retain argument %d must be less than %d',...
retain,max_retain);
end

[nrow,ncol]=size(samples);
ncomp=min([nrow-1,ncol,retain]);



function [coef_pos,sgn]=max_abs_positive_columnwise(coef)
% swap sign for each column in which the maximum absolute value is
% negative. sgn contains the sign used in each column (-1 or 1)
[unused,i]=max(abs(coef),[],1);
[nrows,ncols]=size(coef);
mx_idx=(0:(ncols-1))*nrows+i;
mx=coef(mx_idx);

sgn=(mx>0)*2-1;
coef_pos=bsxfun(@times,coef,sgn);

function verify_parameters(samples,retain)
if ~isnumeric(samples)
error('samples argument must be numeric');
end

if numel(size(samples))>2
error('samples argument must be a matrix');
end

if ~(isempty(retain) || ...
(isscalar(retain) && ...
retain>0 && ...
isequal(round(retain),retain)))
error('retain argument must be integer');
end



% function [pca_samples,params]=cosmo_temp_pca(samples,retain)
%
%
% mu = mean(samples,1);
% samples_demu = bsxfun(@minus, samples, mu);
% [u,s,w]=svd(samples_demu);
%
% if size(samples,2)==1 || size(samples,1)==1
% ev=s(1);
% else
% ev=diag(s);
% end
%
% [nrow,ncol]=size(s);
% ncomp=min(nrow,ncol);
% pca_samples=bsxfun(@times,u(:,1:ncomp),ev');
%
% %uu=max_abs_positive(uu);
% [coef,sgn]=max_abs_positive_columnwise(w);
% pca_samples=bsxfun(@times,pca_samples,sgn(1:ncomp));
%
%
% max_retain=size(samples,2);
% if nargin<2
% retain=max_retain;
% elseif ~(isscalar(retain) && ...
% retain>0 && ...
% retain<=max_retain && ...
% isequal(round(retain),retain))
% error('illegal value for retain, data has %d dimensions',...
% max_retain);
% end
%
% retain=min([nrow-1,ncol,retain]);
%
% pca_samples=pca_samples(:,1:retain);
% coef=coef(:,1:retain);
%
%
% params=struct();
% params.coef=coef;
% params.mu=mu;
% ev=ev.^2;
%
%
% %nexpl=max(min([nrow-1,ncol]),1);
% nexpl=min([nrow-1,ncol]);
% if nexpl==0
% params.explained=zeros(0,1);
% else
% params.explained=100*ev(1:nexpl)/sum(ev);
% end
%
%
%
%
% function [coef_pos,sgn]=max_abs_positive_columnwise(coef)
%
% [nrow,ncol]=size(coef);
% [unused,i]=max(abs(coef),[],1);
%
% mx_idx=sub2ind(size(coef),i,1:size(coef,2));
% mx=coef(mx_idx);
%
% sgn=(mx>0)*2-1;
% coef_pos=bsxfun(@times,coef,sgn);

0 comments on commit cd54520

Please sign in to comment.