Skip to content

Commit

Permalink
add fast ica
Browse files Browse the repository at this point in the history
  • Loading branch information
lindahua committed Jul 19, 2014
1 parent fea0283 commit 1010d72
Show file tree
Hide file tree
Showing 7 changed files with 302 additions and 4 deletions.
4 changes: 2 additions & 2 deletions docs/source/whiten.rst
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,15 @@ Given a dataset, one can use the ``fit`` method to estimate a whitening transfor
**Note:** This function internally relies on ``cov_whiten`` to derive the transformation ``W``. The function ``cov_whiten`` itself is also a useful function.


.. function:: cov_whiten(C)
.. function:: cov_whitening(C)

Derive the whitening transform coefficient matrix ``W`` given the covariance matrix ``C``. Here, ``C`` can be either a square matrix, or an instance of ``Cholesky``.

Internally, this function solves the whitening transform using Cholesky factorization. The rationale is as follows: let :math:`\mathbf{C} = \mathbf{U}^T \mathbf{U}` and :math:`\mathbf{W} = \mathbf{U}^{-1}`, then :math:`\mathbf{W}^T \mathbf{C} \mathbf{W} = \mathbf{I}`.

**Note:** The return matrix ``W`` is an upper triangular matrix.

.. function:: cov_whiten(C, regcoef)
.. function:: cov_whitening(C, regcoef)

Derive a whitening transform based on a regularized covariance, as ``C + (eigmax(C) * regcoef) * eye(d)``.

Expand Down
11 changes: 10 additions & 1 deletion src/MultivariateStats.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ module MultivariateStats
invsqrtm!, # Compute inverse of matrix square root inplace
cov_whitening, # Compute a whitening transform based on covariance
cov_whitening!, # Compute a whitening transform based on covariance (input will be overwritten)
invsqrtm, # Compute C^{-1/2}, i.e. inv(sqrtm(C))

## pca
PCA, # Type: Principal Component Analysis model
Expand Down Expand Up @@ -78,7 +79,14 @@ module MultivariateStats
multiclass_lda_stats, # compute statistics for multiclass LDA training
multiclass_lda, # train multi-class LDA based on statistics
mclda_solve, # solve multi-class LDA projection given scatter matrices
mclda_solve! # solve multi-class LDA projection (inputs are overriden)
mclda_solve!, # solve multi-class LDA projection (inputs are overriden)

## fastica
FastICA, # Type: the Fast ICA model

icagfun, # a function to get a ICA approx neg-entropy functor
fastica! # core algorithm function for the Fast ICA


## source files
include("common.jl")
Expand All @@ -87,5 +95,6 @@ module MultivariateStats
include("cca.jl")
include("cmds.jl")
include("lda.jl")
include("fastica.jl")

end # module
198 changes: 198 additions & 0 deletions src/fastica.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
# FastICA

# Reference:
#
# Aapo Hyvarinen and Erkki Oja
# Independent Component Analysis: Algorithms and Applications.
# Neural Network 13(4-5), 2000.
#

#### g: Derivatives of G

# the abstract type for all g functions:
#
# Let f be an instance of such type, then
#
# evaluate(f, x) --> (v, d)
#
# It returns a function value v, and derivative d
#
abstract ICAGDeriv

immutable Tanh <: ICAGDeriv
a::Float64
end

Tanh() = Tanh(1.0)
evaluate(f::Tanh, x::Float64) = (a = f.a; t = tanh(a * x); (t, a * (1.0 - t * t)))

immutable Gaus <: ICAGDeriv end
evaluate(f::Gaus, x::Float64) = (x2 = x * x; e = exp(-0.5 * x2); (x * e, (1.0 - x2) * e))

## a function to get a g-fun

icagfun(fname::Symbol) =
fname == :tanh ? Tanh() :
fname == :gaus ? Gaus() :
error("Unknown gfun $(fname)")

icagfun(fname::Symbol, a::Float64) =
fname == :tanh ? Tanh(a) :
fname == :gaus ? error("The gfun $(fname) has no parameters") :
error("Unknown gfun $(fname)")


#### core algorithm

function fastica!(W::DenseMatrix{Float64}, # initialized component matrix, size (m, k)
X::DenseMatrix{Float64}, # (whitened) observation sample matrix, size(m, n)
fun::ICAGDeriv, # approximate neg-entropy functor
maxiter::Int, # maximum number of iterations
tol::Real, # convergence tolerance
verbose::Bool) # whether to show iterative info

# argument checking
m = size(W, 1)
k = size(W, 2)
size(X, 1) == m || throw(DimensionMismatch("Sizes of W and X mismatch."))
n = size(X, 2)
k <= min(m, n) || throw(DimensionMismatch("k must not exceed min(m, n)."))

if verbose
@printf("FastICA Algorithm (m = %d, n = %d, k = %d)\n", m, n, k)
println("============================================")
end

# pre-allocated storage
Wp = Array(Float64, m, k) # to store previous version of W
U = Array(Float64, n, k) # to store w'x & g(w'x)
Y = Array(Float64, m, k) # to store E{x g(w'x)} for components
E1 = Array(Float64, k) # store E{g'(w'x)} for components

# normalize each column
for j = 1:k
w = view(W,:,j)
scale!(w, 1.0 / sqrt(sumabs2(w)))
end

# main loop
t = 0
converged = false
while !converged && t < maxiter
t += 1
copy!(Wp, W)

# apply W of previous step
At_mul_B!(U, X, W) # u <- w'x

# compute g(w'x) --> U and E{g'(w'x)} --> E1
_s = 0.0
for j = 1:k
for i = 1:n
u, v = evaluate(fun, U[i,j])
U[i,j] = u
_s += v
end
E1[j] = _s / n
end

# compute E{x g(w'x)} --> Y
scale!(A_mul_B!(Y, X, U), 1.0 / n)

# update w: E{x g(w'x)} - E{g'(w'x)} w := y - e1 * w
for j = 1:k
w = view(W,:,j)
y = view(Y,:,j)
e1 = E1[j]
for i = 1:m
w[i] = y[i] - e1 * w[i]
end
end

# symmetric decorrelation: W <- W * (W'W)^{-1/2}
copy!(W, W * _invsqrtm!(W'W))

# compare with Wp
chg = 0.0
for j = 1:k
s = 0.0
w = view(W,:,j)
wp = view(Wp,:,j)
for i = 1:m
s += abs(w[i] - wp[i])
end
if s > chg
chg = s
end
end
converged = (chg < tol)

if verbose
@printf("Iter %4d: change = %.6e\n", t, chg)
end
end
end


#### FastICA type

type FastICA
mean::Vector{Float64} # mean vector, of length m (or empty to indicate zero mean)
W::Matrix{Float64} # component coefficient matrix, of size (m, k)
end

indim(M::FastICA) = size(M.W, 1)
outdim(M::FastICA) = size(M.W, 2)
Base.mean(M::FastICA) = fullmean(indim(M), M.mean)

transform(M::FastICA, x::AbstractVecOrMat) = At_mul_B(M.W, centralize(x, M.mean))


#### interface function

function fit(::Type{FastICA}, X::DenseMatrix{Float64}, # sample matrix, size (m, n)
k::Int; # number of independent components
fun::ICAGDeriv=icagfun(:tanh), # approx neg-entropy functor
do_whiten::Bool=true, # whether to perform pre-whitening
maxiter::Integer=100, # maximum number of iterations
tol::Real=1.0e-6, # convergence tolerance
mean=nothing, # pre-computed mean
winit::Matrix{Float64}=zeros(0,0), # init guess of W, size (m, k)
verbose::Bool=false) # whether to display iterations

# check input arguments
m, n = size(X)
n > 1 || error("There must be more than one samples, i.e. n > 1.")
k <= min(m, n) || error("k must not exceed min(m, n).")

maxiter > 1 || error("maxiter must be greater than 1.")
tol > 0 || error("tol must be positive.")

# preprocess data
mv = preprocess_mean(X, mean)
Z::Matrix{Float64} = centralize(X, mv)

W0::Matrix{Float64} = zeros(0,0) # whitening matrix
if do_whiten
C = scale!(A_mul_Bt(Z, Z), 1.0 / (n - 1))
Efac = eigfact(C)
ord = sortperm(Efac.values; rev=true)
(v, P) = extract_kv(Efac, ord, k)
W0 = scale!(P, 1.0 ./ sqrt(v))
# println(W0' * C * W0)
Z = W0'Z
end

# initialize
W = (isempty(winit) ? randn(size(Z,1), k) : copy(winit))::Matrix{Float64}

# invoke core algorithm
fastica!(W, Z, fun, maxiter, tol, verbose)

# construct model
if do_whiten
W = W0 * W
end
return FastICA(mv, W)
end

17 changes: 17 additions & 0 deletions src/whiten.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,20 @@ function fit{T<:FloatingPoint}(::Type{Whitening}, X::DenseMatrix{T};
return Whitening(mv, cov_whitening!(C, regcoef))
end

# invsqrtm

function _invsqrtm!{T<:FloatingPoint}(C::Matrix{T})
n = size(C, 1)
size(C, 2) == n || error("C must be a square matrix.")
E = eigfact!(Symmetric(C))
U = E.vectors
evs = E.values
for i = 1:n
@inbounds evs[i] = 1.0 / sqrt(sqrt(evs[i]))
end
scale!(U, evs)
return A_mul_Bt(U, U)
end

invsqrtm{T<:FloatingPoint}(C::DenseMatrix{T}) = _invsqrtm!(copy(C))

68 changes: 68 additions & 0 deletions test/fastica.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
using MultivariateStats
using Base.Test

srand(15678)

## icagfun

f = icagfun(:tanh)
u, v = evaluate(f, 1.5)
@test_approx_eq u 0.905148253644866438242
@test_approx_eq v 0.180706638923648530597

f = icagfun(:tanh, 1.5)
u, v = evaluate(f, 1.2)
@test_approx_eq u 0.946806012846268289646
@test_approx_eq v 0.155337561057228069719

f = icagfun(:gaus)
u, v = evaluate(f, 1.5)
@test_approx_eq u 0.486978701037524594696
@test_approx_eq v -0.405815584197937162246


## data

# sources
n = 1000
k = 3
m = 8

t = linspace(0.0, 10.0, n)
s1 = sin(t * 2)
s2 = s2 = 1.0 - 2.0 * Bool[isodd(ifloor(_ / 3)) for _ in t]
s3 = Float64[mod(_, 5.0) for _ in t]

s1 += 0.1 * randn(n)
s2 += 0.1 * randn(n)
s3 += 0.1 * randn(n)

S = hcat(s1, s2, s3)'
@assert size(S) == (k, n)

A = randn(m, k)

X = A * S
mv = vec(mean(X,2))
@assert size(X) == (m, n)
C = cov(X; vardim=2)

# fit FastICA

M = fit(FastICA, X, k; do_whiten=false)
@test isa(M, FastICA)
@test indim(M) == m
@test outdim(M) == k
@test mean(M) == mv
W = M.W
@test_approx_eq transform(M, X) W' * (X .- mv)
@test_approx_eq W'W eye(k)

M = fit(FastICA, X, k; do_whiten=true)
@test isa(M, FastICA)
@test indim(M) == m
@test outdim(M) == k
@test mean(M) == mv
W = M.W
@test_approx_eq W'C * W eye(k)

3 changes: 2 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ tests = ["whiten",
"cca",
"cmds",
"lda",
"mclda"]
"mclda",
"fastica"]

println("Running tests:")

Expand Down
5 changes: 5 additions & 0 deletions test/whiten.jl
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,9 @@ Cx = (X * X') / (n - 1)
W = f.W
@test_approx_eq W'Cx * W eye(d)

# invsqrtm

R = invsqrtm(C)
@test C == C0
@test_approx_eq R inv(sqrtm(C))

0 comments on commit 1010d72

Please sign in to comment.