add fast ica

JuliaStats · Jul 19, 2014 · 1010d72 · 1010d72
1 parent fea0283
commit 1010d72
Show file tree

Hide file tree

Showing 7 changed files with 302 additions and 4 deletions.
diff --git a/docs/source/whiten.rst b/docs/source/whiten.rst
@@ -77,15 +77,15 @@ Given a dataset, one can use the ``fit`` method to estimate a whitening transfor
     **Note:** This function internally relies on ``cov_whiten`` to derive the transformation ``W``. The function ``cov_whiten`` itself is also a useful function.
 
 
-.. function:: cov_whiten(C)
+.. function:: cov_whitening(C)
 
     Derive the whitening transform coefficient matrix ``W`` given the covariance matrix ``C``. Here, ``C`` can be either a square matrix, or an instance of ``Cholesky``.
 
     Internally, this function solves the whitening transform using Cholesky factorization. The rationale is as follows: let :math:`\mathbf{C} = \mathbf{U}^T \mathbf{U}` and :math:`\mathbf{W} = \mathbf{U}^{-1}`, then :math:`\mathbf{W}^T \mathbf{C} \mathbf{W} = \mathbf{I}`.
 
     **Note:** The return matrix ``W`` is an upper triangular matrix.
 
-.. function:: cov_whiten(C, regcoef)
+.. function:: cov_whitening(C, regcoef)
 
     Derive a whitening transform based on a regularized covariance, as ``C + (eigmax(C) * regcoef) * eye(d)``.
 

diff --git a/src/MultivariateStats.jl b/src/MultivariateStats.jl
@@ -27,6 +27,7 @@ module MultivariateStats
     invsqrtm!,          # Compute inverse of matrix square root inplace
     cov_whitening,      # Compute a whitening transform based on covariance
     cov_whitening!,     # Compute a whitening transform based on covariance (input will be overwritten)
+    invsqrtm,           # Compute C^{-1/2}, i.e. inv(sqrtm(C))
 
     ## pca
     PCA,                # Type: Principal Component Analysis model
@@ -78,7 +79,14 @@ module MultivariateStats
     multiclass_lda_stats,   # compute statistics for multiclass LDA training
     multiclass_lda,         # train multi-class LDA based on statistics
     mclda_solve,            # solve multi-class LDA projection given scatter matrices
-    mclda_solve!            # solve multi-class LDA projection (inputs are overriden)
+    mclda_solve!,           # solve multi-class LDA projection (inputs are overriden)
+
+    ## fastica
+    FastICA,                # Type: the Fast ICA model
+
+    icagfun,                # a function to get a ICA approx neg-entropy functor
+    fastica!                # core algorithm function for the Fast ICA
+
 
     ## source files
     include("common.jl")
@@ -87,5 +95,6 @@ module MultivariateStats
     include("cca.jl")
     include("cmds.jl")
     include("lda.jl")
+    include("fastica.jl")
 
 end # module
diff --git a/src/fastica.jl b/src/fastica.jl
@@ -0,0 +1,198 @@
+# FastICA
+
+# Reference:
+#
+#   Aapo Hyvarinen and Erkki Oja
+#   Independent Component Analysis: Algorithms and Applications.
+#   Neural Network 13(4-5), 2000.
+#
+
+#### g: Derivatives of G
+
+# the abstract type for all g functions:
+#
+# Let f be an instance of such type, then
+#
+#   evaluate(f, x) --> (v, d)
+#
+# It returns a function value v, and derivative d
+#
+abstract ICAGDeriv
+
+immutable Tanh <: ICAGDeriv
+    a::Float64
+end
+
+Tanh() = Tanh(1.0)
+evaluate(f::Tanh, x::Float64) = (a = f.a; t = tanh(a * x); (t, a * (1.0 - t * t)))
+
+immutable Gaus <: ICAGDeriv end
+evaluate(f::Gaus, x::Float64) = (x2 = x * x; e = exp(-0.5 * x2); (x * e, (1.0 - x2) * e))
+
+## a function to get a g-fun
+
+icagfun(fname::Symbol) = 
+    fname == :tanh ? Tanh() :
+    fname == :gaus ? Gaus() :
+    error("Unknown gfun $(fname)")
+
+icagfun(fname::Symbol, a::Float64) = 
+    fname == :tanh ? Tanh(a) :
+    fname == :gaus ? error("The gfun $(fname) has no parameters") :
+    error("Unknown gfun $(fname)")
+
+
+#### core algorithm
+
+function fastica!(W::DenseMatrix{Float64},      # initialized component matrix, size (m, k)
+                  X::DenseMatrix{Float64},      # (whitened) observation sample matrix, size(m, n)
+                  fun::ICAGDeriv,               # approximate neg-entropy functor
+                  maxiter::Int,                 # maximum number of iterations
+                  tol::Real,                    # convergence tolerance
+                  verbose::Bool)                # whether to show iterative info
+
+    # argument checking
+    m = size(W, 1)
+    k = size(W, 2)
+    size(X, 1) == m || throw(DimensionMismatch("Sizes of W and X mismatch."))
+    n = size(X, 2)
+    k <= min(m, n) || throw(DimensionMismatch("k must not exceed min(m, n)."))
+
+    if verbose
+        @printf("FastICA Algorithm (m = %d, n = %d, k = %d)\n", m, n, k)
+        println("============================================")  
+    end
+
+    # pre-allocated storage
+    Wp = Array(Float64, m, k)    # to store previous version of W
+    U  = Array(Float64, n, k)    # to store w'x & g(w'x)
+    Y  = Array(Float64, m, k)    # to store E{x g(w'x)} for components
+    E1 = Array(Float64, k)       # store E{g'(w'x)} for components
+
+    # normalize each column
+    for j = 1:k
+        w = view(W,:,j)
+        scale!(w, 1.0 / sqrt(sumabs2(w)))
+    end
+
+    # main loop
+    t = 0
+    converged = false
+    while !converged && t < maxiter
+        t += 1
+        copy!(Wp, W)
+
+        # apply W of previous step
+        At_mul_B!(U, X, W)  # u <- w'x
+
+        # compute g(w'x) --> U and E{g'(w'x)} --> E1
+        _s = 0.0
+        for j = 1:k
+            for i = 1:n
+                u, v = evaluate(fun, U[i,j])
+                U[i,j] = u
+                _s += v
+            end
+            E1[j] = _s / n
+        end
+
+        # compute E{x g(w'x)} --> Y
+        scale!(A_mul_B!(Y, X, U), 1.0 / n)
+
+        # update w: E{x g(w'x)} - E{g'(w'x)} w := y - e1 * w
+        for j = 1:k
+            w = view(W,:,j)
+            y = view(Y,:,j)
+            e1 = E1[j]
+            for i = 1:m
+                w[i] = y[i] - e1 * w[i]
+            end
+        end
+
+        # symmetric decorrelation: W <- W * (W'W)^{-1/2}
+        copy!(W, W * _invsqrtm!(W'W))
+
+        # compare with Wp
+        chg = 0.0
+        for j = 1:k
+            s = 0.0
+            w = view(W,:,j)
+            wp = view(Wp,:,j)
+            for i = 1:m
+                s += abs(w[i] - wp[i])
+            end
+            if s > chg 
+                chg = s
+            end
+        end
+        converged = (chg < tol)
+
+        if verbose
+            @printf("Iter %4d:  change = %.6e\n", t, chg)
+        end
+    end
+end
+
+
+#### FastICA type
+
+type FastICA
+    mean::Vector{Float64}   # mean vector, of length m (or empty to indicate zero mean)
+    W::Matrix{Float64}      # component coefficient matrix, of size (m, k)
+end
+
+indim(M::FastICA) = size(M.W, 1)
+outdim(M::FastICA) = size(M.W, 2)
+Base.mean(M::FastICA) = fullmean(indim(M), M.mean)
+
+transform(M::FastICA, x::AbstractVecOrMat) = At_mul_B(M.W, centralize(x, M.mean))
+
+
+#### interface function 
+
+function fit(::Type{FastICA}, X::DenseMatrix{Float64},          # sample matrix, size (m, n)
+                              k::Int;                           # number of independent components
+                              fun::ICAGDeriv=icagfun(:tanh),    # approx neg-entropy functor
+                              do_whiten::Bool=true,             # whether to perform pre-whitening 
+                              maxiter::Integer=100,             # maximum number of iterations
+                              tol::Real=1.0e-6,                 # convergence tolerance
+                              mean=nothing,                     # pre-computed mean
+                              winit::Matrix{Float64}=zeros(0,0),  # init guess of W, size (m, k)
+                              verbose::Bool=false)              # whether to display iterations
+
+    # check input arguments
+    m, n = size(X)
+    n > 1 || error("There must be more than one samples, i.e. n > 1.")
+    k <= min(m, n) || error("k must not exceed min(m, n).")
+
+    maxiter > 1 || error("maxiter must be greater than 1.")
+    tol > 0 || error("tol must be positive.")
+
+    # preprocess data
+    mv = preprocess_mean(X, mean)
+    Z::Matrix{Float64} = centralize(X, mv)
+
+    W0::Matrix{Float64} = zeros(0,0)    # whitening matrix
+    if do_whiten
+        C = scale!(A_mul_Bt(Z, Z), 1.0 / (n - 1))
+        Efac = eigfact(C)
+        ord = sortperm(Efac.values; rev=true)
+        (v, P) = extract_kv(Efac, ord, k)
+        W0 = scale!(P, 1.0 ./ sqrt(v))
+        # println(W0' * C * W0)
+        Z = W0'Z
+    end
+
+    # initialize
+    W = (isempty(winit) ? randn(size(Z,1), k) : copy(winit))::Matrix{Float64}
+
+    # invoke core algorithm
+    fastica!(W, Z, fun, maxiter, tol, verbose)
+
+    # construct model
+    if do_whiten
+        W = W0 * W
+    end
+    return FastICA(mv, W)
+end
+
diff --git a/src/whiten.jl b/src/whiten.jl
@@ -51,3 +51,20 @@ function fit{T<:FloatingPoint}(::Type{Whitening}, X::DenseMatrix{T};
     return Whitening(mv, cov_whitening!(C, regcoef))
 end
 
+# invsqrtm
+
+function _invsqrtm!{T<:FloatingPoint}(C::Matrix{T})
+    n = size(C, 1)
+    size(C, 2) == n || error("C must be a square matrix.")
+    E = eigfact!(Symmetric(C))
+    U = E.vectors
+    evs = E.values
+    for i = 1:n
+        @inbounds evs[i] = 1.0 / sqrt(sqrt(evs[i]))
+    end
+    scale!(U, evs)
+    return A_mul_Bt(U, U)
+end
+
+invsqrtm{T<:FloatingPoint}(C::DenseMatrix{T}) = _invsqrtm!(copy(C))
+
diff --git a/test/fastica.jl b/test/fastica.jl
@@ -0,0 +1,68 @@
+using MultivariateStats
+using Base.Test
+
+srand(15678)
+
+## icagfun
+
+f = icagfun(:tanh)
+u, v = evaluate(f, 1.5)
+@test_approx_eq u 0.905148253644866438242
+@test_approx_eq v 0.180706638923648530597
+
+f = icagfun(:tanh, 1.5)
+u, v = evaluate(f, 1.2)
+@test_approx_eq u 0.946806012846268289646
+@test_approx_eq v 0.155337561057228069719
+
+f = icagfun(:gaus)
+u, v = evaluate(f, 1.5)
+@test_approx_eq u 0.486978701037524594696
+@test_approx_eq v -0.405815584197937162246
+
+
+## data
+
+# sources
+n = 1000
+k = 3
+m = 8
+
+t = linspace(0.0, 10.0, n)
+s1 = sin(t * 2)
+s2 = s2 = 1.0 - 2.0 * Bool[isodd(ifloor(_ / 3)) for _ in t]
+s3 = Float64[mod(_, 5.0) for _ in t]
+
+s1 += 0.1 * randn(n)
+s2 += 0.1 * randn(n)
+s3 += 0.1 * randn(n)
+
+S = hcat(s1, s2, s3)'
+@assert size(S) == (k, n)
+
+A = randn(m, k)
+
+X = A * S 
+mv = vec(mean(X,2))
+@assert size(X) == (m, n)
+C = cov(X; vardim=2)
+
+# fit FastICA 
+
+M = fit(FastICA, X, k; do_whiten=false)
+@test isa(M, FastICA)
+@test indim(M) == m
+@test outdim(M) == k
+@test mean(M) == mv
+W = M.W
+@test_approx_eq transform(M, X) W' * (X .- mv)
+@test_approx_eq W'W eye(k)
+
+M = fit(FastICA, X, k; do_whiten=true)
+@test isa(M, FastICA)
+@test indim(M) == m
+@test outdim(M) == k
+@test mean(M) == mv
+W = M.W
+@test_approx_eq W'C * W eye(k)
+
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -3,7 +3,8 @@ tests = ["whiten",
          "cca", 
          "cmds", 
          "lda", 
-         "mclda"]
+         "mclda", 
+         "fastica"]
 
 println("Running tests:")
 

diff --git a/test/whiten.jl b/test/whiten.jl
@@ -67,4 +67,9 @@ Cx = (X * X') / (n - 1)
 W = f.W
 @test_approx_eq W'Cx * W eye(d)
 
+# invsqrtm
+
+R = invsqrtm(C)
+@test C == C0
+@test_approx_eq R inv(sqrtm(C))