# ica.jl
# Independent Component Analysis
#### FastICA type
# FastICA model.
#
# Fields:
#   mean - mean vector of length m, or an empty vector to indicate a zero mean
#   W    - component coefficient matrix of size (m, k); each column corresponds
#          to one extracted independent component
type ICA
    mean::Vector{Float64} # mean vector, of length m (or empty to indicate zero mean)
    W::Matrix{Float64}    # component coefficient matrix, of size (m, k)
end
indim(M::ICA) = size(M.W, 1)   # dimension m of the observation space
outdim(M::ICA) = size(M.W, 2)  # number k of independent components

# Return the model mean as a full-length vector; `fullmean` expands an empty
# stored mean into an explicit vector of length indim(M).
Base.mean(M::ICA) = fullmean(indim(M), M.mean)

# Map observations x (a vector, or a matrix with samples as columns) into the
# component space: W' * (x - mean).
transform(M::ICA, x::AbstractVecOrMat) = At_mul_B(M.W, centralize(x, M.mean))
#### core algorithm
# the abstract type for all g functions:
#
# Let f be an instance of such type, then
#
# evaluate(f, x) --> (v, d)
#
# It returns a function value v, and derivative d
#
# Abstract supertype of all g-function functors; each concrete subtype
# implements evaluate(f, x) returning the tuple (g(x), g'(x)).
abstract ICAGDeriv
# Tanh-based contrast function: g(x) = tanh(a * x).
immutable Tanh <: ICAGDeriv
    a::Float64  # scaling parameter applied inside the tanh
end
Tanh() = Tanh(1.0)  # default scaling a = 1
# Evaluate the tanh contrast at x, returning (g(x), g'(x)) where
#   g(x)  = tanh(a*x)
#   g'(x) = a * (1 - tanh(a*x)^2)
function evaluate(f::Tanh, x::Float64)
    y = tanh(f.a * x)
    return (y, f.a * (1.0 - y * y))
end
# Gaussian-based contrast function: g(x) = x * exp(-x^2 / 2); no parameters.
immutable Gaus <: ICAGDeriv end
# Evaluate the Gaussian contrast at x, returning (g(x), g'(x)) where
#   g(x)  = x * exp(-x^2/2)
#   g'(x) = (1 - x^2) * exp(-x^2/2)
function evaluate(f::Gaus, x::Float64)
    xsq = x * x
    e = exp(-0.5 * xsq)
    return (x * e, (1.0 - xsq) * e)
end
## a function to get a g-fun
# Construct a g-function functor from its symbolic name (:tanh or :gaus),
# using the functor's default parameters.
function icagfun(fname::Symbol)
    if fname == :tanh
        return Tanh()
    elseif fname == :gaus
        return Gaus()
    else
        error("Unknown gfun $(fname)")
    end
end
# Construct a g-function functor from its symbolic name with an explicit
# parameter a; only :tanh accepts a parameter.
function icagfun(fname::Symbol, a::Float64)
    if fname == :tanh
        return Tanh(a)
    elseif fname == :gaus
        error("The gfun $(fname) has no parameters")
    else
        error("Unknown gfun $(fname)")
    end
end
# Fast ICA
#
# Reference:
#
# Aapo Hyvarinen and Erkki Oja
# Independent Component Analysis: Algorithms and Applications.
# Neural Network 13(4-5), 2000.
#
# Core FastICA iteration (symmetric / parallel variant).
#
# Reference:
#   Aapo Hyvarinen and Erkki Oja
#   Independent Component Analysis: Algorithms and Applications.
#   Neural Network 13(4-5), 2000.
#
# Arguments:
#   W       - initialized component matrix, size (m, k); updated in place
#   X       - (whitened) observation sample matrix, size (m, n)
#   fun     - approximate neg-entropy functor (an ICAGDeriv)
#   maxiter - maximum number of iterations
#   tol     - convergence tolerance on the max column-wise L1 change of W
#   verbose - whether to show iterative info
#
# Returns W (the same array, modified in place).
function fastica!(W::DenseMatrix{Float64},      # initialized component matrix, size (m, k)
                  X::DenseMatrix{Float64},      # (whitened) observation sample matrix, size(m, n)
                  fun::ICAGDeriv,               # approximate neg-entropy functor
                  maxiter::Int,                 # maximum number of iterations
                  tol::Real,                    # convergence tolerance
                  verbose::Bool)                # whether to show iterative info

    # argument checking
    m = size(W, 1)
    k = size(W, 2)
    size(X, 1) == m || throw(DimensionMismatch("Sizes of W and X mismatch."))
    n = size(X, 2)
    k <= min(m, n) || throw(DimensionMismatch("k must not exceed min(m, n)."))

    if verbose
        @printf("FastICA Algorithm (m = %d, n = %d, k = %d)\n", m, n, k)
        println("============================================")
    end

    # pre-allocated storage
    Wp = Array(Float64, m, k)  # previous version of W (for convergence check)
    U  = Array(Float64, n, k)  # holds w'x, then overwritten with g(w'x)
    Y  = Array(Float64, m, k)  # E{x g(w'x)} for each component
    E1 = Array(Float64, k)     # E{g'(w'x)} for each component

    # normalize each column of W to unit length
    for j = 1:k
        w = view(W,:,j)
        scale!(w, 1.0 / sqrt(sumabs2(w)))
    end

    # main loop
    t = 0
    converged = false
    while !converged && t < maxiter
        t += 1
        copy!(Wp, W)

        # apply W of previous step: U <- X'W, i.e. u = w'x per sample/component
        At_mul_B!(U, X, W)

        # compute g(w'x) --> U and E{g'(w'x)} --> E1
        for j = 1:k
            # BUGFIX: the accumulator must be reset for every component.
            # Previously `_s = 0.0` sat outside this loop, so E1[j] wrongly
            # accumulated the derivative sums of components 1..j instead of
            # holding E{g'(w_j'x)} for component j alone.
            _s = 0.0
            for i = 1:n
                u, v = evaluate(fun, U[i,j])
                U[i,j] = u
                _s += v
            end
            E1[j] = _s / n
        end

        # compute E{x g(w'x)} --> Y
        scale!(A_mul_B!(Y, X, U), 1.0 / n)

        # update w: E{x g(w'x)} - E{g'(w'x)} w := y - e1 * w
        for j = 1:k
            w = view(W,:,j)
            y = view(Y,:,j)
            e1 = E1[j]
            for i = 1:m
                w[i] = y[i] - e1 * w[i]
            end
        end

        # symmetric decorrelation: W <- W * (W'W)^{-1/2}
        copy!(W, W * _invsqrtm!(W'W))

        # convergence check: maximum column-wise L1 distance from previous W
        chg = 0.0
        for j = 1:k
            s = 0.0
            w = view(W,:,j)
            wp = view(Wp,:,j)
            for i = 1:m
                s += abs(w[i] - wp[i])
            end
            if s > chg
                chg = s
            end
        end
        converged = (chg < tol)

        if verbose
            @printf("Iter %4d: change = %.6e\n", t, chg)
        end
    end
    return W
end
#### interface function
# Fit an ICA model to data using FastICA.
#
# Arguments:
#   X - sample matrix of size (m, n): m variables, n samples (as columns)
#   k - number of independent components to extract
#
# Keyword arguments:
#   alg       - choice of algorithm (only :fastica is supported)
#   fun       - approximate neg-entropy functor (see icagfun)
#   do_whiten - whether to perform pre-whitening of the data
#   maxiter   - maximum number of iterations
#   tol       - convergence tolerance
#   mean      - pre-computed mean vector, or nothing to estimate it from X
#   winit     - initial guess of W, size (m, k); empty means random init
#   verbose   - whether to display iteration progress
#
# Returns an ICA model holding the mean vector and the (m, k) matrix W.
function fit(::Type{ICA}, X::DenseMatrix{Float64},  # sample matrix, size (m, n)
             k::Int;                                # number of independent components
             alg::Symbol=:fastica,                  # choice of algorithm
             fun::ICAGDeriv=icagfun(:tanh),         # approx neg-entropy functor
             do_whiten::Bool=true,                  # whether to perform pre-whitening
             maxiter::Integer=100,                  # maximum number of iterations
             tol::Real=1.0e-6,                      # convergence tolerance
             mean=nothing,                          # pre-computed mean
             winit::Matrix{Float64}=zeros(0,0),     # init guess of W, size (m, k)
             verbose::Bool=false)                   # whether to display iterations

    # check input arguments
    m, n = size(X)
    # fixed grammar in the user-facing error message ("one samples" -> "one sample")
    n > 1 || error("There must be more than one sample, i.e. n > 1.")
    k <= min(m, n) || error("k must not exceed min(m, n).")
    alg == :fastica || error("alg must be :fastica")
    maxiter > 1 || error("maxiter must be greater than 1.")
    tol > 0 || error("tol must be positive.")

    # preprocess data: subtract the (given or estimated) mean
    mv = preprocess_mean(X, mean)
    Z::Matrix{Float64} = centralize(X, mv)

    W0::Matrix{Float64} = zeros(0,0)  # whitening matrix
    if do_whiten
        # sample covariance of the centered data
        C = scale!(A_mul_Bt(Z, Z), 1.0 / (n - 1))
        # keep the k leading eigen-directions, scaled to unit variance
        Efac = eigfact(C)
        ord = sortperm(Efac.values; rev=true)
        (v, P) = extract_kv(Efac, ord, k)
        W0 = scale!(P, 1.0 ./ sqrt(v))
        Z = W0'Z
    end

    # initialize the component matrix (random unless winit is supplied)
    W = (isempty(winit) ? randn(size(Z,1), k) : copy(winit))::Matrix{Float64}

    # invoke core algorithm (updates W in place)
    fastica!(W, Z, fun, maxiter, tol, verbose)

    # construct model: map components back to the original (unwhitened) space
    if do_whiten
        W = W0 * W
    end
    return ICA(mv, W)
end