Skip to content

Commit

Permalink
Add a normalized option for SubspaceLDA
Browse files Browse the repository at this point in the history
This is an alternative formulation of LDA, one which finds good projections for separating clusters even when different clusters have very different numbers of points. Each cluster is effectively "normalized" before doing the linear algebra.
  • Loading branch information
timholy committed Aug 2, 2016
1 parent 1cbd57b commit a40515f
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 30 deletions.
9 changes: 6 additions & 3 deletions src/lda.jl
Expand Up @@ -221,15 +221,18 @@ transform(M::SubspaceLDA, x) = M.projLDA' * (M.projw' * x)
fit{T,F<:SubspaceLDA}(::Type{F}, X::AbstractMatrix{T}, nc::Int, label::AbstractVector{Int})=
fit(F, X, label, nc)

function fit{T,F<:SubspaceLDA}(::Type{F}, X::AbstractMatrix{T}, label::AbstractVector{Int}, nc=maximum(label))
function fit{T,F<:SubspaceLDA}(::Type{F}, X::AbstractMatrix{T}, label::AbstractVector{Int}, nc=maximum(label); normalize::Bool=false)
d, n = size(X, 1), size(X, 2)
n ≥ nc || throw(ArgumentError("The number of samples is less than the number of classes"))
length(label) == n || throw(DimensionMismatch("Inconsistent array sizes."))
# Compute centroids, class weights, and deviation from centroids
# Note Sb = Hb*Hb', Sw = Hw*Hw'
cmeans, cweights, Hw = center(X, label, nc)
dmeans = cmeans .- cmeans * (cweights / n)
Hb = dmeans * Diagonal(sqrt(cweights))
dmeans = cmeans .- (normalize ? mean(cmeans, 2) : cmeans * (cweights / n))
Hb = normalize ? dmeans : dmeans * Diagonal(sqrt(cweights))
if normalize
Hw /= sqrt(n)
end
# Project to the subspace spanned by the within-class scatter
# (essentially, PCA before LDA)
Uw, Σw, _ = svd(Hw, thin=true)
Expand Down
93 changes: 66 additions & 27 deletions test/mclda.jl
Expand Up @@ -130,40 +130,55 @@ M = fit(MulticlassLDA, nc, X, y; method=:whiten, regcoef=lambda)

# Low-dimensional case (no singularities)

# Cluster centers for three groups in 5 dimensions: the origin, and two points
# offset by 10 along the first and second coordinate axes respectively.
centers = [zeros(5) [10.0;zeros(4)] [0.0;10.0;zeros(3)]]

# Case 1: 3 groups of 500
dX = randn(5,1500);
# 3 groups of 500 with zero mean
for i = 0:500:1000
# NOTE(review): the next two lines are the pre- and post-commit renderings of the
# same statement as shown by the diff view; both subtract the per-group mean, and
# re-centering already-centered data is a no-op, so keeping both is harmless.
dX[:,(1:500)+i] = dX[:,(1:500)+i] .- mean(dX[:,(1:500)+i],2)
dX[:,(1:500)+i] = dX[:,(1:500)+i] .- mean(dX[:,(1:500)+i],2) # make the mean of each 0
end
centers = [zeros(5) [10.0;zeros(4)] [0.0;10.0;zeros(3)]] # restated in the new hunk; identical value
X = [dX[:,1:500].+centers[:,1] dX[:,501:1000].+centers[:,2] dX[:,1001:1500].+centers[:,3]] # shift each group onto its center
label = [fill(1,500); fill(2,500); fill(3,500)] # class label for each column of X
# NOTE(review): the lines from here through the unterminated `for` below are the
# pre-commit version of the single-case test as rendered by the diff view; the
# commit replaces them with the two-case loop further down.  They are kept
# byte-for-byte because this document is a rendered diff, not a source file
# (the closing `end` of the final loop is not shown in this hunk).
M = fit(SubspaceLDA, X, label)
@test indim(M) == 5   # input dimensionality matches the data
@test outdim(M) == 2  # nc - 1 = 2 discriminant directions
@test_approx_eq mean(M) vec(mean(centers,2))  # equal class sizes => plain mean of centers
@test_approx_eq classmeans(M) centers
@test classweights(M) == [500,500,500]
x = rand(5)
@test_approx_eq transform(M, x) projection(M)'*x  # transform is projection applied to x
dcenters = centers .- mean(centers,2)   # between-class deviations
Hb = dcenters*sqrt(500)                 # all classes have weight 500
Sb = Hb*Hb'                             # between-class scatter
Sw = dX*dX'                             # within-class scatter
# When there are no singularities, we need to solve the "regular" LDA eigenvalue equation
proj = projection(M)
@test_approx_eq Sb*proj Sw*proj*Diagonal(M.λ)  # generalized eigenproblem Sb*v = λ*Sw*v
@test_approx_eq proj'*Sw*proj eye(2,2)         # projection is Sw-orthonormal

# also check that this is consistent with the conventional algorithm
Mld = fit(MulticlassLDA, 3, X, label)
for i = 1:2
@test_approx_eq abs(dot(normalize(proj[:,i]), normalize(Mld.proj[:,i]))) 1
dX1 = dX   # keep Case-1 deviations under a distinct name for the loop below
X1 = [dX[:,1:500].+centers[:,1] dX[:,501:1000].+centers[:,2] dX[:,1001:1500].+centers[:,3]]
label1 = [fill(1,500); fill(2,500); fill(3,500)]
# Case 2: 3 groups, one with 1000, one with 100, and one with 10
dX = randn(5,1110);
# zero-center each group independently (columns 1:1000, 1001:1100, 1101:1110)
dX[:, 1:1000] = dX[:, 1:1000] .- mean(dX[:, 1:1000],2)
dX[:,1001:1100] = dX[:,1001:1100] .- mean(dX[:,1001:1100],2)
dX[:,1101:1110] = dX[:,1101:1110] .- mean(dX[:,1101:1110],2)
dX2 = dX
X2 = [dX[:,1:1000].+centers[:,1] dX[:,1001:1100].+centers[:,2] dX[:,1101:1110].+centers[:,3]] # shift groups onto the same centers as Case 1
label2 = [fill(1,1000); fill(2,100); fill(3,10)]  # strongly unbalanced class sizes

# Run the same battery of SubspaceLDA checks on the balanced (Case 1) and
# unbalanced (Case 2) data sets.
for (X, dX, label) in ((X1, dX1, label1), (X2, dX2, label2))
w = [length(find(label.==1)), length(find(label.==2)), length(find(label.==3))] # samples per class
M = fit(SubspaceLDA, X, label)
@test indim(M) == 5   # input dimensionality matches the data
@test outdim(M) == 2  # nc - 1 = 2 discriminant directions
totcenter = vec(sum(centers.*w',2)./sum(w))   # class-size-weighted overall mean
@test_approx_eq mean(M) totcenter
@test_approx_eq classmeans(M) centers
@test classweights(M) == w
x = rand(5)
@test_approx_eq transform(M, x) projection(M)'*x  # transform is projection applied to x
dcenters = centers .- totcenter     # between-class deviations from the weighted mean
Hb = dcenters.*sqrt(w)'             # scale each class deviation by sqrt(class count)
Sb = Hb*Hb'                         # between-class scatter
Sw = dX*dX'                         # within-class scatter
# When there are no singularities, we need to solve the "regular" LDA eigenvalue equation
proj = projection(M)
@test_approx_eq Sb*proj Sw*proj*Diagonal(M.λ)  # generalized eigenproblem Sb*v = λ*Sw*v
@test_approx_eq proj'*Sw*proj eye(2,2)         # projection is Sw-orthonormal

# also check that this is consistent with the conventional algorithm
Mld = fit(MulticlassLDA, 3, X, label)
for i = 1:2
# directions should agree up to sign, hence abs of the dot of unit vectors
@test_approx_eq abs(dot(normalize(proj[:,i]), normalize(Mld.proj[:,i]))) 1
end
end

# High-dimensional case (undersampled => singularities)
X = randn(10^6, 9)
label = rand(1:3, 9)
label = rand(1:3, 9); label[1:3] = 1:3
M = fit(SubspaceLDA, X, label)
centers = M.cmeans
for i = 1:3
Expand All @@ -187,3 +202,27 @@ dcenters = centers .- mean(X, 2)
Hb = dcenters*Diagonal(sqrt(M.cweights))
Hw = X - centers[:,label]
@test_approx_eq (M.projw'*Hb)*(Hb'*M.projw)*M.projLDA (M.projw'*Hw)*(Hw'*M.projw)*M.projLDA*Diagonal(M.λ)

# Test normalized LDA
"""
    gen_ldadata_2(centers, n1, n2)

Generate a two-cluster test data set in `size(centers, 1)` dimensions.
Draws `n1 + n2` standard-normal points, zero-centers each cluster
independently, then shifts cluster `i` onto `centers[:, i]`.

Returns `(X, dX, label)`: the shifted data, the centered (pre-shift)
deviations, and the per-column cluster index (1 or 2).
"""
function gen_ldadata_2(centers, n1, n2)
d = size(centers, 1)
X = randn(d, n1+n2)
# remove each cluster's sample mean so dX has exactly zero mean per cluster
X[:,1:n1] .-= vec(mean(X[:,1:n1],2))
X[:,n1+1:n1+n2] .-= vec(mean(X[:,n1+1:n1+n2],2))
dX = copy(X)   # snapshot of the deviations before shifting
X[:,1:n1] .+= centers[:,1]
X[:,n1+1:n1+n2] .+= centers[:,2]
label = [fill(1,n1); fill(2,n2)]
X, dX, label
end
# Two clusters in 2d separated by 10 along the first axis.
centers = zeros(2,2); centers[1,2] = 10
n1 = 100
# With normalize=true the projections should be insensitive to cluster size:
# sweep the second cluster over increasingly unbalanced sizes.
for n2 in (100, 1000, 10000)
X, dX, label = gen_ldadata_2(centers, n1, n2)
M = fit(SubspaceLDA, X, label; normalize=true)
proj = projection(M)
Hb = centers .- mean(centers, 2) # not weighted by number in each cluster
Sb = Hb*Hb'                      # between-class scatter, each cluster counted equally
Sw = dX*dX'/(n1+n2)              # within-class scatter normalized by total sample count
# normalized formulation still satisfies the generalized eigenproblem Sb*v = λ*Sw*v
@test_approx_eq Sb*proj Sw*proj*Diagonal(M.λ)
end

0 comments on commit a40515f

Please sign in to comment.