Skip to content

Commit

Permalink
Add a normalized option for SubspaceLDA
Browse files Browse the repository at this point in the history
This is an alternative formulation of LDA, one which finds good projections for separating clusters even when different clusters have very different numbers of points. Each cluster is effectively "normalized" before doing the linear algebra.
  • Loading branch information
timholy committed Aug 2, 2016
1 parent 1cbd57b commit a40515f
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 30 deletions.
9 changes: 6 additions & 3 deletions src/lda.jl
Expand Up @@ -221,15 +221,18 @@ transform(M::SubspaceLDA, x) = M.projLDA' * (M.projw' * x)
fit{T,F<:SubspaceLDA}(::Type{F}, X::AbstractMatrix{T}, nc::Int, label::AbstractVector{Int})=
fit(F, X, label, nc)

function fit{T,F<:SubspaceLDA}(::Type{F}, X::AbstractMatrix{T}, label::AbstractVector{Int}, nc=maximum(label))
function fit{T,F<:SubspaceLDA}(::Type{F}, X::AbstractMatrix{T}, label::AbstractVector{Int}, nc=maximum(label); normalize::Bool=false)
d, n = size(X, 1), size(X, 2)
n ≥ nc || throw(ArgumentError("The number of samples is less than the number of classes"))
length(label) == n || throw(DimensionMismatch("Inconsistent array sizes."))
# Compute centroids, class weights, and deviation from centroids
# Note Sb = Hb*Hb', Sw = Hw*Hw'
cmeans, cweights, Hw = center(X, label, nc)
dmeans = cmeans .- cmeans * (cweights / n)
Hb = dmeans * Diagonal(sqrt(cweights))
dmeans = cmeans .- (normalize ? mean(cmeans, 2) : cmeans * (cweights / n))
Hb = normalize ? dmeans : dmeans * Diagonal(sqrt(cweights))
if normalize
Hw /= sqrt(n)
end
# Project to the subspace spanned by the within-class scatter
# (essentially, PCA before LDA)
Uw, Σw, _ = svd(Hw, thin=true)
Expand Down
93 changes: 66 additions & 27 deletions test/mclda.jl
Expand Up @@ -130,40 +130,55 @@ M = fit(MulticlassLDA, nc, X, y; method=:whiten, regcoef=lambda)

# Low-dimensional case (no singularities)

# Cluster centers for three groups in 5 dimensions: the origin, and two points
# offset by 10 along the first and second coordinate axes respectively.
centers = [zeros(5) [10.0;zeros(4)] [0.0;10.0;zeros(3)]]

# Case 1: 3 groups of 500
dX = randn(5,1500);
# 3 groups of 500 with zero mean
for i = 0:500:1000
# NOTE(review): the next two lines are the pre- and post-commit renderings of the
# same statement as shown by the diff view; both subtract the per-group mean, and
# re-centering already-centered data is a no-op, so keeping both is harmless.
dX[:,(1:500)+i] = dX[:,(1:500)+i] .- mean(dX[:,(1:500)+i],2)
dX[:,(1:500)+i] = dX[:,(1:500)+i] .- mean(dX[:,(1:500)+i],2) # make the mean of each 0
end
centers = [zeros(5) [10.0;zeros(4)] [0.0;10.0;zeros(3)]] # restated in the new hunk; identical value
X = [dX[:,1:500].+centers[:,1] dX[:,501:1000].+centers[:,2] dX[:,1001:1500].+centers[:,3]] # shift each group onto its center
label = [fill(1,500); fill(2,500); fill(3,500)] # class label for each column of X
# NOTE(review): the lines from here through the unterminated `for` below are the
# pre-commit version of the single-case test as rendered by the diff view; the
# commit replaces them with the two-case loop further down.  They are kept
# byte-for-byte because this document is a rendered diff, not a source file
# (the closing `end` of the final loop is not shown in this hunk).
M = fit(SubspaceLDA, X, label)
@test indim(M) == 5   # input dimensionality matches the data
@test outdim(M) == 2  # nc - 1 = 2 discriminant directions
@test_approx_eq mean(M) vec(mean(centers,2))  # equal class sizes => plain mean of centers
@test_approx_eq classmeans(M) centers
@test classweights(M) == [500,500,500]
x = rand(5)
@test_approx_eq transform(M, x) projection(M)'*x  # transform is projection applied to x
dcenters = centers .- mean(centers,2)   # between-class deviations
Hb = dcenters*sqrt(500)                 # all classes have weight 500
Sb = Hb*Hb'                             # between-class scatter
Sw = dX*dX'                             # within-class scatter
# When there are no singularities, we need to solve the "regular" LDA eigenvalue equation
proj = projection(M)
@test_approx_eq Sb*proj Sw*proj*Diagonal(M.λ)  # generalized eigenproblem Sb*v = λ*Sw*v
@test_approx_eq proj'*Sw*proj eye(2,2)         # projection is Sw-orthonormal

# also check that this is consistent with the conventional algorithm
Mld = fit(MulticlassLDA, 3, X, label)
for i = 1:2
@test_approx_eq abs(dot(normalize(proj[:,i]), normalize(Mld.proj[:,i]))) 1
dX1 = dX   # keep Case-1 deviations under a distinct name for the loop below
X1 = [dX[:,1:500].+centers[:,1] dX[:,501:1000].+centers[:,2] dX[:,1001:1500].+centers[:,3]]
label1 = [fill(1,500); fill(2,500); fill(3,500)]
# Case 2: 3 groups, one with 1000, one with 100, and one with 10
dX = randn(5,1110);
# zero-center each group independently (columns 1:1000, 1001:1100, 1101:1110)
dX[:, 1:1000] = dX[:, 1:1000] .- mean(dX[:, 1:1000],2)
dX[:,1001:1100] = dX[:,1001:1100] .- mean(dX[:,1001:1100],2)
dX[:,1101:1110] = dX[:,1101:1110] .- mean(dX[:,1101:1110],2)
dX2 = dX
X2 = [dX[:,1:1000].+centers[:,1] dX[:,1001:1100].+centers[:,2] dX[:,1101:1110].+centers[:,3]] # shift groups onto the same centers as Case 1
label2 = [fill(1,1000); fill(2,100); fill(3,10)]  # strongly unbalanced class sizes

# Run the same battery of SubspaceLDA checks on the balanced (Case 1) and
# unbalanced (Case 2) data sets.
for (X, dX, label) in ((X1, dX1, label1), (X2, dX2, label2))
w = [length(find(label.==1)), length(find(label.==2)), length(find(label.==3))] # samples per class
M = fit(SubspaceLDA, X, label)
@test indim(M) == 5   # input dimensionality matches the data
@test outdim(M) == 2  # nc - 1 = 2 discriminant directions
totcenter = vec(sum(centers.*w',2)./sum(w))   # class-size-weighted overall mean
@test_approx_eq mean(M) totcenter
@test_approx_eq classmeans(M) centers
@test classweights(M) == w
x = rand(5)
@test_approx_eq transform(M, x) projection(M)'*x  # transform is projection applied to x
dcenters = centers .- totcenter     # between-class deviations from the weighted mean
Hb = dcenters.*sqrt(w)'             # scale each class deviation by sqrt(class count)
Sb = Hb*Hb'                         # between-class scatter
Sw = dX*dX'                         # within-class scatter
# When there are no singularities, we need to solve the "regular" LDA eigenvalue equation
proj = projection(M)
@test_approx_eq Sb*proj Sw*proj*Diagonal(M.λ)  # generalized eigenproblem Sb*v = λ*Sw*v
@test_approx_eq proj'*Sw*proj eye(2,2)         # projection is Sw-orthonormal

# also check that this is consistent with the conventional algorithm
Mld = fit(MulticlassLDA, 3, X, label)
for i = 1:2
# directions should agree up to sign, hence abs of the dot of unit vectors
@test_approx_eq abs(dot(normalize(proj[:,i]), normalize(Mld.proj[:,i]))) 1
end
end

# High-dimensional case (undersampled => singularities)
X = randn(10^6, 9)
label = rand(1:3, 9)
label = rand(1:3, 9); label[1:3] = 1:3
M = fit(SubspaceLDA, X, label)
centers = M.cmeans
for i = 1:3
Expand All @@ -187,3 +202,27 @@ dcenters = centers .- mean(X, 2)
Hb = dcenters*Diagonal(sqrt(M.cweights))
Hw = X - centers[:,label]
@test_approx_eq (M.projw'*Hb)*(Hb'*M.projw)*M.projLDA (M.projw'*Hw)*(Hw'*M.projw)*M.projLDA*Diagonal(M.λ)

# Test normalized LDA
"""
    gen_ldadata_2(centers, n1, n2)

Generate a two-cluster test data set in `size(centers, 1)` dimensions.
Draws `n1 + n2` standard-normal points, zero-centers each cluster
independently, then shifts cluster `i` onto `centers[:, i]`.

Returns `(X, dX, label)`: the shifted data, the centered (pre-shift)
deviations, and the per-column cluster index (1 or 2).
"""
function gen_ldadata_2(centers, n1, n2)
d = size(centers, 1)
X = randn(d, n1+n2)
# remove each cluster's sample mean so dX has exactly zero mean per cluster
X[:,1:n1] .-= vec(mean(X[:,1:n1],2))
X[:,n1+1:n1+n2] .-= vec(mean(X[:,n1+1:n1+n2],2))
dX = copy(X)   # snapshot of the deviations before shifting
X[:,1:n1] .+= centers[:,1]
X[:,n1+1:n1+n2] .+= centers[:,2]
label = [fill(1,n1); fill(2,n2)]
X, dX, label
end
# Two clusters in 2d separated by 10 along the first axis.
centers = zeros(2,2); centers[1,2] = 10
n1 = 100
# With normalize=true the projections should be insensitive to cluster size:
# sweep the second cluster over increasingly unbalanced sizes.
for n2 in (100, 1000, 10000)
X, dX, label = gen_ldadata_2(centers, n1, n2)
M = fit(SubspaceLDA, X, label; normalize=true)
proj = projection(M)
Hb = centers .- mean(centers, 2) # not weighted by number in each cluster
Sb = Hb*Hb'                      # between-class scatter, each cluster counted equally
Sw = dX*dX'/(n1+n2)              # within-class scatter normalized by total sample count
# normalized formulation still satisfies the generalized eigenproblem Sb*v = λ*Sw*v
@test_approx_eq Sb*proj Sw*proj*Diagonal(M.λ)
end

0 comments on commit a40515f

Please sign in to comment.