|
|
@@ -2,17 +2,17 @@ |
|
|
|
|
|
|
|
#### CCA Type |
|
|
|
|
|
|
|
type CCA |
|
|
|
struct CCA |
|
|
|
xmean::Vector{Float64} # sample mean of X: of length dx (can be empty) |
|
|
|
ymean::Vector{Float64} # sample mean of Y: of length dy (can be empty) |
|
|
|
ymean::Vector{Float64} # sample mean of Y: of length dy (can be empty) |
|
|
|
xproj::Matrix{Float64} # projection matrix for X, of size (dx, p) |
|
|
|
yproj::Matrix{Float64} # projection matrix for Y, of size (dy, p) |
|
|
|
corrs::Vector{Float64} # correlations, of length p |
|
|
|
|
|
|
|
function CCA(xm::Vector{Float64}, |
|
|
|
ym::Vector{Float64}, |
|
|
|
function CCA(xm::Vector{Float64}, |
|
|
|
ym::Vector{Float64}, |
|
|
|
xp::Matrix{Float64}, |
|
|
|
yp::Matrix{Float64}, |
|
|
|
yp::Matrix{Float64}, |
|
|
|
crs::Vector{Float64}) |
|
|
|
|
|
|
|
dx, px = size(xp) |
|
|
@@ -24,7 +24,7 @@ type CCA |
|
|
|
isempty(ym) || length(ym) == dy || |
|
|
|
throw(DimensionMismatch("Incorrect length of ymean.")) |
|
|
|
|
|
|
|
px == py || |
|
|
|
px == py || |
|
|
|
throw(DimensionMismatch("xproj and yproj should have the same number of columns.")) |
|
|
|
|
|
|
|
length(crs) == px || |
|
|
@@ -50,8 +50,8 @@ correlations(M::CCA) = M.corrs |
|
|
|
|
|
|
|
## use |
|
|
|
|
|
|
|
xtransform{T<:Real}(M::CCA, X::AbstractVecOrMat{T}) = At_mul_B(M.xproj, centralize(X, M.xmean)) |
|
|
|
ytransform{T<:Real}(M::CCA, Y::AbstractVecOrMat{T}) = At_mul_B(M.yproj, centralize(Y, M.ymean)) |
|
|
|
xtransform(M::CCA, X::AbstractVecOrMat{T}) where T<:Real = transpose(M.xproj) * centralize(X, M.xmean) |
|
|
|
ytransform(M::CCA, Y::AbstractVecOrMat{T}) where T<:Real = transpose(M.yproj) * centralize(Y, M.ymean) |
|
|
|
|
|
|
|
## show & dump |
|
|
|
|
|
|
@@ -79,9 +79,9 @@ end |
|
|
|
|
|
|
|
## ccacov |
|
|
|
|
|
|
|
function ccacov(Cxx::DenseMatrix{Float64}, |
|
|
|
function ccacov(Cxx::DenseMatrix{Float64}, |
|
|
|
Cyy::DenseMatrix{Float64}, |
|
|
|
Cxy::DenseMatrix{Float64}, |
|
|
|
Cxy::DenseMatrix{Float64}, |
|
|
|
xmean::Vector{Float64}, |
|
|
|
ymean::Vector{Float64}, |
|
|
|
p::Int) |
|
|
@@ -91,10 +91,10 @@ function ccacov(Cxx::DenseMatrix{Float64}, |
|
|
|
dy, dy2 = size(Cyy) |
|
|
|
dx == dx2 || error("Cxx must be a square matrix.") |
|
|
|
dy == dy2 || error("Cyy must be a square matrix.") |
|
|
|
size(Cxy) == (dx, dy) || |
|
|
|
size(Cxy) == (dx, dy) || |
|
|
|
throw(DimensionMismatch("size(Cxy) should be equal to (dx, dy)")) |
|
|
|
|
|
|
|
isempty(xmean) || length(xmean) == dx || |
|
|
|
isempty(xmean) || length(xmean) == dx || |
|
|
|
throw(DimensionMismatch("Incorrect length of xmean.")) |
|
|
|
|
|
|
|
isempty(ymean) || length(ymean) == dy || |
|
|
@@ -116,17 +116,17 @@ function _ccacov(Cxx, Cyy, Cxy, xmean, ymean, p::Int) |
|
|
|
# solve Px: (Cxy * inv(Cyy) * Cyx) Px = λ Cxx * Px |
|
|
|
# compute Py: inv(Cyy) * Cyx * Px |
|
|
|
|
|
|
|
G = cholfact(Cyy) \ Cxy' |
|
|
|
Ex = eigfact(Symmetric(Cxy * G), Symmetric(Cxx)) |
|
|
|
G = cholesky(Cyy) \ Cxy' |
|
|
|
Ex = eigen(Symmetric(Cxy * G), Symmetric(Cxx)) |
|
|
|
ord = sortperm(Ex.values; rev=true) |
|
|
|
vx, Px = extract_kv(Ex, ord, p) |
|
|
|
Py = qnormalize!(G * Px, Cyy) |
|
|
|
else |
|
|
|
# solve Py: (Cyx * inv(Cxx) * Cxy) Py = λ Cyy Py |
|
|
|
# compute Px: inv(Cx) * Cxy * Py |
|
|
|
|
|
|
|
H = cholfact(Cxx) \ Cxy |
|
|
|
Ey = eigfact(Symmetric(Cxy'H), Symmetric(Cyy)) |
|
|
|
H = cholesky(Cxx) \ Cxy |
|
|
|
Ey = eigen(Symmetric(Cxy'H), Symmetric(Cyy)) |
|
|
|
ord = sortperm(Ey.values; rev=true) |
|
|
|
vy, Py = extract_kv(Ey, ord, p) |
|
|
|
Px = qnormalize!(H * Py, Cxx) |
|
|
@@ -143,18 +143,18 @@ end |
|
|
|
|
|
|
|
## ccasvd |
|
|
|
|
|
|
|
function ccasvd(Zx::DenseMatrix{Float64}, |
|
|
|
Zy::DenseMatrix{Float64}, |
|
|
|
xmean::Vector{Float64}, |
|
|
|
ymean::Vector{Float64}, |
|
|
|
function ccasvd(Zx::DenseMatrix{Float64}, |
|
|
|
Zy::DenseMatrix{Float64}, |
|
|
|
xmean::Vector{Float64}, |
|
|
|
ymean::Vector{Float64}, |
|
|
|
p::Int) |
|
|
|
|
|
|
|
dx, n = size(Zx) |
|
|
|
dy, n2 = size(Zy) |
|
|
|
n == n2 || |
|
|
|
n == n2 || |
|
|
|
throw(DimensionMismatch("Zx and Zy must have the same number of columns.")) |
|
|
|
|
|
|
|
isempty(xmean) || length(xmean) == dx || |
|
|
|
isempty(xmean) || length(xmean) == dx || |
|
|
|
throw(DimensionMismatch("Incorrect length of xmean.")) |
|
|
|
|
|
|
|
isempty(ymean) || length(ymean) == dy || |
|
|
@@ -170,7 +170,7 @@ end |
|
|
|
# |
|
|
|
# David Weenink. |
|
|
|
# Canonical Correlation Analysis. |
|
|
|
# Institute of Phonetic Sciences, Univ. of Amsterdam, |
|
|
|
# Institute of Phonetic Sciences, Univ. of Amsterdam, |
|
|
|
# Proceedings 25 (2003), 81-99. |
|
|
|
# |
|
|
|
# Note: in this paper, each row is considered as an observation. |
|
|
@@ -182,27 +182,27 @@ function _ccasvd(Zx, Zy, xmean, ymean, p::Int) |
|
|
|
n = size(Zx, 2) |
|
|
|
|
|
|
|
# svd decomposition |
|
|
|
Sx = svdfact(Zx) |
|
|
|
Sy = svdfact(Zy) |
|
|
|
S = svdfact!(A_mul_Bt(Sx.Vt, Sy.Vt)) # svd of Vx * Vy' |
|
|
|
Sx = svd(Zx) |
|
|
|
Sy = svd(Zy) |
|
|
|
S = svd!(Sx.Vt * transpose(Sy.Vt)) # svd of Vx * Vy' |
|
|
|
|
|
|
|
# compute Px and Py |
|
|
|
ord = sortperm(S.S; rev=true) |
|
|
|
si = ord[1:p] |
|
|
|
Px = scale!(Sx.U, 1.0 ./ Sx.S) * S.U[:, si] |
|
|
|
Py = A_mul_Bt(scale!(Sy.U, 1.0 ./ Sy.S), S.Vt[si, :]) |
|
|
|
Px = rmul!(Sx.U, Diagonal(1.0 ./ Sx.S)) * S.U[:, si] |
|
|
|
Py = rmul!(Sy.U, Diagonal(1.0 ./ Sy.S)) * S.V[:, si] |
|
|
|
|
|
|
|
# scale so that Px' * Cxx * Py == I |
|
|
|
# and Py' * Cyy * Py == I, |
|
|
|
# scale so that Px' * Cxx * Py == I |
|
|
|
# and Py' * Cyy * Py == I, |
|
|
|
# |
|
|
|
# with Cxx = Zx * Zx' / (n - 1) |
|
|
|
# Cyy = Zy * Zy' / (n - 1) |
|
|
|
# |
|
|
|
scale!(Px, sqrt(n-1)) |
|
|
|
scale!(Py, sqrt(n-1)) |
|
|
|
rmul!(Px, sqrt(n-1)) |
|
|
|
rmul!(Py, sqrt(n-1)) |
|
|
|
|
|
|
|
# compute correlations |
|
|
|
crs = scale!(coldot(Zx'Px, Zy'Py), inv(n-1)) |
|
|
|
crs = rmul!(coldot(Zx'Px, Zy'Py), inv(n-1)) |
|
|
|
|
|
|
|
# construct CCA model |
|
|
|
CCA(xmean, ymean, Px, Py, crs) |
|
|
@@ -212,17 +212,17 @@ end |
|
|
|
|
|
|
|
function fit(::Type{CCA}, X::DenseMatrix{Float64}, Y::DenseMatrix{Float64}; |
|
|
|
outdim::Int=min(min(size(X)...), min(size(Y)...)), |
|
|
|
method::Symbol=:svd, |
|
|
|
xmean=nothing, |
|
|
|
method::Symbol=:svd, |
|
|
|
xmean=nothing, |
|
|
|
ymean=nothing) |
|
|
|
|
|
|
|
dx, n = size(X) |
|
|
|
dy, n2 = size(Y) |
|
|
|
|
|
|
|
n2 == n || |
|
|
|
n2 == n || |
|
|
|
throw(DimensionMismatch("X and Y should have the same number of columns.")) |
|
|
|
|
|
|
|
(n >= dx && n >= dy) || |
|
|
|
(n >= dx && n >= dy) || |
|
|
|
warn("CCA would be numerically instable when n < dx or n < dy.") |
|
|
|
|
|
|
|
xmv = preprocess_mean(X, xmean) |
|
|
@@ -232,9 +232,9 @@ function fit(::Type{CCA}, X::DenseMatrix{Float64}, Y::DenseMatrix{Float64}; |
|
|
|
Zy = centralize(Y, ymv) |
|
|
|
|
|
|
|
if method == :cov |
|
|
|
Cxx = scale!(A_mul_Bt(Zx, Zx), inv(n - 1)) |
|
|
|
Cyy = scale!(A_mul_Bt(Zy, Zy), inv(n - 1)) |
|
|
|
Cxy = scale!(A_mul_Bt(Zx, Zy), inv(n - 1)) |
|
|
|
Cxx = rmul!(Zx*transpose(Zx), inv(n - 1)) |
|
|
|
Cyy = rmul!(Zy*transpose(Zy), inv(n - 1)) |
|
|
|
Cxy = rmul!(Zx*transpose(Zy), inv(n - 1)) |
|
|
|
M = ccacov(Cxx, Cyy, Cxy, xmv, ymv, outdim) |
|
|
|
elseif method == :svd |
|
|
|
M = ccasvd(Zx, Zy, xmv, ymv, outdim) |
|
|
|
0 comments on commit
ca576a4