/
mult-PCA.R
executable file
·428 lines (401 loc) · 12.8 KB
/
mult-PCA.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
# PCA on Coe objects
# 1. PCA calculation and builder --------------------------
#' Principal component analysis on Coe objects
#'
#' Performs a PCA on \link{Coe} objects, using \link{prcomp}.
#'
#' By default, methods on \link{Coe} object do not scale the input data but center them.
#' There is also a generic method (eg for traditional morphometrics) that centers and scales data.
#' @aliases PCA
#' @rdname PCA
#' @param x a \link{Coe} object or an appropriate object (eg \link{prcomp}) for \code{as_PCA}
#' @param fac any factor or data.frame to be passed to \code{as_PCA} and for use with \link{plot.PCA}
#' @param scale. logical whether to scale the input data
#' @param center logical whether to center the input data
#' @return a 'PCA' object on which to apply \link{plot.PCA}, among others. This list has several
#' components, most of them inherited from the \code{prcomp} object:
#' \enumerate{
#' \item \code{sdev} the standard deviations of the principal components
#' (i.e., the square roots of the eigenvalues of the
#' covariance/correlation matrix, though the calculation
#' is actually done with the singular values of the data matrix)
#' \item \code{eig} the cumulated proportion of variance along the PC axes
#' \item \code{rotation} the matrix of variable loadings (i.e., a matrix whose columns contain the eigenvectors).
#' The function princomp returns this in the element loadings.
#' \item \code{center}, scale the centering and scaling used
#' \item \code{x} PCA scores (the value of the rotated data (the centred (and scaled if requested)
#' data multiplied by the rotation matrix))
#' \item other components are inherited from the \code{Coe} object passed to \code{PCA},
#' eg \code{fac}, \code{mshape}, \code{method}, \code{baseline1} and \code{baseline2}, etc. They
#' are documented in the corresponding \code{*Coe} file.
#' }
#' @family multivariate
#' @examples
#' bot.f <- efourier(bot, 12)
#' bot.p <- PCA(bot.f)
#' bot.p
#' plot(bot.p, morpho=FALSE)
#' plot(bot.p, 'type')
#'
#' op <- npoly(olea, 5)
#' op.p <- PCA(op)
#' op.p
#' plot(op.p, 1, morpho=TRUE)
#'
#' wp <- fgProcrustes(wings, tol=1e-4)
#' wpp <- PCA(wp)
#' wpp
#' plot(wpp, 1)
#'
#' # "foreign prcomp"
#' head(iris)
#' iris.p <- prcomp(iris[, 1:4])
#' iris.p <- as_PCA(iris.p, iris[, 5])
#' class(iris.p)
#' plot(iris.p, 1)
#' @export
PCA <- function(x, scale., center, fac) {
UseMethod("PCA")
}
#' @rdname PCA
#' @export
PCA.OutCoe <- function(x, scale. = FALSE, center = TRUE, fac) {
OutCoe <- x
PCA <- prcomp(OutCoe$coe, scale. = scale., center = center)
eig <- (PCA$sdev^2)
PCA$eig <- eig/sum(eig)
PCA$fac <- OutCoe$fac
PCA$mshape <- apply(OutCoe$coe, 2, mean)
PCA$method <- OutCoe$method
if (!is.null(OutCoe$baseline1)){
PCA$baseline1 <- OutCoe$baseline1
PCA$baseline2 <- OutCoe$baseline2}
PCA$cuts <- OutCoe$cuts
class(PCA) <- c("PCA", class(PCA))
return(PCA)
}
#' @rdname PCA
#' @export
PCA.OpnCoe <- function(x, scale. = FALSE, center = TRUE, fac) {
OpnCoe <- x
PCA <- prcomp(OpnCoe$coe, scale. = scale., center = center)
eig <- (PCA$sdev^2)
PCA$eig <- eig/sum(eig)
PCA$fac <- OpnCoe$fac
PCA$mshape <- apply(OpnCoe$coe, 2, mean)
PCA$method <- OpnCoe$method
PCA$mod <- OpnCoe$mod #the only diff so far
PCA$baseline1 <- OpnCoe$baseline1
PCA$baseline2 <- OpnCoe$baseline2
PCA$cuts <- OpnCoe$cuts
class(PCA) <- c("PCA", class(PCA))
return(PCA)
}
#' @rdname PCA
#' @export
PCA.LdkCoe <- function(x, scale. = FALSE, center = TRUE, fac) {
LdkCoe <- x
# LdkCoe$coe <- a2m(l2a(Coe$coo))
PCA <- prcomp(LdkCoe$coe, scale. = scale., center = center)
eig <- (PCA$sdev^2)
PCA$eig <- eig/sum(eig)
PCA$fac <- LdkCoe$fac
PCA$mshape <- apply(LdkCoe$coe, 2, mean)
PCA$method <- "procrustes"
PCA$cuts <- LdkCoe$cuts
PCA$links <- LdkCoe$links
# PCA$mod <- OpnCoe$mod #the only diff so far PCA$baseline1
# <- OpnCoe$baseline1 PCA$baseline2 <- OpnCoe$baseline2
class(PCA) <- c("PCA", class(PCA))
return(PCA)
}
#' @rdname PCA
#' @export
PCA.TraCoe <- function(x, scale. = TRUE, center = TRUE, fac) {
TraCoe <- x
# LdkCoe$coe <- a2m(l2a(Coe$coo))
PCA <- prcomp(TraCoe$coe, scale. = scale., center = center)
eig <- (PCA$sdev^2)
PCA$eig <- eig/sum(eig)
PCA$fac <- TraCoe$fac
PCA$mshape <- NULL
PCA$method <- NULL
class(PCA) <- c("PCA", class(PCA))
return(PCA)
}
#' @rdname PCA
#' @export
PCA.default <- function(x, scale. = TRUE, center = TRUE, fac=dplyr::tibble()) {
PCA <- prcomp(x, scale. = scale., center = center)
eig <- (PCA$sdev^2)
PCA$eig <- eig/sum(eig)
if (!is.null(fac)) fac <- tibble::as_tibble()(fac)
PCA$fac <- fac
PCA$method <- NULL
# PCA$baseline2 <- OpnCoe$baseline2
class(PCA) <- c("PCA", class(PCA))
return(PCA)
}
# 2. PCA Bridges ------------------------------------------
#' @rdname PCA
#' @export
as_PCA <- function(x, fac){
UseMethod("as_PCA")
}
#' @export
as_PCA.default <- function(x, fac){
if (class(x)[1] != "PCA"){
class(x) <- c("PCA", class(x))
if (!missing(fac)) x$fac <- tibble::as_tibble(fac)
return(x)}}
#' @export
print.PCA <- function(x, ...){
cat("A PCA object\n")
cat(rep("-", 20), "\n", sep = "")
cat(" -", ifelse(is.null(nrow(x$x)), 1, nrow(x$x)), "shapes \n")
# Method printer
if (length(x$method)>1) {
cat(" - $method: [", paste0(x$method, collapse=" + "), "analyses ]\n")
} else {
cat(" - $method: [", x$method, "analysis ]\n")}
# we print the fac
.print_fac(x$fac)
cat(" - All components: ", paste(names(x), collapse=", "), ".\n", sep="")
}
# 3. PCA methods ------------------------------------------
#' Get paired individual on a Coe, PCA or LDA objects
#'
#' If you have paired individuals, i.e. before and after a treatment or for repeated measures,
#' and if you have coded coded it into \code{$fac}, this methods allows you to retrieve the corresponding PC/LD scores,
#' or coefficients for \link{Coe} objects.
#' @param x any \link{Coe}, \link{PCA} of \link{LDA} object.
#' @param fac factor or column name or id corresponding to the pairing factor.
#' @param range numeric the range of coefficients for \code{Coe}, or PC (LD) axes on which to return scores.
#' @return a list with components \code{x1} all coefficients/scores corresponding to the
#' first level of the \code{fac} provided; \code{x2} same thing for the second level;
#' \code{fac} the corresponding \code{fac}.
#' @examples
#' bot2 <- bot1 <- coo_scale(coo_center(coo_sample(bot, 60)))
#' bot1$fac$session <- factor(rep("session1", 40))
#' # we simulate an measurement error
#' bot2 <- coo_jitter(bot1, amount=0.01)
#' bot2$fac$session <- factor(rep("session2", 40))
#' botc <- combine(bot1, bot2)
#' botcf <- efourier(botc, 12)
#'
#' # we gonna plot the PCA with the two measurement sessions and the two types
#' botcp <- PCA(botcf)
#' plot(botcp, "type", col=col_summer(2), pch=rep(c(1, 20), each=40), eigen=FALSE)
#' bot.pairs <- get_pairs(botcp, fac = "session", range=1:2)
# # with bot.pairs we can add segments between the two sessions
#' segments(bot.pairs$session1[, 1], bot.pairs$session1[, 2],
#' bot.pairs$session2[, 1], bot.pairs$session2[, 2],
#' col=col_summer(2)[bot.pairs$fac$type])
#'
#'
#' @export
get_pairs <- function(x, fac, range){UseMethod("get_pairs")}
#' @export
get_pairs.Coe <- function(x, fac, range){
# we check and prepare
fac <- fac_dispatcher(x, fac)
if (nlevels(fac) != 2) stop("more than two levels for the 'fac' provided")
tab <- table(fac)
if (length(unique(tab))!=1) stop("some mismatches between pairs")
# we get paired individuals
if (missing(range)) range <- 1:ncol(x$coe)
fl <- levels(fac)
x1 <- x$coe[fac==fl[1], range]
x2 <- x$coe[fac==fl[2], range]
res <- list(x1, x2, fac=x$fac[fac==fl[1],])
names(res)[1:2] <- fl
return(res)
}
#' @export
get_pairs.PCA <- function(x, fac, range){
# we check and prepare
fac <- fac_dispatcher(x, fac)
if (nlevels(fac) != 2) stop("more than two levels for the 'fac' provided")
tab <- table(fac)
if (length(unique(tab))!=1) stop("some mismatches between pairs")
# we get paired individuals
if (missing(range)) range <- 1:ncol(x$x)
fl <- levels(fac)
x1 <- x$x[fac==fl[1], range]
x2 <- x$x[fac==fl[2], range]
res <- list(x1, x2, fac=x$fac[fac==fl[1],])
names(res)[1:2] <- fl
return(res)
}
#' @export
get_pairs.LDA <- get_pairs.PCA
# 4. redo PCA ---------
#' "Redo" a PCA on a new Coe
#'
#' Basically reapply rotation to a new Coe object.
#' @param PCA a \link{PCA} object
#' @param Coe a \link{Coe} object
#' @note Quite experimental. Dimensions of the matrices and methods must match.
#' @examples
#' b <- filter(bot, type=="beer")
#' w <- filter(bot, type=="whisky")
#'
#' bf <- efourier(b, 8)
#' bp <- PCA(bf)
#'
#' wf <- efourier(w, 8)
#'
#' # and we use the "beer" PCA on the whisky coefficients
#' wp <- rePCA(bp, wf)
#'
#' plot(wp)
#'
#' plot(bp, eig=FALSE)
#' points(wp$x[, 1:2], col="red", pch=4)
#'
#'@export
rePCA <- function(PCA, Coe){
UseMethod("rePCA")
}
#'@export
rePCA.default <- function(PCA, Coe){
stop("method only defined for PCA objetcs")
}
#'@export
rePCA.PCA <- function(PCA, Coe){
if (Coe$method != PCA$method)
warning("methods differ between Coe and PCA")
scores <- PCA$x
rot <- PCA$rotation
coe <- Coe$coe
if (any(colnames(coe) != rownames(rot))) warning("matrices coefficients must match")
# we prepare a new PCA object
PCA2 <- PCA
PCA2$x <- matrix(NA, nrow(coe), ncol(rot), dimnames = list(rownames(coe), colnames(rot)))
PCA2$fac <- Coe$fac
# we recenter
coe <- apply(coe, 2, function(x) x - mean(x))
# learn matrix calculus bitch
for (PC in 1:ncol(rot)){
for (ind in 1:nrow(coe)){
PCA2$x[ind, PC] <- sum(coe[ind, ] * rot[, PC])
}
}
return(PCA2)
}
#' Calculates convex hull area/volume of PCA scores
#'
#' May be useful to compare shape diversity. Expressed in PCA units that should
#' only be compared within the same PCA.
#'
#' @param x a PCA object
#' @param fac (optionnal) column name or ID from the $fac slot.
#' @param xax the first PC axis to use (1 by default)
#' @param yax the second PC axis (2 by default)
#' @param zax the third PC axis (3 by default only for volume)
#'
#' @return
#' If fac is not provided global area/volume is returned; otherwise a named
#' list for every level of fac
#'
#' @details get_chull_area is calculated using \link{coo_chull} followed by \link{coo_area};
#' get_chull_volume is calculated using geometry::convexhulln
#'
#' @examples
#' bp <- PCA(efourier(bot, 12))
#' get_chull_area(bp)
#' get_chull_area(bp, 1)
#'
#' get_chull_volume(bp)
#' get_chull_volume(bp, 1)
#' @export
get_chull_area <- function (x, fac, xax = 1, yax = 2) {
if (!is_PCA(x)) stop("'x' must be a PCA object")
# no fac provided
if (missing(fac)){
xy <- x$x[, c(xax, yax)]
return(coo_area(coo_chull(xy)))
}
# else... if a fac is provided
fac <- fac_dispatcher(x, fac)
x <- x$x[, c(xax, yax)]
# we prepare the list
res <- list()
# we loop over to extract subset of coordinates
for (i in seq(along = levels(fac))) {
xy.i <- x[fac == levels(fac)[i], ]
# boring but prevents the numeric/2rows matrices
if (is.matrix(xy.i)) {
if (nrow(xy.i) > 2) {
res[[i]] <- coo_chull(xy.i)
} else {
res[[i]] <- NULL
}
} else {
res[[i]] <- NULL
}
}
# we calculate the area and return the results
names(res) <- levels(fac)
res <- lapply(res, coo_area)
return(res)
}
#' @rdname get_chull_area
#' @export
get_chull_volume <- function (x, fac, xax = 1, yax = 2, zax = 3) {
if (!is_PCA(x)) stop("'x' must be a PCA object")
# no fac provided
if (missing(fac)){
xy <- x$x[, c(xax, yax, zax)]
res <- geometry::convhulln(xy, options="FA")$vol
return(res)
}
# else...fac provided
fac <- fac_dispatcher(x, fac)
# we prepare the list
x <- x$x[, c(xax, yax, zax)]
res <- list()
# we loop over to extract subset of coordinates
for (i in seq(along = levels(fac))) {
xy.i <- x[fac == levels(fac)[i], ]
# boring but prevents the numeric/2rows matrices
if (is.matrix(xy.i)) {
if (nrow(xy.i) >= 4) {
res[[i]] <- geometry::convhulln(xy.i, options="FA")$vol
} else {
res[[i]] <- NULL
}
} else {
res[[i]] <- NULL
}
}
# we calculate the area and return the results
names(res) <- levels(fac)
return(res)
}
# 5. Flip PCA axes -------------
#' Flips PCA axes
#'
#' Simply multiply by -1, corresponding scores and rotation vectors for PCA objects.
#' PC orientation being arbitrary, this may help to have a better display.
#' @param x a PCA object
#' @param axs numeric which PC(s) to flip
#' @examples
#' bp <- bot %>% efourier(6) %>% PCA
#' bp %>% plot
#' bp %>% flip_PCaxes(1) %>% plot()
#' @export
flip_PCaxes <- function(x, axs){
UseMethod("flip_PCaxes")
}
#' @export
flip_PCaxes.default <- function(x, axs){
message("only defined on PCA objects")
}
#' @export
flip_PCaxes.PCA <- function(x, axs){
x$x[, axs] %<>% `*`(-1)
x$rotation[, axs] %<>% `*`(-1)
x
}
#### end PCA