diff --git a/DESCRIPTION b/DESCRIPTION index c9ab43b..799e554 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,15 +3,18 @@ Type: Package Version: 0.9-13 Date: 2018-01-05 Title: SciViews - Main package -Author: Philippe Grosjean [aut, cre] +Description: Functions to install SciViews additions to R, and more tools. Authors@R: c(person("Philippe", "Grosjean", role = c("aut", "cre"), email = "phgrosjean@sciviews.org")) Maintainer: Philippe Grosjean -Depends: R (>= 2.6.0), methods, grDevices, graphics, stats, MASS -Imports: ellipse +Depends: R (>= 2.6.0) +Imports: ellipse, grDevices, graphics, stats +Suggests: MASS, covr, knitr, testthat Enhances: base -Description: Functions to install SciViews additions to R, and more (various) tools. -License: GPL-2 ByteCompile: yes -URL: http://www.sciviews.org/SciViews-R -BugReports: https://r-forge.r-project.org/tracker/?group_id=194 +License: GPL-2 +URL: https://github.com/SciViews/SciViews, http://www.sciviews.org/SciViews-R +BugReports: https://github.com/SciViews/SciViews/issues +Roxygen: list(markdown = TRUE) +RoxygenNote: 6.0.1 +VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 7fdecdc..3c0e87e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,313 +1,79 @@ -import(methods, grDevices, graphics, stats, MASS, ellipse) - -export(.Intern, - char, - as.char, - is.char, - charAbbrev, - charEscape, - charExpand, - charFind, - charFindAll, - charFold, - charHeight, - charLower, - charMatch, - charPMatch, - charSearch, - charSplit, - charSubstr, - "charSubstr<-", - charSub, - charSubAll, - charTrans, - charTrim, - charTrimL, - charTrimR, - charTrunc, - charUpper, - charWidth, - charWrap, - p0, - p_, - ct, - cta, - ct_, - cta_, - encodingToNative, - encodingToUTF8, - encoding, - "encoding<-", - setEncoding, - as.integerBase, - as.intBase, - regex, - pcre, - is.regex, - is.pcre, - useBytes, - "useBytes<-", - filePath, - as.filePath, - is.filePath, - isDir, - isFile, - fileAccess, - fileAppend, - fileChmod, - fileCopy, - fileCreate, - fileDelete, - fileDir, - fileExists, - fileExpand, - fileFind, - fileInfo, - fileLink, - fileList, - fileListGlob, - fileName, - fileNormalize, - filePackage, - fileReadLink, - fileRemove, - fileRename, - fileShow, - fileSymLink, - fileTemp, - fileTime, - fileUMask, - dirCreate, - dirList, - dirR, - dirTemp, - sdir, - wdir, - ifElse, - names, - l, - nc, - nr, - Rows, - Cols, - any., - all., - one, - one., - asTRUE, - isFALSE, - asFALSE, - "!", - "@", - "@<-", - "%:%", - "%else%", - isEmpty, - stopIfNot, - package, - enum, - timing, - sysFunction, - sysCall, - matchCall, - sysParent, - sysParents, - parentFrame, - sysFrame, - sysnFrame, - sysStatus, - onExit, - sysOnExit, - dumpFrames, - debugOnce, - isDebugged, - baseEnv, - baseNamespaceEnv, - emptyEnv, - globalEnv, - autoloadEnv, - tempEnv, - topEnv, - envNew, - envParent, - "envParent<-", - envProfile, - sysSource, - evalQuote, - evalParent, - evalLocal, - autoloaded, - isNamespaceEnv, - - opt, - optDef, - - - plotOpt, - plotOptAll, - plotNew, - layoutShow, - screenSplit, - screenSet, - screenDelete, - screenClose, - l__cm, - x__in2user, - y__in2user, - xy__in2user, - x__cm2user, - y__cm2user, - xy__cm2user, - xConvert, - yConvert, - xyConvert, - contours, - contourplot, - filledplot, - starplot, - stemplot, - stripplot, - clevelandplot, - smoothplot, - smoothPanel, - coplotIntervals, - plotWindowInternal, - plotInternal, - boxplotInternal, - devNew, - devCur, - devList, - devNext, - devPrev, - devSet, - devClose, - devCloseAll, - devControl, - devHold, - devHoldFlush, - devFlush, - devCopy, - devCopyNew, - devCopyEps, - devCopyPdf, - devCopyBitmap, - devSave, - devRecord, - devReplay, - getSnapshot, - playSnapshot, - devCapture, - devSize, - devCapabilities, - devInteractive, - isDevInteractive, - devPdf, - devPdfOpt, - devPS, - devPSOpt, - devPdfCairo, - devPSCairo, - devSvg, - devBmp, - devJpeg, - devPng, - devTiff, - devBitmap, - devXfig, - color2rgb, - colorAdjust, - colorDens, - colorBlues9, - colorRainbow, - colorHeat, - colorTerrain, - colorTopo, - colorCm, - colorRwb, - colorRyg, - colorGray, - colorGrey, - colorConvertRgb, - colorConvert, - fontType1, - fontCid, - fontsPS, - fontsPdf, - fontsEmbed, - optCheck, - nclassSturges, - nclassScott, - nclassFD, - rangeExtend, - boxplotStats, - xyCoords, - xyzCoords, - in2cm, - cm2in, - correlation, - is.correlation, - as.correlation, - cwm.colors, - rwb.colors, - rwg.colors, - ryg.colors, - Pi, - E, - lb, - ln, - ln1p, - lg, - lg1p, - package, - panel.boxplot, - panel.density, - panel.hist, - panel.qqnorm, - panel.cor, - panel.ellipse, - panel.reg, - pcomp, - scores, - vectorplot) - -S3method(one, default) - -S3method(print, filePath) - -S3method(contours, default) - -S3method(print, s) - -S3method(print, regex) -S3method(print, pcre) - -S3method(vectorplot, default) -S3method(vectorplot, loadings) -S3method(vectorplot, correlation) - -S3method(correlation, formula) -S3method(correlation, default) - -S3method(print, correlation) -S3method(summary, correlation) -S3method(print, summary.correlation) -S3method(plot, correlation) -S3method(lines, correlation) - -S3method(pcomp, formula) -S3method(pcomp, default) - -S3method(print, pcomp) -S3method(summary, pcomp) -S3method(print, summary.pcomp) -S3method(plot, pcomp) -S3method(points, pcomp) -S3method(lines, pcomp) -S3method(text, pcomp) -S3method(screeplot, pcomp) -S3method(biplot, pcomp) -S3method(predict, pcomp) -#S3method(loadings, pcomp) # This is NOT a generic function, but it works well on pcomp -S3method(pairs, pcomp) -S3method(scores, pcomp) -S3method(correlation, pcomp) +# Generated by roxygen2: do not edit by hand + +S3method(biplot,pcomp) +S3method(correlation,default) +S3method(correlation,formula) +S3method(correlation,pcomp) +S3method(lines,Correlation) +S3method(lines,pcomp) +S3method(pairs,pcomp) +S3method(pcomp,default) +S3method(pcomp,formula) +S3method(plot,Correlation) +S3method(plot,pcomp) +S3method(points,pcomp) +S3method(predict,pcomp) +S3method(print,Correlation) +S3method(print,pcomp) +S3method(print,summary.Correlation) +S3method(print,summary.pcomp) +S3method(scores,pcomp) +S3method(screeplot,pcomp) +S3method(summary,Correlation) +S3method(summary,pcomp) +S3method(text,pcomp) +S3method(vectorplot,Correlation) +S3method(vectorplot,default) +S3method(vectorplot,loadings) +export(COLS) +export(Correlation) +export(E) +export(ROWS) +export(as.Correlation) +export(as.correlation) +export(correlation) +export(cwm.colors) +export(cwm_colors) +export(enum) +export(is.Correlation) +export(is.correlation) +export(lb) +export(lg) +export(lg1p) +export(ln) +export(ln1p) +export(nc) +export(nr) +export(panel.boxplot) +export(panel.cor) +export(panel.density) +export(panel.ellipse) +export(panel.hist) +export(panel.qqnorm) +export(panel.reg) +export(panel_boxplot) +export(panel_cor) +export(panel_density) +export(panel_ellipse) +export(panel_hist) +export(panel_qqnorm) +export(panel_reg) +export(panel_smooth) +export(pcomp) +export(rwb.colors) +export(rwb_colors) +export(rwg.colors) +export(rwg_colors) +export(ryg.colors) +export(ryg_colors) +export(scores) +export(timing) +export(vectorplot) +import(graphics) +import(stats) +importFrom(ellipse,ellipse) +importFrom(ellipse,plotcorr) +importFrom(grDevices,chull) +importFrom(grDevices,colorRampPalette) +importFrom(grDevices,hsv) +importFrom(grDevices,rainbow) diff --git a/NEWS.md b/NEWS.md index b8b13f6..34ba691 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,21 @@ # SciViews News +## SciViews version 0.9-13 + +* 'correlation' objects are now 'Correlation' to avoid clash with 'correlation' + objects from **nlme** package. + +* Doc rewritten in Roxygen2 and R Markdown (keeping only pca vignette). More + strict `importFrom()` in particular from **ellipse** package. + +* Elimination of files.R, character.R & graphics.R (experimental code that never + reached CRAN). + +* Code reritten to comply with tidyverse style guide. Functions that are not + snake case like `rwb.colors()` or `panel.hist()` now are seconded by they + equivalent `rwb_colors()` or `panel_hist()`. + + ## SciViews version 0.9-12 * Switch from R-Forge to Github https://github.com/SciViews/SciViews. CI added. diff --git a/R/SciViews-internal.R b/R/SciViews-internal.R index 2632009..78f2e35 100644 --- a/R/SciViews-internal.R +++ b/R/SciViews-internal.R @@ -1,87 +1,47 @@ -.onLoad <- function (lib, pkg) -{ - ## If corresponding options are not defined yet, specify them to FALSE - ## by default, but make them explicitly available in options() - if (!length(getOption("warnAssignWithEqualSign"))) - options(warnAssignWithEqualSign = FALSE) - if (!length(getOption("warnPartialMatchArgs"))) - options(warnPartialMatchArgs = FALSE) - if (!length(getOption("warnPartialMatchAttr"))) - options(warnPartialMatchAttr = FALSE) - if (!length(getOption("warnPartialMatchDollar"))) - options(warnPartialMatchDollar = FALSE) - - ## TODO: check configuration and install everything that we need to use the - ## SciViews extensions, including the HTTP or socket server - #serve <- getOption("ko.serve") - #if (!is.null(serve)) { - # startSocketServer(port = as.integer(serve)[1]) - # guiInstall() - #} -} - -.onUnload <- function (libpath) -{ - #serve <- getOption("ko.serve") - #if (!is.null(serve) && "package:svSocket" %in% search()) - # stopSocketServer(port = as.integer(serve)[1]) - #guiUninstall() -} - -.packageName <- "SciViews" - -## Rethink this first before making this public -.subclass <- function (x, class, superclasses = NULL) -{ - ## TODO: check this is an S3 object that inherits from the given class(es) - if (!is.null(superclasses)) { - misClass <- inherits(x, as.character(superclasses), which = TRUE) == 0 - if (any(misClass)) - stop("'x' does not inherits from", paste(superclasses[misClass], - collapse = ", ")) - } - ## Check if new class in not already defined - if (class %in% class(x)) return(x) - ## Prepend that class - class(x) <- c(class, class(x)) - return(x) -} - -`.subclass<-` <- function (x, value) -{ - if (!value %in% class(x)) class(x) <- c(value, class(x)) - return(x) -} - -## Code borrowed from svMisc, to avoid a dependency! -.TempEnv_ <- function () { - pos <- match("SciViews:TempEnv", search()) - if (is.na(pos)) { # Must create it - `SciViews:TempEnv` <- list() - Attach <- function (...) get("attach", mode = "function")(...) - Attach(`SciViews:TempEnv`, pos = length(search()) - 1) - rm(`SciViews:TempEnv`) - pos <- match("SciViews:TempEnv", search()) - } - pos.to.env(pos) -} - -.assignTemp <- function (x, value, replace.existing = TRUE) { - TempEnv <- .TempEnv_() - if (isTRUE(replace.existing) || !exists(x, envir = TempEnv, mode = "any", - inherits = FALSE)) - assign(x, value, envir = TempEnv) -} - -## This is for convenience: . == .GlobalEnv -#.assignTemp(".", base::.GlobalEnv) - -## To avoid a useless note when checking the package, -## we replace any .Internal() into .Intern() in renamed R functions -.Intern <- .Internal - -.Recode <- function (f) -{ - body(f) <- parse(text = gsub("\\.Internal\\(", ".Intern(", deparse(body(f)))) - f -} +.onLoad <- function(lib, pkg) { # nocov start + # If corresponding options are not defined yet, specify them to FALSE + # by default, but make them explicitly available in options() + if (!length(getOption("warnAssignWithEqualSign"))) + options(warnAssignWithEqualSign = FALSE) + if (!length(getOption("warnPartialMatchArgs"))) + options(warnPartialMatchArgs = FALSE) + if (!length(getOption("warnPartialMatchAttr"))) + options(warnPartialMatchAttr = FALSE) + if (!length(getOption("warnPartialMatchDollar"))) + options(warnPartialMatchDollar = FALSE) +} # nocov end + +#.onUnload <- function(libpath) { +# # Do nothing for now +#} + +.packageName <- "SciViews" # nocov + +# Code borrowed from svMisc, to avoid a dependency! +.TempEnv_ <- function() { + srch <- search() + pos <- match("SciViews:TempEnv", srch) + if (is.na(pos)) {# Must create it + pos <- length(srch) - 1 + `SciViews:TempEnv` <- list() + Attach <- function(...) get("attach", mode = "function")(...) + Attach(`SciViews:TempEnv`, pos = pos) + } + pos.to.env(pos) +} + +.assignTemp <- function(x, value, replace.existing = TRUE) { + TempEnv <- .TempEnv_() + if (isTRUE(replace.existing) || !exists(x, envir = TempEnv, mode = "any", + inherits = FALSE)) { + assign(x, value, envir = TempEnv) + } +} + +# A copy of the unexported stats:::.check_vars_numeric +.check_vars_numeric <- function(mf) { + mt <- attr(mf, "terms") + mterms <- attr(mt, "factors") + mterms <- rownames(mterms)[apply(mterms, 1L, function(x) any(x > 0L))] + any(sapply(mterms, function(x) is.factor(mf[, x]) || !is.numeric(mf[, x]))) +} diff --git a/R/SciViews-package.R b/R/SciViews-package.R new file mode 100644 index 0000000..fb8b1a9 --- /dev/null +++ b/R/SciViews-package.R @@ -0,0 +1,18 @@ +#' SciViews - Main package +#' +#' The SciViews package provides various functions to install the SciViews::R +#' dialect. It also provides additional utilites besides base, recommended and +#' tidyverse. +#' +#' @section Important functions: +#' +#' TODO... +#' +#' @docType package +#' @name SciViews-package +#' +#' @import stats +#' @import graphics +#' @importFrom grDevices colorRampPalette chull hsv rainbow +#' @importFrom ellipse ellipse plotcorr +NULL diff --git a/R/character.R b/R/character.R deleted file mode 100644 index 8ba100e..0000000 --- a/R/character.R +++ /dev/null @@ -1,259 +0,0 @@ -## Essentially a series of base R function that manipulate character strings -## and that are renamed/rationalized for facility - -## TODO: deal with zero length strings and NAs appropriately in all functions -## TODO: make.names, make.unique, regmatches, grepRaw and charToRaw - -#### In regex.Rd ################################################################## - -## String find/replace using fixed pattern or regular expressions -regex <- function (pattern, use.bytes = FALSE, globbing, -trim.head = FALSE, trim.tail = TRUE) -{ - ## Construct a regex object containing a regular expression pattern - if (!missing(globbing)) pattern <- utils::glob2rx(globbing, - trim.head = trim.head, trim.tail = trim.tail) - pattern <- as.character(pattern) - class(pattern) <- c("regex", "character") - if (isTRUE(as.logical(use.bytes))) attr(pattern, "useBytes") <- TRUE - pattern -} - -is.regex <- function (x) inherits(x, "regex") - -print.regex <- function (x, ...) -{ - msg <- "Regular expression pattern" - if (useBytes(x)) { - msg <- paste(msg, "(byte-by-byte):\n") - } else msg <- paste(msg, ":\n", sep = "") - cat(msg) - print(as.character(x)) -} - - -pcre <- function (pattern, use.bytes = FALSE) -{ - pattern <- as.character(pattern) - class(pattern) <- c("pcre", "regex", "character") - if (isTRUE(as.logical(use.bytes))) attr(pattern, "useBytes") <- TRUE - pattern -} - -print.pcre <- function (x, ...) -{ - msg <- "Perl-compatible regular expression pattern" - if (useBytes(x)) { - msg <- paste(msg, "(byte-by-byte):\n") - } else msg <- paste(msg, ":\n", sep = "") - cat(msg) - print(as.character(x)) -} - -is.pcre <- function (x) inherits(x, "pcre") - - - - -useBytes <- function (x) isTRUE(attr(x, "useBytes")) - -`useBytes<-` <- function (x, value) -{ - if (!is.character(x)) x <- as.character(x) - attr(x, "useBytes") <- isTRUE(as.logical(value)) - x -} - - -#### In character.Rd ########################################################### -## Conversion to character string... + creation and test, shorter versions -char <- .Recode(base::character) -as.char <- base::as.character -is.char <- base::is.character - -## Count the number of characters -## No: make an exception: after n (or nz) do not use uppercase! -#nChar <- nchar -#nzChar <- nzchar - -## paste() is rather long name, in comparison with, e.g., c(). -## Also the default argument of sep = " " is irritating and is not consistent -## with stop() or warning() for instance, that use sep = "". -## Thus, we define: -if (exists("paste0", envir = baseenv())) { # Starting from R 2.15.0 - p0 <- .Recode(base::paste0) -} else { - p0 <- function (..., collapse = NULL) - paste(..., sep = "", collapse = collapse) -} - -p_ <- .Recode(base::paste) - -## TODO: `%+%` for pasting two strings together?! - -## The same is true for cat() with sep = " "... and the default behaviour of -## not ending with line feed is more confusing that useful => change this -## behaviour by adding a end = "\n" argument. -## TODO: by default, interpret unicode and formatting like ucat() or ecat()! -ct <- function (..., file = "", end = "\n", fill = FALSE, -labels = NULL) - cat(..., end, file = file, sep = "", fill = fill, labels = labels, - append = FALSE) - -cta <- function (..., file = "", end = "\n", fill = FALSE, -labels = NULL) - cat(..., end, file = file, sep = "", fill = fill, labels = labels, - append = TRUE) - -ct_ <- function (..., file = "", sep = " ", end = "\n", fill = FALSE, -labels = NULL) - cat(..., end, file = file, sep = sep, fill = fill, labels = labels, - append = FALSE) - -cta_ <- function (..., file = "", sep = " ", end = "\n", fill = FALSE, -labels = NULL) - cat(..., end, file = file, sep = sep, fill = fill, labels = labels, - append = TRUE) - -## Change case and translate -charTrans <- function (x, old, new) chartr(old = old, new = new, x = x) -charFold <- base::casefold -charLower <- .Recode(base::tolower) -charUpper <- .Recode(base::toupper) - -charTrim <- function (x, all.spaces = FALSE) # Trim both sides -{ - pat <- (if (isTRUE(all.spaces)) "[[:space:]]+" else "[[:blank:]]+") - ## Trim left first - x <- charSub(p0("^", pat), "", x) - ## ... then trim right - charSub(p0(pat, "$"), "", x) -} - -charTrimL <- function (x, all.spaces = FALSE) # Trim left-side only - charSub(if (isTRUE(all.spaces)) "^[[:space:]]+" else "^[[:blank:]]+", "", x) - -charTrimR <- function (x, all.spaces = FALSE) # Trim right-side only - charSub(if (isTRUE(all.spaces)) "[[:space:]]+$" else "[[:blank:]]+$", "", x) - -charTrunc <- .Recode(base::strtrim) ## This indeed truncs strings!!! - -charSubstr <- .Recode(base::substr) -`charSubstr<-` <- .Recode(base::`substr<-`) - -## Substrings -charSplit <- function (x, pattern) - strsplit(x, split = pattern, fixed = !is.regex(pattern), - perl = is.pcre(pattern), useBytes = useBytes(pattern)) - -## Inconsistencies: regexpr(pattern, text, ...) and strsplit(x, xplit, ...) -## Solved with the present versions! -charSub <- function (x, pattern, replacement, ignore.case = FALSE) - sub(pattern, replacement, x, ignore.case = ignore.case, fixed = !is.regex(pattern), - perl = is.pcre(pattern), useBytes = useBytes(pattern)) - -charSubAll <- function (x, pattern, replacement, ignore.case = FALSE) - gsub(pattern, replacement, x, ignore.case = ignore.case, fixed = !is.regex(pattern), - perl = is.pcre(pattern), useBytes = useBytes(x)) - -charFind <- function (x, pattern, ignore.case = FALSE) - regexpr(pattern, text = x, ignore.case = ignore.case, fixed = !is.regex(pattern), - perl = is.pcre(pattern), useBytes = useBytes(pattern)) - -charFindAll <- function (x, pattern, ignore.case = FALSE) - gregexpr(pattern, text = x, ignore.case = ignore.case, fixed = !is.regex(pattern), - perl = is.pcre(pattern), useBytes = useBytes(pattern)) - -charSearch <- function (x, pattern, ignore.case = FALSE, -type = c("logical", "position", "value"), max.distance = 0, costs = NULL) -{ - type <- pmatch(type) - if (!is.regex(pattern)) { # Search fixed string - res <- switch(type, - logical = grepl(pattern, x, ignore.case = ignore.case, - fixed = TRUE, useBytes = useBytes(pattern)), - position = grep(pattern, x, ignore.case = ignore.case, value = FALSE, - fixed = TRUE, useBytes = useBytes(pattern)), - value = grep(pattern, x, ignore.case = ignore.case, value = TRUE, - fixed = TRUE, useBytes = useBytes(pattern)), - stop("Unknown type")) - } else { # Search regular expression - ## If max.distance > 0, use approximate search - if (max.distance > 0) { # Use agrep() - ## No pcre expression currently accepted! - if (is.pcre(pattern)) - stop("Perl regular expression not allowed with max.distance > 0") - res <- switch(type, - logical = 1:length(x) %in% agrep(pattern, x, - ignore.case = ignore.case, value = FALSE, - max.distance = max.distance, costs = costs, - useBytes = useBytes(pattern)), - position = agrep(pattern, x, ignore.case = ignore.case, - value = FALSE, max.distance = max.distance, costs = costs, - useBytes = useBytes(pattern)), - value = agrep(pattern, x, ignore.case = ignore.case, - value = TRUE, max.distance = max.distance, costs = costs, - useBytes = useBytes(pattern)), - stop("Unknown type")) - } else { # Use regular search (grep()) - res <- switch(type, - logical = grepl(pattern, x, ignore.case = ignore.case, - fixed = FALSE, perl = is.pcre(pattern), - useBytes = useBytes(pattern)), - position = grep(pattern, x, ignore.case = ignore.case, - value = FALSE, fixed = FALSE, perl = is.pcre(pattern), - useBytes = useBytes(pattern)), - value = grep(pattern, x, ignore.case = ignore.case, - value = TRUE, fixed = FALSE, perl = is.pcre(pattern), - useBytes = useBytes(pattern)), - stop("Unknown type")) - } - } - res -} - -## Match, expand or abbreviate character strings to a list of items -charMatch <- .Recode(base::charmatch) -## There is a faster version in data.table! named chmatch, but is does not -## exactly the same since it expects characters and do no partial matching! - -charPMatch <- .Recode(base::pmatch) - -charExpand <- function (x, target, nomatch = NA_character_) - char.expand(input = x, target = target, nomatch = nomatch) - -charAbbrev <- function (x, min.length = 4, dot = FALSE, strict = FALSE, -method = c("left.kept", "both.sides")) - abbreviate(names.arg = x, minlength = min.length, dot = dot, strict = strict, - method = method) - - -## Format character strings -charEscape <- .Recode(base::encodeString) -charWrap <- base::strwrap -# Add charPad => pad a string left/right or both or charPad/charPadL/charPadR? -#+sprintf/gettextf? - -## Measure size of a string (package graphics) -charHeight <- .Recode(graphics::strheight) -charWidth <- .Recode(graphics::strwidth) - -## Character encoding -encodingToNative <- base::enc2native -encodingToUTF8 <- base::enc2utf8 -encoding <- .Recode(base::Encoding) -## R CMD check got fooled because it does not find setEncoding... We give it too -`encoding<-` <- setEncoding <- .Recode(base::`Encoding<-`) - -#### In as.intBase.Rd file ##################################################### - -# To avoid using strtoi(), we prefer as.integerBase (because as.integer cannot -# be converted into a generic function, because it is a primitive!) -as.intBase <- as.integerBase <- .Recode(base::strtoi) - -## Define a function that takes: singular/plural msg and a vector of strings -## and construct a single string with: -## singular msg: single item -## or -## plural msg: item1, item2, ..., itemN -#+paste = cChar? + my special character string manipulation functions? diff --git a/R/colors.R b/R/colors.R index f7646b3..99ce750 100755 --- a/R/colors.R +++ b/R/colors.R @@ -1,44 +1,85 @@ -## We often need a red-white-blue color ramp, or a red-yellow-green one -## So, define rwb.colors() and ryg.colors() -rwb.colors <- function (n, alpha = 1, s = 0.9, v = 0.9) -{ - if ((n <- as.integer(n[1L])) <= 0) return(character(0L)) - ## Define the initial (red) and final (blue) colors with white in between - cols <- c(hsv(0, s, v, alpha = alpha), # Red - hsv(0, 0, v, alpha = alpha), # White - hsv(2/3, s, v, alpha = alpha)) # Blue - ## Use a color ramp from red to white to blue - return(colorRampPalette(cols)(n)) +#' Various color palettes. +#' +#' Create vectors of `n` contiguous colors. +#' +#' @param n The number of colors (>= 1) to be in the palette. +#' @param alpha The alpha transparency, a number in \[0, 1\], see argument +#' `alpha =` in [hsv()]. +#' @param s The 'saturation' to be used to complete the HSV color descriptions. +#' @param v The 'value' to use for the HSV color descriptions. +#' @details `cwm_colors(s = 0.5, v = 1)` gives very similar colors to +#' `cm.colors()`. +#' `ryg_colors()` is similar to `rainbow(start = 0, end = 2/6)`. +#' The `xxx_colors()` (tidyverse name-compatible) and `xxx.colors()`` +#' (grDevices name-compatible) functions are synonyms. +#' @export +#' @name colors +#' @seealso [cm.colors()], [colorRampPalette()] +#' @keywords color +#' @concept color palettes +#' @examples +#' # Draw color wheels with various palettes +#' opar <- par(mfrow = c(2, 2)) +#' pie(rep(1, 11), col = cwm.colors(11), main = "Cyan - white - magenta") +#' pie(rep(1, 11), col = rwb.colors(11), main = "Red - white - blue") +#' pie(rep(1, 11), col = rwg.colors(11), main = "Red - white - green") +#' pie(rep(1, 11), col = ryg.colors(11), main = "Red - yellow - green") +#' par(opar) +rwb_colors <- function(n, alpha = 1, s = 0.9, v = 0.9) { + if ((n <- as.integer(n[1L])) <= 0) return(character(0L)) + # Define the initial (red) and final (blue) colors with white in between + cols <- c(hsv(0, s, v, alpha = alpha), # Red + hsv(0, 0, v, alpha = alpha), # White + hsv(2/3, s, v, alpha = alpha)) # Blue + # Use a color ramp from red to white to blue + colorRampPalette(cols)(n) } -## Red-white-green palette (take care for color-blind people here)! -rwg.colors <- function (n, alpha = 1, s = 0.9, v = 0.9) -{ - if ((n <- as.integer(n[1L])) <= 0) return(character(0L)) - ## Define the initial (red) and final (blue) colors with white in between - cols <- c(hsv(0, s, v, alpha = alpha), # Red - hsv(0, 0, v, alpha = alpha), # White - hsv(2/6, s, v, alpha = alpha)) # Green - ## Use a color ramp from red to white to green - return(colorRampPalette(cols)(n)) +#' @export +#' @rdname colors +rwb.colors <- rwb_colors # grDevices compatibility + +#' @export +#' @rdname colors +rwg_colors <- function(n, alpha = 1, s = 0.9, v = 0.9) { + if ((n <- as.integer(n[1L])) <= 0) return(character(0L)) + # Define the initial (red) and final (blue) colors with white in between + cols <- c(hsv(0, s, v, alpha = alpha), # Red + hsv(0, 0, v, alpha = alpha), # White + hsv(2/6, s, v, alpha = alpha)) # Green + # Use a color ramp from red to white to green + colorRampPalette(cols)(n) } -## Red-yellow-green palette (take care for color-blind people here)! -ryg.colors <- function (n, alpha = 1, s = 0.9, v = 0.9) -{ - ## This is essentially rainbow(), but going from 0 (red) to 2/6 (green) - return(rainbow(n, s = s, v = v, start = 0, end = 2/6, alpha = alpha)) +#' @export +#' @rdname colors +rwg.colors <- rwg_colors # grDevices compatibility + +#' @export +#' @rdname colors +ryg_colors <- function(n, alpha = 1, s = 0.9, v = 0.9) { + # This is essentially rainbow(), but going from 0 (red) to 2/6 (green) + rainbow(n, s = s, v = v, start = 0, end = 2/6, alpha = alpha) } -## Slighly different than cm.colors(), allowing for s and v! -## Produce probably better results on a CMYK device (color printer)? -cwm.colors <- function (n, alpha = 1, s = 0.9, v = 0.9) -{ - if ((n <- as.integer(n[1L])) <= 0) return(character(0L)) - ## Define the initial (red) and final (blue) colors with white in between - cols <- c(hsv(1/2, s, v, alpha = alpha), # Cyan - hsv(0, 0, v, alpha = alpha), # White - hsv(5/6, s, v, alpha = alpha)) # Magenta - ## Use a color ramp from cyan to white to magenta - return(colorRampPalette(cols)(n)) +#' @export +#' @rdname colors +ryg.colors <- ryg_colors # grDevices compatibility + +#' @export +#' @rdname colors +cwm_colors <- function(n, alpha = 1, s = 0.9, v = 0.9) { + # Slighly different than cm.colors(), allowing for s and v! + # Produce probably better results on a CMYK device (color printer)? + if ((n <- as.integer(n[1L])) <= 0) return(character(0L)) + # Define the initial (red) and final (blue) colors with white in between + cols <- c(hsv(1/2, s, v, alpha = alpha), # Cyan + hsv(0, 0, v, alpha = alpha), # White + hsv(5/6, s, v, alpha = alpha)) # Magenta + # Use a color ramp from cyan to white to magenta + colorRampPalette(cols)(n) } + +#' @export +#' @rdname colors +cwm.colors <- cwm_colors # grDevices compatibility diff --git a/R/correlation.R b/R/correlation.R index e14bc2c..923c848 100755 --- a/R/correlation.R +++ b/R/correlation.R @@ -1,282 +1,364 @@ -## A wrapper around cor() and the like, building a "correlation" S3 object -## TODO: cov.wt(), cov2correlation(), and perhaps, functions cov.XXX() from MASS -## TODO: max, min, range, which.max, which.min for 'correlation' objects that do -## not consider elements on the diagonal... or put something else to avoid it is -## extracted for max, or which.max??? + something like 'highest' which considers -## the absolute value??? How to deal with that? - -## A generic function to calculate correlation from an object -correlation <- function (x, ...) - UseMethod("correlation") - -correlation.formula <- function (formula, data = NULL, subset, na.action, ...) -{ - mt <- terms(formula, data = data) - if (attr(mt, "response") > 0L) - stop("response not allowed in formula") - cl <- match.call() - mf <- match.call(expand.dots = FALSE) - mf$... <- NULL - mf[[1L]] <- as.name("model.frame") - mf <- eval.parent(mf) - ## TODO: avoid this! - if (stats:::.check_vars_numeric(mf)) - stop("Correlation applies only to numerical variables") - mt <- attr(mf, "terms") - attr(mt, "intercept") <- 0L - x <- model.matrix(mt, mf) - res <- correlation.default(x, ...) - cl[[1L]] <- as.name("correlation") - attr(res, "call") <- cl - attr(res, "na.method") <- NULL - if (!is.null(na.action)) - attr(res, "na.action") <- as.character(na.action) - return(res) +#' Correlation matrices. +#' +#' Compute the correlation matrix between two variables, or more (between all +#' columns of a matrix or data frame). +#' +#' @param x A numeric vector, matrix or data frame (or any object for +#' `is.Correlation()`, `as.Correlation()`. +#' @param formula A formula with no response variable, referring only to numeric +#' variables. +#' @param data An optional data frame (or similar: see [model.frame()]) +#' containing the variables in the formula `formula`. By default the variables +#' are taken from `environment(formula)`. +#' @param subset An optional vector used to select rows (observations) of the +#' data matrix `x`. +#' @param na.action A function which indicates what should happen when the data +#' contain `NA`s. The default is set by the `na.action =` setting of `options()` +#' and `na.fail()` is used if that is not set. The 'factory-fresh' default is +#' `na.omit()`. +#' @param method A character string indicating which correlation coefficient is +#' to be computed. One of `"pearson"` (default), `"kendall"`, or `"spearman"`, +#' can be abbreviated. +#' @param y `NULL` (default), or a vector, matrix or data frame with compatible +#' dimensions to `x` for `Correlation()`. The default is equivalent to `x = y`, +#' but more efficient. +#' @param use An optional character string giving a method for computing +#' correlations in the presence of missing values. This must be (an abbreviation +#' of) one of the strings `"everything"`, `"all.obs"`, `"complete.obs"`, +#' `"na.or.complete"`, or `"pairwise.complete.obs"`. +#' @param digits Digits to print after the decimal separator. +#' @param cutoff Correlation coefficients lower than this (in absolute value) +#' are suppressed. +#' @param object A 'Correlation' object. +#' @param cutpoints The cut points to use for categories. Specify only positive +#' values (absolute value of correlation coefficients are summarized, or +#' negative equivalents are automatically computed for the graph. Do not include +#' 0 or 1 in the cutpoints). +#' @param symbols The symbols to use to summarize the correlation matrix. +#' @param outline Do we draw the outline of the ellipse? +#' @param palette A function that can produce a palette of colors. +#' @param col Color of the ellipse. If `NULL` (default), the colors will be +#' computed using `cutpoints =` and `palette =`. +#' @param numbers Do we print correlation values in the center of the ellipses? +#' @param type Do we plot a complete matrix, or only lower or upper triangle? +#' @param diag Do we plot items on the diagonal? They have always a correlation +#' of one. +#' @param cex.lab The expansion factor for labels. +#' @param cex The expansion factor for text. +#' @param choices The items to select +#' @param lty The line type to draw. +#' @param ar.length The length of the arrow head. +#' @param pos The position relative to arrows. +#' @param labels The label to draw nead arrows. +#' @param ... Further arguments passed to functions. +#' @return `Correlation()` and `as.Correlation()`` create a 'Correlation' +#' object, while `is.Correlation()`` tests for it. +#' +#' There are `print()` and `summary()` methods for the 'Correlation' object +#' that differ in the symbolic encoding of the correlations in `summary()`, +#' using5 symnum()], which makes large correlation matrices more readable. +#' +#' The method `plot()` returns nothing, but it draws ellipses on a graph that +#' represent the correlation matrix visually. This is essentially the +#' [plotcorr()] function from package **ellipse**, with slightly different +#' default arguments and with default `cutpoints` equivalent to those used in +#' the `summary()` method. +#' @author Philippe Grosjean , wrapping code in package +#' ellipse, function [plotcorr()] for the `plot.Correlation()` method. +#' @export +#' @seealso [cov()], [cov2cor()], [cov.wt()], [symnum()], [plotcorr()] and look +#' at [panel_cor()] +#' @keywords distribution +#' @concept correlation matrix and plot +#' @examples +#' # This is a simple correlation coefficient +#' cor(rnorm(10), runif(10)) +# but this is a 'Correlation' object containing a correlation matrix +#' Correlation(rnorm(10), runif(10)) +#' +#' # 'Correlation' objects allow better inspection of the correlation matrices +#' # than the output of default R cor() function +#' (longley.cor <- Correlation(longley)) +#' summary(longley.cor) # Synthetic view of the correlation matrix +#' plot(longley.cor) # Graphical representation +#' +#' # Use of the formula interface +#' (mtcars.cor <- Correlation(~ mpg + cyl + disp + hp, data = mtcars, +#' method = "spearman", na.action = "na.omit")) +#' +#' mtcars.cor2 <- Correlation(mtcars, method = "spearman") +#' print(mtcars.cor2, cutoff = 0.6) +#' summary(mtcars.cor2) +#' plot(mtcars.cor2, type = "lower") +#' +#' mtcars.cor2["mpg", "cyl"] # Extract a correlation from the correlation matrix +correlation <- function(x, ...) + UseMethod("correlation") + +#' @export +#' @rdname correlation +Correlation <- correlation # Was defined as correlation, but clash with nlme! + +#' @export +#' @rdname correlation +correlation.formula <- function(formula, data = NULL, subset, na.action, ...) { + mt <- terms(formula, data = data) + if (attr(mt, "response") > 0L) + stop("response not allowed in formula") + cl <- match.call() + mf <- match.call(expand.dots = FALSE) + mf$... <- NULL + mf[[1L]] <- as.name("model.frame") + mf <- eval.parent(mf) + + if (.check_vars_numeric(mf)) + stop("Correlation applies only to numerical variables") + mt <- attr(mf, "terms") + attr(mt, "intercept") <- 0L + x <- model.matrix(mt, mf) + res <- correlation.default(x, ...) + cl[[1L]] <- as.name("Correlation") + attr(res, "call") <- cl + attr(res, "na.method") <- NULL + if (!is.null(na.action)) + attr(res, "na.action") <- as.character(na.action) + res } -## Create the 'correlation' object (same arguments as cor() in stats package) -correlation.default <- function (x, y = NULL, use = "everything", -method = c("pearson", "kendall", "spearman"), ...) -{ - Call <- match.call() - x <- as.matrix(x) - na.methods <- c("all.obs", "complete.obs", "pairwise.complete.obs", - "everything", "na.or.complete") - na.method <- pmatch(use, na.methods) - method <- match.arg(method) - - ## Just call cor in stats package - res <- stats::cor(x = x, y = y, use = use, method = method) - - ## We want to return a correlation matrix, even if there is one correlation - if (length(res) == 1) { - ## Create a simple correlation matrix using 'x' and 'y' as labels - res <- matrix(c(1, res, res, 1), ncol = 2, - dimnames = list(c("x", "y"), c("x", "y"))) - } - - ## Same strings as for cor.test() - attr(res, "method") <- switch(method, - pearson = "Pearson's product-moment correlation", - kendall = "Kendall's rank correlation tau", - spearman = "Spearman's rank correlation rho", - method) - attr(res, "na.method") <- na.methods[na.method] - attr(res, "call") <- Call - class(res) <- c("correlation", "matrix") - - return(res) +#' @export +#' @rdname correlation +correlation.default <- function(x, y = NULL, use = "everything", +method = c("pearson", "kendall", "spearman"), ...) { + Call <- match.call() + x <- as.matrix(x) + na.methods <- c("all.obs", "complete.obs", "pairwise.complete.obs", + "everything", "na.or.complete") + na.method <- pmatch(use, na.methods) + method <- match.arg(method) + + # Just call cor in stats package + res <- cor(x = x, y = y, use = use, method = method) + + # We want to return a correlation matrix, even if there is one correlation + if (length(res) == 1) { + res <- matrix(c(1, res, res, 1), ncol = 2, + dimnames = list(c("x", "y"), c("x", "y"))) + } + + # Same strings as for cor.test() + attr(res, "method") <- switch(method, + pearson = "Pearson's product-moment correlation", + kendall = "Kendall's rank correlation tau", + spearman = "Spearman's rank correlation rho", + method) + attr(res, "na.method") <- na.methods[na.method] + attr(res, "call") <- Call + class(res) <- c("Correlation", "matrix") + + res } -## Check if an object is a correlation matrix -is.correlation <- function (x) - return(inherits(x, "correlation")) - -## Transform a square matrix or a data.frame with values between -1 and 1 -## in a 'correlation' object -## TODO: should we keep more attributes, in order to document other correlation -## calculations? -as.correlation <- function (x) { - if (is.correlation(x)) return(x) - - ## Make sure we have a matrix with numeric data, dimnames and nothing else - ## (drop all other arguments, except 'comment', perhaps) - res <- structure(as.numeric(x), dim = dim(x), dimnames = dimnames(x)) - - ## Check that it is a square (2D) matrix, or an atomic number - d <- dim(x) - if (is.null(d)) { - ## Is this an atomic number? - if (length(x) == 1) { - ## Create the simplest correlation matrix using - ## generic 'x' and 'y' labels - res <- matrix(c(1, res, res, 1), ncol = 2, - dimnames = list(c("x", "y"), c("x", "y"))) - } - } else { # Check that it is a square matrix - if (length(d) != 2 || d[1] != d[2]) - stop("x must be a square matrix") - } - - ## Check the range that must be between -1 and 1 - rg <- range(res, na.rm = TRUE) - if (rg[1] < -1 || rg[2] > 1) - stop("A correlation matrix cannot have values lower than -1 or larger than 1") - - ## Reinject comment, if it exists - comment(res) <- comment(x) - - ## Look for a "method" attribute - attr(res, "method") <- attr(x, "method") - ## ... and a na.method, or na.action attribute - attr(res, "na.action") <- attr(x, "na.action") - attr(res, "na.method") <- attr(x, "na.method") - - ## Set this as both a 'correlation' and 'matrix' S3 object - class(res) <- c("correlation", "matrix") - - return(res) +#' @export +#' @rdname correlation +is.Correlation <- function(x) + inherits(x, "Correlation") + +#' @export +#' @rdname correlation +is.correlation <- is.Correlation # Backward compatibility + +#' @export +#' @rdname correlation +as.Correlation <- function(x) { + if (is.Correlation(x)) return(x) + + # Make sure we have a matrix with numeric data, dimnames and nothing else + # (drop all other arguments, except 'comment', perhaps) + res <- structure(as.numeric(x), dim = dim(x), dimnames = dimnames(x)) + + # Check that it is a square (2D) matrix, or an atomic number + d <- dim(x) + if (is.null(d)) { + # Is this an atomic number? + if (length(x) == 1) { + # Create the simplest correlation matrix using generic 'x' and 'y' labels + res <- matrix(c(1, res, res, 1), ncol = 2, + dimnames = list(c("x", "y"), c("x", "y"))) + } + } else {# Check that it is a square matrix + if (length(d) != 2 || d[1] != d[2]) + stop("x must be a square matrix") + } + + rg <- range(res, na.rm = TRUE) + if (rg[1] < -1 || rg[2] > 1) + stop("A correlation matrix cannot have values lower than -1 or larger than 1") + + # Reinject comment and other attrinutes, if they exist + comment(res) <- comment(x) + attr(res, "method") <- attr(x, "method") + attr(res, "na.action") <- attr(x, "na.action") + attr(res, "na.method") <- attr(x, "na.method") + + class(res) <- c("Correlation", "matrix") + res } -## Print a 'correlation' object -print.correlation <- function (x, digits = 3, cutoff = 0, ...) -{ - if (!is.correlation(x)) - stop("x must be a 'correlation' object (correlation matrix)") - - method <- attr(x, "method") - if (is.null(method)) { - cat("Correlation matrix:\n") - } else { - cat("Matrix of ", method, ":\n", sep = "") - } - - na.method <- attr(x, "na.method") - if (!is.null(na.method)) { - cat("(calculation uses ", na.method, ")\n", sep = "") - } else { - na.action <- attr(x, "na.action") - if (!is.null(na.action)) - cat("(missing values are managed with ", na.action, ")\n", sep = "") - } - cform <- format(round(x, digits = digits)) - nc <- nchar(cform[1L], type = "c") - cform[abs(x) < cutoff] <- paste(rep(" ", nc), collapse = "") - print(cform, quote = FALSE, ...) - - return(invisible(x)) +#' @export +#' @rdname correlation +as.correlation <- as.Correlation # Backward compatibility + +#' @export +#' @rdname correlation +print.Correlation <- function(x, digits = 3, cutoff = 0, ...) { + if (!is.Correlation(x)) + stop("x must be a 'Correlation' object (correlation matrix)") + + method <- attr(x, "method") + if (is.null(method)) { + cat("Correlation matrix:\n") + } else { + cat("Matrix of ", method, ":\n", sep = "") + } + + na.method <- attr(x, "na.method") + if (!is.null(na.method)) { + cat("(calculation uses ", na.method, ")\n", sep = "") + } else { + na.action <- attr(x, "na.action") + if (!is.null(na.action)) + cat("(missing values are managed with ", na.action, ")\n", sep = "") + } + cform <- format(round(x, digits = digits)) + nc <- nchar(cform[1L], type = "c") + cform[abs(x) < cutoff] <- paste(rep(" ", nc), collapse = "") + print(cform, quote = FALSE, ...) + + invisible(x) } -## Summary of a 'correlation' object -summary.correlation <- function (object, cutpoints = c(0.3, 0.6, 0.8, 0.9, 0.95), -symbols = c(" ", ".", ",", "+", "*", "B"), ...) -{ - ## Replace the correlation matrix by symbols using symnum() - res <- symnum(unclass(object), cutpoints = cutpoints, symbols = symbols, - corr = TRUE, ...) - - ## Reinject comment, if it exists - comment(res) <- comment(object) - - ## Look for a "method" attribute - attr(res, "method") <- attr(object, "method") - ## ... and na.action/na.method attributes - attr(res, "na.action") <- attr(object, "na.action") - attr(res, "na.method") <- attr(object, "na.method") - - ## Set this as 'summary.correlation' object - class(res) <- c("summary.correlation", "noquote") - - return(res) +#' @export +#' @rdname correlation +summary.Correlation <- function(object, cutpoints = c(0.3, 0.6, 0.8, 0.9, 0.95), +symbols = c(" ", ".", ",", "+", "*", "B"), ...) { + # Replace the correlation matrix by symbols using symnum() + res <- symnum(unclass(object), cutpoints = cutpoints, symbols = symbols, + corr = TRUE, ...) + + # Reinject comment and other attributes, if they exist + comment(res) <- comment(object) + attr(res, "method") <- attr(object, "method") + attr(res, "na.action") <- attr(object, "na.action") + attr(res, "na.method") <- attr(object, "na.method") + + class(res) <- c("summary.Correlation", "noquote") + res } -## Print method for the 'summary.correlation' object -print.summary.correlation <- function (x, ...) -{ - method <- attr(x, "method") - if (is.null(method)) { - cat("Correlation matrix:\n") - } else { - cat("Matrix of ", method, ":\n", sep = "") - } - - na.method <- attr(x, "na.method") - if (!is.null(na.method)) { - cat("(calculation uses ", na.method, ")\n", sep = "") - } else { - na.action <- attr(x, "na.action") - if (!is.null(na.action)) - cat("(missing values are managed with ", na.action, ")\n", sep = "") - } - - print(structure(as.character(x), dim = dim(x), dimnames = dimnames(x), - legend = attr(x, "legend"), class = "noquote"), ...) - - return(invisible(x)) +#' @export +#' @rdname correlation +print.summary.Correlation <- function(x, ...) { + method <- attr(x, "method") + if (is.null(method)) { + cat("Correlation matrix:\n") + } else { + cat("Matrix of ", method, ":\n", sep = "") + } + + na.method <- attr(x, "na.method") + if (!is.null(na.method)) { + cat("(calculation uses ", na.method, ")\n", sep = "") + } else { + na.action <- attr(x, "na.action") + if (!is.null(na.action)) + cat("(missing values are managed with ", na.action, ")\n", sep = "") + } + + print(structure(as.character(x), dim = dim(x), dimnames = dimnames(x), + legend = attr(x, "legend"), class = "noquote"), ...) + + invisible(x) } -## Plot a 'correlation' object (basically the ellipse's plotcorr() function, but -## as plot() method for 'corr' object and with different default values -## Also, numbers are printed inside the ellipses with numbers = TRUE -## TODO: change the way labels are plotted -## TODO: a comparison plot, when y is not NULL -plot.correlation <- function (x, y = NULL, outline = TRUE, +#' @export +#' @rdname correlation +plot.Correlation <- function(x, y = NULL, outline = TRUE, cutpoints = c(0.3, 0.6, 0.8, 0.9, 0.95), palette = rwb.colors, col = NULL, numbers = TRUE, digits = 2, type = c("full", "lower", "upper"), -diag = (type == "full"), cex.lab = par("cex.lab"), cex = 0.75 * par("cex"), ...) -{ - if (!is.correlation(x)) - stop("x must be a 'correlation' object") - - type <- match.arg(type) - diag <- as.logical(diag[1]) - ## Compute colors from cutpoints and palette - if (is.null(col)) { - ## -1.1 to include -1 - intervals are (,] - ## cutpoints - 0.0001 for positive values to include lower limit instead - br <- c(-1.1, rev(-cutpoints), cutpoints - 0.0001, 1) - ct <- cut(x, breaks = br) - col <- palette(length(levels(ct)))[as.numeric(ct)] - } - - ## Call the plotcorr() function from ellipse package - plotcorr(x, outline = outline, col = col, numbers = FALSE, type = type, - diag = diag, cex.lab = cex.lab, cex = cex, ...) - ## Do we print the numbers inside the ellipses? - if (isTRUE(numbers)) { - coords <- expand.grid(1:nrow(x), nrow(x):1) - labels <- format(round(x, digits = digits), digits = digits) - ## Do we plotted only upper or lower triangle and diagonal? - ## Note: we need to invert y-coordinates! - yinv <- max(coords) + 1 - coords[, 2] - if (diag) { - if (type == "lower") { - ## Keep only lower triangle + diagonal - coords <- coords[coords[, 1] <= yinv, ] - coords <- coords[order(coords[, 1]), ] - labels <- labels[lower.tri(labels, diag = TRUE)] - } else if (type == "upper") { - ## Keep only upper triangle - coords <- coords[coords[, 1] >= yinv, ] - coords <- coords[order(coords[, 1]), ] - labels <- labels[upper.tri(labels, diag = TRUE)] - } - } else { # No diagonals - if (type == "lower") { - ## Keep only lower triangle - coords <- coords[coords[, 1] < yinv, ] - coords <- coords[order(coords[, 1]), ] - labels <- labels[lower.tri(labels)] - } else if (type == "upper") { - ## Keep only upper triangle - coords <- coords[coords[, 1] > yinv - 1, ] - coords <- coords[order(coords[, 1]), ] - coords[, 2] <- coords[, 2] - 1 - labels <- labels[upper.tri(labels)] - } else { - ## Plot everything, except diagonal => put test to "" there - diag(labels) <- "" - } - } - text(coords, labels = labels, cex = cex, ...) - } - return(invisible()) +diag = (type == "full"), cex.lab = par("cex.lab"), cex = 0.75 * par("cex"), +...) { + if (!is.Correlation(x)) + stop("x must be a 'Correlation' object") + + type <- match.arg(type) + diag <- as.logical(diag[1]) + # Compute colors from cutpoints and palette + if (is.null(col)) { + # -1.1 to include -1 - intervals are (,] + # cutpoints - 0.0001 for positive values to include lower limit instead + br <- c(-1.1, rev(-cutpoints), cutpoints - 0.0001, 1) + ct <- cut(x, breaks = br) + col <- palette(length(levels(ct)))[as.numeric(ct)] + } + + # Call the plotcorr() function from ellipse package + plotcorr(x, outline = outline, col = col, numbers = FALSE, type = type, + diag = diag, cex.lab = cex.lab, cex = cex, ...) + # Do we print the numbers inside the ellipses? + if (isTRUE(numbers)) { + coords <- expand.grid(1:nrow(x), nrow(x):1) + labels <- format(round(x, digits = digits), digits = digits) + # Do we plotted only upper or lower triangle and diagonal? + # Note: we need to invert y-coordinates! + yinv <- max(coords) + 1 - coords[, 2] + if (diag) { + if (type == "lower") { + # Keep only lower triangle + diagonal + coords <- coords[coords[, 1] <= yinv, ] + coords <- coords[order(coords[, 1]), ] + labels <- labels[lower.tri(labels, diag = TRUE)] + } else if (type == "upper") { + # Keep only upper triangle + coords <- coords[coords[, 1] >= yinv, ] + coords <- coords[order(coords[, 1]), ] + labels <- labels[upper.tri(labels, diag = TRUE)] + } + } else {# No diagonals + if (type == "lower") { + # Keep only lower triangle + coords <- coords[coords[, 1] < yinv, ] + coords <- coords[order(coords[, 1]), ] + labels <- labels[lower.tri(labels)] + } else if (type == "upper") { + # Keep only upper triangle + coords <- coords[coords[, 1] > yinv - 1, ] + coords <- coords[order(coords[, 1]), ] + coords[, 2] <- coords[, 2] - 1 + labels <- labels[upper.tri(labels)] + } else { + # Plot everything, except diagonal => put test to "" there + diag(labels) <- "" + } + } + text(coords, labels = labels, cex = cex, ...) + } + invisible() } -## Add vectors for supplementary variables in a PCA correlation plot -lines.correlation <- function (x, choices = 1L:2L, col = par("col"), lty = 2, -ar.length = 0.1, pos = NULL, cex = par("cex"), labels = rownames(x), ...) -{ - corrs <- x[, choices] - arrows(0, 0, corrs[, 1], corrs[, 2], col = col, lty = lty, - length = ar.length, ...) - if (!is.null(labels)){ - ## If pos is NULL, calculate pos for each variable so that label is - ## located outside - if (is.null(pos)) - pos <- c(2, 1, 4, 3, 2)[floor((atan2(corrs[, 2], corrs[, 1])/pi + - 1.25) / 0.5) + 1] - text(corrs, labels = labels, col = col, pos = pos, cex = cex, ...) - } - return(invisible(x)) +#' @export +#' @rdname correlation +lines.Correlation <- function(x, choices = 1L:2L, col = par("col"), lty = 2, +ar.length = 0.1, pos = NULL, cex = par("cex"), labels = rownames(x), ...) { + corrs <- x[, choices] + arrows(0, 0, corrs[, 1], corrs[, 2], col = col, lty = lty, + length = ar.length, ...) + if (!is.null(labels)) { + # If pos is NULL, calculate pos for each variable so that label is + # located outside + if (is.null(pos)) + pos <- c(2, 1, 4, 3, 2)[floor((atan2(corrs[, 2], corrs[, 1])/pi + + 1.25) / 0.5) + 1] + text(corrs, labels = labels, col = col, pos = pos, cex = cex, ...) + } + invisible(x) } diff --git a/R/file.R b/R/file.R deleted file mode 100644 index 2c5b0dd..0000000 --- a/R/file.R +++ /dev/null @@ -1,182 +0,0 @@ -## Essentially a series of base R function that manipulate files and directories -## and that are renamed/rationalized for facility -## TODO: is the object name correctly choosen? or filepath? or fpath? - -## A replacement for file.path -filePath <- function (..., fsep = .Platform$file.sep) -{ - ## Create a filePath objects inheriting from character - return(structure(file.path(..., fsep = fsep), - class = c("filePath", "character"))) -} - -## The print function of filename separates dirs (ending with /) from files -## and also indicate which file already exists on disk or not -## EXPERIMENTAL FEATURE... Should require an option to activate/inactivate -## test of files on disk! -## TODO: determine what is an alias or link! -print.filePath <- function (x, ...) -{ - path <- as.character(x) - path <- gsub("\\\\", "/", path) - ## Make sure paths are ended with / to differentiate them from files - isdir <- file.info(path)$isdir - ## Non-existent files are these ones - nofile <- is.na(isdir) - path[nofile] <- paste(path[nofile], "*", sep = "") - ## These are directories - isdir <- (isdir & !grepl("/$", path)) - isdir[is.na(isdir)] <- FALSE - path[isdir] <- paste(path[isdir], "/", sep = "") - ## Print it - print(noquote(paste("<", path, ">", sep = ""))) - return(invisible(x)) -} - -as.filePath <- function (x, ...) - return(structure(as.character(x), class = c("filePath", "character"))) - -is.filePath <- function (x) - return(inherits(x, "filePath")) - -isDir <- function (filePath) - return(file.info(filePath)$isdir) - -isFile <- function (filePath) - return(file.exists(filePath) & !file.info(filePath)$isdir) - -## Rework paths -## basename -fileName <- function (filePath) - return(structure(basename(filePath), class = c("filePath", "character"))) - -## dirname -fileDir <- function (filePath) - return(structure(dirname(filePath), class = c("filePath", "character"))) - -## path.expand -fileExpand <- function (filePath) - return(structure(path.expand(filePath), class = c("filePath", "character"))) - -## normalizePath -fileNormalize <- function (filePath, mustWork = FALSE) - return(structure(normalizePath(filePath, winslash = "/", mustWork = mustWork), - class = c("filePath", "character"))) - -## Get various files or directories -## R.home -dirR <- function (component = "home") - return(structure(R.home(component), class = c("filePath", "character"))) - -## TODO: find.package() and path.package() -## system.file TODO: case it returns ""! And should we use mustWork? -filePackage <- function (..., package = "base", lib.loc = NULL, mustWork = FALSE) - return(structure(system.file(..., package = package, lib.loc = lib.loc, - mustWork = mustWork), class = c("filePath", "character"))) - -## tempdir -dirTemp <- function () - return(structure(tempdir(), class = c("filePath", "character"))) - -## tempfile -fileTemp <- function (pattern = "file", tmpdir = tempdir(), fileext = "") - return(structure(tempfile(pattern = pattern, tmpdir = tmpdir, - fileext = fileext), class = c("filePath", "character"))) - -## Sys.which, TODO: keep names and display them in print.filePath objects! -fileFind <- function (names) - return(structure(Sys.which(names), names = names, class = c("filePath", "character"))) - -## List dirs = dir() = list.dirs() -dirList <- function (filePath = ".", full.names = TRUE, recursive = TRUE) - return(structure(list.dirs(path = filePath, full.names = full.names, - recursive = recursive), class = c("filePath", "character"))) - -## List files = dir() and list.files() -fileList <- function (filePath = ".", pattern = NULL, all.files = FALSE, -full.names = FALSE, recursive = FALSE, ignore.case = FALSE, include.dirs = FALSE) - return(structure(dir(path = filePath, pattern = pattern, all.files = all.files, - full.names = full.names, recursive = recursive, - ignore.case = ignore.case, include.dirs = include.dirs), - class = c("filePath", "character"))) - -## List files using wildcard expansion ('globbing') -fileListGlob <- function (filePath, dir.mark = FALSE) - return(structure(Sys.glob(paths = filePath, dirmark = dir.mark), - class = c("filePath", "character"))) - -## Various file manipulation functions that do not return a path object -## (just homogenize the name...) -dirCreate <- .Recode(get("dir.create", envir = baseenv())) -fileAccess <- .Recode(get("file.access", envir = baseenv())) -fileAppend <- .Recode(get("file.append", envir = baseenv())) -fileRename <- .Recode(get("file.rename", envir = baseenv())) -fileCopy <- .Recode(get("file.copy", envir = baseenv())) -fileCreate <- .Recode(get("file.create", envir = baseenv())) -fileExists <- .Recode(get("file.exists", envir = baseenv())) -fileInfo <- .Recode(get("file.info", envir = baseenv())) -fileChmod <- .Recode(get("Sys.chmod", envir = baseenv())) -fileUMask <- .Recode(get("Sys.umask", envir = baseenv())) -fileTime <- function (filePath, time) - Sys.setFileTime(path = filePath, time = time) -fileRemove <- .Recode(get("file.remove", envir = baseenv())) -## This is "stronger" than fileRemove()! -fileDelete <- function (filePath, recursive = FALSE, force = FALSE) - return(unlink(x = filePath, recursive = recursive, force = force)) - -fileLink <- .Recode(get("file.link", envir = baseenv())) -fileSymLink <- .Recode(get("file.symlink", envir = baseenv())) -fileReadLink <- function (filePath) - return(structure(Sys.readlink(paths = filePath), - class = c("filePath", "character"))) - -## This is linked to some GUI element, possibly... anyway... -fileShow <- .Recode(get("file.show", envir = baseenv())) -## TODO: this file choose... but this is really for svDialogs (dlgOpen(), dlgSave()) -#fileChoose <- file.choose - -## A more convenient setwd()/getwd() using objects -wdir <- function (dir = NULL) -{ - if (is.null(dir)) { - dir <- getwd() - class(dir) <- c("filePath", "character") - ## Make sure to use /, even under Windows - dir <- gsub("\\\\", "/", dir) - return(dir) - } else { # Change current working directory - owdir <- setwd(dir) - ## Make sure to use /, even under Windows - owdir <- gsub("\\\\", "/", owdir) - class(owdir) <- c("filePath", "character") - ## Save old working directory - .assignTemp(".owdir", owdir) - return(owdir) - } -} - -## Get or set session dir -sdir <- function (dir = NULL) -{ - if (is.null(dir)) { - dir <- getOption("R.initdir") - if (is.null(dir)) return(NULL) - class(dir) <- c("filePath", "character") - ## Make sure to use /, even under Windows - dir <- gsub("\\\\", "/", dir) - return(dir) - } else { # Change current session directory - osdir <- getOption("R.initdir") - ## TODO: make sure to do everything required to cleanly close current - ## session! - dir <- gsub("\\\\", "/", dir) - options(R.initdir = dir) - ## TODO: make everything we need to open the new session directory - ## Make sure to use /, even under Windows - osdir <- gsub("\\\\", "/", osdir) - class(osdir) <- c("filePath", "character") - ## Save old session directory - .assignTemp(".osdir", osdir) - return(osdir) - } -} diff --git a/R/graphics.R b/R/graphics.R deleted file mode 100644 index 816fec9..0000000 --- a/R/graphics.R +++ /dev/null @@ -1,427 +0,0 @@ -## Package graphics: renaming of functions to meet SciViews coding convention -## and definition of a couple new functions -## Must add things from plotrix!!! => svPlot + vioplot + wvioplot + beanplot + ellipse + gplots - -## Graphical options -## par(.., no.readonly = FALSE) is not explicit enough -## Covered functions: graphics::par() -plotOpt <- function (...) graphics::par(..., no.readonly = TRUE) -plotOptAll <- function (...) graphics::par(..., no.readonly = FALSE) -#clip() # Set clipping region in the graph - -## The plot.xxx() functions... -## Covered functions: graphics::plot.new(), graphics::frame() -plotNew <- .Recode(graphics::plot.new) # () and synonym to frame() that is ambiguous => don't use it - -## The simple dividing of equidistant boxes is done using plotOpt(mfrow) & plotOpt(mfcol) - -## The layout() mechanism -## Covered functions: graphics::layout(), graphics::layout.show() -#layout() -layoutShow <- graphics::layout.show - -## The screen() mechanism -## Covered functions: graphics::split.screen(), graphics::screen() -## graphics::erase.screen(), graphics::close.screen() -screenSplit <- graphics::split.screen -screenSet <- graphics::screen -screenDelete <- graphics::erase.screen -screenClose <- graphics::close.screen - -## Dimensions -## Covered functions: graphics::lcm(), graphics::xinch(), graphics::yinch(), -## graphics::xyinch(), graphics::grconvertX(), graphics::grconvertY() -#lcm() # Take a number "x" and returns a string with "x cm" -# Note: __ is used to denote the units, like lenght__cm -l__cm <- graphics::lcm -#xinch(), yinch() and xyinch() convert from inch to plot units -x__in2user <- graphics::xinch -y__in2user <- graphics::yinch -xy__in2user <- graphics::xyinch -#Same for cm2user -x__cm2user <- function (x = 1, warn.log = TRUE) - x__in2user(x = x / 2.54, warn.log = warn.log) -y__cm2user <- function (y = 1, warn.log = TRUE) - y__in2user(y = y / 2.54, warn.log = warn.log) -xy__cm2user <- function (xy = 1, warn.log = TRUE) - xy__in2user(xy = xy / 2.54, warn.log = warn.log) -## grconvertX() and grconvertY() do not allow metric units, -## plus there is no grcxonvertXY() -## TODO: eliminate mm here! -xConvert <- function (x, from = "user", to = "user") -{ - ## Perform conversion from cm or mm to inches in from, if needed - x <- switch(from, - cm = x / 2.54, - mm = x / 25.4, - x) - if (from %in% c("cm", "mm")) from <- "inches" - if (to %in% c("cm", "mm")) { - res <- graphics::grconvertX(x = x, from = from, to = "inches") - if (to == "cm") res <- res * 2.54 else res <- res * 25.4 - return(res) - } else return(graphics::grconvertX(x = x, from = from, to = to)) -} - -yConvert <- function (y, from = "user", to = "user") -{ - ## Perform conversion from cm or mm to inches in from, if needed - y <- switch(from, - cm = y / 2.54, - mm = y / 25.4, - y) - if (from %in% c("cm", "mm")) from <- "inches" - if (to %in% c("cm", "mm")) { - res <- graphics::grconvertY(y = y, from = from, to = "inches") - if (to == "cm") res <- res * 2.54 else res <- res * 25.4 - return(res) - } else return(graphics::grconvertY(y = y, from = from, to = to)) -} - -xyConvert <- function (x, y, from = "user", to = "user") -{ - ## Either x and y are provided, or x is a list with $x and $y - if (missing(y)) { - if (!is.list(x) && names(x) != c("x", "y")) - stop("You must provide a list with 'x' and 'y', or two vectors") - y <- x$y - x <- x$x - } - return(list( - x = xConvert(x = x, from = from, to = to), - y = yConvert(y = y, from = from, to = to))) -} - -## Basic drawing functions in the graphics package -#points() # generic -#lines() # generic -#matpoints() -#matlines() -#abline() -#segments() -#arrows() -#polygon() -#polypath() -#curve() -#rect() -## TODO: add circle() and ellipse()??? from ellipse package! -## ellipse there is a generic function to draw confidence region for two parameters -## It is: ellipse <- function (x, ...) UseMethod("ellipse") -## => use ovals() and circles() instead??? -## There are draw.circle() and draw.ellipse() in the plotrix package -#box() -# axis() and Axis() as an exception! -#axTicks() in graphics versus axisTicks() in grDevices => forgot about the former? -#grid() # TODO: rename this to avoid confusion with the grid graphic system? -#rug() -#text() # generic -#mtext() -#title() -#legend() -# Note: graphics::strwidth() and graphics::strheight() are treated in character.R! -#symbols() -#rasterImage() # Leave like it is? -## Problem with contour() # generic, add items to a graph when using add = TRUE -## We want the same mechanisms as for plot() vs points()/lines()... So, here, -## we must redefine a generic for that! -## We use contours() vs contourplot() -contours <- function (x, ...) UseMethod("contours") -contours.default <- function (x = seq(0, 1, length.out = nrow(z)), -y = seq(0, 1, length.out = ncol(z)), z, labels = NULL, labcex = 0.6, -drawlabels = TRUE, method = "flattest", col = par("fg"), lty = par("lty"), -lwd = par("lwd"), add = TRUE, ...) - graphics::contour.default(x = x, y = y, z = z, labels = labels, - labcex = labcex, drawlabels = drawlabels, method = method, col = col, - lty = lty, lwd = lwd, add = add, ...) -## See also the shape package! -## qqlines() from stats -## plot() method of density object in stats + plot() method of hclust objects - -## High-level plot function in the graphics package -## Covered functions: graphics::filled.contour(), graphics::dotchart(), -## graphics::smoothScatter(), graphics::stars(), graphics::stem(), -## graphics::stripchart(), graphics::contour() -#plot() # generic -#hist() # generic -#pie() # Exception, because pieplot() does not exist in English -#image() # generic # Exception: instead of imageplot() or so? -# + heat.colors(), terrain.colors() and topo.colors() from grDevices! -#pairs() # generic # TODO: define pairsplot()? -#persp() # generic # TODO: define persplot()? + perspx() in plotrix!? -#matplot() -#barplot() # generic -#boxplot() # generic -#contour() # generic, create a graph when using default add = FALSE; contourplot() in lattice! -contourplot <- graphics::contour -## R CMD check claims he does not find filledcontour => we don't copy the -## function but call it from filledplot() -filledplot <- function (x = seq(0, 1, length.out = nrow(z)), y = seq(0, 1, -length.out = ncol(z)), z, xlim = range(x, finite = TRUE), -ylim = range(y, finite = TRUE), zlim = range(z, finite = TRUE), -levels = pretty(zlim, nlevels), nlevels = 20, color.palette = cm.colors, -col = color.palette(length(levels) - 1), plot.title, plot.axes, -key.title, key.axes, asp = NA, xaxs = "i", yaxs = "i", las = 1, -axes = TRUE, frame.plot = axes, ...) - graphics::filled.contour(x = x, y = y, z = z, xlim = xlim, ylim = ylim, - zlim = zlim, levels = levels, nlevels = nlevels, - color.palette = color.palette, col = col, plot.title = plot.title, - plot.axes = plot.axes, key.title = key.title, key.axes = key.axes, - asp = asp, xaxs = xaxs, yaxs, yaxs, las = las, axes = axes, - frame.plot = frame.plot, ...) -starplot <- graphics::stars -stemplot <- function (x, scale = 1, width = 80, atom = 1e-08) - graphics::stem(x = x, scale = scale, width = width, atom = atom) -stripplot <- graphics::stripchart -clevelandplot <- graphics::dotchart -smoothplot <- graphics::smoothScatter -#coplot() -#fourfoldplot() -#cdplot() # generic -#mosaicplot() # generic -#spineplot() # generic -#sunflowerplot() # generic -#assocplot() with a better version in vcd as assoc()! -#+vectorplot() in vectorplot.R! -## + qqplot()/qqnorm() from stats -## + screeplot() + biplot() - -## Panel functions and other utilities -## Problem: panel is used in lattice => rename this here? -## Covered functions: graphics::panel.smooth(), graphics::co.intervals() -## + panel.hist() and panel.cor() in ?pairs example -## one way to differentiate them, is to use panel at the end here, like plot -smoothPanel <- graphics::panel.smooth -## TODO: rework the various panel.XXX function in panels.R, panels.diag.R & pcomp.R! -coplotIntervals <- graphics::co.intervals - -## Graphic interaction in the graphics package -#locator() -#identify() # generic - -## "Internal" function in graphics package (normally not for the end-user) -# Not normally called by the end-user -## Covered functions: graphics::plot.window(), graphics::plot.xy(), -## graphics::.filled.contour(), graphics::bxp(), -plotWindowInternal <- .Recode(graphics::plot.window) -plotInternal <- .Recode(graphics::plot.xy) -boxplotInternal <- graphics::bxp -## Apparently not in R 2.14.0! -#filledplotInternal <- graphics::.filled.contour - - -## grDevices ################################################################### - -## Devices management -devNew <- grDevices::dev.new -devCur <- .Recode(grDevices::dev.cur) -devList <- grDevices::dev.list -devNext <- .Recode(grDevices::dev.next); formals(devNext)$which <- quote(devCur()) -devPrev <- .Recode(grDevices::dev.prev); formals(devPrev)$which <- quote(devCur()) -devSet <- .Recode(grDevices::dev.set); formals(devSet)$which <- quote(devNext()) -devClose <- .Recode(grDevices::dev.off); formals(devClose)$which <- quote(devCur()) -devCloseAll <- grDevices::graphics.off -devControl <- .Recode(grDevices::dev.control) - -## The following two functions call .Internal(devHoldFlush) and here R CMD check -## got fooled because it does not found a function called devHoldFlush() -## So, we define also a devHoldFlush() function here to cope with this problem -devHold <- .Recode(grDevices::dev.hold) -devFlush <- .Recode(grDevices::dev.flush) -devHoldFlush <- function (level = 1L) -{ - level <- round(level)[1] - if (level > 0) { - devHold(level) - } else if (level < 0) { - devFlush(-level) - } else return() -} -devCopy <- .Recode(grDevices::dev.copy) -devCopyNew <- grDevices::dev.print -devCopyEps <- grDevices::dev.copy2eps -devCopyPdf <- grDevices::dev.copy2pdf -devCopyBitmap <- grDevices::dev2bitmap -devSave <- .Recode(grDevices::savePlot) -## Because these two function call .Internal() that fools R CMD check, we define -## getSnapshot() and playSnapshot() as synonyms of devRecord() and devReplay() -devRecord <- getSnapshot <- .Recode(grDevices::recordPlot) -devReplay <- playSnapshot <- .Recode(grDevices::replayPlot) -devCapture <- .Recode(grDevices::dev.capture) -#devAskNewPage() -## For devSize, default unit is "cm" instead of "in" for dev.size() -devSize <- .Recode(grDevices::dev.size); formals(devSize)$units <- c("cm", "in", "px") -devCapabilities <- .Recode(grDevices::dev.capabilities) -devInteractive <- grDevices::dev.interactive -isDevInteractive <- grDevices::deviceIsInteractive - -## TODO: a way to define these functions as platform independent! -## Graphic devices -#if (.Platform$OS.type == "unix") { -# devX11 <- grDevices::X11 # + x11() -# devX11Opt <- grDevices::X11.options -#} -#if (grepl("^mac", .Platform$pkgType)) { -# devQuartz <- grDevices::quartz -# devQuartzOpt <- grDevices::quartz.options -# ## There is a quartz.save() function defined somewhere! -#} -#if (.Platform$OS.type == "windows") { -# devWin <- grDevices::windows -# devWinOpt <- grDevices::windows.options -# devWinPrint <- grDevices::win.print -# devWinMetafile <- grDevices::win.metafile -# devToTop <- grDevices::bringToTop # TODO: a similar function for Linux and Mac OS X! -# formals(devToTop)$which <- quote(devCur()) -# # this is bringToTop(which = dev.cur(), stay = FALSE) # with -1 is console -# devMsg <- grDevices::msgWindow -# formals(devMsg)$which <- quote(devCur()) # TODO: a similar function for Linux and Mac OS X -# # this is msgWindow(type = c("minimize", "restore", "maximize", "hide", "recordOn", "recordOff"), -# # which = dev.cur() -# #recordGraphics(expr, list, env) # A function intended *only* for experts -#} -devPdf <- grDevices::pdf -devPdfOpt <- grDevices::pdf.options -devPS <- grDevices::postscript -devPSOpt <- grDevices::ps.options -#setEPS() -#setPS() -devPdfCairo <- grDevices::cairo_pdf -devPSCairo <- grDevices::cairo_ps -devSvg <- grDevices::svg -devBitmap <- grDevices::bitmap -devXfig <- grDevices::xfig - -## The following four functions call .Internal(X11) which has more arguments -## than X11 itself => we don't copy content, but call it instead -devBmp <- function (filename = "Rplot%03d.bmp", width = 480, height = 480, -units = "px", pointsize = 12, bg = "white", res = NA, ..., -type = c("cairo", "Xlib", "quartz"), antialias) - grDevices::bmp(filename = filename, width = width, height = height, - units = units, pointsize = pointsize, bg = bg, res = res, ..., - type = type, antialias = antialias) -devJpeg <- function (filename = "Rplot%03d.jpeg", width = 480, height = 480, -units = "px", pointsize = 12, quality = 75, bg = "white", -res = NA, ..., type = c("cairo", "Xlib", "quartz"), antialias) - grDevices::jpeg(filename = filename, width = width, height = height, - units = units, pointsize = pointsize, quality = quality, bg = bg, - res = res, ..., type = type, antialias = antialias) -devPng <- function (filename = "Rplot%03d.png", width = 480, height = 480, -units = "px", pointsize = 12, bg = "white", res = NA, ..., -type = c("cairo", "cairo-png", "Xlib", "quartz"), antialias) - grDevices::png(filename = filename, width = width, height = height, - units = units, pointsize = pointsize, bg = bg, res = res, ..., - type = type, antialias = antialias) -devTiff <- function (filename = "Rplot%03d.tiff", width = 480, height = 480, -units = "px", pointsize = 12, compression = c("none", "rle", -"lzw", "jpeg", "zip"), bg = "white", res = NA, ..., type = c("cairo", -"Xlib", "quartz"), antialias) - grDevices::tiff(filename = filename, width = width, height = height, - units = units, pointsize = pointsize, compression = compression, - bg = bg, res = res, ..., type = type, antialias = antialias) -#pictex() # device, historical interest only - -## Color management -#palette() # get or set the color palette -#colors() and colours() for a list of color names -## TODO: wrong name? colorToRgb? -color2rgb <- .Recode(grDevices::col2rgb) # convert colors to rgb -#rgb() -#rgb2hsv() -#hsv() -#hcl() -#gray() and grey() -colorAdjust <- grDevices::adjustcolor -#colorRamp() and colorRampPalette() to create color ramps -colorDens <- grDevices::densCols -## Predefined color sets -colorBlues9 <- function () grDevices::blues9 -colorRainbow <- grDevices::rainbow -colorHeat <- grDevices::heat.colors -colorTerrain <- grDevices::terrain.colors -colorTopo <- grDevices::topo.colors -colorCm <- grDevices::cm.colors -colorCwm <- cwm.colors -colorRwb <- rwb.colors -colorRyg <- ryg.colors -colorGray <- grDevices::gray.colors -colorGrey <- grDevices::grey.colors -## colorConverter object -#colorConverter() -colorConvertRgb <- grDevices::make.rgb -colorConvert <- grDevices::convertColor - -## Fonts -fontType1 <- grDevices::Type1Font -fontCid <- grDevices::CIDFont -fontsPS <- grDevices::postscriptFonts -fontsPdf <- grDevices::pdfFonts -fontsEmbed <- grDevices::embedFonts -#if (.Platform$OS.type == "windows") { -# #windowsFont() -# #windowsFonts() -#} -#if (grepl("^mac", .Platform$pkgType)) { -# #quartzFont() -# #quartzFonts() -#} -#if (.Platform$OS.type == "unix") { -# #X11Font() -# #X11Fonts() -#} - -## raster objects -## TODO: a raster() function to create such an object -#as.raster() -#is.raster() -#+ rasterImage() in the package graphics to draw such a raster object in a plot - -## Graphic events -## TODO: change to: devEvent, devEventHandlers, devEventEnv, devEventEnv<- -#getGraphicsEvent() -#setGraphicsEventHandlers() -#getGraphicsEventEnv() -#setGraphicsEventEnv() - -## Graphic annotations -#as.graphicsAnnot() - -## Utility functions -optCheck <- grDevices::check.options #utility function to check options consistency! -nclassSturges <- grDevices::nclass.Sturges -nclassScott <- grDevices::nclass.scott -nclassFD <- grDevices::nclass.FD -#chull() -#contourLines() -#trans3d() # transform from 3d to 2d -rangeExtend <- grDevices::extendrange -#pretty() from base => rangePretty()? but a generic function! -#axisTicks(), .axisPars() -boxplotStats <- grDevices::boxplot.stats -#xyTable() # Used by sunflowerplot() -xyCoords <- grDevices::xy.coords -xyzCoords <- grDevices::xyz.coords -in2cm <- grDevices::cm -cm2in <- function (x) x / cm(1) -#n2mfrow() computes sensible mfrow from number of graphs -# + .ps.prolog - -## Dynamite plot by Samuel Brown -## http://www.r-bloggers.com/dynamite-plots-in-r/ -## Much critisize! See http://emdbolker.wikidot.com/blog%3Adynamite -## http://pablomarin-garcia.blogspot.co.nz/2010/02/why-dynamite-plots-are-bad.html -## http://biostat.mc.vanderbilt.edu/wiki/pub/Main/TatsukiKoyama/Poster3.pdf -## TODO: find a better representation for ANOVE; al least both hgh ad low wiskers -## or superpose points, or vioplot, or...? -#dynamitePlot <- function (height, error, names = NA, significance = NA, -#ylim = c(0, maxLim), ...) -#{ -# maxLim <- 1.1 * max(mapply(sum, height, error)) -# bp <- barplot(height, names.arg = names, ylim = ylim, ...) -# arrows(x0 = bp, y0 = height, y1 = height + error, angle = 90) -# text(x = bp, y = 0.2 + height + error, labels = significance) -#} -#Values <- c(1, 2, 5, 4) -#Errors <- c(0.25, 0.5, 0.33, 0.12) -#Names <- paste("Trial", 1:4) -#Sig <- c("a", "a", "b", "b") -#dynamitePlot(Values, Errors, names = Names, significance = Sig) diff --git a/R/ln.R b/R/ln.R index c0479c0..6ad5aca 100755 --- a/R/ln.R +++ b/R/ln.R @@ -1,18 +1,47 @@ -## ln(x) and ln1p(x) are wrappers for log(x) and log1p(x) to avoid confusion -## with log10(x) that some beginneRs do, thinking that log(x) is logarithm in -## base 10! lg(x) is a wrapper for log10(x) for the same reason, -## and lb() is a wrapper for log2() -## lg1p(x) is the same as log1p() but it returns its result in base 10 log -## 'e' is a useful constant and is equal to exp(1) -ln <- function (x) log(x) +#' Logarithms. +#' +#' To avoid confusion using the default `log()` function, which is natural +#' logarithm, but spells out like base 10 logarithm in the mind of some +#' beginneRs, we define `ln()` and `ln1p()` as wrappers for `log()`` with +#' default `base = exp(1)` argument and for `log1p()`, respectively. +#' For similar reasons, `lg()` is a wrapper of `log10()` (there is no possible +#' confusion here, but 'lg' is another common notation for base 10 logarithm). +#' `lg1p()` is a convenient way to use the optimized code to calculate the +#' logarithm of x + 1, but returning the result in base 10 logarithm. `E` is the +#' Euler constant and is provided for convenience as `exp(1)`. Finally `lb()` is +#' a synonym of `log2()`. +#' +#' @param x A numeric or complex vector. +#' @export +#' @seealso [log()] +#' @keywords math +#' @concept logarithms and exponentials +#' @examples +#' ln(exp(3)) # Same as log(exp(3)) +#' ln1p(c(0, 1, 10, 100)) # Wrapper for log1p() +#' lg(10^3) # Same as log10(10^3) +#' lg1p(c(0, 1, 10, 100)) # log10(x + 1), but optimized for x << 1 +#' E^4 # Similar to exp(4), but different calculation! +# Note: exp(4) is to be preferred to E^4, if possible! +#' lb(1:3) # Wrapper for log2() +ln <- function(x) log(x) -ln1p <- function (x) log1p(x) +#' @export +#' @rdname ln +ln1p <- log1p -lg <- function (x) log10(x) +#' @export +#' @rdname ln +lg <- log10 -lg1p <- function (x) log1p(x) / log(10) +#' @export +#' @rdname ln +lg1p <- function(x) log1p(x) / log(10) -## Use of uppercase E, because all constants start with an uppercase +#' @export +#' @rdname ln E <- exp(1) -lb <- function (x) log2(x) +#' @export +#' @rdname ln +lb <- log2 diff --git a/R/misc.R b/R/misc.R index d21a24a..178a7ea 100644 --- a/R/misc.R +++ b/R/misc.R @@ -1,570 +1,168 @@ -## A series of functions defined or redefined for a simpler or better use of R -## Note to get a function, but change its default parameters, use: -## fun2 <- fun -## formals(fun2)$arg <- newDefaultValue -## -## Note that we should do something about T and F!!! -## -## Try using utils::globalVariables(c(".obj1", "obj2")) -## Use the dataframe package (+ aggregate?) -## Parentheses slower than curly braces in R? See http://radfordneal.wordpress.com/category/statistics/statistics-computing/r-programming/ -## fileHead() to print first few lines of a file -## Note that dataset is good replacement for data.frame, except it is also used in DMwR (minor issue?) -## unscale inverts the effect of scale() - -## Note that missing is an object class associated to a missing argument in S4 -## method signatures. So, the missing() function should probably be named -## is.missing() for consistency -#is.missing <- missing -## Also, classes() can show this: -classes <- function (x) -{ - ## Special case for a missing argument - X <- substitute(x) - res <- try(missing(X), silent = TRUE) - if (isTRUE(res)) return(c("missing", "ANY")) - ## Return the class of an object plus atomic/recursive and ANY - res <- class(x) - ## Special case for NULL - if (res == "NULL") return(c("NULL", "atomic", "ANY")) - ## Special case for name which is neither atomic, nor recursive - if (res == "name") return(c("name", "symbol", "language", "ANY")) - ## Is this a recursive or atomic object? - if (is.recursive(x)) { - ## Is this a language object? - if (is.language(x)) c(res, "language", "recursive", "ANY") else - c(res, "recursive", "ANY") - } else if (is.atomic(x)) { - c(res, "atomic", "ANY") - } else c(res, "ANY") +#' Enumerate items in an object. +#' +#' `enum()` is creating a vector of integers to enumarate items in an object. It +#' is particularly useful in the `for(i in enum(object))` construct. +#' +#' @param x Any object. +#' @note The pattern `for(i in 1:length(object))` is often found, but it fails +#' in case `length(object) == 0`! `enum()` is indeed a synonym of `seq_along()`, +#' but the later one is less expressive in the context. +#' @export +#' @seealso [seq_along()] +#' @examples +#' enum(letters) +#' enum(numeric(0)) +#' # Compare with: +#' 1:length(numeric(0)) +#' enum(NULL) +#' letters5 <- letters[1:5] +#' for (i in enum(letters5)) cat("letter", i, "=", letters5[i], "\n") +enum <- function(x) seq_along(x) + +#' Convenience functions for rows or columns manipulations. +#' +#' `nr()` and `nc()` are synonyms of the ugly `NROW()` or `NCOL()` that still +#' provide a result, even if `dim` attribute of the object is not set, on the +#' contrary to `nrow()`or `ncol()`. `ROWS` and `COLS` are constants that makes +#' call to `apply()` more expressive. +#' +#' @param x Any object. +#' @export +#' @seealso [NROW()] +#' @examples +#' mm <- matrix(1:6, nrow = 3) +#' nr(mm) +#' nc(mm) +#' vv <- 1:6 +#' nr(vv) +#' nc(vv) +#' # ROWS and COLS constants used with apply() +#' apply(mm, ROWS, mean) # Idem apply(mm, 1, mean) +#' apply(mm, COLS, mean) # Idem apply(mm, 2, mean) +nr <- NROW + +#' @export +#' @rdname nr +nc <- NCOL + +#' @export +#' @rdname nr +ROWS <- 1 + +#' @export +#' @rdname nr +COLS <- 2 + +#' Timing of R expressions. +#' +#' Similar to `system.time()` but returns a more convenient 'difftime' object. +#' +#' @param expr Valid \R expression to be timed. If missing, [proc.time()] is +#' used instead. +#' @param gc.first Logical - should a garbage collection be performed immediately +#' before the timing? Default is `TRUE`. +#' @export +#' @seealso [system.time()] +#' @examples +#' test <- timing(Sys.sleep(0.5)) +#' test +#' attr(test, "details") +timing <- function(expr, gc.first = TRUE) { + if (missing(expr)) { + res <- proc.time() + } else { + res <- system.time(expr, gcFirst = gc.first) + } + details <- as.difftime(res, units = "secs") + res <- as.difftime(res["elapsed"], units = "secs") + res <- details["elapsed"] + attr(res, "details") <- details + res } -## Warn when using = instead of <- for assignation... -## if option warnAssignWithEqualSign is TRUE -## NOTE: names(x) <- "a" assigns "a" to `names(x)` => this is wrong! -#`=` <- function(x, value) -#{ -# if (isTRUE(getOption("warnAssignWithEqualSign"))) -# warning("Use <- instead of = for assignation, or use == for equality test") -# assign(deparse(substitute(x)), value, envir = parent.frame()) +# To do later... ---------------------------------------------------------- + +# For non S4 objects, reuse @ to set attributes +# After all, they are, indeed, attributes! +# Note that we force exact match (same behaviour as @ used for S4 objects) +# Benchmark shows that my version is, at least 10x slower than the original `@` +# or `@<-` for S4 objects => should I use it or not?! +#Loc <- setClass("Loc", slots = c(lat = "numeric", long = "numeric")) +#loc <- Loc(lat = 0, long = 0) +#at <- base::`@` +#`at<-` <- base::`@<-` +#identical(loc@lat, at(loc, lat)) +#identical(loc@lat <- 1, at(loc, lat) <- 1) +#mbench <- microbenchmark::microbenchmark +#mbench(loc@lat, at(loc, lat), loc@lat <- 1, at(loc, lat) <- 1) +#`@` <- function(object, name) { +# arg <- substitute(name) +# if (is.name(arg)) name <- as.character(arg) +# if (isS4(object)) slot(object, name) else attr(object, name, exact = TRUE) +#} +# +#`@<-` <- function(x, which, value){ +# arg <- substitute(which) +# if (is.name(arg)) which <- as.character(arg) +# if (isS4(x)) { +# `slot<-`(x, which, check = TRUE, value) +# } else { +# `attr<-`(x, which, value) +# } #} -# is.wholenumber(), see ?as.integer => define isWholeInt? - -## A convenient starting object for holding items: .. == .GlobalEnv -#.. <- base::.GlobalEnv - -## Testing is.null(obj) is not enough to decide if an object is empty, because -## there may be like numeric(0), character(0), etc. The right way to do so is -## to use if (!length(obj)), but it would be more intuitive to define: -## TODO: isEmpty is a generic function (or even S4?) in filehash that does -## something different => change the name! -isEmpty <- function (x) !length(x) - -ifElse <- get("ifelse", envir = baseenv()) +# classes <- function(x) { +# # Special case for a missing argument +# X <- substitute(x) +# res <- try(missing(X), silent = TRUE) +# if (isTRUE(res)) return(c("missing", "ANY")) +# # Return the class of an object plus atomic/recursive and ANY +# res <- class(x) +# # Special case for NULL +# if (res == "NULL") return(c("NULL", "atomic", "ANY")) +# # Special case for name which is neither atomic, nor recursive +# if (res == "name") return(c("name", "symbol", "language", "ANY")) +# # Is this a recursive or atomic object? +# if (is.recursive(x)) { +# # Is this a language object? +# if (is.language(x)) c(res, "language", "recursive", "ANY") else +# c(res, "recursive", "ANY") +# } else if (is.atomic(x)) { +# c(res, "atomic", "ANY") +# } else c(res, "ANY") +# } + + +# Warn when using = instead of <- for assignation... +# if option warnAssignWithEqualSign is TRUE +# NOTE: names(x) <- "a" assigns "a" to `names(x)` => this is wrong! +#`=` <- function(x, value) { +# if (isTRUE(getOption("warnAssignWithEqualSign"))) +# warning("Use <- instead of = for assignation, or use == for equality test") +# assign(deparse(substitute(x)), value, envir = parent.frame()) +#} -`%else%` <- function (test, expr) if (test) return(invisible()) else expr -## Useful to write shorter code in something like: +#`%else%` <- function(test, expr) if (test) invisible() else expr +# Useful to write shorter code in something like: #test %else% break #test %else% stop(msg) #test %else% return(res) -## TODO: a tryError(), or some other name making basically -# res <- try(...., silent = TRUE) -# if (inherits(res, "try-error")) stop(msg) - -enum <- function (x) seq_along(x) - -## Defines only increasing integer sequences -## TODO: rethink this to make a more flexible sequencer + x:step:y? + rep()? -`%:%` <- function (lower, upper) - if (lower > upper) integer(0) else - seq.int(from = as.integer(lower), to = as.integer(upper), by = 1L) -## Useful in: -# for (ii in 1%:%l(v)) print(v) -## Because if (!l(v)) => prints nothing! 1:l(v) would give an error in this case -# for (ii in enum(v)) print(v) -## is fine too! - -## A better require() -package <- function (package, lib.loc = NULL, silent = TRUE, quietly = silent, -warn.conflicts = silent, -error = stop("there is no package called '", package, "'")) -{ - res <- suppressWarnings(require(package, lib.loc = lib.loc, - quietly = quietly, warn.conflicts = warn.conflicts, - character.only = TRUE)) - if (!res) error else invisible(res) -} - -## Now, we want to be able to use names() on environments too! -## Note that for environments, we got items by alphabetic order -## => not exactly the same as for vector, list, or so! -names <- function (x) - if (inherits(x, "environment")) ls(x, all.names = TRUE) else base::names(x) -## Do we implement `names<-` for environments??? This is a nonsense, may be? - -## Simpler names for often used functions -#as.num <- base::as.numeric -#as.int <- base::as.integer -#as.logic <- base::as.logical -# is.num, is.int, is.logic + num, int and logic -## To avoid problems with factors, tell to always use s(f1), or n(f1)/i(f1) - -## Since n is already used for a synonym of as.numeric(), I use l() here -l <- base::length -nc <- base::NCOL -nr <- base::NROW - -## Constants (must start with an uppercase letter) -## => redefine Pi instead of pi -## TODO: only uppercase for constants => PI??? -Pi <- base::pi -## Useful for apply() familly: -Rows <- 1 -Cols <- 2 -## Instead of apply(x, 2, sum), it gives apply(x, Cols, sum) - -## I don't like isTRUE, because if there is an attribute attached to TRUE, -## it returns FALSE! => define asTRUE which is more permissive! -## TODO: rethink all this! -asTRUE <- function (x) identical(TRUE, as.logical(x)) -isFALSE <- function (x) identical(FALSE, x) -asFALSE <- function (x) identical(FALSE, as.logical(x)) - -## How to simplify the use of if() by limiting possible special cases? -## use of any() and all() is there to cope with this, but still: -## 1) any(NA) => NA, unless any(NA, na.rm = TRUE) => FALSE -## 2) any(NULL) & any(logical(0)) => FALSE => OK -## We solve this by defining any.() and all.() -any. <- function (..., na.rm = TRUE) any(..., na.rm = na.rm) -all. <- function (..., na.rm = TRUE) all(..., na.rm = na.rm) -one <- function (x, na.rm = FALSE) UseMethod("one") -## Same as asTRUE(), but slower, because it is a method -one.default <- function (x, na.rm = FALSE) -{ - if (isTRUE(na.rm)) x <- na.omit(x) - return(identical(TRUE, as.logical(x))) -} -one. <- function (x, na.rm = TRUE) one(x, na.rm = na.rm) -stopIfNot <- base::stopifnot - -## TODO: other xxx. functions for those using na.rm = FALSE -## like mean, median, sd, var, quantile, fivenum, ... - -`%is%` <- function (x, class) is(x, as.character(substitute(class))) -`%as%` <- function (x, class) as(x, as.character(substitute(class))) - -#s1 <- 12.3 -#s1 %is% numeric -#s1 %is% integer -#s1 %as% integer %is% integer - -## Ternary condition statement, like in JavaScript cond ? yes : no -## Not possible to do in R... but the closest is: -#`%?%` <- function (cond, yes.no) { if (cond) yes.no[1] else yes.no[2] } -## ... and its vectorized conterpart: -#`%??%` <- function (cond, yes.no) ifelse(cond, yes = yes.no[1], no = yes.no[2]) -#TRUE %?% c(1, 2) -#FALSE %?% c(yes = 1, no = 2) -#x <- 1:3 -#res <- any(x > 2) %?% c("yes", "no"); res -#res <- (x > 2) %??% c("yes", "no"); res # Take care of parentheses! -#rm(x, res) - -## It is common to test if something is zero, or one... Here, the non vectorized -## version asks for all items being zero or one, excluding missing data! -## TODO: good idea (perhaps)... but this does not work well! -#`%?0%` <- function (x, yes.no) { if (all.(x == 0)) yes.no[1] else yes.no[2] } -#`%?1%` <- function (x, yes.no) { if (all.(x == 1)) yes.no[1] else yes.no[2] } -#`%??0%` <- function (x, yes.no) ifelse(x == 0, yes = yes.no[1], no = yes.no[2]) -#`%??1%` <- function (x, yes.no) ifelse(x == 1, yes = yes.no[1], no = yes.no[2]) -#x <- 1; x %?0% c(yes = stop("x must be non null"), no = x^2) -#x <- 0; x %?0% c(yes = stop("x must be non null"), no = x^2) -## This helps to construct sentences with single or plural -#x <- 1; rep(x, 3) %??1% c(single = c("There is ", 1, " item in x"), -# plural = c("There are ", length(x), " items in x")) -#x <- 3; rep(x, 3) %??1% c(single = c("There is ", 1, " item in x"), -# plural = c("There are ", length(x), " items in x")) - - -## Should we keep these without renaming??? -#na.action() -#na.omit() -#na.fail() -#na.exclude() -#na.pass() -## And what to do with naresid() and napredict()? - -## Problem of functional language like R: too much copy! -## For instance, change a simple attribute using attr(x) <- value -## leads to a copy of the object.... If the object is large, time -## needed is significant (+ memory wasted!) -#n <- 1e7 -#x <- double(n) -## Trace when x is copied -#tracemem(x) -#system.time(attr(x, "a") <- 1) -## There is a copy of the object => sooo, slow! - -## Solution: from data.table... -## 1) setattr() does the same without copying the object -## but the syntax is not very nice! -## 2) for data.table[, ....] authors define the `:=` function to -## "assign by reference", i.e., changing a part of a table without -## copying the object -## -## One could generalize this... plus take advantage of `@` and `@<-` that is -## not used for S3 objects and of the same precedence as `$` to simplify -## manipulation of attributes! - -## For non S4 objects, reuse @ for attributes! -## After all, they are, indeed, attributes! -## Note that we force exact match, less error-prone that the opposite, -## and same behaviour as @ used for S4 objects! -## TODO: also use it for S4 object, in the case a slot is not defined -## TODO: add check argument for `attr<-` too -## TODO: attrNames() like slotNames() -## TODO: slots() as a synonym of getSlots() in parallel with attributes() -## NO: getSlots() does not recover the content, but only the class for each -## object in slots =< should reallybe called slotClasses() -## and we need an attrClasses() too! -`@` <- function (object, name) -{ - arg <- substitute(name) - if (is.name(arg)) name <- as.character(arg) - if (isS4(object)) slot(object, name) else attr(object, name, exact = TRUE) -} - -## Reuse `@<-` to set attribute from a non S4 object -## TODO: also use it for S4 object, in the case a slot is not defined -## TODO: reduce the number of copies done here! -`@<-` <- function (x, which, value) -{ - arg <- substitute(which) - if (is.name(arg)) which <- as.character(arg) - if (isS4(x)) { - `slot<-`(x, which, check = TRUE, value) - } else { - `attr<-`(x, which, value) - } -} - -## Define the "replace by reference" function for attributes, here using -## setattr() from data.table package -## TODO: we need also something like that for S4 slots! -## Since they really are attributes with checking, check first, and then, -## use setattr(), and it is done! -## This does not quite work yet, nor := -#`@:=` <- function (x, which, value) -#{ -# arg <- substitute(which) -# if (is.name(arg)) which <- as.character(arg) -# if (isS4(x)) { -# ## TODO: we need an assign by reference function for S4 slots here -# `slot<-`(x, which, TRUE, value) -# } else { -# ## TODO: use setattr() from data.table, but we don't want to depend on all this stuff!!! -# #setattr(x, which, value) -# `attr<-`(x, which, value) -# } +# How to simplify the use of if() by limiting possible special cases? +# use of any() and all() is there to cope with this, but still: +# 1) any(NA) => NA, unless any(NA, na.rm = TRUE) => FALSE +# 2) any(NULL) & any(logical(0)) => FALSE => OK +# We solve this by defining any.() and all.() +#any. <- function(..., na.rm = TRUE) any(..., na.rm = na.rm) +#all. <- function(..., na.rm = TRUE) all(..., na.rm = na.rm) +#one <- function(x, na.rm = FALSE) UseMethod("one") +#one.default <- function(x, na.rm = FALSE) { +# if (isTRUE(na.rm)) x <- na.omit(x) +# identical(TRUE, as.logical(x)) #} +#one. <- function(x, na.rm = TRUE) one(x, na.rm = na.rm) +# TODO: other xxx. functions for those using na.rm = FALSE +# like mean, median, sd, var, quantile, fivenum, ... -## TODO: `[:=`, `$:=` and `[[:=` -## This does not work... -#`[:=` <- `[<-` - -## The`:=` function emulates fun(x) <- value, but with a different mechanism -## that does not imply a copy of x. This is called "replacement by reference" -## in comparison to the usual "replacement by value". It calls `fun:=` -## like fun(x) <- value calls `fun<-` -## TODO: a validation mechanism for the value passed to the function? -## TODO: use alist() instead of list()!!! -## TODO: this does not work as expected! -#`:=` <- function (x, value) { -# call <- match.call() -# X <- substitute(x) -# ## pairlist() because NULL would be lost using list() -# value <- pairlist(value = value) -# ## In case single name, do the same as x[] <- value, i.e., keeping size -# ## and attributes of x ("replacement inside x") -# if (length(X) == 1) { -## tryCatch(do.call("[<-", c(list(x = X), value), envir = parent.frame(1)), -## error = function (e) { -## ## Construct a call that is closer to the actual syntax! -## e$call <- paste(deparse(call[[2]]), ":=", deparse(call[[3]])) -## stop(e) -## }) -# stop(":= cannot be used directly on an object") -# } -# ## If a more complex call is provided, try to run `fun:=` instead -# X <- as.pairlist(substitute(X)) -# ## To emulate `fun<-`, but using `fun:=` -# fun <- paste(deparse(X[[1]]), ":=", sep = "") -# X[[1]] <- NULL -# ## Use tryCatch() to ensure a better error message is issued -# tryCatch(assign(deparse(X[[1]]), do.call(fun, c(X, value), -# envir = parent.frame(1))), -# error = function (e) { -# ## Construct a call that is closer to the actual syntax! -# e$call <- paste(deparse(call[[2]]), ":=", deparse(call[[3]])) -# stop(e) -# }) -# ## Like for `fun<-`, value is returned invisibly, probably to allow -# ## something like x <- y[2] <- value -# invisible(value) -#} - -## I don't like much system.time(), first because it returns 3 numbers where -## we want most of the time only one, and second because it creates a new -## object proc_time, where a difftime object should be perfectly suitable -## => new function timing(). It also replaces the synonym unix.time() and -## the other function proc.time() when called without an expression. -timing <- function (expr, gc.first = TRUE) -{ - if (missing(expr)) { - res <- proc.time() - } else { - res <- system.time(expr, gcFirst = gc.first) - } - ## Results split into result and details - #details <- as.difftime(res[c("user.self", "sys.self")], units = "secs") - #details@names <- c("user", "system") - details <- as.difftime(res, units = "secs") - #res <- as.difftime(res["elapsed"], units = "secs") - res <- details["elapsed"] - res@details <- details - return(res) -} -## Test... -#tst <- timing(Sys.sleep(1.5)) -#tst -#tst@details - -## Sys.sleep() -> sleep()... no, because sleep is a dataset!!! -#wait <- Sys.sleep -#traceMemory <- tracemem -## From stats: xxx.test() => give a 'htest' object => htestXxxx() -#htestT <- t.test -#htestAnsari <- ansari.test -#htestBatlett <- bartlett.test -#htestChisq <- chisq.test -#htestFisher <- fisher.test -#htestFligner <- fligner.test -#htestFriedman <- friedman.test -#htestKS <- ks.test -#htestMantelHaenszel <- mantelhaen.test -#htestMauchly <- mauchly.test -#htestMcNemar <- mcnemar.test -#htestMood <- mood.test -#htestAnovaPower <- power.anova.test -#htestPropPower <- power.prop.test -#htestTPower <- power.t.test -#htestPhillipsPerron <- PP.test -#htestProp <- prop.test -#htestPropTrend <- prop.trend.test -#htestShapiroWilk <- shapiro.test - -#all.names -#all.vars - -#Data and POSIXct - -#contrHelmert <- contr.helmert -#contrPoly <- contr.poly -#contrSum <- contr.sum -#contrTreatment <- contr.treatment -#contrTreatmentL <- contr.SAS - -#equal <- all.equal -#equalA <- attr.all.equal - -#expandGrid <- expand.grid - -#gcTiming <- gc.time + return a difftime object -#gcInfo <- gcinfo -#gcTorture <- gctorture -#??? gcTorture2 <- gctorture2 - -#rleInverse <- inverse.rle - -#No -> isAtomic <- is.atomic -#No -> isCall <- is.call?? -#isElement <- is.element -#?isExpression?? -#isFinite <- is.finite -#No -> isLanguage <- is.language -#isLoaded <- is.loaded -#isNA <- is.na -#isNaN <- is.nan -#isNULL <- is.null -#isR <- is.R -#No -> isRecursive <- is.recursive -#No -> isSymbol <- is.symbol -#isUnsorted <- is.unsorted -#No -> isVector <- is.vector -#isTTY <- isatty - -#l10n.info? -#list2env should be as.environment() applied to list, really -#margin.table, prop.table -#mat.or.vec -#maxCol <- max.col... or colMax, cf. colSum -#average() as a simpler version than mean() for fast run - -#qrX <- qr.X -#qrQ <- qr.Q -#qrR <- qr.R -# + other qr. functions - -#R.home, R.Version, R.version.string -#Recall?? -#doCall <- do.call -#rowsum vs rowSums -#enum <- seq_along -#seql <- seq_len -#setdiff & other setxxx functions - -#dateCurrent <- Sys.Date -#Sys.getenv, Sys.getlocale, Sys.getpid, Sys.info, Sys.localeconv -#Sys.setenv, Sys.setlocale -#timeCurrent <- Sys.time -#Sys.timezone, Sys.unsetenv - -#lowerTri <- lower.tri -#upperTri <- upper.tri - -#utf8ToInt -#cStackInfo <- base::Cstack_info -#.Internal() triggers notes => what to do? -#.Primitive() - -## Read this carefully before rethinking these function, trying to simplify a bit things: -## http://obeautifulcode.com/R/How-R-Searches-And-Finds-Stuff/ -## Environments management -## Use of frame as a synonym of environment brings an additional difficulty on -## an already difficult subject! => use env(ironment) everywhere?! -## TODO: all these sys.xxx must remain like this! -sysFunction <- .Recode(base::sys.function) -sysCall <- .Recode(base::sys.call) -sysCalls <- .Recode(base::sys.calls) -matchCall <- .Recode(base::match.call) -sysParent <- .Recode(base::sys.parent) -sysParents <- .Recode(base::sys.parents) -## TODO: do not use frame => what??? sys.prevEnv()?? -parentFrame <- .Recode(base::parent.frame) -sysFrame <- .Recode(base::sys.frame) -sysFrames <- .Recode(base::sys.frames) -sysnFrame <- .Recode(base::sys.nframe) -sysStatus <- base::sys.status -onExit <- function (expr = NULL, add = FALSE) base::on.exit(expr = expr, add = add) -sysOnExit <- .Recode(base::sys.on.exit) -dumpFrames <- utils::dump.frames -#debugger(dump = last.dump) # utils -#browser() -#browserText() -#browserCondition() -#browserSetDebug() -#debug() -#undebug() -debugOnce <- .Recode(base::debugonce) -isDebugged <- .Recode(base::isdebugged) -baseEnv <- base::baseenv -.BaseEnv <- base::baseenv() -baseNamespaceEnv <- function () return(.BaseNamespaceEnv) -#.BaseNamespaceEnv already defined -## Those four environments are specials and start with an uppercase letter! -emptyEnv <- base::emptyenv -.EmptyEnv <- base::emptyenv() -globalEnv <- base::globalenv # Also .GlobalEnv -# .GlobalEnv already defined -autoloadEnv <- function () return(.AutoloadEnv) -#.AutoloadEnv already defined -#TempEnv() in svMisc -tempEnv <- .TempEnv_() -.TempEnv <- .TempEnv_() -## TODO: or sys.topEnv()??? -## RCMD check claims he cannot find isNamespaceEnv() in topEnv() => provide it -isNamespaceEnv <- function (envir = parentFrame()) - .Intern(isNamespaceEnv(envir)) -topEnv <- .Recode(base::topenv) -# Usually, to create an object, we use its name, but -## environment() means something else here! -## So, OK, we'll stick with: -envNew <- .Recode(base::new.env) -## Should not be used! -envParent <- .Recode(base::parent.env) -`envParent<-` <- .Recode(base::`parent.env<-`) -#environmentName() -#environment() -#`environment<-`() -#is.environment() -envProfile <- .Recode(base::env.profile) -## name attribute to an environment,... see ?environment -#source() - -sysSource <- base::sys.source -#.First.sys and .Last.sys cannot be changed! -#eval() -evalQuote <- .Recode(base::evalq) -evalParent <- base::eval.parent -evalLocal <- base::local - -autoloaded <- function () return(.Autoloaded) -#autoload() -#autoloader() -#delayedAssign() - -## This is the options() mechanism: -## I don't like the options("width") returning a list with only $width in it! -## I want a mechanisms much like par("ask") which directly returns the value, thus: -### Covered function: base::options(), base::getOption(), base::.Options -opt <- function (...) { - arg <- list(...) - l <- length(arg) - if (l == 0) { - return(options()) # List of all options - } else if (l == 1 && is.null(names(arg))) { - return(options(...)[[1]]) # The value for this option - } else return(invisible(options(...))) # Invisible list of previous options -} -## With a single argument, opt() and optDef() give the same thing, but -## optDef() allows to provide a default value for the option, if not found -optDef <- getOption # (x, default = NULL) - - -## For R help on the web: -## http://rseek.org -## http://www.r-project.org/mail.html for mailing lists -## StackOverflow http://stackoverflow.com/questions/tagged/r -## #rstats Twitter hashtag http://search.twitter.com/search?q=%23rstats -## R-Bloggers http://www.r-bloggers.com -## Video Rchive (of presentations) http://www.vcasmo.com/user/drewconway - -## Useful packages from "machine learning for hackers" -## arm, glmnet, ggplot2, igraph, lme4, lubridate, RCurl, plyr, RJSONIO, spatstat, RSXML - -## ! is not defined for character strings... Use it here for quick conversion -## of character into an "s" (string) object... Used in doc blocks for an R script -## compatible with Sweave -`!` <- function(x) if (is.character(x)) - structure(x, class = c("s", "character")) else .Primitive("!")(x) - -## The print.s method is designed to print nothing in case of a doc block -## TODO: need methods to convert these into Html or Pdf for quick view! -## TODO: a method to check correctness of these blocks for Asciidoc blocks -## (for LaTeX blocks, it would not work) -print.s <- function (x, ...) -{ - ## If the string starts with @\n and ends with <<.*>>=, - ## treat it specially (it is a doc chunk!): print just nothing! - if (grepl("^@[ \t]*\n.*<<[^\n]*>>=[ \t]*$", x)) { - cat("<...doc chunk...>\n") - } else print(as.character(x)) - return(invisible(x)) -} - - -## It may be useful to define a constant which represents a null byte (raw obj) -# RAW_NULL <- raw(1) +#`%is%` <- function(x, class) inherits(x,class) diff --git a/R/panels.R b/R/panels.R index 7980f59..7337e98 100755 --- a/R/panels.R +++ b/R/panels.R @@ -1,73 +1,160 @@ -## More panel functions -## TODO: define fill colors differently -## TODO: allow for a separate treatment per group -## TODO: a better grid for log axes: something like -#abline(h = c(1:10 * 0.01, 2:10 * 0.1, 2:10 * 1, 2:10 * 10), lty = "dotted", col = "lightgray") -#abline(v = c(1:10 * 0.01, 2:10 * 0.1, 2:10 * 1, 2:10 * 10), lty = "dotted", col = "lightgray") - -## Inspired from panel.car() in car package, but without smooth line... -panel.reg <- function (x, y, col = par("col"), bg = par("bg"), pch = par("pch"), +#' More panel plots. +#' +#' Several panel plots that can be used with functions like [coplot()] and +#' [pairs))]. +#' +#' @param x A numeric vector. +#' @param y A numeric vector of same length as `x`. +#' @param col The color of the points. +#' @param bg The background color for symbol used for the points. +#' @param pch The symbol used for the points. +#' @param cex The expansion factor used for the points. +#' @param lwd The line width. +#' @param line.reg A function that calculates coefficients of a straight line, +#' for instance, [lm()], or [rlm()] for robust linear regression. +#' @param line.col The color of the line. +#' @param line.lwd The width of the line. +#' @param untf Logical asking whether to untransform the straight line in case +#' one or both axis are in log scale. +#' @param el.level The confidence level for the bivariate normal ellipse around +#' data; the default value of 0.7 draws an ellipse of roughly +/-1 sd. +#' @param el.col The color used to fill the ellipse. +#' @param el.border The color used to draw the border of the ellipse and the +#' standardized major axis. +#' @param major If `TRUE`, the standardized major axis is also drawn. +#' @param use One of `"everything"`, `"all.obs"`, `"complete.obs"`, +#' `"na.or.complete"`, or `"pairwise.complete.obs"` (can be abbreviated). +#' Defines how the [cor()] function behaves with missing observations. +#' @param method One of the three correlation coefficients `"pearson"` +#' (default), `"kendall"`, or `"spearman"`. Can be abbreviated. +#' @param alternative The alternative hypothesis in correlation test, see +#' [cor.test()]. +#' @param digits The number of decimal digits to print when the correlation +#' coefficient is printed in the graph. +#' @param prefix A prefix (character string) to use before the correlation +#' coefficient printed in the graph. +#' @param cor.cex Expansion coefficient for text in printing correlation +#' coefficients. +#' @param stars.col The color used for significance stars (with: *** p < 0.001, +#' ** p < 0.1, * p < 0.05, . p < 0.1. +#' @param col.smooth Color to be used by lines for drawing the smooths. +#' @param span Smoothing parameter f for [lowess()], see there. +#' @param iter Number of robustness iterations for [lowess()]. +#' @param ... Further arguments to plot functions. +#' @return These functions return nothing and are used for their side effect of +#' plotting in panels of composite plots. +#' @details Theses functions should be used outside of the diagonal in +#' [pairs()], or with [coplot()], as they are bivariate plots. +#' @author Philippe Grosjean , but code inspired from +#' [panel.smooth()] in **graphics** and `panel.car()` in package **car**. +#' @export +#' @name panels +#' @seealso [coplot()], [pairs()], [panel.smooth()], [lm()], [ellipse()], +#' [cor()] and [cor.test()] +#' @keywords aplot +#' @concept panel plots +#' @examples +#' # Smooth lines in lower graphs and straight lines in upper graphs +#' pairs(trees, lower.panel = panel_smooth, upper.panel = panel_reg) +#' # Robust regression lines +#' library(MASS) # For rlm() +#' pairs(trees, panel = panel_reg, diag.panel = panel_boxplot, +#' reg.line = rlm, line.col = "blue", line.lwd = 2) +#' # A Double log graph +#' pairs(trees, lower.panel = panel_smooth, upper.panel = panel_reg, log = "xy") +#' +#' # Graph suitables to explore correlations (take care there are potentially +#' # many simultaneous tests done here... So, you loose much power in the whole +#' # analysis... use it just as an indication!) +#' # Pearson's r +#' pairs(trees, lower.panel = panel_ellipse, upper.panel = panel_cor) +#' # Spearman's rho (ellipse and straight lines not suitable here!) +#' pairs(trees, lower.panel = panel_smooth, upper.panel = panel_cor, +#' method = "spearman", span = 1) +#' # Several groups (visualize how bad it is to consider the whole set at once!) +#' pairs(iris[, -5], lower.panel = panel_smooth, upper.panel = panel_cor, +#' method = "kendall", span = 1, +#' col = c("red3", "blue3", "green3")[iris$Species]) +#' # Now analyze correlation for one species only +#' pairs(iris[iris$Species == "virginica", -5], lower.panel = panel_ellipse, +#' upper.panel = panel_cor) +#' +#' # A coplot with custom panes +#' coplot(Petal.Length ~ Sepal.Length | Species, data = iris, +#' panel = panel_ellipse) +panel_reg <- function(x, y, col = par("col"), bg = par("bg"), pch = par("pch"), cex = par("cex"), lwd = par("lwd"), line.reg = lm, line.col = "red", -line.lwd = lwd, untf = TRUE, ...) -{ - points(x, y, col = col, bg = bg, pch = pch, cex = cex) - if (is.function(line.reg)) - abline(reg = line.reg(y ~ x), col = line.col, lwd = line.lwd, - untf = untf, ...) +line.lwd = lwd, untf = TRUE, ...) { + # Inspired from panel.car() in car package, but without smooth line... + points(x, y, col = col, bg = bg, pch = pch, cex = cex) + if (is.function(line.reg)) + abline(reg = line.reg(y ~ x), col = line.col, lwd = line.lwd, + untf = untf, ...) } -## panel.ellipse (note the low conf.level to get the ellipse inside the graph) -panel.ellipse <- function (x, y, col = par("col"), bg = par("bg"), +#' @export +#' @rdname panels +panel.reg <- panel_reg # Backward compatibility + +#' @export +#' @rdname panels +panel_ellipse <- function(x, y, col = par("col"), bg = par("bg"), pch = par("pch"), cex = par("cex"), el.level = 0.7, el.col = "cornsilk", -el.border = "red", major = TRUE, ...) -{ - el <- ellipse(cor(x, y, use = "complete.obs"), scale = c(sd(x), sd(y)), - centre = c(mean(x), mean(y)), level = el.level) - polygon(el, col = el.col, border = el.border) - if (isTRUE(major)) { - ## b is the slope of the standardized major axis - d <- na.omit(data.frame(y, x)) - v <- cov(d) * (nrow(d) - 1) - b <- sign(v[1, 2]) * sqrt(v[1, 1] / v[2, 2]) - a <- mean(y, na.rm = TRUE) - b * mean(x, na.rm = TRUE) - abline(a = a, b = b, col = el.border, ...) - } - points(x, y, col = col, bg = bg, pch = pch, cex = cex) +el.border = "red", major = TRUE, ...) { + el <- ellipse(cor(x, y, use = "complete.obs"), scale = c(sd(x), sd(y)), + centre = c(mean(x), mean(y)), level = el.level) + polygon(el, col = el.col, border = el.border) + if (isTRUE(major)) { + # b is the slope of the standardized major axis + d <- na.omit(data.frame(y, x)) + v <- cov(d) * (nrow(d) - 1) + b <- sign(v[1, 2]) * sqrt(v[1, 1] / v[2, 2]) + a <- mean(y, na.rm = TRUE) - b * mean(x, na.rm = TRUE) + abline(a = a, b = b, col = el.border, ...) + } + points(x, y, col = col, bg = bg, pch = pch, cex = cex) } -## One way to visualize correlation coefficients, inspired from -## http://addictedtor.free.fr/graphiques/sources/source_137.R -panel.cor <- function (x, y, use = "everything", +#' @export +#' @rdname panels +panel.ellipse <- panel_ellipse # Backward compatibility + +#' @export +#' @rdname panels +panel_cor <- function(x, y, use = "everything", method = c("pearson", "kendall", "spearman"), alternative = c("two.sided", "less", "greater"), digits = 2, prefix = "", -cex = par("cex"), cor.cex = cex, stars.col = "red", ...) -{ - ## Set plot parameters - usr <- par("usr") - on.exit(par(usr)) - par(usr = c(0, 1, 0, 1)) - - ## We don't use cor.test()$estimate, but result from cor() - ## That way, we have more flexibility in defining the "use" argument - corr <- cor(x, y, use = use, method = method) - - ## Format this result - txt <- format(c(corr, 0.123456789), digits = digits)[1] - txt <- paste(prefix, txt, sep = "") - cor.cex <- cor.cex / strwidth(txt) - - ## Perform a test on this coefficient - test <- cor.test(x, y, alternative = alternative, method = method) +cex = par("cex"), cor.cex = cex, stars.col = "red", ...) { + # One way to visualize correlation coefficients, inspired from + # http://addictedtor.free.fr/graphiques/sources/source_137.R + usr <- par("usr") + on.exit(par(usr)) + par(usr = c(0, 1, 0, 1)) + + # We don't use cor.test()$estimate, but result from cor() + # That way, we have more flexibility in defining the "use" argument + corr <- cor(x, y, use = use, method = method) + + txt <- format(c(corr, 0.123456789), digits = digits)[1] + txt <- paste(prefix, txt, sep = "") + cor.cex <- cor.cex / strwidth(txt) - ## Format this result - star <- symnum(test$p.value, corr = FALSE, na = FALSE, - cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), - symbols = c("***", "**", "*", ".", " ")) + test <- cor.test(x, y, alternative = alternative, method = method) - ## Write the text on the plot - text(0.5, 0.5, txt, cex = cor.cex * abs(corr), ...) - text(0.8, 0.8, as.character(star), cex = cor.cex, col = stars.col) - - ## Return the result of the test invisibly - return(invisible(test)) + star <- symnum(test$p.value, corr = FALSE, na = FALSE, + cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), + symbols = c("***", "**", "*", ".", " ")) + + text(0.5, 0.5, txt, cex = cor.cex * abs(corr), ...) + text(0.8, 0.8, as.character(star), cex = cor.cex, col = stars.col) + + invisible(test) } + +#' @export +#' @rdname panels +panel.cor <- panel_cor # Backward compatibility + +#' @export +#' @rdname panels +panel_smooth <- panel.smooth diff --git a/R/panels.diag.R b/R/panels.diag.R index bc56782..3bc94f3 100755 --- a/R/panels.diag.R +++ b/R/panels.diag.R @@ -1,56 +1,124 @@ -## Panel function to use on the diagonal (univariate graphs) -## TODO: define fill colors differently - -## Boxplot -panel.boxplot <- function (x, col = par("col"), box.col = "cornsilk", ...) -{ - ## Note: col is defined here, but unused, because otherwise redefining - ## col would cause an error about duplicated 'col' arguments to boxplot()! - ## further arguments to boxplot are allowed (try notch = TRUE ... not very - ## useful here, but just for test). Note that warnings are generates in - ## pairs() in case of a call with non-graphic arguments, or even, col.box = - par(new = TRUE) - boxplot(x, axes = FALSE, col = box.col, horizontal = TRUE, - xlim = c(0.5, 2), ...) +#' More univariate panel plots. +#' +#' Several panel plots that can be used with [pairs()]. +#' +#' @param x A numeric vector. +#' @param col The color of the points. +#' @param box.col The filling color of the boxplots. +#' @param adjust The bandwidth adjustment factor, see [density()]. +#' @param rug Do we add a rug representation (1-d plot) of the points too? +#' @param lwd The line width. +#' @param line.col The color of the line. +#' @param line.lwd The width of the line. +#' @param breaks The number of breaks, the name of a break algorithm, a vector +#' of breakpoints, or any other acceptable value for `breaks =` argument of +#' [hist()]. +#' @param hist.col The filling color for the histograms. +#' @param hist.border The border color for the histograms. +#' @param hist.density The density for filling lines in the histograms. +#' @param hist.angle The angle for filling lines in the histograms. +#' @param pch The symbol used for the points. +#' @param bg The background color for symbol used for the points. +#' @param cex The expansion factor used for the points. +#' @param qq.pch The symbol used to plot points in the QQ-plots. +#' @param qq.col The color of the symbol used to plot points in the QQ-plots. +#' @param qq.bg The background color of the symbol used to plot points in the +#' QQ-plots. +#' @param qq.cex The expansion factor for points in the QQ-plots. +#' @param qqline.col The color for the QQ-plot lines. +#' @param qqline.lwd The width for the QQ-plot lines. +#' @param ... Further arguments to plot functions, or functions that construct +#' items, like [density()], depending on the context. +#' @return These functions return nothing and are used for their side effect of +#' plotting in panels of composite plots. +#' @details Panel functions [panel_boxplot()], [panel_density()], [panel_hist()] +#' and [panel_qqnorm()] should be used only to plot univariate data on the +#' diagonals of pair plots (or scatterplot matrix). +#' @author Philippe Grosjean , but code inspired from +#' `spm()` in package **car**. +#' @export +#' @name panels.diag +#' @seealso [pairs()], [boxplot()], [hist()], [density()], [qqnorm()] +#' @keywords aplot +#' @concept panel plots +#' @examples +#' # Example of scatterplot matrices with custom plots on the diagonal +#' # Boxplots +#' pairs(trees, panel = panel_smooth, diag.panel = panel_boxplot) +#' pairs(trees, diag.panel = panel_boxplot, box.col = "gray") +#' # Densities +#' pairs(trees, panel = panel_smooth, diag.panel = panel_density) +#' pairs(trees, diag.panel = panel_density, line.col = "red", adjust = 0.5) +#' # Histograms +#' pairs(trees, panel = panel_smooth, diag.panel = panel_hist) +#' pairs(trees, diag.panel = panel_hist, hist.col = "gray", breaks = "Scott") +#' # QQ-plots against Normal theoretical distribution +#' pairs(trees, panel = panel_smooth, diag.panel = panel_qqnorm) +#' pairs(trees, diag.panel = panel_qqnorm, qqline.col = 2, qq.cex = .5, qq.pch = 3) +panel_boxplot <- function(x, col = par("col"), box.col = "cornsilk", ...) { + # Note: col is defined here, but unused, because otherwise redefining + # col would cause an error about duplicated 'col' arguments to boxplot()! + # further arguments to boxplot are allowed (try notch = TRUE ... not very + # useful here, but just for test). Note that warnings are generates in + # pairs() in case of a call with non-graphic arguments, or even, col.box = + par(new = TRUE) + boxplot(x, axes = FALSE, col = box.col, horizontal = TRUE, + xlim = c(0.5, 2), ...) } -## Density plot -panel.density <- function (x, adjust = 1, rug = TRUE, col = par("col"), -lwd = par("lwd"), line.col = col, line.lwd = lwd,...) -{ - ## Further arguments to density() are allowed (see examples) but it generates - ## warnings in pairs() - dens.x <- density(x, adjust = adjust, ...) - lines(dens.x$x, min(x) + dens.x$y * diff(range(x)) / diff(range(dens.x$y)), - col = line.col, lwd = line.lwd) - if (isTRUE(rug)) - points(x, rep(min(x), length(x)), pch = "|", col = line.col) +#' @export +#' @rdname panels.diag +panel.boxplot <- panel_boxplot # Backward compatibility + +#' @export +#' @rdname panels.diag +panel_density <- function(x, adjust = 1, rug = TRUE, col = par("col"), +lwd = par("lwd"), line.col = col, line.lwd = lwd, ...) { + # Further arguments to density() are allowed (see examples) but it generates + # warnings in pairs() + dens.x <- density(x, adjust = adjust, ...) + lines(dens.x$x, min(x) + dens.x$y * diff(range(x)) / diff(range(dens.x$y)), + col = line.col, lwd = line.lwd) + if (isTRUE(rug)) + points(x, rep(min(x), length(x)), pch = "|", col = line.col) } -## Histogram -panel.hist <- function (x, breaks = "Sturges", hist.col = "cornsilk", -hist.border = NULL, hist.density = NULL, hist.angle = 45, ...) -{ - ## Here, we try to define all arguments that are specific to the histogram - ## (col, border, density and angle) with specific arguments to allow better - ## control of the appearance of the histograms independently from the other - ## panels - par(new = TRUE) - hist(x, breaks = breaks, col = hist.col, border = hist.border, - density = hist.density, angle = hist.angle, axes = FALSE, - xlab = "", ylab = "", main = "") +#' @export +#' @rdname panels.diag +panel.density <- panel_density # Backward compatibility + +#' @export +#' @rdname panels.diag +panel_hist <- function(x, breaks = "Sturges", hist.col = "cornsilk", +hist.border = NULL, hist.density = NULL, hist.angle = 45, ...) { + # Here, we try to define all arguments that are specific to the histogram + # (col, border, density and angle) with specific arguments to allow better + # control of the appearance of the histograms independently from the other + # panels + par(new = TRUE) + hist(x, breaks = breaks, col = hist.col, border = hist.border, + density = hist.density, angle = hist.angle, axes = FALSE, + xlab = "", ylab = "", main = "") } -## QQ-plot agains a Normal distribution -panel.qqnorm <- function(x, pch = par("pch"), col = par("col"), bg = par("bg"), +#' @export +#' @rdname panels.diag +panel.hist <- panel_hist # Backward compatibility + +#' @export +#' @rdname panels.diag +panel_qqnorm <- function(x, pch = par("pch"), col = par("col"), bg = par("bg"), cex = par("cex"), lwd = par("lwd"), qq.pch = pch, qq.col = col, qq.bg = bg, -qq.cex = cex, qqline.col = qq.col, qqline.lwd = lwd, ...) -{ - par(new = TRUE) - ylim <- range(x, na.rm = TRUE) - ## Leave enough space for name of variables on top of the graph - ylim[2] <- ylim[2] + (ylim[2] - ylim[1]) / 4 - qqnorm(x, axes = FALSE, xlab = "", ylab = "", main = "", - ylim = ylim, col = qq.col, bg = qq.bg, pch = qq.pch, cex = qq.cex) - qqline(x, col = qqline.col, lwd = qqline.lwd, ...) +qq.cex = cex, qqline.col = qq.col, qqline.lwd = lwd, ...) { + par(new = TRUE) + ylim <- range(x, na.rm = TRUE) + # Leave enough space for name of variables on top of the graph + ylim[2] <- ylim[2] + (ylim[2] - ylim[1]) / 4 + qqnorm(x, axes = FALSE, xlab = "", ylab = "", main = "", + ylim = ylim, col = qq.col, bg = qq.bg, pch = qq.pch, cex = qq.cex) + qqline(x, col = qqline.col, lwd = qqline.lwd, ...) } + +#' @export +#' @rdname panels.diag +panel.qqnorm <- panel_qqnorm # Backward compatibility diff --git a/R/pcomp.R b/R/pcomp.R index 8697a87..682a541 100755 --- a/R/pcomp.R +++ b/R/pcomp.R @@ -1,487 +1,620 @@ -## Define a "pcomp" S3 object for PCA, because there is too much chaos with -## default "prcomp" and "princomp" R objects, plus "pca" in ade4 and labdsv, -## "PCA" in FactoMineR, etc. +#' Principal Components Analysis. +#' +#' Perform a principal components analysis on a matrix or data frame and return +#' a `pcomp` object. +#' +#' @param x A matrix or data frame with numeric data. +#' @param formula A formula with no response variable, referring only to numeric +#' variables. +#' @param data An optional data frame (or similar: see [model.frame()]) +#' containing the variables in the formula `formula =`. By default the variables +#' are taken from `environment(formula)`. +#' @param subset An optional vector used to select rows (observations) of the +#' data matrix `x`. +#' @param na.action A function which indicates what should happen when the data +#' contain `NA`s. The default is set by the `na.action =` setting of +#' [options()], and is [na.fail()] if that is not set. The 'factory-fresh' +#' default is [na.omit()]. +#' @param method Either `"svd"` (using [prcomp()]), `"eigen"` (using +#' [princomp()]), or an abbreviation. +#' @param ... Arguments passed to or from other methods. If \`x` is a +#' formula one might specify `scale =`, `tol =` or `covmat =`. +#' @param scores A logical value indicating whether the score on each principal +#' component should be calculated. +#' @param center A logical value indicating whether the variables should be +#' shifted to be zero centered. Alternately, a vector of length equal the +#' number of columns of `x` can be supplied. The value is passed to `scale =`. +#' Note that this argument is ignored for `method = "eigen"` and the dataset is +#' always centered in this case. +#' @param scale A logical value indicating whether the variables should be +#' scaled to have unit variance before the analysis takes place. The default is +#' `TRUE`, which in general, is advisable. Alternatively, a vector of length +#' equal the number of columns of `x` can be supplied. The value is passed to +#' [scale()]. +#' @param tol Only when `method = "svd"`. A value indicating the magnitude +#' below which components should be omitted. (Components are omitted if their +#' standard deviations are less than or equal to `tol` times the standard +#' deviation of the first component.) With the default null setting, no +#' components are omitted. Other settings for `tol =` could be `tol = 0` or +#' `tol = sqrt(.Machine$double.eps)`, which would omit essentially constant +#' components. +#' @param covmat A covariance matrix, or a covariance list as returned by +#' [cov.wt()] (and [cov.mve()] or [cov.mcd()] from package **MASS**). If +#' supplied, this is used rather than the covariance matrix of `x`. +#' @param object A 'pcomp' object. +#' @param loadings Do we also summarize the loadings? +#' @param cutoff The cutoff value below which loadings are replaced by white +#' spaces in the table. That way, larger values are easier to spot and to read +#' in large tables. +#' @param digits The number of digits to print. +#' @param which The graph to plot. +#' @param choices Which principal axes to plot. For 2D graphs, specify two +#' integers. +#' @param col The color to use in graphs. +#' @param bar.col The color of bars in the screeplot. +#' @param circle.col The color for the circle in the loadings or correlations +#' plots. +#' @param ar.length The length of the arrows in the loadings and correlations +#' plots. +#' @param pos The position of text relative to arrows in loadings and +#' correlation plots. +#' @param labels The labels to write. If `NULL` default values are computed. +#' @param cex The factor of expansion for text (labels) in the graphs. +#' @param main The title of the graph. +#' @param xlab The label of the x-axis. +#' @param ylab The label of the y-axis. +#' @param pch The type of symbol to use. +#' @param bg The background color for symbols. +#' @param groups A grouping factor. +#' @param border The color of the border. +#' @param level The probability level to use to draw the ellipse. +#' @param pc.biplot Do we create a Gabriel's biplot (see [biplot()])? +#' @param npcs The number of principal components to represent in the screeplot. +#' @param type The type of screeplot (`"barplot"` or `"lines"`) or pairs plot +#' (`"loadings"` or `"correlations"`). +#' @param ar.col Color of arrows. +#' @param ar.cex Expansion factor for terxt on arrows. +#' @param newdata New individuals with observations for the same variables as +#' those used for calculating the PCA. You can then plot these additional +#' individuals in the scores plot. +#' @param newvars New variables with observations for same individuals as those +#' used for mcalculating the PCA. Correlation with PCs is calculated. You can +#' then plot these additional variables in the correlation plot. +#' @param dim The number of principal components to keep. +#' @return A `c("pcomp", "pca", "princomp")` object. +#' @details `pcomp()` is a generic function with `"formula"` and `"default"` +#' methods. It is essentially a wrapper around [prcomp()] and [princomp()] to +#' provide a coherent interface and object for both methods. +#' +#' A 'pcomp' object is created. It inherits from 'pca' (as in **labdsv** +#' package, but not compatible with the 'pca' object of package **ade4**) and of +#' 'princomp'. +#' +#' For more information on calculation done, refer to [prcomp()] for +#' `method = "svd"` or [princomp()] for `method = "eigen"`. +#' @note The signs of the columns of the loadings and scores are arbitrary, and +#' so may differ between functions for PCA, and even between different builds of +#' \R. +#' @author Philippe Grosjean , but the core code is +#' indeed in package **stats**. +#' @export +#' @seealso [vectorplot()], [prcomp()], [princomp()], [loadings()], +#' [Correlation()] +#' @keywords models +#' @concept principal component analysis and biplot +#' @examples +#' # We will analyze mtcars without the Mercedes data (rows 8:14) +#' data(mtcars) +#' cars.pca <- pcomp(~ mpg + cyl + disp + hp + drat + wt + qsec, data = mtcars, +#' subset = -(8:14)) +#' cars.pca +#' summary(cars.pca) +#' screeplot(cars.pca) +#' +#' # Loadings are extracted and plotted like this +#' (cars.ldg <- loadings(cars.pca)) +#' plot(cars.pca, which = "loadings") # Equivalent to vectorplot(cars.ldg) +#' +#' # Similarly, correlations of variables with PCs are extracted and plotted +#' (cars.cor <- Correlation(cars.pca)) +#' plot(cars.pca, which = "correlations") # Equivalent to vectorplot(cars.cor) +#' # One can add supplementary variables on this graph +#' lines(Correlation(cars.pca, +#' newvars = mtcars[-(8:14), c("vs", "am", "gear", "carb")])) +#' +#' # Plot the scores +#' plot(cars.pca, which = "scores", cex = 0.8) # Similar to plot(scores(x)[, 1:2]) +#' # Add supplementary individuals to this plot (labels), also points() or lines() +#' text(predict(cars.pca, newdata = mtcars[8:14, ]), col = "gray", cex = 0.8) +#' +#' # Pairs plot for 3 PCs +#' iris.pca <- pcomp(iris[, -5]) +#' pairs(iris.pca, col = (2:4)[iris$Species]) +pcomp <- function(x, ...) + UseMethod("pcomp") -## Create the pcomp generic function that returns a "pcomp" object -pcomp <- function (x, ...) - UseMethod("pcomp") - -pcomp.formula <- function (formula, data = NULL, subset, na.action, -method = c("svd", "eigen"), ...) -{ - ## Define a PCA through the formula interface - ## Largely inspired from prcomp.formula - mt <- terms(formula, data = data) - if (attr(mt, "response") > 0L) - stop("response not allowed in formula") - cl <- match.call() - mf <- match.call(expand.dots = FALSE) - mf$... <- NULL - mf[[1L]] <- as.name("model.frame") - mf <- eval.parent(mf) - ## TODO: avoid this! - if (stats:::.check_vars_numeric(mf)) - stop("PCA applies only to numerical variables") - na.act <- attr(mf, "na.action") - mt <- attr(mf, "terms") - attr(mt, "intercept") <- 0L - x <- model.matrix(mt, mf) - res <- pcomp.default(x, ...) - cl[[1L]] <- as.name("pcomp") - res$call <- cl - if (!is.null(na.act)) { - res$na.action <- na.act - if (!is.null(sc <- res$x)) - res$x <- napredict(na.act, sc) +#' @export +#' @rdname pcomp +pcomp.formula <- function(formula, data = NULL, subset, na.action, +method = c("svd", "eigen"), ...) { + # Largely inspired from prcomp.formula + mt <- terms(formula, data = data) + if (attr(mt, "response") > 0L) + stop("response not allowed in formula") + cl <- match.call() + mf <- match.call(expand.dots = FALSE) + mf$... <- NULL + mf[[1L]] <- as.name("model.frame") + mf <- eval.parent(mf) + if (.check_vars_numeric(mf)) + stop("PCA applies only to numerical variables") + na.act <- attr(mf, "na.action") + mt <- attr(mf, "terms") + attr(mt, "intercept") <- 0L + x <- model.matrix(mt, mf) + res <- pcomp.default(x, ...) + cl[[1L]] <- as.name("pcomp") + res$call <- cl + if (!is.null(na.act)) { + res$na.action <- na.act + if (!is.null(sc <- res$x)) + res$x <- napredict(na.act, sc) + } + res +} + +.svd_pca <- function(x, retx, center, scale, tol, ...) { +pca <- prcomp(x, retx = retx, center = center, scale = scale, tol = tol, ...) + # Required by pcomp.default() + # Rework the result to make it fit in the "pcomp" object + names(pca$sdev) <- paste("PC", 1:length(pca$sdev), sep = "") + if (isTRUE(!pca$center)) { + pca$center <- rep(0, length(pca$sdev)) + names(pca$center) <- colnames(pca$rotation) + } + if (isTRUE(!pca$scale)) { + pca$scale <- rep(1, length(pca$sdev)) + names(pca$scale) <- colnames(pca$rotation) + } + rn <- rownames(x) + if (is.null(rn)) { + rownames(pca$x) <- as.character(1:nrow(pca$x)) + } else { + rownames(pca$x) <- rn + } + res <- list( + loadings = structure(pca$rotation, class = "loadings"), + scores = if (is.null(pca$x)) NULL else as.data.frame(pca$x), + sdev = pca$sdev, + totdev = sqrt(sum(pca$sdev^2)), + n.obs = nrow(pca$x), + center = pca$center, + scale = pca$scale, + method = "svd" + ) + res +} + +.eigen_pca <- function(x, cor, scores, covmat = NULL, subset, ...) { + # Required by pcomp.default() + if (is.null(covmat)) { + pca <- princomp(x, cor = cor, scores = scores, subset = subset, ...) + } else { + pca <- princomp(cor = cor, scores = scores, covmat = covmat, + subset = subset, ...) + } + n <- length(pca$sdev) + pc <- paste("PC", 1:n, sep = "") # rename Comp.1, ... in PC1, ... + names(pca$sdev) <- pc + colnames(pca$loadings) <- pc + if (!is.null(pca$scores)) { + colnames(pca$scores) <- pc + # If there are rownames to x, use it + rn <- rownames(x) + if (is.null(rn)) { + rownames(pca$scores) <- as.character(1:nrow(pca$scores)) + } else { + rownames(pca$scores) <- rn } - return(res) + pca$scores <- as.data.frame(pca$scores) + } + res <- list( + loadings = pca$loadings, + scores = pca$scores, + sdev = pca$sdev, + totdev = sum(pca$sdev), + n.obs = pca$n.obs, + center = pca$center, + scale = pca$scale, + method = "eigen" + ) + res } -pcomp.default <- function (x, method = c("svd", "eigen"), scores = TRUE, +#' @export +#' @rdname pcomp +pcomp.default <- function(x, method = c("svd", "eigen"), scores = TRUE, center = TRUE, scale = TRUE, tol = NULL, covmat = NULL, -subset = rep(TRUE, nrow(as.matrix(x))), ...) -{ - ## Perform a PCA, either using prcomp (method = "svd"), or princomp ("eigen") - svd.pca <- function (x, retx, center, scale, tol, ...) { - pca <- prcomp(x, retx = retx, center = center, scale = scale, tol = tol, - ...) - ## Rework the result to make it fit in the "pcomp" object - names(pca$sdev) <- paste("PC", 1:length(pca$sdev), sep = "") - if (isTRUE(!pca$center)) { - pca$center <- rep(0, length(pca$sdev)) - names(pca$center) <- colnames(pca$rotation) - } - if (isTRUE(!pca$scale)) { - pca$scale <- rep(1, length(pca$sdev)) - names(pca$scale) <- colnames(pca$rotation) - } - rn <- rownames(x) - if (is.null(rn)) { - rownames(pca$x) <- as.character(1:nrow(pca$x)) - } else { - rownames(pca$x) <- rn - } - res <- list( - loadings = structure(pca$rotation, class = "loadings"), - scores = if (is.null(pca$x)) NULL else as.data.frame(pca$x), - sdev = pca$sdev, - totdev = sqrt(sum(pca$sdev^2)), - n.obs = nrow(pca$x), - center = pca$center, - scale = pca$scale, - method = "svd" - ) - return(res) - } - - eigen.pca <- function (x, cor, scores, covmat = NULL, subset, ...) { - if (is.null(covmat)) { - pca <- princomp(x, cor = cor, scores = scores, subset = subset, ...) - } else { - pca <- princomp(cor = cor, scores = scores, covmat = covmat, - subset = subset, ...) - } - n <- length(pca$sdev) - pc <- paste("PC", 1:n, sep = "") # rename Comp.1, ... in PC1, ... - names(pca$sdev) <- pc - colnames(pca$loadings) <- pc - if (!is.null(pca$scores)) { - colnames(pca$scores) <- pc - ## If there are rownames to x, use it - rn <- rownames(x) - if (is.null(rn)) { - rownames(pca$scores) <- as.character(1:nrow(pca$scores)) - } else { - rownames(pca$scores) <- rn - } - pca$scores <- as.data.frame(pca$scores) - } - res <- list( - loadings = pca$loadings, - scores = pca$scores, - sdev = pca$sdev, - totdev = sum(pca$sdev), - n.obs = pca$n.obs, - center = pca$center, - scale = pca$scale, - method = "eigen" - ) - return(res) - } +subset = rep(TRUE, nrow(as.matrix(x))), ...) { + # Perform a PCA, either using prcomp (method = "svd"), or princomp ("eigen") + cl <- match.call() + cl[[1L]] <- as.name("pcomp") + + # Check that all variables are numeric (otherwise, issue a clear message)! + x <- as.data.frame(x) + if (!all(sapply(x, is.numeric))) + stop("Cannot perform a PCA: one or more variables are not numeric.") - cl <- match.call() - cl[[1L]] <- as.name("pcomp") - - ## Check that all variables are numeric (otherwise, issue a clear message)! - x <- as.data.frame(x) - if (!all(sapply(x, is.numeric))) - stop("Cannot perform a PCA: one or more variables are not numeric.") - - method <- match.arg(method) - if (method == "eigen" && !isTRUE(center)) - warning("For method 'eigen', center is always TRUE") - res <- switch(method, - svd = svd.pca(x, retx = scores, center = center, scale = scale, - tol = tol, ...), - eigen = eigen.pca(x, cor = scale, scores = scores, covmat = covmat, - subset = subset, ...), - stop("method must be either 'svd' or 'eigen'") - ) - ## Add a call item - res$call <- cl - ## We return a specific object, but it is compatible (i.e., overloads), both - ## "pca" in package labdsv and "princomp" in package stats - class(res) <- c("pcomp", "pca", "princomp") - return(res) + method <- match.arg(method) + if (method == "eigen" && !isTRUE(center)) + warning("For method 'eigen', center is always TRUE") + res <- switch(method, + svd = .svd_pca(x, retx = scores, center = center, scale = scale, + tol = tol, ...), + eigen = .eigen_pca(x, cor = scale, scores = scores, covmat = covmat, + subset = subset, ...), + stop("method must be either 'svd' or 'eigen'") + ) + + res$call <- cl + # We return a specific object, but it is compatible (i.e., overloads), both + # "pca" in package labdsv and "princomp" in package stats + class(res) <- c("pcomp", "pca", "princomp") + res } -## print method (similar to print.princomp, but reports variances instead of sds) -print.pcomp <- function (x, ...) -{ - cat("Call:\n") - dput(x$call, control = NULL) - cat("\nVariances:\n") - print(x$sdev^2, ...) - cat("\n", length(x$scale), " variables and ", x$n.obs, "observations.\n") - invisible(x) +#' @export +#' @rdname pcomp +print.pcomp <- function(x, ...) { + # similar to print.princomp, but reports variances instead of sds + cat("Call:\n") + dput(x$call, control = NULL) + cat("\nVariances:\n") + print(x$sdev^2, ...) + cat("\n", length(x$scale), " variables and ", x$n.obs, "observations.\n") + invisible(x) } -## summary method (same as summary.princomp, but with TRUE for loadings) -summary.pcomp <- function (object, loadings = TRUE, cutoff = 0.1, ...) -{ - object$cutoff <- cutoff - object$print.loadings <- loadings - class(object) <- "summary.pcomp" - object +#' @export +#' @rdname pcomp +# print method (similar to print.princomp, but reports variances instead of sds) +# summary method (same as summary.princomp, but with TRUE for loadings) +summary.pcomp <- function(object, loadings = TRUE, cutoff = 0.1, ...) { + object$cutoff <- cutoff + object$print.loadings <- loadings + class(object) <- "summary.pcomp" + object } -## print method for summary.pcomp object (slightly modified from princomp) -print.summary.pcomp <- function (x, digits = 3, loadings = x$print.loadings, -cutoff = x$cutoff, ...) -{ - vars <- x$sdev^2 - vars <- vars/sum(vars) - cat("Importance of components (eigenvalues):\n") - print(rbind(`Variance` = round(x$sdev^2, 5), - `Proportion of Variance` = round(vars, 5), - `Cumulative Proportion` = round(cumsum(vars), 5)), digits = digits, ...) - if (loadings) { - cat("\nLoadings (eigenvectors, rotation matrix):\n") - cx <- format(round(x$loadings, digits = digits)) - cx[abs(x$loadings) < cutoff] <- paste(rep(" ", nchar(cx[1, 1], - type = "w")), collapse = "") - print(cx, quote = FALSE, ...) - } - invisible(x) +#' @export +#' @rdname pcomp +# print method for summary.pcomp object (slightly modified from princomp) +print.summary.pcomp <- function(x, digits = 3, loadings = x$print.loadings, +cutoff = x$cutoff, ...) { + vars <- x$sdev^2 + vars <- vars/sum(vars) + cat("Importance of components (eigenvalues):\n") + print(rbind(`Variance` = round(x$sdev^2, 5), + `Proportion of Variance` = round(vars, 5), + `Cumulative Proportion` = round(cumsum(vars), 5)), digits = digits, ...) + if (loadings) { + cat("\nLoadings (eigenvectors, rotation matrix):\n") + cx <- format(round(x$loadings, digits = digits)) + cx[abs(x$loadings) < cutoff] <- paste(rep(" ", nchar(cx[1, 1], + type = "w")), collapse = "") + print(cx, quote = FALSE, ...) + } + invisible(x) } -## plot method -## TODO: same mechanism as for plot.lm: multiplot allowed! -plot.pcomp <- function (x, +.plot_scores <- function(x, choices, col, circle.col, labels, cex, main, +xlab, ylab, ...) { + # Required by plot.pcomp() + if (is.null(x$scores)) + stop("no scores are available: refit with 'scores = TRUE'") + if (is.null(labels)) { + labels <- rownames(x$scores) + if (is.null(labels)) # If still no labels + labels <- as.character(1:nrow(x$scores)) + } else if (!isTRUE(!as.numeric(labels))) { + labels <- as.character(labels) + } + scores <- scores(x)[, choices] + plot(scores, type = "n", asp = 1, main = main, xlab = xlab, ylab = ylab) + abline(h = 0, col = circle.col) + abline(v = 0, col = circle.col) + if (!isTRUE(!as.numeric(labels))) + text(scores, labels = labels, col = col, cex = cex, ...) +} + + +#' @export +#' @rdname pcomp +plot.pcomp <- function(x, which = c("screeplot", "loadings", "correlations", "scores"), choices = 1L:2L, col = par("col"), bar.col = "gray", circle.col = "gray", ar.length = 0.1, pos = NULL, labels = NULL, cex = par("cex"), -main = paste(deparse(substitute(x)), which, sep = " - "), xlab, ylab, ...) -{ - plotScores <- function (x, choices, col, circle.col, labels, cex, main, - xlab, ylab, ...) - { - if (is.null(x$scores)) - stop("no scores are available: refit with 'scores = TRUE'") - if (is.null(labels)) { - labels <- rownames(x$scores) - if (is.null(labels)) # If still no labels - labels <- as.character(1:nrow(x$scores)) - } else if (!isTRUE(!as.numeric(labels))) - labels <- as.character(labels) - scores <- scores(x)[, choices] - plot(scores, type = "n", asp = 1, main = main, xlab = xlab, ylab = ylab) - abline(h = 0, col = circle.col) - abline(v = 0, col = circle.col) - if (!isTRUE(!as.numeric(labels))) - text(scores, labels = labels, col = col, cex = cex, ...) - } - - which <- match.arg(which) - main <- main[1] - ## Calculate default xlab and ylab - labs <- paste(names(x$sdev), " (", round((x$sdev^2 / x$totdev^2) * 100, - digits = 1), "%)", sep = "") - if (missing(xlab)) xlab <- labs[choices[1]] else xlab - if (missing(ylab)) ylab <- labs[choices[2]] else ylab - switch(which, - ## TODO: avoid this! - screeplot = stats:::screeplot.default(x, col = bar.col, main = main, ...), - loadings = vectorplot(loadings(x), choices = choices, col = col, - circle.col = circle.col, ar.length = ar.length, pos = pos, cex = cex, - labels = if (is.null(labels)) rownames(loadings(x)) else labels, - main = main, xlab = xlab, ylab = ylab, ...), - correlations = vectorplot(correlation(x), choices = choices, col = col, - circle.col = circle.col, ar.length = ar.length, pos = pos, cex = cex, - labels = if (is.null(labels)) rownames(loadings(x)) else labels, - main = main, xlab = xlab, ylab = ylab, ...), - scores = plotScores(x, choices = choices, col = col, cex = cex, - circle.col = circle.col, labels = labels, main = main, - xlab = xlab, ylab = ylab, ...), - stop("unknown graph type") - ) +main = paste(deparse(substitute(x)), which, sep = " - "), xlab, ylab, ...) { + which <- match.arg(which) + main <- main[1] + # Calculate default xlab and ylab + labs <- paste(names(x$sdev), " (", round((x$sdev^2 / x$totdev^2) * 100, + digits = 1), "%)", sep = "") + if (missing(xlab)) xlab <- labs[choices[1]] else xlab + if (missing(ylab)) ylab <- labs[choices[2]] else ylab + switch(which, + screeplot = screeplot(unclass(x), col = bar.col, main = main, ...), + loadings = vectorplot(loadings(x), choices = choices, col = col, + circle.col = circle.col, ar.length = ar.length, pos = pos, cex = cex, + labels = if (is.null(labels)) rownames(loadings(x)) else labels, + main = main, xlab = xlab, ylab = ylab, ...), + correlations = vectorplot(Correlation(x), choices = choices, col = col, + circle.col = circle.col, ar.length = ar.length, pos = pos, cex = cex, + labels = if (is.null(labels)) rownames(loadings(x)) else labels, + main = main, xlab = xlab, ylab = ylab, ...), + scores = .plot_scores(x, choices = choices, col = col, cex = cex, + circle.col = circle.col, labels = labels, main = main, + xlab = xlab, ylab = ylab, ...), + stop("unknown plot type") + ) } -## screeplot method (add cumulative variance curve to the plot) -screeplot.pcomp <- function (x, npcs = min(10, length(x$sdev)), +#' @export +#' @rdname pcomp +screeplot.pcomp <- function(x, npcs = min(10, length(x$sdev)), type = c("barplot", "lines"), col = "cornsilk", main = deparse(substitute(x)), -...) -{ - force(main) - type <- match.arg(type) - pcs <- x$sdev^2 - xp <- seq_len(npcs) - if (type == "barplot") - barplot(pcs[xp], names.arg = names(pcs[xp]), main = main, - ylab = "Variances", col = col, ...) - else { - plot(xp, pcs[xp], type = "b", axes = FALSE, main = main, - xlab = "", ylab = "Variances", ...) - axis(2) - axis(1, at = xp, labels = names(pcs[xp])) - } - return(invisible()) +...) { + # screeplot() method (add cumulative variance curve to the plot) + force(main) + type <- match.arg(type) + pcs <- x$sdev^2 + xp <- seq_len(npcs) + if (type == "barplot") { + barplot(pcs[xp], names.arg = names(pcs[xp]), main = main, + ylab = "Variances", col = col, ...) + } else { + plot(xp, pcs[xp], type = "b", axes = FALSE, main = main, + xlab = "", ylab = "Variances", ...) + axis(2) + axis(1, at = xp, labels = names(pcs[xp])) + } + invisible() } -## points method -# This is supposed to add points to a graph of scores -points.pcomp <- function (x, choices = 1L:2L, type = "p", pch = par("pch"), -col = par("col"), bg = par("bg"), cex = par("cex"), ...) -{ +#' @export +#' @rdname pcomp +points.pcomp <- function(x, choices = 1L:2L, type = "p", pch = par("pch"), +col = par("col"), bg = par("bg"), cex = par("cex"), ...) { if (is.null(x$scores)) stop("no scores are available: refit with 'scores = TRUE'") points(scores(x)[, choices], type = type, pch = pch, col = col, bg = bg, cex = cex, ...) } -## lines method -# Uses groups to draw either polygons or ellipses for each group -lines.pcomp <- function (x, choices = 1L:2L, groups, type = c("p", "e"), -col = par("col"), border = par("fg"), level = 0.9, ...) -{ - - polygons <- function (scores, groups, n, col, border, ...) { - for (i in 1:n) { - sc <- na.omit(scores[as.numeric(groups) == i, ]) - if (NROW(sc) > 1) { - pts <- chull(sc) - ## Close polygon - pts <- c(pts, pts[1]) - polygon(sc[pts, 1], sc[pts, 2], col = col[i], - border = border[i], ...) - } - } - } +.polygons <- function(scores, groups, n, col, border, ...) { + # Required by lines.pcomp() + for (i in 1:n) { + sc <- na.omit(scores[as.numeric(groups) == i, ]) + if (NROW(sc) > 1) { + pts <- chull(sc) + # Close polygon + pts <- c(pts, pts[1]) + polygon(sc[pts, 1], sc[pts, 2], col = col[i], + border = border[i], ...) + } + } +} - ellipses <- function (scores, groups, n, col, border, level, ...) { - for (i in 1:n) { - sc <- na.omit(scores[as.numeric(groups) == i, ]) - if (NROW(sc) > 1) { - x <- sc[, 1] - y <- sc[, 2] - polygon(ellipse(cor(x, y), scale = c(sd(x), sd(y)), - centre = c(mean(x), mean(y)), level = level), col = col[i], - border = border[i], ...) - } - } - } - - if (is.null(x$scores)) - stop("no scores are available: refit with 'scores = TRUE'") - if (missing(groups)) - stop("you must provide groups") - scores <- x$scores[, choices] - groups <- as.factor(groups) - n <- length(levels(groups)) - col <- rep(col, length.out = n) - border <- rep(border, length.out = n) - type <- match.arg(type) - switch(type, - p = polygons(scores, groups = groups, n = n, col = col, - border = border, ...), - e = ellipses(scores, groups = groups, n = n, col = col, - border = border, level = level, ...), - stop("unknown type, currently only 'p' for polygons et 'e' for ellipses") - ) +.ellipses <- function(scores, groups, n, col, border, level, ...) { + # Required by lines.pcomp() + for (i in 1:n) { + sc <- na.omit(scores[as.numeric(groups) == i, ]) + if (NROW(sc) > 1) { + x <- sc[, 1] + y <- sc[, 2] + polygon(ellipse(cor(x, y), scale = c(sd(x), sd(y)), + centre = c(mean(x), mean(y)), level = level), col = col[i], + border = border[i], ...) + } + } +} + +#' @export +#' @rdname pcomp +lines.pcomp <- function(x, choices = 1L:2L, groups, type = c("p", "e"), +col = par("col"), border = par("fg"), level = 0.9, ...) { + # Use groups to draw either polygons or ellipses for each group + if (is.null(x$scores)) + stop("no scores are available: refit with 'scores = TRUE'") + if (missing(groups)) + stop("you must provide groups") + scores <- x$scores[, choices] + groups <- as.factor(groups) + n <- length(levels(groups)) + col <- rep(col, length.out = n) + border <- rep(border, length.out = n) + type <- match.arg(type) + switch(type, + p = .polygons(scores, groups = groups, n = n, col = col, + border = border, ...), + e = .ellipses(scores, groups = groups, n = n, col = col, + border = border, level = level, ...), + stop("unknown type, currently only 'p' for polygons et 'e' for ellipses") + ) } -## text method -text.pcomp <- function (x, choices = 1L:2L, labels = NULL, col = par("col"), +#' @export +#' @rdname pcomp +text.pcomp <- function(x, choices = 1L:2L, labels = NULL, col = par("col"), cex = par("cex"), pos = NULL, ...) { - if (is.null(x$scores)) - stop("no scores are available: refit with 'scores = TRUE'") - if (is.null(labels)) - labels <- as.character(1:nrow(x$scores)) - text(x$scores[, choices], labels = labels, col = col, cex = cex, - pos = pos, ...) + if (is.null(x$scores)) + stop("no scores are available: refit with 'scores = TRUE'") + if (is.null(labels)) + labels <- as.character(1:nrow(x$scores)) + text(x$scores[, choices], labels = labels, col = col, cex = cex, + pos = pos, ...) } -## biplot method (note: it plots loadings, not correlations!) -biplot.pcomp <- function (x, choices = 1L:2L, scale = 1, pc.biplot = FALSE, ...) -{ - if (length(choices) != 2) - stop("length of choices must be 2") - if (!length(scores <- x$scores)) - stop(gettextf("object '%s' has no scores", deparse(substitute(x))), - domain = NA) - if (is.complex(scores)) - stop("biplots are not defined for complex PCA") - lam <- x$sdev[choices] - n <- NROW(scores) - lam <- lam * sqrt(n) - if (scale < 0 || scale > 1) - warning("'scale' is outside [0, 1]") - if (scale != 0) - lam <- lam^scale - else lam <- 1 - if (pc.biplot) - lam <- lam/sqrt(n) - ## TODO: avoid this! - stats:::biplot.default(t(t(scores[, choices])/lam), - t(t(x$loadings[, choices]) * lam), ...) - return(invisible()) +#' @export +#' @rdname pcomp +biplot.pcomp <- function(x, choices = 1L:2L, scale = 1, pc.biplot = FALSE, ...) { + if (length(choices) != 2) + stop("length of choices must be 2") + if (!length(scores <- x$scores)) + stop(gettextf("object '%s' has no scores", deparse(substitute(x))), + domain = NA) + if (is.complex(scores)) + stop("biplots are not defined for complex PCA") + lam <- x$sdev[choices] + n <- NROW(scores) + lam <- lam * sqrt(n) + if (scale < 0 || scale > 1) + warning("'scale' is outside [0, 1]") + if (scale != 0) { + lam <- lam^scale + } else { + lam <- 1 + } + if (pc.biplot) + lam <- lam/sqrt(n) + biplot(unclass(t(t(scores[, choices])/lam)), + t(t(x$loadings[, choices]) * lam), ...) } -## .panel.individuals required by pairs.pcomp -.panel.individuals <- function (x, y, ...) { - ## x and y are c(indivs, NaN, vars) => collect indivs - pos <- 1:(which(is.nan(x))[1] - 1) - points(x[pos], y[pos], ...) +.panel_individuals <- function(x, y, ...) { + # Required by pairs.pcomp() + pos <- 1:(which(is.nan(x))[1] - 1) + points(x[pos], y[pos], ...) } -## .panel.variables required by pairs.pcomp -.panel.variables <- function (x, y, ar.labels, ar.col, ar.cex, labels, col, -cex, ...) -{ - ## x and y are c(indivs, NaN, vars) => collect indivs - pos <- (which(is.nan(x))[1] + 1):length(x) - par(new = TRUE) - ## We want to invert position of x and y here to get same one as indivs - vectorplot(y[pos], x[pos], axes = FALSE, labels = ar.labels, col = ar.col, - cex = ar.cex, ...) +.panel_variables <- function(x, y, ar.labels, ar.col, ar.cex, labels, col, +cex, ...) { + # Required by pairs.pcomp() + pos <- (which(is.nan(x))[1] + 1):length(x) + par(new = TRUE) + # We want to invert position of x and y here to get same one as indivs + vectorplot(y[pos], x[pos], axes = FALSE, labels = ar.labels, col = ar.col, + cex = ar.cex, ...) } -## pairs plot for pcomp objects -pairs.pcomp <- function (x, choices = 1L:3L, +#' @export +#' @rdname pcomp +pairs.pcomp <- function(x, choices = 1L:3L, type = c("loadings", "correlations"), col = par("col"), circle.col = "gray", ar.col = par("col"), ar.length = 0.05, pos = NULL, ar.cex = par("cex"), -cex = par("cex"), ...) -{ - type <- match.arg(type) - X <- scores(x)[, choices] - ## Calculate labels - labs <- paste(names(x$sdev), " (", round((x$sdev^2 / x$totdev^2) * 100, - digits = 1), "%)", sep = "")[choices] - ## Add a row of NaN to separate indivs and vars - X <- rbind(X, rep(NaN, length(choices))) - ## Add vars - vars <- switch(type, - loadings = loadings(x)[, choices], - correlations = correlation(x)[, choices] - ) - X <- rbind(X, vars) - ## Change names - names(X) <- labs - ## Why do I get warning with non par arguments?! - suppressWarnings(pairs(X, lower.panel = .panel.individuals, - upper.panel = .panel.variables, - col = col, circle.col = circle.col, ar.col = ar.col, ar.cex = ar.cex, - ar.length = ar.length, ar.labels = rownames(vars), pos = pos, - cex = cex, ...)) +cex = par("cex"), ...) { + type <- match.arg(type) + X <- scores(x)[, choices] + labs <- paste(names(x$sdev), " (", round((x$sdev^2 / x$totdev^2) * 100, + digits = 1), "%)", sep = "")[choices] + # Add a row of NaN to separate indivs and vars + X <- rbind(X, rep(NaN, length(choices))) + # Add vars + vars <- switch(type, + loadings = loadings(x)[, choices], + correlations = Correlation(x)[, choices] + ) + X <- rbind(X, vars) + names(X) <- labs + # TODO: Why do I get warning with non par arguments?! + suppressWarnings(pairs(X, lower.panel = .panel_individuals, + upper.panel = .panel_variables, + col = col, circle.col = circle.col, ar.col = ar.col, ar.cex = ar.cex, + ar.length = ar.length, ar.labels = rownames(vars), pos = pos, + cex = cex, ...)) } -## predict method -predict.pcomp <- function (object, newdata, dim = length(object$sdev), ...) -{ - if (dim > length(object$sdev)) { - warning("Only", length(object$sdev), " axes available\n") - dim <- length(object$sdev) - } - if (missing(newdata)) - if (!is.null(object$scores)) - return(object$scores[, 1:dim]) - else stop("no scores are available: refit with 'scores = TRUE'") - if (length(dim(newdata)) != 2L) - stop("'newdata' must be a matrix or data frame") - nm <- rownames(object$loadings) - if (!is.null(nm)) { - if (!all(nm %in% colnames(newdata))) - stop("'newdata' does not have named columns matching one or more of the original columns") - newdata <- newdata[, nm, drop = FALSE] +#' @export +#' @rdname pcomp +predict.pcomp <- function(object, newdata, dim = length(object$sdev), ...) { + if (dim > length(object$sdev)) { + warning("Only", length(object$sdev), " axes available\n") + dim <- length(object$sdev) + } + if (missing(newdata)) { + if (!is.null(object$scores)) { + return(object$scores[, 1:dim]) } else { - if (NCOL(newdata) != NROW(object$loadings)) - stop("'newdata' does not have the correct number of columns") + stop("no scores are available: refit with 'scores = TRUE'") } - scale(newdata, object$center, object$scale) %*% object$loadings[, 1:dim] + } + if (length(dim(newdata)) != 2L) + stop("'newdata' must be a matrix or data frame") + nm <- rownames(object$loadings) + if (!is.null(nm)) { + if (!all(nm %in% colnames(newdata))) + stop("'newdata' does not have named columns matching one or more of the original columns") + newdata <- newdata[, nm, drop = FALSE] + } else { + if (NCOL(newdata) != NROW(object$loadings)) + stop("'newdata' does not have the correct number of columns") + } + scale(newdata, object$center, object$scale) %*% object$loadings[, 1:dim] } -## Extract correlation from a pcomp object. If newvar is provided, it -## calculates correlations between this new variable and corresponding PCs -## (providing that scores were calculated, and that the nrow() of new -## variable is the same as nrow(scores), assumed to be the same individuals -## as in the original PCA) -## It creates a 'corr' object -correlation.pcomp <- function (x, newvars, dim = length(x$sdev), ...) -{ - Call <- match.call() - - dim <- as.integer(dim)[1] - if (dim > length(x$sdev)) { - warning("Only", length(x$sdev), " axes available\n") - dim <- length(x$sdev) - } - dims <- 1:dim - - if (missing(newvars)) { - ## Just extract correlations (calculated after loadings) - if (is.null(loads <- loadings(x))) { - return(NULL) - } else { - res <- sweep(loads[, dims], 2, x$sdev[dims], "*") - ## Create a 'corr' object with this - attr(res, "method") <- "PCA variables and components correlation" - attr(res, "call") <- Call - class(res) <- c("correlation", "matrix") - return(res) - } - } else { - ## Calculate correlation of new variables with PCs - ## Must have same number of observations as in scores, otherwise, we got - ## the error message: "incompatible dimensions" - if (is.null(scores <- x$scores)) - stop("no scores are available: refit with 'scores = TRUE'") - ## TODO: if these are rownames, check that they match - res <- correlation(newvars, scores[, dims]) - ## Just change method attribute - attr(res, "method") <- "PCA variables and components correlation" - return(res) - } -} +#' @export +#' @rdname pcomp +correlation.pcomp <- function(x, newvars, dim = length(x$sdev), ...) { + # Extract Correlation from a pcomp object. If newvar is provided, it + # calculates correlations between this new variable and corresponding PCs + # (providing that scores were calculated, and that the nrow() of new + # variable is the same as nrow(scores), assumed to be the same individuals + # as in the original PCA) + Call <- match.call() -## A generic function compatible with the corresponding one in labdsv package -scores <- function (x, ...) - UseMethod("scores") + dim <- as.integer(dim)[1] + if (dim > length(x$sdev)) { + warning("Only", length(x$sdev), " axes available\n") + dim <- length(x$sdev) + } + dims <- 1:dim -## Borrowed from scores in labdsv -## but return a data frame instead of a matrix -## TODO: check labels length, dim perhaps not the best argument name -scores.pcomp <- function (x, labels = NULL, dim = length(x$sdev), ...) -{ - if (dim > length(x$sdev)) { - warning("Only", length(x$sdev), " axes available\n") - dim <- length(x$sdev) - } - if (is.null(x$scores)) - stop("no scores are available: refit with 'scores = TRUE'") - if (!is.null(labels)) { - res <- as.data.frame(cbind(x$scores[, 1:dim], labels)) + if (missing(newvars)) { + # Just extract correlations (calculated after loadings) + if (is.null(loads <- loadings(x))) { + return(NULL) } else { - res <- as.data.frame(x$scores[, 1:dim]) + res <- sweep(loads[, dims], 2, x$sdev[dims], "*") + # Create a 'Correlation' object with this + attr(res, "method") <- "PCA variables and components correlation" + attr(res, "call") <- Call + class(res) <- c("Correlation", "matrix") + return(res) } - return(res) + } else { + # Calculate correlation of new variables with PCs + # Must have same number of observations as in scores, otherwise, we got + # the error message: "incompatible dimensions" + if (is.null(scores <- x$scores)) + stop("no scores are available: refit with 'scores = TRUE'") + # TODO: if these are rownames, check that they match + res <- Correlation(newvars, scores[, dims]) + attr(res, "method") <- "PCA variables and components correlation" + return(res) + } +} + +#' @export +#' @rdname pcomp +scores <- function(x, ...) { + # A generic function compatible with the corresponding one in labdsv package + UseMethod("scores") +} + +#' @export +#' @rdname pcomp +scores.pcomp <- function(x, labels = NULL, dim = length(x$sdev), ...) { + # Borrowed from scores in labdsv + # but returns a data frame instead of a matrix + if (dim > length(x$sdev)) { + warning("Only", length(x$sdev), " axes available\n") + dim <- length(x$sdev) + } + if (is.null(x$scores)) + stop("no scores are available: refit with 'scores = TRUE'") + if (!is.null(labels)) { + as.data.frame(cbind(x$scores[, 1:dim], labels)) + } else { + as.data.frame(x$scores[, 1:dim]) + } } diff --git a/R/vectorplot.R b/R/vectorplot.R index ef2dbe7..3ca7302 100755 --- a/R/vectorplot.R +++ b/R/vectorplot.R @@ -1,46 +1,80 @@ -## A generic function for eigenvectors plots and similar graphs -vectorplot <- function (x, ...) - UseMethod("vectorplot") +#' Plot vectors inside a unit circle (PCA loadings or correlations plots). +#' +#' Plots vectors with 0 < norms < 1 inside a circle. These plots are mainly +#' designed to represent variables in principal components space for PCAs. +#' +#' @param x An object that has a [vectorplot()] method, like 'loadings' +#' or 'correlation', or a numeric vector with 0 < values < 1. +#' @param y A numeric vector with 0 < values < 1 of same length as `x. +#' @param choices A vector of two integers indicating the axes to plot. +#' @param col Color of the arrows and labels. +#' @param circle.col The color for the circle around the vector plot. +#' @param ar.length The length of the arrows. +#' @param pos The position of text relative to arrows. If `NULL`, a suitable +#' position is calculated according to the direction where the arrows are +#' pointing. +#' @param cex The factor of expansion for labels in the graph. +#' @param labels The labels to draw near the arrows. +#' @param main The title of the plot. +#' @param ... Further arguments passed to plot functions. +#' @return The object 'x' is returned invisibly. These functions are called for +#' their side-effect of drawing a vector plot. +#' @export +#' @seealso [pcomp()], [loadings()], [Correlation()] +#' @keywords aplot +#' @concept Vector and circular plot +#' @examples +#' # Create a PCA and plot loadings and correlations +#' iris.pca <- pcomp(iris[, -5]) +#' vectorplot(loadings(iris.pca)) +#' vectorplot(Correlation(iris.pca)) +#' # Note: on screen devices, change aspect ratio of the graph by resizing +#' # the window to reveal cropped labels... +vectorplot <- function(x, ...) + UseMethod("vectorplot") -vectorplot.default <- function (x, y, col = par("col"), circle.col = "gray", -ar.length = 0.1, pos = NULL, cex = par("cex"), labels = NULL, ...) -{ - plot(x, y, type = "n", xlim = c(-1.1, 1.1), ylim = c(-1.1, 1.1), asp = 1, - ...) - abline(h = 0, col = circle.col) - abline(v = 0, col = circle.col) - a <- seq(0, 2 * pi, len = 100) - lines(cos(a), sin(a), col = circle.col) - arrows(0, 0, x, y, col = col, length = ar.length, ...) - if (!is.null(labels)){ - ## If pos is NULL, calculate pos for each variable so that label is - ## located outside - if (is.null(pos)) - pos <- c(2, 1, 4, 3, 2)[floor((atan2(y, x)/pi + 1.25) / 0.5) + 1] - text(x, y, labels = labels, col = col, pos = pos, cex = cex, ...) - } - return(invisible()) +#' @export +#' @rdname vectorplot +vectorplot.default <- function(x, y, col = par("col"), circle.col = "gray", +ar.length = 0.1, pos = NULL, cex = par("cex"), labels = NULL, ...) { + plot(x, y, type = "n", xlim = c(-1.1, 1.1), ylim = c(-1.1, 1.1), asp = 1, ...) + abline(h = 0, col = circle.col) + abline(v = 0, col = circle.col) + a <- seq(0, 2 * pi, len = 100) + lines(cos(a), sin(a), col = circle.col) + arrows(0, 0, x, y, col = col, length = ar.length, ...) + if (!is.null(labels)) { + # If pos is NULL, calculate pos for each variable so that label is + # located outside + if (is.null(pos)) + pos <- c(2, 1, 4, 3, 2)[floor((atan2(y, x)/pi + 1.25) / 0.5) + 1] + text(x, y, labels = labels, col = col, pos = pos, cex = cex, ...) + } + invisible(list(x = x, y = y)) } -vectorplot.loadings <- function (x, choices = 1L:2L, col = par("col"), +#' @export +#' @rdname vectorplot +vectorplot.loadings <- function(x, choices = 1L:2L, col = par("col"), circle.col = "gray", ar.length = 0.1, pos = NULL, cex = par("cex"), labels = rownames(x), main = deparse(substitute(x)), ...) { - X <- x[, choices] - vectorplot.default(X[, 1], X[, 2], col = col, circle.col = circle.col, - ar.length = ar.length, pos = pos, cex = cex, labels = labels, - main = main, ...) - return(invisible(x)) + X <- x[, choices] + vectorplot.default(X[, 1], X[, 2], col = col, circle.col = circle.col, + ar.length = ar.length, pos = pos, cex = cex, labels = labels, + main = main, ...) + invisible(x) } -## Plot vectors inside a circle for correlations along 2 axes (i.e., 2 columns -## in the correlation matrix). This is the typical correlations plot in PCA -vectorplot.correlation <- function (x, choices = 1L:2L, col = par("col"), +#' @export +#' @rdname vectorplot +# Plot vectors inside a circle for correlations along 2 axes (i.e., 2 columns +# in the correlation matrix). This is the typical correlations plot in PCA +vectorplot.Correlation <- function(x, choices = 1L:2L, col = par("col"), circle.col = "gray", ar.length = 0.1, pos = NULL, cex = par("cex"), -labels = rownames(x), main = deparse(substitute(x)), ...) -{ - X <- x[, choices] - vectorplot.default(X[, 1], X[, 2], col = col, circle.col = circle.col, - ar.length = ar.length, pos = pos, cex = cex, labels = labels, - main = main, ...) - return(invisible(x)) +labels = rownames(x), main = deparse(substitute(x)), ...) { + X <- x[, choices] + vectorplot.default(X[, 1], X[, 2], col = col, circle.col = circle.col, + ar.length = ar.length, pos = pos, cex = cex, labels = labels, + main = main, ...) + invisible(x) } diff --git a/SciViews.Rproj b/SciViews.Rproj index dde2c3a..6fccb25 100644 --- a/SciViews.Rproj +++ b/SciViews.Rproj @@ -18,3 +18,4 @@ StripTrailingWhitespace: Yes BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace,vignette diff --git a/TODO b/TODO index 48fa1e6..71ca090 100644 --- a/TODO +++ b/TODO @@ -1,32 +1,25 @@ -= SciViews To Do list - -* Change isEmpty because clash with a generic in filehash. permutations() -defined in gtools and e1071 and do something different! Also, combine() is -defined in both gdata and randomForests. It should really be a generic with the -gdata version being the default and the randomForest version being -combine.randomForest(). nobs() in gdata and stats are both S3 generics defined -a little bit differently... but their default, at least, do something very -different! There is also object.size() in gdata and utils. In this case, the -version in gdata is compatible with the one in utils, but it is vectorized. -For gplots, there is lowess() which is also defined in stats. Indeed, in gplots -it is a S3 generic with lowess.default() being the same as the function in stats -=> it is fine! For Rweka, LogitBoost() is also defined in caTools. These seem -very different and should be named differently too! -A clash for Args() between gdata and svMisc is solved, because the function in -svMisc is renamed argsTip(). - -* Rd files for character, file and misc. +# SciViews To Do list + +* file_head() + +* attrNames() or attr_names() like slotNames() + +* unscale() to revert the effect of scale()? + +* is.wholenumber() + +* other xxx. functions for those using na.rm = FALSE like mean, median, sd, var, + quantile, fivenum, ... * Refine panel.xxx() functions to avoid warning in case we provide non-par -arguments to pairs() and design a better mechanism to define default (colors, -line type and weight, etc.). + arguments to pairs() and design a better mechanism to define default (colors, + line type and weight, etc.). * A better grid() for log scale, a grid() with intermediate lines (between two -axis ticks), and a slightly different line for origin axes. + axis ticks), and a slightly different line for origin axes. * A plot.htest() method that graphically shows the test results (needs specific -representation for each test => difficult, but think about a solution for that -particular problem!) + representation for each test => how to do that? * QQ-plot and statistical distributions plots. @@ -40,89 +33,24 @@ particular problem!) * For correspondance analysis, see ca package. -* recordPlot(), replayPlot() to save graphs between sessions. -Look also at dev.control("inhibit") and dev.interactive(). - -* gui? (dialog box), cmd? (fill dialog box with values) or gui? (do nothing), - cmd? (run it). - -* Rethink depends, imports and suggests to install and/or load the right number -of packages (+ options to avoiding to load certains packages, in certain cases). - -* Save SciViews-R/SciViews-K communication parameters and reconfigure the socket -server accordingly (do not start the socket server automatically in case there -is no config file). - -* Automatically load projects and snippets associated with a package. Use a - context string that ensures that we have require(XXXX) somewhere up the scrip - for using a command in a R package. Reload the tooltips definition each time - we use them... or better: use contextual arguments: svMisc::expr => can - superseed expr tooltip but uses it, if there is no svMisc::expr tooltip! +* What about c.factor()??? C() applied to factor currently just produces + rubbish... But, we may break existing code! * unlist() should really be S3 generic with unlist.default + unlist.factor being -the original unlist() function. Need also at least unlist.Date, unlist.POSIXct -and unlist.POSIXlt, although the first two treatmenbts are (unexpectendly) -already correct! + the original unlist() function. Need also at least unlist.Date(), + unlist.POSIXct() and unlist.POSIXlt(), although the first two treatments are + already correct! -* What about c.factor()??? C() applied to factor currently just produces -rubbish... But, we may break existing code! - -* unlist applies to a list of matrices or arrays or data frames of the same -shape, i.e., same objects types and same column names should really rbind() -items... but using rbind() is inefficient => need another solution. Also, this -should be called differently to unlist() to avoid breaking code, why not -lbind()??? +* unlist() applies to a list of matrices or arrays or data frames of the same + shape, i.e., same objects types and same column names should really rbind() + items... but using rbind() is inefficient => need another solution. Also, this + should be called differently to unlist() to avoid breaking code, why not + lbind()? * Generic method bare(object) to drop all attributes except the one required to -define a given object... perhaps onlyNames(object) that does the same as c(x), -but see ?c. - -* matrices of lists -l <- list(x = 1:10, y = 3:7, z = TRUE, w = "azza") -l -#! $x -#! [1] 1 2 3 4 5 6 7 8 9 10 -#! -#! $y -#! [1] 3 4 5 6 7 -#! -#! $z -#! [1] TRUE -#! -#! $w -#! [1] "azza" - -as.matrix(l) -#! [,1] -#! x Integer,10 -#! y Integer,5 -#! z TRUE -#! w "azza" - -(ml <- matrix(l, nrow = 2)) -#! [,1] [,2] -#! [1,] Integer,10 TRUE -#! [2,] Integer,5 "azza" - -l$x -#! [1] 1 2 3 4 5 6 7 8 9 10 - -ml$x -#! NULL - -names(ml) <- c("x", "y", "z", "w") -ml -#! [,1] [,2] -#! [1,] Integer,10 TRUE -#! [2,] Integer,5 "azza" -#! attr(,"names") -#! [1] "x" "y" "z" "w" -ml$x -#! [1] 1 2 3 4 5 6 7 8 9 10 - -Why not to keep names when transforming into a matrix, since it is also -considered as a vector... and it seems to work well!? - - - -* Translation of this package (and what about the doc?). + define a given object... perhaps onlyNames(object) that does the same as c(x), + but see ?c. + +* Analyze this: http://obeautifulcode.com/R/How-R-Searches-And-Finds-Stuff/ + +* Translation of this package. diff --git a/man/SciViews-package.Rd b/man/SciViews-package.Rd old mode 100755 new mode 100644 index 631a145..57a2aee --- a/man/SciViews-package.Rd +++ b/man/SciViews-package.Rd @@ -1,35 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SciViews-package.R +\docType{package} \name{SciViews-package} \alias{SciViews-package} -\alias{SciViews} -\docType{package} - -\title{ SciViews - Main package } - +\title{SciViews - Main package} \description{ - Functions to install SciViews additions to R, and miscellaneous +The SciViews package provides various functions to install the SciViews::R +dialect. It also provides additional utilites besides base, recommended and +tidyverse. } +\section{Important functions}{ -\details{ - \tabular{ll}{ - Package: \tab SciViews\cr - Type: \tab Package\cr - Version: \tab 0.9-13\cr - Date: \tab 2018-01-05\cr - License: \tab GPL (>= 2)\cr - LazyLoad: \tab yes\cr - } -} -\author{ - Philippe Grosjean - - Maintainer: Philippe Grosjean -} - -\references{ - SciViews: http://www.sciviews.org/SciViews-R/ +TODO... } -\keyword{ package } - -\concept{ R enhancement } diff --git a/man/as.intBase.Rd b/man/as.intBase.Rd deleted file mode 100755 index f98159b..0000000 --- a/man/as.intBase.Rd +++ /dev/null @@ -1,44 +0,0 @@ -\name{as.intBase} -\alias{as.integerBase} -\alias{as.intBase} - -\title{Convert strings to integers} - -\description{ - Convert strings to integers according to the given base using the C function - \code{strtol}, or choose a suitable base following the C rules. -} - -\usage{ -as.integerBase(x, base = 0L) -as.intBase(x, base = 0L) -} - -\arguments{ - \item{x}{ a character vector, or something coercible to this by \code{as.character}. } - \item{base}{ an integer which is between 2 and 36 inclusive, or zero (default). } -} - -\details{ - TODO... -} - -\value{ - TODO... -} - -\author{ - This is a wrapper for \code{strtoi()} function in base package. -} - -\seealso{ \code{\link[base]{strtoi}} } - -\examples{ -as.intBase(c("0xff", "077", "123")) -as.intBase(c("ffff", "FFFF"), 16L) -as.intBase(c("177", "377"), 8L) -} - -\keyword{character} - -\concept{ convert character into integer } diff --git a/man/character.Rd b/man/character.Rd deleted file mode 100755 index 0c8635c..0000000 --- a/man/character.Rd +++ /dev/null @@ -1,197 +0,0 @@ -\name{char-fun} -\alias{char} -\alias{as.char} -\alias{is.char} -\alias{p0} -\alias{p_} -\alias{ct} -\alias{cta} -\alias{ct_} -\alias{cta_} -\alias{charTrans} -\alias{charFold} -\alias{charLower} -\alias{charUpper} -\alias{charTrim} -\alias{charTrimL} -\alias{charTrimR} -\alias{charTrunc} -\alias{charSubstr} -\alias{charSubstr<-} -\alias{charSplit} -\alias{charSub} -\alias{charSubAll} -\alias{charFind} -\alias{charFindAll} -\alias{charSearch} -\alias{charMatch} -\alias{charPMatch} -\alias{charExpand} -\alias{charAbbrev} -\alias{charEscape} -\alias{charWrap} -\alias{charHeight} -\alias{charWidth} -\alias{useBytes} -\alias{useBytes<-} -\alias{encodingToNative} -\alias{encodingToUTF8} -\alias{encoding} -\alias{encoding<-} -\alias{setEncoding} - -\title{Character strings manipulation functions} - -\description{ - A series of functions to manipulate character objects. -} - -\usage{ -## Create, test or coerce to character -char(length = 0) -as.char(x, \dots) -is.char(x) - -## Paste strings together -p0(\dots, collapse = NULL) -p_(\dots, sep = " ", collapse = NULL) - -## Concatenate and print strings to the console or in a file -ct(\dots, file = "", end = "\n", fill = FALSE, labels = NULL) -cta(\dots, file = "", end = "\n", fill = FALSE, labels = NULL) -ct_(\dots, file = "", sep = " ", end = "\n", fill = FALSE, labels = NULL) -cta_(\dots, file = "", sep = " ", end = "\n", fill = FALSE, labels = NULL) - -## character translation or folding -charTrans(x, old, new) -charFold(x, upper = FALSE) -charLower(x) -charUpper(x) - -## Split, truncate or work with substrings -charTrim(x, all.spaces = FALSE) -charTrimL(x, all.spaces = FALSE) -charTrimR(x, all.spaces = FALSE) -charTrunc(x, width) -charSubstr(x, start, stop) -charSubstr(x, start, stop) <- value -charSplit(x, pattern) - -## Find and replace in character strings -charSub(x, pattern, replacement, ignore.case = FALSE) -charSubAll(x, pattern, replacement, ignore.case = FALSE) -charFind(x, pattern, ignore.case = FALSE) -charFindAll(x, pattern, ignore.case = FALSE) -charSearch(x, pattern, ignore.case = FALSE, - type = c("logical", "position", "value"), max.distance = 0, costs = NULL) - -## Match, expand or abbreviate character strings -charMatch(x, table, nomatch = NA_integer_) -charPMatch(x, table, nomatch = NA_integer_, duplicates.ok = FALSE) -charExpand(x, target, nomatch = NA_character_) -charAbbrev(x, min.length = 4, dot = FALSE, strict = FALSE, - method = c("left.kept", "both.sides")) - -## Format character strings -charEscape(x, width = 0L, quote = "", na.encode = TRUE, - justify = c("left", "right", "centre", "none")) -charWrap(x, width = 0.9 * getOption("width"), indent = 0, exdent = 0, - prefix = "", simplify = TRUE, initial = prefix) - -## Measure size of a character string or expression in a plot -charHeight(s, units = "user", cex = NULL, font = NULL, vfont = NULL, \dots) -charWidth(s, units = "user", cex = NULL, font = NULL, vfont = NULL, \dots) - -## String encoding -encodingToNative(x) -encodingToUTF8(x) -encoding(x) -encoding(x) <- value -setEncoding(x, value) -} - -\arguments{ - \item{length}{ the length of the character vector to create. } - \item{x}{ a character string or an object that can be coerced to character or - tested as such. } - \item{\dots}{ a series of character strings or objects that can be coerced to - character, or further arguments passed to or from other methods for - \code{as.char()}. } - \item{sep}{ the character string to use to separate successive strings. } - \item{collapse}{ an optional character string to separate items in a character - vector that is collapsed to a single item. } - \item{file}{ a character string naming a file, or a connection. The default - \code{""} prints to the standard output connection (see \code{\link[base]{cat}}). } - \item{end}{ the character string to append at the end. By default, a carriage - return. } - \item{fill}{ a logical or positive numeric. With \code{FALSE} (default), only - carriage returns (\code{"\n"}) create new lines. With \code{TRUE}, strings - are formatted within \code{getOption("width")}. With a positive integer, - strings are formatted within this number of characters. } - \item{labels}{ labels of the lines printed. Ignored when \code{fill = FALSE}. } - \item{old}{ a character string with characters to be translated. First element - is used with a warning if length is higher than two. } - \item{new}{ a character string with the translations. First element - is used with a warning if length is higher than two. } - \item{upper}{ a logical indicating if conversion is done to upper- or lowercase. } - \item{all.spaces}{ eliminate all spacing characters, or only blanks? } - \item{width}{ todo } - \item{start}{ todo } - \item{stop}{ todo } - \item{value}{ todo } - \item{pattern}{ todo } - \item{replacement}{ todo } - \item{ignore.case}{ todo } - \item{type}{ todo } - \item{max.distance}{ todo } - \item{costs}{ todo } - \item{table}{ todo } - \item{nomatch}{ todo } - \item{duplicates.ok}{ todo } - \item{target}{ todo } - \item{min.length}{ todo } - \item{dot}{ todo } - \item{strict}{ todo } - \item{method}{ todo } - \item{quote}{ todo } - \item{na.encode}{ todo } - \item{justify}{ todo } - \item{indent}{ todo } - \item{exdent}{ todo } - \item{prefix}{ todo } - \item{simplify}{ todo } - \item{initial}{ todo } - \item{s}{ todo } - \item{units}{ todo } - \item{cex}{ todo } - \item{font}{ todo } - \item{vfont}{ todo } -} - -\details{ - All these functions start with char before an uppercase letter to - indicate that they operate on character strings or coerce their first - argument \code{x} to a character object. - - \code{charEscape(x)} ... -} - -\value{ - Most of these functions return a modified character string object, except ... -} - -\author{ - Philippe Grosjean , but most functions are wrappers - around existing functions written by the R Core Team in base, or recommended - packages. -} - -\seealso{ \code{\link[base]{character}}, \code{\link{regex}} } - -\examples{ -## TODO: various examples of character string manipulations -} - -\keyword{character} - -\concept{ character strings manipulation } diff --git a/man/colors.Rd b/man/colors.Rd old mode 100755 new mode 100644 index 034a401..68a3ffe --- a/man/colors.Rd +++ b/man/colors.Rd @@ -1,51 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/colors.R \name{colors} -\alias{cwm.colors} +\alias{colors} +\alias{rwb_colors} \alias{rwb.colors} +\alias{rwg_colors} \alias{rwg.colors} +\alias{ryg_colors} \alias{ryg.colors} +\alias{cwm_colors} +\alias{cwm.colors} +\title{Various color palettes.} +\usage{ +rwb_colors(n, alpha = 1, s = 0.9, v = 0.9) -\title{Various color palettes} +rwb.colors(n, alpha = 1, s = 0.9, v = 0.9) -\description{ - Create vectors of \code{n} contiguous colors. -} +rwg_colors(n, alpha = 1, s = 0.9, v = 0.9) -\usage{ -cwm.colors(n, alpha = 1, s = 0.9, v = 0.9) -rwb.colors(n, alpha = 1, s = 0.9, v = 0.9) rwg.colors(n, alpha = 1, s = 0.9, v = 0.9) + +ryg_colors(n, alpha = 1, s = 0.9, v = 0.9) + ryg.colors(n, alpha = 1, s = 0.9, v = 0.9) -} -\arguments{ - \item{n}{ the number of colors (>= 1) to be in the palette. } - \item{alpha}{ the alpha transparency, a number in [0, 1], see argument - \code{alpha} in \code{\link{hsv}}. } - \item{s}{ the 'saturation' to be used to complete the HSV color descriptions. } - \item{v}{ the 'value' to use for the HSV color descriptions. } -} +cwm_colors(n, alpha = 1, s = 0.9, v = 0.9) -\details{ - \code{cwm.colors(s = 0.5, v = 1)} gives very similar colors to - \code{cm.colors()}. - - \code{ryg.colors()} is similar to \code{rainbow(start = 0, end = 2/6)}. +cwm.colors(n, alpha = 1, s = 0.9, v = 0.9) } +\arguments{ +\item{n}{The number of colors (>= 1) to be in the palette.} -\value{ - A character vector, \code{cv} of color names. This can be used for - user-defined color palette, using \code{\link{palette}}\code{(cv)}, or a - \code{col = cv} specification in a graphic function or in \code{\link{par}}. -} +\item{alpha}{The alpha transparency, a number in [0, 1], see argument +\code{alpha =} in [hsv()]. -\author{ - Philippe Grosjean -} +[0, 1]: R:0,%201%5C +[hsv()]: R:hsv()} -\seealso{ \code{\link{cm.colors}}, \code{\link{colorRampPalette}} } +\item{s}{The 'saturation' to be used to complete the HSV color descriptions.} +\item{v}{The 'value' to use for the HSV color descriptions.} +} +\description{ +Create vectors of \code{n} contiguous colors. +} +\details{ +\code{cwm_colors(s = 0.5, v = 1)} gives very similar colors to +\code{cm.colors()}. +\code{ryg_colors()} is similar to \code{rainbow(start = 0, end = 2/6)}. +The \code{xxx_colors()} (tidyverse name-compatible) and `xxx.colors()`` +(grDevices name-compatible) functions are synonyms. +} \examples{ -## Draw color wheels with various palettes +# Draw color wheels with various palettes opar <- par(mfrow = c(2, 2)) pie(rep(1, 11), col = cwm.colors(11), main = "Cyan - white - magenta") pie(rep(1, 11), col = rwb.colors(11), main = "Red - white - blue") @@ -53,7 +60,10 @@ pie(rep(1, 11), col = rwg.colors(11), main = "Red - white - green") pie(rep(1, 11), col = ryg.colors(11), main = "Red - yellow - green") par(opar) } - +\seealso{ +\code{\link[=cm.colors]{cm.colors()}}, \code{\link[=colorRampPalette]{colorRampPalette()}} +} +\concept{ +color palettes +} \keyword{color} - -\concept{ color palettes } diff --git a/man/correlation.Rd b/man/correlation.Rd old mode 100755 new mode 100644 index 0dc371a..03b32ac --- a/man/correlation.Rd +++ b/man/correlation.Rd @@ -1,139 +1,178 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/correlation.R \name{correlation} \alias{correlation} +\alias{Correlation} \alias{correlation.formula} \alias{correlation.default} +\alias{is.Correlation} \alias{is.correlation} +\alias{as.Correlation} \alias{as.correlation} -\alias{print.correlation} -\alias{summary.correlation} -\alias{print.summary.correlation} -\alias{plot.correlation} +\alias{print.Correlation} +\alias{summary.Correlation} +\alias{print.summary.Correlation} +\alias{plot.Correlation} +\alias{lines.Correlation} +\title{Correlation matrices.} +\usage{ +correlation(x, ...) -\title{Correlation matrices} +Correlation(x, ...) -\description{ - Compute the correlation matrix between two variables, or more (between all - columns of a matrix or data frame). -} +\method{correlation}{formula}(formula, data = NULL, subset, na.action, ...) -\usage{ -correlation(x, \dots) -\method{correlation}{formula}(formula, data = NULL, subset, na.action, \dots) \method{correlation}{default}(x, y = NULL, use = "everything", - method = c("pearson", "kendall", "spearman"), \dots) + method = c("pearson", "kendall", "spearman"), ...) + +is.Correlation(x) is.correlation(x) + +as.Correlation(x) + as.correlation(x) -\method{print}{correlation}(x, digits = 3, cutoff = 0, \dots) -\method{summary}{correlation}(object, cutpoints = c(0.3, 0.6, 0.8, 0.9, 0.95), - symbols = c(" ", ".", ",", "+", "*", "B"), \dots) -\method{print}{summary.correlation}(x, \dots) -\method{plot}{correlation}(x, y = NULL, outline = TRUE, - cutpoints = c(0.3, 0.6, 0.8, 0.9, 0.95), palette = rwb.colors, col = NULL, - numbers = TRUE, digits = 2, type = c("full", "lower", "upper"), - diag = (type == "full"), cex.lab = par("cex.lab"), cex = 0.75 * par("cex"), - \dots) -} +\method{print}{Correlation}(x, digits = 3, cutoff = 0, ...) -\arguments{ - \item{x}{ a numeric vector, matrix or data frame (or any object for - \code{is.correlation()}, or \code{as.correlation()}). } - \item{formula}{ a formula with no response variable, referring only to numeric - variables. } - \item{data}{ an optional data frame (or similar: see \code{\link{model.frame}}) - containing the variables in the formula \code{formula}. By default the - variables are taken from \code{environment(formula)}. } - \item{subset}{ an optional vector used to select rows (observations) of the - data matrix \code{x}. } - \item{na.action}{ a function which indicates what should happen when the data - contain \code{NA}s. The default is set by the \code{na.action} setting of - \code{\link{options}}, and is \code{\link{na.fail}} if that is unset. The - 'factory-fresh' default is \code{\link{na.omit}}. } - \item{method}{ a character string indicating which correlation coefficient is - to be computed. One of \code{"pearson"} (default), \code{"kendall"}, or - \code{"spearman"}, can be abbreviated. } - \item{y}{ \code{NULL} (default), or a vector, matrix or data frame with - compatible dimensions to \code{x} for \code{correlation()}. The default is - equivalent to \code{x = y}, but more efficient. For \code{plot.correlation()}, - if a second 'correlation' object is provided in \code{y}, then a visual - comparison of two correlation matrices is performed (not implemented yet)! } - \item{use}{ an optional character string giving a method for computing - correlations in the presence of missing values. This must be (an abbreviation - of) one of the strings \code{"everything"}, \code{"all.obs"}, - \code{"complete.obs"}, \code{"na.or.complete"}, or \code{"pairwise.complete.obs"}. } - \item{digits}{ digits to print after the decimal separator. } - \item{cutoff}{ correlation coefficients lower than this (in absolute value) are - suppressed. } - \item{object}{ a 'correlation' object. } - \item{cutpoints}{ the cut points to use for categories. Specify only positive - values (absolute value of correlation coefficients are summarized, or - negative equivalents are automatically computed for the graph. Do not include - 0 or 1 in the cutpoints). } - \item{symbols}{ the symbols to use to summarize the correlation matrix. } - \item{outline}{ do we draw the outline of the ellipse? } - \item{palette}{ a function that can produce a palette of colors. } - \item{col}{ color of the ellipse. If \code{NULL} (default), the colors will be - computed using \code{cutpoints} and \code{palette}. } - \item{numbers}{ do we print correlation values in the center of the ellipses? } - \item{type}{ do we plot a complete matrix, or only lower or upper triangle? } - \item{diag}{ do we plot items on the diagonal? They have always a correlation - of one. } - \item{cex.lab}{ the expansion factor for labels. } - \item{cex}{ the expansion factor for text. } - \item{\dots}{ further arguments passed to functions. } -} +\method{summary}{Correlation}(object, cutpoints = c(0.3, 0.6, 0.8, 0.9, 0.95), + symbols = c(" ", ".", ",", "+", "*", "B"), ...) -\value{ - \code{correlation()} and \code{as.correlation()} create a 'correlation' object, - while \code{is.correlation()} tests for it. - - There are \code{print()} and \code{summary()} methods for the 'correlation' - object that differ in the symbolic encoding of the correlations in - \code{summary()}, using \code{\link{symnum}}, which makes large correlation - matrices more readable. - - The method \code{plot} returns nothing, but it draws ellipses on a graph that - represent the correlation matrix visually. This is essentially the - \code{plotcorr()} function from package ellipse, with slightly different - default arguments and with default \code{cutpoints} equivalent to those used - in the \code{summary} method. +\method{print}{summary.Correlation}(x, ...) + +\method{plot}{Correlation}(x, y = NULL, outline = TRUE, cutpoints = c(0.3, + 0.6, 0.8, 0.9, 0.95), palette = rwb.colors, col = NULL, numbers = TRUE, + digits = 2, type = c("full", "lower", "upper"), diag = (type == "full"), + cex.lab = par("cex.lab"), cex = 0.75 * par("cex"), ...) + +\method{lines}{Correlation}(x, choices = 1L:2L, col = par("col"), lty = 2, + ar.length = 0.1, pos = NULL, cex = par("cex"), labels = rownames(x), + ...) } +\arguments{ +\item{x}{A numeric vector, matrix or data frame (or any object for +\code{is.Correlation()}, \code{as.Correlation()}.} -\author{ - Philippe Grosjean , wrapping code in package ellipse, - function \code{plotcorr()} for the \code{plot.correlation()} method. +\item{...}{Further arguments passed to functions.} + +\item{formula}{A formula with no response variable, referring only to numeric +variables.} + +\item{data}{An optional data frame (or similar: see \code{\link[=model.frame]{model.frame()}}) +containing the variables in the formula \code{formula}. By default the variables +are taken from \code{environment(formula)}.} + +\item{subset}{An optional vector used to select rows (observations) of the +data matrix \code{x}.} + +\item{na.action}{A function which indicates what should happen when the data +contain \code{NA}s. The default is set by the \code{na.action =} setting of \code{options()} +and \code{na.fail()} is used if that is not set. The 'factory-fresh' default is +\code{na.omit()}.} + +\item{y}{\code{NULL} (default), or a vector, matrix or data frame with compatible +dimensions to \code{x} for \code{Correlation()}. The default is equivalent to \code{x = y}, +but more efficient.} + +\item{use}{An optional character string giving a method for computing +correlations in the presence of missing values. This must be (an abbreviation +of) one of the strings \code{"everything"}, \code{"all.obs"}, \code{"complete.obs"}, +\code{"na.or.complete"}, or \code{"pairwise.complete.obs"}.} + +\item{method}{A character string indicating which correlation coefficient is +to be computed. One of \code{"pearson"} (default), \code{"kendall"}, or \code{"spearman"}, +can be abbreviated.} + +\item{digits}{Digits to print after the decimal separator.} + +\item{cutoff}{Correlation coefficients lower than this (in absolute value) +are suppressed.} + +\item{object}{A 'Correlation' object.} + +\item{cutpoints}{The cut points to use for categories. Specify only positive +values (absolute value of correlation coefficients are summarized, or +negative equivalents are automatically computed for the graph. Do not include +0 or 1 in the cutpoints).} + +\item{symbols}{The symbols to use to summarize the correlation matrix.} + +\item{outline}{Do we draw the outline of the ellipse?} + +\item{palette}{A function that can produce a palette of colors.} + +\item{col}{Color of the ellipse. If \code{NULL} (default), the colors will be +computed using \code{cutpoints =} and \code{palette =}.} + +\item{numbers}{Do we print correlation values in the center of the ellipses?} + +\item{type}{Do we plot a complete matrix, or only lower or upper triangle?} + +\item{diag}{Do we plot items on the diagonal? They have always a correlation +of one.} + +\item{cex.lab}{The expansion factor for labels.} + +\item{cex}{The expansion factor for text.} + +\item{choices}{The items to select} + +\item{lty}{The line type to draw.} + +\item{ar.length}{The length of the arrow head.} + +\item{pos}{The position relative to arrows.} + +\item{labels}{The label to draw nead arrows.} } +\value{ +\code{Correlation()} and \code{as.Correlation()`` create a 'Correlation' object, while}is.Correlation()`` tests for it. -\seealso{ \code{\link{cov}}, \code{\link{cov2cor}}, \code{\link{cov.wt}}, - \code{\link[stats]{symnum}}, \code{\link[ellipse]{plotcorr}} and look also at - \code{\link{panel.cor}} } +There are \code{print()} and \code{summary()} methods for the 'Correlation' object +that differ in the symbolic encoding of the correlations in \code{summary()}, +using5 symnum()], which makes large correlation matrices more readable. +The method \code{plot()} returns nothing, but it draws ellipses on a graph that +represent the correlation matrix visually. This is essentially the +\code{\link[=plotcorr]{plotcorr()}} function from package \strong{ellipse}, with slightly different +default arguments and with default \code{cutpoints} equivalent to those used in +the \code{summary()} method. +} +\description{ +Compute the correlation matrix between two variables, or more (between all +columns of a matrix or data frame). +} \examples{ -## This is a simple correlation coefficient +# This is a simple correlation coefficient cor(rnorm(10), runif(10)) -## but this is a 'correlation' object containing a correlation matrix -correlation(rnorm(10), runif(10)) +Correlation(rnorm(10), runif(10)) -## 'correlation' objects allow better inspection of the correlation matrices -## than the output of default R cor() function -(longley.cor <- correlation(longley)) +# 'Correlation' objects allow better inspection of the correlation matrices +# than the output of default R cor() function +(longley.cor <- Correlation(longley)) summary(longley.cor) # Synthetic view of the correlation matrix plot(longley.cor) # Graphical representation -## Use of the formula interface -(mtcars.cor <- correlation(~ mpg + cyl + disp + hp, data = mtcars, - method = "spearman", na.action = "na.omit")) +# Use of the formula interface +(mtcars.cor <- Correlation(~ mpg + cyl + disp + hp, data = mtcars, + method = "spearman", na.action = "na.omit")) -mtcars.cor2 <- correlation(mtcars, method = "spearman") +mtcars.cor2 <- Correlation(mtcars, method = "spearman") print(mtcars.cor2, cutoff = 0.6) summary(mtcars.cor2) plot(mtcars.cor2, type = "lower") -mtcars.cor2["mpg", "cyl"] # Extract one correlation from the correlation matrix -## TODO: a plot comparing two correlation matrices +mtcars.cor2["mpg", "cyl"] # Extract a correlation from the correlation matrix +} +\seealso{ +\code{\link[=cov]{cov()}}, \code{\link[=cov2cor]{cov2cor()}}, \code{\link[=cov.wt]{cov.wt()}}, \code{\link[=symnum]{symnum()}}, \code{\link[=plotcorr]{plotcorr()}} and look +at \code{\link[=panel_cor]{panel_cor()}} +} +\author{ +Philippe Grosjean \href{mailto:phgrosjean@sciviews.org}{phgrosjean@sciviews.org}, wrapping code in package +ellipse, function \code{\link[=plotcorr]{plotcorr()}} for the \code{plot.Correlation()} method. +} +\concept{ +correlation matrix and plot } - \keyword{distribution} - -\concept{ correlation matrix and plot } diff --git a/man/enum.Rd b/man/enum.Rd new file mode 100644 index 0000000..13e8234 --- /dev/null +++ b/man/enum.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R +\name{enum} +\alias{enum} +\title{Enumerate items in an object.} +\usage{ +enum(x) +} +\arguments{ +\item{x}{Any object.} +} +\description{ +\code{enum()} is creating a vector of integers to enumarate items in an object. It +is particularly useful in the \code{for(i in enum(object))} construct. +} +\note{ +The pattern \code{for(i in 1:length(object))} is often found, but it fails +in case \code{length(object) == 0}! \code{enum()} is indeed a synonym of \code{seq_along()}, +but the later one is less expressive in the context. +} +\examples{ +enum(letters) +enum(numeric(0)) +# Compare with: +1:length(numeric(0)) +enum(NULL) +letters5 <- letters[1:5] +for (i in enum(letters5)) cat("letter", i, "=", letters5[i], "\\n") +} +\seealso{ +\code{\link[=seq_along]{seq_along()}} +} diff --git a/man/file.Rd b/man/file.Rd deleted file mode 100755 index 5313b59..0000000 --- a/man/file.Rd +++ /dev/null @@ -1,173 +0,0 @@ -\name{file} -\alias{filePath} -\alias{print.filePath} -\alias{as.filePath} -\alias{is.filePath} -\alias{isDir} -\alias{isFile} -\alias{fileName} -\alias{fileDir} -\alias{fileExpand} -\alias{fileNormalize} -\alias{dirR} -\alias{filePackage} -\alias{dirTemp} -\alias{fileTemp} -\alias{fileFind} -\alias{dirList} -\alias{fileList} -\alias{fileListGlob} -\alias{dirCreate} -\alias{fileAccess} -\alias{fileAppend} -\alias{fileRename} -\alias{fileCopy} -\alias{fileCreate} -\alias{fileExists} -\alias{fileInfo} -\alias{fileChmod} -\alias{fileUMask} -\alias{fileTime} -\alias{fileRemove} -\alias{fileDelete} -\alias{fileLink} -\alias{fileSymLink} -\alias{fileReadLink} -\alias{fileShow} -\alias{wdir} -\alias{sdir} - - -\title{Files and directories manipulation} - -\description{ - Manipulate files and directories and create or use 'filePath' objects. -} - -\usage{ -## Create, test or print a filePath object -filePath(\dots, fsep = .Platform$file.sep) -is.filePath(x) -as.filePath(x, \dots) -\method{print}{filePath}(x, \dots) - -## Test or manipulate path for files or directories -isDir(filePath) -isFile(filePath) -fileName(filePath) -fileDir(filePath) -fileExpand(filePath) -fileNormalize(filePath, mustWork = FALSE) - -## Look for R-associated files or dirs -dirR(component = "home") -filePackage(\dots, package = "base", lib.loc = NULL, mustWork = FALSE) - -## Temporary directories or files -dirTemp() -fileTemp(pattern = "file", tmpdir = tempdir(), fileext = "") - -## List of find files and directories -fileFind(names) -dirList(filePath = ".", full.names = TRUE, recursive = TRUE) -fileList(filePath = ".", pattern = NULL, all.files = FALSE, full.names = FALSE, - recursive = FALSE, ignore.case = FALSE, include.dirs = FALSE) -fileListGlob(filePath, dir.mark = FALSE) - -## Create, delete, append, etc. files -dirCreate(path, showWarnings = TRUE, recursive = FALSE, mode = "0777") -fileAccess(names, mode = 0) -fileAppend(file1, file2) -fileRename(from, to) -fileCopy(from, to, overwrite = recursive, recursive = FALSE, copy.mode = TRUE) -fileCreate(\dots, showWarnings = TRUE) -fileExists(\dots) - -fileChmod(paths, mode = "0777", use_umask = TRUE) -fileUMask(mode = NA) -fileTime(filePath, time) -fileRemove(\dots) -fileDelete(filePath, recursive = FALSE, force = FALSE) - -## File links and symbolic links -fileLink(from, to) -fileSymLink(from, to) -fileReadLink(filePath) - -## Show the content of a file -fileShow(\dots, header = rep("", nfiles), title = "R Information", - delete.file = FALSE, pager = getOption("pager"), encoding = "") - -## Working or session directory -wdir(dir = NULL) -sdir(dir = NULL) -} - -\arguments{ - \item{\dots}{ character vector containing file items or file paths. } - \item{fsep}{ character used as file separator. } - \item{x}{ an object. } - \item{filePath}{ a filePath object. } - \item{component}{ As well as \code{"home"} which gives the R home directory, - other known values are \code{"bin"}, \code{"doc"}, \code{"etc"}, - \code{"modules"} and \code{"share"} giving the paths to the corresponding - parts of an R installation. } - \item{package}{ the R package to look for file. } - \item{lib.loc}{ the library path to look for the R package. } - \item{mustWork}{ should it always return? } - \item{pattern}{ pattern to use for the temporary file. } - \item{tmpdir}{ temporary directory. } - \item{fileext}{ extension of the temporary file. } - \item{names}{ file names to find. } - \item{full.names}{ return full path names or only file names? } - \item{recursive}{ list dirs or files recursively? } - \item{all.files}{ return all files? } - \item{ignore.case}{ be case insensitive? } - \item{include.dirs}{ also list directories? } - \item{dir.mark}{ logical: should matches to directories from patterns that do - not already end in / have a slash appended? May not be supported on all - platforms. } - \item{path}{ path to file. } - \item{showWarnings}{ warn if dir cannot be created? } - \item{mode}{ Unix mode of the file or dir. } - \item{file1}{ first file. } - \item{file2}{ second file. } - \item{from}{ first file. } - \item{to}{ second file. } - \item{overwrite}{ overwrite the destination file? } - \item{copy.mode}{ logical: should file permission bits be copied where - possible? This applies to both files and directories. } - \item{paths}{ path to files. } - \item{use_umask}{ use a mask? } - \item{time}{ time to put on the file. } - \item{force}{ force file deletion? } - \item{header}{ header to use. } - \item{title}{ title of the window. } - \item{delete.file}{ delete the file once it is displayed? } - \item{pager}{ pager to use to display this file. } - \item{encoding}{ encoding to use for the file content. } - \item{dir}{ directory to set. No change if it is \code{NULL} (by default). } -} - -\details{ - TODO... -} - -\value{ - TODO... -} - -\author{ - Philippe Grosjean , but these are indeed wrappers - around existing functions written by the R Core Team in base or utils packages. -} - -\seealso{ \code{\link{char}} } - -\examples{ -## TODO: various examples of dirs and files manipulation -} - -\keyword{character} - -\concept{ directories and files manipulation } diff --git a/man/ln.Rd b/man/ln.Rd old mode 100755 new mode 100644 index 5469e21..f5daf3e --- a/man/ln.Rd +++ b/man/ln.Rd @@ -1,61 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ln.R +\docType{data} \name{ln} \alias{ln} \alias{ln1p} \alias{lg} \alias{lg1p} -\alias{e} +\alias{E} \alias{lb} +\title{Logarithms.} +\format{An object of class \code{numeric} of length 1.} +\usage{ +ln(x) -\title{Logarithms} +ln1p() -\description{ - To avoid confusion using the default \code{log()} function, which is natural - logarithm, but spells out like base 10 logarithm in the mind of some beginneRs, - we define \code{ln()} and \code{ln1p()} as wrappers for \code{log()} with - default \code{base = exp(1)} argument and for \code{log1p()}, respectively. - For similar reasons, \code{lg()} is a wrapper of \code{log10()} (there is no - possible confusion here, but 'lg' is another common notation for base 10 - logarithm). \code{lg1p()} is a convenient way to use the optimized code to - calculate the logarithm of x + 1, but returning the result in base 10 - logarithm. \code{e} is the euler constant and is provided for convenience as - \code{exp(1)}. Finally \code{lb()} is a synonym of \code{log2()}. -} +lg() -\usage{ -ln(x) -ln1p(x) -lg(x) lg1p(x) -e -lb(x) -} -\arguments{ - \item{x}{ a numeric or complex vector. } -} +E -\value{ - A vector of the same length as \code{x} containing the transformed values. - \code{ln(0)} gives \code{-Inf}, and negative values give \code{NaN}. +lb() } - -\author{ - Philippe Grosjean , but these are just convenient - wrappers around standard R logarithm functions in the base package. +\arguments{ +\item{x}{A numeric or complex vector.} +} +\description{ +To avoid confusion using the default \code{log()} function, which is natural +logarithm, but spells out like base 10 logarithm in the mind of some +beginneRs, we define \code{ln()} and \code{ln1p()} as wrappers for \code{log()`` with default}base = exp(1)\code{argument and for}log1p()\code{, respectively. For similar reasons,}lg()\code{is a wrapper of}log10()\code{(there is no possible confusion here, but 'lg' is another common notation for base 10 logarithm).}lg1p()\code{is a convenient way to use the optimized code to calculate the logarithm of x + 1, but returning the result in base 10 logarithm.}E\code{is the Euler constant and is provided for convenience as}exp(1)\code{. Finally}lb()\code{is a synonym of}log2()`. } - -\seealso{ \code{\link{log}} } - \examples{ ln(exp(3)) # Same as log(exp(3)) ln1p(c(0, 1, 10, 100)) # Wrapper for log1p() lg(10^3) # Same as log10(10^3) lg1p(c(0, 1, 10, 100)) # log10(x + 1), but optimized for x << 1 E^4 # Similar to exp(4), but different calculation! -## Note: exp(4) is to be preferred to E^4, if possible! lb(1:3) # Wrapper for log2() } - -\keyword{ math } - -\concept{ logarithm and exponential } +\seealso{ +\code{\link[=log]{log()}} +} +\concept{ +logarithms and exponentials +} +\keyword{datasets} +\keyword{math} diff --git a/man/nr.Rd b/man/nr.Rd new file mode 100644 index 0000000..279d3d0 --- /dev/null +++ b/man/nr.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R +\docType{data} +\name{nr} +\alias{nr} +\alias{nc} +\alias{ROWS} +\alias{COLS} +\title{Convenience functions for rows or columns manipulations.} +\format{An object of class \code{numeric} of length 1.} +\usage{ +nr(x) + +nc(x) + +ROWS + +COLS +} +\arguments{ +\item{x}{Any object.} +} +\description{ +\code{nr()} and \code{nc()} are synonyms of the ugly \code{NROW()} or \code{NCOL()} that still +provide a result, even if \code{dim} attribute of the object is not set, on the +contrary to \code{nrow()}or \code{ncol()}. \code{ROWS} and \code{COLS} are constants that makes +call to \code{apply()} more expressive. +} +\examples{ +mm <- matrix(1:6, nrow = 3) +nr(mm) +nc(mm) +vv <- 1:6 +nr(vv) +nc(vv) +# ROWS and COLS constants used with apply() +apply(mm, ROWS, mean) # Idem apply(mm, 1, mean) +apply(mm, COLS, mean) # Idem apply(mm, 2, mean) +} +\seealso{ +\code{\link[=NROW]{NROW()}} +} +\keyword{datasets} diff --git a/man/panels.Rd b/man/panels.Rd old mode 100755 new mode 100644 index 780bd81..ce2525f --- a/man/panels.Rd +++ b/man/panels.Rd @@ -1,116 +1,161 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/panels.R \name{panels} +\alias{panels} +\alias{panel_reg} \alias{panel.reg} +\alias{panel_ellipse} \alias{panel.ellipse} +\alias{panel_cor} \alias{panel.cor} +\alias{panel_smooth} +\title{More panel plots.} +\usage{ +panel_reg(x, y, col = par("col"), bg = par("bg"), pch = par("pch"), + cex = par("cex"), lwd = par("lwd"), line.reg = lm, line.col = "red", + line.lwd = lwd, untf = TRUE, ...) -\title{More panel plots} +panel.reg(x, y, col = par("col"), bg = par("bg"), pch = par("pch"), + cex = par("cex"), lwd = par("lwd"), line.reg = lm, line.col = "red", + line.lwd = lwd, untf = TRUE, ...) -\description{ - Several panel plots that can be used with functions like \code{\link{coplot}} - and \code{\link{pairs}}. -} +panel_ellipse(x, y, col = par("col"), bg = par("bg"), pch = par("pch"), + cex = par("cex"), el.level = 0.7, el.col = "cornsilk", + el.border = "red", major = TRUE, ...) -\usage{ -panel.reg(x, y, col = par("col"), bg = par("bg"), pch = par("pch"), - cex = par("cex"), lwd = par("lwd"), line.reg = lm, line.col = "red", - line.lwd = lwd, untf = TRUE, \dots) panel.ellipse(x, y, col = par("col"), bg = par("bg"), pch = par("pch"), - cex = par("cex"), el.level = 0.7, el.col = "cornsilk", el.border = "red", - major = TRUE, \dots) -panel.cor(x, y, use = "everything", method = c("pearson", "kendall", "spearman"), - alternative = c("two.sided", "less", "greater"), digits = 2, prefix = "", - cex = par("cex"), cor.cex = cex, stars.col = "red", \dots) -} + cex = par("cex"), el.level = 0.7, el.col = "cornsilk", + el.border = "red", major = TRUE, ...) -\arguments{ - \item{x}{ a numeric vector. } - \item{y}{ a numeric vector of same length as \code{x} } - \item{col}{ the color of the points. } - \item{bg}{ the background color for symbol used for the points. } - \item{pch}{ the symbol used for the points. } - \item{cex}{ the expansion factor used for the points. } - \item{lwd}{ the line width. } - \item{line.reg}{ a function that calculates coefficients of a straight line, - for instance, \code{\link{lm}}, or \code{\link[MASS]{rlm}} for robust - linear regression. } - \item{line.col}{ the color of the line. } - \item{line.lwd}{ the width of the line. } - \item{untf}{ logical asking whether to untransform the straight line in case - one or both axis are in log scale. } - \item{el.level}{ the confidence level for the bivariate normal ellipse - around data; the default value of 0.7 draws an ellipse of roughly +/-1 sd. } - \item{el.col}{ the color used to fill the ellipse. } - \item{el.border}{ the color used to draw the border of the ellipse and the - standardized major axis. } - \item{major}{ if \code{TRUE}, the standardized major axis is also drawn. } - \item{use}{ one of \code{"everything"}, \code{"all.obs"}, - \code{"complete.obs"}, \code{"na.or.complete"}, or - \code{"pairwise.complete.obs"} (can be abbreviated). Defines how the - \code{cor()} function behaves with missing observations. } - \item{method}{ one of the three correlation coefficients \code{"pearson"}, - (default), \code{"kendall"}, or \code{"spearman"} (can be abbreviated). } - \item{alternative}{ the alternative hypothesis in correlation test, see - \code{\link{cor.test}}. } - \item{digits}{ the number of decimal digits to print when the correlation - coefficient is printed in the graph. } - \item{prefix}{ a prefix (character string) to use before the correlation - coefficient printed in the graph. } - \item{cor.cex}{ expansion coefficient for text in printing correlation - coefficients. } - \item{stars.col}{ the color used for significance stars (with: *** p < 0.001, - ** p < 0.1, * p < 0.05, . p < 0.1. } - \item{\dots}{ further arguments to plot functions. } -} +panel_cor(x, y, use = "everything", method = c("pearson", "kendall", + "spearman"), alternative = c("two.sided", "less", "greater"), digits = 2, + prefix = "", cex = par("cex"), cor.cex = cex, stars.col = "red", ...) -\details{ - Theses functions should be used outside of the diagonal in - \code{\link{pairs}()}, or with \code{\link{coplot}()}, as they are bivariate - plots. +panel.cor(x, y, use = "everything", method = c("pearson", "kendall", + "spearman"), alternative = c("two.sided", "less", "greater"), digits = 2, + prefix = "", cex = par("cex"), cor.cex = cex, stars.col = "red", ...) + +panel_smooth(x, y, col = par("col"), bg = NA, pch = par("pch"), cex = 1, + col.smooth = "red", span = 2/3, iter = 3, ...) } +\arguments{ +\item{x}{A numeric vector.} + +\item{y}{A numeric vector of same length as \code{x}.} + +\item{col}{The color of the points.} + +\item{bg}{The background color for symbol used for the points.} + +\item{pch}{The symbol used for the points.} + +\item{cex}{The expansion factor used for the points.} + +\item{lwd}{The line width.} + +\item{line.reg}{A function that calculates coefficients of a straight line, +for instance, \code{\link[=lm]{lm()}}, or \code{\link[=rlm]{rlm()}} for robust linear regression.} + +\item{line.col}{The color of the line.} + +\item{line.lwd}{The width of the line.} + +\item{untf}{Logical asking whether to untransform the straight line in case +one or both axis are in log scale.} + +\item{...}{Further arguments to plot functions.} + +\item{el.level}{The confidence level for the bivariate normal ellipse around +data; the default value of 0.7 draws an ellipse of roughly +/-1 sd.} + +\item{el.col}{The color used to fill the ellipse.} +\item{el.border}{The color used to draw the border of the ellipse and the +standardized major axis.} + +\item{major}{If \code{TRUE}, the standardized major axis is also drawn.} + +\item{use}{One of \code{"everything"}, \code{"all.obs"}, \code{"complete.obs"}, +\code{"na.or.complete"}, or \code{"pairwise.complete.obs"} (can be abbreviated). +Defines how the \code{\link[=cor]{cor()}} function behaves with missing observations.} + +\item{method}{One of the three correlation coefficients \code{"pearson"} +(default), \code{"kendall"}, or \code{"spearman"}. Can be abbreviated.} + +\item{alternative}{The alternative hypothesis in correlation test, see +\code{\link[=cor.test]{cor.test()}}.} + +\item{digits}{The number of decimal digits to print when the correlation +coefficient is printed in the graph.} + +\item{prefix}{A prefix (character string) to use before the correlation +coefficient printed in the graph.} + +\item{cor.cex}{Expansion coefficient for text in printing correlation +coefficients.} + +\item{stars.col}{The color used for significance stars (with: *** p < 0.001, +** p < 0.1, * p < 0.05, . p < 0.1.} + +\item{col.smooth}{Color to be used by lines for drawing the smooths.} + +\item{span}{Smoothing parameter f for \code{\link[=lowess]{lowess()}}, see there.} + +\item{iter}{Number of robustness iterations for \code{\link[=lowess]{lowess()}}.} +} \value{ - These functions return nothing and are used for their side effect of plotting - in panels of composite plots. +These functions return nothing and are used for their side effect of +plotting in panels of composite plots. } +\description{ +Several panel plots that can be used with functions like \code{\link[=coplot]{coplot()}} and +[pairs))]. -\author{ - Philippe Grosjean , but code inspired from - \code{panel.smooth()} in graphics and \code{panel.car()} in package car. +[pairs))]: R:pairs)) } - -\seealso{ \code{\link{coplot}}, \code{\link{pairs}}, \code{\link{panel.smooth}}, - \code{\link{lm}}, \code{\link[ellipse]{ellipse}}, \code{\link{cor}} and - \code{\link{cor.test}} } - -\examples{ -## Smooth lines in lower graphs and straight lines in upper graphs -pairs(trees, lower.panel = panel.smooth, upper.panel = panel.reg) -## Robust regression lines -require(MASS) # For rlm() -pairs(trees, panel = panel.reg, diag.panel = panel.boxplot, - reg.line = rlm, line.col = "blue", line.lwd = 2) -## A Double log graph -pairs(trees, lower.panel = panel.smooth, upper.panel = panel.reg, log = "xy") - -## Graph suitables to explore correlations (take care that there are potentially -## many simultaneous tests done here... So, you loose much power is the whole -## analysis... use it just as an indication, nothing more!) -## Pearson's r -pairs(trees, lower.panel = panel.ellipse, upper.panel = panel.cor) -## Spearman's rho (ellipse and straight lines not suitable here!) -pairs(trees, lower.panel = panel.smooth, upper.panel = panel.cor, - method = "spearman", span = 1) -## Several groups (visualize how bad it is to consider the whole set at once!) -pairs(iris[, -5], lower.panel = panel.smooth, upper.panel = panel.cor, - method = "kendall", span = 1, col = c("red3", "blue3", "green3")[iris$Species]) -## Now analyze correlation for one species only -pairs(iris[iris$Species == "virginica", -5], lower.panel = panel.ellipse, - upper.panel = panel.cor) - -## A coplot with custom panes -coplot(Petal.Length ~ Sepal.Length | Species, data = iris, panel = panel.ellipse) +\details{ +Theses functions should be used outside of the diagonal in +\code{\link[=pairs]{pairs()}}, or with \code{\link[=coplot]{coplot()}}, as they are bivariate plots. } +\examples{ +# Smooth lines in lower graphs and straight lines in upper graphs +pairs(trees, lower.panel = panel_smooth, upper.panel = panel_reg) +# Robust regression lines +library(MASS) # For rlm() +pairs(trees, panel = panel_reg, diag.panel = panel_boxplot, + reg.line = rlm, line.col = "blue", line.lwd = 2) +# A Double log graph +pairs(trees, lower.panel = panel_smooth, upper.panel = panel_reg, log = "xy") -\keyword{ aplot } +# Graph suitables to explore correlations (take care there are potentially +# many simultaneous tests done here... So, you loose much power in the whole +# analysis... use it just as an indication!) +# Pearson's r +pairs(trees, lower.panel = panel_ellipse, upper.panel = panel_cor) +# Spearman's rho (ellipse and straight lines not suitable here!) +pairs(trees, lower.panel = panel_smooth, upper.panel = panel_cor, + method = "spearman", span = 1) +# Several groups (visualize how bad it is to consider the whole set at once!) +pairs(iris[, -5], lower.panel = panel_smooth, upper.panel = panel_cor, + method = "kendall", span = 1, + col = c("red3", "blue3", "green3")[iris$Species]) +# Now analyze correlation for one species only +pairs(iris[iris$Species == "virginica", -5], lower.panel = panel_ellipse, + upper.panel = panel_cor) -\concept{ panel plots } +# A coplot with custom panes +coplot(Petal.Length ~ Sepal.Length | Species, data = iris, + panel = panel_ellipse) +} +\seealso{ +\code{\link[=coplot]{coplot()}}, \code{\link[=pairs]{pairs()}}, \code{\link[=panel.smooth]{panel.smooth()}}, \code{\link[=lm]{lm()}}, \code{\link[=ellipse]{ellipse()}}, +\code{\link[=cor]{cor()}} and \code{\link[=cor.test]{cor.test()}} +} +\author{ +Philippe Grosjean \href{mailto:phgrosjean@sciviews.org}{phgrosjean@sciviews.org}, but code inspired from +\code{\link[=panel.smooth]{panel.smooth()}} in \strong{graphics} and \code{panel.car()} in package \strong{car}. +} +\concept{ +panel plots +} +\keyword{aplot} diff --git a/man/panels.diag.Rd b/man/panels.diag.Rd old mode 100755 new mode 100644 index a56644f..1502cf7 --- a/man/panels.diag.Rd +++ b/man/panels.diag.Rd @@ -1,91 +1,127 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/panels.diag.R \name{panels.diag} +\alias{panels.diag} +\alias{panel_boxplot} \alias{panel.boxplot} +\alias{panel_density} \alias{panel.density} +\alias{panel_hist} \alias{panel.hist} +\alias{panel_qqnorm} \alias{panel.qqnorm} +\title{More univariate panel plots.} +\usage{ +panel_boxplot(x, col = par("col"), box.col = "cornsilk", ...) -\title{More univariate panel plots} +panel.boxplot(x, col = par("col"), box.col = "cornsilk", ...) -\description{ - Several panel plots that can be used with function \code{\link{pairs}}. -} +panel_density(x, adjust = 1, rug = TRUE, col = par("col"), + lwd = par("lwd"), line.col = col, line.lwd = lwd, ...) + +panel.density(x, adjust = 1, rug = TRUE, col = par("col"), + lwd = par("lwd"), line.col = col, line.lwd = lwd, ...) + +panel_hist(x, breaks = "Sturges", hist.col = "cornsilk", + hist.border = NULL, hist.density = NULL, hist.angle = 45, ...) + +panel.hist(x, breaks = "Sturges", hist.col = "cornsilk", + hist.border = NULL, hist.density = NULL, hist.angle = 45, ...) + +panel_qqnorm(x, pch = par("pch"), col = par("col"), bg = par("bg"), + cex = par("cex"), lwd = par("lwd"), qq.pch = pch, qq.col = col, + qq.bg = bg, qq.cex = cex, qqline.col = qq.col, qqline.lwd = lwd, ...) -\usage{ -panel.boxplot(x, col = par("col"), box.col = "cornsilk", \dots) -panel.density(x, adjust = 1, rug = TRUE, col = par("col"), lwd = par("lwd"), - line.col = col, line.lwd = lwd, \dots) -panel.hist(x, breaks = "Sturges", hist.col = "cornsilk", hist.border = NULL, - hist.density = NULL, hist.angle = 45, \dots) panel.qqnorm(x, pch = par("pch"), col = par("col"), bg = par("bg"), - cex = par("cex"), lwd = par("lwd"), qq.pch = pch, qq.col = col, qq.bg = bg, - qq.cex = cex, qqline.col = qq.col, qqline.lwd = lwd, \dots) + cex = par("cex"), lwd = par("lwd"), qq.pch = pch, qq.col = col, + qq.bg = bg, qq.cex = cex, qqline.col = qq.col, qqline.lwd = lwd, ...) } - \arguments{ - \item{x}{ a numeric vector. } - \item{col}{ the color of the points. } - \item{box.col}{ the filling color of the boxplots. } - \item{adjust}{ the bandwidth adjustment factor, see \code{\link{density}}. } - \item{rug}{ do we add a rug representation (1-d plot) of the points too? } - \item{lwd}{ the line width. } - \item{line.col}{ the color of the line. } - \item{line.lwd}{ the width of the line. } - \item{breaks}{ the number of breaks, the name of a break algorithm, a vector of - breakpoints, or any other acceptable value for \code{breaks} argument of - \code{\link{hist}}} - \item{hist.col}{ the filling color for the histograms. } - \item{hist.border}{ the border color for the histograms. } - \item{hist.density}{ the density for filling lines in the histograms. } - \item{hist.angle}{ the angle for filling lines in the histograms. } - \item{pch}{ the symbol used for the points. } - \item{bg}{ the background color for symbol used for the points. } - \item{cex}{ the expansion factor used for the points. } - \item{qq.pch}{ the symbol used to plot points in the QQ-plots. } - \item{qq.col}{ the color of the symbol used to plot points in the QQ-plots. } - \item{qq.bg}{ the background color of the symbol used to plot points in the - QQ-plots. } - \item{qq.cex}{ the expansion factor for points in the QQ-plots. } - \item{qqline.col}{ the color for the QQ-plot lines. } - \item{qqline.lwd}{ the width for the QQ-plot lines. } - \item{\dots}{ further arguments to plot functions, or functions that - construct items, like \code{density()}, depending on the context. } -} +\item{x}{A numeric vector.} -\details{ - Panel functions \code{panel.boxplot()}, \code{panel.density()}, - \code{panel.hist()} and \code{panel.qqnorm()} should be used only to plot - univariate data on the diagonals of pair plots (or scatterplot matrix). -} +\item{col}{The color of the points.} -\value{ - These functions return nothing and are used for their side effect of plotting - in panels of composite plots. -} +\item{box.col}{The filling color of the boxplots.} -\author{ - Philippe Grosjean , but code inspired from - \code{spm()} in package car. -} +\item{...}{Further arguments to plot functions, or functions that construct +items, like \code{\link[=density]{density()}}, depending on the context.} + +\item{adjust}{The bandwidth adjustment factor, see \code{\link[=density]{density()}}.} + +\item{rug}{Do we add a rug representation (1-d plot) of the points too?} + +\item{lwd}{The line width.} + +\item{line.col}{The color of the line.} + +\item{line.lwd}{The width of the line.} + +\item{breaks}{The number of breaks, the name of a break algorithm, a vector +of breakpoints, or any other acceptable value for \code{breaks =} argument of +\code{\link[=hist]{hist()}}.} + +\item{hist.col}{The filling color for the histograms.} -\seealso{ \code{\link{pairs}}, \code{\link{boxplot}}, \code{\link{hist}}, - \code{\link{density}}, \code{\link{qqnorm}} } +\item{hist.border}{The border color for the histograms.} +\item{hist.density}{The density for filling lines in the histograms.} + +\item{hist.angle}{The angle for filling lines in the histograms.} + +\item{pch}{The symbol used for the points.} + +\item{bg}{The background color for symbol used for the points.} + +\item{cex}{The expansion factor used for the points.} + +\item{qq.pch}{The symbol used to plot points in the QQ-plots.} + +\item{qq.col}{The color of the symbol used to plot points in the QQ-plots.} + +\item{qq.bg}{The background color of the symbol used to plot points in the +QQ-plots.} + +\item{qq.cex}{The expansion factor for points in the QQ-plots.} + +\item{qqline.col}{The color for the QQ-plot lines.} + +\item{qqline.lwd}{The width for the QQ-plot lines.} +} +\value{ +These functions return nothing and are used for their side effect of +plotting in panels of composite plots. +} +\description{ +Several panel plots that can be used with \code{\link[=pairs]{pairs()}}. +} +\details{ +Panel functions \code{\link[=panel_boxplot]{panel_boxplot()}}, \code{\link[=panel_density]{panel_density()}}, \code{\link[=panel_hist]{panel_hist()}} +and \code{\link[=panel_qqnorm]{panel_qqnorm()}} should be used only to plot univariate data on the +diagonals of pair plots (or scatterplot matrix). +} \examples{ -## Example of scatterplot matrices with custom plots on the diagonal -## Boxplots -pairs(trees, panel = panel.smooth, diag.panel = panel.boxplot) -pairs(trees, diag.panel = panel.boxplot, box.col = "gray") -## Densities -pairs(trees, panel = panel.smooth, diag.panel = panel.density) -pairs(trees, diag.panel = panel.density, line.col = "red", adjust = 0.5) -## Histograms -pairs(trees, panel = panel.smooth, diag.panel = panel.hist) -pairs(trees, diag.panel = panel.hist, hist.col = "gray", breaks = "Scott") -## QQ-plots against Normal theoretical distribution -pairs(trees, panel = panel.smooth, diag.panel = panel.qqnorm) -pairs(trees, diag.panel = panel.qqnorm, qqline.col = 2, qq.cex = .5, qq.pch = 3) +# Example of scatterplot matrices with custom plots on the diagonal +# Boxplots +pairs(trees, panel = panel_smooth, diag.panel = panel_boxplot) +pairs(trees, diag.panel = panel_boxplot, box.col = "gray") +# Densities +pairs(trees, panel = panel_smooth, diag.panel = panel_density) +pairs(trees, diag.panel = panel_density, line.col = "red", adjust = 0.5) +# Histograms +pairs(trees, panel = panel_smooth, diag.panel = panel_hist) +pairs(trees, diag.panel = panel_hist, hist.col = "gray", breaks = "Scott") +# QQ-plots against Normal theoretical distribution +pairs(trees, panel = panel_smooth, diag.panel = panel_qqnorm) +pairs(trees, diag.panel = panel_qqnorm, qqline.col = 2, qq.cex = .5, qq.pch = 3) +} +\seealso{ +\code{\link[=pairs]{pairs()}}, \code{\link[=boxplot]{boxplot()}}, \code{\link[=hist]{hist()}}, \code{\link[=density]{density()}}, \code{\link[=qqnorm]{qqnorm()}} +} +\author{ +Philippe Grosjean \href{mailto:phgrosjean@sciviews.org}{phgrosjean@sciviews.org}, but code inspired from +\code{spm()} in package \strong{car}. +} +\concept{ +panel plots } - \keyword{aplot} - -\concept{ panel plots } diff --git a/man/pcomp.Rd b/man/pcomp.Rd old mode 100755 new mode 100644 index f924795..3448bb4 --- a/man/pcomp.Rd +++ b/man/pcomp.Rd @@ -1,7 +1,9 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pcomp.R \name{pcomp} \alias{pcomp} -\alias{pcomp.default} \alias{pcomp.formula} +\alias{pcomp.default} \alias{print.pcomp} \alias{summary.pcomp} \alias{print.summary.pcomp} @@ -16,201 +18,240 @@ \alias{correlation.pcomp} \alias{scores} \alias{scores.pcomp} - -\title{Principal Components Analysis} - -\description{ - Perform a principal components analysis on a matrix or data frame and return a - \code{pcomp} object. -} - +\title{Principal Components Analysis.} \usage{ -pcomp(x, \dots) +pcomp(x, ...) + \method{pcomp}{formula}(formula, data = NULL, subset, na.action, - method = c("svd", "eigen"), \dots) + method = c("svd", "eigen"), ...) + \method{pcomp}{default}(x, method = c("svd", "eigen"), scores = TRUE, - center = TRUE, scale = TRUE, tol = NULL, covmat = NULL, - subset = rep(TRUE, nrow(as.matrix(x))), \dots) + center = TRUE, scale = TRUE, tol = NULL, covmat = NULL, + subset = rep(TRUE, nrow(as.matrix(x))), ...) + +\method{print}{pcomp}(x, ...) + +\method{summary}{pcomp}(object, loadings = TRUE, cutoff = 0.1, ...) -\method{print}{pcomp}(x, \dots) -\method{summary}{pcomp}(object, loadings = TRUE, cutoff = 0.1, \dots) \method{print}{summary.pcomp}(x, digits = 3, loadings = x$print.loadings, - cutoff = x$cutoff, \dots) - -\method{plot}{pcomp}(x, which = c("screeplot", "loadings", "correlations", "scores"), - choices = 1L:2L, col = par("col"), bar.col = "gray", circle.col = "gray", - ar.length = 0.1, pos = NULL, labels = NULL, cex = par("cex"), - main = paste(deparse(substitute(x)), which, sep = " - "), xlab, ylab, \dots) -\method{screeplot}{pcomp}(x, npcs = min(10, length(x$sdev)), type = c("barplot", "lines"), - col = "cornsilk", main = deparse(substitute(x)), \dots) + cutoff = x$cutoff, ...) + +\method{plot}{pcomp}(x, which = c("screeplot", "loadings", "correlations", + "scores"), choices = 1L:2L, col = par("col"), bar.col = "gray", + circle.col = "gray", ar.length = 0.1, pos = NULL, labels = NULL, + cex = par("cex"), main = paste(deparse(substitute(x)), which, sep = + " - "), xlab, ylab, ...) + +\method{screeplot}{pcomp}(x, npcs = min(10, length(x$sdev)), + type = c("barplot", "lines"), col = "cornsilk", + main = deparse(substitute(x)), ...) + \method{points}{pcomp}(x, choices = 1L:2L, type = "p", pch = par("pch"), - col = par("col"), bg = par("bg"), cex = par("cex"), \dots) + col = par("col"), bg = par("bg"), cex = par("cex"), ...) + \method{lines}{pcomp}(x, choices = 1L:2L, groups, type = c("p", "e"), - col = par("col"), border = par("fg"), level = 0.9, \dots) + col = par("col"), border = par("fg"), level = 0.9, ...) + \method{text}{pcomp}(x, choices = 1L:2L, labels = NULL, col = par("col"), - cex = par("cex"), pos = NULL, \dots) -\method{biplot}{pcomp}(x, choices = 1L:2L, scale = 1, pc.biplot = FALSE, \dots) + cex = par("cex"), pos = NULL, ...) -\method{pairs}{pcomp}(x, choices = 1L:3L, type = c("loadings", "correlations"), - col = par("col"), circle.col = "gray", ar.col = par("col"), ar.length = 0.05, - pos = NULL, ar.cex = par("cex"), cex = par("cex"), \dots) +\method{biplot}{pcomp}(x, choices = 1L:2L, scale = 1, pc.biplot = FALSE, + ...) -\method{predict}{pcomp}(object, newdata, dim = length(object$sdev), \dots) -\method{correlation}{pcomp}(x, newvars, dim = length(x$sdev), \dots) -scores(x, \dots) -\method{scores}{pcomp}(x, labels = NULL, dim = length(x$sdev), \dots) -} +\method{pairs}{pcomp}(x, choices = 1L:3L, type = c("loadings", + "correlations"), col = par("col"), circle.col = "gray", + ar.col = par("col"), ar.length = 0.05, pos = NULL, + ar.cex = par("cex"), cex = par("cex"), ...) -\arguments{ - \item{x}{ a matrix or data frame with numeric data. } - \item{formula}{ a formula with no response variable, referring only to numeric - variables. } - \item{data}{ an optional data frame (or similar: see \code{\link{model.frame}}) - containing the variables in the formula \code{formula}. By default the - variables are taken from \code{environment(formula)}. } - \item{subset}{ an optional vector used to select rows (observations) of the - data matrix \code{x}. } - \item{na.action}{ a function which indicates what should happen when the data - contain \code{NA}s. The default is set by the \code{na.action} setting of - \code{\link{options}}, and is \code{\link{na.fail}} if that is unset. The - 'factory-fresh' default is \code{\link{na.omit}}. } - \item{method}{ either \code{"svd"} (the function uses \code{\link{prcomp}}), - or \code{"eigen"} (the function uses \code{princomp}), or an abbreviation. } - \item{\dots}{ arguments passed to or from other methods. If \code{x} is a - formula one might specify \code{scale}, \code{tol} or \code{covmat}. } - \item{scores}{ a logical value indicating whether the score on each principal - component should be calculated. } - \item{center}{ a logical value indicating whether the variables should be - shifted to be zero centered. Alternately, a vector of length equal the - number of columns of \code{x} can be supplied. The value is passed to - \code{scale}. Note that this argument is ignored for \code{method = "eigen"} - and the dataset is always centered in this case. } - \item{scale}{ a logical value indicating whether the variables should be - scaled to have unit variance before the analysis takes place. The default is - \code{TRUE}, which in general, is advisable. Alternatively, a vector of - length equal the number of columns of \code{x} can be supplied. The value is - passed to \code{scale}. } - \item{tol}{ only when \code{method = "svd"}. A value indicating the magnitude - below which components should be omitted. (Components are omitted if their - standard deviations are less than or equal to \code{tol} times the standard - deviation of the first component.) With the default null setting, no - components are omitted. Other settings for tol could be \code{tol = 0} or - \code{tol = sqrt(.Machine$double.eps)}, which would omit essentially - constant components. } - \item{covmat}{ a covariance matrix, or a covariance list as returned by - \code{\link{cov.wt}} (and \code{\link[MASS]{cov.mve}} or - \code{\link[MASS]{cov.mcd}} from package MASS). If supplied, this is used - rather than the covariance matrix of \code{x}. } - \item{object}{ a 'pcomp' object. } - \item{loadings}{ do we also summarize the loadings? } - \item{cutoff}{ the cutoff value below which loadings are replaced by white - spaces in the table. That way, larger values are easier to spot and to - read in large tables. } - \item{digits}{ the number of digits to print. } - \item{which}{ the graph to plot. } - \item{choices}{ which principal axes to plot. For 2D graphs, specify two - integers. } - \item{col}{ the color to use in graphs. } - \item{bar.col}{ the color of bars in the screeplot. } - \item{circle.col}{ the color for the circle in the loadings or correlations - plots. } - \item{ar.length}{ the length of the arrows in the loadings and correlations - plots. } - \item{pos}{ the position of text relative to arrows in loadings and - correlations plots. } - \item{labels}{ the labels to write. If \code{NULL} default values are computed. } - \item{cex}{ the factor of expansion for text (labels) in the graphs. } - \item{main}{ the title of the graph. } - \item{xlab}{ the label of X-axis. } - \item{ylab}{ the label of Y-axis. } - \item{pch}{ type of symbol to use. } - \item{bg}{ background color for symbols. } - \item{groups}{ a grouping factor. } - \item{border}{ the color of the border. } - \item{level}{ the probability level to use to draw the ellipse. } - \item{pc.biplot}{ do we create a Gabriel's biplot (see \code{\link{biplot}()} - documentation)? } - \item{npcs}{ the number of principal components to represent in the screeplot. } - \item{type}{ the type of screeplot (\code{"barplot"} or \code{"lines"}) or pairs - plot (\code{"loadings"} or \code{"correlations"}). } - \item{ar.col}{ color of arrows. } - \item{ar.cex}{ expansion factor for terxt on arrows. } - \item{newdata}{ new individuals with observations for the same variables as - those used for making the PCA. You can then plot these additional - individuals in the scores graph. } - \item{newvars}{ new variables with observations for same individuals as those - used for making the PCA. Correlation with PCs is calculated. You can then - plot these additional variables in the correlation graph. } - \item{dim}{ The number of principal components to keep. } -} +\method{predict}{pcomp}(object, newdata, dim = length(object$sdev), ...) -\details{ - \code{pcomp()} is a generic function with \code{"formula"} and \code{"default"} - methods. It is essentially a wrapper around \code{prcomp()} and - \code{princomp()} to provide a coherent interface and object for both methods. - - A 'pcomp' object is created. It inherits from 'pca' (as in labdsv package, but - not compatible with the 'pca' object of package ade4!) and of 'princomp'. - - For more information on calculation done, refer to \code{\link{prcomp}} for - \code{method = "svd"} or \code{\link{princomp}} for \code{method = "eigen"}. -} +\method{correlation}{pcomp}(x, newvars, dim = length(x$sdev), ...) -\value{ - A \code{c("pcomp", "pca", "princomp")} object containing list components: - \item{comp_i}{Description of comp_i.} - TODO: complete this (also speak about the various methods)! -} +scores(x, ...) -\note{ - The signs of the columns of the loadings and scores are arbitrary, and so may - differ between different programs for PCA, and even between different builds - of \R. +\method{scores}{pcomp}(x, labels = NULL, dim = length(x$sdev), ...) } +\arguments{ +\item{x}{A matrix or data frame with numeric data.} -\author{ - Philippe Grosjean , but the core code is indeed in - package stats. +\item{...}{Arguments passed to or from other methods. If `x\code{is a formula one might specify}scale =\code{,}tol =\code{or}covmat =`.} + +\item{formula}{A formula with no response variable, referring only to numeric +variables.} + +\item{data}{An optional data frame (or similar: see \code{\link[=model.frame]{model.frame()}}) +containing the variables in the formula \code{formula =}. By default the variables +are taken from \code{environment(formula)}.} + +\item{subset}{An optional vector used to select rows (observations) of the +data matrix \code{x}.} + +\item{na.action}{A function which indicates what should happen when the data +contain \code{NA}s. The default is set by the \code{na.action =} setting of +\code{\link[=options]{options()}}, and is \code{\link[=na.fail]{na.fail()}} if that is not set. The 'factory-fresh' +default is \code{\link[=na.omit]{na.omit()}}.} + +\item{method}{Either \code{"svd"} (using \code{\link[=prcomp]{prcomp()}}), \code{"eigen"} (using +\code{\link[=princomp]{princomp()}}), or an abbreviation.} + +\item{scores}{A logical value indicating whether the score on each principal +component should be calculated.} + +\item{center}{A logical value indicating whether the variables should be +shifted to be zero centered. Alternately, a vector of length equal the +number of columns of \code{x} can be supplied. The value is passed to \code{scale =}. +Note that this argument is ignored for \code{method = "eigen"} and the dataset is +always centered in this case.} + +\item{scale}{A logical value indicating whether the variables should be +scaled to have unit variance before the analysis takes place. The default is +\code{TRUE}, which in general, is advisable. Alternatively, a vector of length +equal the number of columns of \code{x} can be supplied. The value is passed to +\code{\link[=scale]{scale()}}.} + +\item{tol}{Only when \code{method = "svd"}. A value indicating the magnitude +below which components should be omitted. (Components are omitted if their +standard deviations are less than or equal to \code{tol} times the standard +deviation of the first component.) With the default null setting, no +components are omitted. Other settings for \code{tol =} could be \code{tol = 0} or +\code{tol = sqrt(.Machine$double.eps)}, which would omit essentially constant +components.} + +\item{covmat}{A covariance matrix, or a covariance list as returned by +\code{\link[=cov.wt]{cov.wt()}} (and \code{\link[=cov.mve]{cov.mve()}} or \code{\link[=cov.mcd]{cov.mcd()}} from package \strong{MASS}). If +supplied, this is used rather than the covariance matrix of \code{x}.} + +\item{object}{A 'pcomp' object.} + +\item{loadings}{Do we also summarize the loadings?} + +\item{cutoff}{The cutoff value below which loadings are replaced by white +spaces in the table. That way, larger values are easier to spot and to read +in large tables.} + +\item{digits}{The number of digits to print.} + +\item{which}{The graph to plot.} + +\item{choices}{Which principal axes to plot. For 2D graphs, specify two +integers.} + +\item{col}{The color to use in graphs.} + +\item{bar.col}{The color of bars in the screeplot.} + +\item{circle.col}{The color for the circle in the loadings or correlations +plots.} + +\item{ar.length}{The length of the arrows in the loadings and correlations +plots.} + +\item{pos}{The position of text relative to arrows in loadings and +correlation plots.} + +\item{labels}{The labels to write. If \code{NULL} default values are computed.} + +\item{cex}{The factor of expansion for text (labels) in the graphs.} + +\item{main}{The title of the graph.} + +\item{xlab}{The label of the x-axis.} + +\item{ylab}{The label of the y-axis.} + +\item{npcs}{The number of principal components to represent in the screeplot.} + +\item{type}{The type of screeplot (\code{"barplot"} or \code{"lines"}) or pairs plot +(\code{"loadings"} or \code{"correlations"}).} + +\item{pch}{The type of symbol to use.} + +\item{bg}{The background color for symbols.} + +\item{groups}{A grouping factor.} + +\item{border}{The color of the border.} + +\item{level}{The probability level to use to draw the ellipse.} + +\item{pc.biplot}{Do we create a Gabriel's biplot (see \code{\link[=biplot]{biplot()}})?} + +\item{ar.col}{Color of arrows.} + +\item{ar.cex}{Expansion factor for terxt on arrows.} + +\item{newdata}{New individuals with observations for the same variables as +those used for calculating the PCA. You can then plot these additional +individuals in the scores plot.} + +\item{dim}{The number of principal components to keep.} + +\item{newvars}{New variables with observations for same individuals as those +used for mcalculating the PCA. Correlation with PCs is calculated. You can +then plot these additional variables in the correlation plot.} +} +\value{ +A \code{c("pcomp", "pca", "princomp")} object. +} +\description{ +Perform a principal components analysis on a matrix or data frame and return +a \code{pcomp} object. } +\details{ +\code{pcomp()} is a generic function with \code{"formula"} and \code{"default"} +methods. It is essentially a wrapper around \code{\link[=prcomp]{prcomp()}} and \code{\link[=princomp]{princomp()}} to +provide a coherent interface and object for both methods. -\seealso{ \code{\link{vectorplot}}, \code{\link{prcomp}}, \code{\link{princomp}}, - \code{\link{loadings}}, \code{link{correlation}} } +A 'pcomp' object is created. It inherits from 'pca' (as in \strong{labdsv} +package, but not compatible with the 'pca' object of package \strong{ade4}) and of +'princomp'. +For more information on calculation done, refer to \code{\link[=prcomp]{prcomp()}} for +\code{method = "svd"} or \code{\link[=princomp]{princomp()}} for \code{method = "eigen"}. +} +\note{ +The signs of the columns of the loadings and scores are arbitrary, and +so may differ between functions for PCA, and even between different builds of +\R. +} \examples{ -## We will analyze mtcars without the Mercedes data (rows 8:14) +# We will analyze mtcars without the Mercedes data (rows 8:14) data(mtcars) -cars.pca <- pcomp(~mpg+cyl+disp+hp+drat+wt+qsec, data = mtcars, subset = -(8:14)) +cars.pca <- pcomp(~ mpg + cyl + disp + hp + drat + wt + qsec, data = mtcars, + subset = -(8:14)) cars.pca summary(cars.pca) screeplot(cars.pca) -## Loadings are extracted and plotted like this +# Loadings are extracted and plotted like this (cars.ldg <- loadings(cars.pca)) plot(cars.pca, which = "loadings") # Equivalent to vectorplot(cars.ldg) -## Similarly, correlations of variables with PCs are extracted and plotted -(cars.cor <- correlation(cars.pca)) +# Similarly, correlations of variables with PCs are extracted and plotted +(cars.cor <- Correlation(cars.pca)) plot(cars.pca, which = "correlations") # Equivalent to vectorplot(cars.cor) -## One can add supplementary variables on this graph -lines(correlation(cars.pca, - newvars = mtcars[-(8:14), c("vs", "am", "gear", "carb")])) +# One can add supplementary variables on this graph +lines(Correlation(cars.pca, + newvars = mtcars[-(8:14), c("vs", "am", "gear", "carb")])) -## Plot the scores +# Plot the scores plot(cars.pca, which = "scores", cex = 0.8) # Similar to plot(scores(x)[, 1:2]) -## Add supplementary individuals to this plot (labels), use also points() or lines() +# Add supplementary individuals to this plot (labels), also points() or lines() text(predict(cars.pca, newdata = mtcars[8:14, ]), col = "gray", cex = 0.8) -## More scores plot -## TODO... - -## Pairs plot for 3 PCs +# Pairs plot for 3 PCs iris.pca <- pcomp(iris[, -5]) pairs(iris.pca, col = (2:4)[iris$Species]) - -## rgl plot for 3 PCs -## TODO... } - -\keyword{ models } - -\concept{ principal component analysis and biplot } +\seealso{ +\code{\link[=vectorplot]{vectorplot()}}, \code{\link[=prcomp]{prcomp()}}, \code{\link[=princomp]{princomp()}}, \code{\link[=loadings]{loadings()}}, +\code{\link[=Correlation]{Correlation()}} +} +\author{ +Philippe Grosjean \href{mailto:phgrosjean@sciviews.org}{phgrosjean@sciviews.org}, but the core code is +indeed in package \strong{stats}. +} +\concept{ +principal component analysis and biplot +} +\keyword{models} diff --git a/man/regex.Rd b/man/regex.Rd deleted file mode 100755 index 8cff49f..0000000 --- a/man/regex.Rd +++ /dev/null @@ -1,63 +0,0 @@ -\name{regex} -\alias{regex} -\alias{is.regex} -\alias{print.regex} -\alias{pcre} -\alias{is.pcre} -\alias{print.pcre} - -\title{Regular expressions} - -\description{ - Create regular expression (either 'regex' -POSIX 1003.2 extended regular - expressions- or 'pcre' -perl compatibe regular expressions- objects). -} - -\usage{ -## Create, test or print a regular expression object -regex(pattern, use.bytes = FALSE, globbing, trim.head = FALSE, trim.tail = TRUE) -is.regex(x) -\method{print}{regex}(x, \dots) - -## Create, test or print a perl-compatible regular expression object -pcre(pattern, use.bytes = FALSE) -is.pcre(x) -\method{print}{pcre}(x, \dots) -} - -\arguments{ - \item{pattern}{ character string with the pattern to use for the regular expression. } - \item{use.bytes}{ logical. If \code{TRUE}, the matching is done byte-by-byte - rather than character-by-character. } - \item{globbing}{ character string with wildcard or globbing pattern to be - transformed into a regular expression. If provided, \code{pattern} is ignored. } - \item{trim.head}{ associated to \code{globbing} only. Specify if \code{"^.*"} - should be trimmed from the result. } - \item{trim.tail}{ associated to \code{globbing} only. Specify if \code{".*$"} - should be trimmed from the result. } - \item{x}{ an object. } - \item{\dots}{ unused. } -} - -\details{ - TODO... -} - -\value{ - TODO... -} - -\author{ - Philippe Grosjean , but these are indeed wrappers - around existing functions written by the R Core Team in base or utils packages. -} - -\seealso{ \code{\link{char}}, \code{\link[base]{character}} } - -\examples{ -## TODO: various examples of character string manipulations -} - -\keyword{character} - -\concept{ regular expressions for character strings manipulation } diff --git a/man/snippets.Rd b/man/snippets.Rd deleted file mode 100755 index 4b70532..0000000 --- a/man/snippets.Rd +++ /dev/null @@ -1,19 +0,0 @@ -\name{snippets} -\Rdversion{2} -\alias{snippets} -\docType{package} - -\title{SciViews snippet help} - -\description{ - \Sexpr[results=rd, stage=render]{c("We are now \\\\emph{", Sys.time(), - "}", sep = "")} -} - -\arguments{ - \Sexpr[results=rd, stage=render]{c("The content of the arguments section...", sep = "")} -} - -\keyword{ package } - -\concept{ snippets and electronic reference card } diff --git a/man/timing.Rd b/man/timing.Rd new file mode 100644 index 0000000..ebec495 --- /dev/null +++ b/man/timing.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R +\name{timing} +\alias{timing} +\title{Timing of R expressions.} +\usage{ +timing(expr, gc.first = TRUE) +} +\arguments{ +\item{expr}{Valid \R expression to be timed. If missing, \code{\link[=proc.time]{proc.time()}} is +used instead.} + +\item{gc.first}{Logical - should a garbage collection be performed immediately +before the timing? Default is \code{TRUE}.} +} +\description{ +Similar to \code{system.time()} but returns a more convenient 'difftime' object. +} +\examples{ +test <- timing(Sys.sleep(0.5)) +test +attr(test, "details") +} +\seealso{ +\code{\link[=system.time]{system.time()}} +} diff --git a/man/vectorplot.Rd b/man/vectorplot.Rd old mode 100755 new mode 100644 index ac9ac40..ada029d --- a/man/vectorplot.Rd +++ b/man/vectorplot.Rd @@ -1,66 +1,71 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/vectorplot.R \name{vectorplot} \alias{vectorplot} \alias{vectorplot.default} \alias{vectorplot.loadings} -\alias{vectorplot.correlation} - -\title{Plot vectors inside a unit circle (PCA loadings or correlations plots)} - -\description{ - Plots vectors with 0. -} +\item{y}{A numeric vector with 0 < values < 1 of same length as `x.} + +\item{col}{Color of the arrows and labels.} -\seealso{ \code{\link{pcomp}}, \code{\link{loadings}}, - \code{\link{correlation}} } +\item{circle.col}{The color for the circle around the vector plot.} +\item{ar.length}{The length of the arrows.} + +\item{pos}{The position of text relative to arrows. If \code{NULL}, a suitable +position is calculated according to the direction where the arrows are +pointing.} + +\item{cex}{The factor of expansion for labels in the graph.} + +\item{labels}{The labels to draw near the arrows.} + +\item{choices}{A vector of two integers indicating the axes to plot.} + +\item{main}{The title of the plot.} +} +\value{ +The object 'x' is returned invisibly. These functions are called for +their side-effect of drawing a vector plot. +} +\description{ +Plots vectors with 0 < norms < 1 inside a circle. These plots are mainly +designed to represent variables in principal components space for PCAs. +} \examples{ -## Create a PCA and plot loadings and correlations +# Create a PCA and plot loadings and correlations iris.pca <- pcomp(iris[, -5]) vectorplot(loadings(iris.pca)) -vectorplot(correlation(iris.pca)) -## Note: on screen devices, change aspect ratio of the graph by resizing -## the window to reveal cropped labels... +vectorplot(Correlation(iris.pca)) +# Note: on screen devices, change aspect ratio of the graph by resizing +# the window to reveal cropped labels... +} +\seealso{ +\code{\link[=pcomp]{pcomp()}}, \code{\link[=loadings]{loadings()}}, \code{\link[=Correlation]{Correlation()}} +} +\concept{ +Vector and circular plot } - \keyword{aplot} - -\concept{ Vector and circular plot } diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..b6b397e --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,3 @@ +library("testthat") + +test_check("SciViews") diff --git a/tests/testthat/test-correlation.R b/tests/testthat/test-correlation.R new file mode 100644 index 0000000..9c36670 --- /dev/null +++ b/tests/testthat/test-correlation.R @@ -0,0 +1,23 @@ +context("Correlation") + +describe("correlation", { + corr <- correlation(1:10, 1:10) + + it("produces a Correlation, matrix object", { + expect_is(corr, "matrix") + expect_is(corr, "Correlation") + }) + it("produces a 2x2 matrix for 2 variables", { + expect_equal(nrow(corr), 2) + expect_equal(ncol(corr), 2) + }) + it("produces a 1/1/1/1 matrix for twice the same variable", { + expect_equivalent(as.numeric(corr), rep(1, 4)) + }) + rm(corr) + + it("raises an error if non-numeric arguments for x or y", { + expect_error(correlation("text", 1:10), "'x' must be numeric", fixed = TRUE) + expect_error(correlation(1:10, "text"), "'y' must be numeric", fixed = TRUE) + }) +}) diff --git a/vignettes/pca.R b/vignettes/pca.R new file mode 100644 index 0000000..59c32bd --- /dev/null +++ b/vignettes/pca.R @@ -0,0 +1,6 @@ +## ----setup, include = FALSE---------------------------------------------- +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +