diff --git a/DESCRIPTION b/DESCRIPTION index 65eb683..0b26088 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,8 +11,8 @@ Authors@R: c( Description: Cross-validated eigenvalues are estimated by splitting a graph into two parts, the training and the test graph. The training graph is use to estimate eigenvectors, and - the test graph is use to evaluate the correlation between the sample - eigenvectors and the eigenspace of the test graph. + the test graph is use to evaluate the correlation between the training + eigenvectors and the eigenvectors of the test graph. The correlations follow a simple central limit theorem that can be used to estimate graph dimension via hypothesis testing. License: GPL (>= 3) diff --git a/NAMESPACE b/NAMESPACE index bed5cff..5b197cc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,23 +2,7 @@ S3method(plot,eigcv) S3method(print,eigcv) +export("%>%") export(eigcv) import(Matrix) -importFrom(dplyr,group_by) -importFrom(dplyr,mutate) -importFrom(dplyr,select) -importFrom(dplyr,summarise) -importFrom(dplyr,summarize) -importFrom(dplyr,ungroup) -importFrom(ggplot2,aes) -importFrom(ggplot2,geom_hline) -importFrom(ggplot2,geom_line) -importFrom(ggplot2,geom_point) -importFrom(ggplot2,ggplot) -importFrom(ggplot2,labs) -importFrom(ggplot2,scale_color_manual) -importFrom(ggplot2,scale_x_continuous) -importFrom(ggplot2,theme) -importFrom(ggplot2,theme_bw) importFrom(magrittr,"%>%") -importFrom(rlang,.data) diff --git a/R/eigcv.R b/R/eigcv.R index 55addb1..dae3660 100644 --- a/R/eigcv.R +++ b/R/eigcv.R @@ -1,9 +1,10 @@ + +# assumes elements of A are non-negative integers (and Poisson) split_graph <- function(A, test_portion = 0.1) { A <- methods::as(A, "dMatrix") A <- methods::as(A, "TsparseMatrix") - # assumes elements of A are Poisson (i.e. 
non-negative integers) stopifnot(isTRUE(all.equal(A@x, as.integer(A@x)))) test_edges <- stats::rbinom(length(A@x), size = A@x, prob = test_portion) @@ -41,9 +42,11 @@ glaplacian <- function(A, regularize = TRUE) { L } -# Given the trained left/right singular vectors, compute the test statistic for +# given the training singular vectors, compute the test statistic for # graph dimension. gdstat <- function(full, test, u, v, test_portion) { + + # standard error calculation is different for directed and undirected graphs if (isSymmetric(full)) { se <- sqrt(2 * test_portion * as.numeric(t(u^2) %*% full %*% v^2) - test_portion * sum(diag(full) * u^2 * v^2)) @@ -59,41 +62,56 @@ gdstat <- function(full, test, u, v, test_portion) { -#' Edge Bootstrapping and Splitting +#' Compute cross-validated eigenvalues #' -#' Estimate the graph dimension via eigenvalue cross-validation (EigCV). +#' Estimate graph dimension via eigenvalue cross-validation (EigCV). #' A graph has dimension `k` if the first `k` eigenvectors of its adjacency #' matrix are correlated with its population eigenspace, and the others are not. #' Edge bootstrapping sub-samples the edges of the graph (without replacement). #' Edge splitting separates the edges into a training part and a testing part. #' -#' @param A The adjacency matrix of graph. Must be non-negative integer valued. -#' @param k_max `integer(1)`, number of eigenvectors to compute. -#' @param num_bootstraps `integer(1)`, number of graph bootstraps, default to 10. -#' Graph bootstrapping is to account for the randomness in graph splitting, -#' rather than obtaining any statistic (as a traditional num_bootstraps does). -#' Hence, a small number (e.g., 3~10) of bootstraps usually suffices. -#' If `num_bootstraps>1`, the test statistics will be averaged across bootstraps +#' @param A The adjacency matrix of graph. Must be non-negative and +#' integer valued. +#' @param k_max The maximum dimension of the graph to consider.
This many +#' eigenvectors are computed. Should be a non-negative integer smallish +#' relative to the dimensions of `A`. +#' @param ... Ignored. +#' @param num_bootstraps The number of times to bootstrap the graph. Since +#' cross-validated eigenvalues are based on a random graph split, they +#' are themselves random. By repeatedly computing cross-validated eigenvalues +#' for different sample splits, the idea is to smooth away some of the +#' randomness due to the graph splits. A small number of bootstraps +#' (3 to 10) usually suffices. Defaults to `10`. Test statistics (i.e. +#' z-scores for cv eigenvalues) are averaged across bootstraps #' and the p-values will be calculated based on the averaged statistics. -#' @param alpha Significance level of each test, defaults to `0.05`. -#' This is used to cut off the dimension estimation. -#' @param ptol `numeric(1)`, the tolerance of minimal p-value. -#' @param regularize TODO -#' @param test_portion TODO -#' @inheritParams stats::p.adjust -#' @param laplacian `logical(1)`, use the normalized and regularized adjacency -#' matrix (i.e. L) -#' This option is experimental and should be used with caution. -#' @return A `eigcv` object, which contains: -#' \item{estimated_dimension}{inferred graph dimension.} -#' \item{summary}{summary table of the tests.} -#' \item{num_bootstraps}{number of bootstraps performed.} -#' \item{test_portion}{graph splitting probability used.} -#' \item{alpha}{significance level of each test.} +#' @param test_portion The portion of the graph to put into the test graph, +#' as opposed to the training graph. Defaults to `0.1`. Must be strictly +#' between zero and one. +#' @param alpha Significance level for hypothesis tests. Each dimension +#' `1, ..., k_max` is tested when estimating graph dimension, and the +#' overall graph dimension is taken to be the largest `k` such that the +#' tests for dimensions `1, ..., k` all reject. +#' @param method Method to adjust p-values for multiple testing.
Must be +#' one of `"none"`, `"holm"`, `"hochberg"`, `"hommel"`, `"bonferroni"`, +#' `"BH"`, `"BY"`, or `"fdr"`. Passed to [stats::p.adjust()]. Defaults to +#' `"none"`. +#' @param laplacian Logical value indicating whether to compute cross-validated +#' eigenvalues for the degree-normalized graph Laplacian rather than the +#' graph adjacency matrix. Experimental and should be used with caution. +#' Defaults to `FALSE`. +#' @param regularize Only applicable when `laplacian == TRUE`, in which case +#' this parameter controls whether or not the degree-normalized graph +#' Laplacian is regularized. Defaults to `TRUE`. +#' +#' @return A `eigcv` object, which is a list with the following named +#' elements. +#' +#' - `estimated_dimension`: inferred graph dimension. +#' - `summary`: summary table of the tests. +#' - `num_bootstraps`: number of bootstraps performed. +#' - `test_portion`: graph splitting probability used. +#' - `alpha`: significance level of each test. #' -#' @importFrom dplyr summarize group_by ungroup mutate summarise -#' @importFrom magrittr %>% -#' @importFrom rlang .data #' @export #' #' @examples @@ -116,16 +134,19 @@ gdstat <- function(full, test, u, v, test_portion) { #' #' A <- sample_sparse(model) #' -#' eigcv_result <- eigcv(A, k_max = 10) -#' eigcv_result +#' eigs<- eigcv(A, k_max = 10) +#' eigs +#' +#' plot(eigs, type = "z-score") # default +#' plot(eigs, type = "adjacency") +#' plot(eigs, type = "laplacian") +#' #' eigcv <- function(A, k_max, ..., num_bootstraps = 10, test_portion = 0.1, alpha = 0.05, - ptol = .Machine$double.eps, - method = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", - "none"), + method = c("none", "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr"), laplacian = FALSE, regularize = TRUE) { n <- min(dim(A)) @@ -182,29 +203,23 @@ eigcv <- function(A, k_max, pb$tick() } - ## summarize across CV/num_bootstraps - if (num_bootstraps > 1) { - cv_means <- cv_stats %>% - group_by(.data$k) %>% - summarise(
- cv_lambda_A = mean(.data$cv_lambda_A), - cv_lambda_L = mean(.data$cv_lambda_L), - z = mean(.data$z) - ) %>% - ungroup() - } else { - cv_means <- cv_stats - } + cv_means <- cv_stats %>% + dplyr::group_by(k) %>% + dplyr::summarise( + cv_lambda_A = mean(cv_lambda_A), + cv_lambda_L = mean(cv_lambda_L), + z = mean(z) + ) %>% + dplyr::ungroup() + + cv_means <- cv_means %>% - mutate( - pvals = stats::pnorm(.data$z, lower.tail = FALSE), - pvals = pmax(.data$pvals, ptol) - ) ## avoid exact 0 + dplyr::mutate( + pvals = stats::pnorm(z, lower.tail = FALSE) + ) - ## correct for multiplicity cv_means$padj <- stats::p.adjust(cv_means$pvals, method = method) - ## inference criteria <- cv_means$padj k_stop <- which(criteria > alpha) k_infer <- ifelse(length(k_stop), min(k_stop) - 1, k_max) @@ -221,69 +236,80 @@ eigcv <- function(A, k_max, } -#' Print `eigcv` +#' Print cross-validated eigenvalues #' -#' @method print eigcv -#' -#' @param x an `eigcv` object. +#' @param x An `eigcv` object created by a call to [eigcv()]. #' @param ... Ignored. #' @export +#' +#' @inherit eigcv examples +#' @return `x`, but invisibly. +#' +#' @method print eigcv print.eigcv <- function(x, ...) 
{ cat("Estimated graph dimension:\t", x$estimated_dimension, fill = TRUE) - cat("\nNumber of bootstraps:\t\t", x$num_bootstraps, fill = TRUE) - cat("Edge splitting probabaility:\t", x$test_portion, fill = TRUE) - cat("Significance level:\t\t", x$alpha, fill = TRUE) - cat("\n ------------ Summary of Tests ------------\n") - print(data.frame(x$summary[, -c(2, 3)]), row.names = FALSE) - cat(fill = TRUE) + cat("\nNumber of bootstraps:\t\t", x$num_bootstraps, fill = TRUE) + cat("Edge splitting probabaility:\t", x$test_portion, fill = TRUE) + cat("Significance level:\t\t", x$alpha, fill = TRUE) + cat("\n ------------ Summary of Tests ------------\n") + print(data.frame(x$summary[, -c(2, 3)]), row.names = FALSE) + cat(fill = TRUE) + invisible(x) } -#' Plot `eigcv` +#' Plot cross-validated eigenvalues #' -#' @method plot eigcv +#' @param x An `eigcv` object created by a call to [eigcv()]. +#' @param type Specifies what to plot. Must be one of the following options: +#' +#' - `"z-score"`, in which case the Z-statistic test scores are plotted +#' for each value of `k` (i.e. dimension of the eigenspace). +#' - `"adjacency"` in which case the cross-validated eigenvalues of the +#' adjacency matrix are plotted for each value of `k`. +#' - `"laplacian"` in which case the cross-validated eigenvalues of the +#' graph Laplacian matrix are plotted for each value of `k`. #' -#' @param x an `eigcv` object. -#' @param type either "z", "A", or "L" to specify the y-axis of the plot. -#' If "z", plot the test statistics (asymptotic z score) for each k. -#' If "A", plot x'Ax for each eigenvector x. -#' If "L", plot x'Lx for each eigenvector x. -#' @param threshold `numeric(1)`, cut-off of p-value (in log10), default to 2. -#' @param ... ignored. -#' @return Plot an `eigcv` object. 
-#' @importFrom ggplot2 ggplot aes labs theme_bw theme scale_color_manual -#' @importFrom ggplot2 geom_hline geom_point geom_line scale_x_continuous -#' @importFrom magrittr %>% -#' @importFrom dplyr select -#' @importFrom rlang .data +#' @param threshold Only used when `type == "z-score"`. Adds a horizontal +#' line at the value of `threshold`, which should be a numeric of length +#' one. Defaults to `2`. +#' @param ... Ignored. +#' @return A `ggplot2` object. #' @export -plot.eigcv <- function(x, type = c("z", "A", "L"), threshold = 2, ...) { +#' @method plot eigcv +#' +#' @inherit eigcv examples +plot.eigcv <- function(x, type = c("z-score", "adjacency", "laplacian"), threshold = 2, ...) { stopifnot("Threshold of statistics must be greater than 0." = threshold > 0) + type <- rlang::arg_match(type) + type <- type[1] - if (type == "z") { - dat <- x$summary %>% select(.data$k, val = .data$z) + if (type == "z-score") { + dat <- dplyr::select(x$summary, k, val = z) ylab <- "z score" } - if (type == "A") { - dat <- x$summary %>% select(.data$k, val = .data$cv_lambda_A) + if (type == "adjacency") { + dat <- dplyr::select(x$summary, k, val = cv_lambda_A) ylab <- "cross validated x' A x" } - if (type == "L") { - dat <- x$summary %>% select(.data$k, val = .data$cv_lambda_L) + if (type == "laplacian") { + dat <- dplyr::select(x$summary, k, val = cv_lambda_L) ylab <- "cross validated x' L x" } - g <- ggplot(aes(.data$k, .data$val), data = dat) + - geom_point(alpha = .8) + - geom_line(color = "blue") + - theme_bw() + - scale_x_continuous(breaks = function(x) { - unique(floor(pretty(seq(0, (max(x) + 1) * 1.1)))) - }) - - if (type == "z") { + g <- ggplot2::ggplot(ggplot2::aes(k, val), data = dat) + + ggplot2::geom_point(alpha = .8) + + ggplot2::geom_line(color = "blue") + + ggplot2::theme_bw() + + ggplot2::scale_x_continuous( + breaks = function(x) { + unique(floor(pretty(seq(0, (max(x) + 1) * 1.1)))) + } + ) + + if (type == "z-score") { g <- g + - geom_hline( + 
ggplot2::geom_hline( yintercept = threshold, alpha = .8, linetype = 2, color = "grey60", show.legend = TRUE ) diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..2eba9f2 --- /dev/null +++ b/R/utils.R @@ -0,0 +1,24 @@ +#' Pipe operator +#' +#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. +#' +#' @name %>% +#' @rdname pipe +#' @keywords internal +#' @export +#' @importFrom magrittr %>% +#' @usage lhs \%>\% rhs +#' @param lhs A value or the magrittr placeholder. +#' @param rhs A function call using the magrittr semantics. +#' @return The result of calling `rhs(lhs)`. +NULL + +utils::globalVariables( + c( + "cv_lambda_A", + "cv_lambda_L", + "k", + "val", + "z" + ) +) diff --git a/README.Rmd b/README.Rmd index 95df027..8dfbb0b 100644 --- a/README.Rmd +++ b/README.Rmd @@ -21,7 +21,7 @@ knitr::opts_chunk$set( [![CRAN status](https://www.r-pkg.org/badges/version/gdim)](https://CRAN.R-project.org/package=gdim) -`gdim` estimates graph dimension using cross-validated eigenvalues, via the graph-splitting technique developed in . Theoretically, the method works by computing a special type of cross-validated eigenvalue which follows a simple central limit theorem. This allows users to perform hypothesis tests on the rank of the graph. +`gdim` estimates graph dimension using cross-validated eigenvalues, via the graph-splitting technique developed in . Theoretically, the method works by computing a special type of cross-validated eigenvalue which follows a simple central limit theorem. This allows users to perform hypothesis tests on the rank of the graph. ## Installation @@ -77,4 +77,4 @@ plot(eigcv_result) ## Reference -Chen, Fan, Sebastien Roch, Karl Rohe, and Shuqi Yu. “Estimating Graph Dimension with Cross-Validated Eigenvalues.” ArXiv:2108.03336 [Cs, Math, Stat], August 6, 2021. http://arxiv.org/abs/2108.03336. +Chen, Fan, Sebastien Roch, Karl Rohe, and Shuqi Yu. 
“Estimating Graph Dimension with Cross-Validated Eigenvalues.” ArXiv:2108.03336 [Cs, Math, Stat], August 6, 2021. https://arxiv.org/abs/2108.03336. diff --git a/README.md b/README.md index 2c099f6..afccfc8 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ status](https://www.r-pkg.org/badges/version/gdim)](https://CRAN.R-project.org/p `gdim` estimates graph dimension using cross-validated eigenvalues, via the graph-splitting technique developed in -. Theoretically, the method works by +. Theoretically, the method works by computing a special type of cross-validated eigenvalue which follows a simple central limit theorem. This allows users to perform hypothesis tests on the rank of the graph. @@ -80,16 +80,16 @@ eigcv_result #> #> ------------ Summary of Tests ------------ #> k z pvals padj -#> 1 60.1118360 2.220446e-16 2.220446e-15 -#> 2 12.4766591 2.220446e-16 2.220446e-15 -#> 3 12.0401480 2.220446e-16 2.220446e-15 -#> 4 10.7393316 2.220446e-16 2.220446e-15 -#> 5 8.9024334 2.220446e-16 2.220446e-15 -#> 6 -1.1480263 8.745212e-01 1.000000e+00 -#> 7 -1.6158410 9.469357e-01 1.000000e+00 -#> 8 -0.9936334 8.397993e-01 1.000000e+00 -#> 9 -0.8575451 8.044281e-01 1.000000e+00 -#> 10 -1.1543480 8.758212e-01 1.000000e+00 +#> 1 59.7488180 2.220446e-16 2.220446e-15 +#> 2 12.9094629 2.220446e-16 2.220446e-15 +#> 3 11.8600427 2.220446e-16 2.220446e-15 +#> 4 11.9412340 2.220446e-16 2.220446e-15 +#> 5 9.0252520 2.220446e-16 2.220446e-15 +#> 6 -0.8512008 8.026711e-01 1.000000e+00 +#> 7 -0.8182195 7.933841e-01 1.000000e+00 +#> 8 -0.9912649 8.392219e-01 1.000000e+00 +#> 9 -0.9005808 8.160944e-01 1.000000e+00 +#> 10 -1.1677953 8.785553e-01 1.000000e+00 ``` In this example, `eigcv()` suggests `k=5`. @@ -108,4 +108,4 @@ plot(eigcv_result) Chen, Fan, Sebastien Roch, Karl Rohe, and Shuqi Yu. “Estimating Graph Dimension with Cross-Validated Eigenvalues.” ArXiv:2108.03336 \[Cs, -Math, Stat\], August 6, 2021. . +Math, Stat\], August 6, 2021. . 
diff --git a/cran-comments.md b/cran-comments.md index 858617d..4d11deb 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,5 +1,5 @@ ## R CMD check results -0 errors | 0 warnings | 1 note +0 errors | 0 warnings | 1 note * This is a new release. diff --git a/man/eigcv.Rd b/man/eigcv.Rd index b96ad23..179d10c 100644 --- a/man/eigcv.Rd +++ b/man/eigcv.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/eigcv.R \name{eigcv} \alias{eigcv} -\title{Edge Bootstrapping and Splitting} +\title{Compute cross-validated eigenvalues} \usage{ eigcv( A, @@ -11,50 +11,66 @@ eigcv( num_bootstraps = 10, test_portion = 0.1, alpha = 0.05, - ptol = .Machine$double.eps, - method = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"), + method = c("none", "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr"), laplacian = FALSE, regularize = TRUE ) } \arguments{ -\item{A}{The adjacency matrix of graph. Must be non-negative integer valued.} +\item{A}{The adjacency matrix of graph. Must be non-negative and +integer valued.} -\item{k_max}{\code{integer(1)}, number of eigenvectors to compute.} +\item{k_max}{The maximum dimension of the graph to consider. This many +eigenvectors are computed. Should be a non-negative integer smallish +relative to the dimensions of \code{A}.} -\item{num_bootstraps}{\code{integer(1)}, number of graph bootstraps, default to 10. -Graph bootstrapping is to account for the randomness in graph splitting, -rather than obtaining any statistic (as a traditional num_bootstraps does). -Hence, a small number (e.g., 3~10) of bootstraps usually suffices. -If \code{num_bootstraps>1}, the test statistics will be averaged across bootstraps -and the p-values will be calculated based on the averaged statistics.} +\item{...}{Ignored.} -\item{test_portion}{TODO} +\item{num_bootstraps}{The number of times to bootstrap the graph. Since +cross-validated eigenvalues are based on a random graph split, they +are themselves random.
By repeatedly computing cross-validated eigenvalues +for different sample splits, the idea is to smooth away some of the +randomness due to the graph splits. A small number of bootstraps +(3 to 10) usually suffices. Defaults to \code{10}. Test statistics (i.e. +z-scores for cv eigenvalues) are averaged across bootstraps +and the p-values will be calculated based on the averaged statistics.} -\item{alpha}{Significance level of each test, defaults to \code{0.05}. -This is used to cut off the dimension estimation.} +\item{test_portion}{The portion of the graph to put into the test graph, +as opposed to the training graph. Defaults to \code{0.1}. Must be strictly +between zero and one.} -\item{ptol}{\code{numeric(1)}, the tolerance of minimal p-value.} +\item{alpha}{Significance level for hypothesis tests. Each dimension +\verb{1, ..., k_max} is tested when estimating graph dimension, and the +overall graph dimension is taken to be the largest \code{k} such that the +tests for dimensions \verb{1, ..., k} all reject.} -\item{method}{correction method, a \code{\link{character}} string. - Can be abbreviated.} +\item{method}{Method to adjust p-values for multiple testing. Must be +one of \code{"none"}, \code{"holm"}, \code{"hochberg"}, \code{"hommel"}, \code{"bonferroni"}, +\code{"BH"}, \code{"BY"}, or \code{"fdr"}. Passed to \code{\link[stats:p.adjust]{stats::p.adjust()}}. Defaults to +\code{"none"}.} -\item{laplacian}{\code{logical(1)}, use the normalized and regularized adjacency -matrix (i.e. L) -This option is experimental and should be used with caution.} +\item{laplacian}{Logical value indicating whether to compute cross-validated +eigenvalues for the degree-normalized graph Laplacian rather than the +graph adjacency matrix. Experimental and should be used with caution.
+Defaults to \code{FALSE}.} -\item{regularize}{TODO} +\item{regularize}{Only applicable when \code{laplacian == TRUE}, in which case +this parameter controls whether or not the degree-normalized graph +Laplacian is regularized. Defaults to \code{TRUE}.} } \value{ -A \code{eigcv} object, which contains: -\item{estimated_dimension}{inferred graph dimension.} -\item{summary}{summary table of the tests.} -\item{num_bootstraps}{number of bootstraps performed.} -\item{test_portion}{graph splitting probability used.} -\item{alpha}{significance level of each test.} +A \code{eigcv} object, which is a list with the following named +elements. +\itemize{ +\item \code{estimated_dimension}: inferred graph dimension. +\item \code{summary}: summary table of the tests. +\item \code{num_bootstraps}: number of bootstraps performed. +\item \code{test_portion}: graph splitting probability used. +\item \code{alpha}: significance level of each test. +} } \description{ -Estimate the graph dimension via eigenvalue cross-validation (EigCV). +Estimate graph dimension via eigenvalue cross-validation (EigCV). A graph has dimension \code{k} if the first \code{k} eigenvectors of its adjacency matrix are correlated with its population eigenspace, and the others are not. Edge bootstrapping sub-samples the edges of the graph (without replacement). 
@@ -80,7 +96,12 @@ model <- sbm( A <- sample_sparse(model) -eigcv_result <- eigcv(A, k_max = 10) -eigcv_result +eigs<- eigcv(A, k_max = 10) +eigs + +plot(eigs, type = "z-score") # default +plot(eigs, type = "adjacency") +plot(eigs, type = "laplacian") + } diff --git a/man/figures/README-unnamed-chunk-3-1.png b/man/figures/README-unnamed-chunk-3-1.png index ef24dad..d179cec 100644 Binary files a/man/figures/README-unnamed-chunk-3-1.png and b/man/figures/README-unnamed-chunk-3-1.png differ diff --git a/man/gdim-package.Rd b/man/gdim-package.Rd index 1080e90..e00e090 100644 --- a/man/gdim-package.Rd +++ b/man/gdim-package.Rd @@ -18,7 +18,7 @@ Useful links: } \author{ -\strong{Maintainer}: Alex Hayes \email{alexpghayes@gmail.com} (\href{https://orcid.org/0000-0002-4985-5160}{ORCID}) +\strong{Maintainer}: Alex Hayes \email{alexpghayes@gmail.com} (\href{https://orcid.org/0000-0002-4985-5160}{ORCID}) [copyright holder] Authors: \itemize{ diff --git a/man/pipe.Rd b/man/pipe.Rd new file mode 100644 index 0000000..5fa90fe --- /dev/null +++ b/man/pipe.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{\%>\%} +\alias{\%>\%} +\title{Pipe operator} +\usage{ +lhs \%>\% rhs +} +\arguments{ +\item{lhs}{A value or the magrittr placeholder.} + +\item{rhs}{A function call using the magrittr semantics.} +} +\value{ +The result of calling \code{rhs(lhs)}. +} +\description{ +See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. +} +\keyword{internal} diff --git a/man/plot.eigcv.Rd b/man/plot.eigcv.Rd index f9987d9..3c96057 100644 --- a/man/plot.eigcv.Rd +++ b/man/plot.eigcv.Rd @@ -2,25 +2,61 @@ % Please edit documentation in R/eigcv.R \name{plot.eigcv} \alias{plot.eigcv} -\title{Plot \code{eigcv}} +\title{Plot cross-validated eigenvalues} \usage{ -\method{plot}{eigcv}(x, type = c("z", "A", "L"), threshold = 2, ...) +\method{plot}{eigcv}(x, type = c("z-score", "adjacency", "laplacian"), threshold = 2, ...) 
} \arguments{ -\item{x}{an \code{eigcv} object.} +\item{x}{An \code{eigcv} object created by a call to \code{\link[=eigcv]{eigcv()}}.} -\item{type}{either "z", "A", or "L" to specify the y-axis of the plot. -If "z", plot the test statistics (asymptotic z score) for each k. -If "A", plot x'Ax for each eigenvector x. -If "L", plot x'Lx for each eigenvector x.} +\item{type}{Specifies what to plot. Must be one of the following options: +\itemize{ +\item \code{"z-score"}, in which case the Z-statistic test scores are plotted +for each value of \code{k} (i.e. dimension of the eigenspace). +\item \code{"adjacency"} in which case the cross-validated eigenvalues of the +adjacency matrix are plotted for each value of \code{k}. +\item \code{"laplacian"} in which case the cross-validated eigenvalues of the +graph Laplacian matrix are plotted for each value of \code{k}. +}} -\item{threshold}{\code{numeric(1)}, cut-off of p-value (in log10), default to 2.} +\item{threshold}{Only used when \code{type == "z-score"}. Adds a horizontal +line at the value of \code{threshold}, which should be a numeric of length +one. Defaults to \code{2}.} -\item{...}{ignored.} +\item{...}{Ignored.} } \value{ -Plot an \code{eigcv} object. +A \code{ggplot2} object. 
} \description{ -Plot \code{eigcv} +Plot cross-validated eigenvalues +} +\examples{ + +library(fastRG) + +set.seed(27) + +B <- matrix(0.1, 5, 5) +diag(B) <- 0.3 + +model <- sbm( + n = 1000, + k = 5, + B = B, + expected_degree = 40, + poisson_edges = FALSE, + allow_self_loops = FALSE +) + +A <- sample_sparse(model) + +eigs<- eigcv(A, k_max = 10) +eigs + +plot(eigs, type = "z-score") # default +plot(eigs, type = "adjacency") +plot(eigs, type = "laplacian") + + } diff --git a/man/print.eigcv.Rd b/man/print.eigcv.Rd index 2a47a1d..e30249b 100644 --- a/man/print.eigcv.Rd +++ b/man/print.eigcv.Rd @@ -2,15 +2,47 @@ % Please edit documentation in R/eigcv.R \name{print.eigcv} \alias{print.eigcv} -\title{Print \code{eigcv}} +\title{Print cross-validated eigenvalues} \usage{ \method{print}{eigcv}(x, ...) } \arguments{ -\item{x}{an \code{eigcv} object.} +\item{x}{An \code{eigcv} object created by a call to \code{\link[=eigcv]{eigcv()}}.} \item{...}{Ignored.} } +\value{ +\code{x}, but invisibly. +} \description{ -Print \code{eigcv} +Print cross-validated eigenvalues +} +\examples{ + +library(fastRG) + +set.seed(27) + +B <- matrix(0.1, 5, 5) +diag(B) <- 0.3 + +model <- sbm( + n = 1000, + k = 5, + B = B, + expected_degree = 40, + poisson_edges = FALSE, + allow_self_loops = FALSE +) + +A <- sample_sparse(model) + +eigs<- eigcv(A, k_max = 10) +eigs + +plot(eigs, type = "z-score") # default +plot(eigs, type = "adjacency") +plot(eigs, type = "laplacian") + + }