diff --git a/DESCRIPTION b/DESCRIPTION index 64ee79f0..834e8d0c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,7 +5,8 @@ Version: 0.5.9605 Authors@R: c( person("Julien", "Chiquet", role = c("aut", "cre"), email = "julien.chiquet@inra.fr", comment = c(ORCID = "0000-0002-3629-3429")), - person("Mahendra", "Mariadassou", role = "aut", email = "mahendra.mariadassou@inra.fr"), + person("Mahendra", "Mariadassou", role = "aut", email = "mahendra.mariadassou@inra.fr", + comment = c(ORCID = "0000-0003-2986-354X")), person("Stéphane", "Robin", role ="ctb", email = "stephane.robininra.fr") ) Maintainer: Julien Chiquet diff --git a/NAMESPACE b/NAMESPACE index f369a756..06ed0596 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand S3method(coef,PLNfit) +S3method(fisher,PLNfit) S3method(getBestModel,PLNPCAfamily) S3method(getBestModel,PLNnetworkfamily) S3method(getModel,PLNPCAfamily) @@ -12,15 +13,18 @@ S3method(plot,PLNnetworkfamily) S3method(plot,PLNnetworkfit) S3method(predict,PLNLDAfit) S3method(predict,PLNfit) +S3method(standard_error,PLNfit) S3method(vcov,PLNfit) export(PLN) export(PLNLDA) export(PLNPCA) export(PLNnetwork) export(coefficient_path) +export(fisher) export(getBestModel) export(getModel) export(rPLN) +export(standard_error) import(Matrix) import(R6) import(RcppArmadillo) diff --git a/R/PLNPCAfit-class.R b/R/PLNPCAfit-class.R index 02297cfd..eaf4ecf5 100644 --- a/R/PLNPCAfit-class.R +++ b/R/PLNPCAfit-class.R @@ -235,6 +235,17 @@ PLNPCAfit$set("public", "plot_PCA", } ) +# Compute the (one-data) Fisher information matrix of Theta using one of two approximations scheme. +PLNPCAfit$set("private", "compute_fisher", + function(type = c("wald", "louis"), X = NULL) { + type = match.arg(type) + if (type == "louis") { + stop("Louis approximation scheme not available yet for object of class PLNPLCA, use `type = \"wald\"' instead.") + } + super$fisher(type = "wald", X = X) + } +) + PLNPCAfit$set("public", "show", function() { super$show(paste0("Poisson Lognormal with rank constrained for PCA (rank = ",self$rank,")\n")) diff --git a/R/PLNfit-S3methods.R b/R/PLNfit-S3methods.R index f87af2fb..4e612fb4 100644 --- a/R/PLNfit-S3methods.R +++ b/R/PLNfit-S3methods.R @@ -49,3 +49,82 @@ vcov.PLNfit <- function(object, ...) { stopifnot(isPLNfit(object)) object$model_par$Sigma } + +#' Generic function for the Fisher information matrix of Theta +#' +#' @description Compute or extract component-wise standard errors of Theta in multivariate (generalized) linear models of the form \deqn{g(E[Y|X]) = X\Theta} Useful to create confidence intervals and (multivariate) confidence regions under a Gaussian approximation of \eqn{\Theta}. Note that the Fisher information matrix is the one-data version (not scaled by the number of observations). +#' +#' @param x An `R` object. Currently there are methods for \code{\link{PLNfit}} (and its variants) objects. +#' @param ... Further arguments passed to or from other methods. +#' +#' @return The fisher information matrix of \eqn{\Theta} in generalized linear models. +#' @export +#' +fisher <- function (x, ...) { + UseMethod("fisher", x) +} + +#' Fisher information matrix for Theta +#' +#' @description Extracts Fisher information matrix of \eqn{\Theta} from objects returned by \code{\link[=PLN]{PLN}} and its variants. Fisher matrix is computed using one of two approximation scheme: wald (default, conservative, gives large confidence interval) or louis (anticonservative). Note that the Fisher information matrix is the one-data version (not scaled by the number of observations). +#' +#' @name fisher.PLNfit +#' +#' @param object an R6 object with class PLNfit +#' @param type Either `wald` (default) or `louis`. Approxomation scheme used to compute the +#' Fisher information matrix +#' @param ... additional parameters for S3 compatibility. Not used +#' @return A block-diagonal matrix with p (number of species) blocks of size d (number of covariates), assuming +#' \eqn{\Theta} is a matrix of size d * p. +#' +#' @seealso \code{\link[=standard_error.PLNfit]{standard_error}} for standard errors +#' +#' @export +#' +fisher.PLNfit <- function(object, type = c("wald", "louis"), ...) { + stopifnot(isPLNfit(object)) + type <- match.arg(type) + if (type != object$fisher$type) { + stop(paste("Fisher information was not computed using the", type, "approximation. Try another approximation scheme.")) + } + object$fisher$mat +} + +#' Component-wise standard errors of Theta +#' +#' @description Compute or extract component-wise standard errors of Theta in multivariate (generalized) linear models of the form \deqn{g(E[Y|X]) = X\Theta} Useful to compute Z-scores and p-values under a gaussian/student approximation of \eqn{\Theta} +#' +#' @param x An `R` object. Currently there are methods for \code{\link{PLNfit}} (and its variants) objects. +#' @param ... Further arguments passed to or from other methods. +#' +#' @return The standard errors associated with coefficients of \eqn{\Theta} +#' @export +#' +standard_error <- function (x, ...) { + UseMethod("standard_error", x) +} + +#' Component-wise standard errors of Theta +#' +#' @description Extracts univariate standard errors for the estimated coefficient of Theta. Standard errors are computed from the (approximate) Fisher information matrix. See \code{\link[=fisher.PLNfit]{fisher}} for more details on the approximations. +#' +#' @name standard_error.PLNfit +#' +#' @param object an R6 object with class PLNfit +#' @param type Either `Wald` (default) or `Louis`. Approxomation scheme used to compute the +#' Fisher information matrix +#' @param ... additional parameters for S3 compatibility. Not used +#' +#' @seealso \code{\link[=fisher.PLNfit]{fisher}} for the complete Fisher information matrix +#' +#' @return A p * d positive matrix (same size as \eqn{\Theta}) with standard errors for the coefficients of \eqn{\Theta} +#' @export +#' +standard_error.PLNfit <- function(object, type = c("wald", "louis"), ...) { + stopifnot(isPLNfit(object)) + type <- match.arg(type) + if (type != object$fisher$type) { + stop(paste("Standard errors were not computed using the", type, "approximation. Try another approximation scheme.")) + } + object$std_err +} diff --git a/R/PLNfit-class.R b/R/PLNfit-class.R index 4035c934..831f49fa 100644 --- a/R/PLNfit-class.R +++ b/R/PLNfit-class.R @@ -50,6 +50,10 @@ PLNfit <- Z = NA, # the matrix of latent variable R2 = NA, # approximated goodness of fit criterion Ji = NA, # element-wise approximated loglikelihood + FIM = NA, # Fisher information matrix of Theta, computed using of two approximation scheme + FIM_type = NA, # Either "wald" or "louis". Approximation scheme used to compute FIM + .std_err = NA, # element-wise standard error for the elements of Theta computed + # from the Fisher information matrix covariance = NA, # a string describing the covariance model optimizer = NA, # link to the function that performs the optimization monitoring = NA # a list with optimization monitoring quantities @@ -60,8 +64,9 @@ PLNfit <- q = function() {ncol(private$M)}, p = function() {nrow(private$Theta)}, d = function() {ncol(private$Theta)}, - ## model_par and var_par allow write access for bootstrapping purposes model_par = function() { list(Theta = private$Theta, Sigma = private$Sigma)}, + fisher = function() { list(mat = private$FIM, type = private$FIM_type) }, + std_err = function() { private$.std_err }, var_par = function() {list(M = private$M, S = private$S)}, latent = function() {private$Z}, fitted = function() {exp(private$Z + .5 * private$S)}, @@ -164,7 +169,7 @@ function(responses, covariates, offsets, weights) { }) PLNfit$set("public", "postTreatment", -function(responses, covariates, offsets, weights = rep(1, nrow(responses))) { +function(responses, covariates, offsets, weights = rep(1, nrow(responses)), type = c("wald", "louis")) { ## compute R2 self$set_R2(responses, covariates, offsets, weights) ## Set the name of the matrices according to those of the data matrices @@ -172,6 +177,12 @@ function(responses, covariates, offsets, weights = rep(1, nrow(responses))) { colnames(private$Theta) <- colnames(covariates) rownames(private$Sigma) <- colnames(private$Sigma) <- colnames(responses) rownames(private$M) <- rownames(private$S) <- rownames(responses) + ## compute and store Fisher Information matrix + type <- match.arg(type) + private$FIM <- self$compute_fisher(type, X = covariates) + private$FIM_type <- type + ## compute and store matrix of standard errors + private$.std_err <- self$compute_standard_error() }) # Positions in the (Euclidian) parameter space, noted as Z in the model. Used to compute the likelihood. @@ -248,6 +259,42 @@ PLNfit$set("public", "predict", } ) +# Compute the (one-data) Fisher information matrix of Theta using one of two approximations scheme (wald or fisher) +PLNfit$set("public", "compute_fisher", + function(type = c("wald", "louis"), X = NULL) { + type = match.arg(type) + A <- self$fitted + n <- self$n + if (type == "louis") { + ## TODO check how to adapt for PLNPCA + ## A = A + A \odot A \odot (exp(S) - 1_{n \times p}) + A <- A + A * A * (exp(self$var_par$S) - 1) + } + result <- bdiag(lapply(1:self$p, function(i) { + ## t(X) %*% diag(A[, i]) %*% X + crossprod(X, A[, i] * X) / n + })) + ## set proper names + element.names <- expand.grid(covariates = colnames(private$Theta), + species = rownames(private$Theta)) %>% rev() %>% + ## Hack to make sure that species is first and varies slowest + apply(1, paste0, collapse = "_") + rownames(result) <- element.names + return(result) + } +) + +# Compute univariate standard errors for the estimated coefficient of Theta. Standard errors are computed from the (approximate) +# Fisher information matrix. See \code{\link[=PLNfit_fisher]{fisher}} for more details on the approximations. +PLNfit$set("public", "compute_standard_error", + function() { + fim <- self$n * self$fisher$mat ## Fisher Information matrix I_n(\Theta) = n * I(\Theta) + stderr <- fim %>% solve %>% diag %>% sqrt %>% matrix(nrow = self$d) %>% t() + dimnames(stderr) <- dimnames(self$model_par$Theta) + return(stderr) + } +) + PLNfit$set("public", "show", function(model = paste("A multivariate Poisson Lognormal fit with", private$covariance, "covariance model.\n")) { cat(model) diff --git a/man/fisher.PLNfit.Rd b/man/fisher.PLNfit.Rd new file mode 100644 index 00000000..0918c060 --- /dev/null +++ b/man/fisher.PLNfit.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PLNfit-S3methods.R +\name{fisher.PLNfit} +\alias{fisher.PLNfit} +\title{Fisher information matrix for Theta} +\usage{ +\method{fisher}{PLNfit}(object, type = c("wald", "louis"), ...) +} +\arguments{ +\item{object}{an R6 object with class PLNfit} + +\item{type}{Either `wald` (default) or `louis`. Approxomation scheme used to compute the +Fisher information matrix} + +\item{...}{additional parameters for S3 compatibility. Not used} +} +\value{ +A block-diagonal matrix with p (number of species) blocks of size d (number of covariates), assuming +\eqn{\Theta} is a matrix of size d * p. +} +\description{ +Extracts Fisher information matrix of \eqn{\Theta} from objects returned by \code{\link[=PLN]{PLN}} and its variants. Fisher matrix is computed using one of two approximation scheme: wald (default, conservative, gives large confidence interval) or louis (anticonservative). Note that the Fisher information matrix is the one-data version (not scaled by the number of observations). +} +\seealso{ +\code{\link[=standard_error.PLNfit]{standard_error}} for standard errors +} diff --git a/man/fisher.Rd b/man/fisher.Rd new file mode 100644 index 00000000..8b6fd8fd --- /dev/null +++ b/man/fisher.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PLNfit-S3methods.R +\name{fisher} +\alias{fisher} +\title{Generic function for the Fisher information matrix of Theta} +\usage{ +fisher(x, ...) +} +\arguments{ +\item{x}{An `R` object. Currently there are methods for \code{\link{PLNfit}} (and its variants) objects.} + +\item{...}{Further arguments passed to or from other methods.} +} +\value{ +The fisher information matrix of \eqn{\Theta} in generalized linear models. +} +\description{ +Compute or extract component-wise standard errors of Theta in multivariate (generalized) linear models of the form \deqn{g(E[Y|X]) = X\Theta} Useful to create confidence intervals and (multivariate) confidence regions under a Gaussian approximation of \eqn{\Theta}. Note that the Fisher information matrix is the one-data version (not scaled by the number of observations). +} diff --git a/man/standard_error.PLNfit.Rd b/man/standard_error.PLNfit.Rd new file mode 100644 index 00000000..16a033bd --- /dev/null +++ b/man/standard_error.PLNfit.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PLNfit-S3methods.R +\name{standard_error.PLNfit} +\alias{standard_error.PLNfit} +\title{Component-wise standard errors of Theta} +\usage{ +\method{standard_error}{PLNfit}(object, type = c("wald", "louis"), ...) +} +\arguments{ +\item{object}{an R6 object with class PLNfit} + +\item{type}{Either `Wald` (default) or `Louis`. Approxomation scheme used to compute the +Fisher information matrix} + +\item{...}{additional parameters for S3 compatibility. Not used} +} +\value{ +A p * d positive matrix (same size as \eqn{\Theta}) with standard errors for the coefficients of \eqn{\Theta} +} +\description{ +Extracts univariate standard errors for the estimated coefficient of Theta. Standard errors are computed from the (approximate) Fisher information matrix. See \code{\link[=fisher.PLNfit]{fisher}} for more details on the approximations. +} +\seealso{ +\code{\link[=fisher.PLNfit]{fisher}} for the complete Fisher information matrix +} diff --git a/man/standard_error.Rd b/man/standard_error.Rd new file mode 100644 index 00000000..d61cb600 --- /dev/null +++ b/man/standard_error.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PLNfit-S3methods.R +\name{standard_error} +\alias{standard_error} +\title{Component-wise standard errors of Theta} +\usage{ +standard_error(x, ...) +} +\arguments{ +\item{x}{An `R` object. Currently there are methods for \code{\link{PLNfit}} (and its variants) objects.} + +\item{...}{Further arguments passed to or from other methods.} +} +\value{ +The standard errors associated with coefficients of \eqn{\Theta} +} +\description{ +Compute or extract component-wise standard errors of Theta in multivariate (generalized) linear models of the form \deqn{g(E[Y|X]) = X\Theta} Useful to compute Z-scores and p-values under a gaussian/student approximation of \eqn{\Theta} +} diff --git a/tests/testthat/test-standard-error.R b/tests/testthat/test-standard-error.R new file mode 100644 index 00000000..cacfed13 --- /dev/null +++ b/tests/testthat/test-standard-error.R @@ -0,0 +1,80 @@ +context("test-standard-error") + +data("trichoptera") + +test_that("Check that fisher and standard_error return objects with proper dimensions and sign", { + + myPLN_cov <- myPLN_cov <- PLN(Abundance ~ Wind + offset(log(TotalCounts)), data = trichoptera) + expect_is(myPLN_cov, "PLNfit") + p <- myPLN_cov$p + d <- myPLN_cov$d + + fim <- fisher(myPLN_cov) + sem <- standard_error(myPLN_cov) + ## Dimensions + expect_equal(dim(fim), c(p*d, p*d)) + expect_equal(dim(sem), c(p, d)) + + ## Names + expect_equal(rownames(sem), rownames(coef(myPLN_cov))) + expect_equal(colnames(sem), colnames(coef(myPLN_cov))) + + ## Fisher is block diagonal + expect_equal(inherits(fim, "dgCMatrix"), TRUE) + + ## Standard errors are all positive + for (i in 1:(p*d)) { + expect_gte(sem[i], 0) + } + +}) + +test_that("Check internal consistency of Fisher matrix for PLN models with no covariates", { + tol <- 1e-8 + + ## Fit model without covariates + myPLN <- PLN(Abundance ~ 1 + offset(log(TotalCounts)), data = trichoptera) + + ## Consistency of the diagonal of the fisher matrix + fim.diag <- Matrix::diag(fisher(myPLN)) + manual.fim.diag <- colSums(myPLN$fitted) / myPLN$n + ## Consistency of the standard error matrix + sem <- standard_error(myPLN) %>% as.numeric() + manual.sem <- 1/colSums(myPLN$fitted) %>% sqrt() + + ## Internal consistency + expect_equal(fim.diag , manual.fim.diag , tolerance = tol) + expect_equal(sem , manual.sem , tolerance = tol) + +}) + + +test_that("Check temporal consistency of Fisher matrix for PLN models with no covariates", { + tol <- 1e-4 + + ## Fit model without covariates + myPLN <- PLN(Abundance ~ 1 + offset(log(TotalCounts)), data = trichoptera) + + ## Consistency of the diagonal of the fisher matrix + fim.diag <- Matrix::diag(fisher(myPLN)) + expected.fim.diag <- c(0.0612123698810698, 0.0612384161054906, 3.73462487824109, 0.122467107738817, + 122.19280897578, 2.2230572191967, 0.285741065637069, 0.285687659219944, + 0.142744327711051, 2.36736421753514, 3.85859113231971, 1.06111199011525, + 3.90356517005791, 2.72098275756987, 9.59722821630398, 0.183645852556891, + 5.93888146445577) + ## Consistency of the standard error matrix + sem <- standard_error(myPLN) %>% as.numeric() + expected.sem <- c(0.577407423403546, 0.577284617461014, 0.0739228099688871, 0.40821807394677, + 0.0129234699024801, 0.0958134855472534, 0.267248717630853, 0.267273696185322, + 0.378113801869815, 0.0928473302527288, 0.072725644559697, 0.138682400064212, + 0.0723054848787022, 0.0866042221012381, 0.0461136022101119, 0.333358395876535, + 0.058620515251328) + + ## Temporal consistency (with previous fits of the PLN model, here fitted on the 2018/12/11 with PLNmodels version 0.5.9601) + expect_equal(fim.diag , expected.fim.diag, tolerance = tol) + expect_equal(sem , expected.sem , tolerance = tol) + +}) + + +