Merge branch 'score' into dev

PLN-team · Dec 11, 2018 · af761b5 · af761b5
2 parents 245de79 + f92e212
commit af761b5
Show file tree

Hide file tree

Showing 10 changed files with 314 additions and 3 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -5,7 +5,8 @@ Version: 0.5.9605
 Authors@R: c(
   person("Julien", "Chiquet", role = c("aut", "cre"), email = "julien.chiquet@inra.fr",
       comment = c(ORCID = "0000-0002-3629-3429")),
-  person("Mahendra", "Mariadassou", role = "aut", email = "mahendra.mariadassou@inra.fr"),
+  person("Mahendra", "Mariadassou", role = "aut", email = "mahendra.mariadassou@inra.fr", 
+      comment = c(ORCID = "0000-0003-2986-354X")),
   person("Stéphane", "Robin", role ="ctb", email = "stephane.robininra.fr")
   )
 Maintainer: Julien Chiquet <julien.chiquet@inra.fr>

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,6 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
 S3method(coef,PLNfit)
+S3method(fisher,PLNfit)
 S3method(getBestModel,PLNPCAfamily)
 S3method(getBestModel,PLNnetworkfamily)
 S3method(getModel,PLNPCAfamily)
@@ -12,15 +13,18 @@ S3method(plot,PLNnetworkfamily)
 S3method(plot,PLNnetworkfit)
 S3method(predict,PLNLDAfit)
 S3method(predict,PLNfit)
+S3method(standard_error,PLNfit)
 S3method(vcov,PLNfit)
 export(PLN)
 export(PLNLDA)
 export(PLNPCA)
 export(PLNnetwork)
 export(coefficient_path)
+export(fisher)
 export(getBestModel)
 export(getModel)
 export(rPLN)
+export(standard_error)
 import(Matrix)
 import(R6)
 import(RcppArmadillo)

diff --git a/R/PLNPCAfit-class.R b/R/PLNPCAfit-class.R
@@ -235,6 +235,17 @@ PLNPCAfit$set("public", "plot_PCA",
   }
 )
 
+# Compute the (one-data) Fisher information matrix of Theta using one of two approximations scheme.
+PLNPCAfit$set("private", "compute_fisher",
+  function(type = c("wald", "louis"), X = NULL) {
+    type = match.arg(type)
+    if (type == "louis") {
+      stop("Louis approximation scheme not available yet for object of class PLNPLCA, use `type = \"wald\"' instead.")
+    }
+    super$fisher(type = "wald", X = X)
+  }
+)
+
 PLNPCAfit$set("public", "show",
 function() {
   super$show(paste0("Poisson Lognormal with rank constrained for PCA (rank = ",self$rank,")\n"))

diff --git a/R/PLNfit-S3methods.R b/R/PLNfit-S3methods.R
@@ -49,3 +49,82 @@ vcov.PLNfit <- function(object, ...) {
   stopifnot(isPLNfit(object))
   object$model_par$Sigma
 }
+
+#' Generic function for the Fisher information matrix of Theta
+#'
+#' @description Compute or extract component-wise standard errors of Theta in multivariate (generalized) linear models of the form \deqn{g(E[Y|X]) = X\Theta} Useful to create confidence intervals and (multivariate) confidence regions under a Gaussian approximation of \eqn{\Theta}. Note that the Fisher information matrix is the one-data version (not scaled by the number of observations).
+#'
+#' @param x An `R` object. Currently there are methods for \code{\link{PLNfit}} (and its variants) objects.
+#' @param ... Further arguments passed to or from other methods.
+#'
+#' @return The fisher information matrix of \eqn{\Theta} in generalized linear models.
+#' @export
+#'
+fisher <- function (x, ...) {
+  UseMethod("fisher", x)
+}
+
+#' Fisher information matrix for Theta
+#'
+#' @description Extracts Fisher information matrix of \eqn{\Theta} from objects returned by \code{\link[=PLN]{PLN}} and its variants. Fisher matrix is computed using one of two approximation scheme: wald (default, conservative, gives large confidence interval) or louis (anticonservative). Note that the Fisher information matrix is the one-data version (not scaled by the number of observations).
+#'
+#' @name fisher.PLNfit
+#'
+#' @param object an R6 object with class PLNfit
+#' @param type Either `wald` (default) or `louis`. Approxomation scheme used to compute the
+#' Fisher information matrix
+#' @param ... additional parameters for S3 compatibility. Not used
+#' @return A block-diagonal matrix with p (number of species) blocks of size d (number of covariates), assuming
+#' \eqn{\Theta} is a matrix of size d * p.
+#'
+#' @seealso \code{\link[=standard_error.PLNfit]{standard_error}} for standard errors
+#'
+#' @export
+#'
+fisher.PLNfit <- function(object, type = c("wald", "louis"), ...) {
+  stopifnot(isPLNfit(object))
+  type <- match.arg(type)
+  if (type != object$fisher$type) {
+    stop(paste("Fisher information was not computed using the", type, "approximation. Try another approximation scheme."))
+  }
+  object$fisher$mat
+}
+
+#' Component-wise standard errors of Theta
+#'
+#' @description Compute or extract component-wise standard errors of Theta in multivariate (generalized) linear models of the form \deqn{g(E[Y|X]) = X\Theta} Useful to compute Z-scores and p-values under a gaussian/student approximation of \eqn{\Theta}
+#'
+#' @param x An `R` object. Currently there are methods for \code{\link{PLNfit}} (and its variants) objects.
+#' @param ... Further arguments passed to or from other methods.
+#'
+#' @return The standard errors associated with coefficients of \eqn{\Theta}
+#' @export
+#'
+standard_error <- function (x, ...) {
+  UseMethod("standard_error", x)
+}
+
+#' Component-wise standard errors of Theta
+#'
+#' @description Extracts univariate standard errors for the estimated coefficient of Theta. Standard errors are computed from the (approximate) Fisher information matrix. See \code{\link[=fisher.PLNfit]{fisher}} for more details on the approximations.
+#'
+#' @name standard_error.PLNfit
+#'
+#' @param object an R6 object with class PLNfit
+#' @param type Either `Wald` (default) or `Louis`. Approxomation scheme used to compute the
+#' Fisher information matrix
+#' @param ... additional parameters for S3 compatibility. Not used
+#'
+#' @seealso \code{\link[=fisher.PLNfit]{fisher}} for the complete Fisher information matrix
+#'
+#' @return A p * d positive matrix (same size as \eqn{\Theta}) with standard errors for the coefficients of \eqn{\Theta}
+#' @export
+#'
+standard_error.PLNfit <- function(object, type = c("wald", "louis"), ...) {
+  stopifnot(isPLNfit(object))
+  type <- match.arg(type)
+  if (type != object$fisher$type) {
+    stop(paste("Standard errors were not computed using the", type, "approximation. Try another approximation scheme."))
+  }
+  object$std_err
+}
diff --git a/R/PLNfit-class.R b/R/PLNfit-class.R
@@ -50,6 +50,10 @@ PLNfit <-
       Z          = NA, # the matrix of latent variable
       R2         = NA, # approximated goodness of fit criterion
       Ji         = NA, # element-wise approximated loglikelihood
+      FIM        = NA, # Fisher information matrix of Theta, computed using of two approximation scheme
+      FIM_type   = NA, # Either "wald" or "louis". Approximation scheme used to compute FIM
+      .std_err   = NA, # element-wise standard error for the elements of Theta computed
+                       # from the Fisher information matrix
       covariance = NA, # a string describing the covariance model
       optimizer  = NA, # link to the function that performs the optimization
       monitoring = NA  # a list with optimization monitoring quantities
@@ -60,8 +64,9 @@ PLNfit <-
       q = function() {ncol(private$M)},
       p = function() {nrow(private$Theta)},
       d = function() {ncol(private$Theta)},
-      ## model_par and var_par allow write access for bootstrapping purposes
       model_par  = function() { list(Theta = private$Theta, Sigma = private$Sigma)},
+      fisher     = function() { list(mat = private$FIM, type = private$FIM_type) },
+      std_err    = function() { private$.std_err },
       var_par    = function() {list(M = private$M, S = private$S)},
       latent     = function() {private$Z},
       fitted     = function() {exp(private$Z + .5 * private$S)},
@@ -164,14 +169,20 @@ function(responses, covariates, offsets, weights) {
 })
 
 PLNfit$set("public", "postTreatment",
-function(responses, covariates, offsets, weights = rep(1, nrow(responses))) {
+function(responses, covariates, offsets, weights = rep(1, nrow(responses)), type = c("wald", "louis")) {
   ## compute R2
   self$set_R2(responses, covariates, offsets, weights)
   ## Set the name of the matrices according to those of the data matrices
   rownames(private$Theta) <- colnames(responses)
   colnames(private$Theta) <- colnames(covariates)
   rownames(private$Sigma) <- colnames(private$Sigma) <- colnames(responses)
   rownames(private$M) <- rownames(private$S) <- rownames(responses)
+  ## compute and store Fisher Information matrix
+  type <- match.arg(type)
+  private$FIM <- self$compute_fisher(type, X = covariates)
+  private$FIM_type <- type
+  ## compute and store matrix of standard errors
+  private$.std_err <- self$compute_standard_error()
 })
 
 # Positions in the (Euclidian) parameter space, noted as Z in the model. Used to compute the likelihood.
@@ -248,6 +259,42 @@ PLNfit$set("public", "predict",
   }
 )
 
+# Compute the (one-data) Fisher information matrix of Theta using one of two approximations scheme (wald or fisher)
+PLNfit$set("public", "compute_fisher",
+  function(type = c("wald", "louis"), X = NULL) {
+    type = match.arg(type)
+    A <- self$fitted
+    n <- self$n
+    if (type == "louis") {
+      ## TODO check how to adapt for PLNPCA
+      ## A = A + A \odot A \odot (exp(S) - 1_{n \times p})
+      A <- A + A * A * (exp(self$var_par$S) - 1)
+    }
+    result <- bdiag(lapply(1:self$p, function(i) {
+      ## t(X) %*% diag(A[, i]) %*% X
+      crossprod(X, A[, i] * X) / n
+    }))
+    ## set proper names
+    element.names <- expand.grid(covariates = colnames(private$Theta),
+                                 species    = rownames(private$Theta)) %>% rev() %>%
+      ## Hack to make sure that species is first and varies slowest
+      apply(1, paste0, collapse = "_")
+    rownames(result) <- element.names
+    return(result)
+  }
+)
+
+# Compute univariate standard errors for the estimated coefficient of Theta. Standard errors are computed from the (approximate)
+# Fisher information matrix. See \code{\link[=PLNfit_fisher]{fisher}} for more details on the approximations.
+PLNfit$set("public", "compute_standard_error",
+  function() {
+    fim <- self$n * self$fisher$mat ## Fisher Information matrix I_n(\Theta) = n * I(\Theta)
+    stderr <- fim %>% solve %>% diag %>% sqrt %>% matrix(nrow = self$d) %>% t()
+    dimnames(stderr) <- dimnames(self$model_par$Theta)
+    return(stderr)
+  }
+)
+
 PLNfit$set("public", "show",
 function(model = paste("A multivariate Poisson Lognormal fit with", private$covariance, "covariance model.\n")) {
   cat(model)

diff --git a/man/fisher.PLNfit.Rd b/man/fisher.PLNfit.Rd
diff --git a/man/fisher.Rd b/man/fisher.Rd
diff --git a/man/standard_error.PLNfit.Rd b/man/standard_error.PLNfit.Rd
diff --git a/man/standard_error.Rd b/man/standard_error.Rd
diff --git a/tests/testthat/test-standard-error.R b/tests/testthat/test-standard-error.R
@@ -0,0 +1,80 @@
+context("test-standard-error")
+
+data("trichoptera")
+
+test_that("Check that fisher and standard_error return objects with proper dimensions and sign",  {
+
+  myPLN_cov <- myPLN_cov <- PLN(Abundance ~ Wind + offset(log(TotalCounts)), data = trichoptera)
+  expect_is(myPLN_cov, "PLNfit")
+  p <- myPLN_cov$p
+  d <- myPLN_cov$d
+
+  fim <- fisher(myPLN_cov)
+  sem <- standard_error(myPLN_cov)
+  ## Dimensions
+  expect_equal(dim(fim), c(p*d, p*d))
+  expect_equal(dim(sem), c(p, d))
+
+  ## Names
+  expect_equal(rownames(sem), rownames(coef(myPLN_cov)))
+  expect_equal(colnames(sem), colnames(coef(myPLN_cov)))
+
+  ## Fisher is block diagonal
+  expect_equal(inherits(fim, "dgCMatrix"), TRUE)
+
+  ## Standard errors are all positive
+  for (i in 1:(p*d)) {
+    expect_gte(sem[i], 0)
+  }
+
+})
+
+test_that("Check internal consistency of Fisher matrix for PLN models with no covariates",  {
+  tol <- 1e-8
+
+  ## Fit model without covariates
+  myPLN <- PLN(Abundance ~ 1 + offset(log(TotalCounts)), data = trichoptera)
+
+  ## Consistency of the diagonal of the fisher matrix
+  fim.diag <- Matrix::diag(fisher(myPLN))
+  manual.fim.diag <- colSums(myPLN$fitted) / myPLN$n
+  ## Consistency of the standard error matrix
+  sem <- standard_error(myPLN) %>% as.numeric()
+  manual.sem <- 1/colSums(myPLN$fitted) %>% sqrt()
+
+  ## Internal consistency
+  expect_equal(fim.diag        , manual.fim.diag  , tolerance = tol)
+  expect_equal(sem             , manual.sem       , tolerance = tol)
+
+})
+
+
+test_that("Check temporal consistency of Fisher matrix for PLN models with no covariates",  {
+  tol <- 1e-4
+
+  ## Fit model without covariates
+  myPLN <- PLN(Abundance ~ 1 + offset(log(TotalCounts)), data = trichoptera)
+
+  ## Consistency of the diagonal of the fisher matrix
+  fim.diag <- Matrix::diag(fisher(myPLN))
+  expected.fim.diag <- c(0.0612123698810698, 0.0612384161054906, 3.73462487824109, 0.122467107738817,
+                         122.19280897578, 2.2230572191967, 0.285741065637069, 0.285687659219944,
+                         0.142744327711051, 2.36736421753514, 3.85859113231971, 1.06111199011525,
+                         3.90356517005791, 2.72098275756987, 9.59722821630398, 0.183645852556891,
+                         5.93888146445577)
+  ## Consistency of the standard error matrix
+  sem <- standard_error(myPLN) %>% as.numeric()
+  expected.sem <- c(0.577407423403546, 0.577284617461014, 0.0739228099688871, 0.40821807394677,
+                    0.0129234699024801, 0.0958134855472534, 0.267248717630853, 0.267273696185322,
+                    0.378113801869815, 0.0928473302527288, 0.072725644559697, 0.138682400064212,
+                    0.0723054848787022, 0.0866042221012381, 0.0461136022101119, 0.333358395876535,
+                    0.058620515251328)
+
+  ## Temporal consistency (with previous fits of the PLN model, here fitted on the 2018/12/11 with PLNmodels version 0.5.9601)
+  expect_equal(fim.diag        , expected.fim.diag, tolerance = tol)
+  expect_equal(sem             , expected.sem     , tolerance = tol)
+
+})
+
+
+