Skip to content

Commit

Permalink
Merge branch 'score' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
mahendra-mariadassou committed Dec 11, 2018
2 parents 245de79 + f92e212 commit af761b5
Show file tree
Hide file tree
Showing 10 changed files with 314 additions and 3 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ Version: 0.5.9605
Authors@R: c(
person("Julien", "Chiquet", role = c("aut", "cre"), email = "julien.chiquet@inra.fr",
comment = c(ORCID = "0000-0002-3629-3429")),
person("Mahendra", "Mariadassou", role = "aut", email = "mahendra.mariadassou@inra.fr"),
person("Mahendra", "Mariadassou", role = "aut", email = "mahendra.mariadassou@inra.fr",
comment = c(ORCID = "0000-0003-2986-354X")),
person("Stéphane", "Robin", role ="ctb", email = "stephane.robininra.fr")
)
Maintainer: Julien Chiquet <julien.chiquet@inra.fr>
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

S3method(coef,PLNfit)
S3method(fisher,PLNfit)
S3method(getBestModel,PLNPCAfamily)
S3method(getBestModel,PLNnetworkfamily)
S3method(getModel,PLNPCAfamily)
Expand All @@ -12,15 +13,18 @@ S3method(plot,PLNnetworkfamily)
S3method(plot,PLNnetworkfit)
S3method(predict,PLNLDAfit)
S3method(predict,PLNfit)
S3method(standard_error,PLNfit)
S3method(vcov,PLNfit)
export(PLN)
export(PLNLDA)
export(PLNPCA)
export(PLNnetwork)
export(coefficient_path)
export(fisher)
export(getBestModel)
export(getModel)
export(rPLN)
export(standard_error)
import(Matrix)
import(R6)
import(RcppArmadillo)
Expand Down
11 changes: 11 additions & 0 deletions R/PLNPCAfit-class.R
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,17 @@ PLNPCAfit$set("public", "plot_PCA",
}
)

# Compute the (one-data) Fisher information matrix of Theta using one of two approximations scheme.
PLNPCAfit$set("private", "compute_fisher",
function(type = c("wald", "louis"), X = NULL) {
type = match.arg(type)
if (type == "louis") {
stop("Louis approximation scheme not available yet for object of class PLNPLCA, use `type = \"wald\"' instead.")
}
super$fisher(type = "wald", X = X)
}
)

PLNPCAfit$set("public", "show",
function() {
super$show(paste0("Poisson Lognormal with rank constrained for PCA (rank = ",self$rank,")\n"))
Expand Down
79 changes: 79 additions & 0 deletions R/PLNfit-S3methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,82 @@ vcov.PLNfit <- function(object, ...) {
stopifnot(isPLNfit(object))
object$model_par$Sigma
}

#' Generic function for the Fisher information matrix of Theta
#'
#' @description Compute or extract component-wise standard errors of Theta in multivariate (generalized) linear models of the form \deqn{g(E[Y|X]) = X\Theta} Useful to create confidence intervals and (multivariate) confidence regions under a Gaussian approximation of \eqn{\Theta}. Note that the Fisher information matrix is the one-data version (not scaled by the number of observations).
#'
#' @param x An `R` object. Currently there are methods for \code{\link{PLNfit}} (and its variants) objects.
#' @param ... Further arguments passed to or from other methods.
#'
#' @return The fisher information matrix of \eqn{\Theta} in generalized linear models.
#' @export
#'
fisher <- function (x, ...) {
UseMethod("fisher", x)
}

#' Fisher information matrix for Theta
#'
#' @description Extracts Fisher information matrix of \eqn{\Theta} from objects returned by \code{\link[=PLN]{PLN}} and its variants. Fisher matrix is computed using one of two approximation scheme: wald (default, conservative, gives large confidence interval) or louis (anticonservative). Note that the Fisher information matrix is the one-data version (not scaled by the number of observations).
#'
#' @name fisher.PLNfit
#'
#' @param object an R6 object with class PLNfit
#' @param type Either `wald` (default) or `louis`. Approxomation scheme used to compute the
#' Fisher information matrix
#' @param ... additional parameters for S3 compatibility. Not used
#' @return A block-diagonal matrix with p (number of species) blocks of size d (number of covariates), assuming
#' \eqn{\Theta} is a matrix of size d * p.
#'
#' @seealso \code{\link[=standard_error.PLNfit]{standard_error}} for standard errors
#'
#' @export
#'
fisher.PLNfit <- function(object, type = c("wald", "louis"), ...) {
stopifnot(isPLNfit(object))
type <- match.arg(type)
if (type != object$fisher$type) {
stop(paste("Fisher information was not computed using the", type, "approximation. Try another approximation scheme."))
}
object$fisher$mat
}

#' Component-wise standard errors of Theta
#'
#' @description Compute or extract component-wise standard errors of Theta in multivariate (generalized) linear models of the form \deqn{g(E[Y|X]) = X\Theta} Useful to compute Z-scores and p-values under a gaussian/student approximation of \eqn{\Theta}
#'
#' @param x An `R` object. Currently there are methods for \code{\link{PLNfit}} (and its variants) objects.
#' @param ... Further arguments passed to or from other methods.
#'
#' @return The standard errors associated with coefficients of \eqn{\Theta}
#' @export
#'
standard_error <- function (x, ...) {
UseMethod("standard_error", x)
}

#' Component-wise standard errors of Theta
#'
#' @description Extracts univariate standard errors for the estimated coefficient of Theta. Standard errors are computed from the (approximate) Fisher information matrix. See \code{\link[=fisher.PLNfit]{fisher}} for more details on the approximations.
#'
#' @name standard_error.PLNfit
#'
#' @param object an R6 object with class PLNfit
#' @param type Either `Wald` (default) or `Louis`. Approxomation scheme used to compute the
#' Fisher information matrix
#' @param ... additional parameters for S3 compatibility. Not used
#'
#' @seealso \code{\link[=fisher.PLNfit]{fisher}} for the complete Fisher information matrix
#'
#' @return A p * d positive matrix (same size as \eqn{\Theta}) with standard errors for the coefficients of \eqn{\Theta}
#' @export
#'
standard_error.PLNfit <- function(object, type = c("wald", "louis"), ...) {
stopifnot(isPLNfit(object))
type <- match.arg(type)
if (type != object$fisher$type) {
stop(paste("Standard errors were not computed using the", type, "approximation. Try another approximation scheme."))
}
object$std_err
}
51 changes: 49 additions & 2 deletions R/PLNfit-class.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ PLNfit <-
Z = NA, # the matrix of latent variable
R2 = NA, # approximated goodness of fit criterion
Ji = NA, # element-wise approximated loglikelihood
FIM = NA, # Fisher information matrix of Theta, computed using of two approximation scheme
FIM_type = NA, # Either "wald" or "louis". Approximation scheme used to compute FIM
.std_err = NA, # element-wise standard error for the elements of Theta computed
# from the Fisher information matrix
covariance = NA, # a string describing the covariance model
optimizer = NA, # link to the function that performs the optimization
monitoring = NA # a list with optimization monitoring quantities
Expand All @@ -60,8 +64,9 @@ PLNfit <-
q = function() {ncol(private$M)},
p = function() {nrow(private$Theta)},
d = function() {ncol(private$Theta)},
## model_par and var_par allow write access for bootstrapping purposes
model_par = function() { list(Theta = private$Theta, Sigma = private$Sigma)},
fisher = function() { list(mat = private$FIM, type = private$FIM_type) },
std_err = function() { private$.std_err },
var_par = function() {list(M = private$M, S = private$S)},
latent = function() {private$Z},
fitted = function() {exp(private$Z + .5 * private$S)},
Expand Down Expand Up @@ -164,14 +169,20 @@ function(responses, covariates, offsets, weights) {
})

PLNfit$set("public", "postTreatment",
function(responses, covariates, offsets, weights = rep(1, nrow(responses))) {
function(responses, covariates, offsets, weights = rep(1, nrow(responses)), type = c("wald", "louis")) {
## compute R2
self$set_R2(responses, covariates, offsets, weights)
## Set the name of the matrices according to those of the data matrices
rownames(private$Theta) <- colnames(responses)
colnames(private$Theta) <- colnames(covariates)
rownames(private$Sigma) <- colnames(private$Sigma) <- colnames(responses)
rownames(private$M) <- rownames(private$S) <- rownames(responses)
## compute and store Fisher Information matrix
type <- match.arg(type)
private$FIM <- self$compute_fisher(type, X = covariates)
private$FIM_type <- type
## compute and store matrix of standard errors
private$.std_err <- self$compute_standard_error()
})

# Positions in the (Euclidian) parameter space, noted as Z in the model. Used to compute the likelihood.
Expand Down Expand Up @@ -248,6 +259,42 @@ PLNfit$set("public", "predict",
}
)

# Compute the (one-data) Fisher information matrix of Theta using one of two approximations scheme (wald or fisher)
PLNfit$set("public", "compute_fisher",
function(type = c("wald", "louis"), X = NULL) {
type = match.arg(type)
A <- self$fitted
n <- self$n
if (type == "louis") {
## TODO check how to adapt for PLNPCA
## A = A + A \odot A \odot (exp(S) - 1_{n \times p})
A <- A + A * A * (exp(self$var_par$S) - 1)
}
result <- bdiag(lapply(1:self$p, function(i) {
## t(X) %*% diag(A[, i]) %*% X
crossprod(X, A[, i] * X) / n
}))
## set proper names
element.names <- expand.grid(covariates = colnames(private$Theta),
species = rownames(private$Theta)) %>% rev() %>%
## Hack to make sure that species is first and varies slowest
apply(1, paste0, collapse = "_")
rownames(result) <- element.names
return(result)
}
)

# Compute univariate standard errors for the estimated coefficient of Theta. Standard errors are computed from the (approximate)
# Fisher information matrix. See \code{\link[=PLNfit_fisher]{fisher}} for more details on the approximations.
PLNfit$set("public", "compute_standard_error",
function() {
fim <- self$n * self$fisher$mat ## Fisher Information matrix I_n(\Theta) = n * I(\Theta)
stderr <- fim %>% solve %>% diag %>% sqrt %>% matrix(nrow = self$d) %>% t()
dimnames(stderr) <- dimnames(self$model_par$Theta)
return(stderr)
}
)

PLNfit$set("public", "show",
function(model = paste("A multivariate Poisson Lognormal fit with", private$covariance, "covariance model.\n")) {
cat(model)
Expand Down
26 changes: 26 additions & 0 deletions man/fisher.PLNfit.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/fisher.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions man/standard_error.PLNfit.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/standard_error.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

80 changes: 80 additions & 0 deletions tests/testthat/test-standard-error.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
context("test-standard-error")

data("trichoptera")

test_that("Check that fisher and standard_error return objects with proper dimensions and sign", {

myPLN_cov <- myPLN_cov <- PLN(Abundance ~ Wind + offset(log(TotalCounts)), data = trichoptera)
expect_is(myPLN_cov, "PLNfit")
p <- myPLN_cov$p
d <- myPLN_cov$d

fim <- fisher(myPLN_cov)
sem <- standard_error(myPLN_cov)
## Dimensions
expect_equal(dim(fim), c(p*d, p*d))
expect_equal(dim(sem), c(p, d))

## Names
expect_equal(rownames(sem), rownames(coef(myPLN_cov)))
expect_equal(colnames(sem), colnames(coef(myPLN_cov)))

## Fisher is block diagonal
expect_equal(inherits(fim, "dgCMatrix"), TRUE)

## Standard errors are all positive
for (i in 1:(p*d)) {
expect_gte(sem[i], 0)
}

})

test_that("Check internal consistency of Fisher matrix for PLN models with no covariates", {
tol <- 1e-8

## Fit model without covariates
myPLN <- PLN(Abundance ~ 1 + offset(log(TotalCounts)), data = trichoptera)

## Consistency of the diagonal of the fisher matrix
fim.diag <- Matrix::diag(fisher(myPLN))
manual.fim.diag <- colSums(myPLN$fitted) / myPLN$n
## Consistency of the standard error matrix
sem <- standard_error(myPLN) %>% as.numeric()
manual.sem <- 1/colSums(myPLN$fitted) %>% sqrt()

## Internal consistency
expect_equal(fim.diag , manual.fim.diag , tolerance = tol)
expect_equal(sem , manual.sem , tolerance = tol)

})


test_that("Check temporal consistency of Fisher matrix for PLN models with no covariates", {
tol <- 1e-4

## Fit model without covariates
myPLN <- PLN(Abundance ~ 1 + offset(log(TotalCounts)), data = trichoptera)

## Consistency of the diagonal of the fisher matrix
fim.diag <- Matrix::diag(fisher(myPLN))
expected.fim.diag <- c(0.0612123698810698, 0.0612384161054906, 3.73462487824109, 0.122467107738817,
122.19280897578, 2.2230572191967, 0.285741065637069, 0.285687659219944,
0.142744327711051, 2.36736421753514, 3.85859113231971, 1.06111199011525,
3.90356517005791, 2.72098275756987, 9.59722821630398, 0.183645852556891,
5.93888146445577)
## Consistency of the standard error matrix
sem <- standard_error(myPLN) %>% as.numeric()
expected.sem <- c(0.577407423403546, 0.577284617461014, 0.0739228099688871, 0.40821807394677,
0.0129234699024801, 0.0958134855472534, 0.267248717630853, 0.267273696185322,
0.378113801869815, 0.0928473302527288, 0.072725644559697, 0.138682400064212,
0.0723054848787022, 0.0866042221012381, 0.0461136022101119, 0.333358395876535,
0.058620515251328)

## Temporal consistency (with previous fits of the PLN model, here fitted on the 2018/12/11 with PLNmodels version 0.5.9601)
expect_equal(fim.diag , expected.fim.diag, tolerance = tol)
expect_equal(sem , expected.sem , tolerance = tol)

})



0 comments on commit af761b5

Please sign in to comment.