From 18eac2dc5831b9da973d783aace8e74aa42321aa Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Tue, 23 Jan 2024 11:42:55 +0100 Subject: [PATCH 01/30] started inclusino of lambertW close form solution for R in zipln --- R/RcppExports.R | 4 + inst/include/PLNmodels.h | 9 ++ inst/include/PLNmodels_RcppExports.h | 30 ++++++ man/ZIPLN.Rd | 17 ++- man/ZIPLN_param.Rd | 8 +- man/ZIPLNfit.Rd | 29 ++++-- man/ZIPLNfit_diagonal.Rd | 2 +- man/ZIPLNfit_spherical.Rd | 2 +- src/RcppExports.cpp | 18 ++++ src/lambertW.cpp | 148 +++++++++++++++++++++++++++ src/lambertW.h | 15 +++ 11 files changed, 262 insertions(+), 20 deletions(-) create mode 100644 inst/include/PLNmodels.h create mode 100644 inst/include/PLNmodels_RcppExports.h create mode 100644 src/lambertW.cpp create mode 100644 src/lambertW.h diff --git a/R/RcppExports.R b/R/RcppExports.R index 6f186120..7841037d 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -85,3 +85,7 @@ cpp_test_packing <- function() { .Call('_PLNmodels_cpp_test_packing', PACKAGE = 'PLNmodels') } +# Register entry points for exported C++ functions +methods::setLoadAction(function(ns) { + .Call('_PLNmodels_RcppExport_registerCCallable', PACKAGE = 'PLNmodels') +}) diff --git a/inst/include/PLNmodels.h b/inst/include/PLNmodels.h new file mode 100644 index 00000000..9bee263f --- /dev/null +++ b/inst/include/PLNmodels.h @@ -0,0 +1,9 @@ +// Generated by using Rcpp::compileAttributes() -> do not edit by hand +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#ifndef RCPP_PLNmodels_H_GEN_ +#define RCPP_PLNmodels_H_GEN_ + +#include "PLNmodels_RcppExports.h" + +#endif // RCPP_PLNmodels_H_GEN_ diff --git a/inst/include/PLNmodels_RcppExports.h b/inst/include/PLNmodels_RcppExports.h new file mode 100644 index 00000000..f44129f3 --- /dev/null +++ b/inst/include/PLNmodels_RcppExports.h @@ -0,0 +1,30 @@ +// Generated by using Rcpp::compileAttributes() -> do not edit by hand +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#ifndef RCPP_PLNmodels_RCPPEXPORTS_H_GEN_ +#define RCPP_PLNmodels_RCPPEXPORTS_H_GEN_ + +#include +#include + +namespace PLNmodels { + + using namespace Rcpp; + + namespace { + void validateSignature(const char* sig) { + Rcpp::Function require = Rcpp::Environment::base_env()["require"]; + require("PLNmodels", Rcpp::Named("quietly") = true); + typedef int(*Ptr_validate)(const char*); + static Ptr_validate p_validate = (Ptr_validate) + R_GetCCallable("PLNmodels", "_PLNmodels_RcppExport_validate"); + if (!p_validate(sig)) { + throw Rcpp::function_not_exported( + "C++ function with signature '" + std::string(sig) + "' not found in PLNmodels"); + } + } + } + +} + +#endif // RCPP_PLNmodels_RCPPEXPORTS_H_GEN_ diff --git a/man/ZIPLN.Rd b/man/ZIPLN.Rd index 8c0f58df..edfeb5f8 100644 --- a/man/ZIPLN.Rd +++ b/man/ZIPLN.Rd @@ -19,9 +19,13 @@ ZIPLN( \item{subset}{an optional vector specifying a subset of observations to be used in the fitting process.} -\item{zi}{a character describing the model for zero inflation in "single" (one global parameter, the default), "col" (one parameter per variable) -and "row" (one parameter per individuals). If covariates are specified in the formula RHS, after bar pipe like "~ PLN effect | ZI effect", \code{zi} -is ignored and the covariates extracted by model matrix will be used to account for the ZI effect.} +\item{zi}{a character describing the model used for zero inflation, either of +\itemize{ +\item "single" (default, one parameter shared by all counts) +\item "col" (one parameter per variable / feature) +\item "row" (one parameter per sample / individual). +If covariates are specified in the formula RHS (see details) this parameter is ignored. +}} \item{control}{a list-like structure for controlling the optimization, with default generated by \code{\link[=ZIPLN_param]{ZIPLN_param()}}. See the associated documentation for details.} @@ -32,13 +36,20 @@ an R6 object with class \code{\link{ZIPLNfit}} \description{ Fit the multivariate Zero Inflated Poisson lognormal model with a variational algorithm. Use the (g)lm syntax for model specification (covariates, offsets, subset). } +\details{ +Covariates for the Zero-Inflation parameter (using a logistic regression model) can be specified in the formula RHS using the pipe +(\verb{~ PLN effect | ZI effect}) to separate covariates for the PLN part of the model from those for the Zero-Inflation part. +Note that different covariates can be used for each part. +} \examples{ data(trichoptera) trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) myPLN <- PLN(Abundance ~ 1, data = trichoptera) +## Use different models for zero-inflation... myZIPLN_single <- ZIPLN(Abundance ~ 1, data = trichoptera, zi = "single") myZIPLN_row <- ZIPLN(Abundance ~ 1, data = trichoptera, zi = "row") myZIPLN_col <- ZIPLN(Abundance ~ 1, data = trichoptera, zi = "col") +## ...including logistic regression on covariates myZIPLN_covar <- ZIPLN(Abundance ~ 1 | 1 + Wind, data = trichoptera) dplyr::bind_rows( myPLN$criteria, diff --git a/man/ZIPLN_param.Rd b/man/ZIPLN_param.Rd index 11aeb13b..798b7fad 100644 --- a/man/ZIPLN_param.Rd +++ b/man/ZIPLN_param.Rd @@ -24,7 +24,7 @@ ZIPLN_param( \item{Omega}{precision matrix of the latent variables. Inverse of Sigma. Must be specified if \code{covariance} is "fixed"} -\item{penalty}{a user defined penalty for sparsifying the residual covariance. Default is 0 (no sparsity).} +\item{penalty}{a user-defined penalty to sparsify the residual covariance. Defaults to 0 (no sparsity).} \item{config_post}{a list for controlling the post-treatments (optional bootstrap, jackknife, R2, etc.). See details} @@ -35,7 +35,7 @@ log-transformed data, and with the same formula as the one provided by the user. which sometimes speeds up the inference.} } \value{ -list of parameters configuring the fit. +list of parameters used during the fit and post-processing steps } \description{ Helper to define list of parameters to control the PLN fit. All arguments have defaults. @@ -44,7 +44,7 @@ Helper to define list of parameters to control the PLN fit. All arguments have d See \code{\link[=PLN_param]{PLN_param()}} for a full description of the generic optimization parameters. ZIPLN_param() also has two additional parameters controlling the optimization due the inner-outer loop structure of the optimizer: \itemize{ -\item "ftol_out" outer solver stops when an optimization step changes the objective function by less than xtol multiplied by the absolute value of the parameter. Default is 1e-8 -\item "maxit_out" outer solver stops when the number of iteration exceeds maxit_out. Default is 100 +\item "ftol_out" outer solver stops when an optimization step changes the objective function by less than \code{ftol_out} multiplied by the absolute value of the parameter. Default is 1e-8 +\item "maxit_out" outer solver stops when the number of iteration exceeds \code{maxit_out}. Default is 100 } } diff --git a/man/ZIPLNfit.Rd b/man/ZIPLNfit.Rd index 35691793..37542df4 100644 --- a/man/ZIPLNfit.Rd +++ b/man/ZIPLNfit.Rd @@ -4,13 +4,18 @@ \alias{ZIPLNfit} \title{An R6 Class to represent a ZIPLNfit} \description{ -The function \code{\link[=ZIPLN]{ZIPLN()}} fit a model which is an instance of a object with class \code{\link{ZIPLNfit}}. +The function \code{\link[=ZIPLN]{ZIPLN()}} fits a model which is an instance of an object with class \code{\link{ZIPLNfit}}. -This class comes with a set of R6 methods, some of them being useful for the user and exported as S3 methods. +This class comes with a set of R6 methods, some of which are useful for the end-user and exported as S3 methods. See the documentation for \code{\link[=coef]{coef()}}, \code{\link[=sigma]{sigma()}}, \code{\link[=predict]{predict()}}. Fields are accessed via active binding and cannot be changed by the user. } +\details{ +Covariates for the Zero-Inflation parameter (using a logistic regression model) can be specified in the formula RHS using the pipe +(\verb{~ PLN effect | ZI effect}) to separate covariates for the PLN part of the model from those for the Zero-Inflation part. +Note that different covariates can be used for each part. +} \examples{ \dontrun{ # See other examples in function ZIPLN @@ -31,9 +36,9 @@ print(myPLN) \item{\code{p}}{number of variables/species} -\item{\code{d}}{number of covariates in the PLN componente} +\item{\code{d}}{number of covariates in the PLN part} -\item{\code{d0}}{number of covariates in the ZI componente} +\item{\code{d0}}{number of covariates in the ZI part} \item{\code{nb_param}}{number of parameters in the current PLN model} @@ -113,7 +118,7 @@ Update a \code{\link{ZIPLNfit}} object \item{\code{B0}}{matrix of regression parameters in the zero inflated component} -\item{\code{Pi}}{Zero inflated probability parameter (either scalar, row-vector, col-vector of matrix)} +\item{\code{Pi}}{Zero inflated probability parameter (either scalar, row-vector, col-vector or matrix)} \item{\code{Omega}}{precision matrix of the latent variables} @@ -121,7 +126,7 @@ Update a \code{\link{ZIPLNfit}} object \item{\code{M}}{matrix of mean vectors for the variational approximation} -\item{\code{S}}{matrix of variance parameters for the variational approximation} +\item{\code{S}}{matrix of standard deviation parameters for the variational approximation} \item{\code{R}}{matrix of probabilities for the variational approximation} @@ -195,7 +200,7 @@ Call to the Cpp optimizer and update of the relevant fields \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-ZIPLNfit-optimize_vestep}{}}} \subsection{Method \code{optimize_vestep()}}{ -Result of one call to the VE step of the optimization procedure: optimal variational parameters (M, S) and corresponding log likelihood values for fixed model parameters (Sigma, B). Intended to position new data in the latent space. +Result of one call to the VE step of the optimization procedure: optimal variational parameters (M, S, R) and corresponding log likelihood values for fixed model parameters (Sigma, B, B0). Intended to position new data in the latent space. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{ZIPLNfit$optimize_vestep( covariates, @@ -234,8 +239,10 @@ Result of one call to the VE step of the optimization procedure: optimal variati A list with three components: \itemize{ \item the matrix \code{M} of variational means, -\item the matrix \code{S2} of variational variances -\item the vector \code{log.lik} of (variational) log-likelihood of each new observation +\item the matrix \code{S} of variational standard deviations +\item the matrix \code{R} of variational ZI probabilities +\item the vector \code{Ji} of (variational) log-likelihood of each new observation +\item a list \code{monitoring} with information about convergence status } } } @@ -257,9 +264,9 @@ Predict position, scores or observations of new data. \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{newdata}}{A data frame in which to look for variables with which to predict. If omitted, the fitted values are used.} +\item{\code{newdata}}{A data frame in which to look for variables with which to predict. If omitted, the fitted values are returned.} -\item{\code{responses}}{Optional data frame containing the count of the observed variables (matching the names of the provided as data in the PLN function), assuming the interest in in testing the model.} +\item{\code{responses}}{Optional data frame containing the count of the observed variables (matching the names of the provided as data in the PLN function), assuming the interest is in testing the model.} \item{\code{type}}{Scale used for the prediction. Either \code{link} (default, predicted positions in the latent space) or \code{response} (predicted counts).} diff --git a/man/ZIPLNfit_diagonal.Rd b/man/ZIPLNfit_diagonal.Rd index 226bb7ca..acce0d88 100644 --- a/man/ZIPLNfit_diagonal.Rd +++ b/man/ZIPLNfit_diagonal.Rd @@ -53,7 +53,7 @@ print(myPLN) \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-ZIPLNfit_diagonal-new}{}}} \subsection{Method \code{new()}}{ -Initialize a \code{\link{PLNfit}} model +Initialize a \code{\link{ZIPLNfit_diagonal}} model \subsection{Usage}{ \if{html}{\out{
}}\preformatted{ZIPLNfit_diagonal$new( responses, diff --git a/man/ZIPLNfit_spherical.Rd b/man/ZIPLNfit_spherical.Rd index b80571cb..df904d7b 100644 --- a/man/ZIPLNfit_spherical.Rd +++ b/man/ZIPLNfit_spherical.Rd @@ -53,7 +53,7 @@ print(myPLN) \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-ZIPLNfit_spherical-new}{}}} \subsection{Method \code{new()}}{ -Initialize a \code{\link{PLNfit}} model +Initialize a \code{\link{ZIPLNfit_spherical}} model \subsection{Usage}{ \if{html}{\out{
}}\preformatted{ZIPLNfit_spherical$new( responses, diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index c464d23f..a950ea1a 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -1,8 +1,11 @@ // Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 +#include "../inst/include/PLNmodels.h" #include #include +#include +#include using namespace Rcpp; @@ -313,6 +316,20 @@ BEGIN_RCPP END_RCPP } +// validate (ensure exported C++ functions exist before calling them) +static int _PLNmodels_RcppExport_validate(const char* sig) { + static std::set signatures; + if (signatures.empty()) { + } + return signatures.find(sig) != signatures.end(); +} + +// registerCCallable (register entry points for exported C++ functions) +RcppExport SEXP _PLNmodels_RcppExport_registerCCallable() { + R_RegisterCCallable("PLNmodels", "_PLNmodels_RcppExport_validate", (DL_FUNC)_PLNmodels_RcppExport_validate); + return R_NilValue; +} + static const R_CallMethodDef CallEntries[] = { {"_PLNmodels_cpp_test_nlopt", (DL_FUNC) &_PLNmodels_cpp_test_nlopt, 0}, {"_PLNmodels_nlopt_optimize_diagonal", (DL_FUNC) &_PLNmodels_nlopt_optimize_diagonal, 3}, @@ -335,6 +352,7 @@ static const R_CallMethodDef CallEntries[] = { {"_PLNmodels_optim_zipln_M", (DL_FUNC) &_PLNmodels_optim_zipln_M, 9}, {"_PLNmodels_optim_zipln_S", (DL_FUNC) &_PLNmodels_optim_zipln_S, 7}, {"_PLNmodels_cpp_test_packing", (DL_FUNC) &_PLNmodels_cpp_test_packing, 0}, + {"_PLNmodels_RcppExport_registerCCallable", (DL_FUNC) &_PLNmodels_RcppExport_registerCCallable, 0}, {NULL, NULL, 0} }; diff --git a/src/lambertW.cpp b/src/lambertW.cpp new file mode 100644 index 00000000..97942d60 --- /dev/null +++ b/src/lambertW.cpp @@ -0,0 +1,148 @@ +/* lambertW.cpp + +Copyright (C) 2015, Avraham Adler +All rights reserved. + +SPDX-License-Identifier: BSD-2-Clause + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +References: + +Corless, R. M.; Gonnet, G. H.; Hare, D. E.; Jeffrey, D. J. & Knuth, D. E. + "On the Lambert W function", Advances in Computational Mathematics, + Springer, 1996, 5, 329-359 + +Fritsch, F. N.; Shafer, R. E. & Crowley, W. P. + "Solution of the transcendental equation (we^w = x)", + Communications of the ACM, Association for Computing Machinery (ACM), + 1973, 16, 123-124 +*/ + +// [[Rcpp::depends(RcppParallel)]] +// [[Rcpp::interfaces(r, cpp)]] +#include + +#define _USE_MATH_DEFINES +#include + +using namespace Rcpp; + +const double EPS = 2.2204460492503131e-16; +const double M_1_E = 1.0 / M_E; + + /* Fritsch Iteration + * W_{n+1} = W_n * (1 + e_n) + * z_n = ln(x / W_n) - W_n + * q_n = 2 * (1 + W_n) * (1 + W_n + 2 / 3 * z_n) + * e_n = z_n / (1 + W_n) * (q_n - z_n) / (q_n - 2 * z_n) + */ + +double FritschIter(double x, double w){ + int MaxEval = 5; + bool CONVERGED = false; + double k = 2.0 / 3.0; + int i = 0; + do { + double z = std::log(x / w) - w; + double w1 = w + 1.0; + double q = 2.0 * w1 * (w1 + k * z); + double qmz = q - z; + double e = z / w1 * qmz / (qmz - z); + CONVERGED = std::abs(e) <= EPS; + w *= (1.0 + e); + ++i; + } while (!CONVERGED && i < MaxEval); + return(w); +} + +double lambertW0_CS(double x) { + if (x == R_PosInf) { + return(R_PosInf); + } else if (x < -M_1_E) { + return(R_NaN); + } else if (std::abs(x + M_1_E) <= EPS) { + return(-1.0); + } else if (std::abs(x) <= 1e-16) { + /* This close to 0 the W_0 branch is best estimated by its Taylor/Pade + expansion whose first term is the value x and remaining terms are below + machine double precision. See + https://math.stackexchange.com/questions/1700919 + */ + return(x); + } else { + double w; + if (std::abs(x) <= 6.4e-3) { + /* When this close to 0 the Fritsch iteration may underflow. Instead, + * function will use degree-6 minimax polynomial approximation of Halley + * iteration-based values. Should be more accurate by three orders of + * magnitude than Fritsch's equation (5) in this range. + */ + + // Minimax Approximation calculated using R package minimaxApprox 0.1.0 + return((((((-1.0805085529250425e1 * x + 5.2100070265741278) * x - + 2.6666665063383532) * x + 1.4999999657268301) * x - + 1.0000000000016802) * x + 1.0000000000001752) * x + + 2.6020852139652106e-18); + + } else if (x <= M_E) { + /* Use expansion in Corliss 4.22 to create (2, 2) Pade approximant. + * Equation with a few extra terms is: + * -1 + p - 1/3p^2 + 11/72p^3 - 43/540p^4 + 689453/8398080p^4 - O(p^5) + * This is just used to estimate a good starting point for the Fritsch + * iteration process itself. + */ + double p = std::sqrt(2.0 * (M_E * x + 1.0)); + double Numer = (0.2787037037037037 * p + 0.311111111111111) * p - 1.0; + double Denom = (0.0768518518518518 * p + 0.688888888888889) * p + 1.0; + w = Numer / Denom; + } else { + /* Use first five terms of Corliss et al. 4.19 */ + w = std::log(x); + double L_2 = std::log(w); + double L_3 = L_2 / w; + double L_3_sq = L_3 * L_3; + w += -L_2 + L_3 + 0.5 * L_3_sq - L_3 / w + L_3 / (w * w) - 1.5 * L_3_sq / + w + L_3_sq * L_3 / 3.0; + } + return(FritschIter(x, w)); + } +} + +double lambertWm1_CS(double x){ + if (x == 0.0) { + return(R_NegInf); + } else if (x < -M_1_E || x > 0.0) { + return(R_NaN); + } else if (std::abs(x + M_1_E) <= EPS) { + return(-1.0); + } else { + double w; + /* Use first five terms of Corliss et al. 4.19 */ + w = std::log(-x); + double L_2 = std::log(-w); + double L_3 = L_2 / w; + double L_3_sq = L_3 * L_3; + w += -L_2 + L_3 + 0.5 * L_3_sq - L_3 / w + L_3 / (w * w) - 1.5 * L_3_sq / + w + L_3_sq * L_3 / 3.0; + return(FritschIter(x, w)); + } +} + diff --git a/src/lambertW.h b/src/lambertW.h new file mode 100644 index 00000000..6b56a484 --- /dev/null +++ b/src/lambertW.h @@ -0,0 +1,15 @@ +#include + +#define _USE_MATH_DEFINES +#include + +using namespace Rcpp; + +const double EPS = 2.2204460492503131e-16; +const double M_1_E = 1.0 / M_E; + +double FritschIter(double x, double w) ; +double lambertW0_CS(double x) ; +double lambertWm1_CS(double x) ; + + From 5aea835e9fd303ea0ff7a35dcce82b8121f47510 Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Wed, 24 Jan 2024 18:49:57 +0100 Subject: [PATCH 02/30] added exact form and optimization for W|Y (with tests) --- R/RcppExports.R | 12 +++---- R/ZIPLN.R | 7 +++-- R/ZIPLNfit-class.R | 10 +++--- inst/case_studies/scRNA.R | 2 +- inst/include/PLNmodels.h | 9 ------ inst/include/PLNmodels_RcppExports.h | 30 ------------------ inst/simus_ZIPLN/essai_ZIPLN.R | 4 +-- man/ZIPLN_param.Rd | 6 ++-- man/ZIPLNfit.Rd | 4 +-- src/RcppExports.cpp | 47 ++++++++++++++-------------- src/lambertW.cpp | 12 +------ src/optim_zi-pln.cpp | 39 +++++++++++++++++++++-- tests/testthat/test-zipln.R | 11 +++++++ 13 files changed, 98 insertions(+), 95 deletions(-) delete mode 100644 inst/include/PLNmodels.h delete mode 100644 inst/include/PLNmodels_RcppExports.h diff --git a/R/RcppExports.R b/R/RcppExports.R index 4e32f357..d1053c03 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -69,8 +69,12 @@ optim_zipln_zipar_covar <- function(R, init_B0, X0, configuration) { .Call('_PLNmodels_optim_zipln_zipar_covar', PACKAGE = 'PLNmodels', R, init_B0, X0, configuration) } -optim_zipln_R <- function(Y, X, O, M, S, Pi) { - .Call('_PLNmodels_optim_zipln_R', PACKAGE = 'PLNmodels', Y, X, O, M, S, Pi) +optim_zipln_R_var <- function(Y, X, O, M, S, Pi, B) { + .Call('_PLNmodels_optim_zipln_R_var', PACKAGE = 'PLNmodels', Y, X, O, M, S, Pi, B) +} + +optim_zipln_R_exact <- function(Y, X, O, M, S, Pi, B) { + .Call('_PLNmodels_optim_zipln_R_exact', PACKAGE = 'PLNmodels', Y, X, O, M, S, Pi, B) } optim_zipln_M <- function(init_M, Y, X, O, R, S, B, Omega, configuration) { @@ -85,7 +89,3 @@ cpp_test_packing <- function() { .Call('_PLNmodels_cpp_test_packing', PACKAGE = 'PLNmodels') } -# Register entry points for exported C++ functions -methods::setLoadAction(function(ns) { - .Call('_PLNmodels_RcppExport_registerCCallable', PACKAGE = 'PLNmodels') -}) diff --git a/R/ZIPLN.R b/R/ZIPLN.R index f5fe288e..9368571e 100644 --- a/R/ZIPLN.R +++ b/R/ZIPLN.R @@ -77,10 +77,12 @@ ZIPLN <- function(formula, data, subset, zi = c("single", "row", "col"), control #' @return list of parameters used during the fit and post-processing steps #' #' @inherit PLN_param details -#' @details See [PLN_param()] for a full description of the generic optimization parameters. ZIPLN_param() also has two additional parameters controlling the optimization due -#' the inner-outer loop structure of the optimizer: +#' @details See [PLN_param()] for a full description of the generic optimization parameters. ZIPLN_param() also +#' has two additional parameters controlling the optimization due the inner-outer loop structure of the optimizer, +#' and additional parameter controlling the form of the variational approximation of the zero inflation: #' * "ftol_out" outer solver stops when an optimization step changes the objective function by less than `ftol_out` multiplied by the absolute value of the parameter. Default is 1e-8 #' * "maxit_out" outer solver stops when the number of iteration exceeds `maxit_out`. Default is 100 +#' * "approx_ZI" either use an exact or approximated conditional distribution for the zero inflantion. Default is FALSE #' #' @export ZIPLN_param <- function( @@ -113,6 +115,7 @@ ZIPLN_param <- function( config_opt$trace <- trace config_opt$ftol_out <- 1e-6 config_opt$maxit_out <- 100 + config_opt$approx_ZI <- FALSE config_opt[names(config_optim)] <- config_optim structure(list( diff --git a/R/ZIPLNfit-class.R b/R/ZIPLNfit-class.R index 3ab9e286..5f8b0af2 100644 --- a/R/ZIPLNfit-class.R +++ b/R/ZIPLNfit-class.R @@ -132,6 +132,7 @@ ZIPLNfit <- R6Class( "col" = function(R, ...) list(Pi = matrix(colMeans(R), nrow(R), p, byrow = TRUE), B0 = matrix(NA, d0, p)), "covar" = optim_zipln_zipar_covar ) + private$optimizer$R <- ifelse(control$config_optim$approx_ZI, optim_zipln_R_var, optim_zipln_R_exact) private$optimizer$Omega <- optim_zipln_Omega_full }, @@ -179,9 +180,8 @@ ZIPLNfit <- R6Class( ### VE Step # ZI part - new_R <- optim_zipln_R( - Y = data$Y, X = data$X, O = data$O, M = parameters$M, S = parameters$S, Pi = new_Pi - ) + new_R <- private$optimizer$R(Y = data$Y, X = data$X, O = data$O, M = parameters$M, S = parameters$S, Pi = new_Pi, B = new_B) + # PLN part new_M <- optim_zipln_M( init_M = parameters$M, @@ -300,8 +300,8 @@ ZIPLNfit <- R6Class( )$Pi # VE Step - new_R <- optim_zipln_R( - Y = data$Y, X = data$X, O = data$O, M = parameters$M, S = parameters$S, Pi = Pi + new_R <- private$optimizer$R( + Y = data$Y, X = data$X, O = data$O, M = parameters$M, S = parameters$S, Pi = Pi, B = B ) new_M <- optim_zipln_M( init_M = parameters$M, diff --git a/inst/case_studies/scRNA.R b/inst/case_studies/scRNA.R index ddb89b2d..659be1fb 100644 --- a/inst/case_studies/scRNA.R +++ b/inst/case_studies/scRNA.R @@ -5,7 +5,7 @@ data(scRNA) # data subsample: only 500 random cell and the 200 most varying transcript scRNA <- scRNA[sample.int(nrow(scRNA), 500), ] scRNA$counts <- scRNA$counts[, 1:200] -myZIPLN <- ZIPLN(counts ~ 1 + offset(log(total_counts)), data = scRNA) +myZIPLN <- ZIPLN(counts ~ 1 + offset(log(total_counts)), zi = "col", data = scRNA) myPLN <- PLN(counts ~ 1 + offset(log(total_counts)), data = scRNA) data.frame( diff --git a/inst/include/PLNmodels.h b/inst/include/PLNmodels.h deleted file mode 100644 index 9bee263f..00000000 --- a/inst/include/PLNmodels.h +++ /dev/null @@ -1,9 +0,0 @@ -// Generated by using Rcpp::compileAttributes() -> do not edit by hand -// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 - -#ifndef RCPP_PLNmodels_H_GEN_ -#define RCPP_PLNmodels_H_GEN_ - -#include "PLNmodels_RcppExports.h" - -#endif // RCPP_PLNmodels_H_GEN_ diff --git a/inst/include/PLNmodels_RcppExports.h b/inst/include/PLNmodels_RcppExports.h deleted file mode 100644 index f44129f3..00000000 --- a/inst/include/PLNmodels_RcppExports.h +++ /dev/null @@ -1,30 +0,0 @@ -// Generated by using Rcpp::compileAttributes() -> do not edit by hand -// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 - -#ifndef RCPP_PLNmodels_RCPPEXPORTS_H_GEN_ -#define RCPP_PLNmodels_RCPPEXPORTS_H_GEN_ - -#include -#include - -namespace PLNmodels { - - using namespace Rcpp; - - namespace { - void validateSignature(const char* sig) { - Rcpp::Function require = Rcpp::Environment::base_env()["require"]; - require("PLNmodels", Rcpp::Named("quietly") = true); - typedef int(*Ptr_validate)(const char*); - static Ptr_validate p_validate = (Ptr_validate) - R_GetCCallable("PLNmodels", "_PLNmodels_RcppExport_validate"); - if (!p_validate(sig)) { - throw Rcpp::function_not_exported( - "C++ function with signature '" + std::string(sig) + "' not found in PLNmodels"); - } - } - } - -} - -#endif // RCPP_PLNmodels_RCPPEXPORTS_H_GEN_ diff --git a/inst/simus_ZIPLN/essai_ZIPLN.R b/inst/simus_ZIPLN/essai_ZIPLN.R index 1cb9bdcf..bf494ff2 100644 --- a/inst/simus_ZIPLN/essai_ZIPLN.R +++ b/inst/simus_ZIPLN/essai_ZIPLN.R @@ -133,8 +133,8 @@ p <- ggplot(res) + aes(x = factor(n), y = pred_Y, fill = factor(method)) + geom_ scale_y_log10() + ylim(c(0,2)) p -p <- ggplot(res) + aes(x = factor(n), y = rmse_B, fill = factor(method)) + geom_violin() + theme_bw() + scale_y_log10() + ylim(c(2.75,3)) +p <- ggplot(res) + aes(x = factor(n), y = rmse_B, fill = factor(method)) + geom_violin() + theme_bw() + scale_y_log10() + ylim(c(2,5)) p -p <- ggplot(res) + aes(x = factor(n), y = rmse_Omega, fill = factor(method)) + geom_violin() + theme_bw() + scale_y_log10() + ylim(c(0,0.5)) +p <- ggplot(res) + aes(x = factor(n), y = rmse_Omega, fill = factor(method)) + geom_violin() + theme_bw() + scale_y_log10() + ylim(c(0.1,.3)) p diff --git a/man/ZIPLN_param.Rd b/man/ZIPLN_param.Rd index 798b7fad..29b1c5c3 100644 --- a/man/ZIPLN_param.Rd +++ b/man/ZIPLN_param.Rd @@ -41,10 +41,12 @@ list of parameters used during the fit and post-processing steps Helper to define list of parameters to control the PLN fit. All arguments have defaults. } \details{ -See \code{\link[=PLN_param]{PLN_param()}} for a full description of the generic optimization parameters. ZIPLN_param() also has two additional parameters controlling the optimization due -the inner-outer loop structure of the optimizer: +See \code{\link[=PLN_param]{PLN_param()}} for a full description of the generic optimization parameters. ZIPLN_param() also +has two additional parameters controlling the optimization due the inner-outer loop structure of the optimizer, +and additional parameter controlling the form of the variational approximation of the zero inflation: \itemize{ \item "ftol_out" outer solver stops when an optimization step changes the objective function by less than \code{ftol_out} multiplied by the absolute value of the parameter. Default is 1e-8 \item "maxit_out" outer solver stops when the number of iteration exceeds \code{maxit_out}. Default is 100 +\item "approx_ZI" either use an exact or approximated conditional distribution for the zero inflantion. Default is FALSE } } diff --git a/man/ZIPLNfit.Rd b/man/ZIPLNfit.Rd index fd994d67..1f3c43b3 100644 --- a/man/ZIPLNfit.Rd +++ b/man/ZIPLNfit.Rd @@ -264,9 +264,9 @@ Predict position, scores or observations of new data. See \code{\link[=predict.Z \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{newdata}}{A data frame in which to look for variables with which to predict. If omitted, the fitted values are returned.} +\item{\code{newdata}}{A data frame in which to look for variables with which to predict. If omitted, the fitted values are used.} -\item{\code{responses}}{Optional data frame containing the count of the observed variables (matching the names of the provided as data in the PLN function), assuming the interest is in testing the model.} +\item{\code{responses}}{Optional data frame containing the count of the observed variables (matching the names of the provided as data in the PLN function), assuming the interest in in testing the model.} \item{\code{type}}{Scale used for the prediction. Either \code{"link"} (default, predicted positions in the latent space), \code{"response"} (predicted average counts, accounting for zero-inflation) or \code{"deflated"} (predicted average counts, not accounting for zero-inflation and using only the PLN part of the model).} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index d6104c90..01f8105d 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -1,11 +1,8 @@ // Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -#include "../inst/include/PLNmodels.h" #include #include -#include -#include using namespace Rcpp; @@ -253,9 +250,9 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } -// optim_zipln_R -arma::mat optim_zipln_R(const arma::mat& Y, const arma::mat& X, const arma::mat& O, const arma::mat& M, const arma::mat& S, const arma::mat& Pi); -RcppExport SEXP _PLNmodels_optim_zipln_R(SEXP YSEXP, SEXP XSEXP, SEXP OSEXP, SEXP MSEXP, SEXP SSEXP, SEXP PiSEXP) { +// optim_zipln_R_var +arma::mat optim_zipln_R_var(const arma::mat& Y, const arma::mat& X, const arma::mat& O, const arma::mat& M, const arma::mat& S, const arma::mat& Pi, const arma::mat& B); +RcppExport SEXP _PLNmodels_optim_zipln_R_var(SEXP YSEXP, SEXP XSEXP, SEXP OSEXP, SEXP MSEXP, SEXP SSEXP, SEXP PiSEXP, SEXP BSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -265,7 +262,25 @@ BEGIN_RCPP Rcpp::traits::input_parameter< const arma::mat& >::type M(MSEXP); Rcpp::traits::input_parameter< const arma::mat& >::type S(SSEXP); Rcpp::traits::input_parameter< const arma::mat& >::type Pi(PiSEXP); - rcpp_result_gen = Rcpp::wrap(optim_zipln_R(Y, X, O, M, S, Pi)); + Rcpp::traits::input_parameter< const arma::mat& >::type B(BSEXP); + rcpp_result_gen = Rcpp::wrap(optim_zipln_R_var(Y, X, O, M, S, Pi, B)); + return rcpp_result_gen; +END_RCPP +} +// optim_zipln_R_exact +arma::mat optim_zipln_R_exact(const arma::mat& Y, const arma::mat& X, const arma::mat& O, const arma::mat& M, const arma::mat& S, const arma::mat& Pi, const arma::mat& B); +RcppExport SEXP _PLNmodels_optim_zipln_R_exact(SEXP YSEXP, SEXP XSEXP, SEXP OSEXP, SEXP MSEXP, SEXP SSEXP, SEXP PiSEXP, SEXP BSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::mat& >::type Y(YSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type X(XSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type O(OSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type M(MSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type S(SSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type Pi(PiSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type B(BSEXP); + rcpp_result_gen = Rcpp::wrap(optim_zipln_R_exact(Y, X, O, M, S, Pi, B)); return rcpp_result_gen; END_RCPP } @@ -316,20 +331,6 @@ BEGIN_RCPP END_RCPP } -// validate (ensure exported C++ functions exist before calling them) -static int _PLNmodels_RcppExport_validate(const char* sig) { - static std::set signatures; - if (signatures.empty()) { - } - return signatures.find(sig) != signatures.end(); -} - -// registerCCallable (register entry points for exported C++ functions) -RcppExport SEXP _PLNmodels_RcppExport_registerCCallable() { - R_RegisterCCallable("PLNmodels", "_PLNmodels_RcppExport_validate", (DL_FUNC)_PLNmodels_RcppExport_validate); - return R_NilValue; -} - static const R_CallMethodDef CallEntries[] = { {"_PLNmodels_cpp_test_nlopt", (DL_FUNC) &_PLNmodels_cpp_test_nlopt, 0}, {"_PLNmodels_nlopt_optimize_diagonal", (DL_FUNC) &_PLNmodels_nlopt_optimize_diagonal, 3}, @@ -348,11 +349,11 @@ static const R_CallMethodDef CallEntries[] = { {"_PLNmodels_optim_zipln_Omega_diagonal", (DL_FUNC) &_PLNmodels_optim_zipln_Omega_diagonal, 4}, {"_PLNmodels_optim_zipln_B_dense", (DL_FUNC) &_PLNmodels_optim_zipln_B_dense, 2}, {"_PLNmodels_optim_zipln_zipar_covar", (DL_FUNC) &_PLNmodels_optim_zipln_zipar_covar, 4}, - {"_PLNmodels_optim_zipln_R", (DL_FUNC) &_PLNmodels_optim_zipln_R, 6}, + {"_PLNmodels_optim_zipln_R_var", (DL_FUNC) &_PLNmodels_optim_zipln_R_var, 7}, + {"_PLNmodels_optim_zipln_R_exact", (DL_FUNC) &_PLNmodels_optim_zipln_R_exact, 7}, {"_PLNmodels_optim_zipln_M", (DL_FUNC) &_PLNmodels_optim_zipln_M, 9}, {"_PLNmodels_optim_zipln_S", (DL_FUNC) &_PLNmodels_optim_zipln_S, 7}, {"_PLNmodels_cpp_test_packing", (DL_FUNC) &_PLNmodels_cpp_test_packing, 0}, - {"_PLNmodels_RcppExport_registerCCallable", (DL_FUNC) &_PLNmodels_RcppExport_registerCCallable, 0}, {NULL, NULL, 0} }; diff --git a/src/lambertW.cpp b/src/lambertW.cpp index 97942d60..2d97dd3c 100644 --- a/src/lambertW.cpp +++ b/src/lambertW.cpp @@ -36,17 +36,7 @@ Fritsch, F. N.; Shafer, R. E. & Crowley, W. P. 1973, 16, 123-124 */ -// [[Rcpp::depends(RcppParallel)]] -// [[Rcpp::interfaces(r, cpp)]] -#include - -#define _USE_MATH_DEFINES -#include - -using namespace Rcpp; - -const double EPS = 2.2204460492503131e-16; -const double M_1_E = 1.0 / M_E; +#include "lambertW.h" /* Fritsch Iteration * W_{n+1} = W_n * (1 + e_n) diff --git a/src/optim_zi-pln.cpp b/src/optim_zi-pln.cpp index 5665ed47..559979cb 100644 --- a/src/optim_zi-pln.cpp +++ b/src/optim_zi-pln.cpp @@ -6,6 +6,7 @@ #include "nlopt_wrapper.h" #include "packing.h" #include "utils.h" +#include "lambertW.h" // [[Rcpp::export]] arma::vec zipln_vloglik( @@ -118,13 +119,14 @@ Rcpp::List optim_zipln_zipar_covar( } // [[Rcpp::export]] -arma::mat optim_zipln_R( +arma::mat optim_zipln_R_var( const arma::mat & Y, // responses (n,p) const arma::mat & X, // covariates (n,d) const arma::mat & O, // offsets (n,p) const arma::mat & M, // (n,p) const arma::mat & S, // (n,p) - const arma::mat & Pi // (d,p) + const arma::mat & Pi, // (d,p) + const arma::mat & B // covariates (n,d) ) { arma::mat A = exp(O + M + 0.5 * S % S); arma::mat R = pow(1. + exp(- (A + logit(Pi))), -1); @@ -144,6 +146,39 @@ arma::mat optim_zipln_R( return R; } +double phi (double mu, double sigma2) { + double W = lambertW0_CS(sigma2 * exp(mu)) ; + return(exp(-(pow(W, 2) + 2 * W) / (2 * sigma2)) / sqrt(1 + W)) ; +} + +// [[Rcpp::export]] +arma::mat optim_zipln_R_exact ( + const arma::mat & Y, // covariates (n,d) + const arma::mat & X, // covariates (n,d) + const arma::mat & O, // offsets (n,p) + const arma::mat & M, // (n,p) + const arma::mat & S, // (n,p) + const arma::mat & Pi, // (n,p) + const arma::mat & B // covariates (n,d) +) { + + arma::mat XB = X * B; + arma::mat M_mu = M - XB; + arma::uword n = M.n_rows; + arma::uword p = M.n_cols; + arma::vec diag_Sigma = arma::diagvec((1./n) * (M_mu.t() * M_mu + diagmat(sum(S % S, 0)))) ; + arma::mat R = arma::zeros(n,p); + for(arma::uword i = 0; i < n; i += 1) { + for(arma::uword j = 0; j < p; j += 1) { + if(Y(i, j) < 0.5) { + double Phi = phi(O(i,j) + XB(i,j), diag_Sigma(j)) ; + R(i,j) = Pi(i,j) / (Phi * (1 - Pi(i,j)) + Pi(i,j)) ; + } + } + } + return R; +} + // [[Rcpp::export]] Rcpp::List optim_zipln_M( const arma::mat & init_M, // (n,p) diff --git a/tests/testthat/test-zipln.R b/tests/testthat/test-zipln.R index d6361f9e..7eac24b0 100644 --- a/tests/testthat/test-zipln.R +++ b/tests/testthat/test-zipln.R @@ -60,6 +60,17 @@ test_that("PLN is working with unnamed data matrix", { expect_error(ZIPLN(Abundance ~ 1, data = trichoptera, control = ZIPLN_param(config_optim = list(algorithm = "nawak")))) }) +test_that("ZIPLN is working with exact and variational inference for the conditional distribution of the ZI component", { + + approx <- ZIPLN(Abundance ~ 1, data = trichoptera, control = ZIPLN_param(config_optim = list(approx_ZI = TRUE))) + exact <- ZIPLN(Abundance ~ 1, data = trichoptera, control = ZIPLN_param(config_optim = list(approx_ZI = FALSE))) + + expect_equal(approx$loglik, exact$loglik, tolerance = 1e-1) ## Almost equivalent + expect_equal(approx$model_par$B, exact$model_par$B, tolerance = 1e-1) ## Almost equivalent + expect_equal(approx$model_par$Sigma, exact$model_par$Sigma, tolerance = 1e-1) ## Almost equivalent + +}) + test_that("ZIPLN: Check that univariate ZIPLN models works, with matrix of numeric format", { expect_no_error(uniZIPLN <- ZIPLN(Abundance[,1,drop=FALSE] ~ 1, data = trichoptera)) expect_no_error(uniZIPLN <- ZIPLN(Abundance[,1] ~ 1, data = trichoptera)) From 0318d07801ae36d7e680496914fbc93dbde0b3a0 Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Thu, 8 Feb 2024 15:37:57 +0100 Subject: [PATCH 03/30] added plot function for ZIPLNfit_sparse + mututalizing code woth PLNnetwork_fit --- DESCRIPTION | 2 +- NAMESPACE | 1 + R/PLNnetworkfit-class.R | 53 +++++------------------------- R/ZIPLNfit-S3methods.R | 45 ++++++++++++++++++++++++++ R/ZIPLNfit-class.R | 48 +++++++++++++++++++++++++++ R/plot_utils.R | 50 ++++++++++++++++++++++++++++ man/ZIPLNfit_sparse.Rd | 59 ++++++++++++++++++++++++++++++++++ man/plot.ZIPLNfit_sparse.Rd | 52 ++++++++++++++++++++++++++++++ tests/testthat/test-ziplnfit.R | 13 ++++++++ 9 files changed, 277 insertions(+), 46 deletions(-) create mode 100644 man/plot.ZIPLNfit_sparse.Rd diff --git a/DESCRIPTION b/DESCRIPTION index bfa54fe0..ef0ae662 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,7 +25,7 @@ BugReports: https://github.com/pln-team/PLNmodels/issues License: GPL (>= 3) Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Depends: R (>= 3.4) LazyData: true biocViews: diff --git a/NAMESPACE b/NAMESPACE index ae446525..858d3ebc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,6 +21,7 @@ S3method(plot,PLNmixturefamily) S3method(plot,PLNmixturefit) S3method(plot,PLNnetworkfamily) S3method(plot,PLNnetworkfit) +S3method(plot,ZIPLNfit_sparse) S3method(predict,PLNLDAfit) S3method(predict,PLNfit) S3method(predict,PLNmixturefit) diff --git a/R/PLNnetworkfit-class.R b/R/PLNnetworkfit-class.R index 1dff42da..3995df54 100644 --- a/R/PLNnetworkfit-class.R +++ b/R/PLNnetworkfit-class.R @@ -145,51 +145,14 @@ PLNnetworkfit <- R6Class( node.labels = NULL, layout = layout_in_circle, plot = TRUE) { - - type <- match.arg(type) - output <- match.arg(output) - - net <- self$latent_network(type) - - if (output == "igraph") { - - G <- graph_from_adjacency_matrix(net, mode = "undirected", weighted = TRUE, diag = FALSE) - - if (!is.null(node.labels)) { - igraph::V(G)$label <- node.labels - } else { - igraph::V(G)$label <- colnames(net) - } - ## Nice nodes - V.deg <- degree(G)/sum(degree(G)) - igraph::V(G)$label.cex <- V.deg / max(V.deg) + .5 - igraph::V(G)$size <- V.deg * 100 - igraph::V(G)$label.color <- rgb(0, 0, .2, .8) - igraph::V(G)$frame.color <- NA - ## Nice edges - igraph::E(G)$color <- ifelse(igraph::E(G)$weight > 0, edge.color[1], edge.color[2]) - if (type == "support") - igraph::E(G)$width <- abs(igraph::E(G)$weight) - else - igraph::E(G)$width <- 15*abs(igraph::E(G)$weight) - - if (remove.isolated) { - G <- delete.vertices(G, which(degree(G) == 0)) - } - if (plot) plot(G, layout = layout) - } - if (output == "corrplot") { - if (plot) { - if (ncol(net) > 100) - colnames(net) <- rownames(net) <- rep(" ", ncol(net)) - G <- net - diag(net) <- 0 - corrplot(as.matrix(net), method = "color", is.corr = FALSE, tl.pos = "td", cl.pos = "n", tl.cex = 0.5, type = "upper") - } else { - G <- net - } - } - invisible(G) + .plot_network(self$latent_network(match.arg(type)), + type = match.arg(type), + output = match.arg(output), + edge.color = edge.color, + remove.isolated = remove.isolated, + node.labels = node.labels, + layout = layout, + plot = plot) }, ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/R/ZIPLNfit-S3methods.R b/R/ZIPLNfit-S3methods.R index 6fd24af8..df9e916d 100644 --- a/R/ZIPLNfit-S3methods.R +++ b/R/ZIPLNfit-S3methods.R @@ -95,3 +95,48 @@ sigma.ZIPLNfit <- function(object, ...) { object$model_par$Sigma } +## ========================================================================================= +## +## PUBLIC S3 METHODS FOR ZIPLNfit_sparse +## +## ========================================================================================= + +## Auxiliary functions to check the given class of an objet +isZIPLNfit_sparse <- function(Robject) {inherits(Robject, "ZIPLNfit_sparse")} + +#' Extract and plot the network (partial correlation, support or inverse covariance) from a [`ZIPLNfit_sparse`] object +#' +#' @name plot.ZIPLNfit_sparse +#' +#' @param x an R6 object with class [`PLNnetworkfit`] +#' @param type character. Value of the weight of the edges in the network, either "partial_cor" (partial correlation) or "support" (binary). Default is `"partial_cor"`. +#' @param output the type of output used: either 'igraph' or 'corrplot'. Default is `'igraph'`. +#' @param edge.color Length 2 color vector. Color for positive/negative edges. Default is `c("#F8766D", "#00BFC4")`. Only relevant for igraph output. +#' @param node.labels vector of character. The labels of the nodes. The default will use the column names ot the response matrix. +#' @param remove.isolated if `TRUE`, isolated node are remove before plotting. Only relevant for igraph output. +#' @param layout an optional igraph layout. Only relevant for igraph output. +#' @param plot logical. Should the final network be displayed or only sent back to the user. Default is `TRUE`. +#' @param ... Not used (S3 compatibility). +#' +#' @return Send back an invisible object (igraph or Matrix, depending on the output chosen) and optionally displays a graph (via igraph or corrplot for large ones) +#' @examples +#' data(trichoptera) +#' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) +#' fits <- ZIPLN(Abundance ~ 1, data = trichoptera, control = ZIPLN_param(penalty = 0.1)) +#' myNet <- getBestModel(fits) +#' \dontrun{ +#' plot(myNet) +#' } +#' @export +plot.ZIPLNfit_sparse <- + function(x, + type = c("partial_cor", "support"), + output = c("igraph", "corrplot"), + edge.color = c("#F8766D", "#00BFC4"), + remove.isolated = FALSE, + node.labels = NULL, + layout = layout_in_circle, + plot = TRUE, ...) { + stopifnot(isZIPLNfit_sparse(x)) + invisible(x$plot_network(type, output, edge.color, remove.isolated, node.labels, layout, plot)) + } diff --git a/R/ZIPLNfit-class.R b/R/ZIPLNfit-class.R index 5f8b0af2..8b63ec50 100644 --- a/R/ZIPLNfit-class.R +++ b/R/ZIPLNfit-class.R @@ -749,6 +749,54 @@ ZIPLNfit_sparse <- R6Class( function(M, X, B, S) { glassoFast( crossprod(M - X %*% B)/self$n + diag(colMeans(S * S), self$p, self$p), rho = control$penalty )$wi } + }, + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Extractors ------------------------ + #' @description Extract interaction network in the latent space + #' @param type edge value in the network. Can be "support" (binary edges), "precision" (coefficient of the precision matrix) or "partial_cor" (partial correlation between species) + #' @importFrom Matrix Matrix + #' @return a square matrix of size `PLNnetworkfit$n` + latent_network = function(type = c("partial_cor", "support", "precision")) { + net <- switch( + match.arg(type), + "support" = 1 * (private$Omega != 0 & !diag(TRUE, ncol(private$Omega))), + "precision" = private$Omega, + "partial_cor" = { + tmp <- -private$Omega / tcrossprod(sqrt(diag(private$Omega))); diag(tmp) <- 1 + tmp + } + ) + ## Enforce sparse Matrix encoding to avoid downstream problems with igraph::graph_from_adjacency_matrix + ## as it fails when given dsyMatrix objects + Matrix(net, sparse = TRUE) + }, + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Graphical methods------------------ + #' @description plot the latent network. + #' @param type edge value in the network. Either "precision" (coefficient of the precision matrix) or "partial_cor" (partial correlation between species). + #' @param output Output type. Either `igraph` (for the network) or `corrplot` (for the adjacency matrix) + #' @param edge.color Length 2 color vector. Color for positive/negative edges. Default is `c("#F8766D", "#00BFC4")`. Only relevant for igraph output. + #' @param node.labels vector of character. The labels of the nodes. The default will use the column names ot the response matrix. + #' @param remove.isolated if `TRUE`, isolated node are remove before plotting. Only relevant for igraph output. + #' @param layout an optional igraph layout. Only relevant for igraph output. + #' @param plot logical. Should the final network be displayed or only sent back to the user. Default is `TRUE`. + plot_network = function(type = c("partial_cor", "support"), + output = c("igraph", "corrplot"), + edge.color = c("#F8766D", "#00BFC4"), + remove.isolated = FALSE, + node.labels = NULL, + layout = layout_in_circle, + plot = TRUE) { + .plot_network(self$latent_network(match.arg(type)), + type = match.arg(type), + output = match.arg(output), + edge.color = edge.color, + remove.isolated = remove.isolated, + node.labels = node.labels, + layout = layout, + plot = plot) } ), active = list( diff --git a/R/plot_utils.R b/R/plot_utils.R index 647dea8c..1917aa13 100644 --- a/R/plot_utils.R +++ b/R/plot_utils.R @@ -185,3 +185,53 @@ plot_matrix = function(Mat, rowFG = "sample", colFG = "variable", clustering = N } g } + +.plot_network = function(net , + type , + output , + edge.color = c("#F8766D", "#00BFC4"), + remove.isolated = FALSE, + node.labels = NULL, + layout = layout_in_circle, + plot = TRUE) { + + if (output == "igraph") { + + G <- graph_from_adjacency_matrix(net, mode = "undirected", weighted = TRUE, diag = FALSE) + + if (!is.null(node.labels)) { + igraph::V(G)$label <- node.labels + } else { + igraph::V(G)$label <- colnames(net) + } + ## Nice nodes + V.deg <- degree(G)/sum(degree(G)) + igraph::V(G)$label.cex <- V.deg / max(V.deg) + .5 + igraph::V(G)$size <- V.deg * 100 + igraph::V(G)$label.color <- rgb(0, 0, .2, .8) + igraph::V(G)$frame.color <- NA + ## Nice edges + igraph::E(G)$color <- ifelse(igraph::E(G)$weight > 0, edge.color[1], edge.color[2]) + if (type == "support") + igraph::E(G)$width <- abs(igraph::E(G)$weight) + else + igraph::E(G)$width <- 15*abs(igraph::E(G)$weight) + + if (remove.isolated) { + G <- delete.vertices(G, which(degree(G) == 0)) + } + if (plot) plot(G, layout = layout) + } + if (output == "corrplot") { + if (plot) { + if (ncol(net) > 100) + colnames(net) <- rownames(net) <- rep(" ", ncol(net)) + G <- net + diag(net) <- 0 + corrplot(as.matrix(net), method = "color", is.corr = FALSE, tl.pos = "td", cl.pos = "n", tl.cex = 0.5, type = "upper") + } else { + G <- net + } + } + invisible(G) +} diff --git a/man/ZIPLNfit_sparse.Rd b/man/ZIPLNfit_sparse.Rd index 6894af91..a8d9e700 100644 --- a/man/ZIPLNfit_sparse.Rd +++ b/man/ZIPLNfit_sparse.Rd @@ -34,6 +34,8 @@ print(myPLN) \subsection{Public methods}{ \itemize{ \item \href{#method-ZIPLNfit_sparse-new}{\code{ZIPLNfit_sparse$new()}} +\item \href{#method-ZIPLNfit_sparse-latent_network}{\code{ZIPLNfit_sparse$latent_network()}} +\item \href{#method-ZIPLNfit_sparse-plot_network}{\code{ZIPLNfit_sparse$plot_network()}} \item \href{#method-ZIPLNfit_sparse-clone}{\code{ZIPLNfit_sparse$clone()}} } } @@ -77,6 +79,63 @@ Initialize a \code{\link{ZIPLNfit_fixed}} model } } \if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ZIPLNfit_sparse-latent_network}{}}} +\subsection{Method \code{latent_network()}}{ +Extract interaction network in the latent space +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ZIPLNfit_sparse$latent_network(type = c("partial_cor", "support", "precision"))}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{type}}{edge value in the network. Can be "support" (binary edges), "precision" (coefficient of the precision matrix) or "partial_cor" (partial correlation between species)} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +a square matrix of size \code{PLNnetworkfit$n} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ZIPLNfit_sparse-plot_network}{}}} +\subsection{Method \code{plot_network()}}{ +plot the latent network. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ZIPLNfit_sparse$plot_network( + type = c("partial_cor", "support"), + output = c("igraph", "corrplot"), + edge.color = c("#F8766D", "#00BFC4"), + remove.isolated = FALSE, + node.labels = NULL, + layout = layout_in_circle, + plot = TRUE +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{type}}{edge value in the network. Either "precision" (coefficient of the precision matrix) or "partial_cor" (partial correlation between species).} + +\item{\code{output}}{Output type. Either \code{igraph} (for the network) or \code{corrplot} (for the adjacency matrix)} + +\item{\code{edge.color}}{Length 2 color vector. Color for positive/negative edges. Default is \code{c("#F8766D", "#00BFC4")}. Only relevant for igraph output.} + +\item{\code{remove.isolated}}{if \code{TRUE}, isolated node are remove before plotting. Only relevant for igraph output.} + +\item{\code{node.labels}}{vector of character. The labels of the nodes. The default will use the column names ot the response matrix.} + +\item{\code{layout}}{an optional igraph layout. Only relevant for igraph output.} + +\item{\code{plot}}{logical. Should the final network be displayed or only sent back to the user. Default is \code{TRUE}.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-ZIPLNfit_sparse-clone}{}}} \subsection{Method \code{clone()}}{ diff --git a/man/plot.ZIPLNfit_sparse.Rd b/man/plot.ZIPLNfit_sparse.Rd new file mode 100644 index 00000000..2994db03 --- /dev/null +++ b/man/plot.ZIPLNfit_sparse.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ZIPLNfit-S3methods.R +\name{plot.ZIPLNfit_sparse} +\alias{plot.ZIPLNfit_sparse} +\title{Extract and plot the network (partial correlation, support or inverse covariance) from a \code{\link{ZIPLNfit_sparse}} object} +\usage{ +\method{plot}{ZIPLNfit_sparse}( + x, + type = c("partial_cor", "support"), + output = c("igraph", "corrplot"), + edge.color = c("#F8766D", "#00BFC4"), + remove.isolated = FALSE, + node.labels = NULL, + layout = layout_in_circle, + plot = TRUE, + ... +) +} +\arguments{ +\item{x}{an R6 object with class \code{\link{PLNnetworkfit}}} + +\item{type}{character. Value of the weight of the edges in the network, either "partial_cor" (partial correlation) or "support" (binary). Default is \code{"partial_cor"}.} + +\item{output}{the type of output used: either 'igraph' or 'corrplot'. Default is \code{'igraph'}.} + +\item{edge.color}{Length 2 color vector. Color for positive/negative edges. Default is \code{c("#F8766D", "#00BFC4")}. Only relevant for igraph output.} + +\item{remove.isolated}{if \code{TRUE}, isolated node are remove before plotting. Only relevant for igraph output.} + +\item{node.labels}{vector of character. The labels of the nodes. The default will use the column names ot the response matrix.} + +\item{layout}{an optional igraph layout. Only relevant for igraph output.} + +\item{plot}{logical. Should the final network be displayed or only sent back to the user. Default is \code{TRUE}.} + +\item{...}{Not used (S3 compatibility).} +} +\value{ +Send back an invisible object (igraph or Matrix, depending on the output chosen) and optionally displays a graph (via igraph or corrplot for large ones) +} +\description{ +Extract and plot the network (partial correlation, support or inverse covariance) from a \code{\link{ZIPLNfit_sparse}} object +} +\examples{ +data(trichoptera) +trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) +fits <- ZIPLN(Abundance ~ 1, data = trichoptera, control = ZIPLN_param(penalty = 0.1)) +myNet <- getBestModel(fits) +\dontrun{ +plot(myNet) +} +} diff --git a/tests/testthat/test-ziplnfit.R b/tests/testthat/test-ziplnfit.R index 9ba0ffc5..dbc5bfd5 100644 --- a/tests/testthat/test-ziplnfit.R +++ b/tests/testthat/test-ziplnfit.R @@ -118,3 +118,16 @@ test_that("ZIPLN fit: Check number of parameters", { expect_equal(model$vcov_model, "fixed") }) + +test_that("ZIPLN fit: check sparse output and plot", { + + myPLNfit <- ZIPLN(Abundance ~ 1, data = trichoptera, control = ZIPLN_param(trace = 1, penalty = 0.1)) + + expect_is(myPLNfit, "ZIPLNfit_sparse") + expect_is(myPLNfit, "ZIPLNfit") + + expect_equal(myPLNfit$vcov_model, "sparse") + expect_true(igraph::is.igraph(myPLNfit$plot_network(output = "igraph", plot = FALSE))) + expect_true(inherits(myPLNfit$plot_network(output = "corrplot", plot = FALSE), "Matrix")) + +}) From ce4da2c390bc815933e712c94eeed2ccd4427f0b Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Thu, 8 Feb 2024 15:45:07 +0100 Subject: [PATCH 04/30] added entry to pkgdown --- _pkgdown.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/_pkgdown.yml b/_pkgdown.yml index 3a19da12..ec857788 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -69,6 +69,7 @@ reference: - '`sigma.ZIPLNfit`' - '`predict.ZIPLNfit`' - '`fitted.ZIPLNfit`' + - '`plot.ZIPLNfit_sparse`' - title: 'Linear discriminant analysis via a Poisson lognormal fit' desc: > Description of the PLNLDAfit object and methods for its manipulation. From 3efa55aa6183614664ab85e0e14943eee25764d2 Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Thu, 8 Feb 2024 16:03:41 +0100 Subject: [PATCH 05/30] added import to rgb from grDevices --- NAMESPACE | 1 + R/plot_utils.R | 1 + 2 files changed, 2 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 858d3ebc..1dfb00b0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -78,6 +78,7 @@ importFrom(corrplot,corrplot) importFrom(future.apply,future_lapply) importFrom(future.apply,future_sapply) importFrom(glassoFast,glassoFast) +importFrom(grDevices,rgb) importFrom(grid,nullGrob) importFrom(grid,textGrob) importFrom(gridExtra,arrangeGrob) diff --git a/R/plot_utils.R b/R/plot_utils.R index 1917aa13..51df0dbf 100644 --- a/R/plot_utils.R +++ b/R/plot_utils.R @@ -186,6 +186,7 @@ plot_matrix = function(Mat, rowFG = "sample", colFG = "variable", clustering = N g } +#' @importFrom grDevices rgb .plot_network = function(net , type , output , From 7b9b05ba0070f137ca62379e5bd566de3c749def Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Thu, 8 Feb 2024 16:17:00 +0100 Subject: [PATCH 06/30] correction in example for ZIPLNfit_sparse --- R/ZIPLNfit-S3methods.R | 5 ++--- R/ZIPLNfit-class.R | 3 ++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/ZIPLNfit-S3methods.R b/R/ZIPLNfit-S3methods.R index df9e916d..a158c241 100644 --- a/R/ZIPLNfit-S3methods.R +++ b/R/ZIPLNfit-S3methods.R @@ -122,10 +122,9 @@ isZIPLNfit_sparse <- function(Robject) {inherits(Robject, "ZIPLNfit_sparse")} #' @examples #' data(trichoptera) #' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) -#' fits <- ZIPLN(Abundance ~ 1, data = trichoptera, control = ZIPLN_param(penalty = 0.1)) -#' myNet <- getBestModel(fits) +#' fit <- ZIPLN(Abundance ~ 1, data = trichoptera, control = ZIPLN_param(penalty = 0.1)) #' \dontrun{ -#' plot(myNet) +#' plot(fit) #' } #' @export plot.ZIPLNfit_sparse <- diff --git a/R/ZIPLNfit-class.R b/R/ZIPLNfit-class.R index 8b63ec50..2151f9d3 100644 --- a/R/ZIPLNfit-class.R +++ b/R/ZIPLNfit-class.R @@ -730,9 +730,10 @@ ZIPLNfit_fixed <- R6Class( #' # See other examples in function ZIPLN #' data(trichoptera) #' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) -#' myPLN <- ZIPLN(Abundance ~ 1, data = trichoptera, control= ZIPLN_param(penalty = 0.2)) +#' myPLN <- ZIPLN(Abundance ~ 1, data = trichoptera, control= ZIPLN_param(penalty = 1)) #' class(myPLN) #' print(myPLN) +#' plot(myPLN) #' } ZIPLNfit_sparse <- R6Class( classname = "ZIPLNfit_sparse", From e5cb2841edec99a96a9dbb17f54bad02151bda7e Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Thu, 8 Feb 2024 17:05:42 +0100 Subject: [PATCH 07/30] regenerating doc for passing (hopefully) checks --- man/ZIPLNfit_sparse.Rd | 3 ++- man/plot.ZIPLNfit_sparse.Rd | 5 ++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/man/ZIPLNfit_sparse.Rd b/man/ZIPLNfit_sparse.Rd index a8d9e700..5ec52b3f 100644 --- a/man/ZIPLNfit_sparse.Rd +++ b/man/ZIPLNfit_sparse.Rd @@ -13,9 +13,10 @@ An R6 Class to represent a ZIPLNfit in a standard, general framework, with spars # See other examples in function ZIPLN data(trichoptera) trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) -myPLN <- ZIPLN(Abundance ~ 1, data = trichoptera, control= ZIPLN_param(penalty = 0.2)) +myPLN <- ZIPLN(Abundance ~ 1, data = trichoptera, control= ZIPLN_param(penalty = 1)) class(myPLN) print(myPLN) +plot(myPLN) } } \section{Super class}{ diff --git a/man/plot.ZIPLNfit_sparse.Rd b/man/plot.ZIPLNfit_sparse.Rd index 2994db03..52dad7c2 100644 --- a/man/plot.ZIPLNfit_sparse.Rd +++ b/man/plot.ZIPLNfit_sparse.Rd @@ -44,9 +44,8 @@ Extract and plot the network (partial correlation, support or inverse covariance \examples{ data(trichoptera) trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) -fits <- ZIPLN(Abundance ~ 1, data = trichoptera, control = ZIPLN_param(penalty = 0.1)) -myNet <- getBestModel(fits) +fit <- ZIPLN(Abundance ~ 1, data = trichoptera, control = ZIPLN_param(penalty = 0.1)) \dontrun{ -plot(myNet) +plot(fit) } } From 7134044d0879856be98d38ad885d90c33bf6bfaa Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Fri, 9 Feb 2024 16:51:32 +0100 Subject: [PATCH 08/30] good first step towards integration of ZIPLNnetworksfamily --- DESCRIPTION | 2 + NAMESPACE | 2 + R/PLNnetwork.R | 5 +- R/ZIPLN.R | 6 + R/ZIPLNfit-class.R | 114 +++++++---- R/ZIPLNnetwork.R | 119 +++++++++++ R/ZIPLNnetworkfamily-class.R | 370 +++++++++++++++++++++++++++++++++++ man/PLNnetwork.Rd | 3 +- man/ZIPLN_param.Rd | 6 + man/ZIPLNfit_sparse.Rd | 14 ++ man/ZIPLNnetwork.Rd | 58 ++++++ man/ZIPLNnetwork_param.Rd | 58 ++++++ man/ZIPLNnetworkfamily.Rd | 275 ++++++++++++++++++++++++++ 13 files changed, 989 insertions(+), 43 deletions(-) create mode 100644 R/ZIPLNnetwork.R create mode 100644 R/ZIPLNnetworkfamily-class.R create mode 100644 man/ZIPLNnetwork.Rd create mode 100644 man/ZIPLNnetwork_param.Rd create mode 100644 man/ZIPLNnetworkfamily.Rd diff --git a/DESCRIPTION b/DESCRIPTION index ef0ae662..bd42f1f3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -90,6 +90,8 @@ Collate: 'ZIPLNfit-class.R' 'ZIPLN.R' 'ZIPLNfit-S3methods.R' + 'ZIPLNnetworkfamily-class.R' + 'ZIPLNnetwork.R' 'barents.R' 'import_utils.R' 'mollusk.R' diff --git a/NAMESPACE b/NAMESPACE index 1dfb00b0..9951428c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -49,6 +49,8 @@ export(PLNnetwork) export(PLNnetwork_param) export(ZIPLN) export(ZIPLN_param) +export(ZIPLNnetwork) +export(ZIPLNnetwork_param) export(coefficient_path) export(compute_PLN_starting_point) export(compute_offset) diff --git a/R/PLNnetwork.R b/R/PLNnetwork.R index a6f70c0e..fa7d8161 100644 --- a/R/PLNnetwork.R +++ b/R/PLNnetwork.R @@ -1,6 +1,7 @@ #' Poisson lognormal model towards sparse network inference #' -#' Fit the sparse inverse covariance variant of the Poisson lognormal with a variational algorithm. Use the (g)lm syntax for model specification (covariates, offsets). +#' Fit the sparse inverse covariance variant of the Poisson lognormal with a variational algorithm +#' for a collection of sparsity parameter values distributed on a log scale. Use the (g)lm syntax for model specification (covariates, offsets). #' #' @param formula an object of class "formula": a symbolic description of the model to be fitted. #' @param data an optional data frame, list or environment (or object coercible by as.data.frame to a data frame) containing the variables in the model. If not found in data, the variables are taken from environment(formula), typically the environment from which lm is called. @@ -12,7 +13,6 @@ #' @return an R6 object with class [`PLNnetworkfamily`], which contains #' a collection of models with class [`PLNnetworkfit`] #' -#' @rdname PLNnetwork #' @examples #' data(trichoptera) #' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) @@ -122,5 +122,6 @@ PLNnetwork_param <- function( variance = TRUE , config_post = config_pst , config_optim = config_opt , +### TODO CHECK: Why two inceptive model (cov and not ?) inception = inception ), class = "PLNmodels_param") } diff --git a/R/ZIPLN.R b/R/ZIPLN.R index 9368571e..761d5b09 100644 --- a/R/ZIPLN.R +++ b/R/ZIPLN.R @@ -74,6 +74,8 @@ ZIPLN <- function(formula, data, subset, zi = c("single", "row", "col"), control #' #' @inheritParams PLN_param #' @param penalty a user-defined penalty to sparsify the residual covariance. Defaults to 0 (no sparsity). +#' @param penalize_diagonal boolean: should the diagonal terms be penalized in the graphical-Lasso? Only relevant with sparse covariance. Default is \code{TRUE} +#' @param penalty_weights p x p matrix of weights (default filled with 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values. Only relevant with sparse covariance. #' @return list of parameters used during the fit and post-processing steps #' #' @inherit PLN_param details @@ -91,6 +93,8 @@ ZIPLN_param <- function( covariance = c("full", "diagonal", "spherical", "fixed", "sparse"), Omega = NULL, penalty = 0, + penalize_diagonal = TRUE , + penalty_weights = NULL , config_post = list(), config_optim = list(), inception = NULL # pretrained ZIPLNfit used as initialization @@ -124,6 +128,8 @@ ZIPLN_param <- function( covariance = covariance, Omega = Omega , penalty = penalty , + penalize_diagonal = penalize_diagonal, + penalty_weights = penalty_weights , config_post = config_pst, config_optim = config_opt, inception = inception), class = "PLNmodels_param") diff --git a/R/ZIPLNfit-class.R b/R/ZIPLNfit-class.R index 2151f9d3..003ab50e 100644 --- a/R/ZIPLNfit-class.R +++ b/R/ZIPLNfit-class.R @@ -81,48 +81,55 @@ ZIPLNfit <- R6Class( private$covariance <- control$covariance private$ziparam <- control$ziparam - R <- matrix(0, n, p) - M <- matrix(0, n, p) - B <- matrix(0, d , p) - B0 <- matrix(0, d0, p) - ## Feature-wise univariate (ZI)poisson regression as starting point for ZIPLN - for (j in 1:p) { - y = responses[, j] - if (min(y) == 0) { - suppressWarnings( - zip_out <- switch(control$ziparam, - "row" = pscl::zeroinfl(y ~ 0 + covariates$PLN | 0 + factor(1:n), offset = offsets[, j]), - "covar" = pscl::zeroinfl(y ~ 0 + covariates$PLN | 0 + covariates$ZI , offset = offsets[, j]), - pscl::zeroinfl(y ~ 0 + covariates$PLN | 1, offset = offsets[, j])) # offset only for the count model - ) - B0[,j] <- coef(zip_out, "zero") - B[,j] <- coef(zip_out, "count") - R[, j] <- predict(zip_out, type = "zero") - M[,j] <- residuals(zip_out) + covariates$PLN %*% coef(zip_out, "count") - } else { - p_out <- glm(y ~ 0 + covariates$PLN, family = 'poisson', offset = offsets[, j]) - B0[,j] <- rep(-10, d) - B[,j] <- coef(p_out) - R[, j] <- 0 - M[,j] <- residuals(p_out) + covariates$PLN %*% coef(p_out) + if (isZIPLNfit(control$inception)) { + private$R <- control$inception$var_par$R + private$M <- control$inception$var_par$R + private$S <- control$inception$var_par$R + private$B <- control$inception$model_par$B + private$B0 <- control$inception$model_par$B0 + } else { + R <- matrix(0, n, p) + M <- matrix(0, n, p) + B <- matrix(0, d , p) + B0 <- matrix(0, d0, p) + ## Feature-wise univariate (ZI)poisson regression as starting point for ZIPLN + for (j in 1:p) { + y = responses[, j] + if (min(y) == 0) { + suppressWarnings( + zip_out <- switch(control$ziparam, + "row" = pscl::zeroinfl(y ~ 0 + covariates$PLN | 0 + factor(1:n), offset = offsets[, j]), + "covar" = pscl::zeroinfl(y ~ 0 + covariates$PLN | 0 + covariates$ZI , offset = offsets[, j]), + pscl::zeroinfl(y ~ 0 + covariates$PLN | 1, offset = offsets[, j])) # offset only for the count model + ) + B0[,j] <- coef(zip_out, "zero") + B[,j] <- coef(zip_out, "count") + R[, j] <- predict(zip_out, type = "zero") + M[,j] <- residuals(zip_out) + covariates$PLN %*% coef(zip_out, "count") + } else { + p_out <- glm(y ~ 0 + covariates$PLN, family = 'poisson', offset = offsets[, j]) + B0[,j] <- rep(-10, d) + B[,j] <- coef(p_out) + R[, j] <- 0 + M[,j] <- residuals(p_out) + covariates$PLN %*% coef(p_out) + } } - } - ## Initialization of the ZI component - private$R <- R + ## Initialization of the ZI component + private$R <- R + private$B0 <- B0 + ## Initialization of the PLN component + private$B <- B + private$M <- M + private$S <- matrix(.1, n, p) + } private$Pi <- switch(control$ziparam, - "single" = matrix( mean(R), n, p) , - "row" = matrix(rowMeans(R), n, p) , - "col" = matrix(colMeans(R), n, p, byrow = TRUE), - "covar" = R) - private$B0 <- B0 + "single" = matrix( mean(private$R), n, p) , + "row" = matrix(rowMeans(private$R), n, p) , + "col" = matrix(colMeans(private$R), n, p, byrow = TRUE), + "covar" = private$R) private$zeros <- 1 * (responses == 0) - ## Initialization of the PLN component - private$B <- B - private$M <- M - private$S <- matrix(.1, n, p) - ## Link to functions performing the optimization private$optimizer$B <- function(M, X) optim_zipln_B_dense(M, X) private$optimizer$zi <- switch( @@ -738,6 +745,15 @@ ZIPLNfit_fixed <- R6Class( ZIPLNfit_sparse <- R6Class( classname = "ZIPLNfit_sparse", inherit = ZIPLNfit, + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## PRIVATE MEMBERS ---- + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + private = list( + lambda = NA, # the sparsity tuning parameter + rho = NA # the p x p penalty weight + ), + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## PUBLIC MEMBERS ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -746,9 +762,13 @@ ZIPLNfit_sparse <- R6Class( #' @importFrom glassoFast glassoFast initialize = function(responses, covariates, offsets, weights, formula, control) { super$initialize(responses, covariates, offsets, weights, formula, control) + private$lambda <- control$penalty + private$rho <- control$penalty_weights + if (!control$penalize_diagonal) diag(private$rho) <- 0 private$optimizer$Omega <- function(M, X, B, S) { - glassoFast( crossprod(M - X %*% B)/self$n + diag(colMeans(S * S), self$p, self$p), rho = control$penalty )$wi + glassoFast( crossprod(M - X %*% B)/self$n + diag(colMeans(S * S), self$p, self$p), + rho = private$lambda * private$rho )$wi } }, @@ -801,9 +821,15 @@ ZIPLNfit_sparse <- R6Class( } ), active = list( + #' @field penalty the global level of sparsity in the current model + penalty = function() {private$lambda}, + #' @field penalty_weights a matrix of weights controlling the amount of penalty element-wise. + penalty_weights = function() {private$rho}, + #' @field n_edges number of edges if the network (non null coefficient of the sparse precision matrix) + n_edges = function() {sum(private$Omega[upper.tri(private$Omega, diag = FALSE)] != 0)}, #' @field nb_param number of parameters in the current PLN model nb_param = function() { - res <- self$p * self$d + (sum(private$Omega != 0) - self$p)/2L + + res <- self$p * self$d + self$n_edges + switch(private$ziparam, "single" = 1L, "row" = self$n, @@ -812,7 +838,15 @@ ZIPLNfit_sparse <- R6Class( as.integer(res) }, #' @field vcov_model character: the model used for the residual covariance - vcov_model = function() {"sparse"} + vcov_model = function() {"sparse"}, + #' @field pen_loglik variational lower bound of the l1-penalized loglikelihood + pen_loglik = function() {self$loglik - private$lambda * sum(abs(private$Omega))}, + #' @field EBIC variational lower bound of the EBIC + EBIC = function() {self$BIC - .5 * ifelse(self$n_edges > 0, self$n_edges * log(.5 * self$p*(self$p - 1)/self$n_edges), 0)}, + #' @field density proportion of non-null edges in the network + density = function() {mean(self$latent_network("support"))}, + #' @field criteria a vector with loglik, penalized loglik, BIC, EBIC, ICL, R_squared, number of parameters, number of edges and graph density + criteria = function() {data.frame(super$criteria, n_edges = self$n_edges, EBIC = self$EBIC, pen_loglik = self$pen_loglik, density = self$density)} ) ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## END OF THE CLASS ZIPLNfit_sparse diff --git a/R/ZIPLNnetwork.R b/R/ZIPLNnetwork.R new file mode 100644 index 00000000..8f300aff --- /dev/null +++ b/R/ZIPLNnetwork.R @@ -0,0 +1,119 @@ +#' Zero Inflated Poisson lognormal model toward sparse network inference +#' +#' Fit the sparse inverse covariance variant of the Zero Inflated Poisson lognormal with a variational algorithm +#' for a collection of sparsity parameter values distributed on a log scale. Use the (g)lm syntax for model specification (covariates, offsets). +#' +#' @inheritParams PLNnetwork +#' @param control a list-like structure for controlling the optimization, with default generated by [ZIPLNnetwork_param()]. See the associated documentation +#' for details. +#' @param zi a character describing the model used for zero inflation, either of +#' - "single" (default, one parameter shared by all counts) +#' - "col" (one parameter per variable / feature) +#' - "row" (one parameter per sample / individual). +#' If covariates are specified in the formula RHS (see details) this parameter is ignored. +#' +#' @details +#' Covariates for the Zero-Inflation parameter (using a logistic regression model) can be specified in the formula RHS using the pipe +#' (`~ PLN effect | ZI effect`) to separate covariates for the PLN part of the model from those for the Zero-Inflation part. +#' Note that different covariates can be used for each part. +#' +#' @return an R6 object with class [`ZIPLNnetworkfamily`] +#' +#' @include ZIPLNnetworkfamily-class.R +#' @examples +#' data(trichoptera) +#' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) +#' myZIPLNs <- ZIPLNnetwork(Abundance ~ 1, data = trichoptera, zi = "single") +#' @seealso The classes [`ZIPLNfit`] and [`ZIPLNnetworkfamily`] +#' @export +ZIPLNnetwork <- function(formula, data, subset, weights, zi = c("single", "row", "col"), penalties = NULL, control = ZIPLNnetwork_param()) { + + ## extract the data matrices and weights + args <- extract_model_zi(match.call(expand.dots = FALSE), parent.frame()) + control$ziparam <- ifelse((args$zicovar), "covar", match.arg(zi)) + + ## initialization + if (control$trace > 0) cat("\n Initialization...") + myPLN <- ZIPLNnetworkfamily$new(penalties, args$Y, list(PLN = args$X, ZI = args$X0), args$O, args$w, args$formula, control) + + ## optimization + if (control$trace > 0) cat("\n Adjusting", + length(myPLN$penalties), "ZI-PLN with sparse inverse covariance estimation and", + control$ziparam, "specific parameter(s) in Zero inflation component.\n") + myPLN$optimize(control$config_optim) + + if (control$trace > 0) cat("\n DONE!\n") + myPLN +} + +#' Control of PLNnetwork fit +#' +#' Helper to define list of parameters to control the PLN fit. All arguments have defaults. +#' +#' @param backend optimization back used, either "nlopt" or "torch". Default is "nlopt" +#' @param inception_cov Covariance structure used for the inception model used to initialize the PLNfamily. Defaults to "full" and can be constrained to "diagonal" and "spherical". +#' @param config_optim a list for controlling the optimizer (either "nlopt" or "torch" backend). See details +#' @param config_post a list for controlling the post-treatment (optional bootstrap, jackknife, R2, etc). +#' @param trace a integer for verbosity. +#' @param n_penalties an integer that specifies the number of values for the penalty grid when internally generated. Ignored when penalties is non `NULL` +#' @param min_ratio the penalty grid ranges from the minimal value that produces a sparse to this value multiplied by `min_ratio`. Default is 0.1. +#' @param penalize_diagonal boolean: should the diagonal terms be penalized in the graphical-Lasso? Default is \code{TRUE} +#' @param penalty_weights either a single or a list of p x p matrix of weights (default filled with 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values. +#' @param inception Set up the parameters initialization: by default, the model is initialized with a multivariate linear model applied on +#' log-transformed data, and with the same formula as the one provided by the user. However, the user can provide a PLNfit (typically obtained from a previous fit), +#' which sometimes speeds up the inference. +#' +#' @return list of parameters configuring the fit. +#' @inherit PLN_param details +#' @details See [PLN_param()] for a full description of the generic optimization parameters. PLNnetwork_param() also has two additional parameters controlling the optimization due the inner-outer loop structure of the optimizer: +#' * "ftol_out" outer solver stops when an optimization step changes the objective function by less than xtol multiplied by the absolute value of the parameter. Default is 1e-6 +#' * "maxit_out" outer solver stops when the number of iteration exceeds maxit_out. Default is 50 +#' +#' @seealso [PLN_param()] +#' @export +ZIPLNnetwork_param <- function( + backend = c("nlopt"), + inception_cov = c("full", "spherical", "diagonal"), + trace = 1 , + n_penalties = 30 , + min_ratio = 0.1 , + penalize_diagonal = TRUE , + penalty_weights = NULL , + config_post = list(), + config_optim = list(), + inception = NULL +) { + + if (!is.null(inception)) stopifnot(isZIPLNfit(inception)) + + ## post-treatment config + config_pst <- config_post_default_PLNnetwork + config_pst[names(config_post)] <- config_post + config_pst$trace <- trace + + ## optimization config + stopifnot(backend %in% c("nlopt")) + stopifnot(config_optim$algorithm %in% available_algorithms_nlopt) + config_opt <- config_default_nlopt + config_opt$trace <- trace + config_opt$ftol_out <- 1e-6 + config_opt$maxit_out <- 100 + config_opt$approx_ZI <- FALSE + config_opt[names(config_optim)] <- config_optim + inception_cov <- match.arg(inception_cov) + + structure(list( + backend = backend , + trace = trace , + inception_cov = inception_cov , + n_penalties = n_penalties , + min_ratio = min_ratio , + penalize_diagonal = penalize_diagonal, + penalty_weights = penalty_weights , + jackknife = FALSE , + bootstrap = 0 , + variance = FALSE , + config_post = config_pst , + config_optim = config_opt , + inception = inception ), class = "ZIPLNmodels_param") +} diff --git a/R/ZIPLNnetworkfamily-class.R b/R/ZIPLNnetworkfamily-class.R new file mode 100644 index 00000000..e9b11b7b --- /dev/null +++ b/R/ZIPLNnetworkfamily-class.R @@ -0,0 +1,370 @@ +#' An R6 Class to represent a collection of ZIPLNnetwork +#' +#' @description The function [ZIPLNnetwork()] produces an instance of this class. +#' +#' This class comes with a set of methods, some of them being useful for the user: +#' See the documentation for [getBestModel()], +#' [getModel()] and [plot()][plot.ZIPLNnetworkfamily()] +#' +## Parameters shared by many methods +#' @param penalties a vector of positive real number controlling the level of sparsity of the underlying network. +#' @param responses the matrix of responses common to every models +#' @param covariates the matrix of covariates common to every models +#' @param offsets the matrix of offsets common to every models +#' @param weights the vector of observation weights +#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param control a list for controlling the optimization. +#' @param var value of the parameter (`rank` for PLNPCA, `sparsity` for PLNnetwork) that identifies the model to be extracted from the collection. If no exact match is found, the model with closest parameter value is returned with a warning. +#' @param index Integer index of the model to be returned. Only the first value is taken into account +#' +#' @include PLNfamily-class.R +#' @importFrom R6 R6Class +#' @importFrom glassoFast glassoFast +#' @examples +#' data(trichoptera) +#' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) +#' fits <- PLNnetwork(Abundance ~ 1, data = trichoptera) +#' class(fits) +#' @seealso The function [ZIPLNnetwork()], the class [`ZIPLNfit_sparse`] +ZIPLNnetworkfamily <- R6Class( + classname = "ZIPLNnetworkfamily", + inherit = PLNfamily, + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## PUBLIC MEMBERS ------ + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + public = list( + covariates_ZI = NULL, # a field to store the covariates of the ZI + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Creation functions ---------------- + #' @description Initialize all models in the collection + #' @return Update current [`PLNnetworkfit`] with smart starting values + initialize = function(penalties, responses, covariates, offsets, weights, formula, control) { + + ## Initialize fields shared by the super class + super$initialize(responses, covariates$PLN, offsets, weights, control) + self$covariates_ZI <- covariates$ZI + + ## A basic model for inception, useless one is defined by the user +### TODO check if it is useful + if (is.null(control$inception)) { + + # CHECK_ME_TORCH_GPU + # This appears to be in torch_gpu only. The commented out line below is + # in both PLNmodels/master and PLNmodels/dev. + myPLN <- switch( + control$inception_cov, + "spherical" = ZIPLNfit_spherical$new(responses, covariates, offsets, weights, formula, control), + "diagonal" = ZIPLNfit_diagonal$new(responses, covariates, offsets, weights, formula, control), + ZIPLNfit$new(responses, covariates, offsets, weights, formula, control) # defaults to full + ) + ## Allow inception with spherical / diagonal / full PLNfit before switching back to PLNfit_fixedcov + ## for the inner-outer loop of PLNnetwork. + myPLN$optimize(responses, covariates, offsets, weights, control$config_optim) + control$inception <- myPLN + } + + if (is.null(control$penalty_weights)) + control$penalty_weights <- matrix(1, ncol(responses), ncol(responses)) + ## Get the number of penalty + if (is.null(penalties)) { + if (is.list(control$penalty_weights)) + control$n_penalties <- length(control$penalty_weights) + } else { + control$n_penalties <- length(penalties) + } + ## Define a matrix of weights for each penalty + if (!is.list(control$penalty_weights)) + list_penalty_weights <- rep(list(control$penalty_weights), control$n_penalties) + else + list_penalty_weights <- control$penalty_weights + + ## Get an appropriate grid of penalties + if (is.null(penalties)) { + if (control$trace > 1) cat("\n Recovering an appropriate grid of penalties.") + # CHECK_ME_TORCH_GPU + # This appears to be in torch_gpu only. The commented out line below is + # in both PLNmodels/master and PLNmodels/dev. + # changed it to other one + max_pen <- list_penalty_weights %>% + map(~ as.matrix(myPLN$model_par$Sigma) / .x) %>% + # map(~ control$inception$model_par$Sigma / .x) %>% + map_dbl(~ max(abs(.x[upper.tri(.x, diag = control$penalize_diagonal)]))) %>% + max() + penalties <- 10^seq(log10(max_pen), log10(max_pen*control$min_ratio), len = control$n_penalties) + } else { + if (control$trace > 1) cat("\nPenalties already set by the user") + stopifnot(all(penalties > 0)) + } + ## Sort the penalty in decreasing order + o <- order(penalties, decreasing = TRUE) + private$params <- penalties[o] + list_penalty_weights <- list_penalty_weights[o] + + ## instantiate as many models as penalties + control$trace <- 0 + self$models <- map2(private$params, list_penalty_weights, function(penalty, penalty_weights) { + control$penalty <- penalty + control$penalty_weights <- penalty_weights + ZIPLNfit_sparse$new(responses, covariates, offsets, weights, formula, control) + }) + }, + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Optimization ---------------------- + #' @description Call to the C++ optimizer on all models of the collection + #' @param config a list for controlling the optimization. + optimize = function(config) { + ## Go along the penalty grid (i.e the models) + for (m in seq_along(self$models)) { + + if (config$trace == 1) { + cat("\tsparsifying penalty =", self$models[[m]]$penalty, "\r") + flush.console() + } + if (config$trace > 1) { + cat("\tsparsifying penalty =", self$models[[m]]$penalty, "- iteration:") + } + self$models[[m]]$optimize(self$responses, list(PLN = self$covariates, ZI = self$covariates_ZI), self$offsets, self$weights, config) + ## Save time by starting the optimization of model m + 1 with optimal parameters of model m + if (m < length(self$penalties)) + self$models[[m + 1]]$update( + B = self$models[[m]]$model_par$B, + M = self$models[[m]]$var_par$M, + S = self$models[[m]]$var_par$S + ) + + if (config$trace > 1) { + cat("\r \r") + flush.console() + } + } + + }, + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Stability ------------------------- + #' @description Compute the stability path by stability selection + #' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines the number of subsamples used in the stability selection. Automatically set to 20 subsamples with size \code{10*sqrt(n)} if \code{n >= 144} and \code{0.8*n} otherwise following Liu et al. (2010) recommendations. + #' @param control a list controlling the main optimization process in each call to PLNnetwork. See [PLNnetwork()] for details. + stability_selection = function(subsamples = NULL, control = PLNnetwork_param()) { + + ## select default subsamples according + if (is.null(subsamples)) { + subsample.size <- round(ifelse(private$n >= 144, 10*sqrt(private$n), 0.8*private$n)) + subsamples <- replicate(20, sample.int(private$n, subsample.size), simplify = FALSE) + } + + ## got for stability selection + cat("\nStability Selection for PLNnetwork: ") + cat("\nsubsampling: ") + + stabs_out <- future.apply::future_lapply(subsamples, function(subsample) { + cat("+") + inception_ <- self$getModel(self$penalties[1]) + inception_$update( + M = inception_$var_par$M[subsample, ], + S = inception_$var_par$S[subsample, ] + ) + + ## force some control parameters + control$inception = inception_ + control$penalty_weights = map(self$models, "penalty_weights") + control$penalize_diagonal = (sum(diag(inception_$penalty_weights)) != 0) + control$trace <- 0 + control$config_optim$trace <- 0 + + myPLN <- PLNnetworkfamily$new(penalties = self$penalties, + responses = self$responses [subsample, , drop = FALSE], + covariates = self$covariates[subsample, , drop = FALSE], + offsets = self$offsets [subsample, , drop = FALSE], + formula = private$formula, + weights = self$weights [subsample], control = control) + + myPLN$optimize(control$config_optim) + nets <- do.call(cbind, lapply(myPLN$models, function(model) { + as.matrix(model$latent_network("support"))[upper.tri(diag(private$p))] + })) + nets + }, future.seed = TRUE, future.scheduling = structure(TRUE, ordering = "random")) + + prob <- Reduce("+", stabs_out, accumulate = FALSE) / length(subsamples) + ## formatting/tyding + node_set <- colnames(self$getModel(index = 1)$model_par$B) + colnames(prob) <- self$penalties + private$stab_path <- prob %>% + as.data.frame() %>% + mutate(Edge = 1:n()) %>% + gather(key = "Penalty", value = "Prob", -Edge) %>% + mutate(Penalty = as.numeric(Penalty), + Node1 = node_set[edge_to_node(Edge)$node1], + Node2 = node_set[edge_to_node(Edge)$node2], + Edge = paste0(Node1, "|", Node2)) + + invisible(subsamples) + }, + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Extractors ------------------------ + #' @description Extract the regularization path of a [`PLNnetworkfamily`] + #' @param precision Logical. Should the regularization path be extracted from the precision matrix Omega (`TRUE`, default) or from the variance matrix Sigma (`FALSE`) + #' @param corr Logical. Should the matrix be transformed to (partial) correlation matrix before extraction? Defaults to `TRUE` + coefficient_path = function(precision = TRUE, corr = TRUE) { + lapply(self$penalties, function(x) { + if (precision) { + G <- self$getModel(x)$model_par$Omega + } else { + G <- self$getModel(x)$model_par$Sigma + dimnames(G) <- dimnames(self$getModel(x)$model_par$Omega) + } + if (corr) { + G <- ifelse(precision, -1, 1) * G / tcrossprod(sqrt(diag(G))) + } + setNames( + cbind( + expand.grid(colnames(G), rownames(G)), + as.vector(G)), c("Node1", "Node2", "Coeff") + ) %>% + mutate(Penalty = x, + Node1 = as.character(Node1), + Node2 = as.character(Node2), + Edge = paste0(Node1, "|", Node2)) %>% + filter(Node1 < Node2) + }) %>% bind_rows() + }, + + #' @description Extract the best network in the family according to some criteria + #' @param crit character. Criterion used to perform the selection. Is "StARS" is chosen but `$stability` field is empty, will compute stability path. + #' @param stability Only used for "StARS" criterion. A scalar indicating the target stability (= 1 - 2 beta) at which the network is selected. Default is `0.9`. + getBestModel = function(crit = c("BIC", "EBIC", "StARS"), stability = 0.9){ + crit <- match.arg(crit) + if (crit == "StARS") { + if (is.null(private$stab_path)) self$stability_selection() + id_stars <- self$criteria %>% + select(param, stability) %>% rename(Stability = stability) %>% + filter(Stability > stability) %>% + pull(param) %>% min() %>% match(self$penalties) + model <- self$models[[id_stars]]$clone() + } else { + stopifnot(!anyNA(self$criteria[[crit]])) + id <- 1 + if (length(self$criteria[[crit]]) > 1) { + id <- which.max(self$criteria[[crit]]) + } + model <- self$models[[id]]$clone() + } + model + }, + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Graphical methods ----------------- + #' @description Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a [`PLNnetworkfamily`]) + #' @param criteria vector of characters. The criteria to plot in `c("loglik", "pen_loglik", "BIC", "EBIC")`. Defaults to all of them. + #' @param reverse A logical indicating whether to plot the value of the criteria in the "natural" direction + #' (loglik - 0.5 penalty) or in the "reverse" direction (-2 loglik + penalty). Default to FALSE, i.e use the + #' natural direction, on the same scale as the log-likelihood.. + #' @param log.x logical: should the x-axis be represented in log-scale? Default is `TRUE`. + #' @return a [`ggplot`] graph + plot = function(criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), reverse = FALSE, log.x = TRUE) { + vlines <- sapply(intersect(criteria, c("BIC", "EBIC")) , function(crit) self$getBestModel(crit)$penalty) + p <- super$plot(criteria, reverse) + xlab("penalty") + geom_vline(xintercept = vlines, linetype = "dashed", alpha = 0.25) + if (log.x) p <- p + ggplot2::coord_trans(x = "log10") + p + }, + + #' @description Plot stability path + #' @param stability scalar: the targeted level of stability in stability plot. Default is `0.9`. + #' @param log.x logical: should the x-axis be represented in log-scale? Default is `TRUE`. + #' @return a [`ggplot`] graph + plot_stars = function(stability = 0.9, log.x = TRUE) { + if (anyNA(self$stability)) stop("stability selection has not yet been performed! Use stability_selection()") + dplot <- self$criteria %>% select(param, density, stability) %>% + rename(Penalty = param) %>% + gather(key = "Metric", value = "Value", stability:density) + penalty_stars <- dplot %>% filter(Metric == "stability" & Value >= stability) %>% + pull(Penalty) %>% min() + + p <- ggplot(dplot, aes(x = Penalty, y = Value, group = Metric, color = Metric)) + + geom_point() + geom_line() + theme_bw() + + ## Add information correspinding to best lambda + geom_vline(xintercept = penalty_stars, linetype = 2) + + geom_hline(yintercept = stability, linetype = 2) + + annotate(x = penalty_stars, y = 0, + label = paste("lambda == ", round(penalty_stars, 5)), + parse = TRUE, hjust = -0.05, vjust = 0, geom = "text") + + annotate(x = penalty_stars, y = stability, + label = paste("stability == ", stability), + parse = TRUE, hjust = -0.05, vjust = 1.5, geom = "text") + if (log.x) p <- p + ggplot2::scale_x_log10() + annotation_logticks(sides = "b") + p + }, + + #' @description Plot objective value of the optimization problem along the penalty path + #' @return a [`ggplot`] graph + plot_objective = function() { + objective <- unlist(lapply(self$models, function(model) model$optim_par$objective)) + changes <- cumsum(unlist(lapply(self$models, function(model) model$optim_par$outer_iterations))) + dplot <- data.frame(iteration = 1:length(objective), objective = objective) + p <- ggplot(dplot, aes(x = iteration, y = objective)) + geom_line() + + geom_vline(xintercept = changes, linetype="dashed", alpha = 0.25) + + ggtitle("Objective along the alternate algorithm") + xlab("iteration (+ changes of model)") + + annotate("text", x = changes, y = min(dplot$objective), angle = 90, + label = paste("penalty=",format(self$criteria$param, digits = 1)), hjust = -.1, size = 3, alpha = 0.7) + theme_bw() + p + }, + + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Print methods --------------------- + #' @description User friendly print method + show = function() { + super$show() + cat(" Task: Network Inference \n") + cat("========================================================\n") + cat(" -", length(self$penalties) , "penalties considered: from", min(self$penalties), "to", max(self$penalties), "\n") + cat(" - Best model (greater BIC): lambda =", format(self$getBestModel("BIC")$penalty, digits = 3), "\n") + cat(" - Best model (greater EBIC): lambda =", format(self$getBestModel("EBIC")$penalty, digits = 3), "\n") + if (!anyNA(self$criteria$stability)) + cat(" - Best model (regarding StARS): lambda =", format(self$getBestModel("StARS")$penalty, digits = 3), "\n") + } + ), + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## PRIVATE MEMBERS ------ + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + private = list( + stab_path = NULL # a field to store the stability path, + ), + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## ACTIVE BINDINGS ------ + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + active = list( + #' @field penalties the sparsity level of the network in the successively fitted models + penalties = function() private$params, + #' @field stability_path the stability path of each edge as returned by the stars procedure + stability_path = function() private$stab_path, + #' @field stability mean edge stability along the penalty path + stability = function() { + if (!is.null(private$stab_path)) { + stability <- self$stability_path %>% + dplyr::select(Penalty, Prob) %>% + group_by(Penalty) %>% + summarize(Stability = 1 - mean(4 * Prob * (1 - Prob))) %>% + arrange(desc(Penalty)) %>% + pull(Stability) + } else { + stability <- rep(NA, length(self$penalties)) + } + stability + }, + #' @field criteria a data frame with the values of some criteria (approximated log-likelihood, (E)BIC, ICL and R2, stability) for the collection of models / fits + #' BIC, ICL and EBIC are defined so that they are on the same scale as the model log-likelihood, i.e. with the form, loglik - 0.5 penalty + criteria = function() {mutate(super$criteria, stability = self$stability)} + ) + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## END OF CLASS ---- + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +) + diff --git a/man/PLNnetwork.Rd b/man/PLNnetwork.Rd index e711f491..36184572 100644 --- a/man/PLNnetwork.Rd +++ b/man/PLNnetwork.Rd @@ -31,7 +31,8 @@ an R6 object with class \code{\link{PLNnetworkfamily}}, which contains a collection of models with class \code{\link{PLNnetworkfit}} } \description{ -Fit the sparse inverse covariance variant of the Poisson lognormal with a variational algorithm. Use the (g)lm syntax for model specification (covariates, offsets). +Fit the sparse inverse covariance variant of the Poisson lognormal with a variational algorithm +for a collection of sparsity parameter values distributed on a log scale. Use the (g)lm syntax for model specification (covariates, offsets). } \examples{ data(trichoptera) diff --git a/man/ZIPLN_param.Rd b/man/ZIPLN_param.Rd index 29b1c5c3..13d6fb2e 100644 --- a/man/ZIPLN_param.Rd +++ b/man/ZIPLN_param.Rd @@ -10,6 +10,8 @@ ZIPLN_param( covariance = c("full", "diagonal", "spherical", "fixed", "sparse"), Omega = NULL, penalty = 0, + penalize_diagonal = TRUE, + penalty_weights = NULL, config_post = list(), config_optim = list(), inception = NULL @@ -26,6 +28,10 @@ ZIPLN_param( \item{penalty}{a user-defined penalty to sparsify the residual covariance. Defaults to 0 (no sparsity).} +\item{penalize_diagonal}{boolean: should the diagonal terms be penalized in the graphical-Lasso? Only relevant with sparse covariance. Default is \code{TRUE}} + +\item{penalty_weights}{p x p matrix of weights (default filled with 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values. Only relevant with sparse covariance.} + \item{config_post}{a list for controlling the post-treatments (optional bootstrap, jackknife, R2, etc.). See details} \item{config_optim}{a list for controlling the optimizer (either "nlopt" or "torch" backend). See details} diff --git a/man/ZIPLNfit_sparse.Rd b/man/ZIPLNfit_sparse.Rd index 5ec52b3f..f8b6cecd 100644 --- a/man/ZIPLNfit_sparse.Rd +++ b/man/ZIPLNfit_sparse.Rd @@ -25,9 +25,23 @@ plot(myPLN) \section{Active bindings}{ \if{html}{\out{
}} \describe{ +\item{\code{penalty}}{the global level of sparsity in the current model} + +\item{\code{penalty_weights}}{a matrix of weights controlling the amount of penalty element-wise.} + +\item{\code{n_edges}}{number of edges if the network (non null coefficient of the sparse precision matrix)} + \item{\code{nb_param}}{number of parameters in the current PLN model} \item{\code{vcov_model}}{character: the model used for the residual covariance} + +\item{\code{pen_loglik}}{variational lower bound of the l1-penalized loglikelihood} + +\item{\code{EBIC}}{variational lower bound of the EBIC} + +\item{\code{density}}{proportion of non-null edges in the network} + +\item{\code{criteria}}{a vector with loglik, penalized loglik, BIC, EBIC, ICL, R_squared, number of parameters, number of edges and graph density} } \if{html}{\out{
}} } diff --git a/man/ZIPLNnetwork.Rd b/man/ZIPLNnetwork.Rd new file mode 100644 index 00000000..de38ed41 --- /dev/null +++ b/man/ZIPLNnetwork.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ZIPLNnetwork.R +\name{ZIPLNnetwork} +\alias{ZIPLNnetwork} +\title{Zero Inflated Poisson lognormal model toward sparse network inference} +\usage{ +ZIPLNnetwork( + formula, + data, + subset, + weights, + zi = c("single", "row", "col"), + penalties = NULL, + control = ZIPLNnetwork_param() +) +} +\arguments{ +\item{formula}{an object of class "formula": a symbolic description of the model to be fitted.} + +\item{data}{an optional data frame, list or environment (or object coercible by as.data.frame to a data frame) containing the variables in the model. If not found in data, the variables are taken from environment(formula), typically the environment from which lm is called.} + +\item{subset}{an optional vector specifying a subset of observations to be used in the fitting process.} + +\item{weights}{an optional vector of observation weights to be used in the fitting process.} + +\item{zi}{a character describing the model used for zero inflation, either of +\itemize{ +\item "single" (default, one parameter shared by all counts) +\item "col" (one parameter per variable / feature) +\item "row" (one parameter per sample / individual). +If covariates are specified in the formula RHS (see details) this parameter is ignored. +}} + +\item{penalties}{an optional vector of positive real number controlling the level of sparsity of the underlying network. if NULL (the default), will be set internally. See \code{PLNnetwork_param()} for additional tuning of the penalty.} + +\item{control}{a list-like structure for controlling the optimization, with default generated by \code{\link[=ZIPLNnetwork_param]{ZIPLNnetwork_param()}}. See the associated documentation +for details.} +} +\value{ +an R6 object with class \code{\link{ZIPLNnetworkfamily}} +} +\description{ +Fit the sparse inverse covariance variant of the Zero Inflated Poisson lognormal with a variational algorithm +for a collection of sparsity parameter values distributed on a log scale. Use the (g)lm syntax for model specification (covariates, offsets). +} +\details{ +Covariates for the Zero-Inflation parameter (using a logistic regression model) can be specified in the formula RHS using the pipe +(\verb{~ PLN effect | ZI effect}) to separate covariates for the PLN part of the model from those for the Zero-Inflation part. +Note that different covariates can be used for each part. +} +\examples{ +data(trichoptera) +trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) +myZIPLNs <- ZIPLNnetwork(Abundance ~ 1, data = trichoptera, zi = "single") +} +\seealso{ +The classes \code{\link{ZIPLNfit}} and \code{\link{ZIPLNnetworkfamily}} +} diff --git a/man/ZIPLNnetwork_param.Rd b/man/ZIPLNnetwork_param.Rd new file mode 100644 index 00000000..ea3342d1 --- /dev/null +++ b/man/ZIPLNnetwork_param.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ZIPLNnetwork.R +\name{ZIPLNnetwork_param} +\alias{ZIPLNnetwork_param} +\title{Control of PLNnetwork fit} +\usage{ +ZIPLNnetwork_param( + backend = c("nlopt"), + inception_cov = c("full", "spherical", "diagonal"), + trace = 1, + n_penalties = 30, + min_ratio = 0.1, + penalize_diagonal = TRUE, + penalty_weights = NULL, + config_post = list(), + config_optim = list(), + inception = NULL +) +} +\arguments{ +\item{backend}{optimization back used, either "nlopt" or "torch". Default is "nlopt"} + +\item{inception_cov}{Covariance structure used for the inception model used to initialize the PLNfamily. Defaults to "full" and can be constrained to "diagonal" and "spherical".} + +\item{trace}{a integer for verbosity.} + +\item{n_penalties}{an integer that specifies the number of values for the penalty grid when internally generated. Ignored when penalties is non \code{NULL}} + +\item{min_ratio}{the penalty grid ranges from the minimal value that produces a sparse to this value multiplied by \code{min_ratio}. Default is 0.1.} + +\item{penalize_diagonal}{boolean: should the diagonal terms be penalized in the graphical-Lasso? Default is \code{TRUE}} + +\item{penalty_weights}{either a single or a list of p x p matrix of weights (default filled with 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values.} + +\item{config_post}{a list for controlling the post-treatment (optional bootstrap, jackknife, R2, etc).} + +\item{config_optim}{a list for controlling the optimizer (either "nlopt" or "torch" backend). See details} + +\item{inception}{Set up the parameters initialization: by default, the model is initialized with a multivariate linear model applied on +log-transformed data, and with the same formula as the one provided by the user. However, the user can provide a PLNfit (typically obtained from a previous fit), +which sometimes speeds up the inference.} +} +\value{ +list of parameters configuring the fit. +} +\description{ +Helper to define list of parameters to control the PLN fit. All arguments have defaults. +} +\details{ +See \code{\link[=PLN_param]{PLN_param()}} for a full description of the generic optimization parameters. PLNnetwork_param() also has two additional parameters controlling the optimization due the inner-outer loop structure of the optimizer: +\itemize{ +\item "ftol_out" outer solver stops when an optimization step changes the objective function by less than xtol multiplied by the absolute value of the parameter. Default is 1e-6 +\item "maxit_out" outer solver stops when the number of iteration exceeds maxit_out. Default is 50 +} +} +\seealso{ +\code{\link[=PLN_param]{PLN_param()}} +} diff --git a/man/ZIPLNnetworkfamily.Rd b/man/ZIPLNnetworkfamily.Rd new file mode 100644 index 00000000..ce24c693 --- /dev/null +++ b/man/ZIPLNnetworkfamily.Rd @@ -0,0 +1,275 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ZIPLNnetworkfamily-class.R +\name{ZIPLNnetworkfamily} +\alias{ZIPLNnetworkfamily} +\title{An R6 Class to represent a collection of ZIPLNnetwork} +\description{ +The function \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}} produces an instance of this class. + +This class comes with a set of methods, some of them being useful for the user: +See the documentation for \code{\link[=getBestModel]{getBestModel()}}, +\code{\link[=getModel]{getModel()}} and \link[=plot.ZIPLNnetworkfamily]{plot()} +} +\examples{ +data(trichoptera) +trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) +fits <- PLNnetwork(Abundance ~ 1, data = trichoptera) +class(fits) +} +\seealso{ +The function \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}}, the class \code{\link{ZIPLNfit_sparse}} +} +\section{Super class}{ +\code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{ZIPLNnetworkfamily} +} +\section{Active bindings}{ +\if{html}{\out{
}} +\describe{ +\item{\code{penalties}}{the sparsity level of the network in the successively fitted models} + +\item{\code{stability_path}}{the stability path of each edge as returned by the stars procedure} + +\item{\code{stability}}{mean edge stability along the penalty path} + +\item{\code{criteria}}{a data frame with the values of some criteria (approximated log-likelihood, (E)BIC, ICL and R2, stability) for the collection of models / fits +BIC, ICL and EBIC are defined so that they are on the same scale as the model log-likelihood, i.e. with the form, loglik - 0.5 penalty} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-ZIPLNnetworkfamily-new}{\code{ZIPLNnetworkfamily$new()}} +\item \href{#method-ZIPLNnetworkfamily-optimize}{\code{ZIPLNnetworkfamily$optimize()}} +\item \href{#method-ZIPLNnetworkfamily-stability_selection}{\code{ZIPLNnetworkfamily$stability_selection()}} +\item \href{#method-ZIPLNnetworkfamily-coefficient_path}{\code{ZIPLNnetworkfamily$coefficient_path()}} +\item \href{#method-ZIPLNnetworkfamily-getBestModel}{\code{ZIPLNnetworkfamily$getBestModel()}} +\item \href{#method-ZIPLNnetworkfamily-plot}{\code{ZIPLNnetworkfamily$plot()}} +\item \href{#method-ZIPLNnetworkfamily-plot_stars}{\code{ZIPLNnetworkfamily$plot_stars()}} +\item \href{#method-ZIPLNnetworkfamily-plot_objective}{\code{ZIPLNnetworkfamily$plot_objective()}} +\item \href{#method-ZIPLNnetworkfamily-show}{\code{ZIPLNnetworkfamily$show()}} +\item \href{#method-ZIPLNnetworkfamily-clone}{\code{ZIPLNnetworkfamily$clone()}} +} +} +\if{html}{\out{ +
Inherited methods + +
+}} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ZIPLNnetworkfamily-new}{}}} +\subsection{Method \code{new()}}{ +Initialize all models in the collection +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ZIPLNnetworkfamily$new( + penalties, + responses, + covariates, + offsets, + weights, + formula, + control +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{penalties}}{a vector of positive real number controlling the level of sparsity of the underlying network.} + +\item{\code{responses}}{the matrix of responses common to every models} + +\item{\code{covariates}}{the matrix of covariates common to every models} + +\item{\code{offsets}}{the matrix of offsets common to every models} + +\item{\code{weights}}{the vector of observation weights} + +\item{\code{formula}}{model formula used for fitting, extracted from the formula in the upper-level call} + +\item{\code{control}}{a list for controlling the optimization.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Update current \code{\link{PLNnetworkfit}} with smart starting values +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ZIPLNnetworkfamily-optimize}{}}} +\subsection{Method \code{optimize()}}{ +Call to the C++ optimizer on all models of the collection +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ZIPLNnetworkfamily$optimize(config)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{config}}{a list for controlling the optimization.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ZIPLNnetworkfamily-stability_selection}{}}} +\subsection{Method \code{stability_selection()}}{ +Compute the stability path by stability selection +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ZIPLNnetworkfamily$stability_selection( + subsamples = NULL, + control = PLNnetwork_param() +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{subsamples}}{a list of vectors describing the subsamples. The number of vectors (or list length) determines the number of subsamples used in the stability selection. Automatically set to 20 subsamples with size \code{10*sqrt(n)} if \code{n >= 144} and \code{0.8*n} otherwise following Liu et al. (2010) recommendations.} + +\item{\code{control}}{a list controlling the main optimization process in each call to PLNnetwork. See \code{\link[=PLNnetwork]{PLNnetwork()}} for details.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ZIPLNnetworkfamily-coefficient_path}{}}} +\subsection{Method \code{coefficient_path()}}{ +Extract the regularization path of a \code{\link{PLNnetworkfamily}} +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ZIPLNnetworkfamily$coefficient_path(precision = TRUE, corr = TRUE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{precision}}{Logical. Should the regularization path be extracted from the precision matrix Omega (\code{TRUE}, default) or from the variance matrix Sigma (\code{FALSE})} + +\item{\code{corr}}{Logical. Should the matrix be transformed to (partial) correlation matrix before extraction? Defaults to \code{TRUE}} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ZIPLNnetworkfamily-getBestModel}{}}} +\subsection{Method \code{getBestModel()}}{ +Extract the best network in the family according to some criteria +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ZIPLNnetworkfamily$getBestModel( + crit = c("BIC", "EBIC", "StARS"), + stability = 0.9 +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{crit}}{character. Criterion used to perform the selection. Is "StARS" is chosen but \verb{$stability} field is empty, will compute stability path.} + +\item{\code{stability}}{Only used for "StARS" criterion. A scalar indicating the target stability (= 1 - 2 beta) at which the network is selected. Default is \code{0.9}.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ZIPLNnetworkfamily-plot}{}}} +\subsection{Method \code{plot()}}{ +Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a \code{\link{PLNnetworkfamily}}) +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ZIPLNnetworkfamily$plot( + criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), + reverse = FALSE, + log.x = TRUE +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{criteria}}{vector of characters. The criteria to plot in \code{c("loglik", "pen_loglik", "BIC", "EBIC")}. Defaults to all of them.} + +\item{\code{reverse}}{A logical indicating whether to plot the value of the criteria in the "natural" direction +(loglik - 0.5 penalty) or in the "reverse" direction (-2 loglik + penalty). Default to FALSE, i.e use the +natural direction, on the same scale as the log-likelihood..} + +\item{\code{log.x}}{logical: should the x-axis be represented in log-scale? Default is \code{TRUE}.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +a \code{\link{ggplot}} graph +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ZIPLNnetworkfamily-plot_stars}{}}} +\subsection{Method \code{plot_stars()}}{ +Plot stability path +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ZIPLNnetworkfamily$plot_stars(stability = 0.9, log.x = TRUE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{stability}}{scalar: the targeted level of stability in stability plot. Default is \code{0.9}.} + +\item{\code{log.x}}{logical: should the x-axis be represented in log-scale? Default is \code{TRUE}.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +a \code{\link{ggplot}} graph +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ZIPLNnetworkfamily-plot_objective}{}}} +\subsection{Method \code{plot_objective()}}{ +Plot objective value of the optimization problem along the penalty path +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ZIPLNnetworkfamily$plot_objective()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +a \code{\link{ggplot}} graph +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ZIPLNnetworkfamily-show}{}}} +\subsection{Method \code{show()}}{ +User friendly print method +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ZIPLNnetworkfamily$show()}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ZIPLNnetworkfamily-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ZIPLNnetworkfamily$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} From 8baf6952b00ad04754622f251313666c26f6562b Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Fri, 9 Feb 2024 16:57:05 +0100 Subject: [PATCH 09/30] fix in plot_objectif for ZIfamily [ci skip] --- R/ZIPLNnetworkfamily-class.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ZIPLNnetworkfamily-class.R b/R/ZIPLNnetworkfamily-class.R index e9b11b7b..bd8ff397 100644 --- a/R/ZIPLNnetworkfamily-class.R +++ b/R/ZIPLNnetworkfamily-class.R @@ -302,7 +302,7 @@ ZIPLNnetworkfamily <- R6Class( #' @return a [`ggplot`] graph plot_objective = function() { objective <- unlist(lapply(self$models, function(model) model$optim_par$objective)) - changes <- cumsum(unlist(lapply(self$models, function(model) model$optim_par$outer_iterations))) + changes <- cumsum(unlist(lapply(self$models, function(model) model$optim_par$iterations))) dplot <- data.frame(iteration = 1:length(objective), objective = objective) p <- ggplot(dplot, aes(x = iteration, y = objective)) + geom_line() + geom_vline(xintercept = changes, linetype="dashed", alpha = 0.25) + From 21bb537603f89903d131f915b9e2afbe258fb64f Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Sat, 10 Feb 2024 08:10:11 +0100 Subject: [PATCH 10/30] start sharing (ZI)PLNnetworkfamilies by introducing a virtual class for general family of network --- DESCRIPTION | 1 - R/PLNnetworkfamily-S3methods.R | 3 - R/PLNnetworkfamily-class.R | 496 ++++++++++++++++++++++----------- R/PLNnetworkfit-class.R | 2 - R/ZIPLNnetwork.R | 2 +- R/ZIPLNnetworkfamily-class.R | 370 ------------------------ man/PLNnetworkfamily.Rd | 167 +---------- man/PLNnetworkfamilyvirtual.Rd | 246 ++++++++++++++++ man/ZIPLNnetwork_param.Rd | 2 +- man/ZIPLNnetworkfamily.Rd | 169 +---------- 10 files changed, 607 insertions(+), 851 deletions(-) delete mode 100644 R/ZIPLNnetworkfamily-class.R create mode 100644 man/PLNnetworkfamilyvirtual.Rd diff --git a/DESCRIPTION b/DESCRIPTION index bd42f1f3..b203c763 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -90,7 +90,6 @@ Collate: 'ZIPLNfit-class.R' 'ZIPLN.R' 'ZIPLNfit-S3methods.R' - 'ZIPLNnetworkfamily-class.R' 'ZIPLNnetwork.R' 'barents.R' 'import_utils.R' diff --git a/R/PLNnetworkfamily-S3methods.R b/R/PLNnetworkfamily-S3methods.R index ca2648cf..2287ff6a 100644 --- a/R/PLNnetworkfamily-S3methods.R +++ b/R/PLNnetworkfamily-S3methods.R @@ -69,7 +69,6 @@ getBestModel.PLNnetworkfamily <- function(Robject, crit = c("BIC", "EBIC", "StAR Robject$getBestModel(match.arg(crit), stability) } - #' Extract the regularization path of a PLNnetwork fit #' #' @name coefficient_path @@ -120,8 +119,6 @@ stability_selection <- function(Robject, subsamples = NULL, control = PLNnetwork } } - - #' Extract edge selection frequency in bootstrap subsamples #' #' @description Extracts edge selection frequency in networks reconstructed from bootstrap subsamples diff --git a/R/PLNnetworkfamily-class.R b/R/PLNnetworkfamily-class.R index 00f2cec8..471983bd 100644 --- a/R/PLNnetworkfamily-class.R +++ b/R/PLNnetworkfamily-class.R @@ -1,4 +1,4 @@ -#' An R6 Class to represent a collection of PLNnetworkfit +#' An R6 Class to virtually represent a collection of PLNnetworkfit (either standard or ZI) #' #' @description The function [PLNnetwork()] produces an instance of this class. #' @@ -20,14 +20,9 @@ #' @include PLNfamily-class.R #' @importFrom R6 R6Class #' @importFrom glassoFast glassoFast -#' @examples -#' data(trichoptera) -#' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) -#' fits <- PLNnetwork(Abundance ~ 1, data = trichoptera) -#' class(fits) #' @seealso The function [PLNnetwork()], the class [`PLNnetworkfit`] -PLNnetworkfamily <- R6Class( - classname = "PLNnetworkfamily", +PLNnetworkfamilyvirtual <- R6Class( + classname = "PLNnetworkfamilyvirtual", inherit = PLNfamily, ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## PUBLIC MEMBERS ------ @@ -42,25 +37,6 @@ PLNnetworkfamily <- R6Class( ## Initialize fields shared by the super class super$initialize(responses, covariates, offsets, weights, control) - ## A basic model for inception, useless one is defined by the user -### TODO check if it is useful - if (is.null(control$inception)) { - - # CHECK_ME_TORCH_GPU - # This appears to be in torch_gpu only. The commented out line below is - # in both PLNmodels/master and PLNmodels/dev. - myPLN <- switch( - control$inception_cov, - "spherical" = PLNfit_spherical$new(responses, covariates, offsets, weights, formula, control), - "diagonal" = PLNfit_diagonal$new(responses, covariates, offsets, weights, formula, control), - PLNfit$new(responses, covariates, offsets, weights, formula, control) # defaults to full - ) - ## Allow inception with spherical / diagonal / full PLNfit before switching back to PLNfit_fixedcov - ## for the inner-outer loop of PLNnetwork. - myPLN$optimize(responses, covariates, offsets, weights, control$config_optim) - control$inception <- myPLN - } - if (is.null(control$penalty_weights)) control$penalty_weights <- matrix(1, ncol(responses), ncol(responses)) ## Get the number of penalty @@ -76,16 +52,19 @@ PLNnetworkfamily <- R6Class( else list_penalty_weights <- control$penalty_weights + ## Check consistency of weights and optionnaly silent diagonal penalties + list_penalty_weights <- + map(list_penalty_weights, function(penalty_weights) { + stopifnot(isSymmetric(penalty_weights), all(penalty_weights >= 0)) + if (!control$penalize_diagonal) diag(penalty_weights) <- 0 + penalty_weights + }) + ## Get an appropriate grid of penalties if (is.null(penalties)) { if (control$trace > 1) cat("\n Recovering an appropriate grid of penalties.") - # CHECK_ME_TORCH_GPU - # This appears to be in torch_gpu only. The commented out line below is - # in both PLNmodels/master and PLNmodels/dev. - # changed it to other one max_pen <- list_penalty_weights %>% - map(~ as.matrix(myPLN$model_par$Sigma) / .x) %>% - # map(~ control$inception$model_par$Sigma / .x) %>% + map(~ as.matrix(control$inception$model_par$Sigma) / .x) %>% map_dbl(~ max(abs(.x[upper.tri(.x, diag = control$penalize_diagonal)]))) %>% max() penalties <- 10^seq(log10(max_pen), log10(max_pen*control$min_ratio), len = control$n_penalties) @@ -96,14 +75,7 @@ PLNnetworkfamily <- R6Class( ## Sort the penalty in decreasing order o <- order(penalties, decreasing = TRUE) private$params <- penalties[o] - list_penalty_weights <- list_penalty_weights[o] - - ## instantiate as many models as penalties - control$trace <- 0 - self$models <- map2(private$params, list_penalty_weights, function(penalty, penalty_weights) { - PLNnetworkfit$new(penalty, penalty_weights, responses, covariates, offsets, weights, formula, control) - }) - + private$penalties_weights <- list_penalty_weights[o] }, ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -139,68 +111,6 @@ PLNnetworkfamily <- R6Class( }, - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - ## Stability ------------------------- - #' @description Compute the stability path by stability selection - #' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines the number of subsamples used in the stability selection. Automatically set to 20 subsamples with size \code{10*sqrt(n)} if \code{n >= 144} and \code{0.8*n} otherwise following Liu et al. (2010) recommendations. - #' @param control a list controlling the main optimization process in each call to PLNnetwork. See [PLNnetwork()] for details. - stability_selection = function(subsamples = NULL, control = PLNnetwork_param()) { - - ## select default subsamples according - if (is.null(subsamples)) { - subsample.size <- round(ifelse(private$n >= 144, 10*sqrt(private$n), 0.8*private$n)) - subsamples <- replicate(20, sample.int(private$n, subsample.size), simplify = FALSE) - } - - ## got for stability selection - cat("\nStability Selection for PLNnetwork: ") - cat("\nsubsampling: ") - - stabs_out <- future.apply::future_lapply(subsamples, function(subsample) { - cat("+") - inception_ <- self$getModel(self$penalties[1]) - inception_$update( - M = inception_$var_par$M[subsample, ], - S = inception_$var_par$S[subsample, ] - ) - - ## force some control parameters - control$inception = inception_ - control$penalty_weights = map(self$models, "penalty_weights") - control$penalize_diagonal = (sum(diag(inception_$penalty_weights)) != 0) - control$trace <- 0 - control$config_optim$trace <- 0 - - myPLN <- PLNnetworkfamily$new(penalties = self$penalties, - responses = self$responses [subsample, , drop = FALSE], - covariates = self$covariates[subsample, , drop = FALSE], - offsets = self$offsets [subsample, , drop = FALSE], - formula = private$formula, - weights = self$weights [subsample], control = control) - - myPLN$optimize(control$config_optim) - nets <- do.call(cbind, lapply(myPLN$models, function(model) { - as.matrix(model$latent_network("support"))[upper.tri(diag(private$p))] - })) - nets - }, future.seed = TRUE, future.scheduling = structure(TRUE, ordering = "random")) - - prob <- Reduce("+", stabs_out, accumulate = FALSE) / length(subsamples) - ## formatting/tyding - node_set <- colnames(self$getModel(index = 1)$model_par$B) - colnames(prob) <- self$penalties - private$stab_path <- prob %>% - as.data.frame() %>% - mutate(Edge = 1:n()) %>% - gather(key = "Penalty", value = "Prob", -Edge) %>% - mutate(Penalty = as.numeric(Penalty), - Node1 = node_set[edge_to_node(Edge)$node1], - Node2 = node_set[edge_to_node(Edge)$node2], - Edge = paste0(Node1, "|", Node2)) - - invisible(subsamples) - }, - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Extractors ------------------------ #' @description Extract the regularization path of a [`PLNnetworkfamily`] @@ -269,61 +179,61 @@ PLNnetworkfamily <- R6Class( p }, - #' @description Plot stability path - #' @param stability scalar: the targeted level of stability in stability plot. Default is `0.9`. - #' @param log.x logical: should the x-axis be represented in log-scale? Default is `TRUE`. - #' @return a [`ggplot`] graph - plot_stars = function(stability = 0.9, log.x = TRUE) { - if (anyNA(self$stability)) stop("stability selection has not yet been performed! Use stability_selection()") - dplot <- self$criteria %>% select(param, density, stability) %>% - rename(Penalty = param) %>% - gather(key = "Metric", value = "Value", stability:density) - penalty_stars <- dplot %>% filter(Metric == "stability" & Value >= stability) %>% - pull(Penalty) %>% min() - - p <- ggplot(dplot, aes(x = Penalty, y = Value, group = Metric, color = Metric)) + - geom_point() + geom_line() + theme_bw() + - ## Add information correspinding to best lambda - geom_vline(xintercept = penalty_stars, linetype = 2) + - geom_hline(yintercept = stability, linetype = 2) + - annotate(x = penalty_stars, y = 0, - label = paste("lambda == ", round(penalty_stars, 5)), - parse = TRUE, hjust = -0.05, vjust = 0, geom = "text") + - annotate(x = penalty_stars, y = stability, - label = paste("stability == ", stability), - parse = TRUE, hjust = -0.05, vjust = 1.5, geom = "text") - if (log.x) p <- p + ggplot2::scale_x_log10() + annotation_logticks(sides = "b") - p - }, - - #' @description Plot objective value of the optimization problem along the penalty path - #' @return a [`ggplot`] graph - plot_objective = function() { - objective <- unlist(lapply(self$models, function(model) model$optim_par$objective)) - changes <- cumsum(unlist(lapply(self$models, function(model) model$optim_par$outer_iterations))) - dplot <- data.frame(iteration = 1:length(objective), objective = objective) - p <- ggplot(dplot, aes(x = iteration, y = objective)) + geom_line() + - geom_vline(xintercept = changes, linetype="dashed", alpha = 0.25) + - ggtitle("Objective along the alternate algorithm") + xlab("iteration (+ changes of model)") + - annotate("text", x = changes, y = min(dplot$objective), angle = 90, - label = paste("penalty=",format(self$criteria$param, digits = 1)), hjust = -.1, size = 3, alpha = 0.7) + theme_bw() - p - }, - - - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - ## Print methods --------------------- - #' @description User friendly print method - show = function() { - super$show() - cat(" Task: Network Inference \n") - cat("========================================================\n") - cat(" -", length(self$penalties) , "penalties considered: from", min(self$penalties), "to", max(self$penalties), "\n") - cat(" - Best model (greater BIC): lambda =", format(self$getBestModel("BIC")$penalty, digits = 3), "\n") - cat(" - Best model (greater EBIC): lambda =", format(self$getBestModel("EBIC")$penalty, digits = 3), "\n") - if (!anyNA(self$criteria$stability)) - cat(" - Best model (regarding StARS): lambda =", format(self$getBestModel("StARS")$penalty, digits = 3), "\n") - } + #' @description Plot stability path + #' @param stability scalar: the targeted level of stability in stability plot. Default is `0.9`. + #' @param log.x logical: should the x-axis be represented in log-scale? Default is `TRUE`. + #' @return a [`ggplot`] graph + plot_stars = function(stability = 0.9, log.x = TRUE) { + if (anyNA(self$stability)) stop("stability selection has not yet been performed! Use stability_selection()") + dplot <- self$criteria %>% select(param, density, stability) %>% + rename(Penalty = param) %>% + gather(key = "Metric", value = "Value", stability:density) + penalty_stars <- dplot %>% filter(Metric == "stability" & Value >= stability) %>% + pull(Penalty) %>% min() + + p <- ggplot(dplot, aes(x = Penalty, y = Value, group = Metric, color = Metric)) + + geom_point() + geom_line() + theme_bw() + + ## Add information correspinding to best lambda + geom_vline(xintercept = penalty_stars, linetype = 2) + + geom_hline(yintercept = stability, linetype = 2) + + annotate(x = penalty_stars, y = 0, + label = paste("lambda == ", round(penalty_stars, 5)), + parse = TRUE, hjust = -0.05, vjust = 0, geom = "text") + + annotate(x = penalty_stars, y = stability, + label = paste("stability == ", stability), + parse = TRUE, hjust = -0.05, vjust = 1.5, geom = "text") + if (log.x) p <- p + ggplot2::scale_x_log10() + annotation_logticks(sides = "b") + p + }, + + #' @description Plot objective value of the optimization problem along the penalty path + #' @return a [`ggplot`] graph + plot_objective = function() { + objective <- unlist(lapply(self$models, function(model) model$optim_par$objective)) + changes <- cumsum(unlist(lapply(self$models, function(model) model$optim_par$iterations))) + dplot <- data.frame(iteration = 1:length(objective), objective = objective) + p <- ggplot(dplot, aes(x = iteration, y = objective)) + geom_line() + + geom_vline(xintercept = changes, linetype = "dashed", alpha = 0.25) + + ggtitle("Objective along the alternate algorithm") + xlab("iteration (+ changes of model)") + + annotate("text", x = changes, y = min(dplot$objective), angle = 90, + label = paste("penalty=",format(self$criteria$param, digits = 1)), hjust = -.1, size = 3, alpha = 0.7) + theme_bw() + p + }, + + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Print methods --------------------- + #' @description User friendly print method + show = function() { + super$show() + cat(" Task: Network Inference \n") + cat("========================================================\n") + cat(" -", length(self$penalties) , "penalties considered: from", min(self$penalties), "to", max(self$penalties), "\n") + cat(" - Best model (greater BIC): lambda =", format(self$getBestModel("BIC")$penalty, digits = 3), "\n") + cat(" - Best model (greater EBIC): lambda =", format(self$getBestModel("EBIC")$penalty, digits = 3), "\n") + if (!anyNA(self$criteria$stability)) + cat(" - Best model (regarding StARS): lambda =", format(self$getBestModel("StARS")$penalty, digits = 3), "\n") + } ), ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -331,6 +241,7 @@ PLNnetworkfamily <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% private = list( + penalties_weights = NULL, # a field to store the weights for each penalty, stab_path = NULL # a field to store the stability path, ), @@ -366,3 +277,272 @@ PLNnetworkfamily <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ) +#' An R6 Class to represent a collection of PLNnetworkfit +#' +#' @description The function [PLNnetwork()] produces an instance of this class. +#' +#' This class comes with a set of methods, some of them being useful for the user: +#' See the documentation for [getBestModel()], +#' [getModel()] and [plot()][plot.PLNnetworkfamily()] +#' +## Parameters shared by many methods +#' @param penalties a vector of positive real number controlling the level of sparsity of the underlying network. +#' @param responses the matrix of responses common to every models +#' @param covariates the matrix of covariates common to every models +#' @param offsets the matrix of offsets common to every models +#' @param weights the vector of observation weights +#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param control a list for controlling the optimization. +#' @param var value of the parameter (`rank` for PLNPCA, `sparsity` for PLNnetwork) that identifies the model to be extracted from the collection. If no exact match is found, the model with closest parameter value is returned with a warning. +#' @param index Integer index of the model to be returned. Only the first value is taken into account +#' +#' @include PLNfamily-class.R +#' @importFrom R6 R6Class +#' @importFrom glassoFast glassoFast +#' @examples +#' data(trichoptera) +#' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) +#' fits <- PLNnetwork(Abundance ~ 1, data = trichoptera) +#' class(fits) +#' @seealso The function [PLNnetwork()], the class [`PLNnetworkfit`] +PLNnetworkfamily <- R6Class( + classname = "PLNnetworkfamily", + inherit = PLNnetworkfamilyvirtual, + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## PUBLIC MEMBERS ------ + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + public = list( + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Creation functions ---------------- + #' @description Initialize all models in the collection + #' @return Update current [`PLNnetworkfit`] with smart starting values + initialize = function(penalties, responses, covariates, offsets, weights, formula, control) { + + ## A basic model for inception, useless one is defined by the user + if (is.null(control$inception)) { + ## Allow inception with spherical / diagonal / full PLNfit before switching back to PLNfit_fixedcov + ## for the inner-outer loop of PLNnetwork. + myPLN <- switch( + control$inception_cov, + "spherical" = PLNfit_spherical$new(responses, covariates, offsets, weights, formula, control), + "diagonal" = PLNfit_diagonal$new(responses, covariates, offsets, weights, formula, control), + PLNfit$new(responses, covariates, offsets, weights, formula, control) # defaults to full + ) + myPLN$optimize(responses, covariates, offsets, weights, control$config_optim) + control$inception <- myPLN + } + + ## Initialize fields shared by the super class + super$initialize(penalties, responses, covariates, offsets, weights, formula, control) + + ## instantiate as many models as penalties + control$trace <- 0 + self$models <- map2(private$params, private$penalties_weights, function(penalty, penalty_weights) { + PLNnetworkfit$new(penalty, penalty_weights, responses, covariates, offsets, weights, formula, control) + }) + + }, + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Stability ------------------------- + #' @description Compute the stability path by stability selection + #' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines the number of subsamples used in the stability selection. Automatically set to 20 subsamples with size \code{10*sqrt(n)} if \code{n >= 144} and \code{0.8*n} otherwise following Liu et al. (2010) recommendations. + #' @param control a list controlling the main optimization process in each call to PLNnetwork. See [PLNnetwork()] for details. + stability_selection = function(subsamples = NULL, control = PLNnetwork_param()) { + + ## select default subsamples according + if (is.null(subsamples)) { + subsample.size <- round(ifelse(private$n >= 144, 10*sqrt(private$n), 0.8*private$n)) + subsamples <- replicate(20, sample.int(private$n, subsample.size), simplify = FALSE) + } + + ## got for stability selection + cat("\nStability Selection for PLNnetwork: ") + cat("\nsubsampling: ") + + stabs_out <- future.apply::future_lapply(subsamples, function(subsample) { + cat("+") + inception_ <- self$getModel(self$penalties[1]) + inception_$update( + M = inception_$var_par$M[subsample, ], + S = inception_$var_par$S[subsample, ] + ) + + ## force some control parameters + control$inception = inception_ + control$penalty_weights = map(self$models, "penalty_weights") + control$penalize_diagonal = (sum(diag(inception_$penalty_weights)) != 0) + control$trace <- 0 + control$config_optim$trace <- 0 + + myPLN <- PLNnetworkfamily$new(penalties = self$penalties, + responses = self$responses [subsample, , drop = FALSE], + covariates = self$covariates[subsample, , drop = FALSE], + offsets = self$offsets [subsample, , drop = FALSE], + formula = private$formula, + weights = self$weights [subsample], control = control) + + myPLN$optimize(control$config_optim) + nets <- do.call(cbind, lapply(myPLN$models, function(model) { + as.matrix(model$latent_network("support"))[upper.tri(diag(private$p))] + })) + nets + }, future.seed = TRUE, future.scheduling = structure(TRUE, ordering = "random")) + + prob <- Reduce("+", stabs_out, accumulate = FALSE) / length(subsamples) + ## formatting/tyding + node_set <- colnames(self$getModel(index = 1)$model_par$B) + colnames(prob) <- self$penalties + private$stab_path <- prob %>% + as.data.frame() %>% + mutate(Edge = 1:n()) %>% + gather(key = "Penalty", value = "Prob", -Edge) %>% + mutate(Penalty = as.numeric(Penalty), + Node1 = node_set[edge_to_node(Edge)$node1], + Node2 = node_set[edge_to_node(Edge)$node2], + Edge = paste0(Node1, "|", Node2)) + + invisible(subsamples) + } + ) + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## END OF CLASS ---- + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +) + +#' An R6 Class to represent a collection of ZIPLNnetwork +#' +#' @description The function [ZIPLNnetwork()] produces an instance of this class. +#' +#' This class comes with a set of methods, some of them being useful for the user: +#' See the documentation for [getBestModel()], +#' [getModel()] and [plot()][plot.ZIPLNnetworkfamily()] +#' +## Parameters shared by many methods +#' @param penalties a vector of positive real number controlling the level of sparsity of the underlying network. +#' @param responses the matrix of responses common to every models +#' @param covariates the matrix of covariates common to every models +#' @param offsets the matrix of offsets common to every models +#' @param weights the vector of observation weights +#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param control a list for controlling the optimization. +#' @param var value of the parameter (`rank` for PLNPCA, `sparsity` for PLNnetwork) that identifies the model to be extracted from the collection. If no exact match is found, the model with closest parameter value is returned with a warning. +#' @param index Integer index of the model to be returned. Only the first value is taken into account +#' +#' @include PLNfamily-class.R +#' @importFrom R6 R6Class +#' @importFrom glassoFast glassoFast +#' @examples +#' data(trichoptera) +#' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) +#' fits <- PLNnetwork(Abundance ~ 1, data = trichoptera) +#' class(fits) +#' @seealso The function [ZIPLNnetwork()], the class [`ZIPLNfit_sparse`] +ZIPLNnetworkfamily <- R6Class( + classname = "ZIPLNnetworkfamily", + inherit = PLNnetworkfamilyvirtual, + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## PUBLIC MEMBERS ------ + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + public = list( + covariates_ZI = NULL, # a field to store the covariates of the ZI + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Creation functions ---------------- + #' @description Initialize all models in the collection + #' @return Update current [`PLNnetworkfit`] with smart starting values + initialize = function(penalties, responses, covariates, offsets, weights, formula, control) { + + ## A basic model for inception, useless one is defined by the user + if (is.null(control$inception)) { + ## Allow inception with spherical / diagonal / full PLNfit before switching back to PLNfit_fixedcov + ## for the inner-outer loop of PLNnetwork. + myPLN <- switch( + control$inception_cov, + "spherical" = ZIPLNfit_spherical$new(responses, covariates, offsets, weights, formula, control), + "diagonal" = ZIPLNfit_diagonal$new(responses, covariates, offsets, weights, formula, control), + ZIPLNfit$new(responses, covariates, offsets, weights, formula, control) # defaults to full + ) + myPLN$optimize(responses, covariates, offsets, weights, control$config_optim) + control$inception <- myPLN + } + + ## Initialize fields shared by the super class + super$initialize(penalties, responses, covariates$PLN, offsets, weights, formula, control) + self$covariates_ZI <- covariates$ZI + self$covariates <- list(PLN = self$covariates, ZI = self$covariates_ZI) + + ## instantiate as many models as penalties + control$trace <- 0 + self$models <- map2(private$params, private$penalties_weights, function(penalty, penalty_weights) { + control$penalty <- penalty + control$penalty_weights <- penalty_weights + ZIPLNfit_sparse$new(responses, covariates, offsets, weights, formula, control) + }) + }, + + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## Stability ------------------------- + #' @description Compute the stability path by stability selection + #' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines the number of subsamples used in the stability selection. Automatically set to 20 subsamples with size \code{10*sqrt(n)} if \code{n >= 144} and \code{0.8*n} otherwise following Liu et al. (2010) recommendations. + #' @param control a list controlling the main optimization process in each call to PLNnetwork. See [PLNnetwork()] for details. + stability_selection = function(subsamples = NULL, control = PLNnetwork_param()) { + + ## select default subsamples according + if (is.null(subsamples)) { + subsample.size <- round(ifelse(private$n >= 144, 10*sqrt(private$n), 0.8*private$n)) + subsamples <- replicate(20, sample.int(private$n, subsample.size), simplify = FALSE) + } + + ## got for stability selection + cat("\nStability Selection for PLNnetwork: ") + cat("\nsubsampling: ") + + stabs_out <- future.apply::future_lapply(subsamples, function(subsample) { + cat("+") + inception_ <- self$getModel(self$penalties[1]) + inception_$update( + M = inception_$var_par$M[subsample, ], + S = inception_$var_par$S[subsample, ] + ) + + ## force some control parameters + control$inception = inception_ + control$penalty_weights = map(self$models, "penalty_weights") + control$penalize_diagonal = (sum(diag(inception_$penalty_weights)) != 0) + control$trace <- 0 + control$config_optim$trace <- 0 + + myPLN <- PLNnetworkfamily$new(penalties = self$penalties, + responses = self$responses [subsample, , drop = FALSE], + covariates = self$covariates[subsample, , drop = FALSE], + offsets = self$offsets [subsample, , drop = FALSE], + formula = private$formula, + weights = self$weights [subsample], control = control) + + myPLN$optimize(control$config_optim) + nets <- do.call(cbind, lapply(myPLN$models, function(model) { + as.matrix(model$latent_network("support"))[upper.tri(diag(private$p))] + })) + nets + }, future.seed = TRUE, future.scheduling = structure(TRUE, ordering = "random")) + + prob <- Reduce("+", stabs_out, accumulate = FALSE) / length(subsamples) + ## formatting/tyding + node_set <- colnames(self$getModel(index = 1)$model_par$B) + colnames(prob) <- self$penalties + private$stab_path <- prob %>% + as.data.frame() %>% + mutate(Edge = 1:n()) %>% + gather(key = "Penalty", value = "Prob", -Edge) %>% + mutate(Penalty = as.numeric(Penalty), + Node1 = node_set[edge_to_node(Edge)$node1], + Node2 = node_set[edge_to_node(Edge)$node2], + Edge = paste0(Node1, "|", Node2)) + + invisible(subsamples) + } + ) + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + ## END OF CLASS ---- + ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +) diff --git a/R/PLNnetworkfit-class.R b/R/PLNnetworkfit-class.R index 3995df54..3816349b 100644 --- a/R/PLNnetworkfit-class.R +++ b/R/PLNnetworkfit-class.R @@ -42,11 +42,9 @@ PLNnetworkfit <- R6Class( ## Creation functions ---------------- #' @description Initialize a [`PLNnetworkfit`] object initialize = function(penalty, penalty_weights, responses, covariates, offsets, weights, formula, control) { - stopifnot(isSymmetric(penalty_weights), all(penalty_weights >= 0)) super$initialize(responses, covariates, offsets, weights, formula, control) private$lambda <- penalty private$rho <- penalty_weights - if (!control$penalize_diagonal) diag(private$rho) <- 0 }, #' @description Update fields of a [`PLNnetworkfit`] object #' @param B matrix of regression matrix diff --git a/R/ZIPLNnetwork.R b/R/ZIPLNnetwork.R index 8f300aff..66064663 100644 --- a/R/ZIPLNnetwork.R +++ b/R/ZIPLNnetwork.R @@ -46,7 +46,7 @@ ZIPLNnetwork <- function(formula, data, subset, weights, zi = c("single", "row", myPLN } -#' Control of PLNnetwork fit +#' Control of ZIPLNnetwork fit #' #' Helper to define list of parameters to control the PLN fit. All arguments have defaults. #' diff --git a/R/ZIPLNnetworkfamily-class.R b/R/ZIPLNnetworkfamily-class.R deleted file mode 100644 index bd8ff397..00000000 --- a/R/ZIPLNnetworkfamily-class.R +++ /dev/null @@ -1,370 +0,0 @@ -#' An R6 Class to represent a collection of ZIPLNnetwork -#' -#' @description The function [ZIPLNnetwork()] produces an instance of this class. -#' -#' This class comes with a set of methods, some of them being useful for the user: -#' See the documentation for [getBestModel()], -#' [getModel()] and [plot()][plot.ZIPLNnetworkfamily()] -#' -## Parameters shared by many methods -#' @param penalties a vector of positive real number controlling the level of sparsity of the underlying network. -#' @param responses the matrix of responses common to every models -#' @param covariates the matrix of covariates common to every models -#' @param offsets the matrix of offsets common to every models -#' @param weights the vector of observation weights -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call -#' @param control a list for controlling the optimization. -#' @param var value of the parameter (`rank` for PLNPCA, `sparsity` for PLNnetwork) that identifies the model to be extracted from the collection. If no exact match is found, the model with closest parameter value is returned with a warning. -#' @param index Integer index of the model to be returned. Only the first value is taken into account -#' -#' @include PLNfamily-class.R -#' @importFrom R6 R6Class -#' @importFrom glassoFast glassoFast -#' @examples -#' data(trichoptera) -#' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) -#' fits <- PLNnetwork(Abundance ~ 1, data = trichoptera) -#' class(fits) -#' @seealso The function [ZIPLNnetwork()], the class [`ZIPLNfit_sparse`] -ZIPLNnetworkfamily <- R6Class( - classname = "ZIPLNnetworkfamily", - inherit = PLNfamily, - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - ## PUBLIC MEMBERS ------ - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - public = list( - covariates_ZI = NULL, # a field to store the covariates of the ZI - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - ## Creation functions ---------------- - #' @description Initialize all models in the collection - #' @return Update current [`PLNnetworkfit`] with smart starting values - initialize = function(penalties, responses, covariates, offsets, weights, formula, control) { - - ## Initialize fields shared by the super class - super$initialize(responses, covariates$PLN, offsets, weights, control) - self$covariates_ZI <- covariates$ZI - - ## A basic model for inception, useless one is defined by the user -### TODO check if it is useful - if (is.null(control$inception)) { - - # CHECK_ME_TORCH_GPU - # This appears to be in torch_gpu only. The commented out line below is - # in both PLNmodels/master and PLNmodels/dev. - myPLN <- switch( - control$inception_cov, - "spherical" = ZIPLNfit_spherical$new(responses, covariates, offsets, weights, formula, control), - "diagonal" = ZIPLNfit_diagonal$new(responses, covariates, offsets, weights, formula, control), - ZIPLNfit$new(responses, covariates, offsets, weights, formula, control) # defaults to full - ) - ## Allow inception with spherical / diagonal / full PLNfit before switching back to PLNfit_fixedcov - ## for the inner-outer loop of PLNnetwork. - myPLN$optimize(responses, covariates, offsets, weights, control$config_optim) - control$inception <- myPLN - } - - if (is.null(control$penalty_weights)) - control$penalty_weights <- matrix(1, ncol(responses), ncol(responses)) - ## Get the number of penalty - if (is.null(penalties)) { - if (is.list(control$penalty_weights)) - control$n_penalties <- length(control$penalty_weights) - } else { - control$n_penalties <- length(penalties) - } - ## Define a matrix of weights for each penalty - if (!is.list(control$penalty_weights)) - list_penalty_weights <- rep(list(control$penalty_weights), control$n_penalties) - else - list_penalty_weights <- control$penalty_weights - - ## Get an appropriate grid of penalties - if (is.null(penalties)) { - if (control$trace > 1) cat("\n Recovering an appropriate grid of penalties.") - # CHECK_ME_TORCH_GPU - # This appears to be in torch_gpu only. The commented out line below is - # in both PLNmodels/master and PLNmodels/dev. - # changed it to other one - max_pen <- list_penalty_weights %>% - map(~ as.matrix(myPLN$model_par$Sigma) / .x) %>% - # map(~ control$inception$model_par$Sigma / .x) %>% - map_dbl(~ max(abs(.x[upper.tri(.x, diag = control$penalize_diagonal)]))) %>% - max() - penalties <- 10^seq(log10(max_pen), log10(max_pen*control$min_ratio), len = control$n_penalties) - } else { - if (control$trace > 1) cat("\nPenalties already set by the user") - stopifnot(all(penalties > 0)) - } - ## Sort the penalty in decreasing order - o <- order(penalties, decreasing = TRUE) - private$params <- penalties[o] - list_penalty_weights <- list_penalty_weights[o] - - ## instantiate as many models as penalties - control$trace <- 0 - self$models <- map2(private$params, list_penalty_weights, function(penalty, penalty_weights) { - control$penalty <- penalty - control$penalty_weights <- penalty_weights - ZIPLNfit_sparse$new(responses, covariates, offsets, weights, formula, control) - }) - }, - - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - ## Optimization ---------------------- - #' @description Call to the C++ optimizer on all models of the collection - #' @param config a list for controlling the optimization. - optimize = function(config) { - ## Go along the penalty grid (i.e the models) - for (m in seq_along(self$models)) { - - if (config$trace == 1) { - cat("\tsparsifying penalty =", self$models[[m]]$penalty, "\r") - flush.console() - } - if (config$trace > 1) { - cat("\tsparsifying penalty =", self$models[[m]]$penalty, "- iteration:") - } - self$models[[m]]$optimize(self$responses, list(PLN = self$covariates, ZI = self$covariates_ZI), self$offsets, self$weights, config) - ## Save time by starting the optimization of model m + 1 with optimal parameters of model m - if (m < length(self$penalties)) - self$models[[m + 1]]$update( - B = self$models[[m]]$model_par$B, - M = self$models[[m]]$var_par$M, - S = self$models[[m]]$var_par$S - ) - - if (config$trace > 1) { - cat("\r \r") - flush.console() - } - } - - }, - - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - ## Stability ------------------------- - #' @description Compute the stability path by stability selection - #' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines the number of subsamples used in the stability selection. Automatically set to 20 subsamples with size \code{10*sqrt(n)} if \code{n >= 144} and \code{0.8*n} otherwise following Liu et al. (2010) recommendations. - #' @param control a list controlling the main optimization process in each call to PLNnetwork. See [PLNnetwork()] for details. - stability_selection = function(subsamples = NULL, control = PLNnetwork_param()) { - - ## select default subsamples according - if (is.null(subsamples)) { - subsample.size <- round(ifelse(private$n >= 144, 10*sqrt(private$n), 0.8*private$n)) - subsamples <- replicate(20, sample.int(private$n, subsample.size), simplify = FALSE) - } - - ## got for stability selection - cat("\nStability Selection for PLNnetwork: ") - cat("\nsubsampling: ") - - stabs_out <- future.apply::future_lapply(subsamples, function(subsample) { - cat("+") - inception_ <- self$getModel(self$penalties[1]) - inception_$update( - M = inception_$var_par$M[subsample, ], - S = inception_$var_par$S[subsample, ] - ) - - ## force some control parameters - control$inception = inception_ - control$penalty_weights = map(self$models, "penalty_weights") - control$penalize_diagonal = (sum(diag(inception_$penalty_weights)) != 0) - control$trace <- 0 - control$config_optim$trace <- 0 - - myPLN <- PLNnetworkfamily$new(penalties = self$penalties, - responses = self$responses [subsample, , drop = FALSE], - covariates = self$covariates[subsample, , drop = FALSE], - offsets = self$offsets [subsample, , drop = FALSE], - formula = private$formula, - weights = self$weights [subsample], control = control) - - myPLN$optimize(control$config_optim) - nets <- do.call(cbind, lapply(myPLN$models, function(model) { - as.matrix(model$latent_network("support"))[upper.tri(diag(private$p))] - })) - nets - }, future.seed = TRUE, future.scheduling = structure(TRUE, ordering = "random")) - - prob <- Reduce("+", stabs_out, accumulate = FALSE) / length(subsamples) - ## formatting/tyding - node_set <- colnames(self$getModel(index = 1)$model_par$B) - colnames(prob) <- self$penalties - private$stab_path <- prob %>% - as.data.frame() %>% - mutate(Edge = 1:n()) %>% - gather(key = "Penalty", value = "Prob", -Edge) %>% - mutate(Penalty = as.numeric(Penalty), - Node1 = node_set[edge_to_node(Edge)$node1], - Node2 = node_set[edge_to_node(Edge)$node2], - Edge = paste0(Node1, "|", Node2)) - - invisible(subsamples) - }, - - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - ## Extractors ------------------------ - #' @description Extract the regularization path of a [`PLNnetworkfamily`] - #' @param precision Logical. Should the regularization path be extracted from the precision matrix Omega (`TRUE`, default) or from the variance matrix Sigma (`FALSE`) - #' @param corr Logical. Should the matrix be transformed to (partial) correlation matrix before extraction? Defaults to `TRUE` - coefficient_path = function(precision = TRUE, corr = TRUE) { - lapply(self$penalties, function(x) { - if (precision) { - G <- self$getModel(x)$model_par$Omega - } else { - G <- self$getModel(x)$model_par$Sigma - dimnames(G) <- dimnames(self$getModel(x)$model_par$Omega) - } - if (corr) { - G <- ifelse(precision, -1, 1) * G / tcrossprod(sqrt(diag(G))) - } - setNames( - cbind( - expand.grid(colnames(G), rownames(G)), - as.vector(G)), c("Node1", "Node2", "Coeff") - ) %>% - mutate(Penalty = x, - Node1 = as.character(Node1), - Node2 = as.character(Node2), - Edge = paste0(Node1, "|", Node2)) %>% - filter(Node1 < Node2) - }) %>% bind_rows() - }, - - #' @description Extract the best network in the family according to some criteria - #' @param crit character. Criterion used to perform the selection. Is "StARS" is chosen but `$stability` field is empty, will compute stability path. - #' @param stability Only used for "StARS" criterion. A scalar indicating the target stability (= 1 - 2 beta) at which the network is selected. Default is `0.9`. - getBestModel = function(crit = c("BIC", "EBIC", "StARS"), stability = 0.9){ - crit <- match.arg(crit) - if (crit == "StARS") { - if (is.null(private$stab_path)) self$stability_selection() - id_stars <- self$criteria %>% - select(param, stability) %>% rename(Stability = stability) %>% - filter(Stability > stability) %>% - pull(param) %>% min() %>% match(self$penalties) - model <- self$models[[id_stars]]$clone() - } else { - stopifnot(!anyNA(self$criteria[[crit]])) - id <- 1 - if (length(self$criteria[[crit]]) > 1) { - id <- which.max(self$criteria[[crit]]) - } - model <- self$models[[id]]$clone() - } - model - }, - - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - ## Graphical methods ----------------- - #' @description Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a [`PLNnetworkfamily`]) - #' @param criteria vector of characters. The criteria to plot in `c("loglik", "pen_loglik", "BIC", "EBIC")`. Defaults to all of them. - #' @param reverse A logical indicating whether to plot the value of the criteria in the "natural" direction - #' (loglik - 0.5 penalty) or in the "reverse" direction (-2 loglik + penalty). Default to FALSE, i.e use the - #' natural direction, on the same scale as the log-likelihood.. - #' @param log.x logical: should the x-axis be represented in log-scale? Default is `TRUE`. - #' @return a [`ggplot`] graph - plot = function(criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), reverse = FALSE, log.x = TRUE) { - vlines <- sapply(intersect(criteria, c("BIC", "EBIC")) , function(crit) self$getBestModel(crit)$penalty) - p <- super$plot(criteria, reverse) + xlab("penalty") + geom_vline(xintercept = vlines, linetype = "dashed", alpha = 0.25) - if (log.x) p <- p + ggplot2::coord_trans(x = "log10") - p - }, - - #' @description Plot stability path - #' @param stability scalar: the targeted level of stability in stability plot. Default is `0.9`. - #' @param log.x logical: should the x-axis be represented in log-scale? Default is `TRUE`. - #' @return a [`ggplot`] graph - plot_stars = function(stability = 0.9, log.x = TRUE) { - if (anyNA(self$stability)) stop("stability selection has not yet been performed! Use stability_selection()") - dplot <- self$criteria %>% select(param, density, stability) %>% - rename(Penalty = param) %>% - gather(key = "Metric", value = "Value", stability:density) - penalty_stars <- dplot %>% filter(Metric == "stability" & Value >= stability) %>% - pull(Penalty) %>% min() - - p <- ggplot(dplot, aes(x = Penalty, y = Value, group = Metric, color = Metric)) + - geom_point() + geom_line() + theme_bw() + - ## Add information correspinding to best lambda - geom_vline(xintercept = penalty_stars, linetype = 2) + - geom_hline(yintercept = stability, linetype = 2) + - annotate(x = penalty_stars, y = 0, - label = paste("lambda == ", round(penalty_stars, 5)), - parse = TRUE, hjust = -0.05, vjust = 0, geom = "text") + - annotate(x = penalty_stars, y = stability, - label = paste("stability == ", stability), - parse = TRUE, hjust = -0.05, vjust = 1.5, geom = "text") - if (log.x) p <- p + ggplot2::scale_x_log10() + annotation_logticks(sides = "b") - p - }, - - #' @description Plot objective value of the optimization problem along the penalty path - #' @return a [`ggplot`] graph - plot_objective = function() { - objective <- unlist(lapply(self$models, function(model) model$optim_par$objective)) - changes <- cumsum(unlist(lapply(self$models, function(model) model$optim_par$iterations))) - dplot <- data.frame(iteration = 1:length(objective), objective = objective) - p <- ggplot(dplot, aes(x = iteration, y = objective)) + geom_line() + - geom_vline(xintercept = changes, linetype="dashed", alpha = 0.25) + - ggtitle("Objective along the alternate algorithm") + xlab("iteration (+ changes of model)") + - annotate("text", x = changes, y = min(dplot$objective), angle = 90, - label = paste("penalty=",format(self$criteria$param, digits = 1)), hjust = -.1, size = 3, alpha = 0.7) + theme_bw() - p - }, - - - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - ## Print methods --------------------- - #' @description User friendly print method - show = function() { - super$show() - cat(" Task: Network Inference \n") - cat("========================================================\n") - cat(" -", length(self$penalties) , "penalties considered: from", min(self$penalties), "to", max(self$penalties), "\n") - cat(" - Best model (greater BIC): lambda =", format(self$getBestModel("BIC")$penalty, digits = 3), "\n") - cat(" - Best model (greater EBIC): lambda =", format(self$getBestModel("EBIC")$penalty, digits = 3), "\n") - if (!anyNA(self$criteria$stability)) - cat(" - Best model (regarding StARS): lambda =", format(self$getBestModel("StARS")$penalty, digits = 3), "\n") - } - ), - - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - ## PRIVATE MEMBERS ------ - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - private = list( - stab_path = NULL # a field to store the stability path, - ), - - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - ## ACTIVE BINDINGS ------ - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - active = list( - #' @field penalties the sparsity level of the network in the successively fitted models - penalties = function() private$params, - #' @field stability_path the stability path of each edge as returned by the stars procedure - stability_path = function() private$stab_path, - #' @field stability mean edge stability along the penalty path - stability = function() { - if (!is.null(private$stab_path)) { - stability <- self$stability_path %>% - dplyr::select(Penalty, Prob) %>% - group_by(Penalty) %>% - summarize(Stability = 1 - mean(4 * Prob * (1 - Prob))) %>% - arrange(desc(Penalty)) %>% - pull(Stability) - } else { - stability <- rep(NA, length(self$penalties)) - } - stability - }, - #' @field criteria a data frame with the values of some criteria (approximated log-likelihood, (E)BIC, ICL and R2, stability) for the collection of models / fits - #' BIC, ICL and EBIC are defined so that they are on the same scale as the model log-likelihood, i.e. with the form, loglik - 0.5 penalty - criteria = function() {mutate(super$criteria, stability = self$stability)} - ) - - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - ## END OF CLASS ---- - ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -) - diff --git a/man/PLNnetworkfamily.Rd b/man/PLNnetworkfamily.Rd index 63218bf1..54e7041c 100644 --- a/man/PLNnetworkfamily.Rd +++ b/man/PLNnetworkfamily.Rd @@ -19,44 +19,30 @@ class(fits) \seealso{ The function \code{\link[=PLNnetwork]{PLNnetwork()}}, the class \code{\link{PLNnetworkfit}} } -\section{Super class}{ -\code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{PLNnetworkfamily} -} -\section{Active bindings}{ -\if{html}{\out{
}} -\describe{ -\item{\code{penalties}}{the sparsity level of the network in the successively fitted models} - -\item{\code{stability_path}}{the stability path of each edge as returned by the stars procedure} - -\item{\code{stability}}{mean edge stability along the penalty path} - -\item{\code{criteria}}{a data frame with the values of some criteria (approximated log-likelihood, (E)BIC, ICL and R2, stability) for the collection of models / fits -BIC, ICL and EBIC are defined so that they are on the same scale as the model log-likelihood, i.e. with the form, loglik - 0.5 penalty} -} -\if{html}{\out{
}} +\section{Super classes}{ +\code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{PLNmodels::PLNnetworkfamilyvirtual} -> \code{PLNnetworkfamily} } \section{Methods}{ \subsection{Public methods}{ \itemize{ \item \href{#method-PLNnetworkfamily-new}{\code{PLNnetworkfamily$new()}} -\item \href{#method-PLNnetworkfamily-optimize}{\code{PLNnetworkfamily$optimize()}} \item \href{#method-PLNnetworkfamily-stability_selection}{\code{PLNnetworkfamily$stability_selection()}} -\item \href{#method-PLNnetworkfamily-coefficient_path}{\code{PLNnetworkfamily$coefficient_path()}} -\item \href{#method-PLNnetworkfamily-getBestModel}{\code{PLNnetworkfamily$getBestModel()}} -\item \href{#method-PLNnetworkfamily-plot}{\code{PLNnetworkfamily$plot()}} -\item \href{#method-PLNnetworkfamily-plot_stars}{\code{PLNnetworkfamily$plot_stars()}} -\item \href{#method-PLNnetworkfamily-plot_objective}{\code{PLNnetworkfamily$plot_objective()}} -\item \href{#method-PLNnetworkfamily-show}{\code{PLNnetworkfamily$show()}} \item \href{#method-PLNnetworkfamily-clone}{\code{PLNnetworkfamily$clone()}} } } \if{html}{\out{ -
Inherited methods +
Inherited methods
}} @@ -101,23 +87,6 @@ Update current \code{\link{PLNnetworkfit}} with smart starting values } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamily-optimize}{}}} -\subsection{Method \code{optimize()}}{ -Call to the C++ optimizer on all models of the collection -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamily$optimize(config)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{config}}{a list for controlling the optimization.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-PLNnetworkfamily-stability_selection}{}}} \subsection{Method \code{stability_selection()}}{ @@ -138,122 +107,6 @@ Compute the stability path by stability selection } \if{html}{\out{
}} } -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamily-coefficient_path}{}}} -\subsection{Method \code{coefficient_path()}}{ -Extract the regularization path of a \code{\link{PLNnetworkfamily}} -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamily$coefficient_path(precision = TRUE, corr = TRUE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{precision}}{Logical. Should the regularization path be extracted from the precision matrix Omega (\code{TRUE}, default) or from the variance matrix Sigma (\code{FALSE})} - -\item{\code{corr}}{Logical. Should the matrix be transformed to (partial) correlation matrix before extraction? Defaults to \code{TRUE}} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamily-getBestModel}{}}} -\subsection{Method \code{getBestModel()}}{ -Extract the best network in the family according to some criteria -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamily$getBestModel( - crit = c("BIC", "EBIC", "StARS"), - stability = 0.9 -)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{crit}}{character. Criterion used to perform the selection. Is "StARS" is chosen but \verb{$stability} field is empty, will compute stability path.} - -\item{\code{stability}}{Only used for "StARS" criterion. A scalar indicating the target stability (= 1 - 2 beta) at which the network is selected. Default is \code{0.9}.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamily-plot}{}}} -\subsection{Method \code{plot()}}{ -Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a \code{\link{PLNnetworkfamily}}) -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamily$plot( - criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), - reverse = FALSE, - log.x = TRUE -)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{criteria}}{vector of characters. The criteria to plot in \code{c("loglik", "pen_loglik", "BIC", "EBIC")}. Defaults to all of them.} - -\item{\code{reverse}}{A logical indicating whether to plot the value of the criteria in the "natural" direction -(loglik - 0.5 penalty) or in the "reverse" direction (-2 loglik + penalty). Default to FALSE, i.e use the -natural direction, on the same scale as the log-likelihood..} - -\item{\code{log.x}}{logical: should the x-axis be represented in log-scale? Default is \code{TRUE}.} -} -\if{html}{\out{
}} -} -\subsection{Returns}{ -a \code{\link{ggplot}} graph -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamily-plot_stars}{}}} -\subsection{Method \code{plot_stars()}}{ -Plot stability path -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamily$plot_stars(stability = 0.9, log.x = TRUE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{stability}}{scalar: the targeted level of stability in stability plot. Default is \code{0.9}.} - -\item{\code{log.x}}{logical: should the x-axis be represented in log-scale? Default is \code{TRUE}.} -} -\if{html}{\out{
}} -} -\subsection{Returns}{ -a \code{\link{ggplot}} graph -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamily-plot_objective}{}}} -\subsection{Method \code{plot_objective()}}{ -Plot objective value of the optimization problem along the penalty path -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamily$plot_objective()}\if{html}{\out{
}} -} - -\subsection{Returns}{ -a \code{\link{ggplot}} graph -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamily-show}{}}} -\subsection{Method \code{show()}}{ -User friendly print method -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamily$show()}\if{html}{\out{
}} -} - } \if{html}{\out{
}} \if{html}{\out{}} diff --git a/man/PLNnetworkfamilyvirtual.Rd b/man/PLNnetworkfamilyvirtual.Rd new file mode 100644 index 00000000..2155547c --- /dev/null +++ b/man/PLNnetworkfamilyvirtual.Rd @@ -0,0 +1,246 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PLNnetworkfamily-class.R +\name{PLNnetworkfamilyvirtual} +\alias{PLNnetworkfamilyvirtual} +\title{An R6 Class to virtually represent a collection of PLNnetworkfit (either standard or ZI)} +\description{ +The function \code{\link[=PLNnetwork]{PLNnetwork()}} produces an instance of this class. + +This class comes with a set of methods, some of them being useful for the user: +See the documentation for \code{\link[=getBestModel]{getBestModel()}}, +\code{\link[=getModel]{getModel()}} and \link[=plot.PLNnetworkfamily]{plot()} +} +\seealso{ +The function \code{\link[=PLNnetwork]{PLNnetwork()}}, the class \code{\link{PLNnetworkfit}} +} +\section{Super class}{ +\code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{PLNnetworkfamilyvirtual} +} +\section{Active bindings}{ +\if{html}{\out{
}} +\describe{ +\item{\code{penalties}}{the sparsity level of the network in the successively fitted models} + +\item{\code{stability_path}}{the stability path of each edge as returned by the stars procedure} + +\item{\code{stability}}{mean edge stability along the penalty path} + +\item{\code{criteria}}{a data frame with the values of some criteria (approximated log-likelihood, (E)BIC, ICL and R2, stability) for the collection of models / fits +BIC, ICL and EBIC are defined so that they are on the same scale as the model log-likelihood, i.e. with the form, loglik - 0.5 penalty} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-PLNnetworkfamilyvirtual-new}{\code{PLNnetworkfamilyvirtual$new()}} +\item \href{#method-PLNnetworkfamilyvirtual-optimize}{\code{PLNnetworkfamilyvirtual$optimize()}} +\item \href{#method-PLNnetworkfamilyvirtual-coefficient_path}{\code{PLNnetworkfamilyvirtual$coefficient_path()}} +\item \href{#method-PLNnetworkfamilyvirtual-getBestModel}{\code{PLNnetworkfamilyvirtual$getBestModel()}} +\item \href{#method-PLNnetworkfamilyvirtual-plot}{\code{PLNnetworkfamilyvirtual$plot()}} +\item \href{#method-PLNnetworkfamilyvirtual-plot_stars}{\code{PLNnetworkfamilyvirtual$plot_stars()}} +\item \href{#method-PLNnetworkfamilyvirtual-plot_objective}{\code{PLNnetworkfamilyvirtual$plot_objective()}} +\item \href{#method-PLNnetworkfamilyvirtual-show}{\code{PLNnetworkfamilyvirtual$show()}} +\item \href{#method-PLNnetworkfamilyvirtual-clone}{\code{PLNnetworkfamilyvirtual$clone()}} +} +} +\if{html}{\out{ +
Inherited methods + +
+}} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-PLNnetworkfamilyvirtual-new}{}}} +\subsection{Method \code{new()}}{ +Initialize all models in the collection +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{PLNnetworkfamilyvirtual$new( + penalties, + responses, + covariates, + offsets, + weights, + formula, + control +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{penalties}}{a vector of positive real number controlling the level of sparsity of the underlying network.} + +\item{\code{responses}}{the matrix of responses common to every models} + +\item{\code{covariates}}{the matrix of covariates common to every models} + +\item{\code{offsets}}{the matrix of offsets common to every models} + +\item{\code{weights}}{the vector of observation weights} + +\item{\code{formula}}{model formula used for fitting, extracted from the formula in the upper-level call} + +\item{\code{control}}{a list for controlling the optimization.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Update current \code{\link{PLNnetworkfit}} with smart starting values +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-PLNnetworkfamilyvirtual-optimize}{}}} +\subsection{Method \code{optimize()}}{ +Call to the C++ optimizer on all models of the collection +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{PLNnetworkfamilyvirtual$optimize(config)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{config}}{a list for controlling the optimization.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-PLNnetworkfamilyvirtual-coefficient_path}{}}} +\subsection{Method \code{coefficient_path()}}{ +Extract the regularization path of a \code{\link{PLNnetworkfamily}} +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{PLNnetworkfamilyvirtual$coefficient_path(precision = TRUE, corr = TRUE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{precision}}{Logical. Should the regularization path be extracted from the precision matrix Omega (\code{TRUE}, default) or from the variance matrix Sigma (\code{FALSE})} + +\item{\code{corr}}{Logical. Should the matrix be transformed to (partial) correlation matrix before extraction? Defaults to \code{TRUE}} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-PLNnetworkfamilyvirtual-getBestModel}{}}} +\subsection{Method \code{getBestModel()}}{ +Extract the best network in the family according to some criteria +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{PLNnetworkfamilyvirtual$getBestModel( + crit = c("BIC", "EBIC", "StARS"), + stability = 0.9 +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{crit}}{character. Criterion used to perform the selection. Is "StARS" is chosen but \verb{$stability} field is empty, will compute stability path.} + +\item{\code{stability}}{Only used for "StARS" criterion. A scalar indicating the target stability (= 1 - 2 beta) at which the network is selected. Default is \code{0.9}.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-PLNnetworkfamilyvirtual-plot}{}}} +\subsection{Method \code{plot()}}{ +Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a \code{\link{PLNnetworkfamily}}) +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{PLNnetworkfamilyvirtual$plot( + criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), + reverse = FALSE, + log.x = TRUE +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{criteria}}{vector of characters. The criteria to plot in \code{c("loglik", "pen_loglik", "BIC", "EBIC")}. Defaults to all of them.} + +\item{\code{reverse}}{A logical indicating whether to plot the value of the criteria in the "natural" direction +(loglik - 0.5 penalty) or in the "reverse" direction (-2 loglik + penalty). Default to FALSE, i.e use the +natural direction, on the same scale as the log-likelihood..} + +\item{\code{log.x}}{logical: should the x-axis be represented in log-scale? Default is \code{TRUE}.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +a \code{\link{ggplot}} graph +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-PLNnetworkfamilyvirtual-plot_stars}{}}} +\subsection{Method \code{plot_stars()}}{ +Plot stability path +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{PLNnetworkfamilyvirtual$plot_stars(stability = 0.9, log.x = TRUE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{stability}}{scalar: the targeted level of stability in stability plot. Default is \code{0.9}.} + +\item{\code{log.x}}{logical: should the x-axis be represented in log-scale? Default is \code{TRUE}.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +a \code{\link{ggplot}} graph +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-PLNnetworkfamilyvirtual-plot_objective}{}}} +\subsection{Method \code{plot_objective()}}{ +Plot objective value of the optimization problem along the penalty path +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{PLNnetworkfamilyvirtual$plot_objective()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +a \code{\link{ggplot}} graph +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-PLNnetworkfamilyvirtual-show}{}}} +\subsection{Method \code{show()}}{ +User friendly print method +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{PLNnetworkfamilyvirtual$show()}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-PLNnetworkfamilyvirtual-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{PLNnetworkfamilyvirtual$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/ZIPLNnetwork_param.Rd b/man/ZIPLNnetwork_param.Rd index ea3342d1..7b9db861 100644 --- a/man/ZIPLNnetwork_param.Rd +++ b/man/ZIPLNnetwork_param.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ZIPLNnetwork.R \name{ZIPLNnetwork_param} \alias{ZIPLNnetwork_param} -\title{Control of PLNnetwork fit} +\title{Control of ZIPLNnetwork fit} \usage{ ZIPLNnetwork_param( backend = c("nlopt"), diff --git a/man/ZIPLNnetworkfamily.Rd b/man/ZIPLNnetworkfamily.Rd index ce24c693..78c16648 100644 --- a/man/ZIPLNnetworkfamily.Rd +++ b/man/ZIPLNnetworkfamily.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ZIPLNnetworkfamily-class.R +% Please edit documentation in R/PLNnetworkfamily-class.R \name{ZIPLNnetworkfamily} \alias{ZIPLNnetworkfamily} \title{An R6 Class to represent a collection of ZIPLNnetwork} @@ -19,44 +19,30 @@ class(fits) \seealso{ The function \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}}, the class \code{\link{ZIPLNfit_sparse}} } -\section{Super class}{ -\code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{ZIPLNnetworkfamily} -} -\section{Active bindings}{ -\if{html}{\out{
}} -\describe{ -\item{\code{penalties}}{the sparsity level of the network in the successively fitted models} - -\item{\code{stability_path}}{the stability path of each edge as returned by the stars procedure} - -\item{\code{stability}}{mean edge stability along the penalty path} - -\item{\code{criteria}}{a data frame with the values of some criteria (approximated log-likelihood, (E)BIC, ICL and R2, stability) for the collection of models / fits -BIC, ICL and EBIC are defined so that they are on the same scale as the model log-likelihood, i.e. with the form, loglik - 0.5 penalty} -} -\if{html}{\out{
}} +\section{Super classes}{ +\code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{PLNmodels::PLNnetworkfamilyvirtual} -> \code{ZIPLNnetworkfamily} } \section{Methods}{ \subsection{Public methods}{ \itemize{ \item \href{#method-ZIPLNnetworkfamily-new}{\code{ZIPLNnetworkfamily$new()}} -\item \href{#method-ZIPLNnetworkfamily-optimize}{\code{ZIPLNnetworkfamily$optimize()}} \item \href{#method-ZIPLNnetworkfamily-stability_selection}{\code{ZIPLNnetworkfamily$stability_selection()}} -\item \href{#method-ZIPLNnetworkfamily-coefficient_path}{\code{ZIPLNnetworkfamily$coefficient_path()}} -\item \href{#method-ZIPLNnetworkfamily-getBestModel}{\code{ZIPLNnetworkfamily$getBestModel()}} -\item \href{#method-ZIPLNnetworkfamily-plot}{\code{ZIPLNnetworkfamily$plot()}} -\item \href{#method-ZIPLNnetworkfamily-plot_stars}{\code{ZIPLNnetworkfamily$plot_stars()}} -\item \href{#method-ZIPLNnetworkfamily-plot_objective}{\code{ZIPLNnetworkfamily$plot_objective()}} -\item \href{#method-ZIPLNnetworkfamily-show}{\code{ZIPLNnetworkfamily$show()}} \item \href{#method-ZIPLNnetworkfamily-clone}{\code{ZIPLNnetworkfamily$clone()}} } } \if{html}{\out{ -
Inherited methods +
Inherited methods
}} @@ -101,23 +87,6 @@ Update current \code{\link{PLNnetworkfit}} with smart starting values } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-ZIPLNnetworkfamily-optimize}{}}} -\subsection{Method \code{optimize()}}{ -Call to the C++ optimizer on all models of the collection -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ZIPLNnetworkfamily$optimize(config)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{config}}{a list for controlling the optimization.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-ZIPLNnetworkfamily-stability_selection}{}}} \subsection{Method \code{stability_selection()}}{ @@ -138,122 +107,6 @@ Compute the stability path by stability selection } \if{html}{\out{
}} } -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-ZIPLNnetworkfamily-coefficient_path}{}}} -\subsection{Method \code{coefficient_path()}}{ -Extract the regularization path of a \code{\link{PLNnetworkfamily}} -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ZIPLNnetworkfamily$coefficient_path(precision = TRUE, corr = TRUE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{precision}}{Logical. Should the regularization path be extracted from the precision matrix Omega (\code{TRUE}, default) or from the variance matrix Sigma (\code{FALSE})} - -\item{\code{corr}}{Logical. Should the matrix be transformed to (partial) correlation matrix before extraction? Defaults to \code{TRUE}} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-ZIPLNnetworkfamily-getBestModel}{}}} -\subsection{Method \code{getBestModel()}}{ -Extract the best network in the family according to some criteria -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ZIPLNnetworkfamily$getBestModel( - crit = c("BIC", "EBIC", "StARS"), - stability = 0.9 -)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{crit}}{character. Criterion used to perform the selection. Is "StARS" is chosen but \verb{$stability} field is empty, will compute stability path.} - -\item{\code{stability}}{Only used for "StARS" criterion. A scalar indicating the target stability (= 1 - 2 beta) at which the network is selected. Default is \code{0.9}.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-ZIPLNnetworkfamily-plot}{}}} -\subsection{Method \code{plot()}}{ -Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a \code{\link{PLNnetworkfamily}}) -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ZIPLNnetworkfamily$plot( - criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), - reverse = FALSE, - log.x = TRUE -)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{criteria}}{vector of characters. The criteria to plot in \code{c("loglik", "pen_loglik", "BIC", "EBIC")}. Defaults to all of them.} - -\item{\code{reverse}}{A logical indicating whether to plot the value of the criteria in the "natural" direction -(loglik - 0.5 penalty) or in the "reverse" direction (-2 loglik + penalty). Default to FALSE, i.e use the -natural direction, on the same scale as the log-likelihood..} - -\item{\code{log.x}}{logical: should the x-axis be represented in log-scale? Default is \code{TRUE}.} -} -\if{html}{\out{
}} -} -\subsection{Returns}{ -a \code{\link{ggplot}} graph -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-ZIPLNnetworkfamily-plot_stars}{}}} -\subsection{Method \code{plot_stars()}}{ -Plot stability path -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ZIPLNnetworkfamily$plot_stars(stability = 0.9, log.x = TRUE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{stability}}{scalar: the targeted level of stability in stability plot. Default is \code{0.9}.} - -\item{\code{log.x}}{logical: should the x-axis be represented in log-scale? Default is \code{TRUE}.} -} -\if{html}{\out{
}} -} -\subsection{Returns}{ -a \code{\link{ggplot}} graph -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-ZIPLNnetworkfamily-plot_objective}{}}} -\subsection{Method \code{plot_objective()}}{ -Plot objective value of the optimization problem along the penalty path -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ZIPLNnetworkfamily$plot_objective()}\if{html}{\out{
}} -} - -\subsection{Returns}{ -a \code{\link{ggplot}} graph -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-ZIPLNnetworkfamily-show}{}}} -\subsection{Method \code{show()}}{ -User friendly print method -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ZIPLNnetworkfamily$show()}\if{html}{\out{
}} -} - } \if{html}{\out{
}} \if{html}{\out{}} From 5afec1b8eaabc8a33c1fb297e67a7dbb227a08be Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Sat, 10 Feb 2024 08:13:48 +0100 Subject: [PATCH 11/30] share S3 methods [ci-skip] --- NAMESPACE | 6 +++--- R/PLNnetworkfamily-S3methods.R | 10 ++++------ man/PLNnetworkfamily.Rd | 2 +- man/ZIPLNnetworkfamily.Rd | 2 +- man/getBestModel.Rd | 6 +++--- man/getModel.Rd | 6 +++--- ...etworkfamily.Rd => plot.PLNnetworkfamilyvirtual.Rd} | 6 +++--- 7 files changed, 18 insertions(+), 20 deletions(-) rename man/{plot.PLNnetworkfamily.Rd => plot.PLNnetworkfamilyvirtual.Rd} (95%) diff --git a/NAMESPACE b/NAMESPACE index 9951428c..86a81b46 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,17 +9,17 @@ S3method(fitted,PLNmixturefit) S3method(fitted,ZIPLNfit) S3method(getBestModel,PLNPCAfamily) S3method(getBestModel,PLNmixturefamily) -S3method(getBestModel,PLNnetworkfamily) +S3method(getBestModel,PLNnetworkfamilyvirtual) S3method(getModel,PLNPCAfamily) S3method(getModel,PLNmixturefamily) -S3method(getModel,PLNnetworkfamily) +S3method(getModel,PLNnetworkfamilyvirtual) S3method(plot,PLNLDAfit) S3method(plot,PLNPCAfamily) S3method(plot,PLNPCAfit) S3method(plot,PLNfamily) S3method(plot,PLNmixturefamily) S3method(plot,PLNmixturefit) -S3method(plot,PLNnetworkfamily) +S3method(plot,PLNnetworkfamilyvirtual) S3method(plot,PLNnetworkfit) S3method(plot,ZIPLNfit_sparse) S3method(predict,PLNLDAfit) diff --git a/R/PLNnetworkfamily-S3methods.R b/R/PLNnetworkfamily-S3methods.R index 2287ff6a..765ce399 100644 --- a/R/PLNnetworkfamily-S3methods.R +++ b/R/PLNnetworkfamily-S3methods.R @@ -6,12 +6,10 @@ ## Auxiliary functions to check the given class of an objet -isPLNnetworkfamily <- function(Robject) {inherits(Robject, "PLNnetworkfamily")} +isPLNnetworkfamily <- function(Robject) {inherits(Robject, "PLNnetworkfamilyvirtual")} #' Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a [`PLNnetworkfamily`]) #' -#' @name plot.PLNnetworkfamily -#' #' @inheritParams plot.PLNfamily #' @inherit plot.PLNfamily return details #' @@ -34,7 +32,7 @@ isPLNnetworkfamily <- function(Robject) {inherits(Robject, "PLNnetworkfamily")} #' (with \code{type = 'stability'}) or the evolution of the criteria of the different models considered #' (with \code{type = 'criteria'}, the default). #' @export -plot.PLNnetworkfamily <- +plot.PLNnetworkfamilyvirtual <- function(x, type = c("criteria", "stability", "diagnostic"), criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), @@ -55,14 +53,14 @@ plot.PLNnetworkfamily <- #' @describeIn getModel Model extraction for [`PLNnetworkfamily`] #' @export -getModel.PLNnetworkfamily <- function(Robject, var, index = NULL) { +getModel.PLNnetworkfamilyvirtual <- function(Robject, var, index = NULL) { stopifnot(isPLNnetworkfamily(Robject)) Robject$getModel(var, index) } #' @describeIn getBestModel Model extraction for [`PLNnetworkfamily`] #' @export -getBestModel.PLNnetworkfamily <- function(Robject, crit = c("BIC", "EBIC", "StARS"), ...) { +getBestModel.PLNnetworkfamilyvirtual <- function(Robject, crit = c("BIC", "EBIC", "StARS"), ...) { stopifnot(isPLNnetworkfamily(Robject)) stability <- list(...)[["stability"]] if (is.null(stability)) stability <- 0.9 diff --git a/man/PLNnetworkfamily.Rd b/man/PLNnetworkfamily.Rd index 54e7041c..fa8eead1 100644 --- a/man/PLNnetworkfamily.Rd +++ b/man/PLNnetworkfamily.Rd @@ -20,7 +20,7 @@ class(fits) The function \code{\link[=PLNnetwork]{PLNnetwork()}}, the class \code{\link{PLNnetworkfit}} } \section{Super classes}{ -\code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{PLNmodels::PLNnetworkfamilyvirtual} -> \code{PLNnetworkfamily} +\code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{\link[PLNmodels:PLNnetworkfamilyvirtual]{PLNmodels::PLNnetworkfamilyvirtual}} -> \code{PLNnetworkfamily} } \section{Methods}{ \subsection{Public methods}{ diff --git a/man/ZIPLNnetworkfamily.Rd b/man/ZIPLNnetworkfamily.Rd index 78c16648..24ca0892 100644 --- a/man/ZIPLNnetworkfamily.Rd +++ b/man/ZIPLNnetworkfamily.Rd @@ -20,7 +20,7 @@ class(fits) The function \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}}, the class \code{\link{ZIPLNfit_sparse}} } \section{Super classes}{ -\code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{PLNmodels::PLNnetworkfamilyvirtual} -> \code{ZIPLNnetworkfamily} +\code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{\link[PLNmodels:PLNnetworkfamilyvirtual]{PLNmodels::PLNnetworkfamilyvirtual}} -> \code{ZIPLNnetworkfamily} } \section{Methods}{ \subsection{Public methods}{ diff --git a/man/getBestModel.Rd b/man/getBestModel.Rd index aff5ac35..67f2bd96 100644 --- a/man/getBestModel.Rd +++ b/man/getBestModel.Rd @@ -6,7 +6,7 @@ \alias{getBestModel.PLNPCAfamily} \alias{getBestModel} \alias{getBestModel.PLNmixturefamily} -\alias{getBestModel.PLNnetworkfamily} +\alias{getBestModel.PLNnetworkfamilyvirtual} \title{Best model extraction from a collection of models} \usage{ \method{getBestModel}{PLNPCAfamily}(Robject, crit = c("ICL", "BIC"), ...) @@ -15,7 +15,7 @@ getBestModel(Robject, crit, ...) \method{getBestModel}{PLNmixturefamily}(Robject, crit = c("ICL", "BIC"), ...) -\method{getBestModel}{PLNnetworkfamily}(Robject, crit = c("BIC", "EBIC", "StARS"), ...) +\method{getBestModel}{PLNnetworkfamilyvirtual}(Robject, crit = c("BIC", "EBIC", "StARS"), ...) } \arguments{ \item{Robject}{an object with class PLNPCAfamilly ot PLNnetworkfamily} @@ -39,7 +39,7 @@ Best model extraction from a collection of models \item \code{getBestModel(PLNmixturefamily)}: Model extraction for \code{\link{PLNmixturefamily}} -\item \code{getBestModel(PLNnetworkfamily)}: Model extraction for \code{\link{PLNnetworkfamily}} +\item \code{getBestModel(PLNnetworkfamilyvirtual)}: Model extraction for \code{\link{PLNnetworkfamily}} }} \examples{ diff --git a/man/getModel.Rd b/man/getModel.Rd index 846dbf8b..bca10d4a 100644 --- a/man/getModel.Rd +++ b/man/getModel.Rd @@ -6,7 +6,7 @@ \alias{getModel.PLNPCAfamily} \alias{getModel} \alias{getModel.PLNmixturefamily} -\alias{getModel.PLNnetworkfamily} +\alias{getModel.PLNnetworkfamilyvirtual} \title{Model extraction from a collection of models} \usage{ \method{getModel}{PLNPCAfamily}(Robject, var, index = NULL) @@ -15,7 +15,7 @@ getModel(Robject, var, index) \method{getModel}{PLNmixturefamily}(Robject, var, index = NULL) -\method{getModel}{PLNnetworkfamily}(Robject, var, index = NULL) +\method{getModel}{PLNnetworkfamilyvirtual}(Robject, var, index = NULL) } \arguments{ \item{Robject}{an R6 object with class \code{\link{PLNPCAfamily}} or \code{\link{PLNnetworkfamily}}} @@ -36,7 +36,7 @@ Model extraction from a collection of models \item \code{getModel(PLNmixturefamily)}: Model extraction for \code{\link{PLNmixturefamily}} -\item \code{getModel(PLNnetworkfamily)}: Model extraction for \code{\link{PLNnetworkfamily}} +\item \code{getModel(PLNnetworkfamilyvirtual)}: Model extraction for \code{\link{PLNnetworkfamily}} }} \examples{ diff --git a/man/plot.PLNnetworkfamily.Rd b/man/plot.PLNnetworkfamilyvirtual.Rd similarity index 95% rename from man/plot.PLNnetworkfamily.Rd rename to man/plot.PLNnetworkfamilyvirtual.Rd index 64cb0f34..bf189397 100644 --- a/man/plot.PLNnetworkfamily.Rd +++ b/man/plot.PLNnetworkfamilyvirtual.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/PLNnetworkfamily-S3methods.R -\name{plot.PLNnetworkfamily} -\alias{plot.PLNnetworkfamily} +\name{plot.PLNnetworkfamilyvirtual} +\alias{plot.PLNnetworkfamilyvirtual} \title{Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a \code{\link{PLNnetworkfamily}})} \usage{ -\method{plot}{PLNnetworkfamily}( +\method{plot}{PLNnetworkfamilyvirtual}( x, type = c("criteria", "stability", "diagnostic"), criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), From a4b25dead696d9f019b22729335643060d623ed6 Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Mon, 12 Feb 2024 18:08:05 +0100 Subject: [PATCH 12/30] [ci-skip] somes reformulation in ZIPLNnetwork --- NAMESPACE | 6 +- R/PLNnetworkfamily-S3methods.R | 8 +- R/PLNnetworkfamily-class.R | 10 +-- R/ZIPLNnetwork.R | 2 +- ...tworkfamilyvirtual.Rd => Networkfamily.Rd} | 83 +++++++++---------- man/PLNnetworkfamily.Rd | 16 ++-- man/ZIPLNnetworkfamily.Rd | 16 ++-- man/getBestModel.Rd | 6 +- man/getModel.Rd | 6 +- ...familyvirtual.Rd => plot.Networkfamily.Rd} | 6 +- 10 files changed, 78 insertions(+), 81 deletions(-) rename man/{PLNnetworkfamilyvirtual.Rd => Networkfamily.Rd} (66%) rename man/{plot.PLNnetworkfamilyvirtual.Rd => plot.Networkfamily.Rd} (95%) diff --git a/NAMESPACE b/NAMESPACE index 86a81b46..78784e97 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,19 +7,19 @@ S3method(coef,ZIPLNfit) S3method(fitted,PLNfit) S3method(fitted,PLNmixturefit) S3method(fitted,ZIPLNfit) +S3method(getBestModel,Networkfamily) S3method(getBestModel,PLNPCAfamily) S3method(getBestModel,PLNmixturefamily) -S3method(getBestModel,PLNnetworkfamilyvirtual) +S3method(getModel,Networkfamily) S3method(getModel,PLNPCAfamily) S3method(getModel,PLNmixturefamily) -S3method(getModel,PLNnetworkfamilyvirtual) +S3method(plot,Networkfamily) S3method(plot,PLNLDAfit) S3method(plot,PLNPCAfamily) S3method(plot,PLNPCAfit) S3method(plot,PLNfamily) S3method(plot,PLNmixturefamily) S3method(plot,PLNmixturefit) -S3method(plot,PLNnetworkfamilyvirtual) S3method(plot,PLNnetworkfit) S3method(plot,ZIPLNfit_sparse) S3method(predict,PLNLDAfit) diff --git a/R/PLNnetworkfamily-S3methods.R b/R/PLNnetworkfamily-S3methods.R index 765ce399..f1dd1040 100644 --- a/R/PLNnetworkfamily-S3methods.R +++ b/R/PLNnetworkfamily-S3methods.R @@ -6,7 +6,7 @@ ## Auxiliary functions to check the given class of an objet -isPLNnetworkfamily <- function(Robject) {inherits(Robject, "PLNnetworkfamilyvirtual")} +isPLNnetworkfamily <- function(Robject) {inherits(Robject, "Networkfamily")} #' Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a [`PLNnetworkfamily`]) #' @@ -32,7 +32,7 @@ isPLNnetworkfamily <- function(Robject) {inherits(Robject, "PLNnetworkfamilyvirt #' (with \code{type = 'stability'}) or the evolution of the criteria of the different models considered #' (with \code{type = 'criteria'}, the default). #' @export -plot.PLNnetworkfamilyvirtual <- +plot.Networkfamily <- function(x, type = c("criteria", "stability", "diagnostic"), criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), @@ -53,14 +53,14 @@ plot.PLNnetworkfamilyvirtual <- #' @describeIn getModel Model extraction for [`PLNnetworkfamily`] #' @export -getModel.PLNnetworkfamilyvirtual <- function(Robject, var, index = NULL) { +getModel.Networkfamily <- function(Robject, var, index = NULL) { stopifnot(isPLNnetworkfamily(Robject)) Robject$getModel(var, index) } #' @describeIn getBestModel Model extraction for [`PLNnetworkfamily`] #' @export -getBestModel.PLNnetworkfamilyvirtual <- function(Robject, crit = c("BIC", "EBIC", "StARS"), ...) { +getBestModel.Networkfamily <- function(Robject, crit = c("BIC", "EBIC", "StARS"), ...) { stopifnot(isPLNnetworkfamily(Robject)) stability <- list(...)[["stability"]] if (is.null(stability)) stability <- 0.9 diff --git a/R/PLNnetworkfamily-class.R b/R/PLNnetworkfamily-class.R index 471983bd..21264b47 100644 --- a/R/PLNnetworkfamily-class.R +++ b/R/PLNnetworkfamily-class.R @@ -1,4 +1,4 @@ -#' An R6 Class to virtually represent a collection of PLNnetworkfit (either standard or ZI) +#' An R6 Class to virtually represent a collection of Networkfit (either standard PLN or ZI-PLN) #' #' @description The function [PLNnetwork()] produces an instance of this class. #' @@ -21,8 +21,8 @@ #' @importFrom R6 R6Class #' @importFrom glassoFast glassoFast #' @seealso The function [PLNnetwork()], the class [`PLNnetworkfit`] -PLNnetworkfamilyvirtual <- R6Class( - classname = "PLNnetworkfamilyvirtual", +Networkfamily <- R6Class( + classname = "Networkfamily", inherit = PLNfamily, ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## PUBLIC MEMBERS ------ @@ -307,7 +307,7 @@ PLNnetworkfamilyvirtual <- R6Class( #' @seealso The function [PLNnetwork()], the class [`PLNnetworkfit`] PLNnetworkfamily <- R6Class( classname = "PLNnetworkfamily", - inherit = PLNnetworkfamilyvirtual, + inherit = Networkfamily, ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## PUBLIC MEMBERS ------ ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -440,7 +440,7 @@ PLNnetworkfamily <- R6Class( #' @seealso The function [ZIPLNnetwork()], the class [`ZIPLNfit_sparse`] ZIPLNnetworkfamily <- R6Class( classname = "ZIPLNnetworkfamily", - inherit = PLNnetworkfamilyvirtual, + inherit = Networkfamily, ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## PUBLIC MEMBERS ------ ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/R/ZIPLNnetwork.R b/R/ZIPLNnetwork.R index 66064663..5ff17536 100644 --- a/R/ZIPLNnetwork.R +++ b/R/ZIPLNnetwork.R @@ -19,7 +19,7 @@ #' #' @return an R6 object with class [`ZIPLNnetworkfamily`] #' -#' @include ZIPLNnetworkfamily-class.R +#' @include PLNnetworkfamily-class.R #' @examples #' data(trichoptera) #' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) diff --git a/man/PLNnetworkfamilyvirtual.Rd b/man/Networkfamily.Rd similarity index 66% rename from man/PLNnetworkfamilyvirtual.Rd rename to man/Networkfamily.Rd index 2155547c..2a7ca6f9 100644 --- a/man/PLNnetworkfamilyvirtual.Rd +++ b/man/Networkfamily.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/PLNnetworkfamily-class.R -\name{PLNnetworkfamilyvirtual} -\alias{PLNnetworkfamilyvirtual} -\title{An R6 Class to virtually represent a collection of PLNnetworkfit (either standard or ZI)} +\name{Networkfamily} +\alias{Networkfamily} +\title{An R6 Class to virtually represent a collection of Networkfit (either standard PLN or ZI-PLN)} \description{ The function \code{\link[=PLNnetwork]{PLNnetwork()}} produces an instance of this class. @@ -14,7 +14,7 @@ See the documentation for \code{\link[=getBestModel]{getBestModel()}}, The function \code{\link[=PLNnetwork]{PLNnetwork()}}, the class \code{\link{PLNnetworkfit}} } \section{Super class}{ -\code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{PLNnetworkfamilyvirtual} +\code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{Networkfamily} } \section{Active bindings}{ \if{html}{\out{
}} @@ -33,15 +33,15 @@ BIC, ICL and EBIC are defined so that they are on the same scale as the model lo \section{Methods}{ \subsection{Public methods}{ \itemize{ -\item \href{#method-PLNnetworkfamilyvirtual-new}{\code{PLNnetworkfamilyvirtual$new()}} -\item \href{#method-PLNnetworkfamilyvirtual-optimize}{\code{PLNnetworkfamilyvirtual$optimize()}} -\item \href{#method-PLNnetworkfamilyvirtual-coefficient_path}{\code{PLNnetworkfamilyvirtual$coefficient_path()}} -\item \href{#method-PLNnetworkfamilyvirtual-getBestModel}{\code{PLNnetworkfamilyvirtual$getBestModel()}} -\item \href{#method-PLNnetworkfamilyvirtual-plot}{\code{PLNnetworkfamilyvirtual$plot()}} -\item \href{#method-PLNnetworkfamilyvirtual-plot_stars}{\code{PLNnetworkfamilyvirtual$plot_stars()}} -\item \href{#method-PLNnetworkfamilyvirtual-plot_objective}{\code{PLNnetworkfamilyvirtual$plot_objective()}} -\item \href{#method-PLNnetworkfamilyvirtual-show}{\code{PLNnetworkfamilyvirtual$show()}} -\item \href{#method-PLNnetworkfamilyvirtual-clone}{\code{PLNnetworkfamilyvirtual$clone()}} +\item \href{#method-Networkfamily-new}{\code{Networkfamily$new()}} +\item \href{#method-Networkfamily-optimize}{\code{Networkfamily$optimize()}} +\item \href{#method-Networkfamily-coefficient_path}{\code{Networkfamily$coefficient_path()}} +\item \href{#method-Networkfamily-getBestModel}{\code{Networkfamily$getBestModel()}} +\item \href{#method-Networkfamily-plot}{\code{Networkfamily$plot()}} +\item \href{#method-Networkfamily-plot_stars}{\code{Networkfamily$plot_stars()}} +\item \href{#method-Networkfamily-plot_objective}{\code{Networkfamily$plot_objective()}} +\item \href{#method-Networkfamily-show}{\code{Networkfamily$show()}} +\item \href{#method-Networkfamily-clone}{\code{Networkfamily$clone()}} } } \if{html}{\out{ @@ -54,12 +54,12 @@ BIC, ICL and EBIC are defined so that they are on the same scale as the model lo }} \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamilyvirtual-new}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Networkfamily-new}{}}} \subsection{Method \code{new()}}{ Initialize all models in the collection \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamilyvirtual$new( +\if{html}{\out{
}}\preformatted{Networkfamily$new( penalties, responses, covariates, @@ -94,12 +94,12 @@ Update current \code{\link{PLNnetworkfit}} with smart starting values } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamilyvirtual-optimize}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Networkfamily-optimize}{}}} \subsection{Method \code{optimize()}}{ Call to the C++ optimizer on all models of the collection \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamilyvirtual$optimize(config)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{Networkfamily$optimize(config)}\if{html}{\out{
}} } \subsection{Arguments}{ @@ -111,12 +111,12 @@ Call to the C++ optimizer on all models of the collection } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamilyvirtual-coefficient_path}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Networkfamily-coefficient_path}{}}} \subsection{Method \code{coefficient_path()}}{ Extract the regularization path of a \code{\link{PLNnetworkfamily}} \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamilyvirtual$coefficient_path(precision = TRUE, corr = TRUE)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{Networkfamily$coefficient_path(precision = TRUE, corr = TRUE)}\if{html}{\out{
}} } \subsection{Arguments}{ @@ -130,15 +130,12 @@ Extract the regularization path of a \code{\link{PLNnetworkfamily}} } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamilyvirtual-getBestModel}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Networkfamily-getBestModel}{}}} \subsection{Method \code{getBestModel()}}{ Extract the best network in the family according to some criteria \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamilyvirtual$getBestModel( - crit = c("BIC", "EBIC", "StARS"), - stability = 0.9 -)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{Networkfamily$getBestModel(crit = c("BIC", "EBIC", "StARS"), stability = 0.9)}\if{html}{\out{
}} } \subsection{Arguments}{ @@ -152,12 +149,12 @@ Extract the best network in the family according to some criteria } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamilyvirtual-plot}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Networkfamily-plot}{}}} \subsection{Method \code{plot()}}{ Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a \code{\link{PLNnetworkfamily}}) \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamilyvirtual$plot( +\if{html}{\out{
}}\preformatted{Networkfamily$plot( criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), reverse = FALSE, log.x = TRUE @@ -182,12 +179,12 @@ a \code{\link{ggplot}} graph } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamilyvirtual-plot_stars}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Networkfamily-plot_stars}{}}} \subsection{Method \code{plot_stars()}}{ Plot stability path \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamilyvirtual$plot_stars(stability = 0.9, log.x = TRUE)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{Networkfamily$plot_stars(stability = 0.9, log.x = TRUE)}\if{html}{\out{
}} } \subsection{Arguments}{ @@ -204,12 +201,12 @@ a \code{\link{ggplot}} graph } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamilyvirtual-plot_objective}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Networkfamily-plot_objective}{}}} \subsection{Method \code{plot_objective()}}{ Plot objective value of the optimization problem along the penalty path \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamilyvirtual$plot_objective()}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{Networkfamily$plot_objective()}\if{html}{\out{
}} } \subsection{Returns}{ @@ -217,22 +214,22 @@ a \code{\link{ggplot}} graph } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamilyvirtual-show}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Networkfamily-show}{}}} \subsection{Method \code{show()}}{ User friendly print method \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamilyvirtual$show()}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{Networkfamily$show()}\if{html}{\out{
}} } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfamilyvirtual-clone}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Networkfamily-clone}{}}} \subsection{Method \code{clone()}}{ The objects of this class are cloneable with this method. \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PLNnetworkfamilyvirtual$clone(deep = FALSE)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{Networkfamily$clone(deep = FALSE)}\if{html}{\out{
}} } \subsection{Arguments}{ diff --git a/man/PLNnetworkfamily.Rd b/man/PLNnetworkfamily.Rd index fa8eead1..03052b2d 100644 --- a/man/PLNnetworkfamily.Rd +++ b/man/PLNnetworkfamily.Rd @@ -20,7 +20,7 @@ class(fits) The function \code{\link[=PLNnetwork]{PLNnetwork()}}, the class \code{\link{PLNnetworkfit}} } \section{Super classes}{ -\code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{\link[PLNmodels:PLNnetworkfamilyvirtual]{PLNmodels::PLNnetworkfamilyvirtual}} -> \code{PLNnetworkfamily} +\code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{\link[PLNmodels:Networkfamily]{PLNmodels::Networkfamily}} -> \code{PLNnetworkfamily} } \section{Methods}{ \subsection{Public methods}{ @@ -36,13 +36,13 @@ The function \code{\link[=PLNnetwork]{PLNnetwork()}}, the class \code{\link{PLNn
  • PLNmodels::PLNfamily$getModel()
  • PLNmodels::PLNfamily$postTreatment()
  • PLNmodels::PLNfamily$print()
  • -
  • PLNmodels::PLNnetworkfamilyvirtual$coefficient_path()
  • -
  • PLNmodels::PLNnetworkfamilyvirtual$getBestModel()
  • -
  • PLNmodels::PLNnetworkfamilyvirtual$optimize()
  • -
  • PLNmodels::PLNnetworkfamilyvirtual$plot()
  • -
  • PLNmodels::PLNnetworkfamilyvirtual$plot_objective()
  • -
  • PLNmodels::PLNnetworkfamilyvirtual$plot_stars()
  • -
  • PLNmodels::PLNnetworkfamilyvirtual$show()
  • +
  • PLNmodels::Networkfamily$coefficient_path()
  • +
  • PLNmodels::Networkfamily$getBestModel()
  • +
  • PLNmodels::Networkfamily$optimize()
  • +
  • PLNmodels::Networkfamily$plot()
  • +
  • PLNmodels::Networkfamily$plot_objective()
  • +
  • PLNmodels::Networkfamily$plot_stars()
  • +
  • PLNmodels::Networkfamily$show()
  • }} diff --git a/man/ZIPLNnetworkfamily.Rd b/man/ZIPLNnetworkfamily.Rd index 24ca0892..e773424b 100644 --- a/man/ZIPLNnetworkfamily.Rd +++ b/man/ZIPLNnetworkfamily.Rd @@ -20,7 +20,7 @@ class(fits) The function \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}}, the class \code{\link{ZIPLNfit_sparse}} } \section{Super classes}{ -\code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{\link[PLNmodels:PLNnetworkfamilyvirtual]{PLNmodels::PLNnetworkfamilyvirtual}} -> \code{ZIPLNnetworkfamily} +\code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{\link[PLNmodels:Networkfamily]{PLNmodels::Networkfamily}} -> \code{ZIPLNnetworkfamily} } \section{Methods}{ \subsection{Public methods}{ @@ -36,13 +36,13 @@ The function \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}}, the class \code{\link{
  • PLNmodels::PLNfamily$getModel()
  • PLNmodels::PLNfamily$postTreatment()
  • PLNmodels::PLNfamily$print()
  • -
  • PLNmodels::PLNnetworkfamilyvirtual$coefficient_path()
  • -
  • PLNmodels::PLNnetworkfamilyvirtual$getBestModel()
  • -
  • PLNmodels::PLNnetworkfamilyvirtual$optimize()
  • -
  • PLNmodels::PLNnetworkfamilyvirtual$plot()
  • -
  • PLNmodels::PLNnetworkfamilyvirtual$plot_objective()
  • -
  • PLNmodels::PLNnetworkfamilyvirtual$plot_stars()
  • -
  • PLNmodels::PLNnetworkfamilyvirtual$show()
  • +
  • PLNmodels::Networkfamily$coefficient_path()
  • +
  • PLNmodels::Networkfamily$getBestModel()
  • +
  • PLNmodels::Networkfamily$optimize()
  • +
  • PLNmodels::Networkfamily$plot()
  • +
  • PLNmodels::Networkfamily$plot_objective()
  • +
  • PLNmodels::Networkfamily$plot_stars()
  • +
  • PLNmodels::Networkfamily$show()
  • }} diff --git a/man/getBestModel.Rd b/man/getBestModel.Rd index 67f2bd96..41d37cc3 100644 --- a/man/getBestModel.Rd +++ b/man/getBestModel.Rd @@ -6,7 +6,7 @@ \alias{getBestModel.PLNPCAfamily} \alias{getBestModel} \alias{getBestModel.PLNmixturefamily} -\alias{getBestModel.PLNnetworkfamilyvirtual} +\alias{getBestModel.Networkfamily} \title{Best model extraction from a collection of models} \usage{ \method{getBestModel}{PLNPCAfamily}(Robject, crit = c("ICL", "BIC"), ...) @@ -15,7 +15,7 @@ getBestModel(Robject, crit, ...) \method{getBestModel}{PLNmixturefamily}(Robject, crit = c("ICL", "BIC"), ...) -\method{getBestModel}{PLNnetworkfamilyvirtual}(Robject, crit = c("BIC", "EBIC", "StARS"), ...) +\method{getBestModel}{Networkfamily}(Robject, crit = c("BIC", "EBIC", "StARS"), ...) } \arguments{ \item{Robject}{an object with class PLNPCAfamilly ot PLNnetworkfamily} @@ -39,7 +39,7 @@ Best model extraction from a collection of models \item \code{getBestModel(PLNmixturefamily)}: Model extraction for \code{\link{PLNmixturefamily}} -\item \code{getBestModel(PLNnetworkfamilyvirtual)}: Model extraction for \code{\link{PLNnetworkfamily}} +\item \code{getBestModel(Networkfamily)}: Model extraction for \code{\link{PLNnetworkfamily}} }} \examples{ diff --git a/man/getModel.Rd b/man/getModel.Rd index bca10d4a..680ce343 100644 --- a/man/getModel.Rd +++ b/man/getModel.Rd @@ -6,7 +6,7 @@ \alias{getModel.PLNPCAfamily} \alias{getModel} \alias{getModel.PLNmixturefamily} -\alias{getModel.PLNnetworkfamilyvirtual} +\alias{getModel.Networkfamily} \title{Model extraction from a collection of models} \usage{ \method{getModel}{PLNPCAfamily}(Robject, var, index = NULL) @@ -15,7 +15,7 @@ getModel(Robject, var, index) \method{getModel}{PLNmixturefamily}(Robject, var, index = NULL) -\method{getModel}{PLNnetworkfamilyvirtual}(Robject, var, index = NULL) +\method{getModel}{Networkfamily}(Robject, var, index = NULL) } \arguments{ \item{Robject}{an R6 object with class \code{\link{PLNPCAfamily}} or \code{\link{PLNnetworkfamily}}} @@ -36,7 +36,7 @@ Model extraction from a collection of models \item \code{getModel(PLNmixturefamily)}: Model extraction for \code{\link{PLNmixturefamily}} -\item \code{getModel(PLNnetworkfamilyvirtual)}: Model extraction for \code{\link{PLNnetworkfamily}} +\item \code{getModel(Networkfamily)}: Model extraction for \code{\link{PLNnetworkfamily}} }} \examples{ diff --git a/man/plot.PLNnetworkfamilyvirtual.Rd b/man/plot.Networkfamily.Rd similarity index 95% rename from man/plot.PLNnetworkfamilyvirtual.Rd rename to man/plot.Networkfamily.Rd index bf189397..0f79f589 100644 --- a/man/plot.PLNnetworkfamilyvirtual.Rd +++ b/man/plot.Networkfamily.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/PLNnetworkfamily-S3methods.R -\name{plot.PLNnetworkfamilyvirtual} -\alias{plot.PLNnetworkfamilyvirtual} +\name{plot.Networkfamily} +\alias{plot.Networkfamily} \title{Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a \code{\link{PLNnetworkfamily}})} \usage{ -\method{plot}{PLNnetworkfamilyvirtual}( +\method{plot}{Networkfamily}( x, type = c("criteria", "stability", "diagnostic"), criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), From fe1a24c511335fb5773041af6bb2cb4300903fe9 Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Tue, 13 Feb 2024 11:27:30 +0100 Subject: [PATCH 13/30] passing test but some code reorganisation/cosmetics remain --- R/PLNnetworkfamily-class.R | 4 +++- R/PLNnetworkfit-class.R | 10 +++++++--- R/ZIPLNfit-class.R | 5 ++++- man/PLNnetworkfit.Rd | 15 +-------------- 4 files changed, 15 insertions(+), 19 deletions(-) diff --git a/R/PLNnetworkfamily-class.R b/R/PLNnetworkfamily-class.R index 21264b47..19ac4df8 100644 --- a/R/PLNnetworkfamily-class.R +++ b/R/PLNnetworkfamily-class.R @@ -338,7 +338,9 @@ PLNnetworkfamily <- R6Class( ## instantiate as many models as penalties control$trace <- 0 self$models <- map2(private$params, private$penalties_weights, function(penalty, penalty_weights) { - PLNnetworkfit$new(penalty, penalty_weights, responses, covariates, offsets, weights, formula, control) + control$penalty <- penalty + control$penalty_weights <- penalty_weights + PLNnetworkfit$new(responses, covariates, offsets, weights, formula, control) }) }, diff --git a/R/PLNnetworkfit-class.R b/R/PLNnetworkfit-class.R index 3816349b..85f01c5c 100644 --- a/R/PLNnetworkfit-class.R +++ b/R/PLNnetworkfit-class.R @@ -41,10 +41,14 @@ PLNnetworkfit <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Creation functions ---------------- #' @description Initialize a [`PLNnetworkfit`] object - initialize = function(penalty, penalty_weights, responses, covariates, offsets, weights, formula, control) { + initialize = function(responses, covariates, offsets, weights, formula, control) { super$initialize(responses, covariates, offsets, weights, formula, control) - private$lambda <- penalty - private$rho <- penalty_weights + ## Default for penalty weights (if not already set) + if (is.null(control$penalty_weights)) control$penalty_weights <- matrix(1, self$p, self$p) + stopifnot(isSymmetric(control$penalty_weights), all(control$penalty_weights >= 0)) + if (!control$penalize_diagonal) diag(control$penalty_weights) <- 0 + private$lambda <- control$penalty + private$rho <- control$penalty_weights }, #' @description Update fields of a [`PLNnetworkfit`] object #' @param B matrix of regression matrix diff --git a/R/ZIPLNfit-class.R b/R/ZIPLNfit-class.R index 003ab50e..a809b227 100644 --- a/R/ZIPLNfit-class.R +++ b/R/ZIPLNfit-class.R @@ -762,9 +762,12 @@ ZIPLNfit_sparse <- R6Class( #' @importFrom glassoFast glassoFast initialize = function(responses, covariates, offsets, weights, formula, control) { super$initialize(responses, covariates, offsets, weights, formula, control) + ## Default for penalty weights (if not already set) + if (is.null(control$penalty_weights)) control$penalty_weights <- matrix(1, self$p, self$p) + stopifnot(isSymmetric(control$penalty_weights), all(control$penalty_weights >= 0)) + if (!control$penalize_diagonal) diag(control$penalty_weights) <- 0 private$lambda <- control$penalty private$rho <- control$penalty_weights - if (!control$penalize_diagonal) diag(private$rho) <- 0 private$optimizer$Omega <- function(M, X, B, S) { glassoFast( crossprod(M - X %*% B)/self$n + diag(colMeans(S * S), self$p, self$p), diff --git a/man/PLNnetworkfit.Rd b/man/PLNnetworkfit.Rd index bc7bb2ff..f83df47f 100644 --- a/man/PLNnetworkfit.Rd +++ b/man/PLNnetworkfit.Rd @@ -76,25 +76,12 @@ The function \code{\link[=PLNnetwork]{PLNnetwork()}}, the class \code{\link{PLNn \subsection{Method \code{new()}}{ Initialize a \code{\link{PLNnetworkfit}} object \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{PLNnetworkfit$new( - penalty, - penalty_weights, - responses, - covariates, - offsets, - weights, - formula, - control -)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{PLNnetworkfit$new(responses, covariates, offsets, weights, formula, control)}\if{html}{\out{
    }} } \subsection{Arguments}{ \if{html}{\out{
    }} \describe{ -\item{\code{penalty}}{a positive real number controlling the level of sparsity of the underlying network.} - -\item{\code{penalty_weights}}{either a single or a list of p x p matrix of weights (default filled with 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values.} - \item{\code{responses}}{the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class} \item{\code{covariates}}{design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class} From 5c344aebdc848c2dfc8a39634fb358a2c545bd9c Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Tue, 13 Feb 2024 14:57:13 +0100 Subject: [PATCH 14/30] some simplification in PLNnetworkfit --- R/PLNnetworkfit-class.R | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/R/PLNnetworkfit-class.R b/R/PLNnetworkfit-class.R index 85f01c5c..41958c92 100644 --- a/R/PLNnetworkfit-class.R +++ b/R/PLNnetworkfit-class.R @@ -50,21 +50,6 @@ PLNnetworkfit <- R6Class( private$lambda <- control$penalty private$rho <- control$penalty_weights }, - #' @description Update fields of a [`PLNnetworkfit`] object - #' @param B matrix of regression matrix - #' @param Sigma variance-covariance matrix of the latent variables - #' @param Omega precision matrix of the latent variables. Inverse of Sigma. - #' @param M matrix of mean vectors for the variational approximation - #' @param S matrix of variance vectors for the variational approximation - #' @param Z matrix of latent vectors (includes covariates and offset effects) - #' @param A matrix of fitted values - #' @param Ji vector of variational lower bounds of the log-likelihoods (one value per sample) - #' @param R2 approximate R^2 goodness-of-fit criterion - #' @param monitoring a list with optimization monitoring quantities - update = function(penalty=NA, B=NA, Sigma=NA, Omega=NA, M=NA, S=NA, Z=NA, A=NA, Ji=NA, R2=NA, monitoring=NA) { - super$update(B = B, Sigma = Sigma, Omega = Omega, M, S = S, Z = Z, A = A, Ji = Ji, R2 = R2, monitoring = monitoring) - if (!anyNA(penalty)) private$lambda <- penalty - }, ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Optimization ---------------------- @@ -92,7 +77,7 @@ PLNnetworkfit <- R6Class( do.call(self$update, optim_out) ## Check convergence - objective[iter] <- -self$loglik + self$penalty * sum(abs(private$Omega)) + objective[iter] <- -self$loglik # + self$penalty * sum(abs(private$Omega)) convergence[iter] <- abs(objective[iter] - objective.old)/abs(objective[iter]) if ((convergence[iter] < config$ftol_out) | (iter >= config$maxit_out)) cond <- TRUE @@ -104,9 +89,9 @@ PLNnetworkfit <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## OUTPUT private$Sigma <- Matrix::symmpart(glasso_out$w) - private$monitoring$objective <- objective[1:iter] - private$monitoring$convergence <- convergence[1:iter] - private$monitoring$outer_iterations <- iter + private$monitoring$objective <- objective[1:iter] + private$monitoring$convergence <- convergence[1:iter] + private$monitoring$iterations <- iter }, ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% From e5bfded6e4e5cce507b54b5e06a5fcbd6e52a1e4 Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Tue, 13 Feb 2024 17:53:37 +0100 Subject: [PATCH 15/30] use a list to carry data for simplification (for PLNnewotk and ZIPLNnetwork for now... propagte to all PLNs later) --- R/PLNnetwork.R | 6 +- R/PLNnetworkfamily-class.R | 78 +++++++----------- R/PLNnetworkfit-class.R | 14 ++-- R/ZIPLN.R | 16 ++-- R/ZIPLNfit-class.R | 107 ++++++++++--------------- R/ZIPLNnetwork.R | 8 +- man/Networkfamily.Rd | 24 ++---- man/PLNnetworkfamily.Rd | 20 +---- man/PLNnetworkfit.Rd | 75 ++--------------- man/ZIPLNfit.Rd | 35 ++------ man/ZIPLNfit_diagonal.Rd | 19 +---- man/ZIPLNfit_fixed.Rd | 12 +-- man/ZIPLNfit_sparse.Rd | 12 +-- man/ZIPLNfit_spherical.Rd | 19 +---- man/ZIPLNnetworkfamily.Rd | 20 +---- tests/testthat/test-plnnetworkfamily.R | 14 ++-- 16 files changed, 130 insertions(+), 349 deletions(-) diff --git a/R/PLNnetwork.R b/R/PLNnetwork.R index fa7d8161..f719badf 100644 --- a/R/PLNnetwork.R +++ b/R/PLNnetwork.R @@ -28,16 +28,16 @@ PLNnetwork <- function(formula, data, subset, weights, penalties = NULL, control replace 'list(my_arg = xx)' by PLN_param(my_arg = xx) and see the documentation of PLNnetwork_param().") ## extract the data matrices and weights - args <- extract_model(match.call(expand.dots = FALSE), parent.frame()) + data_ <- extract_model(match.call(expand.dots = FALSE), parent.frame()) ## Instantiate the collection of models if (control$trace > 0) cat("\n Initialization...") - myPLN <- PLNnetworkfamily$new(penalties, args$Y, args$X, args$O, args$w, args$formula, control) + myPLN <- PLNnetworkfamily$new(penalties, data_, control) ## Optimization if (control$trace > 0) cat("\n Adjusting", length(myPLN$penalties), "PLN with sparse inverse covariance estimation\n") if (control$trace) cat("\tJoint optimization alternating gradient descent and graphical-lasso\n") - myPLN$optimize(control$config_optim) + myPLN$optimize(data_, control$config_optim) ## Post-treatments if (control$trace > 0) cat("\n Post-treatments") diff --git a/R/PLNnetworkfamily-class.R b/R/PLNnetworkfamily-class.R index 19ac4df8..32f19213 100644 --- a/R/PLNnetworkfamily-class.R +++ b/R/PLNnetworkfamily-class.R @@ -8,14 +8,8 @@ #' ## Parameters shared by many methods #' @param penalties a vector of positive real number controlling the level of sparsity of the underlying network. -#' @param responses the matrix of responses common to every models -#' @param covariates the matrix of covariates common to every models -#' @param offsets the matrix of offsets common to every models -#' @param weights the vector of observation weights -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. -#' @param var value of the parameter (`rank` for PLNPCA, `sparsity` for PLNnetwork) that identifies the model to be extracted from the collection. If no exact match is found, the model with closest parameter value is returned with a warning. -#' @param index Integer index of the model to be returned. Only the first value is taken into account #' #' @include PLNfamily-class.R #' @importFrom R6 R6Class @@ -32,13 +26,13 @@ Networkfamily <- R6Class( ## Creation functions ---------------- #' @description Initialize all models in the collection #' @return Update current [`PLNnetworkfit`] with smart starting values - initialize = function(penalties, responses, covariates, offsets, weights, formula, control) { + initialize = function(penalties, data, control) { ## Initialize fields shared by the super class - super$initialize(responses, covariates, offsets, weights, control) + super$initialize(data$Y, data$X, data$O, data$w, control) if (is.null(control$penalty_weights)) - control$penalty_weights <- matrix(1, ncol(responses), ncol(responses)) + control$penalty_weights <- matrix(1, private$p, private$p) ## Get the number of penalty if (is.null(penalties)) { if (is.list(control$penalty_weights)) @@ -82,7 +76,7 @@ Networkfamily <- R6Class( ## Optimization ---------------------- #' @description Call to the C++ optimizer on all models of the collection #' @param config a list for controlling the optimization. - optimize = function(config) { + optimize = function(data, config) { ## Go along the penalty grid (i.e the models) for (m in seq_along(self$models)) { @@ -93,7 +87,7 @@ Networkfamily <- R6Class( if (config$trace > 1) { cat("\tsparsifying penalty =", self$models[[m]]$penalty, "- iteration:") } - self$models[[m]]$optimize(self$responses, self$covariates, self$offsets, self$weights, config) + self$models[[m]]$optimize(data, config) ## Save time by starting the optimization of model m + 1 with optimal parameters of model m if (m < length(self$penalties)) self$models[[m + 1]]$update( @@ -287,14 +281,9 @@ Networkfamily <- R6Class( #' ## Parameters shared by many methods #' @param penalties a vector of positive real number controlling the level of sparsity of the underlying network. -#' @param responses the matrix of responses common to every models -#' @param covariates the matrix of covariates common to every models -#' @param offsets the matrix of offsets common to every models -#' @param weights the vector of observation weights +#' @param data a named list used internally to carry the data matrices #' @param formula model formula used for fitting, extracted from the formula in the upper-level call #' @param control a list for controlling the optimization. -#' @param var value of the parameter (`rank` for PLNPCA, `sparsity` for PLNnetwork) that identifies the model to be extracted from the collection. If no exact match is found, the model with closest parameter value is returned with a warning. -#' @param index Integer index of the model to be returned. Only the first value is taken into account #' #' @include PLNfamily-class.R #' @importFrom R6 R6Class @@ -316,7 +305,7 @@ PLNnetworkfamily <- R6Class( ## Creation functions ---------------- #' @description Initialize all models in the collection #' @return Update current [`PLNnetworkfit`] with smart starting values - initialize = function(penalties, responses, covariates, offsets, weights, formula, control) { + initialize = function(penalties, data, control) { ## A basic model for inception, useless one is defined by the user if (is.null(control$inception)) { @@ -324,23 +313,23 @@ PLNnetworkfamily <- R6Class( ## for the inner-outer loop of PLNnetwork. myPLN <- switch( control$inception_cov, - "spherical" = PLNfit_spherical$new(responses, covariates, offsets, weights, formula, control), - "diagonal" = PLNfit_diagonal$new(responses, covariates, offsets, weights, formula, control), - PLNfit$new(responses, covariates, offsets, weights, formula, control) # defaults to full + "spherical" = PLNfit_spherical$new(data$Y, data$X, data$O, data$w, data$formula, control), + "diagonal" = PLNfit_diagonal$new(data$Y, data$X, data$O, data$w, data$formula, control), + PLNfit$new(data$Y, data$X, data$O, data$w, data$formula, control) # defaults to full ) - myPLN$optimize(responses, covariates, offsets, weights, control$config_optim) + myPLN$optimize(data$Y, data$X, data$O, data$w, control$config_optim) control$inception <- myPLN } ## Initialize fields shared by the super class - super$initialize(penalties, responses, covariates, offsets, weights, formula, control) + super$initialize(penalties, data, control) ## instantiate as many models as penalties control$trace <- 0 self$models <- map2(private$params, private$penalties_weights, function(penalty, penalty_weights) { control$penalty <- penalty control$penalty_weights <- penalty_weights - PLNnetworkfit$new(responses, covariates, offsets, weights, formula, control) + PLNnetworkfit$new(data, control) }) }, @@ -377,14 +366,14 @@ PLNnetworkfamily <- R6Class( control$trace <- 0 control$config_optim$trace <- 0 - myPLN <- PLNnetworkfamily$new(penalties = self$penalties, - responses = self$responses [subsample, , drop = FALSE], - covariates = self$covariates[subsample, , drop = FALSE], - offsets = self$offsets [subsample, , drop = FALSE], - formula = private$formula, - weights = self$weights [subsample], control = control) + data <- list( + Y = self$responses [subsample, , drop = FALSE], + X = self$covariates[subsample, , drop = FALSE], + O = self$offsets [subsample, , drop = FALSE], + w = self$weights [subsample], formula = private$formula) - myPLN$optimize(control$config_optim) + myPLN <- PLNnetworkfamily$new(self$penalties, data, control) + myPLN$optimize(data, control$config_optim) nets <- do.call(cbind, lapply(myPLN$models, function(model) { as.matrix(model$latent_network("support"))[upper.tri(diag(private$p))] })) @@ -422,14 +411,8 @@ PLNnetworkfamily <- R6Class( #' ## Parameters shared by many methods #' @param penalties a vector of positive real number controlling the level of sparsity of the underlying network. -#' @param responses the matrix of responses common to every models -#' @param covariates the matrix of covariates common to every models -#' @param offsets the matrix of offsets common to every models -#' @param weights the vector of observation weights -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. -#' @param var value of the parameter (`rank` for PLNPCA, `sparsity` for PLNnetwork) that identifies the model to be extracted from the collection. If no exact match is found, the model with closest parameter value is returned with a warning. -#' @param index Integer index of the model to be returned. Only the first value is taken into account #' #' @include PLNfamily-class.R #' @importFrom R6 R6Class @@ -447,12 +430,11 @@ ZIPLNnetworkfamily <- R6Class( ## PUBLIC MEMBERS ------ ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% public = list( - covariates_ZI = NULL, # a field to store the covariates of the ZI ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Creation functions ---------------- #' @description Initialize all models in the collection #' @return Update current [`PLNnetworkfit`] with smart starting values - initialize = function(penalties, responses, covariates, offsets, weights, formula, control) { + initialize = function(penalties, data, control) { ## A basic model for inception, useless one is defined by the user if (is.null(control$inception)) { @@ -460,25 +442,23 @@ ZIPLNnetworkfamily <- R6Class( ## for the inner-outer loop of PLNnetwork. myPLN <- switch( control$inception_cov, - "spherical" = ZIPLNfit_spherical$new(responses, covariates, offsets, weights, formula, control), - "diagonal" = ZIPLNfit_diagonal$new(responses, covariates, offsets, weights, formula, control), - ZIPLNfit$new(responses, covariates, offsets, weights, formula, control) # defaults to full + "spherical" = ZIPLNfit_spherical$new(data, control), + "diagonal" = ZIPLNfit_diagonal$new(data, control), + ZIPLNfit$new(data, control) # defaults to full ) - myPLN$optimize(responses, covariates, offsets, weights, control$config_optim) + myPLN$optimize(data, control$config_optim) control$inception <- myPLN } ## Initialize fields shared by the super class - super$initialize(penalties, responses, covariates$PLN, offsets, weights, formula, control) - self$covariates_ZI <- covariates$ZI - self$covariates <- list(PLN = self$covariates, ZI = self$covariates_ZI) + super$initialize(penalties, data, control) ## instantiate as many models as penalties control$trace <- 0 self$models <- map2(private$params, private$penalties_weights, function(penalty, penalty_weights) { control$penalty <- penalty control$penalty_weights <- penalty_weights - ZIPLNfit_sparse$new(responses, covariates, offsets, weights, formula, control) + ZIPLNfit_sparse$new(data, control) }) }, diff --git a/R/PLNnetworkfit-class.R b/R/PLNnetworkfit-class.R index 41958c92..6079a66d 100644 --- a/R/PLNnetworkfit-class.R +++ b/R/PLNnetworkfit-class.R @@ -5,11 +5,7 @@ #' See the documentation for [`plot()`][plot.PLNnetworkfit()] and methods inherited from [`PLNfit`]. #' ## Parameters common to all PLN-xx-fit methods (shared with PLNfit but inheritance does not work) -#' @param responses the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param covariates design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param offsets offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param weights an optional vector of observation weights to be used in the fitting process. -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. #' @param nullModel null model used for approximate R2 computations. Defaults to a GLM model with same design matrix but not latent variable. #' @param B matrix of regression matrix @@ -41,8 +37,8 @@ PLNnetworkfit <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Creation functions ---------------- #' @description Initialize a [`PLNnetworkfit`] object - initialize = function(responses, covariates, offsets, weights, formula, control) { - super$initialize(responses, covariates, offsets, weights, formula, control) + initialize = function(data, control) { + super$initialize(data$Y, data$X, data$O, data$w, data$formula, control) ## Default for penalty weights (if not already set) if (is.null(control$penalty_weights)) control$penalty_weights <- matrix(1, self$p, self$p) stopifnot(isSymmetric(control$penalty_weights), all(control$penalty_weights >= 0)) @@ -55,13 +51,13 @@ PLNnetworkfit <- R6Class( ## Optimization ---------------------- #' @description Call to the C++ optimizer and update of the relevant fields #' @param config a list for controlling the optimization - optimize = function(responses, covariates, offsets, weights, config) { + optimize = function(data, config) { cond <- FALSE; iter <- 0 objective <- numeric(config$maxit_out) convergence <- numeric(config$maxit_out) ## start from the standard PLN at initialization objective.old <- -self$loglik - args <- list(data = list(Y = responses, X = covariates, O = offsets, w = weights), + args <- list(data = data, params = list(B = private$B, M = private$M, S = private$S), config = config) while (!cond) { diff --git a/R/ZIPLN.R b/R/ZIPLN.R index 761d5b09..2448f1cf 100644 --- a/R/ZIPLN.R +++ b/R/ZIPLN.R @@ -43,23 +43,23 @@ ZIPLN <- function(formula, data, subset, zi = c("single", "row", "col"), control = ZIPLN_param()) { ## extract the data matrices and weights - args <- extract_model_zi(match.call(expand.dots = FALSE), parent.frame()) - control$ziparam <- ifelse((args$zicovar), "covar", match.arg(zi)) + data_ <- extract_model_zi(match.call(expand.dots = FALSE), parent.frame()) + control$ziparam <- ifelse((data_$zicovar), "covar", match.arg(zi)) ## initialization if (control$trace > 0) cat("\n Initialization...") myPLN <- switch(control$covariance, - "diagonal" = ZIPLNfit_diagonal$new(args$Y , list(PLN = args$X, ZI = args$X0), args$O, args$w, args$formula, control), - "spherical" = ZIPLNfit_spherical$new(args$Y, list(PLN = args$X, ZI = args$X0), args$O, args$w, args$formula, control), - "fixed" = ZIPLNfit_fixed$new(args$Y , list(PLN = args$X, ZI = args$X0), args$O, args$w, args$formula, control), - "sparse" = ZIPLNfit_sparse$new(args$Y , list(PLN = args$X, ZI = args$X0), args$O, args$w, args$formula, control), - ZIPLNfit$new(args$Y, list(PLN = args$X, ZI = args$X0), args$O, args$w, args$formula, control)) # default: full covariance + "diagonal" = ZIPLNfit_diagonal$new(data_, control), + "spherical" = ZIPLNfit_spherical$new(data_, control), + "fixed" = ZIPLNfit_fixed$new(data_, control), + "sparse" = ZIPLNfit_sparse$new(data_, control), + ZIPLNfit$new(data_, control)) # default: full covariance ## optimization if (control$trace > 0) cat("\n Adjusting a ZI-PLN model with", control$covariance,"covariance model and", control$ziparam, "specific parameter(s) in Zero inflation component.") - myPLN$optimize(args$Y, list(PLN = args$X, ZI = args$X0), args$O, args$w, control$config_optim) + myPLN$optimize(data_, control$config_optim) if (control$trace > 0) cat("\n DONE!\n") myPLN diff --git a/R/ZIPLNfit-class.R b/R/ZIPLNfit-class.R index a809b227..40a6eb1a 100644 --- a/R/ZIPLNfit-class.R +++ b/R/ZIPLNfit-class.R @@ -7,11 +7,7 @@ #' #' Fields are accessed via active binding and cannot be changed by the user. #' -#' @param responses the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param covariates design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param offsets offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param weights an optional vector of observation weights to be used in the fitting process. -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. See details. #' #' @inherit ZIPLN details @@ -69,22 +65,22 @@ ZIPLNfit <- R6Class( #' @description Initialize a [`ZIPLNfit`] model #' @importFrom stats glm.fit residuals poisson fitted coef #' @importFrom pscl zeroinfl - initialize = function(responses, covariates, offsets, weights, formula, control) { + initialize = function(data, control) { ## problem dimensions - n <- nrow(responses); p <- ncol(responses); d <- ncol(covariates$PLN); d0 <- ncol(covariates$ZI) + n <- nrow(data$Y); p <- ncol(data$Y); d <- ncol(data$X); d0 <- ncol(data$X0) ## save the formula call as specified by the user - private$formula <- formula - private$X <- covariates$PLN - private$X0 <- covariates$ZI + private$formula <- data$formula + private$X <- data$X + private$X0 <- data$X0 ## initialize the covariance model private$covariance <- control$covariance private$ziparam <- control$ziparam if (isZIPLNfit(control$inception)) { private$R <- control$inception$var_par$R - private$M <- control$inception$var_par$R - private$S <- control$inception$var_par$R + private$M <- control$inception$var_par$M + private$S <- control$inception$var_par$S private$B <- control$inception$model_par$B private$B0 <- control$inception$model_par$B0 } else { @@ -94,24 +90,24 @@ ZIPLNfit <- R6Class( B0 <- matrix(0, d0, p) ## Feature-wise univariate (ZI)poisson regression as starting point for ZIPLN for (j in 1:p) { - y = responses[, j] + y = data$Y[, j] if (min(y) == 0) { suppressWarnings( zip_out <- switch(control$ziparam, - "row" = pscl::zeroinfl(y ~ 0 + covariates$PLN | 0 + factor(1:n), offset = offsets[, j]), - "covar" = pscl::zeroinfl(y ~ 0 + covariates$PLN | 0 + covariates$ZI , offset = offsets[, j]), - pscl::zeroinfl(y ~ 0 + covariates$PLN | 1, offset = offsets[, j])) # offset only for the count model + "row" = pscl::zeroinfl(y ~ 0 + data$X | 0 + factor(1:n), offset = data$O[, j]), + "covar" = pscl::zeroinfl(y ~ 0 + data$X | 0 + data$X0 , offset = data$O[, j]), + pscl::zeroinfl(y ~ 0 + data$X | 1, offset = data$O[, j])) # offset only for the count model ) B0[,j] <- coef(zip_out, "zero") B[,j] <- coef(zip_out, "count") R[, j] <- predict(zip_out, type = "zero") - M[,j] <- residuals(zip_out) + covariates$PLN %*% coef(zip_out, "count") + M[,j] <- residuals(zip_out) + data$X %*% coef(zip_out, "count") } else { - p_out <- glm(y ~ 0 + covariates$PLN, family = 'poisson', offset = offsets[, j]) + p_out <- glm(y ~ 0 + data$X, family = 'poisson', offset = data$O[, j]) B0[,j] <- rep(-10, d) B[,j] <- coef(p_out) R[, j] <- 0 - M[,j] <- residuals(p_out) + covariates$PLN %*% coef(p_out) + M[,j] <- residuals(p_out) + data$X %*% coef(p_out) } } @@ -128,7 +124,7 @@ ZIPLNfit <- R6Class( "row" = matrix(rowMeans(private$R), n, p) , "col" = matrix(colMeans(private$R), n, p, byrow = TRUE), "covar" = private$R) - private$zeros <- 1 * (responses == 0) + private$zeros <- 1 * (data$Y == 0) ## Link to functions performing the optimization private$optimizer$B <- function(M, X) optim_zipln_B_dense(M, X) @@ -146,9 +142,8 @@ ZIPLNfit <- R6Class( #' @description Call to the Cpp optimizer and update of the relevant fields #' @param control a list for controlling the optimization. See details. - optimize = function(responses, covariates, offsets, weights, control) { + optimize = function(data, control) { - data <- list(Y = responses, X = covariates$PLN, X0 = covariates$ZI, O = offsets) parameters <- list(Omega = NA, B0 = private$B0, B = private$B, Pi = private$Pi, M = private$M, S = private$S, R = private$R) @@ -245,8 +240,8 @@ ZIPLNfit <- R6Class( M = parameters$M, S = parameters$S, R = parameters$R, - Z = offsets + parameters$M, - A = exp(offsets + parameters$M + .5 * parameters$S^2), + Z = data$O + parameters$M, + A = exp(data$O + parameters$M + .5 * parameters$S^2), Ji = vloglik, monitoring = list( iterations = nb_iter, @@ -256,12 +251,14 @@ ZIPLNfit <- R6Class( ) ### TODO: Should be in post-treatment - if (is.null(colnames(responses))) colnames(responses) <- paste0("Y", 1:self$p) - colnames(private$B0) <- colnames(private$B) <- colnames(responses) - rownames(private$B0) <- rownames(private$B) <- colnames(covariates) - rownames(private$Omega) <- colnames(private$Omega) <- colnames(private$Pi) <- colnames(responses) + colnames_Y <- colnames(data$Y) + if (is.null(colnames_Y)) colnames_Y <- paste0("Y", 1:self$p) + colnames(private$B0) <- colnames(private$B) <- colnames_Y + rownames(private$B) <- colnames(data$X) + rownames(private$B0) <- colnames(data$X0) + rownames(private$Omega) <- colnames(private$Omega) <- colnames(private$Pi) <- colnames_Y dimnames(private$Sigma) <- dimnames(private$Omega) - rownames(private$M) <- rownames(private$S) <- rownames(private$R) <- rownames(private$Pi) <- rownames(responses) + rownames(private$M) <- rownames(private$S) <- rownames(private$R) <- rownames(private$Pi) <- rownames(data$Y) }, @@ -275,13 +272,12 @@ ZIPLNfit <- R6Class( #' * the matrix `R` of variational ZI probabilities #' * the vector `Ji` of (variational) log-likelihood of each new observation #' * a list `monitoring` with information about convergence status - optimize_vestep = function(covariates, offsets, responses, weights, + optimize_vestep = function(data, B = self$model_par$B, B0 = self$model_par$B0, Omega = self$model_par$Omega, control = ZIPLN_param(backend = "nlopt")$config_optim) { - n <- nrow(responses) - data <- list(Y = responses, X = covariates$PLN, X0 = covariates$ZI, O = offsets) + n <- nrow(data$Y) parameters <- list(M = matrix(0, n, self$p), S = matrix(0.1, n, self$p), R = matrix(0, n, self$p)) @@ -406,10 +402,9 @@ ZIPLNfit <- R6Class( ## Optimize M and S if responses are provided, if (level == 1) { VE <- self$optimize_vestep( - covariates = list(PLN = X, ZI = X0), - offsets = O, - responses = as.matrix(responses), - weights = rep(1, n_new), + data = list( + Y = as.matrix(responses), X = X, X0 = X0, O = O, w = rep(1, n_new) + ), B = private$B, B0 = private$B0, Omega = private$Omega @@ -559,11 +554,7 @@ ZIPLNfit <- R6Class( #' An R6 Class to represent a ZIPLNfit in a standard, general framework, with diagonal residual covariance #' -#' @param responses the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param covariates design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param offsets offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param weights an optional vector of observation weights to be used in the fitting process. -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. See details. #' #' @importFrom R6 R6Class @@ -582,8 +573,8 @@ ZIPLNfit_diagonal <- R6Class( inherit = ZIPLNfit, public = list( #' @description Initialize a [`ZIPLNfit_diagonal`] model - initialize = function(responses, covariates, offsets, weights, formula, control) { - super$initialize(responses, covariates, offsets, weights, formula, control) + initialize = function(data, control) { + super$initialize(data, control) private$optimizer$Omega <- optim_zipln_Omega_diagonal } ), @@ -612,11 +603,7 @@ ZIPLNfit_diagonal <- R6Class( #' An R6 Class to represent a ZIPLNfit in a standard, general framework, with spherical residual covariance #' -#' @param responses the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param covariates design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param offsets offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param weights an optional vector of observation weights to be used in the fitting process. -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. See details. #' #' @importFrom R6 R6Class @@ -635,8 +622,8 @@ ZIPLNfit_spherical <- R6Class( inherit = ZIPLNfit, public = list( #' @description Initialize a [`ZIPLNfit_spherical`] model - initialize = function(responses, covariates, offsets, weights, formula, control) { - super$initialize(responses, covariates, offsets, weights, formula, control) + initialize = function(data, control) { + super$initialize(data, control) private$optimizer$Omega <- optim_zipln_Omega_spherical } ), @@ -665,11 +652,7 @@ ZIPLNfit_spherical <- R6Class( #' An R6 Class to represent a ZIPLNfit in a standard, general framework, with fixed (inverse) residual covariance #' -#' @param responses the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param covariates design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param offsets offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param weights an optional vector of observation weights to be used in the fitting process. -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. See details. #' #' @importFrom R6 R6Class @@ -692,8 +675,8 @@ ZIPLNfit_fixed <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% public = list( #' @description Initialize a [`ZIPLNfit_fixed`] model - initialize = function(responses, covariates, offsets, weights, formula, control) { - super$initialize(responses, covariates, offsets, weights, formula, control) + initialize = function(data, control) { + super$initialize(data, control) private$Omega <- control$Omega private$optimizer$Omega <- function(M, X, B, S) {private$Omega} } @@ -723,11 +706,7 @@ ZIPLNfit_fixed <- R6Class( #' An R6 Class to represent a ZIPLNfit in a standard, general framework, with sparse inverse residual covariance #' -#' @param responses the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param covariates design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param offsets offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class -#' @param weights an optional vector of observation weights to be used in the fitting process. -#' @param formula model formula used for fitting, extracted from the formula in the upper-level call +#' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. See details. #' #' @importFrom R6 R6Class @@ -760,8 +739,8 @@ ZIPLNfit_sparse <- R6Class( public = list( #' @description Initialize a [`ZIPLNfit_fixed`] model #' @importFrom glassoFast glassoFast - initialize = function(responses, covariates, offsets, weights, formula, control) { - super$initialize(responses, covariates, offsets, weights, formula, control) + initialize = function(data, control) { + super$initialize(data, control) ## Default for penalty weights (if not already set) if (is.null(control$penalty_weights)) control$penalty_weights <- matrix(1, self$p, self$p) stopifnot(isSymmetric(control$penalty_weights), all(control$penalty_weights >= 0)) diff --git a/R/ZIPLNnetwork.R b/R/ZIPLNnetwork.R index 5ff17536..739ab9a2 100644 --- a/R/ZIPLNnetwork.R +++ b/R/ZIPLNnetwork.R @@ -29,18 +29,18 @@ ZIPLNnetwork <- function(formula, data, subset, weights, zi = c("single", "row", "col"), penalties = NULL, control = ZIPLNnetwork_param()) { ## extract the data matrices and weights - args <- extract_model_zi(match.call(expand.dots = FALSE), parent.frame()) - control$ziparam <- ifelse((args$zicovar), "covar", match.arg(zi)) + data_ <- extract_model_zi(match.call(expand.dots = FALSE), parent.frame()) + control$ziparam <- ifelse((data_$zicovar), "covar", match.arg(zi)) ## initialization if (control$trace > 0) cat("\n Initialization...") - myPLN <- ZIPLNnetworkfamily$new(penalties, args$Y, list(PLN = args$X, ZI = args$X0), args$O, args$w, args$formula, control) + myPLN <- ZIPLNnetworkfamily$new(penalties, data_, control) ## optimization if (control$trace > 0) cat("\n Adjusting", length(myPLN$penalties), "ZI-PLN with sparse inverse covariance estimation and", control$ziparam, "specific parameter(s) in Zero inflation component.\n") - myPLN$optimize(control$config_optim) + myPLN$optimize(data_, control$config_optim) if (control$trace > 0) cat("\n DONE!\n") myPLN diff --git a/man/Networkfamily.Rd b/man/Networkfamily.Rd index 2a7ca6f9..f8d9df87 100644 --- a/man/Networkfamily.Rd +++ b/man/Networkfamily.Rd @@ -59,15 +59,7 @@ BIC, ICL and EBIC are defined so that they are on the same scale as the model lo \subsection{Method \code{new()}}{ Initialize all models in the collection \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{Networkfamily$new( - penalties, - responses, - covariates, - offsets, - weights, - formula, - control -)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{Networkfamily$new(penalties, data, control)}\if{html}{\out{
    }} } \subsection{Arguments}{ @@ -75,15 +67,7 @@ Initialize all models in the collection \describe{ \item{\code{penalties}}{a vector of positive real number controlling the level of sparsity of the underlying network.} -\item{\code{responses}}{the matrix of responses common to every models} - -\item{\code{covariates}}{the matrix of covariates common to every models} - -\item{\code{offsets}}{the matrix of offsets common to every models} - -\item{\code{weights}}{the vector of observation weights} - -\item{\code{formula}}{model formula used for fitting, extracted from the formula in the upper-level call} +\item{\code{data}}{a named list used internally to carry the data matrices} \item{\code{control}}{a list for controlling the optimization.} } @@ -99,12 +83,14 @@ Update current \code{\link{PLNnetworkfit}} with smart starting values \subsection{Method \code{optimize()}}{ Call to the C++ optimizer on all models of the collection \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{Networkfamily$optimize(config)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{Networkfamily$optimize(data, config)}\if{html}{\out{
    }} } \subsection{Arguments}{ \if{html}{\out{
    }} \describe{ +\item{\code{data}}{a named list used internally to carry the data matrices} + \item{\code{config}}{a list for controlling the optimization.} } \if{html}{\out{
    }} diff --git a/man/PLNnetworkfamily.Rd b/man/PLNnetworkfamily.Rd index 03052b2d..6490ced9 100644 --- a/man/PLNnetworkfamily.Rd +++ b/man/PLNnetworkfamily.Rd @@ -52,15 +52,7 @@ The function \code{\link[=PLNnetwork]{PLNnetwork()}}, the class \code{\link{PLNn \subsection{Method \code{new()}}{ Initialize all models in the collection \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{PLNnetworkfamily$new( - penalties, - responses, - covariates, - offsets, - weights, - formula, - control -)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{PLNnetworkfamily$new(penalties, data, control)}\if{html}{\out{
    }} } \subsection{Arguments}{ @@ -68,15 +60,7 @@ Initialize all models in the collection \describe{ \item{\code{penalties}}{a vector of positive real number controlling the level of sparsity of the underlying network.} -\item{\code{responses}}{the matrix of responses common to every models} - -\item{\code{covariates}}{the matrix of covariates common to every models} - -\item{\code{offsets}}{the matrix of offsets common to every models} - -\item{\code{weights}}{the vector of observation weights} - -\item{\code{formula}}{model formula used for fitting, extracted from the formula in the upper-level call} +\item{\code{data}}{a named list used internally to carry the data matrices} \item{\code{control}}{a list for controlling the optimization.} } diff --git a/man/PLNnetworkfit.Rd b/man/PLNnetworkfit.Rd index f83df47f..7e531d24 100644 --- a/man/PLNnetworkfit.Rd +++ b/man/PLNnetworkfit.Rd @@ -51,7 +51,6 @@ The function \code{\link[=PLNnetwork]{PLNnetwork()}}, the class \code{\link{PLNn \subsection{Public methods}{ \itemize{ \item \href{#method-PLNnetworkfit-new}{\code{PLNnetworkfit$new()}} -\item \href{#method-PLNnetworkfit-update}{\code{PLNnetworkfit$update()}} \item \href{#method-PLNnetworkfit-optimize}{\code{PLNnetworkfit$optimize()}} \item \href{#method-PLNnetworkfit-latent_network}{\code{PLNnetworkfit$latent_network()}} \item \href{#method-PLNnetworkfit-plot_network}{\code{PLNnetworkfit$plot_network()}} @@ -60,12 +59,13 @@ The function \code{\link[=PLNnetwork]{PLNnetwork()}}, the class \code{\link{PLNn } } \if{html}{\out{ -
    Inherited methods +
    Inherited methods
    @@ -76,21 +76,13 @@ The function \code{\link[=PLNnetwork]{PLNnetwork()}}, the class \code{\link{PLNn \subsection{Method \code{new()}}{ Initialize a \code{\link{PLNnetworkfit}} object \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{PLNnetworkfit$new(responses, covariates, offsets, weights, formula, control)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{PLNnetworkfit$new(data, control)}\if{html}{\out{
    }} } \subsection{Arguments}{ \if{html}{\out{
    }} \describe{ -\item{\code{responses}}{the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{covariates}}{design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{offsets}}{offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{weights}}{an optional vector of observation weights to be used in the fitting process.} - -\item{\code{formula}}{model formula used for fitting, extracted from the formula in the upper-level call} +\item{\code{data}}{a named list used internally to carry the data matrices} \item{\code{control}}{a list for controlling the optimization.} } @@ -98,73 +90,18 @@ Initialize a \code{\link{PLNnetworkfit}} object } } \if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PLNnetworkfit-update}{}}} -\subsection{Method \code{update()}}{ -Update fields of a \code{\link{PLNnetworkfit}} object -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{PLNnetworkfit$update( - penalty = NA, - B = NA, - Sigma = NA, - Omega = NA, - M = NA, - S = NA, - Z = NA, - A = NA, - Ji = NA, - R2 = NA, - monitoring = NA -)}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{penalty}}{a positive real number controlling the level of sparsity of the underlying network.} - -\item{\code{B}}{matrix of regression matrix} - -\item{\code{Sigma}}{variance-covariance matrix of the latent variables} - -\item{\code{Omega}}{precision matrix of the latent variables. Inverse of Sigma.} - -\item{\code{M}}{matrix of mean vectors for the variational approximation} - -\item{\code{S}}{matrix of variance vectors for the variational approximation} - -\item{\code{Z}}{matrix of latent vectors (includes covariates and offset effects)} - -\item{\code{A}}{matrix of fitted values} - -\item{\code{Ji}}{vector of variational lower bounds of the log-likelihoods (one value per sample)} - -\item{\code{R2}}{approximate R^2 goodness-of-fit criterion} - -\item{\code{monitoring}}{a list with optimization monitoring quantities} -} -\if{html}{\out{
    }} -} -} -\if{html}{\out{
    }} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-PLNnetworkfit-optimize}{}}} \subsection{Method \code{optimize()}}{ Call to the C++ optimizer and update of the relevant fields \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{PLNnetworkfit$optimize(responses, covariates, offsets, weights, config)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{PLNnetworkfit$optimize(data, config)}\if{html}{\out{
    }} } \subsection{Arguments}{ \if{html}{\out{
    }} \describe{ -\item{\code{responses}}{the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{covariates}}{design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{offsets}}{offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{weights}}{an optional vector of observation weights to be used in the fitting process.} +\item{\code{data}}{a named list used internally to carry the data matrices} \item{\code{config}}{a list for controlling the optimization} } diff --git a/man/ZIPLNfit.Rd b/man/ZIPLNfit.Rd index 1f3c43b3..e8411b90 100644 --- a/man/ZIPLNfit.Rd +++ b/man/ZIPLNfit.Rd @@ -150,21 +150,13 @@ Update the current \code{\link{ZIPLNfit}} object \subsection{Method \code{new()}}{ Initialize a \code{\link{ZIPLNfit}} model \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{ZIPLNfit$new(responses, covariates, offsets, weights, formula, control)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{ZIPLNfit$new(data, control)}\if{html}{\out{
    }} } \subsection{Arguments}{ \if{html}{\out{
    }} \describe{ -\item{\code{responses}}{the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{covariates}}{design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{offsets}}{offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{weights}}{an optional vector of observation weights to be used in the fitting process.} - -\item{\code{formula}}{model formula used for fitting, extracted from the formula in the upper-level call} +\item{\code{data}}{a named list used internally to carry the data matrices} \item{\code{control}}{a list for controlling the optimization. See details.} } @@ -177,19 +169,13 @@ Initialize a \code{\link{ZIPLNfit}} model \subsection{Method \code{optimize()}}{ Call to the Cpp optimizer and update of the relevant fields \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{ZIPLNfit$optimize(responses, covariates, offsets, weights, control)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{ZIPLNfit$optimize(data, control)}\if{html}{\out{
    }} } \subsection{Arguments}{ \if{html}{\out{
    }} \describe{ -\item{\code{responses}}{the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{covariates}}{design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{offsets}}{offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{weights}}{an optional vector of observation weights to be used in the fitting process.} +\item{\code{data}}{a named list used internally to carry the data matrices} \item{\code{control}}{a list for controlling the optimization. See details.} } @@ -203,10 +189,7 @@ Call to the Cpp optimizer and update of the relevant fields Result of one call to the VE step of the optimization procedure: optimal variational parameters (M, S, R) and corresponding log likelihood values for fixed model parameters (Sigma, B, B0). Intended to position new data in the latent space. \subsection{Usage}{ \if{html}{\out{
    }}\preformatted{ZIPLNfit$optimize_vestep( - covariates, - offsets, - responses, - weights, + data, B = self$model_par$B, B0 = self$model_par$B0, Omega = self$model_par$Omega, @@ -217,13 +200,7 @@ Result of one call to the VE step of the optimization procedure: optimal variati \subsection{Arguments}{ \if{html}{\out{
    }} \describe{ -\item{\code{covariates}}{design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{offsets}}{offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{responses}}{the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{weights}}{an optional vector of observation weights to be used in the fitting process.} +\item{\code{data}}{a named list used internally to carry the data matrices} \item{\code{B}}{Optional fixed value of the regression parameters in the PLN component} diff --git a/man/ZIPLNfit_diagonal.Rd b/man/ZIPLNfit_diagonal.Rd index acce0d88..7c076442 100644 --- a/man/ZIPLNfit_diagonal.Rd +++ b/man/ZIPLNfit_diagonal.Rd @@ -55,28 +55,13 @@ print(myPLN) \subsection{Method \code{new()}}{ Initialize a \code{\link{ZIPLNfit_diagonal}} model \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{ZIPLNfit_diagonal$new( - responses, - covariates, - offsets, - weights, - formula, - control -)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{ZIPLNfit_diagonal$new(data, control)}\if{html}{\out{
    }} } \subsection{Arguments}{ \if{html}{\out{
    }} \describe{ -\item{\code{responses}}{the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{covariates}}{design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{offsets}}{offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{weights}}{an optional vector of observation weights to be used in the fitting process.} - -\item{\code{formula}}{model formula used for fitting, extracted from the formula in the upper-level call} +\item{\code{data}}{a named list used internally to carry the data matrices} \item{\code{control}}{a list for controlling the optimization. See details.} } diff --git a/man/ZIPLNfit_fixed.Rd b/man/ZIPLNfit_fixed.Rd index d57a5bd7..6f0ba648 100644 --- a/man/ZIPLNfit_fixed.Rd +++ b/man/ZIPLNfit_fixed.Rd @@ -56,21 +56,13 @@ print(myPLN) \subsection{Method \code{new()}}{ Initialize a \code{\link{ZIPLNfit_fixed}} model \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{ZIPLNfit_fixed$new(responses, covariates, offsets, weights, formula, control)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{ZIPLNfit_fixed$new(data, control)}\if{html}{\out{
    }} } \subsection{Arguments}{ \if{html}{\out{
    }} \describe{ -\item{\code{responses}}{the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{covariates}}{design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{offsets}}{offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{weights}}{an optional vector of observation weights to be used in the fitting process.} - -\item{\code{formula}}{model formula used for fitting, extracted from the formula in the upper-level call} +\item{\code{data}}{a named list used internally to carry the data matrices} \item{\code{control}}{a list for controlling the optimization. See details.} } diff --git a/man/ZIPLNfit_sparse.Rd b/man/ZIPLNfit_sparse.Rd index f8b6cecd..46dd2d39 100644 --- a/man/ZIPLNfit_sparse.Rd +++ b/man/ZIPLNfit_sparse.Rd @@ -72,21 +72,13 @@ plot(myPLN) \subsection{Method \code{new()}}{ Initialize a \code{\link{ZIPLNfit_fixed}} model \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{ZIPLNfit_sparse$new(responses, covariates, offsets, weights, formula, control)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{ZIPLNfit_sparse$new(data, control)}\if{html}{\out{
    }} } \subsection{Arguments}{ \if{html}{\out{
    }} \describe{ -\item{\code{responses}}{the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{covariates}}{design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{offsets}}{offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{weights}}{an optional vector of observation weights to be used in the fitting process.} - -\item{\code{formula}}{model formula used for fitting, extracted from the formula in the upper-level call} +\item{\code{data}}{a named list used internally to carry the data matrices} \item{\code{control}}{a list for controlling the optimization. See details.} } diff --git a/man/ZIPLNfit_spherical.Rd b/man/ZIPLNfit_spherical.Rd index df904d7b..4548dade 100644 --- a/man/ZIPLNfit_spherical.Rd +++ b/man/ZIPLNfit_spherical.Rd @@ -55,28 +55,13 @@ print(myPLN) \subsection{Method \code{new()}}{ Initialize a \code{\link{ZIPLNfit_spherical}} model \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{ZIPLNfit_spherical$new( - responses, - covariates, - offsets, - weights, - formula, - control -)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{ZIPLNfit_spherical$new(data, control)}\if{html}{\out{
    }} } \subsection{Arguments}{ \if{html}{\out{
    }} \describe{ -\item{\code{responses}}{the matrix of responses (called Y in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{covariates}}{design matrix (called X in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{offsets}}{offset matrix (called O in the model). Will usually be extracted from the corresponding field in PLNfamily-class} - -\item{\code{weights}}{an optional vector of observation weights to be used in the fitting process.} - -\item{\code{formula}}{model formula used for fitting, extracted from the formula in the upper-level call} +\item{\code{data}}{a named list used internally to carry the data matrices} \item{\code{control}}{a list for controlling the optimization. See details.} } diff --git a/man/ZIPLNnetworkfamily.Rd b/man/ZIPLNnetworkfamily.Rd index e773424b..0b7b76a5 100644 --- a/man/ZIPLNnetworkfamily.Rd +++ b/man/ZIPLNnetworkfamily.Rd @@ -52,15 +52,7 @@ The function \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}}, the class \code{\link{ \subsection{Method \code{new()}}{ Initialize all models in the collection \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{ZIPLNnetworkfamily$new( - penalties, - responses, - covariates, - offsets, - weights, - formula, - control -)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{ZIPLNnetworkfamily$new(penalties, data, control)}\if{html}{\out{
    }} } \subsection{Arguments}{ @@ -68,15 +60,7 @@ Initialize all models in the collection \describe{ \item{\code{penalties}}{a vector of positive real number controlling the level of sparsity of the underlying network.} -\item{\code{responses}}{the matrix of responses common to every models} - -\item{\code{covariates}}{the matrix of covariates common to every models} - -\item{\code{offsets}}{the matrix of offsets common to every models} - -\item{\code{weights}}{the vector of observation weights} - -\item{\code{formula}}{model formula used for fitting, extracted from the formula in the upper-level call} +\item{\code{data}}{a named list used internally to carry the data matrices} \item{\code{control}}{a list for controlling the optimization.} } diff --git a/tests/testthat/test-plnnetworkfamily.R b/tests/testthat/test-plnnetworkfamily.R index fe736451..2bae4430 100644 --- a/tests/testthat/test-plnnetworkfamily.R +++ b/tests/testthat/test-plnnetworkfamily.R @@ -10,19 +10,23 @@ test_that("PLNnetwork: main function, fields access and methods", { expect_equal(getBestModel(models), getBestModel(models, "BIC")) - X <- model.matrix(Abundance ~ 1, data = trichoptera) Y <- as.matrix(trichoptera$Abundance) - O <- matrix(0, nrow(Y),ncol(Y)) - w <- rep(1, nrow(Y)) + data = list( + Y = Y, + X = model.matrix(Abundance ~ 1, data = trichoptera), + O = matrix(0, nrow(Y),ncol(Y)), + w = rep(1, nrow(Y)), + formula = Abundance ~ 1 + ) ## extract the data matrices and weights ctrl <- PLNmodels:::PLNnetwork_param(trace = 0) ## instantiate - myPLN <- PLNmodels:::PLNnetworkfamily$new(NULL, Y, X, O, w, Abundance ~ 1, ctrl) + myPLN <- PLNmodels:::PLNnetworkfamily$new(NULL, data, ctrl) ## optimize - myPLN$optimize(ctrl$config_optim) + myPLN$optimize(data, ctrl$config_optim) ## post-treatment config_post <- PLNmodels:::config_post_default_PLNnetwork From d7aec33818e0ac8176d1ba9de0d077da2073b2ca Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Tue, 13 Feb 2024 18:12:49 +0100 Subject: [PATCH 16/30] regenerating doc --- NAMESPACE | 2 ++ R/PLNnetworkfamily-S3methods.R | 14 ++++++++++---- R/PLNnetworkfamily-class.R | 17 +++++++++-------- R/ZIPLNfit-class.R | 2 +- man/plot.Networkfamily.Rd | 33 +++++++++++++++++++++++++++++++-- 5 files changed, 53 insertions(+), 15 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 78784e97..179926fb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,8 +20,10 @@ S3method(plot,PLNPCAfit) S3method(plot,PLNfamily) S3method(plot,PLNmixturefamily) S3method(plot,PLNmixturefit) +S3method(plot,PLNnetworkfamily) S3method(plot,PLNnetworkfit) S3method(plot,ZIPLNfit_sparse) +S3method(plot,ZIPLNnetworkfamily) S3method(predict,PLNLDAfit) S3method(predict,PLNfit) S3method(predict,PLNmixturefit) diff --git a/R/PLNnetworkfamily-S3methods.R b/R/PLNnetworkfamily-S3methods.R index f1dd1040..498a7cc1 100644 --- a/R/PLNnetworkfamily-S3methods.R +++ b/R/PLNnetworkfamily-S3methods.R @@ -8,7 +8,7 @@ ## Auxiliary functions to check the given class of an objet isPLNnetworkfamily <- function(Robject) {inherits(Robject, "Networkfamily")} -#' Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a [`PLNnetworkfamily`]) +#' Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of network fits (either [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`]) #' #' @inheritParams plot.PLNfamily #' @inherit plot.PLNfamily return details @@ -20,7 +20,6 @@ isPLNnetworkfamily <- function(Robject) {inherits(Robject, "Networkfamily")} #' @param log.x logical: should the x-axis be represented in log-scale? Default is `TRUE`. #' @param stability scalar: the targeted level of stability in stability plot. Default is .9. #' -#' #' @examples #' data(trichoptera) #' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) @@ -32,8 +31,7 @@ isPLNnetworkfamily <- function(Robject) {inherits(Robject, "Networkfamily")} #' (with \code{type = 'stability'}) or the evolution of the criteria of the different models considered #' (with \code{type = 'criteria'}, the default). #' @export -plot.Networkfamily <- - function(x, +plot.Networkfamily <- function(x, type = c("criteria", "stability", "diagnostic"), criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), reverse = FALSE, @@ -51,6 +49,14 @@ plot.Networkfamily <- p } +#' @describeIn plot.Networkfamily Display various outputs associated with a collection of network fits +#' @export +plot.PLNnetworkfamily <- plot.Networkfamily + +#' @describeIn plot.Networkfamily Display various outputs associated with a collection of network fits +#' @export +plot.ZIPLNnetworkfamily <- plot.Networkfamily + #' @describeIn getModel Model extraction for [`PLNnetworkfamily`] #' @export getModel.Networkfamily <- function(Robject, var, index = NULL) { diff --git a/R/PLNnetworkfamily-class.R b/R/PLNnetworkfamily-class.R index 32f19213..366870a4 100644 --- a/R/PLNnetworkfamily-class.R +++ b/R/PLNnetworkfamily-class.R @@ -494,14 +494,15 @@ ZIPLNnetworkfamily <- R6Class( control$trace <- 0 control$config_optim$trace <- 0 - myPLN <- PLNnetworkfamily$new(penalties = self$penalties, - responses = self$responses [subsample, , drop = FALSE], - covariates = self$covariates[subsample, , drop = FALSE], - offsets = self$offsets [subsample, , drop = FALSE], - formula = private$formula, - weights = self$weights [subsample], control = control) - - myPLN$optimize(control$config_optim) + data <- list( + Y = self$responses [subsample, , drop = FALSE], + X = self$covariates[subsample, , drop = FALSE], + O = self$offsets [subsample, , drop = FALSE], + w = self$weights [subsample], formula = private$formula) + + myPLN <- ZIPLNnetworkfamily$new(self$penalties, data, control) + myPLN$optimize(data, control$config_optim) + nets <- do.call(cbind, lapply(myPLN$models, function(model) { as.matrix(model$latent_network("support"))[upper.tri(diag(private$p))] })) diff --git a/R/ZIPLNfit-class.R b/R/ZIPLNfit-class.R index 40a6eb1a..f30bb86c 100644 --- a/R/ZIPLNfit-class.R +++ b/R/ZIPLNfit-class.R @@ -75,7 +75,7 @@ ZIPLNfit <- R6Class( private$X0 <- data$X0 ## initialize the covariance model private$covariance <- control$covariance - private$ziparam <- control$ziparam + private$ziparam <- control$ziparam if (isZIPLNfit(control$inception)) { private$R <- control$inception$var_par$R diff --git a/man/plot.Networkfamily.Rd b/man/plot.Networkfamily.Rd index 0f79f589..b54595d7 100644 --- a/man/plot.Networkfamily.Rd +++ b/man/plot.Networkfamily.Rd @@ -2,7 +2,9 @@ % Please edit documentation in R/PLNnetworkfamily-S3methods.R \name{plot.Networkfamily} \alias{plot.Networkfamily} -\title{Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a \code{\link{PLNnetworkfamily}})} +\alias{plot.PLNnetworkfamily} +\alias{plot.ZIPLNnetworkfamily} +\title{Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of network fits (either \code{\link{PLNnetworkfamily}} or \code{\link{ZIPLNnetworkfamily}})} \usage{ \method{plot}{Networkfamily}( x, @@ -13,6 +15,26 @@ stability = 0.9, ... ) + +\method{plot}{PLNnetworkfamily}( + x, + type = c("criteria", "stability", "diagnostic"), + criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), + reverse = FALSE, + log.x = TRUE, + stability = 0.9, + ... +) + +\method{plot}{ZIPLNnetworkfamily}( + x, + type = c("criteria", "stability", "diagnostic"), + criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), + reverse = FALSE, + log.x = TRUE, + stability = 0.9, + ... +) } \arguments{ \item{x}{an R6 object with class \code{\link{PLNnetworkfamily}}} @@ -38,12 +60,19 @@ Produces either a diagnostic plot (with \code{type = 'diagnostic'}), a stability (with \code{type = 'criteria'}, the default). } \description{ -Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a \code{\link{PLNnetworkfamily}}) +Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of network fits (either \code{\link{PLNnetworkfamily}} or \code{\link{ZIPLNnetworkfamily}}) } \details{ The BIC and ICL criteria have the form 'loglik - 1/2 * penalty' so that they are on the same scale as the model log-likelihood. You can change this direction and use the alternate form '-2*loglik + penalty', as some authors do, by setting \code{reverse = TRUE}. } +\section{Functions}{ +\itemize{ +\item \code{plot(PLNnetworkfamily)}: Display various outputs associated with a collection of network fits + +\item \code{plot(ZIPLNnetworkfamily)}: Display various outputs associated with a collection of network fits + +}} \examples{ data(trichoptera) trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) From e9d221cedee2b7bc4377a04c23fde84f53d18394 Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Tue, 13 Feb 2024 18:25:05 +0100 Subject: [PATCH 17/30] completing _pkgdown.yml --- NAMESPACE | 4 ++++ R/PLNnetworkfamily-S3methods.R | 20 ++++++++++++++++++-- _pkgdown.yml | 9 +++++++-- man/getBestModel.Rd | 12 +++++++++++- man/getModel.Rd | 12 +++++++++++- 5 files changed, 51 insertions(+), 6 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 179926fb..debf5692 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,9 +10,13 @@ S3method(fitted,ZIPLNfit) S3method(getBestModel,Networkfamily) S3method(getBestModel,PLNPCAfamily) S3method(getBestModel,PLNmixturefamily) +S3method(getBestModel,PLNnetworkfamily) +S3method(getBestModel,ZIPLNnetworkfamily) S3method(getModel,Networkfamily) S3method(getModel,PLNPCAfamily) S3method(getModel,PLNmixturefamily) +S3method(getModel,PLNnetworkfamily) +S3method(getModel,ZIPLNnetworkfamily) S3method(plot,Networkfamily) S3method(plot,PLNLDAfit) S3method(plot,PLNPCAfamily) diff --git a/R/PLNnetworkfamily-S3methods.R b/R/PLNnetworkfamily-S3methods.R index 498a7cc1..b76043cc 100644 --- a/R/PLNnetworkfamily-S3methods.R +++ b/R/PLNnetworkfamily-S3methods.R @@ -57,14 +57,22 @@ plot.PLNnetworkfamily <- plot.Networkfamily #' @export plot.ZIPLNnetworkfamily <- plot.Networkfamily -#' @describeIn getModel Model extraction for [`PLNnetworkfamily`] +#' @describeIn getModel Model extraction for [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`] #' @export getModel.Networkfamily <- function(Robject, var, index = NULL) { stopifnot(isPLNnetworkfamily(Robject)) Robject$getModel(var, index) } -#' @describeIn getBestModel Model extraction for [`PLNnetworkfamily`] +#' @describeIn getModel Model extraction for [`PLNnetworkfamily`] +#' @export +getModel.PLNnetworkfamily <- getModel.Networkfamily + +#' @describeIn getModel Model extraction for [`ZIPLNnetworkfamily`] +#' @export +getModel.ZIPLNnetworkfamily <- getModel.Networkfamily + +#' @describeIn getBestModel Model extraction for [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`] #' @export getBestModel.Networkfamily <- function(Robject, crit = c("BIC", "EBIC", "StARS"), ...) { stopifnot(isPLNnetworkfamily(Robject)) @@ -73,6 +81,14 @@ getBestModel.Networkfamily <- function(Robject, crit = c("BIC", "EBIC", "StARS") Robject$getBestModel(match.arg(crit), stability) } +#' @describeIn getBestModel Model extraction for [`PLNnetworkfamily`] +#' @export +getBestModel.PLNnetworkfamily <- getBestModel.Networkfamily + +#' @describeIn getBestModel Model extraction for [`ZIPLNnetworkfamily`] +#' @export +getBestModel.ZIPLNnetworkfamily <- getBestModel.Networkfamily + #' Extract the regularization path of a PLNnetwork fit #' #' @name coefficient_path diff --git a/_pkgdown.yml b/_pkgdown.yml index ec857788..a1fc70e4 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -44,6 +44,7 @@ reference: - '`PLNLDA`' - '`PLNPCA`' - '`PLNnetwork`' + - '`ZIPLNnetwork`' - '`PLNmixture`' - title: 'Poisson lognormal fit' desc: > @@ -105,13 +106,17 @@ reference: - '`fitted.PLNmixturefit`' - '`getBestModel.PLNmixturefamily`' - '`getModel.PLNmixturefamily`' -- title: 'Sparse Poisson lognormal fit and network' +- title: 'Sparse Poisson lognormal fit and network, w/o Zero Inflated component' desc: > - Description of the PLNnetworkfit and PLNnetworkfamily objects and methods for their manipulation. + Description of the (ZI)PLNnetworkfit and (ZI)PLNnetworkfamily objects and methods for their manipulation. contents: - starts_with('PLNnetworkfit') - '`PLNnetwork_param`' + - '`ZIPLNnetwork_param`' - '`plot.PLNnetworkfit`' + - '`plot.ZIPLNfit_sparse`' + - '`Networkfamily`' + - '`ZIPLNnetworkfamily`' - '`PLNnetworkfamily`' - '`plot.PLNnetworkfamily`' - '`getBestModel.PLNnetworkfamily`' diff --git a/man/getBestModel.Rd b/man/getBestModel.Rd index 41d37cc3..bce004c4 100644 --- a/man/getBestModel.Rd +++ b/man/getBestModel.Rd @@ -7,6 +7,8 @@ \alias{getBestModel} \alias{getBestModel.PLNmixturefamily} \alias{getBestModel.Networkfamily} +\alias{getBestModel.PLNnetworkfamily} +\alias{getBestModel.ZIPLNnetworkfamily} \title{Best model extraction from a collection of models} \usage{ \method{getBestModel}{PLNPCAfamily}(Robject, crit = c("ICL", "BIC"), ...) @@ -16,6 +18,10 @@ getBestModel(Robject, crit, ...) \method{getBestModel}{PLNmixturefamily}(Robject, crit = c("ICL", "BIC"), ...) \method{getBestModel}{Networkfamily}(Robject, crit = c("BIC", "EBIC", "StARS"), ...) + +\method{getBestModel}{PLNnetworkfamily}(Robject, crit = c("BIC", "EBIC", "StARS"), ...) + +\method{getBestModel}{ZIPLNnetworkfamily}(Robject, crit = c("BIC", "EBIC", "StARS"), ...) } \arguments{ \item{Robject}{an object with class PLNPCAfamilly ot PLNnetworkfamily} @@ -39,7 +45,11 @@ Best model extraction from a collection of models \item \code{getBestModel(PLNmixturefamily)}: Model extraction for \code{\link{PLNmixturefamily}} -\item \code{getBestModel(Networkfamily)}: Model extraction for \code{\link{PLNnetworkfamily}} +\item \code{getBestModel(Networkfamily)}: Model extraction for \code{\link{PLNnetworkfamily}} or \code{\link{ZIPLNnetworkfamily}} + +\item \code{getBestModel(PLNnetworkfamily)}: Model extraction for \code{\link{PLNnetworkfamily}} + +\item \code{getBestModel(ZIPLNnetworkfamily)}: Model extraction for \code{\link{ZIPLNnetworkfamily}} }} \examples{ diff --git a/man/getModel.Rd b/man/getModel.Rd index 680ce343..a812d321 100644 --- a/man/getModel.Rd +++ b/man/getModel.Rd @@ -7,6 +7,8 @@ \alias{getModel} \alias{getModel.PLNmixturefamily} \alias{getModel.Networkfamily} +\alias{getModel.PLNnetworkfamily} +\alias{getModel.ZIPLNnetworkfamily} \title{Model extraction from a collection of models} \usage{ \method{getModel}{PLNPCAfamily}(Robject, var, index = NULL) @@ -16,6 +18,10 @@ getModel(Robject, var, index) \method{getModel}{PLNmixturefamily}(Robject, var, index = NULL) \method{getModel}{Networkfamily}(Robject, var, index = NULL) + +\method{getModel}{PLNnetworkfamily}(Robject, var, index = NULL) + +\method{getModel}{ZIPLNnetworkfamily}(Robject, var, index = NULL) } \arguments{ \item{Robject}{an R6 object with class \code{\link{PLNPCAfamily}} or \code{\link{PLNnetworkfamily}}} @@ -36,7 +42,11 @@ Model extraction from a collection of models \item \code{getModel(PLNmixturefamily)}: Model extraction for \code{\link{PLNmixturefamily}} -\item \code{getModel(Networkfamily)}: Model extraction for \code{\link{PLNnetworkfamily}} +\item \code{getModel(Networkfamily)}: Model extraction for \code{\link{PLNnetworkfamily}} or \code{\link{ZIPLNnetworkfamily}} + +\item \code{getModel(PLNnetworkfamily)}: Model extraction for \code{\link{PLNnetworkfamily}} + +\item \code{getModel(ZIPLNnetworkfamily)}: Model extraction for \code{\link{ZIPLNnetworkfamily}} }} \examples{ From b13c94c33b4c69bb3137ff8236dfe4b2251a036d Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Tue, 13 Feb 2024 22:33:43 +0100 Subject: [PATCH 18/30] add test for ziplnetwork --- tests/testthat/test-ziplnnetworkfamily.R | 170 +++++++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 tests/testthat/test-ziplnnetworkfamily.R diff --git a/tests/testthat/test-ziplnnetworkfamily.R b/tests/testthat/test-ziplnnetworkfamily.R new file mode 100644 index 00000000..bbacbccb --- /dev/null +++ b/tests/testthat/test-ziplnnetworkfamily.R @@ -0,0 +1,170 @@ +context("test-plnnetworkfamily") + +data(trichoptera) +## use a subset t save some time +trichoptera <- prepare_data(trichoptera$Abundance[1:20, 1:5], trichoptera$Covariate[1:20, ]) + +models <- ZIPLNnetwork(Abundance ~ 1, data = trichoptera) + +test_that("ZIPLNnetwork: main function, fields access and methods", { + + expect_equal(getBestModel(models), getBestModel(models, "BIC")) + + Y <- as.matrix(trichoptera$Abundance) + data = list( + Y = Y, + X = model.matrix(Abundance ~ 1, data = trichoptera), + X0 = matrix(NA,0,0), + O = matrix(0, nrow(Y),ncol(Y)), + w = rep(1, nrow(Y)), + formula = Abundance ~ 1 + ) + + ## extract the data matrices and weights + ctrl <- PLNmodels:::ZIPLNnetwork_param(trace = 0) + ctrl$ziparam <- "single" + + ## instantiate + myPLN <- PLNmodels:::ZIPLNnetworkfamily$new(NULL, data, ctrl) + + ## optimize + myPLN$optimize(data, ctrl$config_optim) + + expect_equal(myPLN$criteria$BIC, models$criteria$BIC) + + ## S3 methods + expect_true(PLNmodels:::isNetworkfamily(myPLN)) + expect_is(plot(myPLN), "ggplot") + expect_is(plot(myPLN, reverse = TRUE), "ggplot") + expect_is(plot(myPLN, type = "diagnostic"), "ggplot") + expect_is(getBestModel(myPLN), "ZIPLNfit_sparse") + expect_is(getModel(myPLN, myPLN$penalties[1]), "ZIPLNfit_sparse") + + ## Field access + expect_true(all(myPLN$penalties > 0)) + expect_null(myPLN$stability_path) + expect_true(anyNA(myPLN$stability)) + + ## Other R6 methods + expect_true(is.data.frame(myPLN$coefficient_path())) + subs <- replicate(2, + sample.int(nrow(trichoptera), size = nrow(trichoptera)/2), + simplify = FALSE) + myPLN$stability_selection(subsamples = subs) + expect_is(plot(myPLN, type = "stability"), "ggplot") + expect_true(!is.null(myPLN$stability_path)) + expect_true(inherits(myPLN$plot(), "ggplot")) + expect_true(inherits(myPLN$plot_objective(), "ggplot")) + expect_true(inherits(myPLN$plot_stars(), "ggplot")) +}) + +test_that("ZIPLNnetwork computes the stability path only once.", { + + ## extract_probs fails if stability selection has not been performed. + expect_error(extract_probs(models), + "Please perform stability selection using stability_selection(Robject) first", fixed = TRUE) + set.seed(1077) + subs <- replicate(2, + sample.int(nrow(trichoptera), size = nrow(trichoptera)/2), + simplify = FALSE) + stability_selection(models, subsamples = subs, force = TRUE) + models$stability_selection(subsamples = subs) + ## Stability_path has correct dimensions + p <- getModel(models, index = 1)$p + expect_equal(dim(models$stability_path), + c(length(models$penalties) * p*(p-1)/2L, 5)) + ## try to compute it again + expect_message(stability_selection(models), + "Previous stability selection detected. Use \"force = TRUE\" to recompute it.") + ## extracts the inclusion frequencies + expect_equal(dim(extract_probs(models, index = 1, format = "matrix")), + c(p, p)) + expect_length(extract_probs(models, index = 1, format = "vector"), + p*(p-1)/2) +}) + +test_that("ZIPLNnetwork: matrix of penalties work", { + + p <- ncol(trichoptera$Abundance) + W <- diag(1, p, p) + W[upper.tri(W)] <- runif(p*(p-1)/2, min = 1, max = 5) + W[lower.tri(W)] <- t(W)[lower.tri(W)] + myPLN <- ZIPLNnetwork(Abundance ~ 1, data = trichoptera, control = ZIPLNnetwork_param(penalty_weights = W)) + + ## S3 methods + expect_true(PLNmodels:::isNetworkfamily(myPLN)) + expect_is(plot(myPLN), "ggplot") + expect_is(plot(myPLN, reverse = TRUE), "ggplot") + expect_is(plot(myPLN, type = "diagnostic"), "ggplot") + expect_is(getBestModel(myPLN), "PLNnetworkfit") + expect_is(getModel(myPLN, myPLN$penalties[1]), "ZIPLNfit_sparse") + + ## Field access + expect_true(all(myPLN$penalties > 0)) + expect_null(myPLN$stability_path) + expect_true(anyNA(myPLN$stability)) + + ## Other R6 methods + expect_true(is.data.frame(myPLN$coefficient_path())) + subs <- replicate(2, + sample.int(nrow(trichoptera), size = nrow(trichoptera)/2), + simplify = FALSE) + myPLN$stability_selection(subsamples = subs, control = PLNnetwork_param(penalty_weights = W)) + expect_is(plot(myPLN, type = "stability"), "ggplot") + expect_true(!is.null(myPLN$stability_path)) + expect_true(inherits(myPLN$plot(), "ggplot")) + expect_true(inherits(myPLN$plot_objective(), "ggplot")) + expect_true(inherits(myPLN$plot_stars(), "ggplot")) + + ## missspecification of penlaty weights should induce errors + ## not symmetric + W <- diag(1, p, p) + W[upper.tri(W)] <- runif(p*(p-1)/2, min = 1, max = 5) + expect_error(PLNnetwork(Abundance ~ 1, data = trichoptera, control = PLNnetwork_param(penalty_weights = W))) + + ## not square + W <- matrix(1, p + 1, p) + expect_error(PLNnetwork(Abundance ~ 1, data = trichoptera, control = PLNnetwork_param(penalty_weights = W))) + + ## not-positive entries + W <- matrix(0, p, p) + expect_error(PLNnetwork(Abundance ~ 1, data = trichoptera, control = PLNnetwork_param(penalty_weights = W))) + +}) + +test_that("PLNnetwork: list of matrices of penalties work", { + + p <- ncol(trichoptera$Abundance) + W <- diag(1, p, p) + W[upper.tri(W)] <- runif(p*(p-1)/2, min = 1, max = 5) + W[lower.tri(W)] <- t(W)[lower.tri(W)] + list_W <- lapply(seq(1, 1e-2, len = 30), function(rho) rho * W) + + myPLN <- ZIPLNnetwork(Abundance ~ 1, data = trichoptera, control = ZIPLNnetwork_param(penalty_weights = list_W)) + + ## S3 methods + expect_true(PLNmodels:::isNetworkfamily(myPLN)) + expect_is(plot(myPLN), "ggplot") + expect_is(plot(myPLN, reverse = TRUE), "ggplot") + expect_is(plot(myPLN, type = "diagnostic"), "ggplot") + expect_is(getBestModel(myPLN), "ZIPLNfit_sparse") + expect_is(getModel(myPLN, myPLN$penalties[1]), "ZIPLNfit_sparse") + + ## Field access + expect_true(all(myPLN$penalties > 0)) + expect_null(myPLN$stability_path) + expect_true(anyNA(myPLN$stability)) + + ## Other R6 methods + expect_true(is.data.frame(myPLN$coefficient_path())) + subs <- replicate(2, + sample.int(nrow(trichoptera), size = nrow(trichoptera)/2), + simplify = FALSE) + myPLN$stability_selection(subsamples = subs, control = ZIPLNnetwork_param(penalty_weights = W)) + expect_is(plot(myPLN, type = "stability"), "ggplot") + expect_true(!is.null(myPLN$stability_path)) + expect_true(inherits(myPLN$plot(), "ggplot")) + expect_true(inherits(myPLN$plot_objective(), "ggplot")) + expect_true(inherits(myPLN$plot_stars(), "ggplot")) + +}) From b457142009bc7736effa0a0a5b304fde9f3d6174 Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Tue, 13 Feb 2024 22:35:04 +0100 Subject: [PATCH 19/30] fixing stability selection for ZIPLNetwork --- R/PLNnetworkfamily-S3methods.R | 15 ++++++------ R/PLNnetworkfamily-class.R | 30 +++++++++++++++--------- man/ZIPLNnetworkfamily.Rd | 2 +- tests/testthat/test-plnnetworkfamily.R | 6 ++--- tests/testthat/test-ziplnnetworkfamily.R | 4 ++-- 5 files changed, 33 insertions(+), 24 deletions(-) diff --git a/R/PLNnetworkfamily-S3methods.R b/R/PLNnetworkfamily-S3methods.R index b76043cc..504a1f3b 100644 --- a/R/PLNnetworkfamily-S3methods.R +++ b/R/PLNnetworkfamily-S3methods.R @@ -6,7 +6,7 @@ ## Auxiliary functions to check the given class of an objet -isPLNnetworkfamily <- function(Robject) {inherits(Robject, "Networkfamily")} +isNetworkfamily <- function(Robject) {inherits(Robject, "Networkfamily")} #' Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of network fits (either [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`]) #' @@ -37,7 +37,7 @@ plot.Networkfamily <- function(x, reverse = FALSE, log.x = TRUE, stability = 0.9, ...) { - stopifnot(isPLNnetworkfamily(x)) + stopifnot(isNetworkfamily(x)) type <- match.arg(type) if (type == "criteria") p <- x$plot(criteria, reverse) @@ -60,7 +60,7 @@ plot.ZIPLNnetworkfamily <- plot.Networkfamily #' @describeIn getModel Model extraction for [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`] #' @export getModel.Networkfamily <- function(Robject, var, index = NULL) { - stopifnot(isPLNnetworkfamily(Robject)) + stopifnot(isNetworkfamily(Robject)) Robject$getModel(var, index) } @@ -75,7 +75,7 @@ getModel.ZIPLNnetworkfamily <- getModel.Networkfamily #' @describeIn getBestModel Model extraction for [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`] #' @export getBestModel.Networkfamily <- function(Robject, crit = c("BIC", "EBIC", "StARS"), ...) { - stopifnot(isPLNnetworkfamily(Robject)) + stopifnot(isNetworkfamily(Robject)) stability <- list(...)[["stability"]] if (is.null(stability)) stability <- 0.9 Robject$getBestModel(match.arg(crit), stability) @@ -104,7 +104,7 @@ getBestModel.ZIPLNnetworkfamily <- getBestModel.Networkfamily #' head(coefficient_path(fits)) #' @export coefficient_path <- function(Robject, precision = TRUE, corr = TRUE) { - stopifnot(isPLNnetworkfamily(Robject)) + stopifnot(isNetworkfamily(Robject)) Robject$coefficient_path(precision, corr) } @@ -131,7 +131,8 @@ coefficient_path <- function(Robject, precision = TRUE, corr = TRUE) { #' } #' @export stability_selection <- function(Robject, subsamples = NULL, control = PLNnetwork_param(), force = FALSE) { - stopifnot(isPLNnetworkfamily(Robject)) + stopifnot(isNetworkfamily(Robject)) + if (inherits(Robject, "ZIPLNnetworkfamily")) control <- ZIPLNnetwork_param() if (force || anyNA(Robject$stability)) { Robject$stability_selection(subsamples, control) } else { @@ -179,7 +180,7 @@ extract_probs <- function(Robject, penalty = NULL, index = NULL, crit = c("StARS", "BIC", "EBIC"), format = c("matrix", "vector"), tol = 1e-5) { - stopifnot(isPLNnetworkfamily(Robject)) + stopifnot(isNetworkfamily(Robject)) ## Check if stability selection has been performed stab_path <- Robject$stability_path if (is.null(stab_path)) { diff --git a/R/PLNnetworkfamily-class.R b/R/PLNnetworkfamily-class.R index 366870a4..a04be42e 100644 --- a/R/PLNnetworkfamily-class.R +++ b/R/PLNnetworkfamily-class.R @@ -370,7 +370,7 @@ PLNnetworkfamily <- R6Class( Y = self$responses [subsample, , drop = FALSE], X = self$covariates[subsample, , drop = FALSE], O = self$offsets [subsample, , drop = FALSE], - w = self$weights [subsample], formula = private$formula) + w = self$weights [subsample]) myPLN <- PLNnetworkfamily$new(self$penalties, data, control) myPLN$optimize(data, control$config_optim) @@ -430,6 +430,7 @@ ZIPLNnetworkfamily <- R6Class( ## PUBLIC MEMBERS ------ ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% public = list( + covariates0 = NULL, # covariates used in the ZI component ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Creation functions ---------------- #' @description Initialize all models in the collection @@ -452,6 +453,7 @@ ZIPLNnetworkfamily <- R6Class( ## Initialize fields shared by the super class super$initialize(penalties, data, control) + self$covariates0 <- data$X0 ## instantiate as many models as penalties control$trace <- 0 @@ -467,7 +469,7 @@ ZIPLNnetworkfamily <- R6Class( #' @description Compute the stability path by stability selection #' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines the number of subsamples used in the stability selection. Automatically set to 20 subsamples with size \code{10*sqrt(n)} if \code{n >= 144} and \code{0.8*n} otherwise following Liu et al. (2010) recommendations. #' @param control a list controlling the main optimization process in each call to PLNnetwork. See [PLNnetwork()] for details. - stability_selection = function(subsamples = NULL, control = PLNnetwork_param()) { + stability_selection = function(subsamples = NULL, control = ZIPLNnetwork_param()) { ## select default subsamples according if (is.null(subsamples)) { @@ -476,13 +478,15 @@ ZIPLNnetworkfamily <- R6Class( } ## got for stability selection - cat("\nStability Selection for PLNnetwork: ") + cat("\nStability Selection for ZIPLNnetwork: ") cat("\nsubsampling: ") - stabs_out <- future.apply::future_lapply(subsamples, function(subsample) { - cat("+") + # stabs_out <- future.apply::future_lapply(subsamples, function(subsample) { + stabs_out <- lapply(subsamples, function(subsample) { + cat("+") inception_ <- self$getModel(self$penalties[1]) inception_$update( + R = inception_$var_par$R[subsample, ], M = inception_$var_par$M[subsample, ], S = inception_$var_par$S[subsample, ] ) @@ -493,12 +497,15 @@ ZIPLNnetworkfamily <- R6Class( control$penalize_diagonal = (sum(diag(inception_$penalty_weights)) != 0) control$trace <- 0 control$config_optim$trace <- 0 - + control$ziparam <- inception_$zi_model + X0 <- self$covariates0 + if (nrow(X0) > 0) X0 <- X0[subsample, , drop = FALSE] data <- list( - Y = self$responses [subsample, , drop = FALSE], - X = self$covariates[subsample, , drop = FALSE], - O = self$offsets [subsample, , drop = FALSE], - w = self$weights [subsample], formula = private$formula) + Y = self$responses [subsample, , drop = FALSE], + X = self$covariates [subsample, , drop = FALSE], + X0 = X0, + O = self$offsets [subsample, , drop = FALSE], + w = self$weights [subsample]) myPLN <- ZIPLNnetworkfamily$new(self$penalties, data, control) myPLN$optimize(data, control$config_optim) @@ -507,7 +514,8 @@ ZIPLNnetworkfamily <- R6Class( as.matrix(model$latent_network("support"))[upper.tri(diag(private$p))] })) nets - }, future.seed = TRUE, future.scheduling = structure(TRUE, ordering = "random")) + # }, future.seed = TRUE, future.scheduling = structure(TRUE, ordering = "random")) + }) prob <- Reduce("+", stabs_out, accumulate = FALSE) / length(subsamples) ## formatting/tyding diff --git a/man/ZIPLNnetworkfamily.Rd b/man/ZIPLNnetworkfamily.Rd index 0b7b76a5..4a9811f0 100644 --- a/man/ZIPLNnetworkfamily.Rd +++ b/man/ZIPLNnetworkfamily.Rd @@ -78,7 +78,7 @@ Compute the stability path by stability selection \subsection{Usage}{ \if{html}{\out{
    }}\preformatted{ZIPLNnetworkfamily$stability_selection( subsamples = NULL, - control = PLNnetwork_param() + control = ZIPLNnetwork_param() )}\if{html}{\out{
    }} } diff --git a/tests/testthat/test-plnnetworkfamily.R b/tests/testthat/test-plnnetworkfamily.R index 2bae4430..97ffd071 100644 --- a/tests/testthat/test-plnnetworkfamily.R +++ b/tests/testthat/test-plnnetworkfamily.R @@ -36,7 +36,7 @@ test_that("PLNnetwork: main function, fields access and methods", { expect_equal(myPLN$criteria$BIC, models$criteria$BIC) ## S3 methods - expect_true(PLNmodels:::isPLNnetworkfamily(myPLN)) + expect_true(PLNmodels:::isNetworkfamily(myPLN)) expect_is(plot(myPLN), "ggplot") expect_is(plot(myPLN, reverse = TRUE), "ggplot") expect_is(plot(myPLN, type = "diagnostic"), "ggplot") @@ -94,7 +94,7 @@ test_that("PLNnetwork: matrix of penalties work", { myPLN <- PLNnetwork(Abundance ~ 1, data = trichoptera, control = PLNnetwork_param(penalty_weights = W)) ## S3 methods - expect_true(PLNmodels:::isPLNnetworkfamily(myPLN)) + expect_true(PLNmodels:::isNetworkfamily(myPLN)) expect_is(plot(myPLN), "ggplot") expect_is(plot(myPLN, reverse = TRUE), "ggplot") expect_is(plot(myPLN, type = "diagnostic"), "ggplot") @@ -145,7 +145,7 @@ test_that("PLNnetwork: list of matrices of penalties work", { myPLN <- PLNnetwork(Abundance ~ 1, data = trichoptera, control = PLNnetwork_param(penalty_weights = list_W)) ## S3 methods - expect_true(PLNmodels:::isPLNnetworkfamily(myPLN)) + expect_true(PLNmodels:::isNetworkfamily(myPLN)) expect_is(plot(myPLN), "ggplot") expect_is(plot(myPLN, reverse = TRUE), "ggplot") expect_is(plot(myPLN, type = "diagnostic"), "ggplot") diff --git a/tests/testthat/test-ziplnnetworkfamily.R b/tests/testthat/test-ziplnnetworkfamily.R index bbacbccb..b0331c91 100644 --- a/tests/testthat/test-ziplnnetworkfamily.R +++ b/tests/testthat/test-ziplnnetworkfamily.R @@ -96,7 +96,7 @@ test_that("ZIPLNnetwork: matrix of penalties work", { expect_is(plot(myPLN), "ggplot") expect_is(plot(myPLN, reverse = TRUE), "ggplot") expect_is(plot(myPLN, type = "diagnostic"), "ggplot") - expect_is(getBestModel(myPLN), "PLNnetworkfit") + expect_is(getBestModel(myPLN), "ZIPLNfit_sparse") expect_is(getModel(myPLN, myPLN$penalties[1]), "ZIPLNfit_sparse") ## Field access @@ -109,7 +109,7 @@ test_that("ZIPLNnetwork: matrix of penalties work", { subs <- replicate(2, sample.int(nrow(trichoptera), size = nrow(trichoptera)/2), simplify = FALSE) - myPLN$stability_selection(subsamples = subs, control = PLNnetwork_param(penalty_weights = W)) + myPLN$stability_selection(subsamples = subs, control = ZIPLNnetwork_param(penalty_weights = W)) expect_is(plot(myPLN, type = "stability"), "ggplot") expect_true(!is.null(myPLN$stability_path)) expect_true(inherits(myPLN$plot(), "ggplot")) From 2d90739e4fff188e82a2d7538ef175ae77d9e0d3 Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Tue, 13 Feb 2024 22:39:20 +0100 Subject: [PATCH 20/30] updating doc --- R/PLNnetworkfamily-class.R | 1 + man/ZIPLNnetworkfamily.Rd | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/R/PLNnetworkfamily-class.R b/R/PLNnetworkfamily-class.R index a04be42e..05532292 100644 --- a/R/PLNnetworkfamily-class.R +++ b/R/PLNnetworkfamily-class.R @@ -430,6 +430,7 @@ ZIPLNnetworkfamily <- R6Class( ## PUBLIC MEMBERS ------ ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% public = list( + #' @field covariates0 the matrix of covariates included in the ZI component covariates0 = NULL, # covariates used in the ZI component ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Creation functions ---------------- diff --git a/man/ZIPLNnetworkfamily.Rd b/man/ZIPLNnetworkfamily.Rd index 4a9811f0..19b4e49d 100644 --- a/man/ZIPLNnetworkfamily.Rd +++ b/man/ZIPLNnetworkfamily.Rd @@ -22,6 +22,13 @@ The function \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}}, the class \code{\link{ \section{Super classes}{ \code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{\link[PLNmodels:Networkfamily]{PLNmodels::Networkfamily}} -> \code{ZIPLNnetworkfamily} } +\section{Public fields}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{covariates0}}{the matrix of covariates included in the ZI component} +} +\if{html}{\out{
    }} +} \section{Methods}{ \subsection{Public methods}{ \itemize{ From cb1de62f7783cee38be0b986a070ce2840063112 Mon Sep 17 00:00:00 2001 From: Mahendra Mariadassou Date: Tue, 20 Feb 2024 16:10:34 +0100 Subject: [PATCH 21/30] Documentation cleanup and fixes for PLNnetwork* classes and methods. --- R/PLNnetwork.R | 13 +++--- R/PLNnetworkfamily-S3methods.R | 23 +++++----- R/PLNnetworkfamily-class.R | 83 +++++++++++++++++++++------------- R/PLNnetworkfit-class.R | 6 +-- 4 files changed, 73 insertions(+), 52 deletions(-) diff --git a/R/PLNnetwork.R b/R/PLNnetwork.R index f719badf..1ea9d96b 100644 --- a/R/PLNnetwork.R +++ b/R/PLNnetwork.R @@ -1,13 +1,14 @@ -#' Poisson lognormal model towards sparse network inference +#' Sparse Poisson lognormal model for network inference #' -#' Fit the sparse inverse covariance variant of the Poisson lognormal with a variational algorithm -#' for a collection of sparsity parameter values distributed on a log scale. Use the (g)lm syntax for model specification (covariates, offsets). +#' Perform sparse inverse covariance estimation for the Zero Inflated Poisson lognormal model +#' using a variational algorithm. Iterate over a range of logarithmically spaced sparsity parameter values. +#' Use the (g)lm syntax to specify the model (including covariates and offsets). #' #' @param formula an object of class "formula": a symbolic description of the model to be fitted. #' @param data an optional data frame, list or environment (or object coercible by as.data.frame to a data frame) containing the variables in the model. If not found in data, the variables are taken from environment(formula), typically the environment from which lm is called. #' @param subset an optional vector specifying a subset of observations to be used in the fitting process. #' @param weights an optional vector of observation weights to be used in the fitting process. -#' @param penalties an optional vector of positive real number controlling the level of sparsity of the underlying network. if NULL (the default), will be set internally. See \code{PLNnetwork_param()} for additional tuning of the penalty. +#' @param penalties an optional vector of positive real number controlling the level of sparsity of the underlying network. if NULL (the default), will be set internally. See `PLNnetwork_param()` for additional tuning of the penalty. #' @param control a list-like structure for controlling the optimization, with default generated by [PLNnetwork_param()]. See the corresponding documentation for details; #' #' @return an R6 object with class [`PLNnetworkfamily`], which contains @@ -59,7 +60,7 @@ PLNnetwork <- function(formula, data, subset, weights, penalties = NULL, control #' @param n_penalties an integer that specifies the number of values for the penalty grid when internally generated. Ignored when penalties is non `NULL` #' @param min_ratio the penalty grid ranges from the minimal value that produces a sparse to this value multiplied by `min_ratio`. Default is 0.1. #' @param penalize_diagonal boolean: should the diagonal terms be penalized in the graphical-Lasso? Default is \code{TRUE} -#' @param penalty_weights either a single or a list of p x p matrix of weights (default filled with 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values. +#' @param penalty_weights either a single or a list of p x p matrix of weights (default: all weights equal to 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values. #' @param inception Set up the parameters initialization: by default, the model is initialized with a multivariate linear model applied on #' log-transformed data, and with the same formula as the one provided by the user. However, the user can provide a PLNfit (typically obtained from a previous fit), #' which sometimes speeds up the inference. @@ -67,7 +68,7 @@ PLNnetwork <- function(formula, data, subset, weights, penalties = NULL, control #' @return list of parameters configuring the fit. #' @inherit PLN_param details #' @details See [PLN_param()] for a full description of the generic optimization parameters. PLNnetwork_param() also has two additional parameters controlling the optimization due the inner-outer loop structure of the optimizer: -#' * "ftol_out" outer solver stops when an optimization step changes the objective function by less than xtol multiplied by the absolute value of the parameter. Default is 1e-6 +#' * "ftol_out" outer solver stops when an optimization step changes the objective function by less than ftol multiplied by the absolute value of the parameter. Default is 1e-6 #' * "maxit_out" outer solver stops when the number of iteration exceeds maxit_out. Default is 50 #' #' @seealso [PLN_param()] diff --git a/R/PLNnetworkfamily-S3methods.R b/R/PLNnetworkfamily-S3methods.R index 504a1f3b..8a8fd7bc 100644 --- a/R/PLNnetworkfamily-S3methods.R +++ b/R/PLNnetworkfamily-S3methods.R @@ -13,10 +13,11 @@ isNetworkfamily <- function(Robject) {inherits(Robject, "Networkfamily")} #' @inheritParams plot.PLNfamily #' @inherit plot.PLNfamily return details #' -#' @param x an R6 object with class [`PLNnetworkfamily`] +#' @param x an R6 object with class [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`] #' @param type a character, either "criteria", "stability" or "diagnostic" for the type of plot. -#' @param criteria vector of characters. The criteria to plot in c("loglik", "BIC", "ICL", "R_squared", "EBIC", "pen_loglik"). -#' Default is c("loglik", "pen_loglik", "BIC", "EBIC"). Only relevant when `type = "criteria"`. +#' @param criteria Vector of criteria to plot, to be selected among "loglik" (log-likelihood), +#' "BIC", "ICL", "R_squared", "EBIC" and "pen_loglik" (penalized log-likelihood). +#' Default is c("loglik", "pen_loglik", "BIC", "EBIC"). Only used when `type = "criteria"`. #' @param log.x logical: should the x-axis be represented in log-scale? Default is `TRUE`. #' @param stability scalar: the targeted level of stability in stability plot. Default is .9. #' @@ -27,9 +28,9 @@ isNetworkfamily <- function(Robject) {inherits(Robject, "Networkfamily")} #' \dontrun{ #' plot(fits) #' } -#' @return Produces either a diagnostic plot (with \code{type = 'diagnostic'}), a stability plot -#' (with \code{type = 'stability'}) or the evolution of the criteria of the different models considered -#' (with \code{type = 'criteria'}, the default). +#' @return Produces either a diagnostic plot (with `type = 'diagnostic'`), a stability plot +#' (with `type = 'stability'`) or the evolution of the criteria of the different models considered +#' (with `type = 'criteria'`, the default). #' @export plot.Networkfamily <- function(x, type = c("criteria", "stability", "diagnostic"), @@ -92,7 +93,7 @@ getBestModel.ZIPLNnetworkfamily <- getBestModel.Networkfamily #' Extract the regularization path of a PLNnetwork fit #' #' @name coefficient_path -#' @param Robject an object with class [`PLNnetworkfamily`], i.e. an output from [PLNnetwork()] +#' @param Robject an object with class [`Networkfamily`], i.e. an output from [PLNnetwork()] #' @param precision a logical, should the coefficients of the precision matrix Omega or the covariance matrix Sigma be sent back. Default is `TRUE`. #' @param corr a logical, should the correlation (partial in case `precision = TRUE`) be sent back. Default is `TRUE`. #' @@ -114,12 +115,12 @@ coefficient_path <- function(Robject, precision = TRUE, corr = TRUE) { #' #' @description This function computes the StARS stability criteria over a path of penalties. If a path has already been computed, the functions stops with a message unless `force = TRUE` has been specified. #' -#' @param Robject an object with class [`PLNnetworkfamily`], i.e. an output from [PLNnetwork()] -#' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines th number of subsamples used in the stability selection. Automatically set to 20 subsamples with size \code{10*sqrt(n)} if \code{n >= 144} and \code{0.8*n} otherwise following Liu et al. (2010) recommendations. -#' @param control a list controlling the main optimization process in each call to PLNnetwork. See [PLNnetwork()] for details. +#' @param Robject an object with class [`PLNnetworkfamily`] or [`ZIPLNnetworkfamily`], i.e. an output from [PLNnetwork()] or [ZIPLNnetwork()] +#' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines th number of subsamples used in the stability selection. Automatically set to 20 subsamples with size `10*sqrt(n)` if `n >= 144` and `0.8*n` otherwise following Liu et al. (2010) recommendations. +#' @param control a list controlling the main optimization process in each call to [PLNnetwork()] or [ZIPLNnetwork()]. See [PLN_param()] or [ZIPLN_param()] for details. #' @param force force computation of the stability path, even if a previous one has been detected. #' -#' @return the list of subsamples. The estimated probabilities of selection of the edges are stored in the fields `stability_path` of the initial Robject with class [`PLNnetworkfamily`] +#' @return the list of subsamples. The estimated probabilities of selection of the edges are stored in the fields `stability_path` of the initial Robject with class [`Networkfamily`] #' @examples #' data(trichoptera) #' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) diff --git a/R/PLNnetworkfamily-class.R b/R/PLNnetworkfamily-class.R index 05532292..1686df02 100644 --- a/R/PLNnetworkfamily-class.R +++ b/R/PLNnetworkfamily-class.R @@ -1,10 +1,16 @@ -#' An R6 Class to virtually represent a collection of Networkfit (either standard PLN or ZI-PLN) +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +## CLASS Networkfamily ---- +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' An R6 Class to virtually represent a collection of network fits #' -#' @description The function [PLNnetwork()] produces an instance of this class. +#' @description The functions [PLNnetwork()] and [ZIPLNnetwork()] both produce an instance of this class, which can be thought of as a vector of [`PLNnetworkfit`]s [`ZIPLNnetworkfit`]s (indexed by penalty parameter) #' -#' This class comes with a set of methods, some of them being useful for the user: +#' This class comes with a set of methods mostly used to compare +#' network fits (in terms of goodness of fit) or extract one from +#' the family (based on penalty parameter and/or goodness of it). #' See the documentation for [getBestModel()], -#' [getModel()] and [plot()][plot.PLNnetworkfamily()] +#' [getModel()] and [plot()][plot.Networkfamily()] for the user-facing ones. #' ## Parameters shared by many methods #' @param penalties a vector of positive real number controlling the level of sparsity of the underlying network. @@ -14,7 +20,7 @@ #' @include PLNfamily-class.R #' @importFrom R6 R6Class #' @importFrom glassoFast glassoFast -#' @seealso The function [PLNnetwork()], the class [`PLNnetworkfit`] +#' @seealso The functions [PLNnetwork()], [ZIPLNnetwork()] and the classes [`PLNnetworkfit`], [`ZIPLNnetworkfit`] Networkfamily <- R6Class( classname = "Networkfamily", inherit = PLNfamily, @@ -25,7 +31,7 @@ Networkfamily <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Creation functions ---------------- #' @description Initialize all models in the collection - #' @return Update current [`PLNnetworkfit`] with smart starting values + #' @return Update all network fits in the family with smart starting values initialize = function(penalties, data, control) { ## Initialize fields shared by the super class @@ -46,7 +52,7 @@ Networkfamily <- R6Class( else list_penalty_weights <- control$penalty_weights - ## Check consistency of weights and optionnaly silent diagonal penalties + ## Check consistency of weights and optionally silent diagonal penalties list_penalty_weights <- map(list_penalty_weights, function(penalty_weights) { stopifnot(isSymmetric(penalty_weights), all(penalty_weights >= 0)) @@ -56,14 +62,14 @@ Networkfamily <- R6Class( ## Get an appropriate grid of penalties if (is.null(penalties)) { - if (control$trace > 1) cat("\n Recovering an appropriate grid of penalties.") + if (control$trace > 1) cat("\nComputing an appropriate grid of penalties.") max_pen <- list_penalty_weights %>% map(~ as.matrix(control$inception$model_par$Sigma) / .x) %>% map_dbl(~ max(abs(.x[upper.tri(.x, diag = control$penalize_diagonal)]))) %>% max() penalties <- 10^seq(log10(max_pen), log10(max_pen*control$min_ratio), len = control$n_penalties) } else { - if (control$trace > 1) cat("\nPenalties already set by the user") + if (control$trace > 1) cat("\nUsing penalties penalties provided by the user.") stopifnot(all(penalties > 0)) } ## Sort the penalty in decreasing order @@ -107,7 +113,7 @@ Networkfamily <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Extractors ------------------------ - #' @description Extract the regularization path of a [`PLNnetworkfamily`] + #' @description Extract the regularization path of a [`Networkfamily`] #' @param precision Logical. Should the regularization path be extracted from the precision matrix Omega (`TRUE`, default) or from the variance matrix Sigma (`FALSE`) #' @param corr Logical. Should the matrix be transformed to (partial) correlation matrix before extraction? Defaults to `TRUE` coefficient_path = function(precision = TRUE, corr = TRUE) { @@ -135,8 +141,10 @@ Networkfamily <- R6Class( }, #' @description Extract the best network in the family according to some criteria - #' @param crit character. Criterion used to perform the selection. Is "StARS" is chosen but `$stability` field is empty, will compute stability path. + #' @param crit character. Criterion used to perform the selection. If "StARS" is chosen but `$stability` field is empty, will compute stability path. #' @param stability Only used for "StARS" criterion. A scalar indicating the target stability (= 1 - 2 beta) at which the network is selected. Default is `0.9`. + #' @details + #' For BIC and EBIC criteria, higher is better. getBestModel = function(crit = c("BIC", "EBIC", "StARS"), stability = 0.9){ crit <- match.arg(crit) if (crit == "StARS") { @@ -159,11 +167,11 @@ Networkfamily <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Graphical methods ----------------- - #' @description Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a [`PLNnetworkfamily`]) + #' @description Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of network fits (a [`Networkfamily`]) #' @param criteria vector of characters. The criteria to plot in `c("loglik", "pen_loglik", "BIC", "EBIC")`. Defaults to all of them. #' @param reverse A logical indicating whether to plot the value of the criteria in the "natural" direction #' (loglik - 0.5 penalty) or in the "reverse" direction (-2 loglik + penalty). Default to FALSE, i.e use the - #' natural direction, on the same scale as the log-likelihood.. + #' natural direction, on the same scale as the log-likelihood. #' @param log.x logical: should the x-axis be represented in log-scale? Default is `TRUE`. #' @return a [`ggplot`] graph plot = function(criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), reverse = FALSE, log.x = TRUE) { @@ -174,7 +182,7 @@ Networkfamily <- R6Class( }, #' @description Plot stability path - #' @param stability scalar: the targeted level of stability in stability plot. Default is `0.9`. + #' @param stability scalar: the targeted level of stability using stability selection. Default is `0.9`. #' @param log.x logical: should the x-axis be represented in log-scale? Default is `TRUE`. #' @return a [`ggplot`] graph plot_stars = function(stability = 0.9, log.x = TRUE) { @@ -187,7 +195,7 @@ Networkfamily <- R6Class( p <- ggplot(dplot, aes(x = Penalty, y = Value, group = Metric, color = Metric)) + geom_point() + geom_line() + theme_bw() + - ## Add information correspinding to best lambda + ## Add information corresponding to best lambda geom_vline(xintercept = penalty_stars, linetype = 2) + geom_hline(yintercept = stability, linetype = 2) + annotate(x = penalty_stars, y = 0, @@ -252,16 +260,16 @@ Networkfamily <- R6Class( if (!is.null(private$stab_path)) { stability <- self$stability_path %>% dplyr::select(Penalty, Prob) %>% - group_by(Penalty) %>% - summarize(Stability = 1 - mean(4 * Prob * (1 - Prob))) %>% - arrange(desc(Penalty)) %>% - pull(Stability) + dplyr::group_by(Penalty) %>% + dplyr::summarize(Stability = 1 - mean(4 * Prob * (1 - Prob))) %>% + dplyr::arrange(desc(Penalty)) %>% + dplyr::pull(Stability) } else { stability <- rep(NA, length(self$penalties)) } stability }, - #' @field criteria a data frame with the values of some criteria (approximated log-likelihood, (E)BIC, ICL and R2, stability) for the collection of models / fits + #' @field criteria a data frame with the values of some criteria (variational log-likelihood, (E)BIC, ICL and R2, stability) for the collection of models / fits #' BIC, ICL and EBIC are defined so that they are on the same scale as the model log-likelihood, i.e. with the form, loglik - 0.5 penalty criteria = function() {mutate(super$criteria, stability = self$stability)} ) @@ -271,13 +279,20 @@ Networkfamily <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ) -#' An R6 Class to represent a collection of PLNnetworkfit +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +## CLASS PLNnetworkfamily ---- +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' An R6 Class to represent a collection of [`PLNnetworkfit`]s #' #' @description The function [PLNnetwork()] produces an instance of this class. #' -#' This class comes with a set of methods, some of them being useful for the user: +#' This class comes with a set of methods mostly used to compare +#' network fits (in terms of goodness of fit) or extract one from +#' the family (based on penalty parameter and/or goodness of it). #' See the documentation for [getBestModel()], -#' [getModel()] and [plot()][plot.PLNnetworkfamily()] +#' [getModel()] and [plot()][plot.Networkfamily()] for the user-facing ones. +#' #' ## Parameters shared by many methods #' @param penalties a vector of positive real number controlling the level of sparsity of the underlying network. @@ -307,7 +322,7 @@ PLNnetworkfamily <- R6Class( #' @return Update current [`PLNnetworkfit`] with smart starting values initialize = function(penalties, data, control) { - ## A basic model for inception, useless one is defined by the user + ## A basic model (constrained model) for inception, ignored if inception is provided by the user if (is.null(control$inception)) { ## Allow inception with spherical / diagonal / full PLNfit before switching back to PLNfit_fixedcov ## for the inner-outer loop of PLNnetwork. @@ -324,7 +339,7 @@ PLNnetworkfamily <- R6Class( ## Initialize fields shared by the super class super$initialize(penalties, data, control) - ## instantiate as many models as penalties + ## instantiate one model per penalty control$trace <- 0 self$models <- map2(private$params, private$penalties_weights, function(penalty, penalty_weights) { control$penalty <- penalty @@ -337,11 +352,11 @@ PLNnetworkfamily <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Stability ------------------------- #' @description Compute the stability path by stability selection - #' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines the number of subsamples used in the stability selection. Automatically set to 20 subsamples with size \code{10*sqrt(n)} if \code{n >= 144} and \code{0.8*n} otherwise following Liu et al. (2010) recommendations. - #' @param control a list controlling the main optimization process in each call to PLNnetwork. See [PLNnetwork()] for details. + #' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines the number of subsamples used in the stability selection. Automatically set to 20 subsamples with size `10*sqrt(n)` if `n >= 144` and `0.8*n` otherwise following Liu et al. (2010) recommendations. + #' @param control a list controlling the main optimization process in each call to [`PLNnetwork()`]. See [PLNnetwork()] and [PLN_param()] for details. stability_selection = function(subsamples = NULL, control = PLNnetwork_param()) { - ## select default subsamples according + ## select default subsamples according to Liu et al. (2010) recommendations. if (is.null(subsamples)) { subsample.size <- round(ifelse(private$n >= 144, 10*sqrt(private$n), 0.8*private$n)) subsamples <- replicate(20, sample.int(private$n, subsample.size), simplify = FALSE) @@ -401,6 +416,10 @@ PLNnetworkfamily <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ) +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +## CLASS PLNnetworkfamily ---- +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + #' An R6 Class to represent a collection of ZIPLNnetwork #' #' @description The function [ZIPLNnetwork()] produces an instance of this class. @@ -456,7 +475,7 @@ ZIPLNnetworkfamily <- R6Class( super$initialize(penalties, data, control) self$covariates0 <- data$X0 - ## instantiate as many models as penalties + ## instantiate one model per penalty control$trace <- 0 self$models <- map2(private$params, private$penalties_weights, function(penalty, penalty_weights) { control$penalty <- penalty @@ -468,11 +487,11 @@ ZIPLNnetworkfamily <- R6Class( ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## Stability ------------------------- #' @description Compute the stability path by stability selection - #' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines the number of subsamples used in the stability selection. Automatically set to 20 subsamples with size \code{10*sqrt(n)} if \code{n >= 144} and \code{0.8*n} otherwise following Liu et al. (2010) recommendations. - #' @param control a list controlling the main optimization process in each call to PLNnetwork. See [PLNnetwork()] for details. + #' @param subsamples a list of vectors describing the subsamples. The number of vectors (or list length) determines the number of subsamples used in the stability selection. Automatically set to 20 subsamples with size `10*sqrt(n)` if `n >= 144` and `0.8*n` otherwise following Liu et al. (2010) recommendations. + #' @param control a list controlling the main optimization process in each call to [`PLNnetwork()`]. See [ZIPLNnetwork()] and [ZIPLN_param()] for details. stability_selection = function(subsamples = NULL, control = ZIPLNnetwork_param()) { - ## select default subsamples according + ## select default subsamples according to Liu et al. (2010) recommendations. if (is.null(subsamples)) { subsample.size <- round(ifelse(private$n >= 144, 10*sqrt(private$n), 0.8*private$n)) subsamples <- replicate(20, sample.int(private$n, subsample.size), simplify = FALSE) diff --git a/R/PLNnetworkfit-class.R b/R/PLNnetworkfit-class.R index 6079a66d..312c16df 100644 --- a/R/PLNnetworkfit-class.R +++ b/R/PLNnetworkfit-class.R @@ -7,14 +7,14 @@ ## Parameters common to all PLN-xx-fit methods (shared with PLNfit but inheritance does not work) #' @param data a named list used internally to carry the data matrices #' @param control a list for controlling the optimization. -#' @param nullModel null model used for approximate R2 computations. Defaults to a GLM model with same design matrix but not latent variable. -#' @param B matrix of regression matrix +#' @param nullModel null model used for approximate R2 computations. Defaults to a GLM model with same design matrix but no latent variable. +#' @param B matrix of regression coefficients #' @param Sigma variance-covariance matrix of the latent variables #' @param Omega precision matrix of the latent variables. Inverse of Sigma. #' ## Parameters specific to PLNnetwork-fit methods #' @param penalty a positive real number controlling the level of sparsity of the underlying network. -#' @param penalty_weights either a single or a list of p x p matrix of weights (default filled with 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values. +#' @param penalty_weights either a single or a list of p x p matrix of weights (default: all weights equal to 1) to adapt the amount of shrinkage to each pair of node. Must be symmetric with positive values. #' #' @include PLNnetworkfit-class.R #' @examples From bbc57371105c092b7d3ef5b6645d3ba0288de7a6 Mon Sep 17 00:00:00 2001 From: Mahendra Mariadassou Date: Tue, 20 Feb 2024 17:21:35 +0100 Subject: [PATCH 22/30] Update doc for ZIPLN* classes --- R/ZIPLN.R | 12 +++++------- R/ZIPLNfit-S3methods.R | 15 ++++----------- R/ZIPLNfit-class.R | 6 +++++- R/ZIPLNnetwork.R | 33 +++++++++++---------------------- 4 files changed, 25 insertions(+), 41 deletions(-) diff --git a/R/ZIPLN.R b/R/ZIPLN.R index 2448f1cf..7b84ddf2 100644 --- a/R/ZIPLN.R +++ b/R/ZIPLN.R @@ -73,18 +73,16 @@ ZIPLN <- function(formula, data, subset, zi = c("single", "row", "col"), control #' Helper to define list of parameters to control the PLN fit. All arguments have defaults. #' #' @inheritParams PLN_param +#' @inheritParams PLNnetwork_param #' @param penalty a user-defined penalty to sparsify the residual covariance. Defaults to 0 (no sparsity). -#' @param penalize_diagonal boolean: should the diagonal terms be penalized in the graphical-Lasso? Only relevant with sparse covariance. Default is \code{TRUE} -#' @param penalty_weights p x p matrix of weights (default filled with 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values. Only relevant with sparse covariance. #' @return list of parameters used during the fit and post-processing steps #' #' @inherit PLN_param details -#' @details See [PLN_param()] for a full description of the generic optimization parameters. ZIPLN_param() also -#' has two additional parameters controlling the optimization due the inner-outer loop structure of the optimizer, -#' and additional parameter controlling the form of the variational approximation of the zero inflation: -#' * "ftol_out" outer solver stops when an optimization step changes the objective function by less than `ftol_out` multiplied by the absolute value of the parameter. Default is 1e-8 +#' @details See [PLN_param()] and [PLNnetwork_param()] for a full description of the generic optimization parameters. Like [PLNnetwork_param()], ZIPLN_param() has two parameters controlling the optimization due the inner-outer loop structure of the optimizer: +#' * "ftol_out" outer solver stops when an optimization step changes the objective function by less than `ftol_out` multiplied by the absolute value of the parameter. Default is 1e-6 #' * "maxit_out" outer solver stops when the number of iteration exceeds `maxit_out`. Default is 100 -#' * "approx_ZI" either use an exact or approximated conditional distribution for the zero inflantion. Default is FALSE +#' and one additional parameter controlling the form of the variational approximation of the zero inflation: +#' * "approx_ZI" either uses an exact or approximated conditional distribution for the zero inflation. Default is FALSE #' #' @export ZIPLN_param <- function( diff --git a/R/ZIPLNfit-S3methods.R b/R/ZIPLNfit-S3methods.R index a158c241..d6aabb84 100644 --- a/R/ZIPLNfit-S3methods.R +++ b/R/ZIPLNfit-S3methods.R @@ -107,18 +107,11 @@ isZIPLNfit_sparse <- function(Robject) {inherits(Robject, "ZIPLNfit_sparse")} #' Extract and plot the network (partial correlation, support or inverse covariance) from a [`ZIPLNfit_sparse`] object #' #' @name plot.ZIPLNfit_sparse +#' @inheritParams plot.PLNnetworkfit +#' @param x an R6 object with class [`ZIPLNfit_sparse`] +#' +#' @inherit plot.PLNnetworkfit return #' -#' @param x an R6 object with class [`PLNnetworkfit`] -#' @param type character. Value of the weight of the edges in the network, either "partial_cor" (partial correlation) or "support" (binary). Default is `"partial_cor"`. -#' @param output the type of output used: either 'igraph' or 'corrplot'. Default is `'igraph'`. -#' @param edge.color Length 2 color vector. Color for positive/negative edges. Default is `c("#F8766D", "#00BFC4")`. Only relevant for igraph output. -#' @param node.labels vector of character. The labels of the nodes. The default will use the column names ot the response matrix. -#' @param remove.isolated if `TRUE`, isolated node are remove before plotting. Only relevant for igraph output. -#' @param layout an optional igraph layout. Only relevant for igraph output. -#' @param plot logical. Should the final network be displayed or only sent back to the user. Default is `TRUE`. -#' @param ... Not used (S3 compatibility). -#' -#' @return Send back an invisible object (igraph or Matrix, depending on the output chosen) and optionally displays a graph (via igraph or corrplot for large ones) #' @examples #' data(trichoptera) #' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate) diff --git a/R/ZIPLNfit-class.R b/R/ZIPLNfit-class.R index f30bb86c..ae556bc7 100644 --- a/R/ZIPLNfit-class.R +++ b/R/ZIPLNfit-class.R @@ -1,3 +1,7 @@ +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +## CLASS ZIPLNfit ----- +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + #' An R6 Class to represent a ZIPLNfit #' #' @description The function [ZIPLN()] fits a model which is an instance of an object with class [`ZIPLNfit`]. @@ -759,7 +763,7 @@ ZIPLNfit_sparse <- R6Class( #' @description Extract interaction network in the latent space #' @param type edge value in the network. Can be "support" (binary edges), "precision" (coefficient of the precision matrix) or "partial_cor" (partial correlation between species) #' @importFrom Matrix Matrix - #' @return a square matrix of size `PLNnetworkfit$n` + #' @return a square matrix of size `ZIPLNfit_sparse$n` latent_network = function(type = c("partial_cor", "support", "precision")) { net <- switch( match.arg(type), diff --git a/R/ZIPLNnetwork.R b/R/ZIPLNnetwork.R index 739ab9a2..3f06a417 100644 --- a/R/ZIPLNnetwork.R +++ b/R/ZIPLNnetwork.R @@ -1,7 +1,8 @@ -#' Zero Inflated Poisson lognormal model toward sparse network inference +#' Zero Inflated Sparse Poisson lognormal model for network inference #' -#' Fit the sparse inverse covariance variant of the Zero Inflated Poisson lognormal with a variational algorithm -#' for a collection of sparsity parameter values distributed on a log scale. Use the (g)lm syntax for model specification (covariates, offsets). +#' Perform sparse inverse covariance estimation for the Zero Inflated Poisson lognormal model +#' using a variational algorithm. Iterate over a range of logarithmically spaced sparsity parameter values. +#' Use the (g)lm syntax to specify the model (including covariates and offsets). #' #' @inheritParams PLNnetwork #' @param control a list-like structure for controlling the optimization, with default generated by [ZIPLNnetwork_param()]. See the associated documentation @@ -48,28 +49,16 @@ ZIPLNnetwork <- function(formula, data, subset, weights, zi = c("single", "row", #' Control of ZIPLNnetwork fit #' -#' Helper to define list of parameters to control the PLN fit. All arguments have defaults. +#' Helper to define list of parameters to control the ZIPLNnetwork fit. All arguments have defaults. #' -#' @param backend optimization back used, either "nlopt" or "torch". Default is "nlopt" -#' @param inception_cov Covariance structure used for the inception model used to initialize the PLNfamily. Defaults to "full" and can be constrained to "diagonal" and "spherical". -#' @param config_optim a list for controlling the optimizer (either "nlopt" or "torch" backend). See details -#' @param config_post a list for controlling the post-treatment (optional bootstrap, jackknife, R2, etc). -#' @param trace a integer for verbosity. -#' @param n_penalties an integer that specifies the number of values for the penalty grid when internally generated. Ignored when penalties is non `NULL` -#' @param min_ratio the penalty grid ranges from the minimal value that produces a sparse to this value multiplied by `min_ratio`. Default is 0.1. -#' @param penalize_diagonal boolean: should the diagonal terms be penalized in the graphical-Lasso? Default is \code{TRUE} -#' @param penalty_weights either a single or a list of p x p matrix of weights (default filled with 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values. -#' @param inception Set up the parameters initialization: by default, the model is initialized with a multivariate linear model applied on -#' log-transformed data, and with the same formula as the one provided by the user. However, the user can provide a PLNfit (typically obtained from a previous fit), -#' which sometimes speeds up the inference. +#' @inheritParams PLNnetwork_param #' -#' @return list of parameters configuring the fit. -#' @inherit PLN_param details -#' @details See [PLN_param()] for a full description of the generic optimization parameters. PLNnetwork_param() also has two additional parameters controlling the optimization due the inner-outer loop structure of the optimizer: -#' * "ftol_out" outer solver stops when an optimization step changes the objective function by less than xtol multiplied by the absolute value of the parameter. Default is 1e-6 -#' * "maxit_out" outer solver stops when the number of iteration exceeds maxit_out. Default is 50 +#' @inherit PLN_param details return +#' @details See [PLNnetwork_param()] for a full description of the optimization parameters. Note that some defaults values are different than those used in [PLNnetwork_param()]: +#' * "ftol_out" (outer loop convergence tolerance the objective function) is set by default to 1e-6 +#' * "maxit_out" (max number of iterations for the outer loop) is set by default to 100 #' -#' @seealso [PLN_param()] +#' @seealso [PLNnetwork_param()] and [PLN_param()] #' @export ZIPLNnetwork_param <- function( backend = c("nlopt"), From c8eb7c3133278db99ac1f0fb40eb784a9e173f45 Mon Sep 17 00:00:00 2001 From: Mahendra Mariadassou Date: Tue, 20 Feb 2024 17:43:33 +0100 Subject: [PATCH 23/30] Refactor nb_param for ZIPLNfit --- R/ZIPLNfit-class.R | 71 +++++++++++++++++----------------------------- 1 file changed, 26 insertions(+), 45 deletions(-) diff --git a/R/ZIPLNfit-class.R b/R/ZIPLNfit-class.R index ae556bc7..791d1507 100644 --- a/R/ZIPLNfit-class.R +++ b/R/ZIPLNfit-class.R @@ -501,16 +501,21 @@ ZIPLNfit <- R6Class( d = function() {nrow(private$B)}, #' @field d0 number of covariates in the ZI part d0 = function() {nrow(private$B0)}, - #' @field nb_param number of parameters in the current PLN model + #' @field nb_param_zi number of parameters in the ZI part of the model + nb_param_zi = function() { + as.integer(switch(private$ziparam, + "single" = 1L, + "row" = self$n, + "col" = self$p, + "covar" = self$p * self$d)) + }, + #' @field nb_param_pln number of parameters in the PLN part of the model + nb_param_pln = function() { + as.integer(self$p * self$d + self$p * (self$p + 1L) / 2L) + }, + #' @field nb_param number of parameters in the ZIPLN model nb_param = function() { - as.integer( - self$p * self$d + self$p * (self$p + 1L)/2L + - switch(private$ziparam, - "single" = 1L, - "row" = self$n, - "col" = self$p, - "covar" = self$p * self$d) - ) + self$nb_param_zi + self$nb_param_pln }, #' @field model_par a list with the matrices of parameters found in the model (B, Sigma, plus some others depending on the variant) model_par = function() {list(B = private$B, B0 = private$B0, Pi = private$Pi, Omega = private$Omega, Sigma = private$Sigma)}, @@ -583,15 +588,9 @@ ZIPLNfit_diagonal <- R6Class( } ), active = list( - #' @field nb_param number of parameters in the current PLN model - nb_param = function() { - res <- self$p * self$d + self$p + - switch(private$ziparam, - "single" = 1L, - "row" = self$n, - "col" = self$p, - "covar" = self$p * self$d) - as.integer(res) + #' @field nb_param_pln number of parameters in the PLN part of the current model + nb_param_pln = function() { + as.integer(self$p * self$d + self$p) }, #' @field vcov_model character: the model used for the residual covariance vcov_model = function() {"diagonal"} @@ -632,15 +631,9 @@ ZIPLNfit_spherical <- R6Class( } ), active = list( - #' @field nb_param number of parameters in the current PLN model - nb_param = function() { - res <- self$p * self$d + 1L + - switch(private$ziparam, - "single" = 1L, - "row" = self$n, - "col" = self$p, - "covar" = self$p * self$d) - as.integer(res) + #' @field nb_param_pln number of parameters in the PLN part of the current model + nb_param_pln = function() { + as.integer(self$p * self$d + 1L) }, #' @field vcov_model character: the model used for the residual covariance vcov_model = function() {"spherical"} @@ -686,15 +679,9 @@ ZIPLNfit_fixed <- R6Class( } ), active = list( - #' @field nb_param number of parameters in the current PLN model - nb_param = function() { - res <- self$p * self$d + - switch(private$ziparam, - "single" = 1L, - "row" = self$n, - "col" = self$p, - "covar" = self$p * self$d) - as.integer(res) + #' @field nb_param_pln number of parameters in the PLN part of the current model + nb_param_pln = function() { + as.integer(self$p * self$d + 0L) }, #' @field vcov_model character: the model used for the residual covariance vcov_model = function() {"fixed"} @@ -813,15 +800,9 @@ ZIPLNfit_sparse <- R6Class( penalty_weights = function() {private$rho}, #' @field n_edges number of edges if the network (non null coefficient of the sparse precision matrix) n_edges = function() {sum(private$Omega[upper.tri(private$Omega, diag = FALSE)] != 0)}, - #' @field nb_param number of parameters in the current PLN model - nb_param = function() { - res <- self$p * self$d + self$n_edges + - switch(private$ziparam, - "single" = 1L, - "row" = self$n, - "col" = self$p, - "covar" = self$p * self$d) - as.integer(res) + #' @field nb_param_pln number of parameters in the PLN part of the current model + nb_param_pln = function() { + as.integer(self$p * self$d + self$n_edges) }, #' @field vcov_model character: the model used for the residual covariance vcov_model = function() {"sparse"}, From 147a5f51a4786f39846e2d501ff58d70a5981f82 Mon Sep 17 00:00:00 2001 From: Mahendra Mariadassou Date: Tue, 20 Feb 2024 17:43:46 +0100 Subject: [PATCH 24/30] Fix typos in tests --- tests/testthat/test-ziplnnetworkfamily.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-ziplnnetworkfamily.R b/tests/testthat/test-ziplnnetworkfamily.R index b0331c91..b5223dd3 100644 --- a/tests/testthat/test-ziplnnetworkfamily.R +++ b/tests/testthat/test-ziplnnetworkfamily.R @@ -116,7 +116,7 @@ test_that("ZIPLNnetwork: matrix of penalties work", { expect_true(inherits(myPLN$plot_objective(), "ggplot")) expect_true(inherits(myPLN$plot_stars(), "ggplot")) - ## missspecification of penlaty weights should induce errors + ## misspecification of penalty weights should induce errors ## not symmetric W <- diag(1, p, p) W[upper.tri(W)] <- runif(p*(p-1)/2, min = 1, max = 5) @@ -126,7 +126,7 @@ test_that("ZIPLNnetwork: matrix of penalties work", { W <- matrix(1, p + 1, p) expect_error(PLNnetwork(Abundance ~ 1, data = trichoptera, control = PLNnetwork_param(penalty_weights = W))) - ## not-positive entries + ## nonpositive entries W <- matrix(0, p, p) expect_error(PLNnetwork(Abundance ~ 1, data = trichoptera, control = PLNnetwork_param(penalty_weights = W))) From 5ce27fc7b53f61e94b76884382c42e3b16164dc1 Mon Sep 17 00:00:00 2001 From: Mahendra Mariadassou Date: Tue, 20 Feb 2024 17:53:41 +0100 Subject: [PATCH 25/30] Fix igraph warning in tests --- tests/testthat/test-plnnetworkfit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-plnnetworkfit.R b/tests/testthat/test-plnnetworkfit.R index 4b311fa3..a835dc96 100644 --- a/tests/testthat/test-plnnetworkfit.R +++ b/tests/testthat/test-plnnetworkfit.R @@ -42,7 +42,7 @@ test_that("PLNnetwork fit: check classes, getters and field access", { expect_equal(vcov(myPLNfit, "covariance"), myPLNfit$model_par$Sigma) expect_equal(vcov(myPLNfit, "covariance"), sigma(myPLNfit)) expect_warning(standard_error(myPLNfit)) - expect_true(igraph::is.igraph(myPLNfit$plot_network(output = "igraph", plot = FALSE))) + expect_true(igraph::is_igraph(myPLNfit$plot_network(output = "igraph", plot = FALSE))) expect_true(inherits(myPLNfit$plot_network(output = "corrplot", plot = FALSE), "Matrix")) }) From 36123fd4c8229ef8ae25b258a30a37670502dcd6 Mon Sep 17 00:00:00 2001 From: Mahendra Mariadassou Date: Tue, 20 Feb 2024 17:56:42 +0100 Subject: [PATCH 26/30] Update doc --- man/Networkfamily.Rd | 30 ++++++++++++++++++------------ man/PLNnetwork.Rd | 7 ++++--- man/PLNnetwork_param.Rd | 4 ++-- man/PLNnetworkfamily.Rd | 10 ++++++---- man/ZIPLN_param.Rd | 13 ++++++------- man/ZIPLNfit.Rd | 6 +++++- man/ZIPLNfit_diagonal.Rd | 2 +- man/ZIPLNfit_fixed.Rd | 2 +- man/ZIPLNfit_sparse.Rd | 4 ++-- man/ZIPLNfit_spherical.Rd | 2 +- man/ZIPLNnetwork.Rd | 7 ++++--- man/ZIPLNnetwork_param.Rd | 12 ++++++------ man/ZIPLNnetworkfamily.Rd | 2 +- man/coefficient_path.Rd | 2 +- man/plot.Networkfamily.Rd | 7 ++++--- man/plot.ZIPLNfit_sparse.Rd | 2 +- man/stability_selection.Rd | 6 +++--- 17 files changed, 66 insertions(+), 52 deletions(-) diff --git a/man/Networkfamily.Rd b/man/Networkfamily.Rd index f8d9df87..48bd6116 100644 --- a/man/Networkfamily.Rd +++ b/man/Networkfamily.Rd @@ -2,16 +2,18 @@ % Please edit documentation in R/PLNnetworkfamily-class.R \name{Networkfamily} \alias{Networkfamily} -\title{An R6 Class to virtually represent a collection of Networkfit (either standard PLN or ZI-PLN)} +\title{An R6 Class to virtually represent a collection of network fits} \description{ -The function \code{\link[=PLNnetwork]{PLNnetwork()}} produces an instance of this class. +The functions \code{\link[=PLNnetwork]{PLNnetwork()}} and \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}} both produce an instance of this class, which can be thought of as a vector of \code{\link{PLNnetworkfit}}s \code{\link{ZIPLNnetworkfit}}s (indexed by penalty parameter) -This class comes with a set of methods, some of them being useful for the user: +This class comes with a set of methods mostly used to compare +network fits (in terms of goodness of fit) or extract one from +the family (based on penalty parameter and/or goodness of it). See the documentation for \code{\link[=getBestModel]{getBestModel()}}, -\code{\link[=getModel]{getModel()}} and \link[=plot.PLNnetworkfamily]{plot()} +\code{\link[=getModel]{getModel()}} and \link[=plot.Networkfamily]{plot()} for the user-facing ones. } \seealso{ -The function \code{\link[=PLNnetwork]{PLNnetwork()}}, the class \code{\link{PLNnetworkfit}} +The functions \code{\link[=PLNnetwork]{PLNnetwork()}}, \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}} and the classes \code{\link{PLNnetworkfit}}, \code{\link{ZIPLNnetworkfit}} } \section{Super class}{ \code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{Networkfamily} @@ -25,7 +27,7 @@ The function \code{\link[=PLNnetwork]{PLNnetwork()}}, the class \code{\link{PLNn \item{\code{stability}}{mean edge stability along the penalty path} -\item{\code{criteria}}{a data frame with the values of some criteria (approximated log-likelihood, (E)BIC, ICL and R2, stability) for the collection of models / fits +\item{\code{criteria}}{a data frame with the values of some criteria (variational log-likelihood, (E)BIC, ICL and R2, stability) for the collection of models / fits BIC, ICL and EBIC are defined so that they are on the same scale as the model log-likelihood, i.e. with the form, loglik - 0.5 penalty} } \if{html}{\out{
    }} @@ -74,7 +76,7 @@ Initialize all models in the collection \if{html}{\out{
    }} } \subsection{Returns}{ -Update current \code{\link{PLNnetworkfit}} with smart starting values +Update all network fits in the family with smart starting values } } \if{html}{\out{
    }} @@ -100,7 +102,7 @@ Call to the C++ optimizer on all models of the collection \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Networkfamily-coefficient_path}{}}} \subsection{Method \code{coefficient_path()}}{ -Extract the regularization path of a \code{\link{PLNnetworkfamily}} +Extract the regularization path of a \code{\link{Networkfamily}} \subsection{Usage}{ \if{html}{\out{
    }}\preformatted{Networkfamily$coefficient_path(precision = TRUE, corr = TRUE)}\if{html}{\out{
    }} } @@ -127,18 +129,22 @@ Extract the best network in the family according to some criteria \subsection{Arguments}{ \if{html}{\out{
    }} \describe{ -\item{\code{crit}}{character. Criterion used to perform the selection. Is "StARS" is chosen but \verb{$stability} field is empty, will compute stability path.} +\item{\code{crit}}{character. Criterion used to perform the selection. If "StARS" is chosen but \verb{$stability} field is empty, will compute stability path.} \item{\code{stability}}{Only used for "StARS" criterion. A scalar indicating the target stability (= 1 - 2 beta) at which the network is selected. Default is \code{0.9}.} } \if{html}{\out{
    }} } +\subsection{Details}{ +For BIC and EBIC criteria, higher is better. +} + } \if{html}{\out{
    }} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Networkfamily-plot}{}}} \subsection{Method \code{plot()}}{ -Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of PLNnetwork fits (a \code{\link{PLNnetworkfamily}}) +Display various outputs (goodness-of-fit criteria, robustness, diagnostic) associated with a collection of network fits (a \code{\link{Networkfamily}}) \subsection{Usage}{ \if{html}{\out{
    }}\preformatted{Networkfamily$plot( criteria = c("loglik", "pen_loglik", "BIC", "EBIC"), @@ -154,7 +160,7 @@ Display various outputs (goodness-of-fit criteria, robustness, diagnostic) assoc \item{\code{reverse}}{A logical indicating whether to plot the value of the criteria in the "natural" direction (loglik - 0.5 penalty) or in the "reverse" direction (-2 loglik + penalty). Default to FALSE, i.e use the -natural direction, on the same scale as the log-likelihood..} +natural direction, on the same scale as the log-likelihood.} \item{\code{log.x}}{logical: should the x-axis be represented in log-scale? Default is \code{TRUE}.} } @@ -176,7 +182,7 @@ Plot stability path \subsection{Arguments}{ \if{html}{\out{
    }} \describe{ -\item{\code{stability}}{scalar: the targeted level of stability in stability plot. Default is \code{0.9}.} +\item{\code{stability}}{scalar: the targeted level of stability using stability selection. Default is \code{0.9}.} \item{\code{log.x}}{logical: should the x-axis be represented in log-scale? Default is \code{TRUE}.} } diff --git a/man/PLNnetwork.Rd b/man/PLNnetwork.Rd index 36184572..51589dfe 100644 --- a/man/PLNnetwork.Rd +++ b/man/PLNnetwork.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/PLNnetwork.R \name{PLNnetwork} \alias{PLNnetwork} -\title{Poisson lognormal model towards sparse network inference} +\title{Sparse Poisson lognormal model for network inference} \usage{ PLNnetwork( formula, @@ -31,8 +31,9 @@ an R6 object with class \code{\link{PLNnetworkfamily}}, which contains a collection of models with class \code{\link{PLNnetworkfit}} } \description{ -Fit the sparse inverse covariance variant of the Poisson lognormal with a variational algorithm -for a collection of sparsity parameter values distributed on a log scale. Use the (g)lm syntax for model specification (covariates, offsets). +Perform sparse inverse covariance estimation for the Zero Inflated Poisson lognormal model +using a variational algorithm. Iterate over a range of logarithmically spaced sparsity parameter values. +Use the (g)lm syntax to specify the model (including covariates and offsets). } \examples{ data(trichoptera) diff --git a/man/PLNnetwork_param.Rd b/man/PLNnetwork_param.Rd index f74ea009..c40c1350 100644 --- a/man/PLNnetwork_param.Rd +++ b/man/PLNnetwork_param.Rd @@ -30,7 +30,7 @@ PLNnetwork_param( \item{penalize_diagonal}{boolean: should the diagonal terms be penalized in the graphical-Lasso? Default is \code{TRUE}} -\item{penalty_weights}{either a single or a list of p x p matrix of weights (default filled with 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values.} +\item{penalty_weights}{either a single or a list of p x p matrix of weights (default: all weights equal to 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values.} \item{config_post}{a list for controlling the post-treatment (optional bootstrap, jackknife, R2, etc).} @@ -49,7 +49,7 @@ Helper to define list of parameters to control the PLN fit. All arguments have d \details{ See \code{\link[=PLN_param]{PLN_param()}} for a full description of the generic optimization parameters. PLNnetwork_param() also has two additional parameters controlling the optimization due the inner-outer loop structure of the optimizer: \itemize{ -\item "ftol_out" outer solver stops when an optimization step changes the objective function by less than xtol multiplied by the absolute value of the parameter. Default is 1e-6 +\item "ftol_out" outer solver stops when an optimization step changes the objective function by less than ftol multiplied by the absolute value of the parameter. Default is 1e-6 \item "maxit_out" outer solver stops when the number of iteration exceeds maxit_out. Default is 50 } } diff --git a/man/PLNnetworkfamily.Rd b/man/PLNnetworkfamily.Rd index 6490ced9..6cb3e9ab 100644 --- a/man/PLNnetworkfamily.Rd +++ b/man/PLNnetworkfamily.Rd @@ -2,13 +2,15 @@ % Please edit documentation in R/PLNnetworkfamily-class.R \name{PLNnetworkfamily} \alias{PLNnetworkfamily} -\title{An R6 Class to represent a collection of PLNnetworkfit} +\title{An R6 Class to represent a collection of \code{\link{PLNnetworkfit}}s} \description{ The function \code{\link[=PLNnetwork]{PLNnetwork()}} produces an instance of this class. -This class comes with a set of methods, some of them being useful for the user: +This class comes with a set of methods mostly used to compare +network fits (in terms of goodness of fit) or extract one from +the family (based on penalty parameter and/or goodness of it). See the documentation for \code{\link[=getBestModel]{getBestModel()}}, -\code{\link[=getModel]{getModel()}} and \link[=plot.PLNnetworkfamily]{plot()} +\code{\link[=getModel]{getModel()}} and \link[=plot.Networkfamily]{plot()} for the user-facing ones. } \examples{ data(trichoptera) @@ -87,7 +89,7 @@ Compute the stability path by stability selection \describe{ \item{\code{subsamples}}{a list of vectors describing the subsamples. The number of vectors (or list length) determines the number of subsamples used in the stability selection. Automatically set to 20 subsamples with size \code{10*sqrt(n)} if \code{n >= 144} and \code{0.8*n} otherwise following Liu et al. (2010) recommendations.} -\item{\code{control}}{a list controlling the main optimization process in each call to PLNnetwork. See \code{\link[=PLNnetwork]{PLNnetwork()}} for details.} +\item{\code{control}}{a list controlling the main optimization process in each call to \code{\link[=PLNnetwork]{PLNnetwork()}}. See \code{\link[=PLNnetwork]{PLNnetwork()}} and \code{\link[=PLN_param]{PLN_param()}} for details.} } \if{html}{\out{
    }} } diff --git a/man/ZIPLN_param.Rd b/man/ZIPLN_param.Rd index 13d6fb2e..5d9a624d 100644 --- a/man/ZIPLN_param.Rd +++ b/man/ZIPLN_param.Rd @@ -28,9 +28,9 @@ ZIPLN_param( \item{penalty}{a user-defined penalty to sparsify the residual covariance. Defaults to 0 (no sparsity).} -\item{penalize_diagonal}{boolean: should the diagonal terms be penalized in the graphical-Lasso? Only relevant with sparse covariance. Default is \code{TRUE}} +\item{penalize_diagonal}{boolean: should the diagonal terms be penalized in the graphical-Lasso? Default is \code{TRUE}} -\item{penalty_weights}{p x p matrix of weights (default filled with 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values. Only relevant with sparse covariance.} +\item{penalty_weights}{either a single or a list of p x p matrix of weights (default: all weights equal to 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values.} \item{config_post}{a list for controlling the post-treatments (optional bootstrap, jackknife, R2, etc.). See details} @@ -47,12 +47,11 @@ list of parameters used during the fit and post-processing steps Helper to define list of parameters to control the PLN fit. All arguments have defaults. } \details{ -See \code{\link[=PLN_param]{PLN_param()}} for a full description of the generic optimization parameters. ZIPLN_param() also -has two additional parameters controlling the optimization due the inner-outer loop structure of the optimizer, -and additional parameter controlling the form of the variational approximation of the zero inflation: +See \code{\link[=PLN_param]{PLN_param()}} and \code{\link[=PLNnetwork_param]{PLNnetwork_param()}} for a full description of the generic optimization parameters. Like \code{\link[=PLNnetwork_param]{PLNnetwork_param()}}, ZIPLN_param() has two parameters controlling the optimization due the inner-outer loop structure of the optimizer: \itemize{ -\item "ftol_out" outer solver stops when an optimization step changes the objective function by less than \code{ftol_out} multiplied by the absolute value of the parameter. Default is 1e-8 +\item "ftol_out" outer solver stops when an optimization step changes the objective function by less than \code{ftol_out} multiplied by the absolute value of the parameter. Default is 1e-6 \item "maxit_out" outer solver stops when the number of iteration exceeds \code{maxit_out}. Default is 100 -\item "approx_ZI" either use an exact or approximated conditional distribution for the zero inflantion. Default is FALSE +and one additional parameter controlling the form of the variational approximation of the zero inflation: +\item "approx_ZI" either uses an exact or approximated conditional distribution for the zero inflation. Default is FALSE } } diff --git a/man/ZIPLNfit.Rd b/man/ZIPLNfit.Rd index e8411b90..5d4a5950 100644 --- a/man/ZIPLNfit.Rd +++ b/man/ZIPLNfit.Rd @@ -40,7 +40,11 @@ print(myPLN) \item{\code{d0}}{number of covariates in the ZI part} -\item{\code{nb_param}}{number of parameters in the current PLN model} +\item{\code{nb_param_zi}}{number of parameters in the ZI part of the model} + +\item{\code{nb_param_pln}}{number of parameters in the PLN part of the model} + +\item{\code{nb_param}}{number of parameters in the ZIPLN model} \item{\code{model_par}}{a list with the matrices of parameters found in the model (B, Sigma, plus some others depending on the variant)} diff --git a/man/ZIPLNfit_diagonal.Rd b/man/ZIPLNfit_diagonal.Rd index 7c076442..8b75204f 100644 --- a/man/ZIPLNfit_diagonal.Rd +++ b/man/ZIPLNfit_diagonal.Rd @@ -24,7 +24,7 @@ print(myPLN) \section{Active bindings}{ \if{html}{\out{
    }} \describe{ -\item{\code{nb_param}}{number of parameters in the current PLN model} +\item{\code{nb_param_pln}}{number of parameters in the PLN part of the current model} \item{\code{vcov_model}}{character: the model used for the residual covariance} } diff --git a/man/ZIPLNfit_fixed.Rd b/man/ZIPLNfit_fixed.Rd index 6f0ba648..6f6da2fd 100644 --- a/man/ZIPLNfit_fixed.Rd +++ b/man/ZIPLNfit_fixed.Rd @@ -25,7 +25,7 @@ print(myPLN) \section{Active bindings}{ \if{html}{\out{
    }} \describe{ -\item{\code{nb_param}}{number of parameters in the current PLN model} +\item{\code{nb_param_pln}}{number of parameters in the PLN part of the current model} \item{\code{vcov_model}}{character: the model used for the residual covariance} } diff --git a/man/ZIPLNfit_sparse.Rd b/man/ZIPLNfit_sparse.Rd index 46dd2d39..f5e5387e 100644 --- a/man/ZIPLNfit_sparse.Rd +++ b/man/ZIPLNfit_sparse.Rd @@ -31,7 +31,7 @@ plot(myPLN) \item{\code{n_edges}}{number of edges if the network (non null coefficient of the sparse precision matrix)} -\item{\code{nb_param}}{number of parameters in the current PLN model} +\item{\code{nb_param_pln}}{number of parameters in the PLN part of the current model} \item{\code{vcov_model}}{character: the model used for the residual covariance} @@ -102,7 +102,7 @@ Extract interaction network in the latent space \if{html}{\out{
    }} } \subsection{Returns}{ -a square matrix of size \code{PLNnetworkfit$n} +a square matrix of size \code{ZIPLNfit_sparse$n} } } \if{html}{\out{
    }} diff --git a/man/ZIPLNfit_spherical.Rd b/man/ZIPLNfit_spherical.Rd index 4548dade..5779c2c8 100644 --- a/man/ZIPLNfit_spherical.Rd +++ b/man/ZIPLNfit_spherical.Rd @@ -24,7 +24,7 @@ print(myPLN) \section{Active bindings}{ \if{html}{\out{
    }} \describe{ -\item{\code{nb_param}}{number of parameters in the current PLN model} +\item{\code{nb_param_pln}}{number of parameters in the PLN part of the current model} \item{\code{vcov_model}}{character: the model used for the residual covariance} } diff --git a/man/ZIPLNnetwork.Rd b/man/ZIPLNnetwork.Rd index de38ed41..aef11b01 100644 --- a/man/ZIPLNnetwork.Rd +++ b/man/ZIPLNnetwork.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ZIPLNnetwork.R \name{ZIPLNnetwork} \alias{ZIPLNnetwork} -\title{Zero Inflated Poisson lognormal model toward sparse network inference} +\title{Zero Inflated Sparse Poisson lognormal model for network inference} \usage{ ZIPLNnetwork( formula, @@ -40,8 +40,9 @@ for details.} an R6 object with class \code{\link{ZIPLNnetworkfamily}} } \description{ -Fit the sparse inverse covariance variant of the Zero Inflated Poisson lognormal with a variational algorithm -for a collection of sparsity parameter values distributed on a log scale. Use the (g)lm syntax for model specification (covariates, offsets). +Perform sparse inverse covariance estimation for the Zero Inflated Poisson lognormal model +using a variational algorithm. Iterate over a range of logarithmically spaced sparsity parameter values. +Use the (g)lm syntax to specify the model (including covariates and offsets). } \details{ Covariates for the Zero-Inflation parameter (using a logistic regression model) can be specified in the formula RHS using the pipe diff --git a/man/ZIPLNnetwork_param.Rd b/man/ZIPLNnetwork_param.Rd index 7b9db861..fe3d4daa 100644 --- a/man/ZIPLNnetwork_param.Rd +++ b/man/ZIPLNnetwork_param.Rd @@ -30,7 +30,7 @@ ZIPLNnetwork_param( \item{penalize_diagonal}{boolean: should the diagonal terms be penalized in the graphical-Lasso? Default is \code{TRUE}} -\item{penalty_weights}{either a single or a list of p x p matrix of weights (default filled with 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values.} +\item{penalty_weights}{either a single or a list of p x p matrix of weights (default: all weights equal to 1) to adapt the amount of shrinkage to each pairs of node. Must be symmetric with positive values.} \item{config_post}{a list for controlling the post-treatment (optional bootstrap, jackknife, R2, etc).} @@ -44,15 +44,15 @@ which sometimes speeds up the inference.} list of parameters configuring the fit. } \description{ -Helper to define list of parameters to control the PLN fit. All arguments have defaults. +Helper to define list of parameters to control the ZIPLNnetwork fit. All arguments have defaults. } \details{ -See \code{\link[=PLN_param]{PLN_param()}} for a full description of the generic optimization parameters. PLNnetwork_param() also has two additional parameters controlling the optimization due the inner-outer loop structure of the optimizer: +See \code{\link[=PLNnetwork_param]{PLNnetwork_param()}} for a full description of the optimization parameters. Note that some defaults values are different than those used in \code{\link[=PLNnetwork_param]{PLNnetwork_param()}}: \itemize{ -\item "ftol_out" outer solver stops when an optimization step changes the objective function by less than xtol multiplied by the absolute value of the parameter. Default is 1e-6 -\item "maxit_out" outer solver stops when the number of iteration exceeds maxit_out. Default is 50 +\item "ftol_out" (outer loop convergence tolerance the objective function) is set by default to 1e-6 +\item "maxit_out" (max number of iterations for the outer loop) is set by default to 100 } } \seealso{ -\code{\link[=PLN_param]{PLN_param()}} +\code{\link[=PLNnetwork_param]{PLNnetwork_param()}} and \code{\link[=PLN_param]{PLN_param()}} } diff --git a/man/ZIPLNnetworkfamily.Rd b/man/ZIPLNnetworkfamily.Rd index 19b4e49d..0bf90623 100644 --- a/man/ZIPLNnetworkfamily.Rd +++ b/man/ZIPLNnetworkfamily.Rd @@ -94,7 +94,7 @@ Compute the stability path by stability selection \describe{ \item{\code{subsamples}}{a list of vectors describing the subsamples. The number of vectors (or list length) determines the number of subsamples used in the stability selection. Automatically set to 20 subsamples with size \code{10*sqrt(n)} if \code{n >= 144} and \code{0.8*n} otherwise following Liu et al. (2010) recommendations.} -\item{\code{control}}{a list controlling the main optimization process in each call to PLNnetwork. See \code{\link[=PLNnetwork]{PLNnetwork()}} for details.} +\item{\code{control}}{a list controlling the main optimization process in each call to \code{\link[=PLNnetwork]{PLNnetwork()}}. See \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}} and \code{\link[=ZIPLN_param]{ZIPLN_param()}} for details.} } \if{html}{\out{
    }} } diff --git a/man/coefficient_path.Rd b/man/coefficient_path.Rd index 3a389ccb..45552555 100644 --- a/man/coefficient_path.Rd +++ b/man/coefficient_path.Rd @@ -7,7 +7,7 @@ coefficient_path(Robject, precision = TRUE, corr = TRUE) } \arguments{ -\item{Robject}{an object with class \code{\link{PLNnetworkfamily}}, i.e. an output from \code{\link[=PLNnetwork]{PLNnetwork()}}} +\item{Robject}{an object with class \code{\link{Networkfamily}}, i.e. an output from \code{\link[=PLNnetwork]{PLNnetwork()}}} \item{precision}{a logical, should the coefficients of the precision matrix Omega or the covariance matrix Sigma be sent back. Default is \code{TRUE}.} diff --git a/man/plot.Networkfamily.Rd b/man/plot.Networkfamily.Rd index b54595d7..ab24d8b7 100644 --- a/man/plot.Networkfamily.Rd +++ b/man/plot.Networkfamily.Rd @@ -37,12 +37,13 @@ ) } \arguments{ -\item{x}{an R6 object with class \code{\link{PLNnetworkfamily}}} +\item{x}{an R6 object with class \code{\link{PLNnetworkfamily}} or \code{\link{ZIPLNnetworkfamily}}} \item{type}{a character, either "criteria", "stability" or "diagnostic" for the type of plot.} -\item{criteria}{vector of characters. The criteria to plot in c("loglik", "BIC", "ICL", "R_squared", "EBIC", "pen_loglik"). -Default is c("loglik", "pen_loglik", "BIC", "EBIC"). Only relevant when \code{type = "criteria"}.} +\item{criteria}{Vector of criteria to plot, to be selected among "loglik" (log-likelihood), +"BIC", "ICL", "R_squared", "EBIC" and "pen_loglik" (penalized log-likelihood). +Default is c("loglik", "pen_loglik", "BIC", "EBIC"). Only used when \code{type = "criteria"}.} \item{reverse}{A logical indicating whether to plot the value of the criteria in the "natural" direction (loglik - 0.5 penalty) or in the "reverse" direction (-2 loglik + penalty). Default to FALSE, i.e use the diff --git a/man/plot.ZIPLNfit_sparse.Rd b/man/plot.ZIPLNfit_sparse.Rd index 52dad7c2..d358717c 100644 --- a/man/plot.ZIPLNfit_sparse.Rd +++ b/man/plot.ZIPLNfit_sparse.Rd @@ -17,7 +17,7 @@ ) } \arguments{ -\item{x}{an R6 object with class \code{\link{PLNnetworkfit}}} +\item{x}{an R6 object with class \code{\link{ZIPLNfit_sparse}}} \item{type}{character. Value of the weight of the edges in the network, either "partial_cor" (partial correlation) or "support" (binary). Default is \code{"partial_cor"}.} diff --git a/man/stability_selection.Rd b/man/stability_selection.Rd index 2fb5384a..ae378f80 100644 --- a/man/stability_selection.Rd +++ b/man/stability_selection.Rd @@ -12,16 +12,16 @@ stability_selection( ) } \arguments{ -\item{Robject}{an object with class \code{\link{PLNnetworkfamily}}, i.e. an output from \code{\link[=PLNnetwork]{PLNnetwork()}}} +\item{Robject}{an object with class \code{\link{PLNnetworkfamily}} or \code{\link{ZIPLNnetworkfamily}}, i.e. an output from \code{\link[=PLNnetwork]{PLNnetwork()}} or \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}}} \item{subsamples}{a list of vectors describing the subsamples. The number of vectors (or list length) determines th number of subsamples used in the stability selection. Automatically set to 20 subsamples with size \code{10*sqrt(n)} if \code{n >= 144} and \code{0.8*n} otherwise following Liu et al. (2010) recommendations.} -\item{control}{a list controlling the main optimization process in each call to PLNnetwork. See \code{\link[=PLNnetwork]{PLNnetwork()}} for details.} +\item{control}{a list controlling the main optimization process in each call to \code{\link[=PLNnetwork]{PLNnetwork()}} or \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}}. See \code{\link[=PLN_param]{PLN_param()}} or \code{\link[=ZIPLN_param]{ZIPLN_param()}} for details.} \item{force}{force computation of the stability path, even if a previous one has been detected.} } \value{ -the list of subsamples. The estimated probabilities of selection of the edges are stored in the fields \code{stability_path} of the initial Robject with class \code{\link{PLNnetworkfamily}} +the list of subsamples. The estimated probabilities of selection of the edges are stored in the fields \code{stability_path} of the initial Robject with class \code{\link{Networkfamily}} } \description{ This function computes the StARS stability criteria over a path of penalties. If a path has already been computed, the functions stops with a message unless \code{force = TRUE} has been specified. From 23090f2810d3d77fac4262a9ce3b5ca7f8e3ad30 Mon Sep 17 00:00:00 2001 From: Mahendra Mariadassou Date: Tue, 20 Feb 2024 18:11:37 +0100 Subject: [PATCH 27/30] Fix broken link in doc --- R/PLNnetworkfamily-class.R | 4 ++-- man/Networkfamily.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/PLNnetworkfamily-class.R b/R/PLNnetworkfamily-class.R index 1686df02..f12a5543 100644 --- a/R/PLNnetworkfamily-class.R +++ b/R/PLNnetworkfamily-class.R @@ -4,7 +4,7 @@ #' An R6 Class to virtually represent a collection of network fits #' -#' @description The functions [PLNnetwork()] and [ZIPLNnetwork()] both produce an instance of this class, which can be thought of as a vector of [`PLNnetworkfit`]s [`ZIPLNnetworkfit`]s (indexed by penalty parameter) +#' @description The functions [PLNnetwork()] and [ZIPLNnetwork()] both produce an instance of this class, which can be thought of as a vector of [`PLNnetworkfit`]s [`ZIPLNfit_sparse`]s (indexed by penalty parameter) #' #' This class comes with a set of methods mostly used to compare #' network fits (in terms of goodness of fit) or extract one from @@ -20,7 +20,7 @@ #' @include PLNfamily-class.R #' @importFrom R6 R6Class #' @importFrom glassoFast glassoFast -#' @seealso The functions [PLNnetwork()], [ZIPLNnetwork()] and the classes [`PLNnetworkfit`], [`ZIPLNnetworkfit`] +#' @seealso The functions [PLNnetwork()], [ZIPLNnetwork()] and the classes [`PLNnetworkfit`], [`ZIPLNfit_sparse`] Networkfamily <- R6Class( classname = "Networkfamily", inherit = PLNfamily, diff --git a/man/Networkfamily.Rd b/man/Networkfamily.Rd index 48bd6116..31b53c27 100644 --- a/man/Networkfamily.Rd +++ b/man/Networkfamily.Rd @@ -4,7 +4,7 @@ \alias{Networkfamily} \title{An R6 Class to virtually represent a collection of network fits} \description{ -The functions \code{\link[=PLNnetwork]{PLNnetwork()}} and \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}} both produce an instance of this class, which can be thought of as a vector of \code{\link{PLNnetworkfit}}s \code{\link{ZIPLNnetworkfit}}s (indexed by penalty parameter) +The functions \code{\link[=PLNnetwork]{PLNnetwork()}} and \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}} both produce an instance of this class, which can be thought of as a vector of \code{\link{PLNnetworkfit}}s \code{\link{ZIPLNfit_sparse}}s (indexed by penalty parameter) This class comes with a set of methods mostly used to compare network fits (in terms of goodness of fit) or extract one from @@ -13,7 +13,7 @@ See the documentation for \code{\link[=getBestModel]{getBestModel()}}, \code{\link[=getModel]{getModel()}} and \link[=plot.Networkfamily]{plot()} for the user-facing ones. } \seealso{ -The functions \code{\link[=PLNnetwork]{PLNnetwork()}}, \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}} and the classes \code{\link{PLNnetworkfit}}, \code{\link{ZIPLNnetworkfit}} +The functions \code{\link[=PLNnetwork]{PLNnetwork()}}, \code{\link[=ZIPLNnetwork]{ZIPLNnetwork()}} and the classes \code{\link{PLNnetworkfit}}, \code{\link{ZIPLNfit_sparse}} } \section{Super class}{ \code{\link[PLNmodels:PLNfamily]{PLNmodels::PLNfamily}} -> \code{Networkfamily} From dea860542817c2c8ba5364c50c0fd16427459b6e Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Sat, 24 Feb 2024 17:08:30 +0100 Subject: [PATCH 28/30] fixes to Mahendra's comments --- R/PLNnetwork.R | 1 - R/PLNnetworkfamily-class.R | 6 ++---- R/ZIPLNfit-class.R | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/R/PLNnetwork.R b/R/PLNnetwork.R index 1ea9d96b..372357be 100644 --- a/R/PLNnetwork.R +++ b/R/PLNnetwork.R @@ -123,6 +123,5 @@ PLNnetwork_param <- function( variance = TRUE , config_post = config_pst , config_optim = config_opt , -### TODO CHECK: Why two inceptive model (cov and not ?) inception = inception ), class = "PLNmodels_param") } diff --git a/R/PLNnetworkfamily-class.R b/R/PLNnetworkfamily-class.R index f12a5543..20f607a7 100644 --- a/R/PLNnetworkfamily-class.R +++ b/R/PLNnetworkfamily-class.R @@ -501,8 +501,7 @@ ZIPLNnetworkfamily <- R6Class( cat("\nStability Selection for ZIPLNnetwork: ") cat("\nsubsampling: ") - # stabs_out <- future.apply::future_lapply(subsamples, function(subsample) { - stabs_out <- lapply(subsamples, function(subsample) { + stabs_out <- future.apply::future_lapply(subsamples, function(subsample) { cat("+") inception_ <- self$getModel(self$penalties[1]) inception_$update( @@ -534,8 +533,7 @@ ZIPLNnetworkfamily <- R6Class( as.matrix(model$latent_network("support"))[upper.tri(diag(private$p))] })) nets - # }, future.seed = TRUE, future.scheduling = structure(TRUE, ordering = "random")) - }) + }, future.seed = TRUE, future.scheduling = structure(TRUE, ordering = "random")) prob <- Reduce("+", stabs_out, accumulate = FALSE) / length(subsamples) ## formatting/tyding diff --git a/R/ZIPLNfit-class.R b/R/ZIPLNfit-class.R index 791d1507..567e7a95 100644 --- a/R/ZIPLNfit-class.R +++ b/R/ZIPLNfit-class.R @@ -802,7 +802,7 @@ ZIPLNfit_sparse <- R6Class( n_edges = function() {sum(private$Omega[upper.tri(private$Omega, diag = FALSE)] != 0)}, #' @field nb_param_pln number of parameters in the PLN part of the current model nb_param_pln = function() { - as.integer(self$p * self$d + self$n_edges) + as.integer(self$p * self$d + self$n_edges + self$p) }, #' @field vcov_model character: the model used for the residual covariance vcov_model = function() {"sparse"}, From 1c000d0c6a99cfc669bf1aeffb5d0df685dabb13 Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Sat, 24 Feb 2024 17:17:35 +0100 Subject: [PATCH 29/30] upadted NEWS file with new ZIPLN features --- NEWS.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 01adb95d..b0ea56d7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,10 @@ -# Current (2024-01-23) +# PLNmodels 1.2.0 (2024-02-24) -* Addition of ZIPLN() and ZIPLNfit-class to allow for zero-inflation in the (for now) standard PLN model (merge PR #116) +* new feature: ZIPLN (PLN with zero inflation) + * ZIPLN() and ZIPLNfit-class to allow for zero-inflation in the standard PLN model (merge PR #116) + * ZIPLNnetwork() and ZIPLNfit_sparse-class to allow for zero-inflation in the PLNnetwork model (merge PR #118) + * Code factorization between PLNnetwork and ZIPLNnetwork (and associated classes) +* fix inconsistency between fitted and predict (merge PR #115) # PLNmodels 1.1.0 (2024-01-08) From 25d2444e8f8cb44fad0ebbc203d32106a8b705e4 Mon Sep 17 00:00:00 2001 From: Julien Chiquet Date: Mon, 26 Feb 2024 15:48:34 +0100 Subject: [PATCH 30/30] updating R check with oldrel for macos and windows --- .github/workflows/R-CMD-check.yaml | 9 +++++---- README.Rmd | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 1474613d..ade15537 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -19,17 +19,18 @@ jobs: matrix: config: - {os: macOS-latest, r: 'release'} + - {os: macOS-latest, r: 'oldrel-1'} - {os: windows-latest, r: 'release'} - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - - {os: ubuntu-latest, r: 'release'} - - {os: ubuntu-latest, r: 'oldrel-1'} + - {os: windows-latest, r: 'oldrel-1'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 diff --git a/README.Rmd b/README.Rmd index 2516078d..3ce56d45 100644 --- a/README.Rmd +++ b/README.Rmd @@ -17,6 +17,7 @@ knitr::opts_chunk$set( [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/PLNmodels)](https://cran.r-project.org/package=PLNmodels) [![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-blue.svg)](https://lifecycle.r-lib.org/articles/stages.html) [![](https://img.shields.io/github/last-commit/pln-team/PLNmodels.svg)](https://github.com/pln-team/PLNmodels/commits/master) +[![R-CMD-check](https://github.com/PLN-team/PLNmodels/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/PLN-team/PLNmodels/actions/workflows/R-CMD-check.yaml) > The Poisson lognormal model and variants can be used for a variety of multivariate problems when count data are at play (including PCA, LDA and network inference for count data). This package implements efficient algorithms to fit such models accompanied with a set of functions for visualization and diagnostic. See [this deck of slides](https://pln-team.github.io/slideshow/slides) for a comprehensive introduction.