diff --git a/NAMESPACE b/NAMESPACE
index 17763f8..0d96d8a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -12,11 +12,17 @@ export(CausalForestDynamicSubgroups)
 export(DiscreteCovariatesToOneHot)
 export(RATEOmnibusTest)
 export(RATETest)
+export(asympBF)
 export(braid_rows)
 export(charlson_score)
 export(fct_confint)
 export(flatten_date_intervals)
+export(invasympBF)
+export(invlogasympBF)
+export(invwatershed)
 export(lms)
+export(logasympBF)
+export(watershed)
 import(stats)
 importFrom(doParallel,registerDoParallel)
 importFrom(foreach,"%dopar%")
diff --git a/R/klp_AsympBF.R b/R/klp_AsympBF.R
new file mode 100644
index 0000000..e25f9b3
--- /dev/null
+++ b/R/klp_AsympBF.R
@@ -0,0 +1,201 @@
+#' @title Asymptotic Bayes factors
+#'
+#' @description The Bayesian equivalent of a significance test of H1 (an
+#' unrestricted parameter value) versus H0 (a specific parameter value),
+#' based only on data D, can be obtained from the Bayes factor (BF). Then
+#' `BF = Probability(H1|D) / Probability(H0|D)`, which is a Bayesian
+#' equivalent of a likelihood ratio. It is based on the same asymptotics as
+#' the ubiquitous chi-square tests. This BF only depends on the difference in
+#' deviance between the models corresponding to H0 and H1 (chisquare) and the
+#' dimension d of H1. The BF is monotone in chisquare (and hence in the
+#' p-value p) for fixed d. It is thus a tool to turn p-values into evidence,
+#' also retrospectively. The expression for BF depends on a parameter lambda
+#' which expresses the ratio between the information in the prior and in the
+#' data (likelihood). By default `lambda = min(d / chisquare, lambdamax)`
+#' with `lambdamax = 0.255`. Thus, as chisquare goes to infinity we
+#' effectively maximize BF, and hence the evidence favoring H1; conversely,
+#' for small chisquare the cap lambdamax yields a well-defined watershed
+#' where we have equal preference for H1 and H0. The value 0.255 corresponds
+#' to a watershed of 2, that is, we prefer H1 when `chisquare > d * 2` and H0
+#' when `chisquare < d * 2`, similar to having a BF that is a continuous
+#' version of the Akaike Information Criterion for model selection. For
+#' derivations and details, see Rostgaard (2023).
+#'
+#' @param chisq a non-negative number denoting the difference in deviance
+#' between the statistical models corresponding to H0 and H1
+#' @param p the p value corresponding to the test statistic chisq on d degrees
+#' of freedom
+#' @param d the dimension of H1, `d >= 1`
+#' @param lambda a strictly positive number corresponding to the ratio between
+#' the information in the prior and the data
+#' @param lambdamax an upper limit on lambda
+#' @param bf Bayes factor, a strictly positive number
+#' @param logasympBF log(bf)
+#'
+#' @details For fixed dimension d of the alternative hypothesis H1,
+#' `asympBF(.) = exp(logasympBF(.))` maps a test statistic (chisquare) or a
+#' p-value p into a Bayes factor (the ratio between the probabilities of the
+#' models corresponding to each hypothesis). `asympBF(.)` is monotonically
+#' increasing in chisquare, attaining the value 1 when `chisquare = d * w`,
+#' where `w` is the watershed. The watershed is thus a device to specify what
+#' the user considers a practical null result, by choosing
+#' `lambdamax = watershed(w)` for the desired watershed `w`.
+#'
+#' For sufficiently large values of chisquare, lambda is estimated as
+#' d/chisquare. This behavior can be overruled by specifying lambda. Using
+#' `invasympBF(bf) = invlogasympBF(log(bf))` we can map a Bayes factor bf to
+#' a value of chisquare.
+#'
+#' Likewise, we can obtain the watershed corresponding to a given lambdamax
+#' as `invwatershed(lambdamax)`.
+#'
+#' Generally, in these functions we recode or ignore illegal parameter
+#' values rather than returning an error code. `chisq` is always recoded as
+#' `abs(chisq)`. `d` is set to `1` as default, and missing or entered values
+#' of `d < 1` are recoded as `d = 1`. Entered values of `lambdamax <= 0` or
+#' missing are ignored. Entered values of `lambda <= 0` or missing are
+#' ignored. In `invwatershed(.)` we use `abs(lambda)` as argument;
+#' `lambda = 0` results in an error.
+#'
+#' @references Klaus Rostgaard (2023): Simple nested Bayesian hypothesis testing
+#' for meta-analysis, Cox, Poisson and logistic regression models. Scientific
+#' Reports. https://doi.org/10.1038/s41598-023-31838-8
+#'
+#' @author
+#' KLP & KIJA
+#'
+#' @examples
+#' # example code
+#'
+#' # 1. the example(s) from Rostgaard (2023)
+#' asympBF(p = 0.19, d = 8) # 0.148411
+#' asympBF(p = 0.19, d = 8, lambdamax = 100) # 0.7922743
+#' asympBF(p = 0.19, d = 8, lambda = 100 / 4442) # 5.648856e-05
+#' # a maximal value of a parameter considered practically null
+#' deltalogHR <- -0.2 * log(0.80)
+#' sigma <- (log(1.19) - log(0.89)) / 3.92
+#' chisq <- (deltalogHR / sigma) ** 2
+#' chisq # 0.3626996
+#' watershed(chisq)
+#' # leads nowhere useful: chisq = 0.36 < 2
+#'
+#' # 2. tests for interaction/heterogeneity - real world examples
+#' asympBF(p = 0.26, d = 24) # 0.00034645
+#' asympBF(p = 0.06, d = 11) # 0.3101306
+#' asympBF(p = 0.59, d = 7) # 0.034872
+#'
+#' # 3. other examples
+#' asympBF(p = 0.05) # 2.082664
+#' asympBF(p = 0.05, d = 8) # 0.8217683
+#' chisq <- invasympBF(19)
+#' chisq # 9.102697
+#' pchisq(chisq, df = 1, lower.tail = FALSE) # 0.002552328
+#' chisq <- invasympBF(19, d = 8)
+#' chisq # 23.39056
+#' pchisq(chisq, df = 8, lower.tail = FALSE) # 0.002897385
+#' @rdname asympBF
+#' @export
+logasympBF <- function(chisq = NA,
+                       p = NA,
+                       d = 1,
+                       lambda = NA,
+                       lambdamax = 0.255) {
+  chisq <- abs(chisq)
+  d <- ifelse(is.na(d) | d < 1, 1, d)
+  if (is.na(chisq)) chisq <- qchisq(p, d, lower.tail = FALSE)
+  lambda <- ifelse(lambda <= 0 | is.na(lambda), d / max(chisq, 10E-6), lambda)
+  lambdamax <- ifelse(lambdamax <= 0, 0.255, lambdamax)
+  lambda0 <- min(lambda, lambdamax)
+  psi <- lambda0 / (1 + lambda0)
+  logasympBF <- d / 2 * log(psi) + (1 - psi) * chisq / 2
+  return(logasympBF)
+}
+
+#' @rdname asympBF
+#' @export
+asympBF <- function(chisq = NA,
+                    p = NA,
+                    d = 1,
+                    lambda = NA,
+                    lambdamax = 0.255) {
+  # forward the call to logasympBF and exponentiate the result
+  call <- match.call()
+  call[[1]] <- logasympBF
+  asympBF <- exp(eval(call))
+  return(asympBF)
+}
+
+#' @rdname asympBF
+#' @export
+invlogasympBF <- function(logasympBF = NA,
+                          d = 1,
+                          lambda = NA,
+                          lambdamax = 0.255) {
+  d <- ifelse(is.na(d) | d < 1, 1, d)
+  lambda <- ifelse(lambda <= 0, NA, lambda)
+  lambdamax <- ifelse(lambdamax <= 0, 0.255, lambdamax)
+  if (is.na(lambda)) {
+    lambdaP <- lambdamax
+    lambdaT <- NA
+  } else {
+    lambdaP <- min(lambda, lambdamax)
+    lambdaT <- lambdaP
+  }
+  chisqT <- NA
+  psiP <- lambdaP / (lambdaP + 1)
+  chisqP <- (logasympBF - d * log(psiP) / 2) * 2 / (1 - psiP)
+  if (!is.na(lambdaT) || (0 <= chisqP && chisqP <= d / lambdaP)) {
+    chisqT <- chisqP
+    lambdaT <- lambdaP
+  }
+  if (is.na(chisqT)) {
+    # no closed-form solution yet: iterate on lambda = min(lambdamax, d / chisq)
+    epsilon <- 0.000005
+    for (taeller in 1:20) {
+      lambdaP <- min(lambdamax, d / chisqP)
+      psiP <- lambdaP / (lambdaP + 1)
+      chisqP <- (logasympBF - d * log(psiP) / 2) * 2 / (1 - psiP)
+      if (abs(lambdaP / min(lambdamax, d / chisqP) - 1) < epsilon) {
+        chisqT <- chisqP
+        break
+      }
+    }
+  }
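+  # chisqT holds the chisquare value consistent with the requested
+  # log Bayes factor; it remains NA if no consistent value was found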
+  return(chisqT)
+}
+
+#' @rdname asympBF
+#' @export
+invasympBF <- function(bf,
+                       d = 1,
+                       lambda = NA,
+                       lambdamax = 0.255) {
+  invasympBF <- invlogasympBF(
+    logasympBF = log(bf),
+    d = d,
+    lambda = lambda,
+    lambdamax = lambdamax
+  )
+  return(invasympBF)
+}
+
+#' @rdname asympBF
+#' @export
+watershed <- function(chisq) {
+  # bisection for psi solving log(psi) + chisq * (1 - psi) = 0,
+  # i.e. the lambda at which BF = 1 for a test statistic chisq on 1 df
+  chisq <- abs(chisq)
+  psi <- 0.5
+  delta <- 0.25
+  twologBF <- log(psi) + chisq * (1 - psi)
+  for (taeller in 1:20) {
+    if (twologBF < 0) psi <- psi + delta else psi <- psi - delta
+    twologBF <- log(psi) + chisq * (1 - psi)
+    if (abs(twologBF) <= 10e-6) break else delta <- delta / 2
+  }
+  lambda <- psi / (1 - psi)
+  return(lambda)
+}
+
+#' @rdname asympBF
+#' @export
+invwatershed <- function(lambda) {
+  # chisq per dimension at which BF = 1 for the given lambda
+  lambda <- abs(lambda)
+  psi <- lambda / (1 + lambda)
+  chisq <- -log(psi) / (1 - psi)
+  return(chisq)
+}
diff --git a/man/asympBF.Rd b/man/asympBF.Rd
new file mode 100644
index 0000000..f1f41f7
--- /dev/null
+++ b/man/asympBF.Rd
@@ -0,0 +1,124 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/klp_AsympBF.R
+\name{logasympBF}
+\alias{logasympBF}
+\alias{asympBF}
+\alias{invlogasympBF}
+\alias{invasympBF}
+\alias{watershed}
+\alias{invwatershed}
+\title{Asymptotic Bayes factors}
+\usage{
+logasympBF(chisq = NA, p = NA, d = 1, lambda = NA, lambdamax = 0.255)
+
+asympBF(chisq = NA, p = NA, d = 1, lambda = NA, lambdamax = 0.255)
+
+invlogasympBF(logasympBF = NA, d = 1, lambda = NA, lambdamax = 0.255)
+
+invasympBF(bf, d = 1, lambda = NA, lambdamax = 0.255)
+
+watershed(chisq)
+
+invwatershed(lambda)
+}
+\arguments{
+\item{chisq}{a non-negative number denoting the difference in deviance
+between the statistical models corresponding to H0 and H1}
+
+\item{p}{the p value corresponding to the test statistic chisq on d degrees
+of freedom}
+
+\item{d}{the dimension of H1, \verb{d >= 1}}
+
+\item{lambda}{a strictly positive number corresponding to the ratio between
+the information in the prior and the data}
+
+\item{lambdamax}{an upper limit on lambda}
+
+\item{logasympBF}{log(bf)}
+
+\item{bf}{Bayes factor, a strictly positive number}
+}
+\description{
+The Bayesian equivalent of a significance test of H1 (an unrestricted
+parameter value) versus H0 (a specific parameter value), based only on data
+D, can be obtained from the Bayes factor (BF). Then
+\code{BF = Probability(H1|D) / Probability(H0|D)}, which is a Bayesian
+equivalent of a likelihood ratio. It is based on the same asymptotics as the
+ubiquitous chi-square tests. This BF only depends on the difference in
+deviance between the models corresponding to H0 and H1 (chisquare) and the
+dimension d of H1. The BF is monotone in chisquare (and hence in the p-value
+p) for fixed d. It is thus a tool to turn p-values into evidence, also
+retrospectively. The expression for BF depends on a parameter lambda which
+expresses the ratio between the information in the prior and in the data
+(likelihood). By default \code{lambda = min(d / chisquare, lambdamax)} with
+\code{lambdamax = 0.255}. Thus, as chisquare goes to infinity we effectively
+maximize BF, and hence the evidence favoring H1; conversely, for small
+chisquare the cap lambdamax yields a well-defined watershed where we have
+equal preference for H1 and H0. The value 0.255 corresponds to a watershed
+of 2, that is, we prefer H1 when \code{chisquare > d * 2} and H0 when
+\code{chisquare < d * 2}, similar to having a BF that is a continuous
+version of the Akaike Information Criterion for model selection. For
+derivations and details, see Rostgaard (2023).
+}
+\details{
+For fixed dimension d of the alternative hypothesis H1,
+\code{asympBF(.) = exp(logasympBF(.))} maps a test statistic (chisquare) or
+a p-value p into a Bayes factor (the ratio between the probabilities of the
+models corresponding to each hypothesis). \code{asympBF(.)} is monotonically
+increasing in chisquare, attaining the value 1 when \code{chisquare = d * w},
+where \code{w} is the watershed. The watershed is thus a device to specify
+what the user considers a practical null result, by choosing
+\code{lambdamax = watershed(w)} for the desired watershed \code{w}.
+
+For sufficiently large values of chisquare, lambda is estimated as
+d/chisquare. This behavior can be overruled by specifying lambda. Using
+\code{invasympBF(bf) = invlogasympBF(log(bf))} we can map a Bayes factor bf
+to a value of chisquare.
+
+Likewise, we can obtain the watershed corresponding to a given lambdamax as
+\code{invwatershed(lambdamax)}.
+
+Generally, in these functions we recode or ignore illegal parameter values
+rather than returning an error code. \code{chisq} is always recoded as
+\code{abs(chisq)}. \code{d} is set to \code{1} as default, and missing or
+entered values of \code{d < 1} are recoded as \code{d = 1}. Entered values
+of \code{lambdamax <= 0} or missing are ignored. Entered values of
+\code{lambda <= 0} or missing are ignored. In \code{invwatershed(.)} we use
+\code{abs(lambda)} as argument; \code{lambda = 0} results in an error.
+}
+\examples{
+# example code
+
+# 1. the example(s) from Rostgaard (2023)
+asympBF(p = 0.19, d = 8) # 0.148411
+asympBF(p = 0.19, d = 8, lambdamax = 100) # 0.7922743
+asympBF(p = 0.19, d = 8, lambda = 100 / 4442) # 5.648856e-05
+# a maximal value of a parameter considered practically null
+deltalogHR <- -0.2 * log(0.80)
+sigma <- (log(1.19) - log(0.89)) / 3.92
+chisq <- (deltalogHR / sigma) ** 2
+chisq # 0.3626996
+watershed(chisq)
+# leads nowhere useful: chisq = 0.36 < 2
+
+# 2. tests for interaction/heterogeneity - real world examples
+asympBF(p = 0.26, d = 24) # 0.00034645
+asympBF(p = 0.06, d = 11) # 0.3101306
+asympBF(p = 0.59, d = 7) # 0.034872
+
+# 3. other examples
+asympBF(p = 0.05) # 2.082664
+asympBF(p = 0.05, d = 8) # 0.8217683
+chisq <- invasympBF(19)
+chisq # 9.102697
+pchisq(chisq, df = 1, lower.tail = FALSE) # 0.002552328
+chisq <- invasympBF(19, d = 8)
+chisq # 23.39056
+pchisq(chisq, df = 8, lower.tail = FALSE) # 0.002897385
+}
+\references{
+Klaus Rostgaard (2023): Simple nested Bayesian hypothesis testing
+for meta-analysis, Cox, Poisson and logistic regression models. Scientific
+Reports. https://doi.org/10.1038/s41598-023-31838-8
+}
+\author{
+KLP & KIJA
+}
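
A minimal sketch (not part of the diff) of the closed form behind logasympBF(), checked against the documented example asympBF(p = 0.19, d = 8); the variable names chisq19, lambda0 and psi0 are illustrative only, and the default lambdamax = 0.255 is assumed:

# closed form: BF = psi^(d/2) * exp((1 - psi) * chisq / 2),
# with psi = lambda0 / (1 + lambda0) and lambda0 = min(d / chisq, lambdamax)
chisq19 <- qchisq(0.19, df = 8, lower.tail = FALSE)  # test statistic implied by p = 0.19 on 8 df
lambda0 <- min(8 / chisq19, 0.255)                   # the default lambdamax = 0.255 binds here
psi0 <- lambda0 / (1 + lambda0)
psi0^(8 / 2) * exp((1 - psi0) * chisq19 / 2)         # ~0.148, agreeing with asympBF(p = 0.19, d = 8)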
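
Likewise, a brief check (again not part of the diff) of the watershed/lambdamax relation described in the details, using only functions added by this diff; the watershed value 2 is the one quoted in the documentation:

# watershed(w) returns the lambdamax placing the point of indifference (BF = 1)
# at chisq = d * w; invwatershed(lambdamax) maps back to w
watershed(2)                                  # ~0.255, the default lambdamax
invwatershed(0.255)                           # ~2
asympBF(chisq = 2, d = 1, lambdamax = 0.255)  # ~1, equal preference for H0 and H1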