diff --git a/.Rbuildignore b/.Rbuildignore index b15a5e9..f19eb43 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -3,3 +3,7 @@ ^LICENSE\.md$ ^doc$ ^Meta$ +^README\.Rmd$ +^bibliography.bib$ +^cran-comments\.md$ +^\.github$ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000..2d19fc7 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..0f2fe08 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,52 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check + +permissions: read-all + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' diff --git a/DESCRIPTION b/DESCRIPTION index 22a84d9..9edc33a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,10 @@ Package: skipTrack -Title: 
Bayesian Hierarchical Models Adjusting for Non-Adherence in Mobile Menstrual Cycle Tracking -Version: 0.1.0 +Title: A Bayesian Hierarchical Model that Controls for Non-Adherence in Mobile Menstrual Cycle Tracking +Version: 0.0.1 Authors@R: - person("Luke", "Duttweiler", , "lduttweiler@hsph.harvard.edu", role = c("aut", "cre"), + person("Luke", "Duttweiler", , "lduttweiler@hsph.harvard.edu", role = c("aut", "cre", 'cph'), comment = c(ORCID = "0000-0002-0467-995X")) -Description: Implements a Bayesian hierarchical model of the same name designed to perform inference on cycle length mean and regularity given the possibility of non-adherence in cycle length self-tracking. Currently accepts baseline continuous covariates for cycle mean length and regularity. Future updates will include include categorical covariates, time-varying covariates, and the inclusion of external information regarding tracking skips. +Description: Implements a Bayesian hierarchical model designed to identify skips in menstrual cycle self-tracking on mobile apps. Future developments will allow for the inclusion of covariates affecting cycle mean and regularity, as well as extra information regarding tracking non-adherence. Main methods to be outlined in a forthcoming paper, with alternative models from Li et al. (2022).
License: MIT + file LICENSE Encoding: UTF-8 Roxygen: list(markdown = TRUE) @@ -20,6 +20,7 @@ Imports: glmnet (>= 4.1.0), gridExtra (>= 2.0), LaplacesDemon (>= 16.0.0), + lifecycle, mvtnorm (>= 1.2.0), optimg (>= 0.1.2), parallel (>= 4.0.0), diff --git a/LICENSE b/LICENSE index e8ea682..9b9fd49 100644 --- a/LICENSE +++ b/LICENSE @@ -1,2 +1,2 @@ -YEAR: 2023 +YEAR: 2024 COPYRIGHT HOLDER: skipTrack authors diff --git a/LICENSE.md b/LICENSE.md index 3753d3e..ec77c79 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ # MIT License -Copyright (c) 2023 skipTrack authors +Copyright (c) 2024 skipTrack authors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/NAMESPACE b/NAMESPACE index 849de60..9b46a52 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,6 +11,7 @@ export(skipTrack.simulate) export(skipTrack.visualize) importFrom(foreach,"%do%") importFrom(foreach,"%dopar%") +importFrom(lifecycle,deprecated) importFrom(parallel,detectCores) importFrom(parallel,makeCluster) importFrom(stats,dbeta) diff --git a/R/liInference.R b/R/liInference.R index 74ba011..4582717 100644 --- a/R/liInference.R +++ b/R/liInference.R @@ -19,6 +19,8 @@ #' #' @return Numeric value representing the Monte Carlo estimate of the negative marginal log-likelihood. #' +#' @references Li, Kathy, et al. "A predictive model for next cycle start date that accounts for adherence in menstrual self-tracking." Journal of the American Medical Informatics Association 29.1 (2022): 3-11. + likVec <- function(pars = c(kappa = 180, gamma = 6, alpha = 2, diff --git a/R/liMCMC.R b/R/liMCMC.R index ac5e3a3..6ef5d1a 100644 --- a/R/liMCMC.R +++ b/R/liMCMC.R @@ -1,4 +1,40 @@ -#NEEDS DOCUMENTATION + +#' Runs MCMC algorithm for performing inference using the model from Li et al. (2022) +#' +#' @description +#' This function performs inference on cycle length data, assuming the model from Li et al. (2022). 
It is important to note +#' that Li et al. do not actually use this algorithm as they target a particular analytic posterior predictive distribution, +#' and solve directly. However, we are targeting a different posterior and thus use this MCMC to perform inference. +#' +#' @inheritParams skipTrack.MCMC +#' +#' @param S Integer. The maximum number of skips to consider possible. +#' @param hyperparams Named numeric vector of hyperparameters containing the +#' elements: kappa, gamma, alpha, beta. NOTE: MUST BE IN CORRECT ORDER. +#' - \code{kappa}: Numeric value, shape parameter of Gamma distribution for Lambda_i. +#' - \code{gamma}: Numeric value, rate parameter of Gamma distribution for Lambda_i. +#' - \code{alpha}: Numeric value, shape1 parameter of Beta distribution for Pi_i. +#' - \code{beta}: Numeric value, shape2 parameter of Beta distribution for Pi_i. +#' @param initialParams A list of initial parameter values for the MCMC algorithm. +#' Default values are provided for pi, lambdais, piis, ss. +#' +#' @return A list containing the MCMC draws for each parameter at each iteration. Each element +#' in the list is itself a list containing: +#' \describe{ +#' \item{ijDat}{A data.frame with updated parameters at the individual-observation level: Individual, ys, lambdais, piis, ss.} +#' \item{iDat}{A data.frame with updated parameters at the individual level: Individual, lambdas, pis.} +#' \item{kappa}{Fixed value of hyperparameter kappa.} +#' \item{gamma}{Fixed value of hyperparameter gamma.} +#' \item{alpha}{Fixed value of hyperparameter alpha.} +#' \item{beta}{Fixed value of hyperparameter beta.} +#' \item{S}{Fixed input value S.} +#' \item{indFirst}{A logical vector indicating the first occurrence of each individual.} +#' } +#' +#' @seealso \code{\link{gibbsStepLi}} +#' +#' @references Li, Kathy, et al. "A predictive model for next cycle start date that accounts for adherence in menstrual self-tracking." 
Journal of the American Medical Informatics Association 29.1 (2022): 3-11. +#' liMCMC <- function(Y, cluster, S, @@ -43,6 +79,33 @@ liMCMC <- function(Y, return(fullDraws) } +#' Gibbs Step Li - One MCMC step for the Li Model +#' +#' +#' +#' @param ijDat A data.frame with parameters at the individual-observation level: Individual, ys, lambdais, piis, ss. +#' @param iDat A data.frame with parameters at the individual level: Individual, lambdas, pis. +#' @param kappa Fixed value of hyperparameter kappa. +#' @param gamma Fixed value of hyperparameter gamma. +#' @param alpha Fixed value of hyperparameter alpha. +#' @param beta Fixed value of hyperparameter beta. +#' @param S Fixed input value S. +#' @param indFirst A logical vector indicating the first occurrence of each individual. +#' +#' @return A list containing one MCMC draw for each parameter. Elements are: +#' \describe{ +#' \item{ijDat}{A data.frame with updated parameters at the individual-observation level: Individual, ys, lambdais, piis, ss.} +#' \item{iDat}{A data.frame with updated parameters at the individual level: Individual, lambdas, pis.} +#' \item{kappa}{Fixed value of hyperparameter kappa.} +#' \item{gamma}{Fixed value of hyperparameter gamma.} +#' \item{alpha}{Fixed value of hyperparameter alpha.} +#' \item{beta}{Fixed value of hyperparameter beta.} +#' \item{S}{Fixed input value S.} +#' \item{indFirst}{A logical vector indicating the first occurrence of each individual.} +#' } +#' +#' @references Li, Kathy, et al. "A predictive model for next cycle start date that accounts for adherence in menstrual self-tracking." Journal of the American Medical Informatics Association 29.1 (2022): 3-11. 
+#' gibbsStepLi <- function(ijDat, iDat, kappa, gamma, alpha, beta, S, indFirst){ #Now i level newLambdais <- lapply(iDat$Individual, function(ind){ diff --git a/R/liPosteriors.R b/R/liPosteriors.R index 9f93f67..6a70134 100644 --- a/R/liPosteriors.R +++ b/R/liPosteriors.R @@ -4,10 +4,10 @@ #' for lambda_i in the Li algorithm, given the observed values y_ij, the indicators s_ij, #' and the prior hyperparameters priorK and priorG. #' -#' @param yij Vector of observed values for lambda_i. -#' @param sij Vector of indicators for lambda_i. -#' @param priorK Prior hyperparameter for K. -#' @param priorG Prior hyperparameter for G. +#' @param yij Vector of observed values for individual i. +#' @param sij Vector of cycle skip indicators for individual i. +#' @param priorK Prior hyperparameter kappa. +#' @param priorG Prior hyperparameter gamma. #' #' @return A random draw from the posterior distribution of lambda_i. #' @@ -24,9 +24,19 @@ postLambdai <- function(yij, sij, priorK, priorG){ return(rep(dr, n)) } -#Function to compute random draw from posterior for pi_i in Li algorithm. -#This one requires an MH step. Using the posterior of true geometric from the calculated hyperparameters as a proposal - +#' Compute M-H draw for pi_i in Li algorithm +#' +#' This performs a Metropolis-Hastings draw for pi_i, assuming s_ij follows a truncated geometric distribution with parameters +#' pi_i and S. The proposal distribution for pi_i is Beta(alpha, beta). +#' +#' @param sij Vector of cycle skip indicators for individual i +#' @param currentPii Current value of pi_i +#' @param priorA Hyperparameter alpha. +#' @param priorB Hyperparameter beta. 
+#' @param S Maximum number of skips allowed in algorithm +#' +#' @return Draw for pi_i, repeated for the number of observations from individual i +#' postPii <- function(sij, currentPii, priorA, priorB, S){ #n is the number of cycles for this individual n <- length(sij) diff --git a/R/skipTrack-package.R b/R/skipTrack-package.R index 70139b1..4fb8d1d 100644 --- a/R/skipTrack-package.R +++ b/R/skipTrack-package.R @@ -4,6 +4,7 @@ ## usethis namespace: start #' @importFrom foreach %do% #' @importFrom foreach %dopar% +#' @importFrom lifecycle deprecated #' @importFrom parallel detectCores #' @importFrom parallel makeCluster #' @importFrom stats dbeta diff --git a/README.Rmd b/README.Rmd new file mode 100644 index 0000000..935ee2e --- /dev/null +++ b/README.Rmd @@ -0,0 +1,162 @@ +--- +output: github_document +bibliography: 'bibliography.bib' +header-includes: + - \usepackage{amsmath} + - \usepackage{xcolor} +--- + + + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.path = "man/figures/README-", + out.width = "100%" +) +``` + +# skipTrack + + +[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) +[![R-CMD-check](https://github.com/LukeDuttweiler/skipTrack/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/LukeDuttweiler/skipTrack/actions/workflows/R-CMD-check.yaml) + + +Welcome to the SkipTrack Package! + +SkipTrack is a Bayesian hierarchical model for self-reported menstrual cycle length data on mobile health apps. The model is an extension of the hierarchical model presented in @li2022predictive that focuses on predicting an individual's next menstrual cycle start date while accounting for cycle length inaccuracies introduced by non-adherence in user self-tracked data. 
+ +## Installation + +```{r} +#Install from CRAN +install.packages('skipTrack') + +#Install Development Version +devtools::install_github("LukeDuttweiler/skipTrack") +``` + +## Model + +@li2022predictive notes that apps designed to help users track their menstrual cycles "are subject to adherence artifacts that may obscure health-related conclusions: if a user forgets to track their period, their cycle length computations are inflated." This is visualized in the image below in which the numbers represent days after the initial bleeding day is recorded in the app, $\color{red}{\text{red}}$ days are bleeding days recorded by the user, and $\color{blue}{\text{blue}}$ days are bleeding days not recorded by the user. + +$$\overbrace{\underbrace{\color{red}{1, 2, 3, 4}, 5, \dots, 29}_\text{True Cycle, 29 Days}}^\text{Recorded Cycle, 29 Days}, \overbrace{\underbrace{\color{red}{30, 31, 32, 33}, 34, \dots, 61}_\text{True Cycle, 32 Days}, \underbrace{\color{blue}{62, 63, 64, 65}, 66, \dots, 90}_\text{True Cycle, 29 Days}}^\text{Recorded Cycle, 61 Days}$$ + +The SkipTrack model extends the model given by @li2022predictive by specifying parameters for each individual's cycle length regularity, as well as their cycle length mean, and weakening assumptions made by Li et al. on the probability of failing to track a cycle. + + + +In short, the modeling framework assumed by SkipTrack is as follows. The observed cycle lengths are represented with $y_{ij}$ where $1 \leq i \leq n$ represents an individual who has contributed $n_i$ observations, with $1 \leq j \leq n_i$. We assume that + +$$ +y_{ij} \sim \text{LogNormal}\big(\mu_i + \log(c_{ij}), \tau_i\big), +$$ +where $\mu_i$ is an individual level mean parameter, $\tau_i$ is an individual level precision parameter, and $c_{ij}$ is an integer-valued parameter representing the number of true cycles present in the observed cycle $y_{ij}$. 
That is, if $c_{ij} = 1$ then $y_{ij}$ is a true cycle, if $c_{ij} = 2$ then $y_{ij}$ gives the length of two true cycles added together, and so on. + +We then assume + +$$ +\mu_i \sim \text{Normal}(\mu, \rho) \mspace{100mu}\tau_i \sim \text{Gamma}(\theta, \phi) +$$ + +where $\rho$ is a precision parameter, and the Gamma distribution above is parameterized by mean ($\theta$) and rate ($\phi$). + + + +This is a fully interpretable model that allows for the identification of skipping in cycle tracking, while allowing for different individuals' regularities, and accounting for uncertainty in the model. A paper discussing the full model details will be published soon. + +## Example Usage + +# Package Usage + +The SkipTrack package provides functions for fitting the SkipTrack model, evaluating model run diagnostics, retrieving and visualizing model results, and simulating related data. We begin our tutorial by examining some simulated data. + +```{r} +library(skipTrack) +``` + +First, we simulate data on 100 individuals from the SkipTrack model where each observed $y_{ij}$ value has a 75% probability of being a true cycle, a 20% probability of being two true cycles recorded as one, and a 5% probability of being three true cycles recorded as one. + +```{r} +#Simulate data +dat <- skipTrack.simulate(n = 100, model = 'skipTrack', skipProb = c(.75, .2, .05)) + +names(dat) +``` + +The result of the simulation function is simply a named list with various components. 
The (currently) important components are + + + * `Y`: the $y_{ij}$ values, observed outcomes + * `cluster`: the $i$ values, individual markers + * `NumTrue`: the $c_{ij}$ values, number of true cycles in an observed cycle + * `Underlying`: underlying parameters pertaining to the specific model used for data simulation + + + +Looking at the histogram of `dat$Y`, we can see a clear mixture of at least two distributions, one centered around 30 days, and another centered near 60 days (corresponding to the true cycles and observed cycles containing two true cycles respectively), which is what we expect based on our generation. + +```{r, fig.align='center', fig.width = 7} +#Histogram of observed outcomes +hist(dat$Y, breaks = 10:150) +``` + +Fitting the SkipTrack model using this simulated data requires a call to the function `skipTrack.fit`. Note that because this is a Bayesian model and is fit with an MCMC algorithm, it can take some time with large datasets and a high number of MCMC reps and chains. + +In this code we ask for 4 chains, each with 1000 iterations, run sequentially. Note that we recommend allowing the sampler to run longer than this (usually at least 5000 iterations per chain), but we use a short run here to save time. + +If `useParallel = TRUE`, the MCMC chains will be evaluated in parallel, which helps with longer runs. + +```{r} +ft <- skipTrack.fit(Y = dat$Y, cluster = dat$cluster, + reps = 1000, chains = 4, useParallel = FALSE) +``` + +Once we have the model results we are able to examine model diagnostics, visualize results from the model, and view a model summary. + +### Diagnostics + +Multivariate, multichain MCMC diagnostics, including traceplots, Gelman-Rubin diagnostics, and effective sample size, are all available for various parameters from the model fit. These are supplied using the `genMCMCDiag` package; see that package's documentation for details. 
+ +Here we show the output of the diagnostics on the $c_{ij}$ parameters, which show that (at least for the $c_{ij}$ values) the algorithm is mixing effectively (or will be, once the algorithm runs a little longer). + +```{r, fig.align='center', fig.width = 7, fig.height=7} +skipTrack.diagnostics(ft, param = 'cijs') +``` + +### Visualization + +In order to see some important plots for the SkipTrack model fit, you can simply use `plot(ft)`, and the plots are directly accessible using `skipTrack.visualize(ft)`. + +```{r, fig.align='center', fig.width = 7, fig.height=7} +plot(ft) +``` + +### Summary + +A summary is available for the SkipTrack model fit with `summary(ft)`, with more detailed results accessible through `skipTrack.results(ft)`. Importantly, these results are based on a default chain burn-in value of 750 draws. This can be changed using the parameter `burnIn` for either function. + +```{r} +summary(ft) + +summary(ft, burnIn = 500) +``` + +This introduction provides enough information to start fitting the SkipTrack model. For further information regarding different methods of simulating data, additional model fitting, and tuning parameters for fitting the model, please see the help pages. Additional vignettes are forthcoming. + +\newpage + +## Bibliography + diff --git a/README.md b/README.md new file mode 100644 index 0000000..f5818cc --- /dev/null +++ b/README.md @@ -0,0 +1,321 @@ + + + + + + + +# skipTrack + + + +[![Lifecycle: +experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) +[![R-CMD-check](https://github.com/LukeDuttweiler/skipTrack/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/LukeDuttweiler/skipTrack/actions/workflows/R-CMD-check.yaml) + + +Welcome to the SkipTrack Package! + +SkipTrack is a Bayesian hierarchical model for self-reported menstrual +cycle length data on mobile health apps. 
The model is an + extension of the hierarchical model presented in Li +et al. (2022) that focuses on predicting an individual’s next menstrual +cycle start date while accounting for cycle length inaccuracies +introduced by non-adherence in user self-tracked data. + +## Installation + +``` r +#Install from CRAN +install.packages('skipTrack') +#> Installing package into '/private/var/folders/9h/055tc3cs7ql0r89g2lrc5j1h0000gn/T/Rtmp7dAERQ/temp_libpath52ae786c035f' +#> (as 'lib' is unspecified) +#> Warning: package 'skipTrack' is not available for this version of R +#> +#> A version of this package for your version of R might be available elsewhere, +#> see the ideas at +#> https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages + +#Install Development Version +devtools::install_github("LukeDuttweiler/skipTrack") +#> Using GitHub PAT from the git credential store. +#> Downloading GitHub repo LukeDuttweiler/skipTrack@HEAD +#> farver (2.1.1 -> 2.1.2 ) [CRAN] +#> RcppArmad... (0.12.8.2.1 -> 0.12.8.3.0) [CRAN] +#> Installing 2 packages: farver, RcppArmadillo +#> Installing packages into '/private/var/folders/9h/055tc3cs7ql0r89g2lrc5j1h0000gn/T/Rtmp7dAERQ/temp_libpath52ae786c035f' +#> (as 'lib' is unspecified) +#> +#> The downloaded binary packages are in +#> /var/folders/9h/055tc3cs7ql0r89g2lrc5j1h0000gn/T//RtmplTNwy8/downloaded_packages +#> ── R CMD build ───────────────────────────────────────────────────────────────── +#> * checking for file ‘/private/var/folders/9h/055tc3cs7ql0r89g2lrc5j1h0000gn/T/RtmplTNwy8/remotes654466a9445e/LukeDuttweiler-skipTrack-bc836ab/DESCRIPTION’ ... OK +#> * preparing ‘skipTrack’: +#> * checking DESCRIPTION meta-information ... 
OK +#> * checking for LF line-endings in source and make files and shell scripts +#> * checking for empty or unneeded directories +#> * building ‘skipTrack_0.1.0.tar.gz’ +#> Installing package into '/private/var/folders/9h/055tc3cs7ql0r89g2lrc5j1h0000gn/T/Rtmp7dAERQ/temp_libpath52ae786c035f' +#> (as 'lib' is unspecified) +#> Adding 'skipTrack_0.1.0.tgz' to the cache +``` + +## Model + +Li et al. (2022) notes that apps designed to help users track their +menstrual cycles “are subject to adherence artifacts that may obscure +health-related conclusions: if a user forgets to track their period, +their cycle length computations are inflated.” This is visualized in the +image below in which the numbers represent days after the initial +bleeding day is recorded in the app, $\color{red}{\text{red}}$ days are +bleeding days recorded by the user, and $\color{blue}{\text{blue}}$ days +are bleeding days not recorded by the user. + +$$\overbrace{\underbrace{\color{red}{1, 2, 3, 4}, 5, \dots, 29}_\text{True Cycle, 29 Days}}^\text{Recorded Cycle, 29 Days}, \overbrace{\underbrace{\color{red}{30, 31, 32, 33}, 34, \dots, 61}_\text{True Cycle, 32 Days}, \underbrace{\color{blue}{62, 63, 64, 65}, 66, \dots, 90}_\text{True Cycle, 29 Days}}^\text{Recorded Cycle, 61 Days}$$ + +The SkipTrack model extends the model given by Li et al. (2022) by +specifying parameters for each individual’s cycle length regularity, +as well as their cycle length mean, and weakening assumptions made by Li +et al. on the probability of failing to track a cycle. + + + +In short, the modeling framework assumed by SkipTrack is as follows. The +observed cycle lengths are represented with $y_{ij}$ where +$1 \leq i \leq n$ represents an individual who has contributed $n_i$ +observations, with $1 \leq j \leq n_i$. 
We assume that + +$$ +y_{ij} \sim \text{LogNormal}\big(\mu_i + \log(c_{ij}), \tau_i\big), +$$ where $\mu_i$ is an individual level mean parameter, $\tau_i$ is an +individual level precision parameter, and $c_{ij}$ is an integer-valued +parameter representing the number of true cycles present in the observed +cycle $y_{ij}$. That is, if $c_{ij} = 1$ then $y_{ij}$ is a true cycle, +if $c_{ij} = 2$ then $y_{ij}$ gives the length of two true cycles added +together, and so on. + +We then assume + +$$ +\mu_i \sim \text{Normal}(\mu, \rho) \mspace{100mu}\tau_i \sim \text{Gamma}(\theta, \phi) +$$ + +where $\rho$ is a precision parameter, and the Gamma distribution above +is parameterized by mean ($\theta$) and rate ($\phi$). + + + +This is a fully interpretable model that allows for the identification +of skipping in cycle tracking, while allowing for different individuals’ +regularities, and accounting for uncertainty in the model. A paper +discussing the full model details will be published soon. + +## Example Usage + +# Package Usage + +The SkipTrack package provides functions for fitting the SkipTrack +model, evaluating model run diagnostics, retrieving and visualizing +model results, and simulating related data. We begin our tutorial by +examining some simulated data. + +``` r +library(skipTrack) +``` + +First, we simulate data on 100 individuals from the SkipTrack model +where each observed $y_{ij}$ value has a 75% probability of being a true +cycle, a 20% probability of being two true cycles recorded as one, and a +5% probability of being three true cycles recorded as one. + +``` r +#Simulate data +dat <- skipTrack.simulate(n = 100, model = 'skipTrack', skipProb = c(.75, .2, .05)) + +names(dat) +#> [1] "Y" "cluster" "X" "Z" "Beta" +#> [6] "Gamma" "NumTrue" "Underlying" +``` + +The result of the simulation function is simply a named list with +various components. 
The (currently) important components are + +- `Y`: the $y_{ij}$ values, observed outcomes +- `cluster`: the $i$ values, individual markers +- `NumTrue`: the $c_{ij}$ values, number of true cycles in an observed + cycle +- `Underlying`: underlying parameters pertaining to the specific model + used for data simulation + + + +Looking at the histogram of `dat$Y`, we can see a clear mixture of at +least two distributions, one centered around 30 days, and another +centered near 60 days (corresponding to the true cycles and observed +cycles containing two true cycles respectively), which is what we expect +based on our generation. + +``` r +#Histogram of observed outcomes +hist(dat$Y, breaks = 10:150) +``` + + + +Fitting the SkipTrack model using this simulated data requires a call to +the function `skipTrack.fit`. Note that because this is a Bayesian model +and is fit with an MCMC algorithm, it can take some time with large +datasets and a high number of MCMC reps and chains. + +In this code we ask for 4 chains, each with 1000 iterations, run +sequentially. Note that we recommend allowing the sampler to run longer +than this (usually at least 5000 iterations per chain), but we use a +short run here to save time. + +If `useParallel = TRUE`, the MCMC chains will be evaluated in parallel, +which helps with longer runs. + +``` r +ft <- skipTrack.fit(Y = dat$Y, cluster = dat$cluster, + reps = 1000, chains = 4, useParallel = FALSE) +``` + +Once we have the model results we are able to examine model diagnostics, +visualize results from the model, and view a model summary. + +### Diagnostics + +Multivariate, multichain MCMC diagnostics, including traceplots, +Gelman-Rubin diagnostics, and effective sample size, are all available +for various parameters from the model fit. These are supplied using the +`genMCMCDiag` package; see that package’s documentation for details. 
+ +Here we show the output of the diagnostics on the $c_{ij}$ parameters, +which show that (at least for the $c_{ij}$ values) the algorithm is +mixing effectively (or will be, once the algorithm runs a little +longer). + +``` r +skipTrack.diagnostics(ft, param = 'cijs') +``` + + + + #> ---------------------------------------------------- + #> Generalized MCMC Diagnostics using lanfear Method + #> ---------------------------------------------------- + #> + #> |Effective Sample Size: + #> |--------------------------- + #> | Chain 1| Chain 2| Chain 3| Chain 4| Sum| + #> |-------:|-------:|-------:|-------:|-------:| + #> | 85.146| 88.431| 56.475| 96.856| 326.909| + #> + #> |Gelman-Rubin Diagnostic: + #> |--------------------------- + #> | Point est.| Upper C.I.| + #> |----------:|----------:| + #> | 1.009| 1.012| + +### Visualization + +In order to see some important plots for the SkipTrack model fit, you +can simply use `plot(ft)`, and the plots are directly accessible using +`skipTrack.visualize(ft)`. + +``` r +plot(ft) +``` + + + +### Summary + +A summary is available for the SkipTrack model fit with `summary(ft)`, +with more detailed results accessible through `skipTrack.results(ft)`. +Importantly, these results are based on a default chain burn-in value of +750 draws. This can be changed using the parameter `burnIn` for either +function. 
+ +``` r +summary(ft) +#> ---------------------------------------------------- +#> Summary of skipTrack.fit using skipTrack model +#> ---------------------------------------------------- +#> Mean Coefficients: +#> +#> Estimate 95% CI Lower 95% CI Upper +#> (Intercept) 3.41 3.381 3.439 +#> +#> ---------------------------------------------------- +#> Precision Coefficients: +#> +#> Estimate 95% CI Lower 95% CI Upper +#> (Intercept) 5.507 5.341 5.656 +#> +#> ---------------------------------------------------- +#> Diagnostics: +#> +#> Effective Sample Size Gelman-Rubin +#> Betas 4004.00 1.00 +#> Gammas 21.71 1.00 +#> cijs 370.96 1.01 +#> +#> ---------------------------------------------------- + +summary(ft, burnIn = 500) +#> ---------------------------------------------------- +#> Summary of skipTrack.fit using skipTrack model +#> ---------------------------------------------------- +#> Mean Coefficients: +#> +#> Estimate 95% CI Lower 95% CI Upper +#> (Intercept) 3.41 3.381 3.439 +#> +#> ---------------------------------------------------- +#> Precision Coefficients: +#> +#> Estimate 95% CI Lower 95% CI Upper +#> (Intercept) 5.481 5.256 5.648 +#> +#> ---------------------------------------------------- +#> Diagnostics: +#> +#> Effective Sample Size Gelman-Rubin +#> Betas 4004.00 1.00 +#> Gammas 21.76 1.01 +#> cijs 354.69 1.01 +#> +#> ---------------------------------------------------- +``` + +This introduction provides enough information to start fitting the +SkipTrack model. For further information regarding different methods of +simulating data, additional model fitting, and tuning parameters for +fitting the model, please see the help pages. Additional vignettes are +forthcoming. + +## Bibliography + +
+ +
+ +Li, Kathy, Iñigo Urteaga, Amanda Shea, Virginia J Vitzthum, Chris H +Wiggins, and Noémie Elhadad. 2022. “A Predictive Model for Next Cycle +Start Date That Accounts for Adherence in Menstrual Self-Tracking.” +*Journal of the American Medical Informatics Association* 29 (1): 3–11. + +
+ +
diff --git a/bibliography.bib b/bibliography.bib new file mode 100644 index 0000000..64fa892 --- /dev/null +++ b/bibliography.bib @@ -0,0 +1,887 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% GRAPH THEORY +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +@article{reff2015spectral, + title={Spectral properties of oriented hypergraphs}, + author={Reff, Nathan}, + journal={arXiv preprint arXiv:1506.05054}, + year={2015} +} + +@article{kitouni2019lower, + title={Lower bounds for the Laplacian spectral radius of an oriented hypergraph.}, + author={Kitouni, Ouail and Reff, Nathan}, + journal={Australas. J Comb.}, + volume={74}, + pages={408--422}, + year={2019} +} + +@article{mulas2020spectra, + title={Spectra of Complex Unit Hypergraphs}, + author={Mulas, Raffaella and Reff, Nathan}, + journal={arXiv preprint arXiv:2011.10458}, + year={2020} +} + +@article{galuppi2021spectral, + title={Spectral theory of weighted hypergraphs via tensors}, + author={Galuppi, Francesco and Mulas, Raffaella and Venturello, Lorenzo}, + journal={arXiv preprint arXiv:2106.00277}, + year={2021} +} + +@article{mulas2021sharp, + title={Sharp bounds for the largest eigenvalue}, + author={Mulas, Raffaella}, + journal={Mathematical notes}, + volume={109}, + number={1}, + pages={102--109}, + year={2021}, + publisher={Springer} +} + +@article{courant1954methods, + title={Methods of mathematical physics}, + author={Courant, R and Hilbert, D}, + journal={Bulletin of the American Mathematical Society}, + volume={60}, + pages={578--579}, + year={1954} +} + +@article{courant1965methods, + title={Methods of mathematical physics, Volume I}, + author={Courant, R}, + journal={Interscience Publishers Inc., New York, USA, a Division of John Wiley \& Sons, Card Number 53-7164}, + year={1965} +} + +@article{duttweiler2019spectra, + title={Spectra of cycle and path families of oriented hypergraphs}, + author={Duttweiler, Luke and Reff, Nathan}, + journal={Linear Algebra and its Applications}, + 
volume={578}, + pages={251--271}, + year={2019}, + publisher={Elsevier} +} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% BAYESIAN NETWORK THEORY +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +@article{javidian2020hypergraph, + title={On a hypergraph probabilistic graphical model}, + author={Javidian, Mohammad Ali and Wang, Zhiyu and Lu, Linyuan and Valtorta, Marco}, + journal={Annals of Mathematics and Artificial Intelligence}, + volume={88}, + number={9}, + pages={1003--1033}, + year={2020}, + publisher={Springer} +} + +@article{loh2014high, + title={High-dimensional learning of linear causal networks via inverse covariance estimation}, + author={Loh, Po-Ling and B{\"u}hlmann, Peter}, + journal={The Journal of Machine Learning Research}, + volume={15}, + number={1}, + pages={3065--3105}, + year={2014}, + publisher={JMLR. org} +} + +@article{cordoba2020review, + title={A review of Gaussian Markov models for conditional independence}, + author={C{\'o}rdoba, Irene and Bielza, Concha and Larra{\~n}aga, Pedro}, + journal={Journal of Statistical Planning and Inference}, + volume={206}, + pages={127--144}, + year={2020}, + publisher={Elsevier} +} + +@article{liang1993arma, + title={ARMA model order estimation based on the eigenvalues of the covariance matrix}, + author={Liang, Gang and Wilkes, D Mitchell and Cadzow, James A}, + journal={IEEE transactions on signal processing}, + volume={41}, + number={10}, + pages={3003--3009}, + year={1993}, + publisher={IEEE} +} + +@article{friedman2008sparse, + title={Sparse inverse covariance estimation with the graphical lasso}, + author={Friedman, Jerome and Hastie, Trevor and Tibshirani, Robert}, + journal={Biostatistics}, + volume={9}, + number={3}, + pages={432--441}, + year={2008}, + publisher={Oxford University Press} +} + +@article{frydenberg1990chain, + title={The chain graph Markov property}, + author={Frydenberg, Morten}, + journal={Scandinavian Journal of Statistics}, + pages={333--353}, + year={1990}, + 
publisher={JSTOR} +} + +@book{pearl2009causality, + title={Causality}, + author={Pearl, Judea}, + year={2009}, + publisher={Cambridge university press} +} + +@article{almudevar2010hypothesis, + title={A hypothesis test for equality of bayesian network models}, + author={Almudevar, Anthony}, + journal={EURASIP Journal on Bioinformatics and Systems Biology}, + volume={2010}, + pages={1--11}, + year={2010}, + publisher={Springer} +} + +@article{needham2007primer, + title={A primer on learning in Bayesian networks for computational biology}, + author={Needham, Chris J and Bradford, James R and Bulpitt, Andrew J and Westhead, David R}, + journal={PLoS computational biology}, + volume={3}, + number={8}, + pages={e129}, + year={2007}, + publisher={Public Library of Science San Francisco, USA} +} + +@article{chow1968approximating, + title={Approximating discrete probability distributions with dependence trees}, + author={Chow, CKCN and Liu, Cong}, + journal={IEEE transactions on Information Theory}, + volume={14}, + number={3}, + pages={462--467}, + year={1968}, + publisher={IEEE} +} + +@book{pearl1988probabilistic, + title={Probabilistic reasoning in intelligent systems: networks of plausible inference}, + author={Pearl, Judea}, + year={1988}, + publisher={Morgan kaufmann} +} + +@article{chickering2004large, + title={Large-sample learning of Bayesian networks is NP-hard}, + author={Chickering, Max and Heckerman, David and Meek, Chris}, + journal={Journal of Machine Learning Research}, + volume={5}, + pages={1287--1330}, + year={2004} +} + +@incollection{chickering1996learning, + title={Learning Bayesian networks is NP-complete}, + author={Chickering, David Maxwell}, + booktitle={Learning from data}, + pages={121--130}, + year={1996}, + publisher={Springer} +} + +@article{silander2012simple, + title={A simple approach for finding the globally optimal Bayesian network structure}, + author={Silander, Tomi and Myllymaki, Petri}, + journal={arXiv preprint arXiv:1206.6875}, + 
year={2012} +} + +@article{duttweiler2023spectral, + title={Spectral Bayesian Network Theory}, + author={Duttweiler, Luke and Thurston, Sally W and Almudevar, Anthony}, + journal={Linear Algebra and its Applications}, + year={2023}, + publisher={Elsevier} +} + +@book{russell2010artificial, + title={Artificial intelligence a modern approach}, + author={Russell, Stuart J}, + year={2010}, + publisher={Pearson Education, Inc.} +} + +@article{foygel2010extended, + title={Extended Bayesian information criteria for Gaussian graphical models}, + author={Foygel, Rina and Drton, Mathias}, + journal={Advances in neural information processing systems}, + volume={23}, + year={2010} +} + +%%%%%%%%%%%%% +%Applications +%%%%%%%%%%%%% + +@article{barton2012bayesian, + title={Bayesian networks in environmental and resource management}, + author={Barton, David N and Kuikka, Sakari and Varis, Olli and Uusitalo, Laura and Henriksen, Hans J{\o}rgen and Borsuk, Mark and de la Hera, Africa and Farmani, Raziyeh and Johnson, Sandra and Linnell, John DC}, + journal={Integrated environmental assessment and management}, + volume={8}, + number={3}, + pages={418--429}, + year={2012}, + publisher={Wiley Online Library} +} + +@article{sevinc2020bayesian, + title={A Bayesian network model for prediction and analysis of possible forest fire causes}, + author={Sevinc, Volkan and Kucuk, Omer and Goltas, Merih}, + journal={Forest Ecology and Management}, + volume={457}, + pages={117723}, + year={2020}, + publisher={Elsevier} +} + +@article{dondelinger2013non, + title={Non-homogeneous dynamic Bayesian networks with Bayesian regularization for inferring gene regulatory networks with gradually time-varying structure}, + author={Dondelinger, Frank and L{\`e}bre, Sophie and Husmeier, Dirk}, + journal={Machine Learning}, + volume={90}, + number={2}, + pages={191--230}, + year={2013}, + publisher={Springer} +} + +@article{xing2017improved, + title={An improved Bayesian network method for reconstructing gene 
regulatory network based on candidate auto selection}, + author={Xing, Linlin and Guo, Maozu and Liu, Xiaoyan and Wang, Chunyu and Wang, Lei and Zhang, Yin}, + journal={BMC genomics}, + volume={18}, + number={9}, + pages={17--30}, + year={2017}, + publisher={BioMed Central} +} + +%%%%%%%%%%%%%%%%%%%%%% +%Data Analysis Related +%%%%%%%%%%%%%%%%%%%%%%% + +@article{baliwag2015cytokines, + title={Cytokines in psoriasis}, + author={Baliwag, Jaymie and Barnes, Drew H and Johnston, Andrew}, + journal={Cytokine}, + volume={73}, + number={2}, + pages={342--350}, + year={2015}, + publisher={Elsevier} +} + +@article{nickoloff2004recent, + title={Recent insights into the immunopathogenesis of psoriasis provide new therapeutic opportunities}, + author={Nickoloff, Brian J and Nestle, Frank O and others}, + journal={The Journal of clinical investigation}, + volume={113}, + number={12}, + pages={1664--1675}, + year={2004}, + publisher={Am Soc Clin Investig} +} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% ESTIMATING EIGENVALUES +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +@article{kollo1993asymptotics, + title={Asymptotics of eigenvalues and unit-length eigenvectors of sample variance and correlation matrices}, + author={Kollo, T{\^o}nu and Neudecker, Heinz}, + journal={Journal of Multivariate Analysis}, + volume={47}, + number={2}, + pages={283--300}, + year={1993}, + publisher={Elsevier} +} + +@article{konishi1979asymptotic, + title={Asymptotic expansions for the distributions of statistics based on the sample correlation matrix in principal component analysis}, + author={Konishi, Sadanori}, + journal={Hiroshima Mathematical Journal}, + volume={9}, + number={3}, + pages={647--700}, + year={1979}, + publisher={Hiroshima University, Mathematics Program} +} + +@article{neudecker1990asymptotic, + title={The asymptotic variance matrix of the sample correlation matrix}, + author={Neudecker, Heinz and Wesselman, Albertus Martinus}, + journal={Linear Algebra and its 
Applications}, + volume={127}, + pages={589--599}, + year={1990}, + publisher={North-Holland} +} + +@book{magnus2019matrix, + title={Matrix differential calculus with applications in statistics and econometrics}, + author={Magnus, Jan R and Neudecker, Heinz}, + year={2019}, + publisher={John Wiley \& Sons} +} + +@article{van1989elliptical, + title={Elliptical multivariate analysis}, + author={Van Praag, Bernard MS and Wesselman, Bertram M}, + journal={Journal of Econometrics}, + volume={41}, + number={2}, + pages={189--203}, + year={1989}, + publisher={Elsevier} +} + +@article{anderson1965asymptotic, + title={An asymptotic expansion for the distribution of the latent roots of the estimated covariance matrix}, + author={Anderson, George A}, + journal={The Annals of Mathematical Statistics}, + volume={36}, + number={4}, + pages={1153--1173}, + year={1965}, + publisher={Institute of Mathematical Statistics} +} + +@article{sakai2000quadratic, + title={A quadratic discriminant function based on bias rectification of eigenvalues}, + author={Sakai, Mitsuru and Yoneda, Masaaki and Hase, Hiroyuki and Maruyama, Hiroshi and Naoe, Michiko}, + journal={Systems and Computers in Japan}, + volume={31}, + number={9}, + pages={28--38}, + year={2000}, + publisher={Wiley Online Library} +} + +@incollection{saleem2015perturbation, + title={Perturbation theory}, + author={Saleem, Mohammad}, + booktitle={Quantum Mechanics}, + year={2015}, + publisher={IOP Publishing} +} + +@book{fukunaga2013introduction, + title={Introduction to statistical pattern recognition}, + author={Fukunaga, Keinosuke}, + year={2013}, + publisher={Elsevier} +} + +@article{ledoit2004well, + title={A well-conditioned estimator for large-dimensional covariance matrices}, + author={Ledoit, Olivier and Wolf, Michael}, + journal={Journal of Multivariate Analysis}, + volume={88}, + number={2}, + pages={365--411}, + year={2004}, + publisher={Elsevier} +} + +@article{touloumis2015nonparametric, + title={Nonparametric 
Stein-type shrinkage covariance matrix estimators in high-dimensional settings}, + author={Touloumis, Anestis}, + journal={Computational Statistics \& Data Analysis}, + volume={83}, + pages={251--261}, + year={2015}, + publisher={Elsevier} +} + +@article{muirhead1987developments, + title={Developments in eigenvalue estimation}, + author={Muirhead, Robb J}, + journal={Advances in Multivariate Statistical Analysis: Pillai Memorial Volume}, + pages={277--288}, + year={1987}, + publisher={Springer} +} + +@article{mestre2008improved, + title={Improved estimation of eigenvalues and eigenvectors of covariance matrices using their sample estimates}, + author={Mestre, Xavier}, + journal={IEEE Transactions on Information Theory}, + volume={54}, + number={11}, + pages={5113--5129}, + year={2008}, + publisher={IEEE} +} + +@article{bodnar2016direct, + title={Direct shrinkage estimation of large dimensional precision matrix}, + author={Bodnar, Taras and Gupta, Arjun K and Parolya, Nestor}, + journal={Journal of Multivariate Analysis}, + volume={146}, + pages={223--236}, + year={2016}, + publisher={Elsevier} +} + +@article{nguyen2022distributionally, + title={Distributionally robust inverse covariance estimation: The Wasserstein shrinkage estimator}, + author={Nguyen, Viet Anh and Kuhn, Daniel and Mohajerin Esfahani, Peyman}, + journal={Operations research}, + volume={70}, + number={1}, + pages={490--515}, + year={2022}, + publisher={INFORMS} +} + +@article{ledoit2020analytical, + title={Analytical nonlinear shrinkage of large-dimensional covariance matrices}, + author={Ledoit, Olivier and Wolf, Michael}, + journal={The Annals of Statistics}, + volume={48}, + number={5}, + pages={3043--3065}, + year={2020}, + publisher={JSTOR} +} + +@article{engle2002dynamic, + title={Dynamic conditional correlation: A simple class of multivariate generalized autoregressive conditional heteroskedasticity models}, + author={Engle, Robert}, + journal={Journal of Business \& Economic Statistics}, + 
volume={20}, + number={3}, + pages={339--350}, + year={2002}, + publisher={Taylor \& Francis} +} + +@article{yuan2007model, + title={Model selection and estimation in the Gaussian graphical model}, + author={Yuan, Ming and Lin, Yi}, + journal={Biometrika}, + volume={94}, + number={1}, + pages={19--35}, + year={2007}, + publisher={Oxford University Press} +} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% BAYESIAN NETWORK MCMC +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +@article{madigan1995bayesian, + title={Bayesian graphical models for discrete data}, + author={Madigan, David and York, Jeremy and Allard, Denis}, + journal={International Statistical Review/Revue Internationale de Statistique}, + pages={215--232}, + year={1995}, + publisher={JSTOR} +} + +@article{friedman2003being, + title={Being Bayesian about network structure. A Bayesian approach to structure discovery in Bayesian networks}, + author={Friedman, Nir and Koller, Daphne}, + journal={Machine learning}, + volume={50}, + number={1}, + pages={95--125}, + year={2003}, + publisher={Springer} +} + +@article{grzegorczyk2008improving, + title={Improving the structure MCMC sampler for Bayesian networks by introducing a new edge reversal move}, + author={Grzegorczyk, Marco and Husmeier, Dirk}, + journal={Machine Learning}, + volume={71}, + number={2-3}, + pages={265}, + year={2008}, + publisher={Springer} +} + +@article{colombo2014order, + title={Order-independent constraint-based causal structure learning.}, + author={Colombo, Diego and Maathuis, Marloes H and others}, + journal={J. Mach. Learn. 
Res.}, + volume={15}, + number={1}, + pages={3741--3782}, + year={2014} +} + +@techreport{margaritis2003learning, + title={Learning Bayesian network model structure from data}, + author={Margaritis, Dimitris}, + year={2003}, + institution={Carnegie-Mellon Univ Pittsburgh Pa School of Computer Science} +} + +@article{deonovic2017convergence, + title={Convergence diagnostics for MCMC draws of a categorical variable}, + author={Deonovic, Benjamin E and Smith, Brian J}, + journal={arXiv preprint arXiv:1706.04919}, + year={2017} +} + +@article{suter2021bayesian, + title={Bayesian structure learning and sampling of Bayesian networks with the R package BiDAG}, + author={Suter, Polina and Kuipers, Jack and Moffa, Giusi and Beerenwinkel, Niko}, + journal={arXiv preprint arXiv:2105.00488}, + year={2021} +} + +@article{larjo2015using, + title={Using multi-step proposal distribution for improved MCMC convergence in Bayesian network structure learning}, + author={Larjo, Antti and L{\"a}hdesm{\"a}ki, Harri}, + journal={EURASIP Journal on Bioinformatics and Systems Biology}, + volume={2015}, + number={1}, + pages={1--14}, + year={2015}, + publisher={Springer} +} + +@article{lanfear2016estimating, + title={Estimating the effective sample size of tree topologies from Bayesian phylogenetic analyses}, + author={Lanfear, Robert and Hua, Xia and Warren, Dan L}, + journal={Genome biology and evolution}, + volume={8}, + number={8}, + pages={2319--2332}, + year={2016}, + publisher={Oxford University Press} +} + +@article{andrieu2003introduction, + title={An introduction to MCMC for machine learning}, + author={Andrieu, Christophe and De Freitas, Nando and Doucet, Arnaud and Jordan, Michael I}, + journal={Machine learning}, + volume={50}, + pages={5--43}, + year={2003}, + publisher={Springer} +} + +@book{gutin2006traveling, + title={The traveling salesman problem and its variations}, + author={Gutin, Gregory and Punnen, Abraham P}, + volume={12}, + year={2006}, + publisher={Springer 
Science \& Business Media} +} + +@article{cowles1996markov, + title={Markov chain Monte Carlo convergence diagnostics: a comparative review}, + author={Cowles, Mary Kathryn and Carlin, Bradley P}, + journal={Journal of the American Statistical Association}, + volume={91}, + number={434}, + pages={883--904}, + year={1996}, + publisher={Taylor \& Francis} +} + +@article{roy2020convergence, + title={Convergence diagnostics for markov chain monte carlo}, + author={Roy, Vivekananda}, + journal={Annual Review of Statistics and Its Application}, + volume={7}, + pages={387--412}, + year={2020}, + publisher={Annual Reviews} +} + +@article{rasmussen1999infinite, + title={The infinite Gaussian mixture model}, + author={Rasmussen, Carl}, + journal={Advances in neural information processing systems}, + volume={12}, + year={1999} +} + +@article{kuipers2017partition, + title={Partition MCMC for inference on acyclic digraphs}, + author={Kuipers, Jack and Moffa, Giusi}, + journal={Journal of the American Statistical Association}, + volume={112}, + number={517}, + pages={282--299}, + year={2017}, + publisher={Taylor \& Francis} +} + +@article{su2016improving, + title={Improving structure mcmc for bayesian networks through markov blanket resampling}, + author={Su, Chengwei and Borsuk, Mark E}, + journal={The Journal of Machine Learning Research}, + volume={17}, + number={1}, + pages={4042--4061}, + year={2016}, + publisher={JMLR. 
org} +} + +@book{reinelt2003traveling, + title={The traveling salesman: computational solutions for TSP applications}, + author={Reinelt, Gerhard}, + volume={840}, + year={2003}, + publisher={Springer} +} + +@article{dixit2017mcmc, + title={MCMC diagnostics for higher dimensions using Kullback Leibler divergence}, + author={Dixit, Anand and Roy, Vivekananda}, + journal={Journal of Statistical Computation and Simulation}, + volume={87}, + number={13}, + pages={2622--2638}, + year={2017}, + publisher={Taylor \& Francis} +} + +@article{moins2023use, + title={On the use of a local $\hat{R}$ to improve MCMC convergence diagnostic}, + author={Moins, Th{\'e}o and Arbel, Julyan and Dutfoy, Anne and Girard, St{\'e}phane}, + journal={Bayesian Analysis}, + volume={1}, + number={1}, + pages={1--26}, + year={2023}, + publisher={International Society for Bayesian Analysis} +} + +@article{gelman1992inference, + title={Inference from iterative simulation using multiple sequences}, + author={Gelman, Andrew and Rubin, Donald B}, + journal={Statistical science}, + volume={7}, + number={4}, + pages={457--472}, + year={1992}, + publisher={Institute of Mathematical Statistics} +} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% T32 +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +@article{gasparrini2010distributed, + title={Distributed lag non-linear models}, + author={Gasparrini, Antonio and Armstrong, Ben and Kenward, Mike G}, + journal={Statistics in medicine}, + volume={29}, + number={21}, + pages={2224--2234}, + year={2010}, + publisher={Wiley Online Library} +} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Model Selection and Post-Selection Inference +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +@article{berk2010statistical, + title={Statistical inference after model selection}, + author={Berk, Richard and Brown, Lawrence and Zhao, Linda}, + journal={Journal of Quantitative Criminology}, + volume={26}, + pages={217--236}, + year={2010}, + publisher={Springer} +} + 
+@article{zhang2022post, + title={Post-model-selection inference in linear regression models: an integrated review}, + author={Zhang, Dongliang and Khalili, Abbas and Asgharian, Masoud}, + journal={Statistics Surveys}, + volume={16}, + pages={86--136}, + year={2022}, + publisher={The American Statistical Association, the Bernoulli Society, the Institute of Mathematical Statistics, and the Statistical Society of Canada} +} + +@article{berk2013valid, + title={Valid post-selection inference}, + author={Berk, Richard and Brown, Lawrence and Buja, Andreas and Zhang, Kai and Zhao, Linda}, + journal={The Annals of Statistics}, + pages={802--837}, + year={2013}, + publisher={JSTOR} +} + +@article{liu2018more, + title={More powerful post-selection inference, with application to the lasso}, + author={Liu, Keli and Markovic, Jelena and Tibshirani, Robert}, + journal={arXiv preprint arXiv:1801.09037}, + year={2018} +} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Random Matrix Theory +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +@book{bai2010spectral, + title={Spectral analysis of large dimensional random matrices}, + author={Bai, Zhidong and Silverstein, Jack W}, + volume={20}, + year={2010}, + publisher={Springer} +} + +@article{chafai2010dirichlet, + title={The Dirichlet Markov Ensemble}, + author={Chafa{\"\i}, Djalil}, + journal={Journal of Multivariate Analysis}, + volume={101}, + number={3}, + pages={555--567}, + year={2010}, + publisher={Elsevier} +} + +@article{chafai2009singular, + title={Singular values of random matrices}, + author={Chafa{\"\i}, Djalil and Gu{\'e}don, Olivier and Lecu{\'e}, Guillaume and Pajor, Alain}, + journal={Lecture Notes}, + year={2009}, + publisher={Citeseer} +} + +@article{bryson2021marchenko, + title={Marchenko--Pastur law with relaxed independence conditions}, + author={Bryson, Jennifer and Vershynin, Roman and Zhao, Hongkai}, + journal={Random Matrices: Theory and Applications}, + volume={10}, + number={04}, + pages={2150040}, + year={2021}, + publisher={World Scientific} +} 
+ +@article{bai2008large, + title={Large sample covariance matrices without independence structures in columns}, + author={Bai, Zhidong and Zhou, Wang}, + journal={Statistica Sinica}, + pages={425--442}, + year={2008}, + publisher={JSTOR} +} + +@article{marchenko1967distribution, + title={Distribution of eigenvalues for some sets of random matrices}, + author={Marchenko, Vladimir Alexandrovich and Pastur, Leonid Andreevich}, + journal={Matematicheskii Sbornik}, + volume={114}, + number={4}, + pages={507--536}, + year={1967}, + publisher={Russian Academy of Sciences, Steklov Mathematical Institute of Russian Academy of Sciences} +} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Clustering +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +@article{ghaemi2009survey, + title={A survey: clustering ensembles techniques}, + author={Ghaemi, Reza and Sulaiman, Md Nasir and Ibrahim, Hamidah and Mustapha, Norwati}, + journal={International Journal of Computer and Information Engineering}, + volume={3}, + number={2}, + pages={365--374}, + year={2009} +} + +@article{golalipour2021clustering, + title={From clustering to clustering ensemble selection: A review}, + author={Golalipour, Keyvan and Akbari, Ebrahim and Hamidi, Seyed Saeed and Lee, Malrey and Enayatifar, Rasul}, + journal={Engineering Applications of Artificial Intelligence}, + volume={104}, + pages={104388}, + year={2021}, + publisher={Elsevier} +} + +@article{fred2005combining, + title={Combining multiple clusterings using evidence accumulation}, + author={Fred, Ana LN and Jain, Anil K}, + journal={IEEE transactions on pattern analysis and machine intelligence}, + volume={27}, + number={6}, + pages={835--850}, + year={2005}, + publisher={IEEE} +} + +@article{dahl2006model, + title={Model-based clustering for expression data via a Dirichlet process mixture model}, + author={Dahl, David B}, + journal={Bayesian inference for gene expression and proteomics}, + volume={4}, + pages={201--218}, + year={2006} +} + 
+@article{miller2014inconsistency, + title={Inconsistency of Pitman-Yor process mixtures for the number of components}, + author={Miller, Jeffrey W and Harrison, Matthew T}, + journal={The Journal of Machine Learning Research}, + volume={15}, + number={1}, + pages={3333--3370}, + year={2014}, + publisher={JMLR. org} +} + +@article{jain2004split, + title={A split-merge Markov chain Monte Carlo procedure for the Dirichlet process mixture model}, + author={Jain, Sonia and Neal, Radford M}, + journal={Journal of computational and Graphical Statistics}, + volume={13}, + number={1}, + pages={158--182}, + year={2004}, + publisher={Taylor \& Francis} +} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% SkipTrack References +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +@article{li2022predictive, + title={A predictive model for next cycle start date that accounts for adherence in menstrual self-tracking}, + author={Li, Kathy and Urteaga, I{\~n}igo and Shea, Amanda and Vitzthum, Virginia J and Wiggins, Chris H and Elhadad, No{\'e}mie}, + journal={Journal of the American Medical Informatics Association}, + volume={29}, + number={1}, + pages={3--11}, + year={2022}, + publisher={Oxford University Press} +} \ No newline at end of file diff --git a/cran-comments.md b/cran-comments.md new file mode 100644 index 0000000..4b1ddb1 --- /dev/null +++ b/cran-comments.md @@ -0,0 +1,9 @@ +## R CMD check results + +0 errors | 0 warnings | 1 note + +* This is a new release. + +## Notes + +* There are currently no published references for the main methods in this package. We have included references to alternative methods when appropriate, and will add references once papers have been published/submitted to arXiv. 
diff --git a/man/figures/README-unnamed-chunk-5-1.png b/man/figures/README-unnamed-chunk-5-1.png new file mode 100644 index 0000000..d7742c2 Binary files /dev/null and b/man/figures/README-unnamed-chunk-5-1.png differ diff --git a/man/figures/README-unnamed-chunk-7-1.png b/man/figures/README-unnamed-chunk-7-1.png new file mode 100644 index 0000000..7372de3 Binary files /dev/null and b/man/figures/README-unnamed-chunk-7-1.png differ diff --git a/man/figures/README-unnamed-chunk-8-1.png b/man/figures/README-unnamed-chunk-8-1.png new file mode 100644 index 0000000..f3706a0 Binary files /dev/null and b/man/figures/README-unnamed-chunk-8-1.png differ diff --git a/man/figures/lifecycle-archived.svg b/man/figures/lifecycle-archived.svg new file mode 100644 index 0000000..745ab0c --- /dev/null +++ b/man/figures/lifecycle-archived.svg @@ -0,0 +1,21 @@ + + lifecycle: archived + + + + + + + + + + + + + + + lifecycle + + archived + + diff --git a/man/figures/lifecycle-defunct.svg b/man/figures/lifecycle-defunct.svg new file mode 100644 index 0000000..d5c9559 --- /dev/null +++ b/man/figures/lifecycle-defunct.svg @@ -0,0 +1,21 @@ + + lifecycle: defunct + + + + + + + + + + + + + + + lifecycle + + defunct + + diff --git a/man/figures/lifecycle-deprecated.svg b/man/figures/lifecycle-deprecated.svg new file mode 100644 index 0000000..b61c57c --- /dev/null +++ b/man/figures/lifecycle-deprecated.svg @@ -0,0 +1,21 @@ + + lifecycle: deprecated + + + + + + + + + + + + + + + lifecycle + + deprecated + + diff --git a/man/figures/lifecycle-experimental.svg b/man/figures/lifecycle-experimental.svg new file mode 100644 index 0000000..5d88fc2 --- /dev/null +++ b/man/figures/lifecycle-experimental.svg @@ -0,0 +1,21 @@ + + lifecycle: experimental + + + + + + + + + + + + + + + lifecycle + + experimental + + diff --git a/man/figures/lifecycle-maturing.svg b/man/figures/lifecycle-maturing.svg new file mode 100644 index 0000000..897370e --- /dev/null +++ b/man/figures/lifecycle-maturing.svg @@ -0,0 
+1,21 @@ + + lifecycle: maturing + + + + + + + + + + + + + + + lifecycle + + maturing + + diff --git a/man/figures/lifecycle-questioning.svg b/man/figures/lifecycle-questioning.svg new file mode 100644 index 0000000..7c1721d --- /dev/null +++ b/man/figures/lifecycle-questioning.svg @@ -0,0 +1,21 @@ + + lifecycle: questioning + + + + + + + + + + + + + + + lifecycle + + questioning + + diff --git a/man/figures/lifecycle-soft-deprecated.svg b/man/figures/lifecycle-soft-deprecated.svg new file mode 100644 index 0000000..9c166ff --- /dev/null +++ b/man/figures/lifecycle-soft-deprecated.svg @@ -0,0 +1,21 @@ + + lifecycle: soft-deprecated + + + + + + + + + + + + + + + lifecycle + + soft-deprecated + + diff --git a/man/figures/lifecycle-stable.svg b/man/figures/lifecycle-stable.svg new file mode 100644 index 0000000..9bf21e7 --- /dev/null +++ b/man/figures/lifecycle-stable.svg @@ -0,0 +1,29 @@ + + lifecycle: stable + + + + + + + + + + + + + + + + lifecycle + + + + stable + + + diff --git a/man/figures/lifecycle-superseded.svg b/man/figures/lifecycle-superseded.svg new file mode 100644 index 0000000..db8d757 --- /dev/null +++ b/man/figures/lifecycle-superseded.svg @@ -0,0 +1,21 @@ + + lifecycle: superseded + + + + + + + + + + + + + + + lifecycle + + superseded + + diff --git a/man/gibbsStepLi.Rd b/man/gibbsStepLi.Rd new file mode 100644 index 0000000..3cd9e0d --- /dev/null +++ b/man/gibbsStepLi.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/liMCMC.R +\name{gibbsStepLi} +\alias{gibbsStepLi} +\title{Gibbs Step Li - One MCMC step for the Li Model} +\usage{ +gibbsStepLi(ijDat, iDat, kappa, gamma, alpha, beta, S, indFirst) +} +\arguments{ +\item{ijDat}{A data.frame with parameters at the individual-observation level: Individual, ys, lambdais, piis, ss.} + +\item{iDat}{A data.frame with parameters at the individual level: Individual, lambdas, pis.} + +\item{kappa}{Fixed value of hyperparameter kappa.} + +\item{gamma}{Fixed 
value of hyperparameter gamma.} + +\item{alpha}{Fixed value of hyperparameter alpha.} + +\item{beta}{Fixed value of hyperparameter beta.} + +\item{S}{Fixed input value S.} + +\item{indFirst}{A logical vector indicating the first occurrence of each individual.} +} +\value{ +A list containing one MCMC draw for each parameter. Elements are: +\describe{ +\item{ijDat}{A data.frame with updated parameters at the individual-observation level: Individual, ys, lambdais, piis, ss.} +\item{iDat}{A data.frame with updated parameters at the individual level: Individual, lambdas, pis.} +\item{kappa}{Fixed value of hyperparameter kappa.} +\item{gamma}{Fixed value of hyperparameter gamma.} +\item{alpha}{Fixed value of hyperparameter alpha.} +\item{beta}{Fixed value of hyperparameter beta.} +\item{S}{Fixed input value S.} +\item{indFirst}{A logical vector indicating the first occurrence of each individual.} +} +} +\description{ +Gibbs Step Li - One MCMC step for the Li Model +} +\references{ +Li, Kathy, et al. "A predictive model for next cycle start date that accounts for adherence in menstrual self-tracking." Journal of the American Medical Informatics Association 29.1 (2022): 3-11. +} diff --git a/man/liMCMC.Rd b/man/liMCMC.Rd new file mode 100644 index 0000000..cf8ea47 --- /dev/null +++ b/man/liMCMC.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/liMCMC.R +\name{liMCMC} +\alias{liMCMC} +\title{Runs MCMC algorithm for performing inference using the model from Li et al. 
(2022)} +\usage{ +liMCMC( + Y, + cluster, + S, + hyperparams = c(kappa = 180, gamma = 6, alpha = 2, beta = 20), + initialParams = list(pi = c(1/3, 1/3, 1/3), lambdais = rep(30, + length(unique(cycleDat$Individual))), piis = rep(0.2, + length(unique(cycleDat$Individual))), ss = sample(0:S, nrow(cycleDat), replace = + TRUE)), + reps = 1000 +) +} +\arguments{ +\item{Y}{A vector of observed cycle lengths.} + +\item{cluster}{A vector indicating the individual cluster/group membership for each observation Y.} + +\item{S}{Integer. The maximum number of skips to consider possible.} + +\item{hyperparams}{Named numeric vector of hyperparameters containing the +elements: kappa, gamma, alpha, beta. NOTE: MUST BE IN CORRECT ORDER. +\itemize{ +\item \code{kappa}: Numeric value, shape parameter of Gamma distribution for Lambda_i. +\item \code{gamma}: Numeric value, rate parameter of Gamma distribution for Lambda_i. +\item \code{alpha}: Numeric value, shape1 parameter of Beta distribution for Pi_i. +\item \code{beta}: Numeric value, shape2 parameter of Beta distribution for Pi_i. +}} + +\item{initialParams}{A list of initial parameter values for the MCMC algorithm. +Default values are provided for pi, lambdais, piis, ss.} + +\item{reps}{The number of MCMC iterations (steps) to perform. Default is 1000.} +} +\value{ +A list containing the MCMC draws for each parameter at each iteration. 
Each element +in the list is itself a list containing: +\describe{ +\item{ijDat}{A data.frame with updated parameters at the individual-observation level: Individual, ys, lambdais, piis, ss.} +\item{iDat}{A data.frame with updated parameters at the individual level: Individual, lambdas, pis.} +\item{kappa}{Fixed value of hyperparameter kappa.} +\item{gamma}{Fixed value of hyperparameter gamma.} +\item{alpha}{Fixed value of hyperparameter alpha.} +\item{beta}{Fixed value of hyperparameter beta.} +\item{S}{Fixed input value S.} +\item{indFirst}{A logical vector indicating the first occurrence of each individual.} +} +} +\description{ +This function performs inference on cycle length data, assuming the model from Li et al. (2022). It is important to note +that Li et al. do not actually use this algorithm as they target a particular analytic posterior predictive distribution, +and solve directly. However, we are targeting a different posterior and thus use this MCMC to perform inference. +} +\references{ +Li, Kathy, et al. "A predictive model for next cycle start date that accounts for adherence in menstrual self-tracking." Journal of the American Medical Informatics Association 29.1 (2022): 3-11. +} +\seealso{ +\code{\link{gibbsStepLi}} +} diff --git a/man/likVec.Rd b/man/likVec.Rd index 54c3807..6ab6931 100644 --- a/man/likVec.Rd +++ b/man/likVec.Rd @@ -42,3 +42,6 @@ of the given hyperparameters for the generative model from Li et al. (2022). It samples M instances of the parameters from the given distributions and averages the the likelihoods, giving a marginal likelihood for the hyperparameters. } +\references{ +Li, Kathy, et al. "A predictive model for next cycle start date that accounts for adherence in menstrual self-tracking." Journal of the American Medical Informatics Association 29.1 (2022): 3-11. 
+} diff --git a/man/postLambdai.Rd b/man/postLambdai.Rd index 72c42a2..a350e68 100644 --- a/man/postLambdai.Rd +++ b/man/postLambdai.Rd @@ -7,13 +7,13 @@ postLambdai(yij, sij, priorK, priorG) } \arguments{ -\item{yij}{Vector of observed values for lambda_i.} +\item{yij}{Vector of observed values for individual i.} -\item{sij}{Vector of indicators for lambda_i.} +\item{sij}{Vector of cycle skip indicators for individual i.} -\item{priorK}{Prior hyperparameter for K.} +\item{priorK}{Prior hyperparameter kappa.} -\item{priorG}{Prior hyperparameter for G.} +\item{priorG}{Prior hyperparameter gamma.} } \value{ A random draw from the posterior distribution of lambda_i. diff --git a/man/postPii.Rd b/man/postPii.Rd new file mode 100644 index 0000000..f271b89 --- /dev/null +++ b/man/postPii.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/liPosteriors.R +\name{postPii} +\alias{postPii} +\title{Compute M-H draw for pi_i in Li algorithm} +\usage{ +postPii(sij, currentPii, priorA, priorB, S) +} +\arguments{ +\item{sij}{Vector of cycle skip indicators for individual i} + +\item{currentPii}{Current value of pi_i} + +\item{priorA}{Hyperparameter alpha.} + +\item{priorB}{Hyperparameter beta.} + +\item{S}{Maximum number of skips allowed in algorithm} +} +\value{ +Draw for pi_i, repeated for the number of observations from individual i +} +\description{ +This performs a Metropolis-Hastings draw for pi_i, assuming s_ij follows a truncated geometric distribution with parameters +pi_i and S. The proposal distribution for pi_i is Beta(alpha, beta). 
+} diff --git a/man/skipTrack-package.Rd b/man/skipTrack-package.Rd index 2daab55..9b9ff8b 100644 --- a/man/skipTrack-package.Rd +++ b/man/skipTrack-package.Rd @@ -4,9 +4,9 @@ \name{skipTrack-package} \alias{skipTrack} \alias{skipTrack-package} -\title{skipTrack: Bayesian Hierarchical Models Adjusting for Non-Adherence in Mobile Menstrual Cycle Tracking} +\title{skipTrack: A Bayesian Hierarchical Model that Controls for Non-Adherence in Mobile Menstrual Cycle Tracking} \description{ -Implements a Bayesian hierarchical model of the same name designed to perform inference on cycle length mean and regularity given the possibility of non-adherence in cycle length self-tracking. Currently accepts baseline continuous covariates for cycle mean length and regularity. Future updates will include include categorical covariates, time-varying covariates, and the inclusion of external information regarding tracking skips. +Implements a Bayesian hierarchical model designed to identify skips in menstrual cycle self-tracking on mobile apps. Future developments will allow for the inclusion of covariates affecting cycle mean and regularity, as well as extra information regarding tracking non-adherence. Main methods to be outlined in a forthcoming paper, with alternative models from Li et al. (2022) \doi{10.1093/jamia/ocab182}. } \seealso{ Useful links: @@ -17,7 +17,7 @@ Useful links: } \author{ -\strong{Maintainer}: Luke Duttweiler \email{lduttweiler@hsph.harvard.edu} (\href{https://orcid.org/0000-0002-0467-995X}{ORCID}) +\strong{Maintainer}: Luke Duttweiler \email{lduttweiler@hsph.harvard.edu} (\href{https://orcid.org/0000-0002-0467-995X}{ORCID}) [copyright holder] } \keyword{internal} diff --git a/vignettes/skipTrack_intro.Rmd b/vignettes/skipTrack_intro.Rmd index c1bd9d3..32ba7dd 100644 --- a/vignettes/skipTrack_intro.Rmd +++ b/vignettes/skipTrack_intro.Rmd @@ -24,7 +24,7 @@ set.seed(1) Welcome to the SkipTrack Package! 
-SkipTrack is a Bayesian hierarchical model for self-reported menstrual cycle length data on mobile health apps. The model is a significant extension of the hierarchical model presented in @li2022predictive that focuses on predicting an individual's next menstrual cycle start date while accounting for cycle length inaccuracies introduced by non-adherence in user self-tracked data. +SkipTrack is a Bayesian hierarchical model for self-reported menstrual cycle length data on mobile health apps. The model is an extension of the hierarchical model presented in @li2022predictive that focuses on predicting an individual's next menstrual cycle start date while accounting for cycle length inaccuracies introduced by non-adherence in user self-tracked data. @li2022predictive notes that apps designed to help users track their menstrual cycles "are subject to adherence artifacts that may obscure health-related conclusions: if a user forgets to track their period, their cycle length computations are inflated." This is visualized in the image below in which the numbers represent days after the initial bleeding day is recorded in the app, $\color{red}{\text{red}}$ days are bleeding days recorded by the user, and $\color{blue}{\text{blue}}$ days are bleeding days not recorded by the user. @@ -64,7 +64,7 @@ This is a fully interpretable model that allows for the identification of skippi The SkipTrack package provides functions for fitting the SkipTrack model, evaluating model run diagnostics, retrieving and visualizing model results, and simulating related data. We begin our tutorial by examining some simulated data. -```{r setup} +```{r} library(skipTrack) ``` @@ -114,7 +114,7 @@ Once we have the model results we are able to examine model diagnostics, visuali ### Diagnostics -Multivariate, multichain MCMC diagnostics, including traceplots, Gelman-Rubin diagnostics, and effective sample size, are all available for various parameters from the model fit. 
These are supplied using the `genMCMCDiag` package, see CITATION for details. +Multivariate, multichain MCMC diagnostics, including traceplots, Gelman-Rubin diagnostics, and effective sample size, are all available for various parameters from the model fit. These are supplied using the `genMCMCDiag` package; see that package's documentation for details. Here we show the output of the diagnostics on the $c_{ij}$ parameters, which show that (at least for the $c_{ij}$ values) the algorithm is mixing effectively (or will be, once the algorithm runs a little longer).