PecanProject · mdietze · Mar 26, 2020 · Mar 6, 2020 · Mar 6, 2020 · Mar 6, 2020
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,6 +9,7 @@ For more information about this file see also [Keep a Changelog](http://keepacha
 
 ### Fixed
 
+- PEcAn.utils::tranformstats() assumed the statistic names column of its input was a factor. It now accepts character too, and returns the same class given as input (#2545).
 - fixed and added tests for `get.rh` function in PEcAn.data.atmosphere 
 - Invalid .zenodo.json that broke automatic archiving on Zenodo ([b56ef53](https://github.com/PecanProject/pecan/commit/b56ef53888d73904c893b9e8c8cfaeedd7b1edbe))
 - Fixed a filehandle leak in multi-year runs of PEcAn.BIOCRO::met2model.BIOCRO: It was only closing the last input file it processed (#2485).

diff --git a/base/utils/R/transformstats.R b/base/utils/R/transformstats.R
@@ -1,46 +1,53 @@
 
-#--------------------------------------------------------------------------------------------------#
 ##' Transform misc. statistics to SE
 ##'
-##' Automates transformations of SD, MSE, LSD, 95%CI, HSD, and MSD to conservative estimates of SE.
-##' @name transformstats
-##' @title Transform Stats 
-##' @param data data frame with mean, statistic, n, and statistic name: \code{example data <- data.frame(Y=rep(1,5), stat=rep(1,5), n=rep(4,5), statname=c('SD', 'MSE', 'LSD', 'HSD', 'MSD'))}
-##' @return dataframe with statistics transformed to SE
+##' Automates transformations of SD, MSE, LSD, 95%CI, HSD, and MSD
+##'   to conservative estimates of SE.
+##' Method details and assumptions described in
+##' LeBauer 2020 Transforming ANOVA and Regression statistics for Meta-analysis. Authorea. DOI: https://doi.org/10.22541/au.158359749.96662550
+##' @param data data frame with columns for mean, statistic, n,
+##'   and statistic name
+##' @return data frame with statistics transformed to SE
 ##' @author David LeBauer
 ##' @export
-##' @examples statdf <- data.frame(Y=rep(1,5),
-##'                                stat=rep(1,5),
-##'                                n=rep(4,5),
-##'                                statname=c('SD', 'MSE', 'LSD', 'HSD', 'MSD'))
+##' @examples
+##' statdf <- data.frame(Y=rep(1,5),
+##'                      stat=rep(1,5),
+##'                      n=rep(4,5),
+##'                      statname=c('SD', 'MSE', 'LSD', 'HSD', 'MSD'))
 ##' transformstats(statdf)
 transformstats <- function(data) {
-  if (!"SE" %in% levels(data$statname)) {
-    data$statname <- factor(data$statname, levels = c(levels(data$statname), "SE"))
+  if (is.factor(data$statname) && !"SE" %in% levels(data$statname)) {
+    data$statname <- factor(
+      data$statname,
+      levels = c(levels(data$statname), "SE"))
   }
   ## Transformation of stats to SE transform SD to SE
   if (max(c("SD", "sd") %in% data$statname)) {
     sdi <- which(data$statname %in% c("SD", "sd"))
-    data$stat[sdi] <- data$stat[sdi]/sqrt(data$n[sdi])
+    data$stat[sdi] <- data$stat[sdi] / sqrt(data$n[sdi])
     data$statname[sdi] <- "SE"
   }
   ## transform MSE to SE
   if ("MSE" %in% data$statname) {
     msei <- which(data$statname == "MSE")
-    data$stat[msei] <- sqrt(data$stat[msei]/data$n[msei])
+    data$stat[msei] <- sqrt(data$stat[msei] / data$n[msei])
     data$statname[msei] <- "SE"
   }
   ## 95%CI measured from mean to upper or lower CI SE = CI/t
   if ("95%CI" %in% data$statname) {
     cii <- which(data$statname == "95%CI")
-    data$stat[cii] <- data$stat[cii]/stats::qt(0.975, data$n[cii])
+    data$stat[cii] <- data$stat[cii] / stats::qt(0.975, data$n[cii])
     data$statname[cii] <- "SE"
   }
   ## Fisher's Least Significant Difference (LSD)
   ## conservatively assume no within block replication
   if ("LSD" %in% data$statname) {
     lsdi <- which(data$statname == "LSD")
-    data$stat[lsdi] <- data$stat[lsdi]/(stats::qt(0.975, data$n[lsdi]) * sqrt((2 * data$n[lsdi])))
+    data$stat[lsdi] <- (
+      data$stat[lsdi]
+      / (stats::qt(0.975, data$n[lsdi])
+          * sqrt((2 * data$n[lsdi]))))
     data$statname[lsdi] <- "SE"
   }
   ## Tukey's Honestly Significant Difference (HSD),
@@ -49,7 +56,7 @@ transformstats <- function(data) {
     hsdi <- which(data$statname == "HSD")
     n <- data$n[hsdi]
     n[is.na(n)] <- 2  ## minimum n that can be used if NA
-    data$stat[hsdi] <- data$stat[hsdi]/(stats::qtukey(0.975, n, df = 2))
+    data$stat[hsdi] <- data$stat[hsdi] / (stats::qtukey(0.975, n, df = 2))
     data$statname[hsdi] <- "SE"
     data$n[hsdi] <- n
   }
@@ -58,11 +65,14 @@ transformstats <- function(data) {
   ## SE  = MSD*n/(t*sqrt(2))
   if ("MSD" %in% data$statname) {
     msdi <- which(data$statname == "MSD")
-    data$stat[msdi] <- data$stat[msdi] * data$n[msdi] / (stats::qt(0.975, 2 * data$n[msdi] - 2) * sqrt(2))
+    data$stat[msdi] <- (
+      data$stat[msdi]
+      * data$n[msdi]
+      / (stats::qt(0.975, 2 * data$n[msdi] - 2) * sqrt(2)))
     data$statname[msdi] <- "SE"
   }
-  if (FALSE %in% c("SE", "none") %in% data$statname) {
-    print(paste(trait, ": ERROR!!! data contains untransformed statistics"))
+  if (!all(data$statname %in% c("SE", "none"))) {
+    PEcAn.logger::logger.error("data contains untransformed statistics")
   }
   return(data)
 } # transformstats
diff --git a/base/utils/man/transformstats.Rd b/base/utils/man/transformstats.Rd
diff --git a/base/utils/tests/Rcheck_reference.log b/base/utils/tests/Rcheck_reference.log
@@ -78,11 +78,10 @@ summarize.result: no visible binding for global variable ‘time’
 summarize.result: no visible binding for global variable ‘cultivar_id’
 summarize.result: no visible binding for global variable ‘specie_id’
 summarize.result: no visible binding for global variable ‘statname’
-transformstats: no visible binding for global variable ‘trait’
 Undefined global functions or variables:
   . citation_id control cultivar_id ensemble.samples
   get.parameter.samples greenhouse id median n nr runs.samples
-  sa.samples se settings site_id specie_id statname time trait
+  sa.samples se settings site_id specie_id statname time
   trait.samples trt_id x y years yieldarray
 Consider adding
   importFrom("stats", "median", "time")