From 5229a512a14e5cd3f02c8c443475b01c38fd8266 Mon Sep 17 00:00:00 2001 From: Arun Srinivasan Date: Sun, 10 Feb 2019 16:54:28 +0000 Subject: [PATCH] Closes #1974, closes #2949 and closes #1369 - similar dcast issues --- NEWS.md | 2 ++ R/data.table.R | 11 +++++++---- R/fcast.R | 9 ++++++--- inst/tests/tests.Rraw | 22 ++++++++++++++++++++++ 4 files changed, 37 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index 57d45c2a8..7a92afb26 100644 --- a/NEWS.md +++ b/NEWS.md @@ -16,6 +16,8 @@ 4. Grouping by unusual column names such as `by='string_with_\\'` and `keyby="x y"` could fail, [#3319](https://github.com/Rdatatable/data.table/issues/3319) and [#3378](https://github.com/Rdatatable/data.table/issues/3378). Thanks to @HughParsonage for reporting and @MichaelChirico for the fixes. +6. Several issues were filed regarding limitations of `dcast.data.table` in handling `fun.aggregate` argument when the functions are not directly provided to the argument as `fun.aggregate <- list(sum, mean)` and instead are stored in a variable, e.g., `funs <- list(sum, mean)` and referred to as `fun.aggregate=funs`. This fix closes several issues [#1974](https://github.com/Rdatatable/data.table/issues/1974), [#1369](https://github.com/Rdatatable/data.table/issues/1369) and [#2949](https://github.com/Rdatatable/data.table/issues/2949). Thanks to @sunbee, @Ping2016 and @d0rg0ld for reporting. + #### NOTES 1. When upgrading to 1.12.0 some Windows users might have seen `CdllVersion not found` in some circumstances. We found a way to catch that so the [helpful message](https://twitter.com/MattDowle/status/1084528873549705217) now occurs for those upgrading from versions prior to 1.12.0 too, as well as those upgrading from 1.12.0 to a later version. See item 1 in notes section of 1.12.0 below for more background. diff --git a/R/data.table.R b/R/data.table.R index c119e6eb6..90f8c2120 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1724,7 +1724,8 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { gfuns = c("sum", "prod", "mean", "median", "var", "sd", ".N", "min", "max", "head", "last", "first", "tail", "[") # added .N for #5760 .ok <- function(q) { if (dotN(q)) return(TRUE) # For #5760 - cond = is.call(q) && length(q1c <- as.character(q[[1L]]))==1L && q1c %chin% gfuns && !is.call(q[[2L]]) + # Need is.symbol() check. See #1369, #1974 or #2949 issues and explanation below by searching for one of these issues. + cond = is.call(q) && is.symbol(q[[1]]) && (q1c <- as.character(q[[1]])) %chin% gfuns && !is.call(q[[2L]]) # run GForce for simple f(x) calls and f(x, na.rm = TRUE)-like calls ans = cond && (length(q)==2L || identical("na",substring(names(q)[3L], 1L, 2L))) if (identical(ans, TRUE)) return(ans) @@ -1761,9 +1762,11 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { oldjsub = jsub if (jsub[[1L]]=="list") { for (ii in seq_along(jsub)[-1L]) { - if (dotN(jsub[[ii]])) next; # For #5760 - if (is.call(jsub[[ii]]) && jsub[[ii]][[1L]]=="mean") - jsub[[ii]] = .optmean(jsub[[ii]]) + this_jsub = jsub[[ii] + if (dotN(this_jsub)) next; # For #5760 + # Addressing #1369, #2949 and #1974. Added is.symbol() check to handle cases where expanded function definition is used insead of function names. #1369 results in (function(x) sum(x)) as jsub[[.]] from dcast.data.table. + if (is.call(this_jsub) && is.symbol(this_jsub[[1L]]) && this_jsub[[1L]]=="mean") + jsub[[ii]] = .optmean(this_jsub) } } else if (jsub[[1L]]=="mean") { jsub = .optmean(jsub) diff --git a/R/fcast.R b/R/fcast.R index 087f65fce..bf572fe2e 100644 --- a/R/fcast.R +++ b/R/fcast.R @@ -63,13 +63,16 @@ value_vars <- function(value.var, varnames) { } aggregate_funs <- function(funs, vals, sep="_", ...) { - if (is.call(funs) && funs[[1L]] == "eval") + if (is.symbol(funs)) { # quick fix for #2949, #1974 and #1369 + funs <- eval(funs, parent.frame(2L), parent.frame(2L)) + if (is.function(funs)) funs <- list(funs) + } else if (is.call(funs) && funs[[1L]] == "eval") funs = eval(funs[[2L]], parent.frame(2L), parent.frame(2L)) - if (is.call(funs) && as.character(funs[[1L]]) %chin% c("c", "list")) + if (is.call(funs) && as.character(funs[[1L]]) %chin% c("c", "list")) { funs = lapply(as.list(funs)[-1L], function(x) { if (is.call(x) && as.character(x[[1L]]) %chin% c("c", "list")) as.list(x)[-1L] else x }) - else funs = list(funs) + } else funs = list(funs) # needed for cases as shown in test#1700.1 if (length(funs) != length(vals)) { if (length(vals) == 1L) vals = replicate(length(funs), vals) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c0e894b98..2f572a6b9 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -13414,6 +13414,28 @@ DT = data.table(a = c(1:3, 3:1)) test(1984.38, rowidv(DT, prefix = 5L), error='must be NULL or a character vector') test(1984.39, rowidv(DT, prefix = c('hey','you')), error='must be NULL or a character vector') +# tests for #2949, #1974 and #1369 - dcast not able to handle functions referred to by a variable +dt = data.table( + x=sample(5,20,TRUE), + y=sample(2,20,TRUE), + z=sample(letters[1:2], 20,TRUE), + d1 = runif(20), + d2=1L +) +myFun <- function(data, vars) { + mySum <- function(x) sum(x) + dcast.data.table(data, "x + y ~ z", value.var=vars, fun.aggregate=mySum) +} +funs = list(sum, mean) +vars = list("d1", "d2") +test(1986.1, + dcast.data.table(dt, x + y ~ z, fun=funs, value.var=vars), + dcast.data.table(dt, x + y ~ z, fun=list(sum, mean), value.var=vars) +) +test(1986.2, + dcast.data.table(dt, x + y ~ z, fun=sum, value.var=vars), + myFun(dt, vars) +) ################################### # Add new tests above this line #