fix autonaming issue in grouping sets for special symbols, closes #…

…3653
Rdatatable · Aug 1, 2019 · a4205e4 · a4205e4
1 parent 2986736
commit a4205e4
Show file tree

Hide file tree

Showing 3 changed files with 22 additions and 3 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -198,6 +198,8 @@
 
 24. `column not found` could incorrectly occur in rare non-equi-join cases, [#3635](https://github.com/Rdatatable/data.table/issues/3635). Thanks to @UweBlock for the report.
 
+25. `groupingsets` functions now properly handle a lone special symbol (`.N`, `.I` or `.GRP`) in `j` when grouping by an empty set, [#3653](https://github.com/Rdatatable/data.table/issues/3653). Thanks to @Henrik-P for the report.
+
 #### NOTES
 
 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below.

diff --git a/R/groupingsets.R b/R/groupingsets.R
@@ -67,6 +67,7 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...)
   # input arguments handling
   jj = if (!missing(jj)) jj else substitute(j)
   av = all.vars(jj, TRUE)
+  jsym = if (length(av)==1L) {if (".N"==av) "N" else if (".I"==av) "I" else if (".GRP"==av) "GRP"} # workaround for autonamed columns in grand total #3653
   if (":=" %chin% av)
     stop("Expression passed to grouping sets function must not update by reference. Use ':=' on results of your grouping function.")
   if (missing(.SDcols))
@@ -97,13 +98,19 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...)
     stop("Using integer64 class columns require to have 'bit64' package installed.") # nocov
   int64.by.cols = intersect(int64.cols, by)
   # aggregate function called for each grouping set
-  aggregate.set = function(by.set) {
+  aggregate.set = function(by.set, jsym) {
     if (length(by.set)) {
       r = if (length(.SDcols)) x[, eval(jj), by.set, .SDcols=.SDcols] else x[, eval(jj), by.set]
     } else {
+      ## workaround for grand total single var as data.table too, change to drop=FALSE after #648 solved
       r = if (length(.SDcols)) x[, eval(jj), .SDcols=.SDcols] else x[, eval(jj)]
-      # workaround for grand total single var as data.table too, change to drop=FALSE after #648 solved
       if (!is.data.table(r)) r = setDT(list(r))
+      if (!is.null(jsym)) {
+        if (!"V1" %in% names(r))
+          stop("internal error in groupingsets, V1 is not present, please report") # nocov
+        else
+          setnames(r, "V1", jsym)
+      }
     }
     if (id) {
       # integer bit mask of aggregation levels: http://www.postgresql.org/docs/9.5/static/functions-aggregate.html#FUNCTIONS-GROUPING-TABLE
@@ -121,6 +128,6 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...)
   # actually processing everything here
   rbindlist(c(
     list(empty), # 0 rows template for colorder and type
-    lapply(sets, aggregate.set) # all aggregations
+    lapply(sets, aggregate.set, jsym) # all aggregations
   ), use.names=TRUE, fill=TRUE)
 }
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
@@ -15428,6 +15428,16 @@ test(2071.10, dcast(data.table(a=1, b=1, l=list(list(1))), a ~ b, value.var='l')
 test(2071.11, dcast(data.table(a = 1, b = 2, c = 3), a ~ b, value.var = 'c', fill = '2'),
      data.table(a=1, `2`=3, key='a'))
 
+# groupingsets j=.N by character(0) set #3653
+d = data.table(x = c("a", "a", "b"))
+test(2073.01, groupingsets(d, j = .N, by = "x", sets = list("x", character())), data.table(x=c("a","b",NA_character_), N=c(2L,1L,3L)))
+test(2073.02, groupingsets(d, j = .N, by = "x", sets = list(character())), data.table(x=NA_character_, N=3L))
+test(2073.03, groupingsets(d, j = .GRP, by = "x", sets = list("x", character())), data.table(x=c("a","b",NA_character_), GRP=c(1L,2L,1L)))
+test(2073.04, groupingsets(d, j = .GRP, by = "x", sets = list(character())), data.table(x=NA_character_, GRP=1L))
+test(2073.05, groupingsets(d, j = .I, by = "x", sets = list("x", character())), data.table(x=c("a","a","b",rep(NA_character_,3L)), I=c(1:3,1:3)))
+test(2073.06, groupingsets(d, j = .I, by = "x", sets = list(character())), data.table(x=rep(NA_character_,3L), I=1:3))
+
+
 ###################################
 #  Add new tests above this line  #
 ###################################