Skip to content

Commit

Permalink
fix autonaming issue in grouping sets for special symbols, closes #…
Browse files Browse the repository at this point in the history
  • Loading branch information
jangorecki committed Aug 1, 2019
1 parent 2986736 commit a4205e4
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 3 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,8 @@

24. `column not found` could incorrectly occur in rare non-equi-join cases, [#3635](https://github.com/Rdatatable/data.table/issues/3635). Thanks to @UweBlock for the report.

25. `groupingsets` functions now properly name the result column when `j` is a lone special symbol (`.N`, `.I` or `.GRP`) and one of the grouping sets is empty, [#3653](https://github.com/Rdatatable/data.table/issues/3653). Thanks to @Henrik-P for the report.

#### NOTES

1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below.
Expand Down
13 changes: 10 additions & 3 deletions R/groupingsets.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...)
# input arguments handling
jj = if (!missing(jj)) jj else substitute(j)
av = all.vars(jj, TRUE)
jsym = if (length(av)==1L) {if (".N"==av) "N" else if (".I"==av) "I" else if (".GRP"==av) "GRP"} # workaround for autonamed columns in grand total #3653
if (":=" %chin% av)
stop("Expression passed to grouping sets function must not update by reference. Use ':=' on results of your grouping function.")
if (missing(.SDcols))
Expand Down Expand Up @@ -97,13 +98,19 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...)
stop("Using integer64 class columns require to have 'bit64' package installed.") # nocov
int64.by.cols = intersect(int64.cols, by)
# aggregate function called for each grouping set
aggregate.set = function(by.set) {
aggregate.set = function(by.set, jsym) {
if (length(by.set)) {
r = if (length(.SDcols)) x[, eval(jj), by.set, .SDcols=.SDcols] else x[, eval(jj), by.set]
} else {
## workaround for grand total single var as data.table too, change to drop=FALSE after #648 solved
r = if (length(.SDcols)) x[, eval(jj), .SDcols=.SDcols] else x[, eval(jj)]
# workaround for grand total single var as data.table too, change to drop=FALSE after #648 solved
if (!is.data.table(r)) r = setDT(list(r))
if (!is.null(jsym)) {
if (!"V1" %in% names(r))
stop("internal error in groupingsets, V1 is not present, please report") # nocov
else
setnames(r, "V1", jsym)
}
}
if (id) {
# integer bit mask of aggregation levels: http://www.postgresql.org/docs/9.5/static/functions-aggregate.html#FUNCTIONS-GROUPING-TABLE
Expand All @@ -121,6 +128,6 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...)
# actually processing everything here
rbindlist(c(
list(empty), # 0 rows template for colorder and type
lapply(sets, aggregate.set) # all aggregations
lapply(sets, aggregate.set, jsym) # all aggregations
), use.names=TRUE, fill=TRUE)
}
10 changes: 10 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -15428,6 +15428,16 @@ test(2071.10, dcast(data.table(a=1, b=1, l=list(list(1))), a ~ b, value.var='l')
test(2071.11, dcast(data.table(a = 1, b = 2, c = 3), a ~ b, value.var = 'c', fill = '2'),
data.table(a=1, `2`=3, key='a'))

# groupingsets j=.N by character(0) set #3653
# Each pair below passes a lone special symbol as j and includes the empty grouping set
# (character()), first alongside the "x" set and then on its own. The expected result
# column is named after the symbol (N, GRP, I), and x is NA on the empty-set rows.
d = data.table(x = c("a", "a", "b"))
# j = .N: per-group counts for "x", then a single row holding the total row count
test(2073.01, groupingsets(d, j = .N, by = "x", sets = list("x", character())), data.table(x=c("a","b",NA_character_), N=c(2L,1L,3L)))
test(2073.02, groupingsets(d, j = .N, by = "x", sets = list(character())), data.table(x=NA_character_, N=3L))
# j = .GRP: group counter for "x"; the empty set is expected to yield a single row with GRP 1
test(2073.03, groupingsets(d, j = .GRP, by = "x", sets = list("x", character())), data.table(x=c("a","b",NA_character_), GRP=c(1L,2L,1L)))
test(2073.04, groupingsets(d, j = .GRP, by = "x", sets = list(character())), data.table(x=NA_character_, GRP=1L))
# j = .I: row indices; the empty set is expected to return one row per input row (1:3)
test(2073.05, groupingsets(d, j = .I, by = "x", sets = list("x", character())), data.table(x=c("a","a","b",rep(NA_character_,3L)), I=c(1:3,1:3)))
test(2073.06, groupingsets(d, j = .I, by = "x", sets = list(character())), data.table(x=rep(NA_character_,3L), I=1:3))


###################################
# Add new tests above this line #
###################################
Expand Down

0 comments on commit a4205e4

Please sign in to comment.