Skip to content

Commit

Permalink
Closes #2046 and closes #2111. Fixes -ve length vectors issue with GF…
Browse files Browse the repository at this point in the history
…orce.
  • Loading branch information
arunsrinivasan committed Nov 13, 2017
1 parent e334ed1 commit 74e7b8f
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 4 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@

26. Indices are now retrieved by exact name, [#2465] (https://github.com/Rdatatable/data.table/issues/2465). Thanks to @pannnda for reporting and providing a reproducible example and to @MarkusBonsch for fixing.

27. GForce implementations for `median` and `var` while used along with `by` argument, at times, resulted in "negative length vectors not allowed" error message. This is now fixed. Closes [#2046](https://github.com/Rdatatable/data.table/issues/2046) and [#2111](https://github.com/Rdatatable/data.table/issues/2111). Thanks to @caneff and @osofr for filing issues on Github and to @kmillar for identifying and explaining the issue under #2046.

#### NOTES

Expand Down
7 changes: 5 additions & 2 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -1787,7 +1787,10 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
assign(".N", len__, thisEnv) # For #5760
#fix for #1683
if (use.I) assign(".I", seq_len(nrow(x)), thisEnv)
ans = gforce(thisEnv, jsub, o__, f__, len__, irows) # irows needed for #971.
# Fixing 2046 and 2111. o__ when key exists is set to integer(0) without additional 'starts' and 'maxgrpn' attributes. 'maxgrpn' attribute was used in gforce internally by looking for attribute, which resulted in a junk value, sometimes -ve, which created the issue. Thanks @kmillar for debugging as well. Check for existence of attribute and if NULL generate it directly with max(len__). Better fix would be to fix it upstream with attributes for o__, but better to do it when this function is simplified.
maxgrplen = attr(o__, "maxgrpn")
if (is.null(maxgrplen)) maxgrplen = max(len__)
ans = gforce(thisEnv, jsub, o__, f__, len__, irows, maxgrplen) # irows needed for #971.
gi = if (length(o__)) o__[f__] else f__
g = lapply(grpcols, function(i) groups[[i]][gi])
ans = c(g, ans)
Expand Down Expand Up @@ -2804,7 +2807,7 @@ gmin <- function(x, na.rm=FALSE) .Call(Cgmin, x, na.rm)
gmax <- function(x, na.rm=FALSE) .Call(Cgmax, x, na.rm)
gvar <- function(x, na.rm=FALSE) .Call(Cgvar, x, na.rm)
gsd <- function(x, na.rm=FALSE) .Call(Cgsd, x, na.rm)
gforce <- function(env, jsub, o, f, l, rows) .Call(Cgforce, env, jsub, o, f, l, rows)
gforce <- function(env, jsub, o, f, l, rows, maxgrpn) .Call(Cgforce, env, jsub, o, f, l, rows, maxgrpn)

isReallyReal <- function(x) {
.Call(CisReallyReal, x)
Expand Down
7 changes: 7 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -11143,6 +11143,13 @@ target <- data.table(colname = c("test1", "test2", "test2", "test3"), colname_wi
target[colname_with_suffix == "not found", ]
test(1852.1, DT1[DT2, lookup_result := i.lookup_result, on=c("colname"="lookup")], target)

# fix for gmedian (2046) and gvar (2111); -ve indices issue.
# the error won't occur always, since the junk value will not be always -ve. Could add a verbose message and capture that here .. but don't want to add another argument to gforce() function.
dt <- data.table(x=c(1,1,1),y=c(3,0,0), key="x")
test(1853.1, dt[, median(y), by=x], data.table(x=1, V1=0, key="x"))
testDT <- data.table(col1 = c(1,1,1, 2,2,2), col2 = c(2,2,2,1,1,1), ID = c(rep(1,3), rep(2,3)), key="ID")
test(1853.2, testDT[, lapply(.SD, var), by=ID], data.table(ID=c(1,2), col1=0, col2=0, key="ID"))

##########################

# TODO: Tests involving GForce functions needs to be run with optimisation level 1 and 2, so that both functions are tested all the time.
Expand Down
5 changes: 3 additions & 2 deletions src/gsumm.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ static union {
# define SQRTL sqrt
#endif

SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) {
SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg, SEXP maxgrpnArg) {
int i, j, g, *this;
// clock_t start = clock();
if (TYPEOF(env) != ENVSXP) error("env is not an environment");
Expand All @@ -34,6 +34,7 @@ SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) {
if (!isInteger(f)) error("f is not an integer vector");
if (!isInteger(l)) error("l is not an integer vector");
if (!isInteger(irowsArg) && !isNull(irowsArg)) error("irowsArg is not an integer vector");
if (!isInteger(maxgrpnArg)) error("maxgrpnArg is not an integer vector");
ngrp = LENGTH(l);
if (LENGTH(f) != ngrp) error("length(f)=%d != length(l)=%d", LENGTH(f), ngrp);
grpn=0;
Expand All @@ -58,7 +59,7 @@ SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) {
}
// for gmedian
// initialise maxgrpn
maxgrpn = INTEGER(getAttrib(o, install("maxgrpn")))[0];
maxgrpn = INTEGER(maxgrpnArg)[0];
oo = INTEGER(o);
ff = INTEGER(f);

Expand Down

0 comments on commit 74e7b8f

Please sign in to comment.