Rdatatable · mattdowle · Jul 13, 2019 · Jul 3, 2019 · Jul 3, 2019 · Jul 3, 2019
@@ -222,6 +222,8 @@
 
 11. We intend to deprecate the `datatable.nomatch` option, [more info](https://github.com/Rdatatable/data.table/pull/3578/files). A message is now printed upon use of the option (once per session) as a first step. It asks you to please stop using the option and to pass `nomatch=NULL` explicitly if you require inner join. Outer join (`nomatch=NA`) has always been the default because it is safer; it does not drop missing data silently. The problem is that the option is global; i.e., if a user changes the default using this option for their own use, that can change the behavior of joins inside packages that use `data.table` too. This is the only `data.table` option with this concern.
 
+12. The test suite of 9k tests now runs with three R options on: `warnPartialMatchArgs`, `warnPartialMatchAttr`, and `warnPartialMatchDollar`. This ensures that we don't rely on partial argument matching in internal code, for robustness and efficiency, and so that users can turn these options on for their code in production, [#3664](https://github.com/Rdatatable/data.table/issues/3664). Thanks to Vijay Lulla for the suggestion, and Michael Chirico for fixing 48 internal calls to `attr()` which were missing `exact=TRUE`, for example. Thanks to R-core for adding these options to R 2.6.0 (Oct 2007).
+
 
 ### Changes in [v1.12.2](https://github.com/Rdatatable/data.table/milestone/14?closed=1)  (07 Apr 2019)
 

@@ -5,7 +5,7 @@
 
 as.IDate = function(x, ...) UseMethod("as.IDate")
 
-as.IDate.default = function(x, ..., tz = attr(x, "tzone")) {
+as.IDate.default = function(x, ..., tz = attr(x, "tzone", exact=TRUE)) {
   if (is.null(tz)) tz = "UTC"
   as.IDate(as.Date(x, tz = tz, ...))
 }
@@ -32,7 +32,7 @@ as.IDate.Date = function(x, ...) {
   x                                 # always return a new object
 }
 
-as.IDate.POSIXct = function(x, tz = attr(x, "tzone"), ...) {
+as.IDate.POSIXct = function(x, tz = attr(x, "tzone", exact=TRUE), ...) {
   if (is.null(tz)) tz = "UTC"
   if (tz %chin% c("UTC", "GMT")) {
     (setattr(as.integer(x) %/% 86400L, "class", c("IDate", "Date")))  # %/% returns new object so can use setattr() on it; wrap with () to return visibly
@@ -135,7 +135,7 @@ as.ITime.default = function(x, ...) {
   as.ITime(as.POSIXlt(x, ...), ...)
 }
 
-as.ITime.POSIXct = function(x, tz = attr(x, "tzone"), ...) {
+as.ITime.POSIXct = function(x, tz = attr(x, "tzone", exact=TRUE), ...) {
   if (is.null(tz)) tz = "UTC"
   if (tz %chin% c("UTC", "GMT")) as.ITime(unclass(x), ...)
   else as.ITime(as.POSIXlt(x, tz = tz, ...), ...)
@@ -302,23 +302,23 @@ as.POSIXlt.ITime = function(x, ...) {
 ###################################################################
 
 second  = function(x) {
-  if (inherits(x,'POSIXct') && identical(attr(x,'tzone'),'UTC')) {
+  if (inherits(x, 'POSIXct') && identical(attr(x, 'tzone', exact=TRUE), 'UTC')) {
     # if we know the object is in UTC, can calculate the hour much faster
     as.integer(x) %% 60L
   } else {
     as.integer(as.POSIXlt(x)$sec)
   }
 }
 minute  = function(x) {
-  if (inherits(x,'POSIXct') && identical(attr(x,'tzone'),'UTC')) {
+  if (inherits(x, 'POSIXct') && identical(attr(x, 'tzone', exact=TRUE), 'UTC')) {
     # ever-so-slightly faster than x %% 3600L %/% 60L
     as.integer(x) %/% 60L %% 60L
   } else {
     as.POSIXlt(x)$min
   }
 }
 hour = function(x) {
-  if (inherits(x,'POSIXct') && identical(attr(x,'tzone'),'UTC')) {
+  if (inherits(x, 'POSIXct') && identical(attr(x, 'tzone', exact=TRUE), 'UTC')) {
     # ever-so-slightly faster than x %% 86400L %/% 3600L
     as.integer(x) %/% 3600L %% 24L
   } else {

@@ -6,7 +6,7 @@ between = function(x,lower,upper,incbounds=TRUE) {
   is.px = function(x) inherits(x, "POSIXct")
   is.i64 = function(x) inherits(x, "integer64")
   # POSIX special handling to auto coerce character
-  if (is.px(x) && !is.null(tz<-attr(x, "tzone", TRUE)) && nzchar(tz) &&
+  if (is.px(x) && !is.null(tz<-attr(x, "tzone", exact=TRUE)) && nzchar(tz) &&
       (is.character(lower) || is.character(upper))) {
     try_posix_cast = function(x, tz) {tryCatch(
       list(status=0L, value=as.POSIXct(x, tz = tz)),
@@ -30,7 +30,7 @@ between = function(x,lower,upper,incbounds=TRUE) {
       ((is.null(x) || !nzchar(x)) && (is.null(y) || !nzchar(y)) && (is.null(z) || !nzchar(z))) ||
         (identical(x, y) && identical(x, z))
     }
-    if (!tz_match(attr(x, "tzone", TRUE), attr(lower, "tzone", TRUE), attr(upper, "tzone", TRUE))) {
+    if (!tz_match(attr(x, "tzone", exact=TRUE), attr(lower, "tzone", exact=TRUE), attr(upper, "tzone", exact=TRUE))) {
       stop("'between' function arguments have mismatched timezone attribute, align all arguments to same timezone")
     }
   }

@@ -138,13 +138,13 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
     # TODO: could check/reuse secondary indices, but we need 'starts' attribute as well!
     xo = forderv(x, xcols, retGrp=TRUE)
     if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()}
-    xg = attr(xo, 'starts')
+    xg = attr(xo, 'starts', exact=TRUE)
     resetcols = head(xcols, non_equi-1L)
     if (length(resetcols)) {
       # TODO: can we get around having to reorder twice here?
       # or at least reuse previous order?
       if (verbose) {last.started.at=proc.time();cat("  Generating group lengths ... ");flush.console()}
-      resetlen = attr(forderv(x, resetcols, retGrp=TRUE), 'starts')
+      resetlen = attr(forderv(x, resetcols, retGrp=TRUE), 'starts', exact=TRUE)
       resetlen = .Call(Cuniqlengths, resetlen, nrow(x))
       if (verbose) {cat("done in",timetaken(last.started.at),"\n"); flush.console()}
     } else resetlen = integer(0L)

@@ -993,7 +993,7 @@ replace_order = function(isub, verbose, env) {
       newnames = NULL
       suppPrint = identity
       if (length(av) && av[1L] == ":=") {
-        if (identical(attr(x,".data.table.locked"),TRUE)) stop(".SD is locked. Using := in .SD's j is reserved for possible future use; a tortuously flexible way to modify by group. Use := in j directly to modify by group by reference.")
+        if (identical(attr(x, ".data.table.locked", exact=TRUE), TRUE)) stop(".SD is locked. Using := in .SD's j is reserved for possible future use; a tortuously flexible way to modify by group. Use := in j directly to modify by group by reference.")
         suppPrint = function(x) { .global$print=address(x); x }
         # Suppress print when returns ok not on error, bug #2376. Thanks to: http://stackoverflow.com/a/13606880/403310
         # All appropriate returns following this point are wrapped; i.e. return(suppPrint(x)).
@@ -1393,7 +1393,7 @@ replace_order = function(isub, verbose, env) {
           cat("Finding group sizes from the positions (can be avoided to save RAM) ... ")
           flush.console()  # for windows
         }
-        f__ = attr(o__, "starts")
+        f__ = attr(o__, "starts", exact=TRUE)
         len__ = uniqlengths(f__, xnrow)
         if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()}
         if (!bysameorder && !keyby) {
@@ -1741,7 +1741,7 @@ replace_order = function(isub, verbose, env) {
     if (any(names(x)[cols] %chin% key(x)))
       setkey(x,NULL)
     # fixes #1479. Take care of secondary indices, TODO: cleaner way of doing this
-    attrs = attr(x, 'index')
+    attrs = attr(x, 'index', exact=TRUE)
     skeys = names(attributes(attrs))
     if (!is.null(skeys)) {
       hits  = unlist(lapply(paste0("__", names(x)[cols]), function(x) grep(x, skeys, fixed = TRUE)))
@@ -1899,7 +1899,7 @@ as.matrix.data.table = function(x, rownames=NULL, rownames.value=NULL, ...) {
     if (!is.logical(xj))
       all.logical = FALSE
     if (length(levels(xj)) > 0L || !(is.numeric(xj) || is.complex(xj) || is.logical(xj)) ||
-        (!is.null(cl <- attr(xj, "class")) && any(cl %chin%
+        (!is.null(cl <- attr(xj, "class", exact=TRUE)) && any(cl %chin%
         c("Date", "POSIXct", "POSIXlt"))))
       non.numeric = TRUE
     if (!is.atomic(xj))
@@ -2108,7 +2108,7 @@ transform.data.table = function (`_data`, ...)
   inx = chmatch(tags, names(`_data`))
   matched = !is.na(inx)
   if (any(matched)) {
-    if (isTRUE(attr(`_data`, ".data.table.locked", TRUE))) {
+    if (isTRUE(attr(`_data`, ".data.table.locked", exact=TRUE))) {
       setattr(`_data`, ".data.table.locked", NULL) # fix for #1641, now covered by test 104.2
     }
     `_data`[,inx[matched]] = e[matched]
@@ -2348,7 +2348,7 @@ point = function(to, to_idx, from, from_idx) {
       setattr(ans, "sorted", head(key(ans), keylength)) ## keep what can be kept
     }
     ## take care of attributes.
-    indices = names(attributes(attr(ans, "index")))
+    indices = names(attributes(attr(ans, "index", exact=TRUE)))
     for(index in indices) {
       indexcols = strsplit(index, split = "__")[[1L]][-1L]
       indexlength = which.first(!indexcols %chin% cols) - 1L
@@ -2357,13 +2357,13 @@ point = function(to, to_idx, from, from_idx) {
       if (reducedindex %chin% indices || !indexlength) {
         ## Either reduced index already present or no columns of the original index remain.
         ## Drop the original index completely
-        setattr(attr(ans, "index", exact = TRUE), index, NULL)
-      } else if(length(attr(attr(ans, "index"), index))) {
+        setattr(attr(ans, "index", exact=TRUE), index, NULL)
+      } else if(length(attr(attr(ans, "index", exact=TRUE), index, exact=TRUE))) {
         ## index is not length 0. Drop it since shortening could lead to spurious reordering in discarded columns (#2336)
-        setattr(attr(ans, "index", exact = TRUE), index, NULL)
+        setattr(attr(ans, "index", exact=TRUE), index, NULL)
       } else {
         ## rename index to reducedindex
-        names(attributes(attr(ans, "index")))[names(attributes(attr(ans, "index"))) == index] = reducedindex
+        names(attributes(attr(ans, "index")))[names(attributes(attr(ans, "index", exact=TRUE))) == index] = reducedindex
       }
     }
   } else { # retain.key == FALSE
@@ -2411,7 +2411,7 @@ setattr = function(x,name,value) {
   # User can also call `attr<-` function directly, but that copies (maybe just when NAMED>0, which is always for data.frame, I think).  See "Confused by NAMED" thread on r-devel 24 Nov 2011.
   # We tend to use setattr() internally in data.table.R because often we construct a data.table and it hasn't
   # got names yet. setnames() is the user interface which checks integrity and doesn't let you drop names for example.
-  if (name=="names" && is.data.table(x) && length(attr(x,"names")) && !is.null(value))
+  if (name=="names" && is.data.table(x) && length(attr(x, "names", exact=TRUE)) && !is.null(value))
     setnames(x,value)
     # Using setnames here so that truelength of names can be retained, to carry out integrity checks such as not
     # creating names longer than the number of columns of x, and to change the key, too
@@ -2484,23 +2484,23 @@ setnames = function(x,old,new,skip_absent=FALSE) {
   m = chmatch(names(x)[i], key(x))
   w = which(!is.na(m))
   if (length(w))
-    .Call(Csetcharvec, attr(x,"sorted"), m[w], new[w])
+    .Call(Csetcharvec, attr(x, "sorted", exact=TRUE), m[w], new[w])
 
   # update secondary keys
-  idx = attr(x,"index")
+  idx = attr(x, "index", exact=TRUE)
   for (k in names(attributes(idx))) {
     tt = strsplit(k,split="__")[[1L]][-1L]
     m = chmatch(names(x)[i], tt)
     w = which(!is.na(m))
     if (length(w)) {
       tt[m[w]] = new[w]
       newk = paste0("__",tt,collapse="")
-      setattr(idx, newk, attr(idx, k))
+      setattr(idx, newk, attr(idx, k, exact=TRUE))
       setattr(idx, k, NULL)
     }
   }
 
-  .Call(Csetcharvec, attr(x,"names"), as.integer(i), new)
+  .Call(Csetcharvec, attr(x, "names", exact=TRUE), as.integer(i), new)
   invisible(x)
 }
 
@@ -2776,7 +2776,7 @@ rowidv = function(x, cols=seq_along(x), prefix=NULL) {
     cols = as.integer(cols)
   }
   xorder = forderv(x, by=cols, sort=FALSE, retGrp=TRUE) # speedup on char with sort=FALSE
-  xstart = attr(xorder, 'start')
+  xstart = attr(xorder, 'starts', exact=TRUE)
   if (!length(xorder)) xorder = seq_along(x[[1L]])
   ids = .Call(Cfrank, xorder, xstart, uniqlengths(xstart, length(xorder)), "sequence")
   if (!is.null(prefix))
@@ -2846,7 +2846,7 @@ isReallyReal = function(x) {
   #'        ATTENTION: If nothing else helps, an auto-index is created on x unless options prevent this.
   if(getOption("datatable.optimize") < 3L) return(NULL) ## at least level three optimization required.
   if (!is.call(isub)) return(NULL)
-  if (!is.null(attr(x, '.data.table.locked'))) return(NULL)  # fix for #958, don't create auto index on '.SD'.
+  if (!is.null(attr(x, '.data.table.locked', exact=TRUE))) return(NULL)  # fix for #958, don't create auto index on '.SD'.
   ## a list of all possible operators with their translations into the 'on' clause
   validOps = list(op = c("==", "%in%", "%chin%"),
                    on = c("==", "==",   "=="))
@@ -2955,7 +2955,7 @@ isReallyReal = function(x) {
     idx = NULL
     for (cand in candidates){
       if (all(names(i) %chin% cand) && length(cand) == length(i)){
-        idx = attr(attr(x, "index"), paste0("__", cand, collapse = ""))
+        idx = attr(attr(x, "index", exact=TRUE), paste0("__", cand, collapse = ""), exact = TRUE)
         idxCols = cand
         break
       }
@@ -2972,7 +2972,7 @@ isReallyReal = function(x) {
     setindexv(x, names(i))
     if (verbose) {cat(timetaken(last.started.at),"\n");flush.console()}
     if (verbose) {cat("Optimized subsetting with index '", paste0(names(i), collapse = "__"),"'\n",sep="");flush.console()}
-    idx = attr(attr(x, "index"), paste0("__", names(i), collapse = ""))
+    idx = attr(attr(x, "index", exact=TRUE), paste0("__", names(i), collapse = ""), exact=TRUE)
     idxCols = names(i)
   }
   if(!is.null(idxCols)){

@@ -23,8 +23,8 @@ duplicated.data.table = function(x, incomparables=FALSE, fromLast=FALSE, by=seq_
     if (fromLast) f = cumsum(uniqlengths(f, nrow(x)))
   } else {
     o = forderv(x, by=query$by, sort=FALSE, retGrp=TRUE)
-    if (attr(o, 'maxgrpn') == 1L) return(rep.int(FALSE, nrow(x)))
-    f = attr(o,"starts")
+    if (attr(o, 'maxgrpn', exact=TRUE) == 1L) return(rep.int(FALSE, nrow(x)))
+    f = attr(o, "starts", exact=TRUE)
     if (fromLast) f = cumsum(uniqlengths(f, nrow(x)))
     if (length(o)) f = o[f]
   }
@@ -47,8 +47,8 @@ unique.data.table = function(x, incomparables=FALSE, fromLast=FALSE, by=seq_alon
   # if by=key(x), forderv tests for orderedness within it quickly and will short-circuit
   # there isn't any need in unique() to call uniqlist like duplicated does; uniqlist returns a new nrow(x) vector anyway and isn't
   # as efficient as forderv returning empty o when input is already ordered
-  if (attr(o, 'maxgrpn') == 1L) return(copy(x))  # return copy so that unique(x)[, col := val] doesn't affect original data.table, #3383.
-  f = attr(o,"starts")
+  if (attr(o, 'maxgrpn', exact=TRUE) == 1L) return(copy(x))  # return copy so that unique(x)[, col := val] doesn't affect original data.table, #3383.
+  f = attr(o, "starts", exact=TRUE)
   if (fromLast) f = cumsum(uniqlengths(f, nrow(x)))
   if (length(o)) f = o[f]
   if (length(o <- forderv(f))) f = f[o]  # don't sort the uniques too
@@ -149,7 +149,7 @@ uniqueN = function(x, by = if (is.list(x)) seq_along(x) else NULL, na.rm=FALSE)
   }
   if (is.null(by)) by = seq_along(x)
   o = forderv(x, by=by, retGrp=TRUE, na.last=if (!na.rm) FALSE else NA)
-  starts = attr(o, 'starts')
+  starts = attr(o, 'starts', exact=TRUE)
   if (!na.rm) {
     length(starts)
   } else {

@@ -142,7 +142,7 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
   fill.default = NULL
   if (is.null(fun.call)) {
     oo = forderv(dat, by=varnames, retGrp=TRUE)
-    if (attr(oo, 'maxgrpn') > 1L) {
+    if (attr(oo, 'maxgrpn', exact=TRUE) > 1L) {
       message("Aggregate function missing, defaulting to 'length'")
       fun.call = quote(length)
     }
@@ -159,7 +159,7 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
   }
   order_ = function(x) {
     o = forderv(x, retGrp=TRUE, sort=TRUE)
-    idx = attr(o, 'starts')
+    idx = attr(o, 'starts', exact=TRUE)
     if (!length(o)) o = seq_along(x)
     o[idx] # subsetVector retains attributes, using R's subset for now
   }

@@ -78,7 +78,7 @@ foverlaps = function(x, y, by.x=if (!is.null(key(x))) key(x) else key(y), by.y=k
     stop("Some interval cols are of type POSIXct while others are not. Please ensure all interval cols are (or are not) of POSIXct type")
   }
   # #1143, mismatched timezone
-  getTZ = function(x) if (is.null(tz <- attr(x, "tzone"))) "" else tz # "" == NULL AFAICT
+  getTZ = function(x) if (is.null(tz <- attr(x, "tzone", exact=TRUE))) "" else tz # "" == NULL AFAICT
   tzone_chk = c(getTZ(xval1), getTZ(xval2), getTZ(yval1), getTZ(yval2))
   if (any(tzone_chk != tzone_chk[1L])) {
     warning("POSIXct interval cols have mixed timezones. Overlaps are performed on the internal numerical representation of POSIXct objects, therefore printed values may give the impression that values don't overlap but their internal representations will. Please ensure that POSIXct type interval cols have identical 'tzone' attributes to avoid confusion.")

@@ -43,7 +43,7 @@ frankv = function(x, cols=seq_along(x), order=1L, na.last=TRUE, ties.method=c("a
     cols = c(cols, ncol(x))
   }
   xorder  = forderv(x, by=cols, order=order, sort=TRUE, retGrp=TRUE, na.last=if (isFALSE(na.last)) na.last else TRUE)
-  xstart  = attr(xorder, 'starts')
+  xstart  = attr(xorder, 'starts', exact=TRUE)
   xsorted = FALSE
   if (!length(xorder)) {
     xsorted = TRUE

@@ -285,7 +285,7 @@ yaml=FALSE, autostart=NA)
     setattr(ans, 'names', make.names(names(ans), unique=TRUE))
   }
 
-  colClassesAs = attr(ans, "colClassesAs")   # should only be present if one or more are != ""
+  colClassesAs = attr(ans, "colClassesAs", exact=TRUE)   # should only be present if one or more are != ""
   for (j in which(colClassesAs!="")) {       # # 1634
     v = .subset2(ans, j)
     new_class = colClassesAs[j]

@@ -125,7 +125,7 @@ format.data.table = function (x, ..., justify="none", timezone = FALSE) {
   }
   # FR #2842 add timezone for posix timestamps
   format.timezone = function(col) { # paste timezone to a time object
-    tz = attr(col,'tzone', exact = TRUE)
+    tz = attr(col,'tzone', exact=TRUE)
     if (!is.null(tz)) { # date object with tz
       nas = is.na(col)
       col = paste0(as.character(col)," ",tz) # parse to character