diff --git a/R/as.data.table.R b/R/as.data.table.R index feae0f5ac..2bfff9056 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -223,7 +223,7 @@ as.data.table.data.frame = function(x, keep.rownames=FALSE, key=NULL, ...) { setnames(ans, 'rn', keep.rownames[1L]) return(ans) } - if (any(vapply_1i(x, function(xi) length(dim(xi))))) { # not is.atomic because is.atomic(matrix) is true + if (any(cols_with_dims(x))) { # a data.frame with a column that is data.frame needs to be expanded; test 2013.4 # x may be a class with [[ method that behaves differently, so as.list first for default [[, #4526 return(as.data.table.list(as.list(x), keep.rownames=keep.rownames, ...)) @@ -245,7 +245,7 @@ as.data.table.data.frame = function(x, keep.rownames=FALSE, key=NULL, ...) { as.data.table.data.table = function(x, ...) { # as.data.table always returns a copy, automatically takes care of #473 - if (any(vapply_1i(x, function(xi) length(dim(xi))))) { # for test 2089.2 + if (any(cols_with_dims(x))) { # for test 2089.2 return(as.data.table.list(x, ...)) } x = copy(x) # #1681 diff --git a/R/data.table.R b/R/data.table.R index 81b647fb2..6c4ee8e62 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1,11 +1,3 @@ -if (!exists("trimws", "package:base")) { - # trimws was new in R 3.2.0. Backport it for internal data.table use in R 3.1.0 - trimws = function(x) { - mysub = function(re, x) sub(re, "", x, perl = TRUE) - mysub("[ \t\r\n]+$", mysub("^[ \t\r\n]+", x)) - } -} - dim.data.table = function(x) { .Call(Cdim, x) @@ -356,8 +348,8 @@ replace_dot_alias = function(e) { # Fixes 4994: a case where quoted expression with a "!", ex: expr = quote(!dt1); dt[eval(expr)] requires # the "eval" to be checked before `as.name("!")`. Therefore interchanged. restore.N = remove.N = FALSE - if (exists(".N", envir=parent.frame(), inherits=FALSE)) { - old.N = get(".N", envir=parent.frame(), inherits=FALSE) + old.N = get0(".N", envir=parent.frame(), inherits=FALSE) + if (!is.null(old.N)) { locked.N = bindingIsLocked(".N", parent.frame()) if (locked.N) eval(call("unlockBinding", ".N", parent.frame())) # eval call to pass R CMD check NOTE until we find cleaner way assign(".N", nrow(x), envir=parent.frame(), inherits=FALSE) @@ -899,12 +891,12 @@ replace_dot_alias = function(e) { } if (!is.list(byval)) stopf("'by' or 'keyby' must evaluate to a vector or a list of vectors (where 'list' includes data.table and data.frame which are lists, too)") if (length(byval)==1L && is.null(byval[[1L]])) bynull=TRUE #3530 when by=(function()NULL)() - if (!bynull) for (jj in seq_len(length(byval))) { + if (!bynull) for (jj in seq_along(byval)) { if (!(this_type <- typeof(byval[[jj]])) %chin% ORDERING_TYPES) { stopf("Column or expression %d of 'by' or 'keyby' is type '%s' which is not currently supported. If you have a compelling use case, please add it to https://github.com/Rdatatable/data.table/issues/1597. As a workaround, consider converting the column to a supported type, e.g. by=sapply(list_col, toString), whilst taking care to maintain distinctness in the process.", jj, this_type) } } - tt = vapply_1i(byval,length) + tt = lengths(byval) if (any(tt!=xnrow)) stopf("The items in the 'by' or 'keyby' list are length(s) %s. Each must be length %d; the same length as there are rows in x (after subsetting if i is provided).", brackify(tt), xnrow) if (is.null(bynames)) bynames = rep.int("",length(byval)) if (length(idx <- which(!nzchar(bynames))) && !bynull) { @@ -1034,7 +1026,7 @@ replace_dot_alias = function(e) { # allow filtering via function in .SDcols, #3950 if (is.function(.SDcols)) { .SDcols = lapply(x, .SDcols) - if (any(idx <- vapply_1i(.SDcols, length) > 1L | vapply_1c(.SDcols, typeof) != 'logical' | vapply_1b(.SDcols, anyNA))) + if (any(idx <- lengths(.SDcols) > 1L | vapply_1c(.SDcols, typeof) != 'logical' | vapply_1b(.SDcols, anyNA))) stopf("When .SDcols is a function, it is applied to each column; the output of this function must be a non-missing boolean scalar signalling inclusion/exclusion of the column. However, these conditions were not met for: %s", brackify(names(x)[idx])) .SDcols = unlist(.SDcols, use.names = FALSE) } @@ -1290,11 +1282,12 @@ replace_dot_alias = function(e) { # warningf(sym," in j is looking for ",getName," in calling scope, but a column '", sym, "' exists. Column names should not start with ..") } getName = substr(sym, 3L, nchar(sym)) - if (!exists(getName, parent.frame())) { + getNameVal <- get0(getName, parent.frame()) + if (is.null(getNameVal)) { if (exists(sym, parent.frame())) next # user did 'manual' prefix; i.e. variable in calling scope has .. prefix stopf("Variable '%s' is not found in calling scope. Looking in calling scope because this symbol was prefixed with .. in the j= parameter.", getName) } - assign(sym, get(getName, parent.frame()), SDenv) + assign(sym, getNameVal, SDenv) } # hash=TRUE (the default) does seem better as expected using e.g. test 645. TO DO experiment with 'size' argument if (missingby || bynull || (!byjoin && !length(byval))) { @@ -1460,7 +1453,7 @@ replace_dot_alias = function(e) { # is a more general issue but the former can be fixed by forcing units='secs' SDenv$`-.POSIXt` = function(e1, e2) { if (inherits(e2, 'POSIXt')) { - if (verbose && !exists('done_units_report', parent.frame())) { + if (verbose && !get0('done_units_report', parent.frame(), ifnotfound = FALSE)) { catf('\nNote: forcing units="secs" on implicit difftime by group; call difftime explicitly to choose custom units\n') assign('done_units_report', TRUE, parent.frame()) } @@ -2804,7 +2797,7 @@ setDF = function(x, rownames=NULL) { } x } else { - n = vapply_1i(x, length) + n = lengths(x) mn = max(n) if (any(n 1L && prod(vapply_1i(i, length)) > 1e4){ + if(length(i) > 1L && prod(lengths(i)) > 1e4){ ## CJ would result in more than 1e4 rows. This would be inefficient, especially memory-wise #2635 if (verbose) {catf("Subsetting optimization disabled because the cross-product of RHS values exceeds 1e4, causing memory problems.\n");flush.console()} return(NULL) diff --git a/R/fcast.R b/R/fcast.R index dc9480c86..7c7c297a6 100644 --- a/R/fcast.R +++ b/R/fcast.R @@ -235,7 +235,7 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ..., .Call(Csetlistelt, mapunique, 2L, seq_len(nrow(rhs_))) lhs = lhs_; rhs = rhs_ } - maplen = vapply_1i(mapunique, length) + maplen = lengths(mapunique) idx = do.call("CJ", mapunique)[map, 'I' := .I][["I"]] # TO DO: move this to C and avoid materialising the Cross Join. some_fill = anyNA(idx) fill.default = if (run_agg_funs && is.null(fill) && some_fill) dat_for_default_fill[, maybe_err(eval(fun.call))] diff --git a/R/fmelt.R b/R/fmelt.R index 5c50ca26c..fc01f327e 100644 --- a/R/fmelt.R +++ b/R/fmelt.R @@ -27,8 +27,7 @@ patterns = function(..., cols=character(0L), ignore.case=FALSE, perl=FALSE, fixe if (!is.character(p)) stopf("Input patterns must be of type character.") matched = lapply(p, grep, cols, ignore.case=ignore.case, perl=perl, fixed=fixed, useBytes=useBytes) - # replace with lengths when R 3.2.0 dependency arrives - if (length(idx <- which(sapply(matched, length) == 0L))) + if (length(idx <- which(lengths(matched) == 0L))) stopf('Pattern(s) not found: [%s]', brackify(p[idx])) if (length(matched) == 1L) return(matched[[1L]]) matched @@ -125,7 +124,7 @@ measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.na stopf("sep must be character string") } list.of.vectors = strsplit(cols, sep, fixed=TRUE) - vector.lengths = sapply(list.of.vectors, length) + vector.lengths = lengths(list.of.vectors) n.groups = max(vector.lengths) if (n.groups == 1) { stopf("each column name results in only one item after splitting using sep, which means that all columns would be melted; to fix please either specify melt on all columns directly without using measure, or use a different sep/pattern specification") diff --git a/R/fread.R b/R/fread.R index 66bda3fb1..4092c9cfb 100644 --- a/R/fread.R +++ b/R/fread.R @@ -91,7 +91,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") } file_info = file.info(file) if (is.na(file_info$size)) stopf("File '%s' does not exist or is non-readable. getwd()=='%s'", file, getwd()) - if (isTRUE(file_info$isdir)) stopf("File '%s' is a directory. Not yet implemented.", file) # dir.exists() requires R v3.2+, #989 + if (isTRUE(file_info$isdir)) stopf("File '%s' is a directory. Not yet implemented.", file) # Could use dir.exists(), but we already ran file.info(). if (!file_info$size) { warningf("File '%s' has size 0. Returning a NULL %s.", file, if (data.table) 'data.table' else 'data.frame') return(if (data.table) data.table(NULL) else data.frame(NULL)) @@ -350,7 +350,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") if (!all(vapply_1b(index, is.character))) stopf("index argument of data.table() must be a character vector naming columns (NB: col.names are applied before this)") if (is.list(index)) { - to_split = vapply_1i(index, length) == 1L + to_split = lengths(index) == 1L if (any(to_split)) index[to_split] = sapply(index[to_split], strsplit, split = ",", fixed = TRUE) } else { diff --git a/R/programming.R b/R/programming.R index da97e785c..c82fb3681 100644 --- a/R/programming.R +++ b/R/programming.R @@ -18,7 +18,7 @@ list2lang = function(x) { char = vapply(x, is.character, FALSE) to.name = !asis & char if (any(to.name)) { ## turns "my_name" character scalar into `my_name` symbol, for convenience - if (any(non.scalar.char <- vapply(x[to.name], length, 0L)!=1L)) { + if (any(non.scalar.char <- lengths(x[to.name])!=1L)) { stopf("Character objects provided in the input are not scalar objects, if you need them as character vector rather than a name, then wrap each into 'I' call: %s", brackify(names(non.scalar.char)[non.scalar.char])) } x[to.name] = lapply(x[to.name], as.name) diff --git a/R/setkey.R b/R/setkey.R index 1ea81de7e..3c03e3300 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -340,7 +340,7 @@ CJ = function(..., sorted = TRUE, unique = FALSE) if (unique) l[[i]] = unique(y) } } - nrow = prod( vapply_1i(l, length) ) # lengths(l) will work from R 3.2.0 + nrow = prod(lengths(l)) if (nrow > .Machine$integer.max) stopf("Cross product of elements provided to CJ() would result in %.0f rows which exceeds .Machine$integer.max == %d", nrow, .Machine$integer.max) l = .Call(Ccj, l) setDT(l) diff --git a/R/test.data.table.R b/R/test.data.table.R index c040b2ee0..c5c81a193 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -349,11 +349,11 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no # from all.equal and different to identical related to row.names and unused factor levels # 3) each test has a unique id which we refer to in commit messages, emails etc. # 4) test that a query generates exactly 2 warnings, that they are both the correct warning messages, and that the result is the one expected - .test.data.table = exists("nfail", parent.frame()) # test() can be used inside functions defined in tests.Rraw, so inherits=TRUE (default) here + nfail = get0("nfail", parent.frame()) # test() can be used inside functions defined in tests.Rraw, so inherits=TRUE (default) here + .test.data.table = !is.null(nfail) numStr = sprintf("%.8g", num) if (.test.data.table) { prevtest = get("prevtest", parent.frame()) - nfail = get("nfail", parent.frame()) # to cater for both test.data.table() and stepping through tests in dev whichfail = get("whichfail", parent.frame()) assign("ntest", get("ntest", parent.frame()) + if (num>0) 1L else 0L, parent.frame(), inherits=TRUE) # bump number of tests run lasttime = get("lasttime", parent.frame()) diff --git a/R/utils.R b/R/utils.R index feacd2b00..dd772c064 100644 --- a/R/utils.R +++ b/R/utils.R @@ -21,10 +21,6 @@ nan_is_na = function(x) { stopf("Argument 'nan' must be NA or NaN") } -if (base::getRversion() < "3.2.0") { # Apr 2015 - isNamespaceLoaded = function(x) x %chin% loadedNamespaces() -} - if (!exists('startsWith', 'package:base', inherits=FALSE)) { # R 3.3.0; Apr 2016 startsWith = function(x, stub) substr(x, 1L, nchar(stub))==stub } @@ -67,20 +63,23 @@ require_bit64_if_needed = function(DT) { } # vapply for return value character(1) -vapply_1c = function (x, fun, ..., use.names = TRUE) { +vapply_1c = function(x, fun, ..., use.names = TRUE) { vapply(X = x, FUN = fun, ..., FUN.VALUE = NA_character_, USE.NAMES = use.names) } # vapply for return value logical(1) -vapply_1b = function (x, fun, ..., use.names = TRUE) { +vapply_1b = function(x, fun, ..., use.names = TRUE) { vapply(X = x, FUN = fun, ..., FUN.VALUE = NA, USE.NAMES = use.names) } # vapply for return value integer(1) -vapply_1i = function (x, fun, ..., use.names = TRUE) { +vapply_1i = function(x, fun, ..., use.names = TRUE) { vapply(X = x, FUN = fun, ..., FUN.VALUE = NA_integer_, USE.NAMES = use.names) } +# not is.atomic because is.atomic(matrix) is true +cols_with_dims = function(x) vapply_1i(x, function(j) length(dim(j))) > 0L + more = function(f) system(paste("more",f)) # nocov (just a dev helper) # helper used to auto-name columns in data.table(x,y) as c("x","y"), CJ(x,y) and similar diff --git a/inst/tests/other.Rraw b/inst/tests/other.Rraw index 087b3bada..c3b21193c 100644 --- a/inst/tests/other.Rraw +++ b/inst/tests/other.Rraw @@ -520,7 +520,7 @@ if (loaded[["xts"]]) { # was 1465 in tests.Rraw, #5516 # was 2108 in tests.Rraw, #5516 # first and last should no longer load xts namespace, #3857, below commented test for interactive validation when xts present but not loaded or attached -# stopifnot("xts"%in%installed.packages(), !"xts"%in%loadedNamespaces()); library(data.table); x=as.POSIXct("2019-01-01"); last(x); stopifnot(!"xts" %in% loadedNamespaces()) +# stopifnot("xts"%in%installed.packages(), !isNamespaceLoaded("xts")); library(data.table); x=as.POSIXct("2019-01-01"); last(x); stopifnot(!isNamespaceLoaded("xts")) x = as.POSIXct("2019-09-09")+0:1 old = options(datatable.verbose=TRUE) test(19.01, last(x), x[length(x)], output="!is.xts(x)") diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index d8d74a958..68f12bd2b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -67,7 +67,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { shallow = data.table:::shallow # until exported .shallow = data.table:::.shallow split.data.table = data.table:::split.data.table - if (!exists('startsWith', 'package:base', inherits=FALSE)) startsWith = data.table:::startsWith + if (!exists('startsWith', 'package:base', inherits=FALSE)) startsWith = data.table:::startsWith # R 3.3.0 stopf = data.table:::stopf test = data.table:::test uniqlengths = data.table:::uniqlengths @@ -1472,13 +1472,13 @@ test(462, DT[,foo:=10L], data.table(a=1:3,v=4:9,foo=10L,key="a")) unlink(f) # Test CJ problems with v1.7.4, #1689 -test(463, all(sapply(CJ(1:2,1:3),length)==6L)) +test(463, all(lengths(CJ(1:2,1:3)) == 6L)) DT = data.table(x=1:4,y=1:2,cnt=1L,key=c('x', 'y')) test(464, DT[CJ(1:4,1:4)]$cnt, INT(1,rep(NA,4),1,NA,NA,1,rep(NA,4),1,NA,NA)) test(465, DT[CJ(1:4,1:4), sum(cnt>0), by=.EACHI]$y, rep(1:4,4)) f1 = factor(c("READING","MATHEMATICS")) f2 = factor(c("2010_2011","2009_2010","2008_2009"), levels=paste(2006:2010,2007:2011,sep="_")) -test(466, all(sapply(CJ(f1, f2),length)==6L)) +test(466, all(lengths(CJ(f1, f2))==6L)) # Test list(.SD,newcol=..) gives error with guidance DT = data.table(a=1:2,v=3:6) @@ -2882,7 +2882,7 @@ test(966, fread(input, colClasses=list(character=2:4)), data.table(A=1:2, B=c("f warning="Column number 4 (colClasses[[1]][3]) is out of range [1,ncol=3]") # Character input more than 4096 bytes (used to be passed through path.expand which imposed the limit), #2649 -test(967, nrow(fread( paste( rep('a\tb\n', 10000), collapse=''), header=FALSE)), 10000L) +test(967, nrow(fread( strrep('a\tb\n', 10000L), header=FALSE)), 10000L) # Test fread warns about removal of any footer (and autostart skips up over it) test(968, fread("A,B\n1,3\n2,4\n\nRowcount: 2\n"), data.table(A=1:2,B=3:4), warning="Discarded single-line footer.*Rowcount: 2") @@ -7005,7 +7005,7 @@ test(1477.22, transpose(la, list.cols=NA), error="list.cols should be logical TR ll = list(data.frame(a=1), data.frame(x=1, y=2), NULL, list()) ll <- lapply(ll, setDT) test(1478.1, sapply(ll, truelength), c(1025L, 1026L, 1024L, 1024L)) -test(1478.2, sapply(ll, length), INT(1,2,0,0)) +test(1478.2, lengths(ll), INT(1,2,0,0)) # rbindlist stack imbalance issue, #980. test(1479, rbindlist(replicate(4,rbindlist(replicate(47, NULL), @@ -11630,7 +11630,7 @@ set.seed(1L) ar.dimnames = list(color = sort(c("green","yellow","red")), year = as.character(2011:2015), status = sort(c("active","inactive","archived","removed"))) -ar.dim = sapply(ar.dimnames, length) +ar.dim = lengths(ar.dimnames) ar = array(sample(c(rep(NA, 4), 4:7/2), prod(ar.dim), TRUE), unname(ar.dim), # array() having length(dims) < 3 will be created as matrix in R so will not be dispatched here but as.data.table.matrix ar.dimnames) @@ -11638,7 +11638,7 @@ dt = as.data.table(ar, na.rm=FALSE) dimcols = head(names(dt), -1L) test(1774.01, TRUE, all( nrow(dt) == 60L, - prod(sapply(ar.dimnames, length)) == dt[, prod(sapply(.SD, uniqueN)), .SDcols = dimcols], + prod(lengths(ar.dimnames)) == dt[, prod(sapply(.SD, uniqueN)), .SDcols = dimcols], dt[is.na(value), .N] == 30L, dt[, .N==1L, c(dimcols)]$V1 )) @@ -11646,7 +11646,7 @@ dt = as.data.table(ar) dimcols = head(names(dt), -1L) test(1774.02, TRUE, all( nrow(dt) == 30L, - prod(sapply(ar.dimnames, length)) == dt[, prod(sapply(.SD, uniqueN)), .SDcols = dimcols], + prod(lengths(ar.dimnames)) == dt[, prod(sapply(.SD, uniqueN)), .SDcols = dimcols], dt[is.na(value), .N] == 0L, dt[, .N==1L, c(dimcols)]$V1 )) @@ -11856,7 +11856,7 @@ test(1800.2, fread("A\n1e55555555\n-1e+234056\n2e-59745"), data.table(A=c("1e555 # Test files with "round" sizes (different multiples of 2, from 512B to 64KB) for (mul in c(16, 128, 512, 1024, 2048)) { ff = file(f<-tempfile(), open="wb") - cat(paste(rep("1234,5678,9012,3456,7890,abcd,4\x0A", mul), collapse=""), file=ff) + cat(strrep("1234,5678,9012,3456,7890,abcd,4\x0A", mul), file=ff) close(ff) DT = data.table(V1=rep(1234L, mul), V2=5678L, V3=9012L, V4=3456L, V5=7890L, V6="abcd", V7=4L) test(1801 + log2(mul)/100 + 0.001, file.info(f)$size, mul*32) @@ -12067,8 +12067,8 @@ if (test_longdouble) { #3258 # Test that integers just above 128 or 256 characters in length parse as strings, not as integers/floats # This guards against potential overflows in the count of digits -src1 = paste0(rep("1234567890", 13), collapse="") # length = 130, slightly above 128 -src2 = paste0(rep("12345678900987654321", 13), collapse="") # length = 260, slightly above 256 +src1 = strrep("1234567890", 13L) # length = 130, slightly above 128 +src2 = strrep("12345678900987654321", 13L) # length = 260, slightly above 256 test(1831.1, fread(paste0("A\n", src1)), data.table(A=src1)) test(1831.2, fread(paste0("A\n", src2)), data.table(A=src2)) test(1831.3, fread(paste0("A\n", src2, ".33")), data.table(A=1.2345678900987655e+259)) @@ -16996,7 +16996,7 @@ test(2156.1, DT[,list(list({attr(value,"class")<-"newclass";value})),by=series]$ DT[1,value]) test(2156.2, truelength(DT[,list(list(value)),by=series]$V1[[1L]])>=0L) # not -64 carried over by duplicate() of the .SD column # cover NULL case in copyAsPlain by putting a NULL alongside a dogroups .SD column. The 'if(.GRP==1L)' is just for fun. -test(2156.3, sapply(DT[, list(if (.GRP==1L) list(value,NULL) else list(NULL,value)), by=series]$V1, length), +test(2156.3, lengths(DT[, list(if (.GRP==1L) list(value,NULL) else list(NULL,value)), by=series]$V1), INT(64,0,0,64)) # CornerstoneR usage revealed copySharedColumns needed work afer PR#4655