Skip to content

Commit

Permalink
Use R 3.2.0 features (#5838)
Browse files Browse the repository at this point in the history
Newly-available functions taken from here:

https://github.com/r-lib/lintr/blob/f00f4a9a715346a3e696aab9cffff2756833e50a/R/backport_linter.R#L152-L159

Most significant are `lengths()`, `trimws()`, and `strrep()`.
  • Loading branch information
MichaelChirico committed May 4, 2024
1 parent d19bfef commit 8fdd408
Show file tree
Hide file tree
Showing 11 changed files with 41 additions and 50 deletions.
4 changes: 2 additions & 2 deletions R/as.data.table.R
Expand Up @@ -223,7 +223,7 @@ as.data.table.data.frame = function(x, keep.rownames=FALSE, key=NULL, ...) {
setnames(ans, 'rn', keep.rownames[1L])
return(ans)
}
if (any(vapply_1i(x, function(xi) length(dim(xi))))) { # not is.atomic because is.atomic(matrix) is true
if (any(cols_with_dims(x))) {
# a data.frame with a column that is data.frame needs to be expanded; test 2013.4
# x may be a class with [[ method that behaves differently, so as.list first for default [[, #4526
return(as.data.table.list(as.list(x), keep.rownames=keep.rownames, ...))
Expand All @@ -245,7 +245,7 @@ as.data.table.data.frame = function(x, keep.rownames=FALSE, key=NULL, ...) {

as.data.table.data.table = function(x, ...) {
# as.data.table always returns a copy, automatically takes care of #473
if (any(vapply_1i(x, function(xi) length(dim(xi))))) { # for test 2089.2
if (any(cols_with_dims(x))) { # for test 2089.2
return(as.data.table.list(x, ...))
}
x = copy(x) # #1681
Expand Down
29 changes: 11 additions & 18 deletions R/data.table.R
@@ -1,11 +1,3 @@
if (!exists("trimws", "package:base")) {
# trimws was new in R 3.2.0. Backport it for internal data.table use in R 3.1.0
trimws = function(x) {
mysub = function(re, x) sub(re, "", x, perl = TRUE)
mysub("[ \t\r\n]+$", mysub("^[ \t\r\n]+", x))
}
}

dim.data.table = function(x)
{
.Call(Cdim, x)
Expand Down Expand Up @@ -356,8 +348,8 @@ replace_dot_alias = function(e) {
# Fixes 4994: a case where quoted expression with a "!", ex: expr = quote(!dt1); dt[eval(expr)] requires
# the "eval" to be checked before `as.name("!")`. Therefore interchanged.
restore.N = remove.N = FALSE
if (exists(".N", envir=parent.frame(), inherits=FALSE)) {
old.N = get(".N", envir=parent.frame(), inherits=FALSE)
old.N = get0(".N", envir=parent.frame(), inherits=FALSE)
if (!is.null(old.N)) {
locked.N = bindingIsLocked(".N", parent.frame())
if (locked.N) eval(call("unlockBinding", ".N", parent.frame())) # eval call to pass R CMD check NOTE until we find cleaner way
assign(".N", nrow(x), envir=parent.frame(), inherits=FALSE)
Expand Down Expand Up @@ -899,12 +891,12 @@ replace_dot_alias = function(e) {
}
if (!is.list(byval)) stopf("'by' or 'keyby' must evaluate to a vector or a list of vectors (where 'list' includes data.table and data.frame which are lists, too)")
if (length(byval)==1L && is.null(byval[[1L]])) bynull=TRUE #3530 when by=(function()NULL)()
if (!bynull) for (jj in seq_len(length(byval))) {
if (!bynull) for (jj in seq_along(byval)) {
if (!(this_type <- typeof(byval[[jj]])) %chin% ORDERING_TYPES) {
stopf("Column or expression %d of 'by' or 'keyby' is type '%s' which is not currently supported. If you have a compelling use case, please add it to https://github.com/Rdatatable/data.table/issues/1597. As a workaround, consider converting the column to a supported type, e.g. by=sapply(list_col, toString), whilst taking care to maintain distinctness in the process.", jj, this_type)
}
}
tt = vapply_1i(byval,length)
tt = lengths(byval)
if (any(tt!=xnrow)) stopf("The items in the 'by' or 'keyby' list are length(s) %s. Each must be length %d; the same length as there are rows in x (after subsetting if i is provided).", brackify(tt), xnrow)
if (is.null(bynames)) bynames = rep.int("",length(byval))
if (length(idx <- which(!nzchar(bynames))) && !bynull) {
Expand Down Expand Up @@ -1034,7 +1026,7 @@ replace_dot_alias = function(e) {
# allow filtering via function in .SDcols, #3950
if (is.function(.SDcols)) {
.SDcols = lapply(x, .SDcols)
if (any(idx <- vapply_1i(.SDcols, length) > 1L | vapply_1c(.SDcols, typeof) != 'logical' | vapply_1b(.SDcols, anyNA)))
if (any(idx <- lengths(.SDcols) > 1L | vapply_1c(.SDcols, typeof) != 'logical' | vapply_1b(.SDcols, anyNA)))
stopf("When .SDcols is a function, it is applied to each column; the output of this function must be a non-missing boolean scalar signalling inclusion/exclusion of the column. However, these conditions were not met for: %s", brackify(names(x)[idx]))
.SDcols = unlist(.SDcols, use.names = FALSE)
}
Expand Down Expand Up @@ -1290,11 +1282,12 @@ replace_dot_alias = function(e) {
# warningf(sym," in j is looking for ",getName," in calling scope, but a column '", sym, "' exists. Column names should not start with ..")
}
getName = substr(sym, 3L, nchar(sym))
if (!exists(getName, parent.frame())) {
getNameVal <- get0(getName, parent.frame())
if (is.null(getNameVal)) {
if (exists(sym, parent.frame())) next # user did 'manual' prefix; i.e. variable in calling scope has .. prefix
stopf("Variable '%s' is not found in calling scope. Looking in calling scope because this symbol was prefixed with .. in the j= parameter.", getName)
}
assign(sym, get(getName, parent.frame()), SDenv)
assign(sym, getNameVal, SDenv)
}
# hash=TRUE (the default) does seem better as expected using e.g. test 645. TO DO experiment with 'size' argument
if (missingby || bynull || (!byjoin && !length(byval))) {
Expand Down Expand Up @@ -1460,7 +1453,7 @@ replace_dot_alias = function(e) {
# is a more general issue but the former can be fixed by forcing units='secs'
SDenv$`-.POSIXt` = function(e1, e2) {
if (inherits(e2, 'POSIXt')) {
if (verbose && !exists('done_units_report', parent.frame())) {
if (verbose && !get0('done_units_report', parent.frame(), ifnotfound = FALSE)) {
catf('\nNote: forcing units="secs" on implicit difftime by group; call difftime explicitly to choose custom units\n')
assign('done_units_report', TRUE, parent.frame())
}
Expand Down Expand Up @@ -2804,7 +2797,7 @@ setDF = function(x, rownames=NULL) {
}
x
} else {
n = vapply_1i(x, length)
n = lengths(x)
mn = max(n)
if (any(n<mn))
stopf("All elements in argument 'x' to 'setDF' must be of same length")
Expand Down Expand Up @@ -3184,7 +3177,7 @@ is_constantish = function(q, check_singleton=FALSE) {
}
if (length(i) == 0L) stopf("Internal error in .isFastSubsettable. Please report to data.table developers") # nocov
## convert i to data.table with all combinations in rows.
if(length(i) > 1L && prod(vapply_1i(i, length)) > 1e4){
if(length(i) > 1L && prod(lengths(i)) > 1e4){
## CJ would result in more than 1e4 rows. This would be inefficient, especially memory-wise #2635
if (verbose) {catf("Subsetting optimization disabled because the cross-product of RHS values exceeds 1e4, causing memory problems.\n");flush.console()}
return(NULL)
Expand Down
2 changes: 1 addition & 1 deletion R/fcast.R
Expand Up @@ -235,7 +235,7 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
.Call(Csetlistelt, mapunique, 2L, seq_len(nrow(rhs_)))
lhs = lhs_; rhs = rhs_
}
maplen = vapply_1i(mapunique, length)
maplen = lengths(mapunique)
idx = do.call("CJ", mapunique)[map, 'I' := .I][["I"]] # TO DO: move this to C and avoid materialising the Cross Join.
some_fill = anyNA(idx)
fill.default = if (run_agg_funs && is.null(fill) && some_fill) dat_for_default_fill[, maybe_err(eval(fun.call))]
Expand Down
5 changes: 2 additions & 3 deletions R/fmelt.R
Expand Up @@ -27,8 +27,7 @@ patterns = function(..., cols=character(0L), ignore.case=FALSE, perl=FALSE, fixe
if (!is.character(p))
stopf("Input patterns must be of type character.")
matched = lapply(p, grep, cols, ignore.case=ignore.case, perl=perl, fixed=fixed, useBytes=useBytes)
# replace with lengths when R 3.2.0 dependency arrives
if (length(idx <- which(sapply(matched, length) == 0L)))
if (length(idx <- which(lengths(matched) == 0L)))
stopf('Pattern(s) not found: [%s]', brackify(p[idx]))
if (length(matched) == 1L) return(matched[[1L]])
matched
Expand Down Expand Up @@ -125,7 +124,7 @@ measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.na
stopf("sep must be character string")
}
list.of.vectors = strsplit(cols, sep, fixed=TRUE)
vector.lengths = sapply(list.of.vectors, length)
vector.lengths = lengths(list.of.vectors)
n.groups = max(vector.lengths)
if (n.groups == 1) {
stopf("each column name results in only one item after splitting using sep, which means that all columns would be melted; to fix please either specify melt on all columns directly without using measure, or use a different sep/pattern specification")
Expand Down
4 changes: 2 additions & 2 deletions R/fread.R
Expand Up @@ -91,7 +91,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC")
}
file_info = file.info(file)
if (is.na(file_info$size)) stopf("File '%s' does not exist or is non-readable. getwd()=='%s'", file, getwd())
if (isTRUE(file_info$isdir)) stopf("File '%s' is a directory. Not yet implemented.", file) # dir.exists() requires R v3.2+, #989
if (isTRUE(file_info$isdir)) stopf("File '%s' is a directory. Not yet implemented.", file) # Could use dir.exists(), but we already ran file.info().
if (!file_info$size) {
warningf("File '%s' has size 0. Returning a NULL %s.", file, if (data.table) 'data.table' else 'data.frame')
return(if (data.table) data.table(NULL) else data.frame(NULL))
Expand Down Expand Up @@ -350,7 +350,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC")
if (!all(vapply_1b(index, is.character)))
stopf("index argument of data.table() must be a character vector naming columns (NB: col.names are applied before this)")
if (is.list(index)) {
to_split = vapply_1i(index, length) == 1L
to_split = lengths(index) == 1L
if (any(to_split))
index[to_split] = sapply(index[to_split], strsplit, split = ",", fixed = TRUE)
} else {
Expand Down
2 changes: 1 addition & 1 deletion R/programming.R
Expand Up @@ -18,7 +18,7 @@ list2lang = function(x) {
char = vapply(x, is.character, FALSE)
to.name = !asis & char
if (any(to.name)) { ## turns "my_name" character scalar into `my_name` symbol, for convenience
if (any(non.scalar.char <- vapply(x[to.name], length, 0L)!=1L)) {
if (any(non.scalar.char <- lengths(x[to.name])!=1L)) {
stopf("Character objects provided in the input are not scalar objects, if you need them as character vector rather than a name, then wrap each into 'I' call: %s", brackify(names(non.scalar.char)[non.scalar.char]))
}
x[to.name] = lapply(x[to.name], as.name)
Expand Down
2 changes: 1 addition & 1 deletion R/setkey.R
Expand Up @@ -340,7 +340,7 @@ CJ = function(..., sorted = TRUE, unique = FALSE)
if (unique) l[[i]] = unique(y)
}
}
nrow = prod( vapply_1i(l, length) ) # lengths(l) will work from R 3.2.0
nrow = prod(lengths(l))
if (nrow > .Machine$integer.max) stopf("Cross product of elements provided to CJ() would result in %.0f rows which exceeds .Machine$integer.max == %d", nrow, .Machine$integer.max)
l = .Call(Ccj, l)
setDT(l)
Expand Down
4 changes: 2 additions & 2 deletions R/test.data.table.R
Expand Up @@ -349,11 +349,11 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
# from all.equal and different to identical related to row.names and unused factor levels
# 3) each test has a unique id which we refer to in commit messages, emails etc.
# 4) test that a query generates exactly 2 warnings, that they are both the correct warning messages, and that the result is the one expected
.test.data.table = exists("nfail", parent.frame()) # test() can be used inside functions defined in tests.Rraw, so inherits=TRUE (default) here
nfail = get0("nfail", parent.frame()) # test() can be used inside functions defined in tests.Rraw, so inherits=TRUE (default) here
.test.data.table = !is.null(nfail)
numStr = sprintf("%.8g", num)
if (.test.data.table) {
prevtest = get("prevtest", parent.frame())
nfail = get("nfail", parent.frame()) # to cater for both test.data.table() and stepping through tests in dev
whichfail = get("whichfail", parent.frame())
assign("ntest", get("ntest", parent.frame()) + if (num>0) 1L else 0L, parent.frame(), inherits=TRUE) # bump number of tests run
lasttime = get("lasttime", parent.frame())
Expand Down
13 changes: 6 additions & 7 deletions R/utils.R
Expand Up @@ -21,10 +21,6 @@ nan_is_na = function(x) {
stopf("Argument 'nan' must be NA or NaN")
}

if (base::getRversion() < "3.2.0") { # Apr 2015
isNamespaceLoaded = function(x) x %chin% loadedNamespaces()
}

if (!exists('startsWith', 'package:base', inherits=FALSE)) { # R 3.3.0; Apr 2016
startsWith = function(x, stub) substr(x, 1L, nchar(stub))==stub
}
Expand Down Expand Up @@ -67,20 +63,23 @@ require_bit64_if_needed = function(DT) {
}

# vapply for return value character(1)
vapply_1c = function (x, fun, ..., use.names = TRUE) {
vapply_1c = function(x, fun, ..., use.names = TRUE) {
vapply(X = x, FUN = fun, ..., FUN.VALUE = NA_character_, USE.NAMES = use.names)
}

# vapply for return value logical(1)
vapply_1b = function (x, fun, ..., use.names = TRUE) {
vapply_1b = function(x, fun, ..., use.names = TRUE) {
vapply(X = x, FUN = fun, ..., FUN.VALUE = NA, USE.NAMES = use.names)
}

# vapply for return value integer(1)
vapply_1i = function (x, fun, ..., use.names = TRUE) {
vapply_1i = function(x, fun, ..., use.names = TRUE) {
vapply(X = x, FUN = fun, ..., FUN.VALUE = NA_integer_, USE.NAMES = use.names)
}

# not is.atomic because is.atomic(matrix) is true
cols_with_dims = function(x) vapply_1i(x, function(j) length(dim(j))) > 0L

more = function(f) system(paste("more",f)) # nocov (just a dev helper)

# helper used to auto-name columns in data.table(x,y) as c("x","y"), CJ(x,y) and similar
Expand Down
2 changes: 1 addition & 1 deletion inst/tests/other.Rraw
Expand Up @@ -520,7 +520,7 @@ if (loaded[["xts"]]) { # was 1465 in tests.Rraw, #5516

# was 2108 in tests.Rraw, #5516
# first and last should no longer load xts namespace, #3857, below commented test for interactive validation when xts present but not loaded or attached
# stopifnot("xts"%in%installed.packages(), !"xts"%in%loadedNamespaces()); library(data.table); x=as.POSIXct("2019-01-01"); last(x); stopifnot(!"xts" %in% loadedNamespaces())
# stopifnot("xts"%in%installed.packages(), !isNamespaceLoaded("xts")); library(data.table); x=as.POSIXct("2019-01-01"); last(x); stopifnot(!isNamespaceLoaded("xts"))
x = as.POSIXct("2019-09-09")+0:1
old = options(datatable.verbose=TRUE)
test(19.01, last(x), x[length(x)], output="!is.xts(x)")
Expand Down
24 changes: 12 additions & 12 deletions inst/tests/tests.Rraw
Expand Up @@ -67,7 +67,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) {
shallow = data.table:::shallow # until exported
.shallow = data.table:::.shallow
split.data.table = data.table:::split.data.table
if (!exists('startsWith', 'package:base', inherits=FALSE)) startsWith = data.table:::startsWith
if (!exists('startsWith', 'package:base', inherits=FALSE)) startsWith = data.table:::startsWith # R 3.3.0
stopf = data.table:::stopf
test = data.table:::test
uniqlengths = data.table:::uniqlengths
Expand Down Expand Up @@ -1472,13 +1472,13 @@ test(462, DT[,foo:=10L], data.table(a=1:3,v=4:9,foo=10L,key="a"))
unlink(f)

# Test CJ problems with v1.7.4, #1689
test(463, all(sapply(CJ(1:2,1:3),length)==6L))
test(463, all(lengths(CJ(1:2,1:3)) == 6L))
DT = data.table(x=1:4,y=1:2,cnt=1L,key=c('x', 'y'))
test(464, DT[CJ(1:4,1:4)]$cnt, INT(1,rep(NA,4),1,NA,NA,1,rep(NA,4),1,NA,NA))
test(465, DT[CJ(1:4,1:4), sum(cnt>0), by=.EACHI]$y, rep(1:4,4))
f1 = factor(c("READING","MATHEMATICS"))
f2 = factor(c("2010_2011","2009_2010","2008_2009"), levels=paste(2006:2010,2007:2011,sep="_"))
test(466, all(sapply(CJ(f1, f2),length)==6L))
test(466, all(lengths(CJ(f1, f2))==6L))

# Test list(.SD,newcol=..) gives error with guidance
DT = data.table(a=1:2,v=3:6)
Expand Down Expand Up @@ -2882,7 +2882,7 @@ test(966, fread(input, colClasses=list(character=2:4)), data.table(A=1:2, B=c("f
warning="Column number 4 (colClasses[[1]][3]) is out of range [1,ncol=3]")

# Character input more than 4096 bytes (used to be passed through path.expand which imposed the limit), #2649
test(967, nrow(fread( paste( rep('a\tb\n', 10000), collapse=''), header=FALSE)), 10000L)
test(967, nrow(fread( strrep('a\tb\n', 10000L), header=FALSE)), 10000L)

# Test fread warns about removal of any footer (and autostart skips up over it)
test(968, fread("A,B\n1,3\n2,4\n\nRowcount: 2\n"), data.table(A=1:2,B=3:4), warning="Discarded single-line footer.*Rowcount: 2")
Expand Down Expand Up @@ -7005,7 +7005,7 @@ test(1477.22, transpose(la, list.cols=NA), error="list.cols should be logical TR
ll = list(data.frame(a=1), data.frame(x=1, y=2), NULL, list())
ll <- lapply(ll, setDT)
test(1478.1, sapply(ll, truelength), c(1025L, 1026L, 1024L, 1024L))
test(1478.2, sapply(ll, length), INT(1,2,0,0))
test(1478.2, lengths(ll), INT(1,2,0,0))

# rbindlist stack imbalance issue, #980.
test(1479, rbindlist(replicate(4,rbindlist(replicate(47, NULL),
Expand Down Expand Up @@ -11630,23 +11630,23 @@ set.seed(1L)
ar.dimnames = list(color = sort(c("green","yellow","red")),
year = as.character(2011:2015),
status = sort(c("active","inactive","archived","removed")))
ar.dim = sapply(ar.dimnames, length)
ar.dim = lengths(ar.dimnames)
ar = array(sample(c(rep(NA, 4), 4:7/2), prod(ar.dim), TRUE),
unname(ar.dim), # array() having length(dims) < 3 will be created as matrix in R so will not be dispatched here but as.data.table.matrix
ar.dimnames)
dt = as.data.table(ar, na.rm=FALSE)
dimcols = head(names(dt), -1L)
test(1774.01, TRUE, all(
nrow(dt) == 60L,
prod(sapply(ar.dimnames, length)) == dt[, prod(sapply(.SD, uniqueN)), .SDcols = dimcols],
prod(lengths(ar.dimnames)) == dt[, prod(sapply(.SD, uniqueN)), .SDcols = dimcols],
dt[is.na(value), .N] == 30L,
dt[, .N==1L, c(dimcols)]$V1
))
dt = as.data.table(ar)
dimcols = head(names(dt), -1L)
test(1774.02, TRUE, all(
nrow(dt) == 30L,
prod(sapply(ar.dimnames, length)) == dt[, prod(sapply(.SD, uniqueN)), .SDcols = dimcols],
prod(lengths(ar.dimnames)) == dt[, prod(sapply(.SD, uniqueN)), .SDcols = dimcols],
dt[is.na(value), .N] == 0L,
dt[, .N==1L, c(dimcols)]$V1
))
Expand Down Expand Up @@ -11856,7 +11856,7 @@ test(1800.2, fread("A\n1e55555555\n-1e+234056\n2e-59745"), data.table(A=c("1e555
# Test files with "round" sizes (different multiples of 2, from 512B to 64KB)
for (mul in c(16, 128, 512, 1024, 2048)) {
ff = file(f<-tempfile(), open="wb")
cat(paste(rep("1234,5678,9012,3456,7890,abcd,4\x0A", mul), collapse=""), file=ff)
cat(strrep("1234,5678,9012,3456,7890,abcd,4\x0A", mul), file=ff)
close(ff)
DT = data.table(V1=rep(1234L, mul), V2=5678L, V3=9012L, V4=3456L, V5=7890L, V6="abcd", V7=4L)
test(1801 + log2(mul)/100 + 0.001, file.info(f)$size, mul*32)
Expand Down Expand Up @@ -12067,8 +12067,8 @@ if (test_longdouble) { #3258

# Test that integers just above 128 or 256 characters in length parse as strings, not as integers/floats
# This guards against potential overflows in the count of digits
src1 = paste0(rep("1234567890", 13), collapse="") # length = 130, slightly above 128
src2 = paste0(rep("12345678900987654321", 13), collapse="") # length = 260, slightly above 256
src1 = strrep("1234567890", 13L) # length = 130, slightly above 128
src2 = strrep("12345678900987654321", 13L) # length = 260, slightly above 256
test(1831.1, fread(paste0("A\n", src1)), data.table(A=src1))
test(1831.2, fread(paste0("A\n", src2)), data.table(A=src2))
test(1831.3, fread(paste0("A\n", src2, ".33")), data.table(A=1.2345678900987655e+259))
Expand Down Expand Up @@ -16996,7 +16996,7 @@ test(2156.1, DT[,list(list({attr(value,"class")<-"newclass";value})),by=series]$
DT[1,value])
test(2156.2, truelength(DT[,list(list(value)),by=series]$V1[[1L]])>=0L) # not -64 carried over by duplicate() of the .SD column
# cover NULL case in copyAsPlain by putting a NULL alongside a dogroups .SD column. The 'if(.GRP==1L)' is just for fun.
test(2156.3, sapply(DT[, list(if (.GRP==1L) list(value,NULL) else list(NULL,value)), by=series]$V1, length),
test(2156.3, lengths(DT[, list(if (.GRP==1L) list(value,NULL) else list(NULL,value)), by=series]$V1),
INT(64,0,0,64))

# CornerstoneR usage revealed copySharedColumns needed work afer PR#4655
Expand Down

0 comments on commit 8fdd408

Please sign in to comment.