From 8c77848a06e145750316891708e397e361874150 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 8 May 2024 22:59:58 +0000 Subject: [PATCH 1/8] missing_argument_linter and return_linter --- .ci/.lintr.R | 3 +-- R/IDateTime.R | 4 ++-- R/as.data.table.R | 2 +- R/bmerge.R | 2 +- R/cedta.R | 2 +- R/data.table.R | 24 +++++++++++++----------- R/fcast.R | 2 +- R/fread.R | 2 +- R/utils.R | 2 +- inst/tests/benchmark.Rraw | 2 +- inst/tests/tests.Rraw | 2 +- 11 files changed, 24 insertions(+), 23 deletions(-) diff --git a/.ci/.lintr.R b/.ci/.lintr.R index a62745b15..e76eb918e 100644 --- a/.ci/.lintr.R +++ b/.ci/.lintr.R @@ -60,12 +60,10 @@ linters = c(dt_linters, all_linters( implicit_assignment_linter = NULL, implicit_integer_linter = NULL, keyword_quote_linter = NULL, - missing_argument_linter = NULL, nzchar_linter = NULL, object_overwrite_linter = NULL, paren_body_linter = NULL, redundant_equals_linter = NULL, - return_linter = NULL, scalar_in_linter = NULL, undesirable_function_linter = NULL, unnecessary_concatenation_linter = NULL, @@ -103,6 +101,7 @@ exclusions = c(local({ comparison_negation_linter = Inf, duplicate_argument_linter = Inf, equals_na_linter = Inf, + missing_argument_linter = Inf, paste_linter = Inf, rep_len_linter = Inf, sample_int_linter = Inf, diff --git a/R/IDateTime.R b/R/IDateTime.R index 712fff1d8..713f72fa2 100644 --- a/R/IDateTime.R +++ b/R/IDateTime.R @@ -129,7 +129,7 @@ round.IDate = function(x, digits=c("weeks", "months", "quarters", "years"), ...) } ans = as.integer(unclass(e1) - unclass(e2)) if (!inherits(e2, "Date")) setattr(ans, "class", c("IDate", "Date")) - return(ans) + ans } @@ -177,7 +177,7 @@ as.ITime.character = function(x, format, ...) { w = w[!nna] } } - return(as.ITime(y, ...)) + as.ITime(y, ...) } as.ITime.POSIXlt = function(x, ms = 'truncate', ...) { diff --git a/R/as.data.table.R b/R/as.data.table.R index e480b8007..94b1998b8 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -251,5 +251,5 @@ as.data.table.data.table = function(x, ...) { x = copy(x) # #1681 # fix for #1078 and #1128, see .resetclass() for explanation. setattr(x, 'class', .resetclass(x, "data.table")) - return(x) + x } diff --git a/R/bmerge.R b/R/bmerge.R index ff40fddb4..f32a19f42 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -185,5 +185,5 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos # TO DO: xo could be moved inside Cbmerge ans$xo = xo # for further use by [.data.table - return(ans) + ans } diff --git a/R/cedta.R b/R/cedta.R index 35f382054..0071af0df 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -35,7 +35,7 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow the_call <- calls[[ii]][[1L]] if (is.name(the_call) && (the_call %chin% c("eval", "evalq"))) return(TRUE) } - return(FALSE) + FALSE } # nocov end diff --git a/R/data.table.R b/R/data.table.R index ce5da744a..1b8d46a63 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -976,7 +976,7 @@ replace_dot_alias = function(e) { if (length(q) == 4L && !is.null(q[[4L]])) q[[4L]] = do_j_names(q[[4L]]) return(q) } - return(q) + q } if (is.name(jsub)) { # j is a single unquoted column name @@ -1453,13 +1453,14 @@ replace_dot_alias = function(e) { # independently by group & attr mismatch among groups is ignored. The latter # is a more general issue but the former can be fixed by forcing units='secs' SDenv$`-.POSIXt` = function(e1, e2) { - if (inherits(e2, 'POSIXt')) { - if (verbose && !get0('done_units_report', parent.frame(), ifnotfound = FALSE)) { - catf('\nNote: forcing units="secs" on implicit difftime by group; call difftime explicitly to choose custom units\n') - assign('done_units_report', TRUE, parent.frame()) - } - return(difftime(e1, e2, units='secs')) - } else return(base::`-.POSIXt`(e1, e2)) + if (!inherits(e2, 'POSIXt')) { + return(base::`-.POSIXt`(e1, e2)) + } + if (verbose && !get0('done_units_report', parent.frame(), ifnotfound = FALSE)) { + catf('\nNote: forcing units="secs" on implicit difftime by group; call difftime explicitly to choose custom units\n') + assign('done_units_report', TRUE, parent.frame()) + } + difftime(e1, e2, units='secs') } if (byjoin) { @@ -2293,7 +2294,7 @@ transform.data.table = function(`_data`, ...) if (!cedta()) return(NextMethod()) # nocov `_data` = copy(`_data`) e = eval(substitute(list(...)), `_data`, parent.frame()) - set(`_data`, ,names(e), e) + set(`_data`, NULL, names(e), e) `_data` } @@ -3052,7 +3053,8 @@ is_constantish = function(q, check_singleton=FALSE) { if (is.symbol(q1)) return(if (q1 %chin% gfuns) q1) if (!q1 %iscall% "::") return(NULL) if (q1[[2L]] != "data.table") return(NULL) - return(if (q1[[3L]] %chin% gdtfuns) q1[[3L]]) + if (q1[[3L]] %chin% gdtfuns) return(q1[[3L]]) + NULL } .gforce_ok = function(q, x) { if (is.N(q)) return(TRUE) # For #334 @@ -3336,5 +3338,5 @@ is_constantish = function(q, check_singleton=FALSE) { ## the final on will contain the xCol as name, the iCol as value on = iCols names(on) = xCols - return(list(on = on, ops = idx_op)) + list(on = on, ops = idx_op) } diff --git a/R/fcast.R b/R/fcast.R index d2423cd31..e0b5ffbde 100644 --- a/R/fcast.R +++ b/R/fcast.R @@ -5,7 +5,7 @@ guess = function(x) { return("(all)") var = names(x)[ncol(x)] messagef("Using '%s' as value column. Use 'value.var' to override", var) - return(var) + var } dcast <- function( diff --git a/R/fread.R b/R/fread.R index da44d6be0..94dbd351d 100644 --- a/R/fread.R +++ b/R/fread.R @@ -317,7 +317,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") }, warning = fun <- function(e) { warningf("Column '%s' was requested to be '%s' but fread encountered the following %s:\n\t%s\nso the column has been left as type '%s'", names(ans)[j], new_class, if (inherits(e, "error")) "error" else "warning", e$message, typeof(v)) - return(v) + v }, error = fun) set(ans, j = j, value = new_v) # aside: new_v == v if the coercion was aborted diff --git a/R/utils.R b/R/utils.R index dd772c064..06f239fed 100644 --- a/R/utils.R +++ b/R/utils.R @@ -144,7 +144,7 @@ is_utc = function(tz) { # via grep('UTC|GMT', OlsonNames(), value = TRUE); ordered by "prior" frequency utc_tz = c("UTC", "GMT", "Etc/UTC", "Etc/GMT", "GMT-0", "GMT+0", "GMT0") if (is.null(tz)) tz = Sys.timezone() - return(tz %chin% utc_tz) + tz %chin% utc_tz } # very nice idea from Michael to avoid expression repetition (risk) in internal code, #4226 diff --git a/inst/tests/benchmark.Rraw b/inst/tests/benchmark.Rraw index f8accb765..2a183453b 100644 --- a/inst/tests/benchmark.Rraw +++ b/inst/tests/benchmark.Rraw @@ -360,7 +360,7 @@ fff = function(aref) { ff = lapply(1:5, function(i) { DT[,list(sumA=sum(get(aref))),by=b][,c:=letters[i]] }) - return(rbindlist(ff)) + rbindlist(ff) } for(i in 1:100) { f = fff("a") diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 0a5de42c8..09c91cb4f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -6276,7 +6276,7 @@ allIndicesValid <- function(DT){ return(FALSE) } } - return(TRUE) + TRUE } ## on data.table where indices are not integer(0) From eedd1ba96b514aca2402ef27c3f140f91f2a2ac4 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 9 May 2024 17:30:36 +0000 Subject: [PATCH 2/8] add scalar_in_linter --- .ci/.lintr.R | 1 - R/data.table.R | 4 ++-- inst/tests/tests.Rraw | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.ci/.lintr.R b/.ci/.lintr.R index e76eb918e..263c28630 100644 --- a/.ci/.lintr.R +++ b/.ci/.lintr.R @@ -64,7 +64,6 @@ linters = c(dt_linters, all_linters( object_overwrite_linter = NULL, paren_body_linter = NULL, redundant_equals_linter = NULL, - scalar_in_linter = NULL, undesirable_function_linter = NULL, unnecessary_concatenation_linter = NULL, unnecessary_nesting_linter = NULL, diff --git a/R/data.table.R b/R/data.table.R index 1b8d46a63..035679dfe 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2805,7 +2805,7 @@ setDF = function(x, rownames=NULL) { if (is.null(xn)) { setattr(x, "names", paste0("V",seq_len(length(x)))) } else { - idx = xn %chin% "" + idx = !nzchar(xn) # NB: keepNA=FALSE intentional if (any(idx)) { xn[idx] = paste0("V", seq_along(which(idx))) setattr(x, "names", xn) @@ -2878,7 +2878,7 @@ setDT = function(x, keep.rownames=FALSE, key=NULL, check.names=FALSE) { if (is.null(xn)) { setattr(x, "names", paste0("V", seq_along(x))) } else { - idx = xn %chin% "" # names can be NA - test 1006 caught that! + idx = !nzchar(xn) # NB: keepNA=FALSE intentionally, see test 1006 if (any(idx)) { xn[idx] = paste0("V", seq_along(which(idx))) setattr(x, "names", xn) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 09c91cb4f..0ef4d0fb6 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -1,4 +1,4 @@ -# in order as they're attached in a normal R session, to match that if these actually have an effect, e.g. under R_DEFAULT_PACKAGES=NULL +￿# in order as they're attached in a normal R session, to match that if these actually have an effect, e.g. under R_DEFAULT_PACKAGES=NULL # NB: pos= is required for these symbols to resolve searching 'upward' from data.table -- if these packages are not already attached, # and we don't use pos=, they'll wind up 'below' data.table on the search() path --> their symbols won't resolve since, when running # from the installed package, this is evaluated from data.table's namespace. @@ -2153,7 +2153,7 @@ test(696, DT[a>2,sum(c),by=(b+1)%%2], data.table(b=c(0,1),V1=c(34L,42L))) # Test subset and %chin% crash with non-character input, #2131 test(697, 4 %chin% letters, error="type") test(698, 4L %chin% letters, error="type") -test(699, "a" %chin% 4, error="type") +test(699, "a" %chin% 4, error="type") # nolint: scalar_in_linter. Testing %chin% behavior. DT = data.table(aa=1:6,bb=7:12) test(700, subset(DT,select="aa"), DT[,list(aa)]) test(701, subset(DT,select=aa), DT[,list(aa)]) From b7c886dc93f3d5f7cdb97124d64762692fadd8c4 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 9 May 2024 17:39:11 +0000 Subject: [PATCH 3/8] nzchar_linter too --- .ci/.lintr.R | 3 +-- inst/tests/benchmark.Rraw | 2 +- inst/tests/other.Rraw | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.ci/.lintr.R b/.ci/.lintr.R index 263c28630..6659efbc8 100644 --- a/.ci/.lintr.R +++ b/.ci/.lintr.R @@ -54,13 +54,11 @@ linters = c(dt_linters, all_linters( todo_comment_linter = NULL, # TODO(michaelchirico): Enforce these and re-activate them one-by-one. brace_linter = NULL, - condition_call_linter = NULL, fixed_regex_linter = NULL, if_not_else_linter = NULL, implicit_assignment_linter = NULL, implicit_integer_linter = NULL, keyword_quote_linter = NULL, - nzchar_linter = NULL, object_overwrite_linter = NULL, paren_body_linter = NULL, redundant_equals_linter = NULL, @@ -98,6 +96,7 @@ exclusions = c(local({ undesirable_operator_linter = Inf, # For ':::', possibly we could be more careful to only exclude ':::'. # TODO(michaelchirico): Enforce these and re-activate them one-by-one. comparison_negation_linter = Inf, + condition_call_linter = Inf, duplicate_argument_linter = Inf, equals_na_linter = Inf, missing_argument_linter = Inf, diff --git a/inst/tests/benchmark.Rraw b/inst/tests/benchmark.Rraw index 2a183453b..957c297fa 100644 --- a/inst/tests/benchmark.Rraw +++ b/inst/tests/benchmark.Rraw @@ -490,7 +490,7 @@ out = capture.output(print(DT)) tt = out[grep("V",out)] tt = unlist(strsplit(gsub(" ","",tt), "V")) test(1982.1, tt[1L], "") -tt = as.integer(tt[tt!=""]) +tt = as.integer(tt[nzchar(tt)]) test(1982.2, tt, seq_along(tt)) # fread leak, #3292 diff --git a/inst/tests/other.Rraw b/inst/tests/other.Rraw index 72d9c8975..80066b304 100644 --- a/inst/tests/other.Rraw +++ b/inst/tests/other.Rraw @@ -408,7 +408,7 @@ if (loaded[["yaml"]]) { # csvy; #1701. Was 2032-2033 in tests.Rraw, #5516 fwrite(DT, f<-tempfile(), bom=TRUE, yaml=TRUE) fcon = file(f, encoding="UTF-8") # Windows readLines needs to be told; see also test 1658.50 in tests.Rraw lines = readLines(fcon) - lines = lines[lines!=""] # an extra "" after "eol: |2+" (line 16) on Linux but not Windows + lines = lines[nzchar(lines)] # an extra "" after "eol: |2+" (line 16) on Linux but not Windows # remove the blank here so we don't need to change this test if/when that changes in yaml package test(17.11, length(lines), 48L) close(fcon) @@ -417,7 +417,7 @@ if (loaded[["yaml"]]) { # csvy; #1701. Was 2032-2033 in tests.Rraw, #5516 fwrite(DT, f<-tempfile(), bom=TRUE, yaml=TRUE) fcon = file(f, encoding="UTF-8") lines = readLines(fcon) - lines = lines[lines!=""] + lines = lines[nzchar(lines)] test(17.13, length(lines), 48L) close(fcon) test(17.14, fread(f), DT) From efc3168e9c49cbcce880f863095243e7c27b68e2 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 9 May 2024 17:41:52 +0000 Subject: [PATCH 4/8] errant character --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 0ef4d0fb6..fb3f64a56 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -1,4 +1,4 @@ -￿# in order as they're attached in a normal R session, to match that if these actually have an effect, e.g. under R_DEFAULT_PACKAGES=NULL +# in order as they're attached in a normal R session, to match that if these actually have an effect, e.g. under R_DEFAULT_PACKAGES=NULL # NB: pos= is required for these symbols to resolve searching 'upward' from data.table -- if these packages are not already attached, # and we don't use pos=, they'll wind up 'below' data.table on the search() path --> their symbols won't resolve since, when running # from the installed package, this is evaluated from data.table's namespace. From 1da48c4fd7dd29bf1a25b218aaa9dd5dece7de0c Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 9 May 2024 18:09:47 +0000 Subject: [PATCH 5/8] errant character was masking more lints; fixed --- R/as.data.table.R | 2 +- R/data.table.R | 10 +++++----- R/fmelt.R | 6 +++--- R/fread.R | 8 ++++---- R/fwrite.R | 2 +- R/merge.R | 2 +- R/print.data.table.R | 4 ++-- R/shift.R | 2 +- R/utils.R | 4 ++-- inst/tests/tests.Rraw | 6 +++--- vignettes/datatable-reshape.Rmd | 2 +- 11 files changed, 24 insertions(+), 24 deletions(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index 94b1998b8..1addc58aa 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -180,7 +180,7 @@ as.data.table.list = function(x, if (eachnrow[i]>1L && nrow%%eachnrow[i]!=0L) # in future: eachnrow[i]!=nrow warningf("Item %d has %d rows but longest item has %d; recycled with remainder.", i, eachnrow[i], nrow) if (is.data.table(xi)) { # matrix and data.frame were coerced to data.table above - prefix = if (!isFALSE(.named[i]) && isTRUE(nchar(names(x)[i])>0L)) paste0(names(x)[i],".") else "" # test 2058.12 + prefix = if (!isFALSE(.named[i]) && isTRUE(nzchar(names(x)[i], keepNA=TRUE))) paste0(names(x)[i],".") else "" # test 2058.12 for (j in seq_along(xi)) { ans[[k]] = recycle(xi[[j]], nrow) vnames[k] = paste0(prefix, names(xi)[j]) diff --git a/R/data.table.R b/R/data.table.R index 035679dfe..5773bca73 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -618,7 +618,7 @@ replace_dot_alias = function(e) { else stopf("i evaluates to a logical vector length %d but there are %d rows. Recycling of logical i is no longer allowed as it hides more bugs than is worth the rare convenience. Explicitly use rep(...,length=.N) if you really need to recycle.", length(i), nrow(x)) } else { irows = as.integer(i) # e.g. DT[c(1,3)] and DT[c(-1,-3)] ok but not DT[c(1,-3)] (caught as error) - if (nomatch0) warning("Please use nomatch=NULL instead of nomatch=0; see news item 5 in v1.12.0 (Jan 2019)") + if (nomatch0) warningf("Please use nomatch=NULL instead of nomatch=0; see news item 5 in v1.12.0 (Jan 2019)") # warning only for this case where nomatch was ignored before v1.14.2; #3109 irows = .Call(CconvertNegAndZeroIdx, irows, nrow(x), is.null(jsub) || root!=":=", # allowOverMax (NA when selecting, error when assigning) @@ -803,7 +803,7 @@ replace_dot_alias = function(e) { nzidx = nzchar(bysub) # by='' means by=NULL, tests 592&596 if (!all(nzidx)) { - if (length(bysub) > 1L) stop("At least one entry of by is empty") + if (length(bysub) > 1L) stopf("At least one entry of by is empty") bysub = NULL } else { bysub = as.call(c(list(quote(list)), lapply(bysub, as.name))) @@ -948,7 +948,7 @@ replace_dot_alias = function(e) { nm = names(q[-1L]) # check list(a=sum(v),v) if (is.null(nm)) nm = rep.int("", qlen-1L) # attempt to auto-name unnamed columns - for (jj in which(nm=="")) { + for (jj in which(!nzchar(nm))) { thisq = q[[jj + 1L]] if (missing(thisq)) stopf("Item %d of the .() or list() passed to j is missing", jj) #3507 if (is.name(thisq)) nm[jj] = drop_dot(thisq) @@ -1949,7 +1949,7 @@ replace_dot_alias = function(e) { if (is.null(jvnames)) jvnames = character(length(ans)-length(bynames)) if (length(bynames)+length(jvnames)!=length(ans)) stopf("Internal error: jvnames is length %d but ans is %d and bynames is %d", length(jvnames), length(ans), length(bynames)) # nocov - ww = which(jvnames=="") + ww = which(!nzchar(jvnames)) if (any(ww)) jvnames[ww] = paste0("V",ww) setattr(ans, "names", c(bynames, jvnames)) } else { @@ -3293,7 +3293,7 @@ is_constantish = function(q, check_singleton=FALSE) { ## search for column names thisCols = c(thisCols, trimws(strsplit(pieces[[i]][j], pat)[[1L]])) ## there can be empty string column names because of trimws, remove them - thisCols = thisCols[thisCols != ""] + thisCols = thisCols[nzchar(thisCols)] j = j+1L } } diff --git a/R/fmelt.R b/R/fmelt.R index fc01f327e..c7d2661bd 100644 --- a/R/fmelt.R +++ b/R/fmelt.R @@ -39,7 +39,7 @@ measure = function(..., sep="_", pattern, cols, multiple.keyword="value.name") { formal.names = names(formals()) formal.i.vec = which(names(L) %in% formal.names) fun.list = L[-formal.i.vec] - user.named = names(fun.list) != "" + user.named = nzchar(names(fun.list)) is.symb = sapply(fun.list, is.symbol) bad.i = which((!user.named) & (!is.symb)) if (length(bad.i)) { @@ -73,7 +73,7 @@ measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.na if (!missing(sep) && !missing(pattern)) { stopf("both sep and pattern arguments used; must use either sep or pattern (not both)") } - if (!(is.character(multiple.keyword) && length(multiple.keyword)==1 && !is.na(multiple.keyword) && nchar(multiple.keyword)>0)) { + if (!(is.character(multiple.keyword) && length(multiple.keyword)==1 && !is.na(multiple.keyword) && nzchar(multiple.keyword))) { stopf("multiple.keyword must be a character string with nchar>0") } if (!is.character(cols)) { @@ -82,7 +82,7 @@ measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.na prob.i <- if (is.null(names(fun.list))) { seq_along(fun.list) } else { - which(names(fun.list) == "") + which(!nzchar(names(fun.list))) } if (length(prob.i)) { stopf("in measurev, %s must be named, problems: %s", group.desc, brackify(prob.i)) diff --git a/R/fread.R b/R/fread.R index 94dbd351d..fc22e9c54 100644 --- a/R/fread.R +++ b/R/fread.R @@ -153,10 +153,10 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") if (length(tt)) { msg = gettextf('na.strings[%d]=="%s" consists only of whitespace, ignoring', tt[1L], na.strings[tt[1L]]) if (strip.white) { - if (any(na.strings=="")) { - warningf('%s. strip.white==TRUE (default) and "" is present in na.strings, so any number of spaces in string columns will already be read as .', msg) - } else { + if (all(nzchar(na.strings))) { warningf('%s. Since strip.white=TRUE (default), use na.strings="" to specify that any number of spaces in a string column should be read as .', msg) + } else { + warningf('%s. strip.white==TRUE (default) and "" is present in na.strings, so any number of spaces in string columns will already be read as .', msg) } na.strings = na.strings[-tt] } else { @@ -299,7 +299,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") } colClassesAs = attr(ans, "colClassesAs", exact=TRUE) # should only be present if one or more are != "" - for (j in which(colClassesAs!="")) { # # 1634 + for (j in which(nzchar(colClassesAs))) { # # 1634 v = .subset2(ans, j) new_class = colClassesAs[j] new_v = tryCatch({ # different to read.csv; i.e. won't error if a column won't coerce (fallback with warning instead) diff --git a/R/fwrite.R b/R/fwrite.R index 37968d4ea..ad92859f3 100644 --- a/R/fwrite.R +++ b/R/fwrite.R @@ -48,7 +48,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto", } stopifnot(is.list(x), identical(quote,"auto") || isTRUEorFALSE(quote), - is.character(sep) && length(sep)==1L && (nchar(sep) == 1L || sep == ""), + is.character(sep) && length(sep)==1L && (nchar(sep) == 1L || identical(sep, "")), is.character(sep2) && length(sep2)==3L && nchar(sep2[2L])==1L, is.character(dec) && length(dec)==1L && nchar(dec) == 1L, dec != sep, # sep2!=dec and sep2!=sep checked at C level when we know if list columns are present diff --git a/R/merge.R b/R/merge.R index 8062d91fc..eabcc56e2 100644 --- a/R/merge.R +++ b/R/merge.R @@ -59,7 +59,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL if (length(list(...))) { ell = as.list(substitute(list(...)))[-1L] for (n in setdiff(names(ell), "")) warningf("Unknown argument '%s' has been passed.", n) - unnamed_n = length(ell) - sum(names(ell) != "") + unnamed_n = length(ell) - sum(nzchar(names(ell), keepNA=TRUE)) if (unnamed_n) warningf("Passed %d unknown and unnamed arguments.", unnamed_n) } diff --git a/R/print.data.table.R b/R/print.data.table.R index f16a39625..e7ff54ecc 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -79,7 +79,7 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), # FR #353 - add row.names = logical argument to print.data.table if (isTRUE(row.names)) rownames(toprint)=paste0(format(rn,right=TRUE,scientific=FALSE),":") else rownames(toprint)=rep.int("", nrow(toprint)) - if (is.null(names(x)) || all(names(x) == "")) + if (is.null(names(x)) || !any(nzchar(names(x), keepNA=TRUE))) # fixes bug #97 and #545 colnames(toprint)=rep("", ncol(toprint)) if (isTRUE(class) && col.names != "none") { @@ -148,7 +148,7 @@ mimicsAutoPrint = c("knit_print.default") # add maybe repr_text.default. See https://github.com/Rdatatable/data.table/issues/933#issuecomment-220237965 shouldPrint = function(x) { - ret = (.global$print=="" || # to save address() calls and adding lots of address strings to R's global cache + ret = (identical(.global$print, "") || # to save address() calls and adding lots of address strings to R's global cache address(x)!=.global$print) .global$print = "" ret diff --git a/R/shift.R b/R/shift.R index 064eea20c..1c68d13c4 100644 --- a/R/shift.R +++ b/R/shift.R @@ -1,6 +1,6 @@ shift = function(x, n=1L, fill, type=c("lag", "lead", "shift", "cyclic"), give.names=FALSE) { type = match.arg(type) - if (type == "cyclic" && !missing(fill)) warning("Provided argument fill=", fill, " will be ignored since type='shift'.") + if (type == "cyclic" && !missing(fill)) warningf("Provided argument fill=%s will be ignored since type='cyclic'.", fill) if (missing(fill)) fill = NA stopifnot(is.numeric(n)) ans = .Call(Cshift, x, as.integer(n), fill, type) diff --git a/R/utils.R b/R/utils.R index 06f239fed..7d4128a1d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -25,7 +25,7 @@ if (!exists('startsWith', 'package:base', inherits=FALSE)) { # R 3.3.0; Apr 201 startsWith = function(x, stub) substr(x, 1L, nchar(stub))==stub } # endsWith no longer used from #5097 so no need to backport; prevent usage to avoid dev delay until GLCI's R 3.1.0 test -endsWith = function(...) stop("Internal error: use endsWithAny instead of base::endsWith") +endsWith = function(...) stop("Internal error: use endsWithAny instead of base::endsWith", call.=FALSE) startsWithAny = function(x,y) .Call(CstartsWithAny, x, y, TRUE) endsWithAny = function(x,y) .Call(CstartsWithAny, x, y, FALSE) @@ -92,7 +92,7 @@ name_dots = function(...) { } else { vnames[is.na(vnames)] = "" } - notnamed = vnames=="" + notnamed = !nzchar(vnames) if (any(notnamed)) { syms = vapply_1b(dot_sub, is.symbol) # save the deparse() in most cases of plain symbol for (i in which(notnamed)) { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index fb3f64a56..6a0728cce 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -10409,7 +10409,7 @@ test(1693.11, yearqtr(t), 2016.5) # fix for #1740 - sub-assigning NAs for factors dt = data.table(x = 1:5, y = factor(c("","a","b","a", "")), z = 5:9) ans = data.table(x = 1:5, y = factor(c(NA,"a","b","a", NA)), z = 5:9) -test(1694.0, dt[y=="", y := NA], ans) +test(1694.0, dt[!nzchar(y, keepNA=TRUE), y := NA], ans) # more tests for between() x = c(NaN, NA, 1, 5, -Inf, Inf) @@ -17309,11 +17309,11 @@ schools.wide <- data.table( read_2 = c(1.2,2.2), math_1 = c(10.1,20.1), math_1_sp = c(TRUE,TRUE), math_2 = c(NA,20.2), math_2_sp = c(NA,FALSE)) -schools.tall <- melt(schools.wide, na.rm=TRUE, measure.vars=measure(subject, number=as.integer, value.name=function(x)ifelse(x=="", "score", "sp"), pattern="([^_]+)_([12])(.*)")) +schools.tall <- melt(schools.wide, na.rm=TRUE, measure.vars=measure(subject, number=as.integer, value.name=function(x)ifelse(nzchar(x, keepNA=TRUE), "sp", "score"), pattern="([^_]+)_([12])(.*)")) schools.expected = data.table(school=c("A","B","A","B","B"), subject=c("read","read","math","math","math"), number=as.integer(c(1,1,1,1,2)), score=c(1.1,2.1,10.1,20.1,20.2), sp=c(TRUE,TRUE,TRUE,TRUE,FALSE)) test(2183.21, schools.tall, schools.expected) who <- data.table(id=1, new_sp_m5564=2, newrel_f65=3) -test(2183.22, melt(who, measure.vars=measure(diagnosis, gender, ages, ymin=as.numeric, ymax=function(y)ifelse(y=="", Inf, as.numeric(y)), pattern="new_?(?.*)_(?.)(?(?0|[0-9]{2})(?[0-9]{0,2}))")), data.table(id=1, diagnosis=c("sp","rel"), gender=c("m","f"), ages=c("5564","65"), ymin=c(55,65), ymax=c(64,Inf), value=c(2,3))) +test(2183.22, melt(who, measure.vars=measure(diagnosis, gender, ages, ymin=as.numeric, ymax=function(y)ifelse(nzchar(y, keepNA=TRUE), as.numeric(y), Inf), pattern="new_?(?.*)_(?.)(?(?0|[0-9]{2})(?[0-9]{0,2}))")), data.table(id=1, diagnosis=c("sp","rel"), gender=c("m","f"), ages=c("5564","65"), ymin=c(55,65), ymax=c(64,Inf), value=c(2,3))) wide.again = dcast(schools.tall, school ~ subject + number, value.var = c("score","sp")) # measure with sep= test(2183.23, melt(wide.again, na.rm=TRUE, measure.vars=measure(value.name, subject, number=as.integer))[order(score)], schools.expected)#should work without sep due to same default _ as dcast. diff --git a/vignettes/datatable-reshape.Rmd b/vignettes/datatable-reshape.Rmd index d05cdb10c..41b36c1a0 100644 --- a/vignettes/datatable-reshape.Rmd +++ b/vignettes/datatable-reshape.Rmd @@ -284,7 +284,7 @@ function, melt(who, measure.vars = measure( diagnosis, gender, ages, ymin=as.numeric, - ymax=function(y)ifelse(y=="", Inf, as.numeric(y)), + ymax=function(y) ifelse(nzchar(y), as.numeric(y), Inf), pattern="new_?(.*)_(.)(([0-9]{2})([0-9]{0,2}))" )) ``` From 8df7f6bd320e9f369790dfd3b8e67f978f1c055b Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 9 May 2024 18:21:00 +0000 Subject: [PATCH 6/8] correct error message in test --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 6a0728cce..829507705 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -6812,7 +6812,7 @@ test(1463.67, shift(x, -5, type="cyclic"), x) test(1463.68, shift(x, 6, type="cyclic"), shift(x, 1, type="cyclic")) test(1463.69, shift(x, -6, type="cyclic"), shift(x, -1, type="cyclic")) # test warning -test(1463.70, shift(x, 1, fill=1, type="cyclic"), c(5L, 1L:4L), warning="Provided argument fill=1 will be ignored since type='shift'.") +test(1463.70, shift(x, 1, fill=1, type="cyclic"), c(5L, 1L:4L), warning="Provided argument fill=1 will be ignored since type='cyclic'.") # test raw #5547 x=as.raw(1:5) From 29ee88bd4ae8ed29cad940a6dcc6d8ca0a0ad0b3 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 9 May 2024 18:22:21 +0000 Subject: [PATCH 7/8] fix test, use nolint --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 829507705..021b3f19e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -10409,7 +10409,7 @@ test(1693.11, yearqtr(t), 2016.5) # fix for #1740 - sub-assigning NAs for factors dt = data.table(x = 1:5, y = factor(c("","a","b","a", "")), z = 5:9) ans = data.table(x = 1:5, y = factor(c(NA,"a","b","a", NA)), z = 5:9) -test(1694.0, dt[!nzchar(y, keepNA=TRUE), y := NA], ans) +test(1694.0, dt[y=="", y := NA], ans) # nolint: nzchar_linter. Using factor auto-coercion convenience. # more tests for between() x = c(NaN, NA, 1, 5, -Inf, Inf) From bb63c8b13368928ad9e45ebcb93a031f73847288 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 9 May 2024 11:30:42 -0700 Subject: [PATCH 8/8] remove overly-cautious keepNA=TRUE --- R/merge.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/merge.R b/R/merge.R index eabcc56e2..aabaaf740 100644 --- a/R/merge.R +++ b/R/merge.R @@ -59,7 +59,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL if (length(list(...))) { ell = as.list(substitute(list(...)))[-1L] for (n in setdiff(names(ell), "")) warningf("Unknown argument '%s' has been passed.", n) - unnamed_n = length(ell) - sum(nzchar(names(ell), keepNA=TRUE)) + unnamed_n = length(ell) - sum(nzchar(names(ell))) if (unnamed_n) warningf("Passed %d unknown and unnamed arguments.", unnamed_n) }