diff --git a/NAMESPACE b/NAMESPACE index b9872ee7e..bf2d9fe10 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -202,3 +202,4 @@ S3method(format_list_item, default) export(fdroplevels) S3method(droplevels, data.table) +export(frev) diff --git a/NEWS.md b/NEWS.md index 9163d9eb3..742beb188 100644 --- a/NEWS.md +++ b/NEWS.md @@ -58,6 +58,8 @@ 7. `melt` returns an integer column for `variable` when `measure.vars` is a list of length=1, consistent with the documented behavior, [#5209](https://github.com/Rdatatable/data.table/issues/5209). Thanks to @tdhock for reporting and fixing. Any users who were relying on this behavior can change `measure.vars=list("col_name")` (output `variable` was column name, now is column index/integer) to `measure.vars="col_name"` (`variable` still is column name). +8. New `frev(x)` as a faster analogue to `base::rev()` for atomic vectors/lists, [#5885](https://github.com/Rdatatable/data.table/issues/5885). Twice as fast as `base::rev()` on large inputs, and faster with more threads. Thanks to Benjamin Schwendinger for suggesting and implementing. + ## NOTES 1. `transform` method for data.table sped up substantially when creating new columns on large tables. Thanks to @OfekShilon for the report and PR. The implemented solution was proposed by @ColeMiller1. 
diff --git a/R/wrappers.R b/R/wrappers.R index a018b91ae..4381978ff 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -16,3 +16,6 @@ isRealReallyInt = function(x) .Call(CisRealReallyIntR, x) isReallyReal = function(x) .Call(CisReallyReal, x) coerceAs = function(x, as, copy=TRUE) .Call(CcoerceAs, x, as, copy) + +frev = function(x) .Call(Cfrev, x, TRUE) +setrev = function(x) invisible(.Call(Cfrev, x, FALSE)) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 90ba1f73d..83ef2e43e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -65,6 +65,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { setcoalesce = data.table:::setcoalesce setdiff_ = data.table:::setdiff_ setreordervec = data.table:::setreordervec + setrev = data.table:::setrev shallow = data.table:::shallow # until exported .shallow = data.table:::.shallow split.data.table = data.table:::split.data.table @@ -18566,3 +18567,58 @@ test(2261.04, setNumericRounding(2L), 1L) # or not an object is an invisible copy or not, and prints it anyways. 
test(2261.05, capture.output(setNumericRounding(2L)), character(0)) setNumericRounding(old) + +# 5885 implement frev +d = c(NA, NaN, Inf, -Inf) +test(2262.00, frev(c(FALSE, NA)), c(NA, FALSE)) +test(2262.01, frev(c(0L, NA)), c(NA, 0L)) +test(2262.02, frev(d), c(-Inf, Inf, NaN, NA)) +test(2262.03, frev(c(NA, 1, 0+2i)), c(0+2i, 1, NA)) +test(2262.04, frev(as.raw(0:1)), as.raw(1:0)) +test(2262.05, frev(NULL), NULL) +test(2262.06, frev(character(5)), character(5)) +test(2262.07, frev(integer(0)), integer(0)) +test(2262.08, frev(list(1, "a")), list("a", 1)) +test(2262.09, setrev(c(0L, NA)), c(NA, 0L)) +test(2262.10, setrev(d), c(-Inf, Inf, NaN, NA)) +test(2262.11, setrev(c(NA, 1, 0+2i)), c(0+2i, 1, NA)) +test(2262.12, setrev(as.raw(0:1)), as.raw(1:0)) +test(2262.13, setrev(NULL), NULL) +test(2262.14, setrev(character(5)), character(5)) +test(2262.15, setrev(integer(0)), integer(0)) +test(2262.16, setrev(list(1, "a")), list("a", 1)) +test(2262.17, frev(1:1e2), rev(1:1e2)) +# copy arguments +x = 1:3 +test(2262.21, {frev(x); x}, 1:3) +test(2262.22, {setrev(x); x}, 3:1) +test(2262.23, address(x) == address(setrev(x))) +test(2262.24, address(x) != address(frev(x))) +# do not alter on subsets +test(2262.25, {setrev(x[1:2]); x}, 1:3) +# levels +f = as.factor(letters) +test(2262.31, frev(f), rev(f)) +test(2262.32, frev(as.IDate(1:10)), as.IDate(10:1)) +test(2262.33, frev(as.IDate(1:10)), as.IDate(10:1)) +# names +x = c(a=1L, b=2L, c=3L) +test(2262.41, frev(x), rev(x)) +test(2262.42, setrev(x), x) +# attributes +x = structure(1:10, class = c("IDate", "Date"), att = 1L) +test(2262.51, attr(frev(x), "att"), attr(rev(x), "att")) +test(2262.52, class(frev(x)), class(rev(x))) +test(2262.53, attr(setrev(x), "att"), 1L) +test(2262.54, class(setrev(x)), c("IDate", "Date")) +x = structure(integer(0), att = 1L) +test(2262.55, attr(frev(x), "att"), attr(rev(x), "att")) +# errors +test(2262.61, frev(data.table()), error="should not be data.frame or data.table") +test(2262.62, 
frev(expression(1)), error="is not supported by frev") +test(2262.63, frev(matrix(1)), error="should not be matrix or array") +if (test_bit64) { + x = as.integer64(c(1, NA, 3)) + test(2262.71, frev(x), rev(x)) + test(2262.72, setrev(x), x) +} diff --git a/man/frev.Rd b/man/frev.Rd new file mode 100644 index 000000000..3a9ea1e0f --- /dev/null +++ b/man/frev.Rd @@ -0,0 +1,32 @@ +\name{frev} +\alias{frev} +\alias{rev} +\title{Fast reverse} +\description{ + Similar to \code{\link[base]{rev}} but \emph{much faster}. +} + +\usage{ +frev(x) +} +\arguments{ + \item{x}{ An atomic \code{vector} or \code{list}. } +} + +\details{ + \code{frev} keeps the \code{names}, \code{class} and factor \code{levels} of \code{x}; other attributes are generally not retained (similar to \code{\link[base]{rev}}). +} + +\value{ + \code{frev} returns the input reversed. +} + +\examples{ +# on vectors +x = setNames(1:26, letters) +frev(x[1:10]) + +# list +frev(list(1, "a")) +} +\keyword{ data } diff --git a/src/data.table.h b/src/data.table.h index 811e6a5a1..b62ace3ba 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -250,6 +250,7 @@ SEXP islockedR(SEXP x); bool need2utf8(SEXP x); SEXP coerceUtf8IfNeeded(SEXP x); SEXP coerceAs(SEXP x, SEXP as, SEXP copyArg); +SEXP frev(SEXP x, SEXP copyArg); // types.c char *end(char *start); diff --git a/src/init.c b/src/init.c index 54039584f..210c1395a 100644 --- a/src/init.c +++ b/src/init.c @@ -143,6 +143,7 @@ R_CallMethodDef callMethods[] = { {"CconvertDate", (DL_FUNC)&convertDate, -1}, {"Cnotchin", (DL_FUNC)&notchin, -1}, {"Cwarn_matrix_column_r", (DL_FUNC)&warn_matrix_column_r, -1}, +{"Cfrev", (DL_FUNC) &frev, -1}, {NULL, NULL, 0} }; diff --git a/src/utils.c b/src/utils.c index cc8dc5bc4..38e150891 100644 --- a/src/utils.c +++ b/src/utils.c @@ -435,3 +435,109 @@ SEXP startsWithAny(const SEXP x, const SEXP y, SEXP start) { return ScalarLogical(false); } +SEXP frev(SEXP x, SEXP copyArg) { + SEXP names, klass, levels; + if (INHERITS(x, char_dataframe)) + error(_("'x' should not be data.frame or data.table.")); + if (!isNull(getAttrib(x, 
R_DimSymbol))) + error(_("'x' should not be matrix or array")); + if (!IS_TRUE_OR_FALSE(copyArg)) + error(_("%s must be TRUE or FALSE."), "copy"); // # nocov + bool copy = LOGICAL(copyArg)[0]; + R_xlen_t n = xlength(x); + int nprotect = 0; + if (copy) { + x = PROTECT(duplicate(x)); + nprotect++; + } + if (n==0) { + UNPROTECT(nprotect); + return x; + } + switch (TYPEOF(x)) { + case LGLSXP: case INTSXP: { + int *restrict xd = INTEGER(x); + #pragma omp parallel for num_threads(getDTthreads(n, true)) + for (uint64_t i=0; i