Skip to content

Commit

Permalink
shift now accept type="shift" for more intuitive give.names=T output,… (
Browse files Browse the repository at this point in the history
  • Loading branch information
jangorecki authored and mattdowle committed Dec 20, 2018
1 parent fbd5b60 commit 6e60d7b
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 10 deletions.
6 changes: 5 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@

4. `NA` in `between()` and `%between%`'s `lower` and `upper` are now taken as missing bounds and return `TRUE` rather than than `NA`. This is now documented.

5. `shift()` now interprets negative values of `n` naturally to mean the opposite `type=`, [#1708](https://github.com/Rdatatable/data.table/issues/1708).
5. `shift()` now interprets negative values of `n` to mean the opposite `type=`, [#1708](https://github.com/Rdatatable/data.table/issues/1708). When `give.names=TRUE` the result is named using a positive `n` with the appropriate `type=`. Alternatively, a new `type="shift"` names the result using a signed `n` and constant type.
```R
shift(x, n=-5:5, give.names=TRUE) => "_lead_5" ... "_lag_5"
shift(x, n=-5:5, type="shift", give.names=TRUE) => "_shift_-5" ... "_shift_5"
```

5. `fwrite()` now accepts `matrix`, [#2613](https://github.com/Rdatatable/data.table/issues/2613). Thanks to Michael Chirico for the suggestion and Felipe Parages for implementing. For now matrix input is converted to data.table (which can be costly) before writing.

Expand Down
16 changes: 13 additions & 3 deletions R/shift.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,24 @@
shift <- function(x, n=1L, fill=NA, type=c("lag", "lead"), give.names=FALSE) {
shift <- function(x, n=1L, fill=NA, type=c("lag", "lead", "shift"), give.names=FALSE) {
type = match.arg(type)
ans = .Call(Cshift, x, as.integer(n), fill, type)
if (give.names) {
if (give.names && is.list(ans)) {
if (is.null(names(x))) {
xsub = substitute(x)
if (is.atomic(x) && is.name(xsub)) nx = deparse(xsub, 500L)
else nx = paste0("V", if (is.atomic(x)) 1L else seq_along(x))
}
else nx = names(x)
setattr(ans, 'names', do.call("paste", c(CJ(nx, type, n, sorted=FALSE), sep="_")))
if (type!="shift") {
# flip type for negative n, #3223
neg = (n<0L)
if (type=="lead") neg[ n==0L ] = TRUE # lead_0 should be named lag_0 for consistency
if (any(neg)) {
type = rep(type,length(n))
type[neg] = if (type[1L]=="lead") "lag" else "lead"
n[neg] = -n[neg]
}
}
setattr(ans, "names", paste(rep(nx,each=length(n)), type, n, sep="_"))
}
ans
}
16 changes: 13 additions & 3 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -12883,9 +12883,8 @@ test(1963.2, shift(DT$x, -1, type = 'lead'),
c(NA, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L))
test(1963.3, shift(DT$x, -1, fill = 0L),
c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 0L))
test(1963.4, shift(DT$x, -1, give.names = TRUE),
structure(c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA),
.Names = c("V1_lag_-1", NA, NA, NA, NA, NA, NA, NA, NA, NA)))
test(1963.4, shift(DT$x, -1, give.names = TRUE), # give.names is ignored because we do not return list
c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA))
test(1963.5, shift(DT$x, -1:1),
list(c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA), 1:10,
c(NA, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L)))
Expand All @@ -12900,6 +12899,17 @@ test(1963.7, shift(DT, -1:1),
## some coverage tests for good measure
test(1963.8, shift(DT$x, type = 'some_other_type'), error='should be one of.*lag.*lead')
test(1963.9, shift(c(1+3i, 2-1i)), error = 'Unsupported type')
test(1963.11, shift(DT, -1:1, type="shift", give.names = TRUE), # new type="shift" #3223
ans <- list(`x_shift_-1` = c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA),
x_shift_0 = 1:10,
x_shift_1 = c(NA, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L),
`y_shift_-1` = c(9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L, NA),
y_shift_0 = 10:1,
y_shift_1 = c(NA, 10L, 9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L)))
names(ans) <- c("x_lead_1", "x_lag_0", "x_lag_1", "y_lead_1", "y_lag_0", "y_lag_1")
test(1963.12, shift(DT, -1:1, type="lag", give.names = TRUE), ans)
test(1963.13, shift(DT, 1:-1, type="lead", give.names = TRUE), ans)


# 0 column data.table should not have rownames, #3149
M0 = matrix(1:6, nrow=3, ncol=2, dimnames=list(rows=paste0("id",1:3), cols=c("v1","v2")))
Expand Down
5 changes: 3 additions & 2 deletions man/shift.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
}

\usage{
shift(x, n=1L, fill=NA, type=c("lag", "lead"), give.names=FALSE)
shift(x, n=1L, fill=NA, type=c("lag", "lead", "shift"), give.names=FALSE)
}
\arguments{
\item{x}{ A vector, list, data.frame or data.table. }
\item{n}{ integer vector denoting the offset by which to lead or lag the input. To create multiple lead/lag vectors, provide multiple values to \code{n}; negative values of \code{n} will "flip" the value of \code{type}, i.e., \code{n=-1} and \code{type='lead'} is the same as \code{n=1} and \code{type='lag'}. }
\item{fill}{ Value to use for padding when the window goes beyond the input length. }
\item{type}{ default is \code{"lag"} (look "backwards"). The other possible value is \code{"lead"} (look "forwards"). }
\item{type}{ default is \code{"lag"} (look "backwards"). The other possible values \code{"lead"} (look "forwards") and \code{"shift"} (behave same as \code{"lag"} except given names). }
\item{give.names}{default is \code{FALSE} which returns an unnamed list. When \code{TRUE}, names are automatically generated corresponding to \code{type} and \code{n}. }
}
\details{
Expand All @@ -39,6 +39,7 @@ shift(x, n=1, fill=NA, type="lag")
shift(x, n=1:2, fill=0, type="lag")
# getting a window by using positive and negative n:
shift(x, n = -1:1)
shift(x, n = -1:1, type = "shift", give.names = TRUE)
# on data.tables
DT = data.table(year=2010:2014, v1=runif(5), v2=1:5, v3=letters[1:5])
Expand Down
3 changes: 2 additions & 1 deletion src/shift.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ SEXP shift(SEXP obj, SEXP k, SEXP fill, SEXP type) {
R_len_t i=0, j, m, nx, nk, xrows, thisk, protecti=0;
SEXP x, tmp=R_NilValue, elem, ans, thisfill, klass;
unsigned long long *dthisfill;
enum {LAG, LEAD} stype = LAG;
enum {LAG, LEAD/*, SHIFT, CYCLIC*/} stype = LAG; // currently SHIFT maps to LAG and CYCLIC is unimplemented (see comments in #1708)
if (!length(obj)) return(obj); // NULL, list()
if (isVectorAtomic(obj)) {
x = PROTECT(allocVector(VECSXP, 1)); protecti++;
Expand All @@ -26,6 +26,7 @@ SEXP shift(SEXP obj, SEXP k, SEXP fill, SEXP type) {

if (!strcmp(CHAR(STRING_ELT(type, 0)), "lag")) stype = LAG;
else if (!strcmp(CHAR(STRING_ELT(type, 0)), "lead")) stype = LEAD;
else if (!strcmp(CHAR(STRING_ELT(type, 0)), "shift")) stype = LAG; // when we get rid of nested if branches we can use SHIFT, for now it maps to LAG
else error("Internal error: invalid type for shift(), should have been caught before. please report to data.table issue tracker"); // # nocov

nx = length(x); nk = length(k);
Expand Down

0 comments on commit 6e60d7b

Please sign in to comment.