Skip to content

Commit

Permalink
Merge pull request #1325 from MichaelChirico/setDF
Browse files Browse the repository at this point in the history
Closes #1320; setDF gains the rownames argument
  • Loading branch information
arunsrinivasan committed Sep 11, 2015
2 parents 226f337 + 4627706 commit ee3f8bb
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 30 deletions.
74 changes: 47 additions & 27 deletions R/data.table.R
Expand Up @@ -2428,35 +2428,55 @@ address <- function(x) .Call(Caddress, eval(substitute(x), parent.frame()))

# Top-level definition of `:=`. Any direct call stops with an error telling the
# caller that := is only defined for use in j (see the message text below).
`:=` <- function(...) {
    stop('Check that is.data.table(DT) == TRUE. Otherwise, := and `:=`(...) are defined for use in j, once only and in particular ways. See help(":=").')
}

# Convert a data.table, data.frame or list of equal-length elements to a
# data.frame by reference: attributes are changed in place with setattr().
#
# Arguments:
#   x        - a data.table, a data.frame, or a list whose elements all have
#              the same length.
#   rownames - optional vector of unique row names, one per row. When NULL
#              (the default) a data.table or list receives the default row
#              names built by .set_row_names(), and a data.frame keeps the row
#              names it already has.
#
# Returns x, invisibly.
#
# Stops with an error when x is not a list, when rownames contains duplicates
# or does not have one entry per row, or when the elements of a plain list
# differ in length.
setDF <- function(x, rownames=NULL) {
    if (!is.list(x)) stop("setDF only accepts data.table, data.frame or list of equal length as input")
    # anyDuplicated(NULL) is 0L, so the default rownames=NULL passes this check.
    if (anyDuplicated(rownames) > 0L) stop("rownames contains duplicates")
    if (is.data.table(x)) {
        # copied from as.data.frame.data.table
        if (is.null(rownames)) {
            rn <- .set_row_names(nrow(x))
        } else {
            if (length(rownames) != nrow(x))
                stop("rownames incorrect length; expected ", nrow(x), " names, got ", length(rownames))
            rn <- rownames
        }
        setattr(x, "row.names", rn)
        setattr(x, "class", "data.frame")
        # strip the data.table-specific attributes
        setattr(x, "sorted", NULL)
        setattr(x, ".internal.selfref", NULL)
    } else if (is.data.frame(x)) {
        # Already a data.frame: only touch it when row names were supplied.
        if (!is.null(rownames)) {
            if (length(rownames) != nrow(x))
                stop("rownames incorrect length; expected ", nrow(x), " names, got ", length(rownames))
            setattr(x, "row.names", rownames)
        }
    } else {
        n = vapply(x, length, 0L)
        # max() of a zero-length vector is -Inf (with a warning); treat an
        # empty list as having zero rows instead.
        mn = if (length(n)) max(n) else 0L
        if (any(n<mn))
            stop("All elements in argument 'x' to 'setDF' must be of same length")
        xn = names(x)
        if (is.null(xn)) {
            # no names at all: V1, V2, ... by position
            setattr(x, "names", paste("V",seq_len(length(x)),sep=""))
        } else {
            # fill only the blank names, numbered in order among the blank ones
            idx = xn %chin% ""
            if (any(idx)) {
                xn[idx] = paste("V", seq_len(sum(idx)), sep="")
                setattr(x, "names", xn)
            }
        }
        if (is.null(rownames)) {
            rn <- .set_row_names(mn)
        } else {
            if (length(rownames) != mn)
                stop("rownames incorrect length; expected ", mn, " names, got ", length(rownames))
            rn <- rownames
        }
        setattr(x,"row.names", rn)
        setattr(x,"class","data.frame")
    }
    invisible(x)
}

setDT <- function(x, keep.rownames=FALSE, key=NULL) {
Expand Down
2 changes: 2 additions & 0 deletions README.md
Expand Up @@ -78,6 +78,8 @@

28. `fread()` gains `quote` argument with default value `"\""`. Setting `quote=""` disables (could be useful in reading columns with uneven quotes). Closes [#568](https://github.com/Rdatatable/data.table/issues/568). Also addresses/closes [#1256](https://github.com/Rdatatable/data.table/issues/1256), [#1077](https://github.com/Rdatatable/data.table/issues/1077), [#1079](https://github.com/Rdatatable/data.table/issues/1079) and [#1095](https://github.com/Rdatatable/data.table/issues/1095). Thanks to @Synergist, @daroczig, @geotheory and @rsaporta for the reports.

  29. `setDF()` gains a `rownames` argument for ready conversion to a `data.frame` with user-specified row names. Closes [#1320](https://github.com/Rdatatable/data.table/issues/1320). Thanks to @MichaelChirico for the FR and PR.

#### BUG FIXES

1. `if (TRUE) DT[,LHS:=RHS]` no longer prints, [#869](https://github.com/Rdatatable/data.table/issues/869) and [#1122](https://github.com/Rdatatable/data.table/issues/1122). Tests added. To get this to work we've had to live with one downside: if a `:=` is used inside a function with no `DT[]` before the end of the function, then the next time `DT` or `print(DT)` is typed at the prompt, nothing will be printed. A repeated `DT` or `print(DT)` will print. To avoid this: include a `DT[]` after the last `:=` in your function. If that is not possible (e.g., it's not a function you can change) then `DT[]` at the prompt is guaranteed to print. As before, adding an extra `[]` on the end of a `:=` query is a recommended idiom to update and then print; e.g. `> DT[,foo:=3L][]`. Thanks to Jureiss and Jan Gorecki for reporting.
Expand Down
18 changes: 18 additions & 0 deletions inst/tests/tests.Rraw
Expand Up @@ -4759,6 +4759,24 @@ df <- list(1:5, 6:10)
# converting the list via as.data.table first must give the same result as setDF on the list
test(1305.4, setDF(as.data.table(df)), setDF(df))
# input that is not a list is rejected
test(1305.5, setDF(1:5), error="setDF only accepts")
# list elements of unequal length are rejected
test(1305.6, setDF(list(1, 2:3)), error="All elements in argument")
# Tests .7 - .13 for FR #1320: setDF accepts rownames argument
# Fixtures: the same two columns as a data.table, a data.frame and a plain list.
dt <- data.table(a=1:5, b=6:10)
df <- data.frame(a=1:5, b=6:10)
lst <- list(a=1:5, b=6:10)
# df2 is the expected result: the same columns with row names A..E.
df2 <- data.frame(a=1:5, b=6:10)
rownames(df2) <- LETTERS[1:5]
# each input type must come out equal to df2 once rownames is supplied
test(1305.7, setDF(dt, rownames=LETTERS[1:5]), df2)
test(1305.8, setDF(df, rownames=LETTERS[1:5]), df2)
test(1305.9, setDF(lst,rownames=LETTERS[1:5]), df2)
# setDF returns an error for each type if rownames incorrect length
# The fixtures are rebuilt first: setDF works by reference, so the calls above
# already changed dt, df and lst in place.
dt <- data.table(a=1:5, b=6:10)
df <- data.frame(a=1:5, b=6:10)
lst <- list(a=1:5, b=6:10)
test(1305.10, setDF(dt, rownames="a"), error='rownames incorrect length')
test(1305.11, setDF(df, rownames="a"), error='rownames incorrect length')
test(1305.12, setDF(lst,rownames="a"), error='rownames incorrect length')
# setDF returns an error when rownames contains duplicates
# (correct length, 5, but every name is "a"; the failed call in 1305.10 stops
# before any attribute is set, so dt should be unchanged here)
test(1305.13, setDF(dt, rownames=rep("a",5)), error='rownames contains duplicates')

# .SD retains as much of head(key) as appropriate.
# by= always keeps data appearance order, so it's which columns are grouped and selected that drive how much of key is retained
Expand Down
12 changes: 9 additions & 3 deletions man/setDF.Rd
Expand Up @@ -7,14 +7,17 @@
A helper function to convert a \code{data.table} or \code{list} of equal length to \code{data.frame} by reference.
}
\usage{
setDF(x)
setDF(x, rownames=NULL)
}
\arguments{
\item{x}{ A \code{data.table}, \code{data.frame} or \code{list} of equal length. }
\item{rownames}{ A \code{character} vector to assign as the row names of \code{x}. }
}

\details{
This feature request came up on the data.table mailing list: \url{http://bit.ly/1xkokNQ}. All \code{data.table} attributes including any keys of the input data.table are stripped off.

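When using \code{rownames}, recall that the row names of a \code{data.frame} must be unique. When \code{rownames} is \code{NULL} (the default), a \code{data.table} or \code{list} is given the sequence 1, ..., \code{nrow(x)} as row names (for a \code{list}, the number of rows is the common length of its elements, not \code{length(x)}), while a \code{data.frame} keeps its existing row names.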
}

\value{
Expand All @@ -28,9 +31,12 @@ X = data.table(x=1:5, y=6:10)
## convert 'X' to data.frame, without any copy.
setDF(X)

X = data.table(x=1:5, y=6:10)
## same conversion by reference, this time also assigning row names
setDF(X, rownames = LETTERS[1:5])

X = list(x=1:5, y=6:10)
# X is converted to a data.frame without any copy.
setDF(X)
}
\keyword{ data }

\keyword{ data }

0 comments on commit ee3f8bb

Please sign in to comment.