Merge pull request #998 from jangorecki/as.xts

Closes #882. xts <-> data.table conversions.
Rdatatable · Jan 9, 2015 · a18844e · a18844e
2 parents ea2a006 + 73d5b4a
commit a18844e
Show file tree

Hide file tree

Showing 6 changed files with 89 additions and 0 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -23,6 +23,7 @@ export(frankv)
 export(address)
 export(.SD,.N,.I,.GRP,.BY,.EACHI)
 export(rleid)
+export(as.xts.data.table)
 
 S3method("[", data.table)
 S3method("[<-", data.table)
@@ -42,6 +43,7 @@ S3method(as.data.table, factor)
 S3method(as.data.table, ordered)
 S3method(as.data.table, Date)
 S3method(as.data.table, table)
+S3method(as.data.table, xts)
 S3method(as.data.frame, data.table)
 S3method(as.list, data.table)
 S3method(as.matrix, data.table)

diff --git a/R/xts.R b/R/xts.R
@@ -0,0 +1,17 @@
+# Convert an xts object to a data.table.
+#
+# x              xts object; checked with xts::is.xts().
+# keep.rownames  TRUE (default): the xts index becomes a leading column named
+#                "index". FALSE: only the data columns are returned.
+#
+# Returns a data.table. Stops when the xts namespace cannot be loaded, when x is
+# missing or is not an xts object, or when keep.rownames is TRUE and x already
+# has a column named "index".
+as.data.table.xts <- function(x, keep.rownames = TRUE){
+  stopifnot(requireNamespace("xts"), !missing(x), xts::is.xts(x))
+  if(!keep.rownames) return(setDT(as.data.frame(x, row.names=FALSE))[])
+  if("index" %in% names(x)) stop("Input xts object should not have 'index' column because it would result in duplicate column names. Rename 'index' column in xts or use `keep.rownames=FALSE` and add index manually as another column.")
+  # Read the index here, outside of `[.data.table`: inside j the symbol `x`
+  # would resolve to a data column named "x" (when the xts has one) instead of
+  # the xts object itself.
+  # NOTE(review): index.xts is reached through `:::` (not exported by xts) - confirm
+  # whether an exported accessor should be used instead.
+  idx = xts:::index.xts(x)
+  r = setDT(as.data.frame(x, row.names=FALSE))
+  # set() adds the column by reference without evaluating anything in the
+  # scope of r's columns.
+  set(r, j="index", value=idx)
+  setcolorder(r,c("index",names(r)[names(r)!="index"]))[]
+}
+
+# Convert a data.table to an xts object.
+#
+# x  data.table whose first column is POSIXct or Date; that column is used as
+#    the xts index. Of the remaining columns only the numeric ones are kept,
+#    the others are dropped with a warning.
+#
+# Returns the value of xts::as.xts() ordered by the first column of x. Stops
+# when the xts namespace cannot be loaded, when x is missing or is not a
+# data.table, or when the first column is neither POSIXct nor Date.
+as.xts.data.table <- function(x){
+  stopifnot(requireNamespace("xts"), !missing(x), is.data.table(x))
+  index_col = x[[1]]
+  if(!inherits(index_col, c("POSIXct","Date"))) stop("data.table must have a POSIXct or Date column on first position, use `setcolorder` function.")
+  # first column is the xts index, so it is left out of the numeric check
+  is_num = vapply(x, is.numeric, logical(1))[-1]
+  dropped = names(is_num)[!is_num]
+  if(length(dropped) > 0) warning(paste("Following columns are not numeric and will be omitted:",paste(dropped,collapse=", ")))
+  kept = names(is_num)[is_num]
+  r = setDF(x[,.SD,.SDcols=kept])
+  xts::as.xts(r, order.by=index_col)
+}
diff --git a/README.md b/README.md
@@ -27,6 +27,8 @@
   6. `frank()` is now implemented. It's much faster than `base::rank` and does more. It accepts *vectors*, *lists* with all elements of equal lengths, *data.frames* and *data.tables*, and optionally takes a `cols` argument. In addition to implementing all the `ties.method` methods available from `base::rank`, it also implements *dense rank*. See `?frank` for more. Closes [#760](https://github.com/Rdatatable/data.table/issues/760) and [#771](https://github.com/Rdatatable/data.table/issues/771)
 
   7. `rleid()`, a convenience function for generating a run-length type id column to be used in grouping operations is now implemented. Closes [#686](https://github.com/Rdatatable/data.table/issues/686). Check `?rleid` examples section for usage scenarios.
+
+  8. Efficient conversion between `xts` and data.table objects. Closes [#882](https://github.com/Rdatatable/data.table/issues/882). Check examples in `?as.xts.data.table` and `?as.data.table.xts`. Thanks to @jangorecki for the PR.
 
 #### BUG FIXES
 

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
@@ -5722,6 +5722,25 @@ DT = data.table(grp=rep(c("A", "B", "C", "A", "B"),
           c(2,2,3,1,2)), value=1:10)
 test(1464, rleid(DT, "grp"), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L))
 
+# data.table <-> xts conversion round-trip tests, issue #882
+
+if ("package:xts" %in% search()) { # runs only when xts is attached to the search path
+    # Date index: 50 consecutive days ending 2014-12-12
+    dt = data.table(index = as.Date((as.Date("2014-12-12")-49):as.Date("2014-12-12"),origin="1970-01-01"),quantity = rep(c(1:5),10),value = rep(c(1:10)*100,5))
+    xt = as.xts(matrix(data = c(dt$quantity, dt$value),ncol = 2,dimnames = list(NULL,c("quantity","value"))),order.by = dt$index) # same data built directly as xts
+    dt_xt = as.data.table(xt) # xts -> data.table
+    xt_dt = as.xts.data.table(dt) # data.table -> xts
+    test(1465.1, all.equal(dt, dt_xt, check.attributes = FALSE)) # values compared, attributes ignored
+    test(1465.2, xt, xt_dt)
+    # POSIXct index: the same 50 dates converted to POSIXct
+    dt <- data.table(index = as.POSIXct(as.Date((as.Date("2014-12-12")-49):as.Date("2014-12-12"),origin="1970-01-01"),origin="1970-01-01"),quantity = rep(c(1:5),10),value = rep(c(1:10)*100,5))
+    xt = as.xts(matrix(data = c(dt$quantity, dt$value),ncol = 2,dimnames = list(NULL,c("quantity","value"))),order.by = dt$index) # same data built directly as xts
+    dt_xt = as.data.table(xt) # xts -> data.table
+    xt_dt = as.xts.data.table(dt) # data.table -> xts
+    test(1465.3, all.equal(dt, dt_xt, check.attributes = FALSE)) # values compared, attributes ignored
+    test(1465.4, xt, xt_dt)
+}
+
 ##########################
 
 

diff --git a/man/as.data.table.xts.Rd b/man/as.data.table.xts.Rd
@@ -0,0 +1,25 @@
+\name{as.data.table.xts}
+\alias{as.data.table.xts}
+\title{Efficient xts to data.table conversion}
+\description{
+  Efficient conversion of xts to data.table.
+}
+\usage{
+\method{as.data.table}{xts}(x, keep.rownames = TRUE)
+}
+\arguments{
+\item{x}{xts to convert to data.table}
+
+\item{keep.rownames}{keep xts index as \emph{index} column in result data.table}
+}
+\seealso{ \code{\link{as.xts.data.table}} }
+\examples{
+\dontrun{
+data(sample_matrix, package = "xts")
+sample.xts <- xts::as.xts(sample_matrix) # xts might not be attached on search path
+# print head of xts
+print(head(sample.xts))
+# print dt
+print(as.data.table(sample.xts))
+}
+}
diff --git a/man/as.xts.data.table.Rd b/man/as.xts.data.table.Rd
@@ -0,0 +1,24 @@
+\name{as.xts.data.table}
+\alias{as.xts.data.table}
+\title{Efficient data.table to xts conversion}
+\description{
+  Efficient conversion of data.table to xts. The data.table must have a \emph{POSIXct} or \emph{Date} column in the first position.
+}
+\usage{
+as.xts.data.table(x)
+}
+\arguments{
+\item{x}{data.table to convert to xts, must have \emph{POSIXct} or \emph{Date} in the first column. All other non-numeric columns will be omitted with a warning.}
+}
+\seealso{ \code{\link{as.data.table.xts}} }
+\examples{
+\dontrun{
+sample.dt <- data.table(date = as.Date((Sys.Date()-999):Sys.Date(),origin="1970-01-01"),
+                        quantity = sample(10:50,1000,TRUE),
+                        value = sample(100:1000,1000,TRUE))
+# print dt
+print(sample.dt)
+# print head of xts
+print(head(as.xts.data.table(sample.dt))) # xts might not be attached on search path
+}
+}