diff --git a/R/data.table.R b/R/data.table.R index 6df869525..586a45099 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -16,135 +16,9 @@ setPackageName("data.table",.global) # So even though .BY doesn't appear in this file, it should still be NULL here and exported because it's # defined in SDenv and can be used by users. -mimicsAutoPrint = c("knit_print.default") -# add maybe repr_text.default. See https://github.com/Rdatatable/data.table/issues/933#issuecomment-220237965 - -shouldPrint = function(x) { - ret = (.global$print=="" || # to save address() calls and adding lots of address strings to R's global cache - address(x)!=.global$print) - .global$print = "" - ret -} - -print.data.table <- function(x, topn=getOption("datatable.print.topn"), - nrows=getOption("datatable.print.nrows"), - class=getOption("datatable.print.class"), - row.names=getOption("datatable.print.rownames"), - print.keys=getOption("datatable.print.keys"), - quote=FALSE, ...) { # topn - print the top topn and bottom topn rows with '---' inbetween (5) - # nrows - under this the whole (small) table is printed, unless topn is provided (100) - # class - should column class be printed underneath column name? (FALSE) - if (!shouldPrint(x)) { - # := in [.data.table sets .global$print=address(x) to suppress the next print i.e., like <- does. See FAQ 2.22 and README item in v1.9.5 - # The issue is distinguishing "> DT" (after a previous := in a function) from "> DT[,foo:=1]". To print.data.table(), there - # is no difference. Now from R 3.2.0 a side effect of the very welcome and requested change to avoid silent deep copy is that - # there is now no longer a difference between > DT and > print(DT). So decided that DT[] is now needed to guarantee print; simpler. - # This applies just at the prompt. Inside functions, print(DT) will of course print. - # Other options investigated (could revisit): Cstack_info(), .Last.value gets set first before autoprint, history(), sys.status(), - # topenv(), inspecting next statement in caller, using clock() at C level to timeout suppression after some number of cycles - SYS <- sys.calls() - if (length(SYS) <= 2 || # "> DT" auto-print or "> print(DT)" explicit print (cannot distinguish from R 3.2.0 but that's ok) - ( length(SYS) > 3L && is.symbol(thisSYS <- SYS[[length(SYS)-3L]][[1L]]) && - as.character(thisSYS) %chin% mimicsAutoPrint ) ) { - return(invisible()) - # is.symbol() temp fix for #1758. - } - } - if (!is.numeric(nrows)) nrows = 100L - if (!is.infinite(nrows)) nrows = as.integer(nrows) - if (nrows <= 0L) return(invisible()) # ability to turn off printing - if (!is.numeric(topn)) topn = 5L - topnmiss = missing(topn) - topn = max(as.integer(topn),1L) - if (print.keys){ - if (!is.null(ky <- key(x))) - cat("Key: <", paste(ky, collapse=", "), ">\n", sep="") - if (!is.null(ixs <- indices(x))) - cat("Ind", if (length(ixs) > 1) "ices" else "ex", ": <", - paste(ixs, collapse=">, <"), ">\n", sep="") - } - if (nrow(x) == 0L) { - if (length(x)==0L) - cat("Null data.table (0 rows and 0 cols)\n") # See FAQ 2.5 and NEWS item in v1.8.9 - else - cat("Empty data.table (0 rows) of ",length(x)," col",if(length(x)>1L)"s",": ",paste(head(names(x),6),collapse=","),if(ncol(x)>6)"...","\n",sep="") - return(invisible()) - } - if (topn*2nrows || !topnmiss)) { - toprint = rbind(head(x, topn), tail(x, topn)) - rn = c(seq_len(topn), seq.int(to=nrow(x), length.out=topn)) - printdots = TRUE - } else { - toprint = x - rn = seq_len(nrow(x)) - printdots = FALSE - } - toprint=format.data.table(toprint, ...) - - if ((!"bit64" %chin% loadedNamespaces()) && any(sapply(x,inherits,"integer64"))) require_bit64() - # When we depend on R 3.2.0 (Apr 2015) we can use isNamespaceLoaded() added then, instead of %chin% above - - # FR #5020 - add row.names = logical argument to print.data.table - if (isTRUE(row.names)) rownames(toprint)=paste(format(rn,right=TRUE,scientific=FALSE),":",sep="") else rownames(toprint)=rep.int("", nrow(toprint)) - if (is.null(names(x)) | all(names(x) == "")) colnames(toprint)=rep("", ncol(toprint)) # fixes bug #97 (RF#4934) and #545 (RF#5253) - if (isTRUE(class)) { - #Matching table for most common types & their abbreviations - class_abb = c(list = "", integer = "", numeric = "", - character = "", Date = "", complex = "", - factor = "", POSIXct = "", logical = "", - IDate = "", integer64 = "", raw = "", - expression = "", ordered = "") - classes = vapply(x, function(col) class(col)[1L], "", USE.NAMES=FALSE) - abbs = unname(class_abb[classes]) - if ( length(idx <- which(is.na(abbs))) ) - abbs[idx] = paste("<", classes[idx], ">", sep="") - toprint = rbind(abbs, toprint) - rownames(toprint)[1L] = "" - } - if (quote) colnames(toprint) <- paste0('"', old <- colnames(toprint), '"') - if (printdots) { - toprint = rbind(head(toprint, topn), "---"="", tail(toprint, topn)) - rownames(toprint) = format(rownames(toprint), justify="right") - print(toprint, right=TRUE, quote=quote) - return(invisible()) - } - if (nrow(toprint)>20L) - # repeat colnames at the bottom if over 20 rows so you don't have to scroll up to see them - toprint=rbind(toprint, matrix(if (quote) old else colnames(toprint), nrow=1L)) # fixes bug #4934 - print(toprint, right=TRUE, quote=quote) - invisible() -} - # FR #2591 - format.data.table issue with columns of class "formula" is.formula <- function(x) class(x) == "formula" -format.data.table <- function (x, ..., justify="none") { - if (is.atomic(x) && !is.null(x)) { - stop("Internal structure doesn't seem to be a list. Possibly corrupt data.table.") - } - format.item <- function(x) { - if (is.atomic(x) || is.formula(x)) # FR #2591 - format.data.table issue with columns of class "formula" - paste(c(format(head(x,6), justify=justify, ...), if(length(x)>6)""),collapse=",") # fix for #5435 - format has to be added here... - else - paste("<",class(x)[1L],">",sep="") - } - # FR #1091 for pretty printing of character - # TODO: maybe instead of doing "this is...", we could do "this ... test"? - char.trunc <- function(x, trunc.char = getOption("datatable.prettyprint.char")) { - trunc.char = max(0L, suppressWarnings(as.integer(trunc.char[1L])), na.rm=TRUE) - if (!is.character(x) || trunc.char <= 0L) return(x) - idx = which(nchar(x) > trunc.char) - x[idx] = paste(substr(x[idx], 1L, as.integer(trunc.char)), "...", sep="") - x - } - do.call("cbind",lapply(x,function(col,...){ - if (!is.null(dim(col))) stop("Invalid column: it has dimensions. Can't format it. If it's the result of data.table(table()), use as.data.table(table()) instead.") - if (is.list(col)) col = vapply_1c(col, format.item) - else col = format(char.trunc(col), justify=justify, ...) # added an else here to fix #5435 - col - },...)) -} - is.data.table <- function(x) inherits(x, "data.table") is.ff <- function(x) inherits(x, "ff") # define this in data.table so that we don't have to require(ff), but if user is using ff we'd like it to work diff --git a/R/print.data.table.R b/R/print.data.table.R new file mode 100644 index 000000000..69b7cfe97 --- /dev/null +++ b/R/print.data.table.R @@ -0,0 +1,127 @@ +# Moved here out from data.table.R on 10 Aug 2017. See data.table.R for history prior to that. + +print.data.table <- function(x, topn=getOption("datatable.print.topn"), + nrows=getOption("datatable.print.nrows"), + class=getOption("datatable.print.class"), + row.names=getOption("datatable.print.rownames"), + print.keys=getOption("datatable.print.keys"), + quote=FALSE, ...) { # topn - print the top topn and bottom topn rows with '---' inbetween (5) + # nrows - under this the whole (small) table is printed, unless topn is provided (100) + # class - should column class be printed underneath column name? (FALSE) + if (!shouldPrint(x)) { + # := in [.data.table sets .global$print=address(x) to suppress the next print i.e., like <- does. See FAQ 2.22 and README item in v1.9.5 + # The issue is distinguishing "> DT" (after a previous := in a function) from "> DT[,foo:=1]". To print.data.table(), there + # is no difference. Now from R 3.2.0 a side effect of the very welcome and requested change to avoid silent deep copy is that + # there is now no longer a difference between > DT and > print(DT). So decided that DT[] is now needed to guarantee print; simpler. + # This applies just at the prompt. Inside functions, print(DT) will of course print. + # Other options investigated (could revisit): Cstack_info(), .Last.value gets set first before autoprint, history(), sys.status(), + # topenv(), inspecting next statement in caller, using clock() at C level to timeout suppression after some number of cycles + SYS <- sys.calls() + if (length(SYS) <= 2 || # "> DT" auto-print or "> print(DT)" explicit print (cannot distinguish from R 3.2.0 but that's ok) + ( length(SYS) > 3L && is.symbol(thisSYS <- SYS[[length(SYS)-3L]][[1L]]) && + as.character(thisSYS) %chin% mimicsAutoPrint ) ) { + return(invisible()) + # is.symbol() temp fix for #1758. + } + } + if (!is.numeric(nrows)) nrows = 100L + if (!is.infinite(nrows)) nrows = as.integer(nrows) + if (nrows <= 0L) return(invisible()) # ability to turn off printing + if (!is.numeric(topn)) topn = 5L + topnmiss = missing(topn) + topn = max(as.integer(topn),1L) + if (print.keys){ + if (!is.null(ky <- key(x))) + cat("Key: <", paste(ky, collapse=", "), ">\n", sep="") + if (!is.null(ixs <- indices(x))) + cat("Ind", if (length(ixs) > 1) "ices" else "ex", ": <", + paste(ixs, collapse=">, <"), ">\n", sep="") + } + if (nrow(x) == 0L) { + if (length(x)==0L) + cat("Null data.table (0 rows and 0 cols)\n") # See FAQ 2.5 and NEWS item in v1.8.9 + else + cat("Empty data.table (0 rows) of ",length(x)," col",if(length(x)>1L)"s",": ",paste(head(names(x),6),collapse=","),if(ncol(x)>6)"...","\n",sep="") + return(invisible()) + } + if (topn*2nrows || !topnmiss)) { + toprint = rbind(head(x, topn), tail(x, topn)) + rn = c(seq_len(topn), seq.int(to=nrow(x), length.out=topn)) + printdots = TRUE + } else { + toprint = x + rn = seq_len(nrow(x)) + printdots = FALSE + } + toprint=format.data.table(toprint, ...) + + if ((!"bit64" %chin% loadedNamespaces()) && any(sapply(x,inherits,"integer64"))) require_bit64() + # When we depend on R 3.2.0 (Apr 2015) we can use isNamespaceLoaded() added then, instead of %chin% above + + # FR #5020 - add row.names = logical argument to print.data.table + if (isTRUE(row.names)) rownames(toprint)=paste(format(rn,right=TRUE,scientific=FALSE),":",sep="") else rownames(toprint)=rep.int("", nrow(toprint)) + if (is.null(names(x)) | all(names(x) == "")) colnames(toprint)=rep("", ncol(toprint)) # fixes bug #97 (RF#4934) and #545 (RF#5253) + if (isTRUE(class)) { + #Matching table for most common types & their abbreviations + class_abb = c(list = "", integer = "", numeric = "", + character = "", Date = "", complex = "", + factor = "", POSIXct = "", logical = "", + IDate = "", integer64 = "", raw = "", + expression = "", ordered = "") + classes = vapply(x, function(col) class(col)[1L], "", USE.NAMES=FALSE) + abbs = unname(class_abb[classes]) + if ( length(idx <- which(is.na(abbs))) ) + abbs[idx] = paste("<", classes[idx], ">", sep="") + toprint = rbind(abbs, toprint) + rownames(toprint)[1L] = "" + } + if (quote) colnames(toprint) <- paste0('"', old <- colnames(toprint), '"') + if (printdots) { + toprint = rbind(head(toprint, topn), "---"="", tail(toprint, topn)) + rownames(toprint) = format(rownames(toprint), justify="right") + print(toprint, right=TRUE, quote=quote) + return(invisible()) + } + if (nrow(toprint)>20L) + # repeat colnames at the bottom if over 20 rows so you don't have to scroll up to see them + toprint=rbind(toprint, matrix(if (quote) old else colnames(toprint), nrow=1L)) # fixes bug #4934 + print(toprint, right=TRUE, quote=quote) + invisible() +} + +format.data.table <- function (x, ..., justify="none") { + if (is.atomic(x) && !is.null(x)) { + stop("Internal structure doesn't seem to be a list. Possibly corrupt data.table.") + } + format.item <- function(x) { + if (is.atomic(x) || is.formula(x)) # FR #2591 - format.data.table issue with columns of class "formula" + paste(c(format(head(x,6), justify=justify, ...), if(length(x)>6)""),collapse=",") # fix for #5435 - format has to be added here... + else + paste("<",class(x)[1L],">",sep="") + } + # FR #1091 for pretty printing of character + # TODO: maybe instead of doing "this is...", we could do "this ... test"? + char.trunc <- function(x, trunc.char = getOption("datatable.prettyprint.char")) { + trunc.char = max(0L, suppressWarnings(as.integer(trunc.char[1L])), na.rm=TRUE) + if (!is.character(x) || trunc.char <= 0L) return(x) + idx = which(nchar(x) > trunc.char) + x[idx] = paste(substr(x[idx], 1L, as.integer(trunc.char)), "...", sep="") + x + } + do.call("cbind",lapply(x,function(col,...){ + if (!is.null(dim(col))) stop("Invalid column: it has dimensions. Can't format it. If it's the result of data.table(table()), use as.data.table(table()) instead.") + if (is.list(col)) col = vapply_1c(col, format.item) + else col = format(char.trunc(col), justify=justify, ...) # added an else here to fix #5435 + col + },...)) +} + +mimicsAutoPrint = c("knit_print.default") +# add maybe repr_text.default. See https://github.com/Rdatatable/data.table/issues/933#issuecomment-220237965 + +shouldPrint = function(x) { + ret = (.global$print=="" || # to save address() calls and adding lots of address strings to R's global cache + address(x)!=.global$print) + .global$print = "" + ret +}