-
Notifications
You must be signed in to change notification settings - Fork 0
/
as_dtx.R
250 lines (239 loc) · 8.57 KB
/
as_dtx.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
#' Coerce objects into data.frames, data.tables, tibbles or matrices
#'
#' @description Objects are coerced into the desired class. For [as_dtx()], the
#' desired class is obtained from `getOption("SciViews.as_dtx")`, with a default
#' value producing a data.table object. If the data are grouped with
#' [dplyr::group_by()], the resulting data frame is also [dplyr::ungroup()]ed
#' in the process.
#'
#' @param x An object.
#' @param ... Further arguments passed to the methods (not used yet).
#' @param rownames The name of the column with row names. If `NULL`, it is assessed from `getOptions("SciViews.dtx.rownames")`.
#' @param row.names Same as `rownames`, but for base R functions.
#' @param keep.key Do we keep the data.table key into a "key" attribute or do we restore `data.table`key from the attribute?
#' @param byref If `TRUE`, the object is modified by reference when converted into a `data.table` (faster, but not conventional). This is `FALSE` by default, or `NULL` if the argument does not apply in the context.
#' @param optional logical, If `TRUE`, setting row names and converting column names to syntactically correct names is optional.
#'
#' @return The coerced object. For `as_dtx()`, the coercion is determined from `getOption("SciViews.as_dtx")` which must return one of the three other `as_dt...()` functions (`as_dtt` by default). The `default_dtx()` does the same as `as_dtx()` if the object is a data.frame, a data.table, or a tibble, but it return the unmodified object for any other class (including subclassed data frames). This is a convenient function to force conversion only between those three objects classes.
#'
#' @note
#' Use [as_matrix()] instead of [base::as.matrix()]: it has different default
#' arguments to better account for `rownames` in data.table and tibble!
#' @export
#'
#' @examples
#' # A data.frame
#' dtf <- dtf(
#' x = 1:5,
#' y = rnorm(5),
#' f = letters[1:5],
#' l = sample(c(TRUE, FALSE), 5, replace = TRUE))
#'
#' # Convert into a tibble
#' (dtbl <- as_dtbl(dtf))
#' # Since row names are trivial (1 -> 5), a .rownames column is not added
#'
#' dtf2 <- dtf
#' rownames(dtf2) <- letters[1:5]
#' dtf2
#'
#' # Now, the conversion into a tibble adds .rownames
#' (dtbl2 <- as_dtbl(dtf2))
#' # and data frame row names are set again when converted bock to dtf
#' as_dtf(dtbl2)
#'
#' # It also work for conversions data.frame <-> data.table
#' (dtt2 <- as_dtt(dtf2))
#' as_dtf(dtt2)
#'
#' # It does not work when converting a tibble or a data.table into a matrix
#' # with as.matrix()
#' as.matrix(dtbl2)
#' # ... but as_matrix() does the job!
#' as_matrix(dtbl2)
#'
#' # The name for row in dtt and dtbl is in:
#' # (data.frame's row names are converted into a column with this name)
#' getOption("SciViews.dtx.rownames", default = ".rownames")
#'
#' # Convert into the preferred data frame object (data.table by default)
#' (dtx2 <- as_dtx(dtf2))
#' class(dtx2)
#'
#' # The default data frame object used:
#' getOption("SciViews.as_dtx", default = as_dtt)
#'
#' # default_dtx() does the same as as_dtx(),
#' # but it also does not change other objects
#' # So, it is safe to use whaterver the object you pass to it
#' (dtx2 <- default_dtx(dtf2))
#' class(dtx2)
#' # Any other object than data.frame, data.table or tbl_df is not converted
#' res <- default_dtx(1:5)
#' class(res)
#' # No conversion if the data frame is subclassed
#' dtf3 <- dtf2
#' class(dtf3) <- c("subclassed", "data.frame")
#' class(default_dtx(dtf3))
#'
#' # data.table keys are converted into a 'key' attribute and back
#' library(data.table)
#' setkey(dtt2, 'x')
#' haskey(dtt2)
#' key(dtt2)
#'
#' (dtf3 <- as_dtf(dtt2))
#' attributes(dtf3)
#' # Key is restored when converted back into a data.table (also from a tibble)
#' (dtt3 <- as_dtt(dtf3))
#' haskey(dtt3)
#' key(dtt3)
#'
#' # Grouped tibbles are ungrouped with as_dtbl() or as_dtx()/default_dtx()!
#' mtcars |> dplyr::group_by(cyl) -> mtcars_grouped
#' class(mtcars_grouped)
#' mtcars2 <- as_dtbl(mtcars_grouped)
#' class(mtcars2)
as_dtx <- function(x, ..., rownames = NULL, keep.key = TRUE,
byref = FALSE) {
if (is.null(rownames))
rownames <- getOption("SciViews.dtx.rownames", default = ".rownames")
getOption("SciViews.as_dtx", default = as_dtt)(.ungroup_dtbl(x), ...,
rownames = rownames, keep.key = keep.key, byref = byref)
}
#' @export
#' @rdname as_dtx
as_dtf <- function(x, ..., rownames = NULL, keep.key = TRUE,
byref = NULL) {
if (is.null(rownames))
rownames <- getOption("SciViews.dtx.rownames", default = ".rownames")
dtf <- as.data.frame(.ungroup_dtbl(x), ...)
# If there is a column named as rownames, convert it into row names
if (rownames %in% names(dtf)) {
rownames(dtf) <- dtf[[rownames]]
dtf[[rownames]] <- NULL
}
# Possibly get data.table keys
if (isTRUE(keep.key) && haskey(x))
attr(dtf, "key") <- key(x)
dtf
}
#' @export
#' @rdname as_dtx
as_dtt <- function(x, ..., rownames = NULL, keep.key = TRUE,
byref = FALSE) {
if (is.null(rownames))
rownames <- getOption("SciViews.dtx.rownames", default = ".rownames")
if (rownames %in% names(x) || all(rownames(x) == seq_len(nrow(x))))
rownames <- FALSE # Otherwise, rownames is duplicated or trivial ones are added
if (is.data.frame(x) && isTRUE(byref)) {
if (isTRUE(keep.key)) {
key <- attr(x, "key")
} else {
key <- NULL
}
x <- .ungroup_dtbl(x)
setDT(x, keep.rownames = rownames, key = key)
attr(x, "key") <- NULL
} else {
key <- attr(x, "key")
x <- as.data.table(.ungroup_dtbl(x), keep.rownames = rownames)
if (isTRUE(keep.key) && !is.null(key))
setkeyv(x, key)
attr(x, "key") <- NULL
}
rownames(x) <- NULL
x
}
#' @export
#' @rdname as_dtx
as_dtbl <- function(x, ..., rownames = NULL, keep.key = TRUE,
byref = NULL) {
if (is.null(rownames))
rownames <- getOption("SciViews.dtx.rownames", default = ".rownames")
if (is.logical(rownames) && rownames == FALSE)
rownames <- NULL # Not FALSE here, but NULL to drop rownames
if (rownames %in% names(x) || all(rownames(x) == seq_len(nrow(x))))
rownames <- NULL # Otherwise, rownames is duplicated (not FALSE here, but NULL)
# Or trivial rownames are added
if (isTRUE(keep.key) && haskey(x)) {
key <- key(x)
} else {
key <- NULL
}
dtbl <- as_tibble(.ungroup_dtbl(x), ..., rownames = rownames)
attr(dtbl, "key") <- key
dtbl
}
#' @export
#' @rdname as_dtx
default_dtx <- function(x, ..., rownames = NULL, keep.key = TRUE,
byref = FALSE) {
if (is_dtx(x, strict = TRUE)) {
# Convert
if (is.null(rownames))
rownames <- getOption("SciViews.dtx.rownames", default = ".rownames")
getOption("SciViews.as_dtx", default = as_dtt)(.ungroup_dtbl(x), ...,
rownames = rownames, keep.key = keep.key, byref = byref)
} else {
# Keep intact
x
}
}
#' @export
#' @rdname as_dtx
#' @method as.matrix tbl_df
as.matrix.tbl_df <- function(x, row.names = NULL, optional = FALSE, ...) {
as.matrix(as.data.frame(.ungroup_dtbl(x), row.names = row.names,
optional = optional, ...))
}
#' @export
#' @rdname as_dtx
as_matrix <- function(x, rownames = NULL, ...) {
if (is.null(rownames))
rownames <- getOption("SciViews.dtx.rownames", default = ".rownames")
if (is.logical(rownames) && isTRUE(!rownames))
rownames <- NULL # The value to use for data.table conversion
# Special case for tbl_df: .rownames is **not** honored.
# So, it is transformed first
x <- as_dtf(.ungroup_dtbl(x))
as.matrix(x, rownames = rownames, row.names = rownames, ...)
}
.ungroup_dtbl <- function(x) {
if (inherits(x, "GRP_df")) {
res <- fungroup(x)
} else if (inherits(x, "tbl_df")) {
res <- ungroup(x)
} else {
res <- x
}
# Special case for "groupedData"
if (inherits(res, "groupedData")) {
res <- as.data.frame(res)
# If there are labels or units, apply them to res properly
# y and x, according to formula
f <- attr(res, "formula")
# It is like y ~ x | z
if (length(f) == 3 && length(f[[3]]) == 3) {
xy <- list(x = as.character(f[[3]][[2]]), y = as.character(f[[2]]))
labels <- attr(res, "labels")
if (!is.null(labels) && is.list(labels)) {
# Apply labels to each column
nms <- names(labels)
for (nm in nms)
attr(res[[xy[[nm]]]], "label") <- labels[[nm]]
attr(res, "labels") <- NULL
}
units <- attr(res, "units")
if (!is.null(units) && is.list(units)) {
# Apply labels to each column
nms <- names(units)
# If units are between parentheses, eliminate them
for (nm in nms)
attr(res[[xy[[nm]]]], "units") <- sub("^\\((.+)\\)$", "\\1", units[[nm]])
attr(res, "units") <- NULL
}
}
}
res
}