Merge pull request #423 from JanMarvin/gh_issue_420

speedup numfmt assignment
JanMarvin · Nov 21, 2022 · c797b06 · c797b06
2 parents 0c2a23f + d5ef7c5
commit c797b06
Show file tree

Hide file tree

Showing 8 changed files with 138 additions and 29 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -20,6 +20,7 @@ export(create_font)
 export(create_hyperlink)
 export(create_numfmt)
 export(create_sparklines)
+export(dataframe_to_dims)
 export(delete_data)
 export(dims_to_dataframe)
 export(get_cell_refs)

diff --git a/NEWS.md b/NEWS.md
@@ -2,6 +2,8 @@
 
 ## New features
 
+* Improve writing styles to workbook. Previously every cell was checked, this has been changed to check unique styles. [423](https://github.com/JanMarvin/openxlsx2/pull/423)
+
 * Implement reading custom file properties. [418](https://github.com/JanMarvin/openxlsx2/pull/418)
 
 * Improved `add_named_region()`. This function includes now various xml options. [386](https://github.com/JanMarvin/openxlsx2/pull/386)

diff --git a/R/class-workbook.R b/R/class-workbook.R
@@ -4813,7 +4813,12 @@ wbWorkbook <- R6::R6Class(
 
       dims <- unname(unlist(did[rows, cols, drop = FALSE]))
 
-      for (dim in dims) {
+      cc <- self$worksheets[[sheet]]$sheet_data$cc
+      cc <- cc[cc$r %in% dims, ]
+      styles <- unique(cc[["c_s"]])
+
+      for (style in styles) {
+        dim <- cc[cc$c_s == style, "r"]
 
         new_fill <- create_fill(
           gradientFill = gradient_fill,
@@ -4822,7 +4827,7 @@ wbWorkbook <- R6::R6Class(
         )
         self$styles_mgr$add(new_fill, new_fill)
 
-        xf_prev <- get_cell_styles(self, sheet, dim)
+        xf_prev <- get_cell_styles(self, sheet, dim[[1]])
         xf_new_fill <- set_fill(xf_prev, self$styles_mgr$get_fill_id(new_fill))
         self$styles_mgr$add(xf_new_fill, xf_new_fill)
         s_id <- self$styles_mgr$get_xf_id(xf_new_fill)
@@ -4880,7 +4885,12 @@ wbWorkbook <- R6::R6Class(
       did <- dims_to_dataframe(dims, fill = TRUE)
       dims <- unname(unlist(did))
 
-      for (dim in dims) {
+      cc <- self$worksheets[[sheet]]$sheet_data$cc
+      cc <- cc[cc$r %in% dims, ]
+      styles <- unique(cc[["c_s"]])
+
+      for (style in styles) {
+        dim <- cc[cc$c_s == style, "r"]
 
         new_font <- create_font(
           b = bold,
@@ -4901,7 +4911,7 @@ wbWorkbook <- R6::R6Class(
         )
         self$styles_mgr$add(new_font, new_font)
 
-        xf_prev <- get_cell_styles(self, sheet, dim)
+        xf_prev <- get_cell_styles(self, sheet, dim[[1]])
         xf_new_font <- set_font(xf_prev, self$styles_mgr$get_font_id(new_font))
 
         self$styles_mgr$add(xf_new_font, xf_new_font)
@@ -4931,26 +4941,32 @@ wbWorkbook <- R6::R6Class(
       did <- dims_to_dataframe(dims, fill = TRUE)
       dims <- unname(unlist(did))
 
+      cc <- self$worksheets[[sheet]]$sheet_data$cc
+      cc <- cc[cc$r %in% dims, ]
+      styles <- unique(cc[["c_s"]])
+
       if (inherits(numfmt, "character")) {
 
-        new_numfmt <- create_numfmt(
-          numFmtId = self$styles_mgr$next_numfmt_id(),
-          formatCode = numfmt
-        )
-        self$styles_mgr$add(new_numfmt, new_numfmt)
+        for (style in styles) {
+          dim <- cc[cc$c_s == style, "r"]
 
-        for (dim in dims) {
-          xf_prev <- get_cell_styles(self, sheet, dim)
+          new_numfmt <- create_numfmt(
+            numFmtId = self$styles_mgr$next_numfmt_id(),
+            formatCode = numfmt
+          )
+          self$styles_mgr$add(new_numfmt, new_numfmt)
+
+          xf_prev <- get_cell_styles(self, sheet, dim[[1]])
           xf_new_numfmt <- set_numfmt(xf_prev, self$styles_mgr$get_numfmt_id(new_numfmt))
           self$styles_mgr$add(xf_new_numfmt, xf_new_numfmt)
           s_id <- self$styles_mgr$get_xf_id(xf_new_numfmt)
           self$set_cell_style(sheet, dim, s_id)
         }
 
       } else { # format is numeric
-
-        for (dim in dims) {
-          xf_prev <- get_cell_styles(self, sheet, dim)
+        for (style in styles) {
+          dim <- cc[cc$c_s == style, "r"]
+          xf_prev <- get_cell_styles(self, sheet, dim[[1]])
           xf_new_numfmt <- set_numfmt(xf_prev, numfmt)
           self$styles_mgr$add(xf_new_numfmt, xf_new_numfmt)
           s_id <- self$styles_mgr$get_xf_id(xf_new_numfmt)
@@ -5035,8 +5051,13 @@ wbWorkbook <- R6::R6Class(
       did <- dims_to_dataframe(dims, fill = TRUE)
       dims <- unname(unlist(did))
 
-      for (dim in dims) {
-        xf_prev <- get_cell_styles(self, sheet, dim)
+      cc <- self$worksheets[[sheet]]$sheet_data$cc
+      cc <- cc[cc$r %in% dims, ]
+      styles <- unique(cc[["c_s"]])
+
+      for (style in styles) {
+        dim <- cc[cc$c_s == style, "r"]
+        xf_prev <- get_cell_styles(self, sheet, dim[[1]])
         xf_new_cellstyle <- set_cellstyle(
           xf_node           = xf_prev,
           applyAlignment    = applyAlignment,

diff --git a/R/wb_functions.R b/R/wb_functions.R
@@ -47,6 +47,38 @@ dims_to_dataframe <- function(dims, fill = FALSE) {
   )
 }
 
+#' create dimensions from dataframe
+#' @param df dataframe with spreadsheet columns and rows
+#' @examples {
+#'   df <- dims_to_dataframe("A1:D5;F1:F6;D8", fill = TRUE)
+#'   dataframe_to_dims(df)
+#' }
+#' @export
+dataframe_to_dims <- function(df) {
+
+  # get continuous sequences of columns and rows in df
+  v <- as.integer(rownames(df))
+  rows <- split(v, cumsum(diff(c(-Inf, v)) != 1))
+
+  v <- col2int(colnames(df))
+  cols <- split(colnames(df), cumsum(diff(c(-Inf, v)) != 1))
+
+  # combine columns and rows to construct dims
+  out <- NULL
+  for (col in seq_along(cols)) {
+    for (row in seq_along(rows)) {
+      tmp <- paste0(
+        cols[[col]][[1]], rows[[row]][[1]],
+        ":",
+        rev(cols[[col]])[[1]],  rev(rows[[row]])[[1]]
+        )
+      out <- c(out, tmp)
+    }
+  }
+
+  paste0(out, collapse = ";")
+}
+
 # # similar to all, simply check if most of the values match the condition
 # # in guess_col_type not all bools may be "b" some are "s" (missings)
 # most <- function(x) {
@@ -467,16 +499,16 @@ wb_to_df <- function(
   cc$val <- NA_character_
   cc$typ <- NA_character_
 
-  cc_tab <- table(cc$c_t)
+  cc_tab <- unique(cc$c_t)
 
   # bool
-  if (isTRUE(cc_tab[c("b")] > 0)) {
+  if (any(cc_tab == c("b"))) {
     sel <- cc$c_t %in% c("b")
     cc$val[sel] <- as.logical(as.numeric(cc$v[sel]))
     cc$typ[sel] <- "b"
   }
   # text in v
-  if (isTRUE(any(cc_tab[c("str", "e")] > 0))) {
+  if (any(cc_tab %in% c("str", "e"))) {
     sel <- cc$c_t %in% c("str", "e")
     cc$val[sel] <- cc$v[sel]
     cc$typ[sel] <- "s"
@@ -487,13 +519,13 @@ wb_to_df <- function(
     cc$typ[sel] <- "s"
   }
   # text in t
-  if (isTRUE(cc_tab[c("inlineStr")] > 0)) {
+  if (any(cc_tab %in% c("inlineStr"))) {
     sel <- cc$c_t %in% c("inlineStr")
     cc$val[sel] <- is_to_txt(cc$is[sel])
     cc$typ[sel] <- "s"
   }
   # test is sst
-  if (isTRUE(cc_tab[c("s")] > 0)) {
+  if (any(cc_tab %in% c("s"))) {
     sel <- cc$c_t %in% c("s")
     cc$val[sel] <- sst[as.numeric(cc$v[sel]) + 1]
     cc$typ[sel] <- "s"

diff --git a/R/write.R b/R/write.R
@@ -377,14 +377,7 @@ write_data2 <- function(
       # ignore first row if colNames
       if (colNames) sel_rows <- sel_rows[-1]
 
-      paste(
-        unname(
-          unlist(
-            rtyp[rownames(rtyp) %in% sel_rows, sel_cols, drop = FALSE]
-          )
-        ),
-        collapse = ";"
-      )
+      dataframe_to_dims(rtyp[rownames(rtyp) %in% sel_rows, sel_cols, drop = FALSE])
     }
 
     # if hyperlinks are found, Excel sets something like the following font

diff --git a/man/dataframe_to_dims.Rd b/man/dataframe_to_dims.Rd
diff --git a/tests/testthat/test-wb_functions.R b/tests/testthat/test-wb_functions.R
@@ -183,3 +183,13 @@ test_that("dims_to_dataframe", {
   expect_equal(exp, got)
 
 })
+
+test_that("dataframe_to_dims", {
+
+  # dims_to_dataframe will always create a square
+  df <- dims_to_dataframe("A1:D5;F1:F6;D8", fill = TRUE)
+  dims <- dataframe_to_dims(df)
+  df2 <- dims_to_dataframe(dims, fill = TRUE)
+  expect_equal(df, df2)
+
+})
diff --git a/tests/testthat/test-wb_styles.R b/tests/testthat/test-wb_styles.R
@@ -471,3 +471,33 @@ test_that("style names are xml", {
   expect_equal(exp, got)
 
 })
+
+test_that("add numfmt is no longer slow", {
+
+  beg <- "1900-1-1"
+  end <- "2022-11-18"
+
+  dat <- seq(
+    from = as.POSIXct(beg, tz = "UTC"),
+    to   = as.POSIXct(end, tz = "UTC"),
+    by   = "day"
+  )
+
+  # when writing this creates the 29Feb1900 (#421)
+  out <- data.frame(
+    date = dat,
+    chr  = as.character(dat),
+    num  = seq_along(dat) - 1
+  )
+
+  wb <- wb_workbook() %>%
+    wb_add_worksheet()
+
+  # just a tiny test to check that this does not run forever
+  expect_silent(
+    wb <- wb %>%
+      wb_add_data(x = out) %>%
+      wb_add_numfmt(dims = "C1:C44882", numfmt = "#.0")
+  )
+
+})