Skip to content

Commit

Permalink
Merge pull request #112 from ropensci/fix-96
Browse files Browse the repository at this point in the history
filter out special control characters
  • Loading branch information
zkamvar committed May 16, 2024
2 parents be564d5 + c8b887c commit 4c0c6be
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 3 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

* Inline math with single characters will no longer cause an error (issue: #101,
fix: #103, @maelle)
* Special control characters are now filtered out before processing XML (issue:
#96, fix: #111, @zkamvar)

## MISC

Expand Down
9 changes: 6 additions & 3 deletions R/to_xml.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,13 @@ to_xml <- function(path, encoding = "UTF-8", sourcepos = FALSE, anchor_links = T


# Normalise raw markdown text before it is handed to the XML parser.
#
# Typographic ("smart") quotes are replaced by their ASCII equivalents and
# every character that is not allowed in an XML 1.0 document is removed.
#
# content: character vector of markdown text.
# Returns the cleaned character vector.
clean_content <- function(content) {
  # Negated class of the XML 1.0 `Char` production: tab, LF, CR,
  # U+0020-U+D7FF, U+E000-U+FFFD and U+10000-U+10FFFF. The last range must be
  # listed, otherwise legal supplementary-plane characters (e.g. emoji) would
  # be stripped along with the real control characters.
  illegal_control_chars <- paste0(
    "[^\u0009\u000a\u000d\u0020-\uD7FF\uE000-\uFFFD",
    "\U00010000-\U0010FFFF]"
  )
  smart_double_quotes <- "[\u201C\u201D]"
  smart_single_quotes <- "[\u2018\u2019]"
  content %>%
    str_replace_all(smart_double_quotes, '"') %>%
    str_replace_all(smart_single_quotes, "'") %>%
    str_replace_all(illegal_control_chars, "")
}


Expand Down
14 changes: 14 additions & 0 deletions tests/testthat/test-to_xml.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,20 @@ test_that("to_xml works for Rmd", {
})


test_that("to_xml can parse markdown with special control characters", {
  # Windows with R older than 4.2.0 is treated as unable to verify UTF-8
  # output, so the test is skipped on that combination.
  on_windows <- tolower(Sys.info())[["sysname"]] == "windows"
  r_before_4_2 <- getRversion() < numeric_version('4.2.0')
  skip_if(
    on_windows && r_before_4_2,
    message = "this system cannot test UTF-8 output"
  )

  # Smart single quotes, a U+001C control character, then smart double quotes.
  md_line <- "\u2018test single\u2019 \u001C\u201Ctest double\u201D"
  md_file <- withr::local_tempfile()
  writeLines(md_line, md_file)

  expect_no_error(xml <- tinkr::to_xml(md_file))
  # The quotes should come back as ASCII and the control character be gone.
  body_text <- xml2::xml_text(xml$body)
  expect_equal(body_text, "'test single' \"test double\"")
})



test_that("to_xml will not convert numeric options to character", {
txt <- "```{r txt, fig.width=4.2, fig.height=4.2, out.width='100%', purl = TRUE}\n#code\n```"
con <- textConnection(txt)
Expand Down

0 comments on commit 4c0c6be

Please sign in to comment.