Skip to content

Commit

Permalink
close #210
Browse files Browse the repository at this point in the history
  • Loading branch information
gagolews committed Jan 30, 2016
1 parent a8bc0c4 commit fb97b40
Show file tree
Hide file tree
Showing 13 changed files with 139 additions and 35 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: stringi
Version: 1.0-2
Date: 2016-01-29
Date: 2016-01-30
Title: Character String Processing Facilities
Description: Allows for fast, correct, consistent, portable,
as well as convenient character string/text processing in every locale
Expand Down
4 changes: 3 additions & 1 deletion NEWS
Expand Up @@ -13,7 +13,9 @@ http://bugs.icu-project.org/trac/ticket/11554]

* [BUGFIX] #214: allow a regex pattern like `.*` to match an empty string.

* ... #210: stri_replace_all_fixed(c("1", "NULL"), "NULL", NA) should result in c("1", NA)
* [BUGFIX] #210: `stri_replace_all_fixed(c("1", "NULL"), "NULL", NA)`
now results in `c("1", NA)`

* ... #207
* ... #199
* ... #175
Expand Down
2 changes: 2 additions & 0 deletions R/search_replace_4.R
Expand Up @@ -103,6 +103,8 @@
#' stri_replace_all_fixed(s, " ", "#")
#' stri_replace_all_fixed(s, "o", "0")
#'
#' stri_replace_all_fixed(c("1", "NULL", "3"), "NULL", NA)
#'
#' stri_replace_all_regex(s, " .*? ", "#")
#' stri_replace_all_regex(s, "(el|s)it", "1234")
#' stri_replace_all_regex('abaca', 'a', c('!', '*'))
Expand Down
15 changes: 11 additions & 4 deletions devel/testthat/test-replace-charclass.R
Expand Up @@ -6,7 +6,7 @@ test_that("stri_replace_all_charclass-vectorize_all=FALSE", {
expect_error(stri_replace_all_charclass("b",character(0),c("a", "b"), vectorize_all=FALSE))
expect_error(stri_replace_all_charclass("b",c("", ""),"a", vectorize_all=FALSE))
expect_identical(stri_replace_all_charclass(NA,c("\\p{WHITE_SPACE}", "[a]"),"?", vectorize_all=FALSE),NA_character_)
expect_identical(stri_replace_all_charclass("X",c("\\p{WHITE_SPACE}", "[a]"),NA, vectorize_all=FALSE),NA_character_)
expect_identical(stri_replace_all_charclass(c("X", " ", "a"),c("\\p{WHITE_SPACE}", "[a]"),NA, vectorize_all=FALSE),c("X",NA,NA))
expect_identical(stri_replace_all_charclass("X",NA,"?", vectorize_all=FALSE),NA_character_)
expect_identical(stri_replace_all_charclass(NA,NA,"?", vectorize_all=FALSE),NA_character_)
expect_identical(stri_replace_all_charclass(NA,NA,NA, vectorize_all=FALSE),NA_character_)
Expand All @@ -24,14 +24,16 @@ test_that("stri_replace_all_charclass-vectorize_all=FALSE", {
expect_identical(stri_replace_all_charclass("aaa bbb, ccc", c("\\p{L}", "\\p{Z}"), merge=TRUE,
c("xxxxx", ""), vectorize_all=FALSE), "xxxxxxxxxx,xxxxx")

expect_identical(stri_replace_all_charclass(c("1RR", "NURR", "3"), c("[R]", "[L]"), c("L", NA), vectorize_all=FALSE), c(NA, NA, "3"))

})

test_that("stri_replace_all_charclass", {
expect_identical(stri_replace_all_charclass(character(0),"\\p{Z}",""),character(0))
expect_identical(stri_replace_all_charclass("b",character(0),"a"),character(0))
expect_error(stri_replace_all_charclass("b","","a"))
expect_identical(stri_replace_all_charclass(NA,"\\p{WHITE_SPACE}","?"),NA_character_)
expect_identical(stri_replace_all_charclass("X","\\p{WHITE_SPACE}",NA),NA_character_)
expect_identical(stri_replace_all_charclass(c(" ","X"),"\\p{WHITE_SPACE}",NA),c(NA,"X"))
expect_identical(stri_replace_all_charclass("X",NA,"?"),NA_character_)
expect_identical(stri_replace_all_charclass(NA,NA,"?"),NA_character_)
expect_identical(stri_replace_all_charclass(NA,NA,NA),NA_character_)
Expand All @@ -45,13 +47,15 @@ test_that("stri_replace_all_charclass", {
expect_identical(stri_replace_all_charclass(c(stri_dup("Y ", 0:1500)), "\\p{Wspace}", "X"), stri_dup("YX", 0:1500))
expect_identical(stri_replace_all_charclass("ala ma \t \n kota ", "\\p{WHITESPACE}", " ", merge=TRUE),
"ala ma kota ")

expect_identical(stri_replace_all_charclass(c("1", "N", "3"), "\\p{Lu}", NA), c("1", NA, "3"))
})


test_that("stri_replace_first_charclass", {
expect_identical(stri_replace_first_charclass(character(0),"\\p{Z}",""),character(0))
expect_identical(stri_replace_first_charclass(NA,"\\p{Wspace}","?"),NA_character_)
expect_identical(stri_replace_first_charclass("X","\\p{Wspace}",NA),NA_character_)
expect_identical(stri_replace_first_charclass(c(" ","X"),"\\p{Wspace}",NA),c(NA_character_, "X"))
expect_identical(stri_replace_first_charclass("X",NA,"?"),NA_character_)
expect_identical(stri_replace_first_charclass(NA,NA,"?"),NA_character_)
expect_identical(stri_replace_first_charclass(NA,NA,NA),NA_character_)
Expand All @@ -60,13 +64,14 @@ test_that("stri_replace_first_charclass", {
expect_identical(stri_replace_first_charclass(c("a a ", " aa ", "aa ", "aa"), "\\p{Wspace}", "X"), c("aXa ", "Xaa ", "aaX ", "aa"))
expect_identical(stri_replace_first_charclass("a1 ", c("\\p{Z}", "\\p{Nd}", "\\p{Ll}", "\\p{P}"), "X"), c("a1X", "aX ", "X1 ", "a1 "))
expect_identical(stri_replace_first_charclass("a1 ", "\\P{Z}", c("X", "Y")), c("X1 ", "Y1 "))
expect_identical(stri_replace_first_charclass(c("1", "N", "3"), "\\p{Lu}", NA), c("1", NA, "3"))
})


test_that("stri_replace_last_charclass", {
expect_identical(stri_replace_last_charclass(character(0),"\\p{Z}",""),character(0))
expect_identical(stri_replace_last_charclass(NA,"\\p{Wspace}","?"),NA_character_)
expect_identical(stri_replace_last_charclass("X","\\p{Wspace}",NA),NA_character_)
expect_identical(stri_replace_last_charclass(c(" ","X"),"\\p{Wspace}",NA),c(NA_character_, "X"))
expect_identical(stri_replace_last_charclass("X",NA,"?"),NA_character_)
expect_identical(stri_replace_last_charclass(NA,NA,"?"),NA_character_)
expect_identical(stri_replace_last_charclass(NA,NA,NA),NA_character_)
Expand All @@ -75,4 +80,6 @@ test_that("stri_replace_last_charclass", {
expect_identical(stri_replace_last_charclass(c("a a ", " aa ", "aa ", "aa", " aa"), "\\p{Wspace}", "X"), c("a aX", " aaX", "aa X", "aa", "Xaa"))
expect_identical(stri_replace_last_charclass("a1 ", c("\\p{Z}", "\\p{Nd}", "\\p{Ll}", "\\p{P}"), "X"), c("a1X", "aX ", "X1 ", "a1 "))
expect_identical(stri_replace_last_charclass("a1 ", "\\P{Z}", c("X", "Y")), c("aX ", "aY "))

expect_identical(stri_replace_last_charclass(c("1", "N", "3"), "\\p{Lu}", NA), c("1", NA, "3"))
})
12 changes: 10 additions & 2 deletions devel/testthat/test-replace-coll.R
Expand Up @@ -8,7 +8,7 @@ test_that("stri_replace_all_coll", {
expect_warning(stri_replace_all_coll("a", c("a", "b", "c"), c("b", "d"), vectorize_all=FALSE))
expect_equivalent(stri_replace_all_coll("a", c("a", NA), c("b", "d"), vectorize_all=FALSE), c(NA_character_))
expect_equivalent(stri_replace_all_coll(c("a", "b"), c("a", NA), c("b", "d"), vectorize_all=FALSE), c(NA_character_, NA_character_))
expect_equivalent(stri_replace_all_coll(c("aba", "bbbb"), c("a", "c"), c(NA, "d"), vectorize_all=FALSE), c(NA, NA_character_))
expect_equivalent(stri_replace_all_coll(c("aba", "bbbb"), c("a", "c"), c(NA, "d"), vectorize_all=FALSE), c(NA, "bbbb"))
expect_equivalent(stri_replace_all_coll(character(0), c("a", "c"), c(NA, "d"), vectorize_all=FALSE), character(0))
expect_equivalent(stri_replace_all_coll(c("", "", ""), c("a", "c"), c("e", "d"), vectorize_all=FALSE), c("", "", ""))
expect_equivalent(stri_replace_all_coll(c("abacada", "aaa", "fdsueo"), c("a", "b"), c("x", "y"), vectorize_all = FALSE),
Expand All @@ -19,7 +19,9 @@ test_that("stri_replace_all_coll", {
expect_equivalent(stri_replace_all_coll("The quick brown fox jumped over the lazy dog.",
c("quick", "brown", "fox", "dog"), c(""), vectorize_all = FALSE),
"The jumped over the lazy .")
expect_identical(stri_replace_all_coll("X",c("a", "b"),NA, vectorize_all=FALSE),NA_character_)
expect_identical(stri_replace_all_coll(c("X", "Y"),c("a", "X", "b"),NA, vectorize_all=FALSE),c(NA, "Y"))

expect_identical(stri_replace_all_coll(c("1RR", "NURR", "3"), c("RR", "NULL"), c("LL", NA), vectorize_all=FALSE), c("1LL", NA, "3"))
})

test_that("stri_replace_all_coll", {
Expand All @@ -38,6 +40,8 @@ test_that("stri_replace_all_coll", {
expect_identical(stri_replace_all_coll("ALA MA KOTA",c(" ", "A", NA) ,""), c("ALAMAKOTA", "L M KOT", NA))
expect_identical(stri_replace_all_coll("ALA","BF","HA"),"ALA")
expect_identical(stri_replace_all_coll("ALA","ALA", c("","RYBA")), c("", "RYBA"))

expect_identical(stri_replace_all_coll(c("1", "NULL", "3"), "NULL", NA), c("1", NA, "3"))
})


Expand All @@ -55,6 +59,8 @@ test_that("stri_replace_first_coll", {
expect_identical(stri_replace_first_coll("ALA MA KOTA",c(" ", "A", NA) ,""), c("ALAMA KOTA", "LA MA KOTA", NA))
expect_identical(stri_replace_first_coll("ALA","BF","HA"),"ALA")
expect_identical(stri_replace_first_coll("ALA","ALA", c("","RYBA")), c("", "RYBA"))

expect_identical(stri_replace_first_coll(c("1", "NULL", "3"), "NULL", NA), c("1", NA, "3"))
})


Expand All @@ -73,4 +79,6 @@ test_that("stri_replace_last_coll", {
expect_identical(stri_replace_last_coll("ALA MA KOTA",c(" ", "A", NA) ,""), c("ALA MA KOTA", "ALA MA KOT", NA))
expect_identical(stri_replace_last_coll("ALA","BF","HA"),"ALA")
expect_identical(stri_replace_last_coll("ALA","ALA", c("","RYBA")), c("", "RYBA"))

expect_identical(stri_replace_last_coll(c("1", "NULL", "3"), "NULL", NA), c("1", NA, "3"))
})
12 changes: 10 additions & 2 deletions devel/testthat/test-replace-fixed.R
Expand Up @@ -8,7 +8,7 @@ test_that("stri_replace_all_fixed", {
expect_warning(stri_replace_all_fixed("a", c("a", "b", "c"), c("b", "d"), vectorize_all=FALSE))
expect_equivalent(stri_replace_all_fixed("a", c("a", NA), c("b", "d"), vectorize_all=FALSE), c(NA_character_))
expect_equivalent(stri_replace_all_fixed(c("a", "b"), c("a", NA), c("b", "d"), vectorize_all=FALSE), c(NA_character_, NA_character_))
expect_equivalent(stri_replace_all_fixed(c("aba", "bbbb"), c("a", "c"), c(NA, "d"), vectorize_all=FALSE), c(NA, NA_character_))
expect_equivalent(stri_replace_all_fixed(c("aba", "bbbb"), c("a", "c"), c(NA, "d"), vectorize_all=FALSE), c(NA, "bbbb"))
expect_equivalent(stri_replace_all_fixed(character(0), c("a", "c"), c(NA, "d"), vectorize_all=FALSE), character(0))
expect_equivalent(stri_replace_all_fixed(c("", "", ""), c("a", "c"), c("e", "d"), vectorize_all=FALSE), c("", "", ""))
expect_equivalent(stri_replace_all_fixed(c("abacada", "aaa", "fdsueo"), c("a", "b"), c("x", "y"), vectorize_all = FALSE),
Expand All @@ -19,7 +19,9 @@ test_that("stri_replace_all_fixed", {
expect_equivalent(stri_replace_all_fixed("The quick brown fox jumped over the lazy dog.",
c("quick", "brown", "fox", "dog"), c(""), vectorize_all = FALSE),
"The jumped over the lazy .")
expect_identical(stri_replace_all_fixed("X",c("a", "b"),NA, vectorize_all=FALSE),NA_character_)
expect_identical(stri_replace_all_fixed(c("Y", "X"),c("a", "b", "X"),NA, vectorize_all=FALSE), c("Y", NA))

expect_identical(stri_replace_all_fixed(c("1RR", "NURR", "3"), c("RR", "NULL"), c("LL", NA), vectorize_all=FALSE), c("1LL", NA, "3"))
})


Expand All @@ -38,13 +40,17 @@ test_that("stri_replace_all_fixed", {
expect_identical(stri_replace_all_fixed(NA,NA,NA),NA_character_)
expect_warning(stri_replace_all_fixed('fasgasgas',c(" ","o"),1:3))

expect_identical(stri_replace_all_fixed(c("1", "NULL", "3"), "NULL", NA), c("1", NA, "3"))

expect_identical(stri_replace_all_fixed("ALA MA KOTA",c(" ", "A", NA) ,""), c("ALAMAKOTA", "L M KOT", NA))
expect_identical(stri_replace_all_fixed("ALA","BF","HA"),"ALA")
expect_identical(stri_replace_all_fixed("ALA","ALA", c("","RYBA")), c("", "RYBA"))
})


test_that("stri_replace_first_fixed", {
expect_identical(stri_replace_first_fixed(c("1", "NULL", "3"), "NULL", NA), c("1", NA, "3"))

expect_identical(stri_replace_first("abcde", fixed="bcd", replacement=""), "ae")
expect_identical(stri_replace_first_fixed(character(0),1,2),character(0))
expect_identical(stri_replace_first_fixed("abab123 a","a",1),"1bab123 a")
Expand All @@ -69,6 +75,8 @@ test_that("stri_replace_first_fixed", {


test_that("stri_replace_last_fixed", {
expect_identical(stri_replace_last_fixed(c("1", "NULL", "3"), "NULL", NA), c("1", NA, "3"))

expect_identical(stri_replace_last("abcde", fixed="bcd", replacement=""), "ae")
expect_identical(stri_replace_last_fixed(character(0),1,2),character(0))
expect_identical(stri_replace_last_fixed("abab123 a","a",1),"abab123 1")
Expand Down
12 changes: 10 additions & 2 deletions devel/testthat/test-replace-regex.R
Expand Up @@ -8,7 +8,7 @@ test_that("stri_replace_all_regex [vectorize_all=FALSE]", {
expect_warning(stri_replace_all_regex("a", c("a", "b", "c"), c("b", "d"), vectorize_all=FALSE))
expect_equivalent(stri_replace_all_regex("a", c("a", NA), c("b", "d"), vectorize_all=FALSE), c(NA_character_))
expect_equivalent(stri_replace_all_regex(c("a", "b"), c("a", NA), c("b", "d"), vectorize_all=FALSE), c(NA_character_, NA_character_))
expect_equivalent(stri_replace_all_regex(c("aba", "bbbb"), c("a", "c"), c(NA, "d"), vectorize_all=FALSE), c(NA, NA_character_))
expect_equivalent(stri_replace_all_regex(c("aba", "bbbb"), c("a", "c"), c(NA, "d"), vectorize_all=FALSE), c(NA, "bbbb"))
expect_equivalent(stri_replace_all_regex(character(0), c("a", "c"), c(NA, "d"), vectorize_all=FALSE), character(0))
expect_equivalent(stri_replace_all_regex(c("", "", ""), c("a", "c"), c("e", "d"), vectorize_all=FALSE), c("", "", ""))
expect_equivalent(stri_replace_all_regex(c("abacada", "aaa", "fdsueo"), c("a+", "b"), c("x", "y"), vectorize_all = FALSE),
Expand All @@ -19,7 +19,7 @@ test_that("stri_replace_all_regex [vectorize_all=FALSE]", {
expect_equivalent(stri_replace_all_regex("The quick brown fox jumped over the lazy dog.",
c("quick", "brown", "fox", "dog"), c(""), vectorize_all = FALSE),
"The jumped over the lazy .")
expect_identical(stri_replace_all_regex("X",c("a", "b"),NA, vectorize_all=FALSE),NA_character_)
expect_identical(stri_replace_all_regex(c("X","Y"),c("a", "b", "X"),NA, vectorize_all=FALSE),c(NA, "Y"))

expect_identical(stri_replace_all_regex("",c("^.*$","h"),c("hey!", "y"), vectorize_all=FALSE),"yey!")
expect_identical(stri_replace_all_regex(" ",c("^.*$","h"),c("hey!", "y"), vectorize_all=FALSE),"yey!")
Expand All @@ -30,6 +30,8 @@ test_that("stri_replace_all_regex [vectorize_all=FALSE]", {
# expect_identical(stri_replace_all_regex("ABC", "(.*)", "ONE($1)"), "ONE(A)")
# expect_identical(stri_replace_all_regex("A", ".*", "ONE($0)"), "ONE(A)")
# expect_identical(stri_replace_all_regex("A", "^.*", "ONE($0)"), "ONE(A)")

expect_identical(stri_replace_all_regex(c("1RR", "NURR", "3"), c("RR", "NULL"), c("LL", NA), vectorize_all=FALSE), c("1LL", NA, "3"))
})

test_that("stri_replace_all_regex", {
Expand All @@ -46,6 +48,8 @@ test_that("stri_replace_all_regex", {
expect_identical(stri_replace_all_regex(NA,NA,NA),NA_character_)
expect_warning(stri_replace_all_regex('fasgasgas',c(" ","o"),1:3))

expect_identical(stri_replace_all_regex(c("1", "NULL", "3"), "NULL", NA), c("1", NA, "3"))

expect_identical(stri_replace_all_regex("","^.*$","hey!"),"hey!")
expect_identical(stri_replace_all_regex(" ","^.*$","hey!"),"hey!")

Expand Down Expand Up @@ -109,6 +113,8 @@ test_that("stri_replace_first_regex", {
c("\u0105\u0106\u0108\u0107", "\u0105\u0107")) # match of zero length:
expect_identical(stri_replace_first_regex("","^.*$","hey!"),"hey!")
expect_identical(stri_replace_first_regex(" ","^.*$","hey!"),"hey!")

expect_identical(stri_replace_first_regex(c("1", "NULL", "3"), "NULL", NA), c("1", NA, "3"))
})


Expand All @@ -135,4 +141,6 @@ test_that("stri_replace_last_regex", {

expect_identical(stri_replace_last_regex("","^.*$","hey!"),"hey!")
expect_identical(stri_replace_last_regex(" ","^.*$","hey!"),"hey!")

expect_identical(stri_replace_last_regex(c("1", "NULL", "3"), "NULL", NA), c("1", NA, "3"))
})
2 changes: 2 additions & 0 deletions man/stri_replace.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 27 additions & 3 deletions src/stri_search_class_replace.cpp
Expand Up @@ -71,6 +71,9 @@ using namespace std;
*
* @version 0.3-1 (Marek Gagolewski, 2014-11-04)
* Issue #112: str_prepare_arg* retvals were not PROTECTed from gc
*
* @version 1.0-2 (Marek Gagolewski, 2016-01-30)
* Issue #210: Allow NA replacement -- the result is NA only where the pattern matches; strings with no match are kept unchanged
*/
SEXP stri__replace_all_charclass_yes_vectorize_all(SEXP str, SEXP pattern, SEXP replacement, SEXP merge)
{
Expand All @@ -95,7 +98,7 @@ SEXP stri__replace_all_charclass_yes_vectorize_all(SEXP str, SEXP pattern, SEXP
i != pattern_cont.vectorize_end();
i = pattern_cont.vectorize_next(i))
{
if (str_cont.isNA(i) || replacement_cont.isNA(i) || pattern_cont.isNA(i)) {
if (str_cont.isNA(i) || pattern_cont.isNA(i)) {
SET_STRING_ELT(ret, i, NA_STRING);
continue;
}
Expand All @@ -114,6 +117,11 @@ SEXP stri__replace_all_charclass_yes_vectorize_all(SEXP str, SEXP pattern, SEXP
continue;
}

if (replacement_cont.isNA(i)) {
SET_STRING_ELT(ret, i, NA_STRING);
continue;
}

R_len_t replacement_cur_n = replacement_cont.get(i).length();
R_len_t buf_need = str_cur_n+(R_len_t)occurrences.size()*replacement_cur_n-sumbytes;
buf.resize(buf_need, false/*destroy contents*/);
Expand Down Expand Up @@ -150,6 +158,9 @@ SEXP stri__replace_all_charclass_yes_vectorize_all(SEXP str, SEXP pattern, SEXP
*
* @version 0.3-1 (Marek Gagolewski, 2014-11-04)
* Issue #112: str_prepare_arg* retvals were not PROTECTed from gc
*
* @version 1.0-2 (Marek Gagolewski, 2016-01-30)
* Issue #210: Allow NA replacement -- only strings in which the corresponding pattern matches are set to NA
*/
SEXP stri__replace_all_charclass_no_vectorize_all(SEXP str, SEXP pattern, SEXP replacement, SEXP merge)
{
Expand Down Expand Up @@ -191,7 +202,7 @@ SEXP stri__replace_all_charclass_no_vectorize_all(SEXP str, SEXP pattern, SEXP r

for (R_len_t i = 0; i<pattern_n; ++i)
{
if (pattern_cont.isNA(i) || replacement_cont.isNA(i)) {
if (pattern_cont.isNA(i)) {
STRI__UNPROTECT_ALL
return stri__vector_NA_strings(str_n);
}
Expand All @@ -208,6 +219,14 @@ SEXP stri__replace_all_charclass_no_vectorize_all(SEXP str, SEXP pattern, SEXP r
false /* byte-based indices */
);

if (occurrences.size() == 0)
continue;

if (replacement_cont.isNA(i)) {
str_cont.setNA(j);
continue;
}

R_len_t replacement_cur_n = replacement_cont.get(i).length();
R_len_t buf_need = str_cur_n+(R_len_t)occurrences.size()*replacement_cur_n-sumbytes;
buf.resize(buf_need, false/*destroy contents*/);
Expand Down Expand Up @@ -296,7 +315,7 @@ SEXP stri__replace_firstlast_charclass(SEXP str, SEXP pattern, SEXP replacement,
i != pattern_cont.vectorize_end();
i = pattern_cont.vectorize_next(i))
{
if (str_cont.isNA(i) || replacement_cont.isNA(i) || pattern_cont.isNA(i)) {
if (str_cont.isNA(i) || pattern_cont.isNA(i)) {
SET_STRING_ELT(ret, i, NA_STRING);
continue;
}
Expand Down Expand Up @@ -337,6 +356,11 @@ SEXP stri__replace_firstlast_charclass(SEXP str, SEXP pattern, SEXP replacement,
continue;
}

if (replacement_cont.isNA(i)) {
SET_STRING_ELT(ret, i, NA_STRING);
continue;
}

R_len_t replacement_cur_n = replacement_cont.get(i).length();
const char* replacement_cur_s = replacement_cont.get(i).c_str();
R_len_t buf_need = str_cur_n+replacement_cur_n-(j-jlast);
Expand Down

0 comments on commit fb97b40

Please sign in to comment.