Skip to content

Commit

Permalink
[atoms] Fix getText atom for unicode character in the middle of a word (#8736)
Browse files Browse the repository at this point in the history
* [atoms] Fix getText atom for unicode character in the middle of a word

The change to the getText atom made in commit c065dda
does not handle the case where a Unicode character is in the middle
of a word; that Unicode character is incorrectly capitalized
(see https://bugs.chromium.org/p/chromedriver/issues/detail?id=3611).
The problem is that \b treats the position between an ASCII word
character and a non-ASCII Unicode character as a word boundary. This is
fixed by explicitly using the unicode flag (u) and matching Unicode
letters and Unicode symbols via property escapes (\p{L}, \p{Ll}, \p{S}).

* [atoms] Add meta tag for charset in test html

Add charset for utf-8 in text_test.html
  • Loading branch information
k7z45 committed Oct 20, 2020
1 parent 004be30 commit cf49ba2
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 1 deletion.
2 changes: 1 addition & 1 deletion javascript/atoms/dom.js
Expand Up @@ -1175,7 +1175,7 @@ bot.dom.appendVisibleTextLinesFromTextNode_ = function(textNode, lines,
}

if (textTransform == 'capitalize') {
text = text.replace(/(^|\s|\b)(\S)/g, function() {
text = text.replace(/(^|[^\d\p{L}\p{S}])([\p{Ll}|\p{S}])/gu, function() {
return arguments[1] + arguments[2].toUpperCase();
});
} else if (textTransform == 'uppercase') {
Expand Down
29 changes: 29 additions & 0 deletions javascript/atoms/test/text_test.html
Expand Up @@ -18,6 +18,7 @@
<html>
<head>
<title>text_test.html</title>
<meta charset="UTF-8">
<script src="test_bootstrap.js"></script>
<script type="text/javascript">
goog.require('bot.dom');
Expand Down Expand Up @@ -262,6 +263,23 @@
assertEquals("hello, world! bla-bla-bla", text);
text = getVisibleTextByElementId("uppercased");
assertEquals("HELLO, WORLD! BLA-BLA-BLA", text);

text = getVisibleTextByElementId("capitalized-1");
assertEquals("Äåìî", text);
text = getVisibleTextByElementId("capitalized-2");
assertEquals("Manipulowanie Przepływem", text);
text = getVisibleTextByElementId("capitalized-3");
assertEquals("Manipulowanie Przepływem", text);
text = getVisibleTextByElementId("capitalized-4");
assertEquals("Manipulowanie Pr0123z4epływem", text);
text = getVisibleTextByElementId("capitalized-5");
assertEquals("Lorem Ipsum Dolor Sit Amet, Consectetur Adipisicing Elit,", text);
text = getVisibleTextByElementId("capitalized-6");
assertEquals("Ⓐⓑⓒ (Ⓓⓔⓕ) —Ⓖⓗⓘ— Ⓙkl", text);
text = getVisibleTextByElementId("capitalized-7");
assertEquals('(This) “Is” [A] –Short– -Test- «For» *The* _Css_ ¿Capitalize? ?¡Transfor', text);
text = getVisibleTextByElementId("capitalized-8");
assertEquals('The Dutch Word: "Ijsland" Starts With A Digraph', text);
}

function getVisibleTextByElementId(id) {
Expand Down Expand Up @@ -403,5 +421,16 @@
<a id="uppercased" style="text-transform: uppercase">hello, world! bla-bla-BLA</a><br/>
</div>

<div>
<a lang="ru" id="capitalized-1" style="text-transform: capitalize">äåìî</a><br/>
<a id="capitalized-2" style="text-transform: capitalize">Manipulowanie przepływem</a><br/>
<a id="capitalized-3" style="text-transform: capitalize">manipulowanie przep&#x142ywem</a><br/>
<a id="capitalized-4" style="text-transform: capitalize">Manipulowanie pr0123z4epływem</a><br/>
<a id="capitalized-5" style="text-transform: capitalize">Lorem ipsum dolor sit amet, consectetur adipisicing elit,</a><br/>
<a id="capitalized-6" style="text-transform: capitalize">ⓐⓑⓒ (ⓓⓔⓕ) —ⓖⓗⓘ— ⓙkl</a><br/>
<a id="capitalized-7" style="text-transform: capitalize">(this) “is” [a] –short– -test- «for» *the* _css_ ¿capitalize? ?¡transfor</a><br/>
<a id="capitalized-8" style="text-transform: capitalize">The Dutch word: "ijsland" starts with a digraph</a><br/>
</div>

</body>
</html>

0 comments on commit cf49ba2

Please sign in to comment.