Skip to content

Commit

Permalink
LibWeb: Implement Node.normalize()
Browse files Browse the repository at this point in the history
This method puts the given node and all of its sub-tree into a
normalized form. A normalized sub-tree has no empty text nodes and no
adjacent text nodes.
  • Loading branch information
tcl3 authored and awesomekling committed Jul 13, 2024
1 parent c92222d commit 0a0651f
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 0 deletions.
8 changes: 8 additions & 0 deletions Tests/LibWeb/Text/expected/DOM/Node-normalize.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Document fragment initial text: 12, child nodes: 3
Element initial text: 34, child nodes: 2
Element text after document.normalize(): 34, child nodes: 2
Document fragment text after documentFragment.normalize(): 1234, child nodes: 2
Text node 1 data: 12
Text node 2 data: 2
Text node 3 data: 34
Text node 4 data: 4
29 changes: 29 additions & 0 deletions Tests/LibWeb/Text/input/DOM/Node-normalize.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<!DOCTYPE html>
<script src="../include.js"></script>
<script>
test(() => {
    // Build a detached fragment holding the adjacent text nodes "1", "" and "2".
    const fragment = document.createDocumentFragment();
    const [first, second, third, fourth] = ["1", "2", "3", "4"].map(digit => document.createTextNode(digit));
    const blank = document.createTextNode("");
    for (const child of [first, blank, second])
        fragment.appendChild(child);
    println(`Document fragment initial text: ${fragment.textContent}, child nodes: ${fragment.childNodes.length}`);

    // Add an element to the fragment with the adjacent text nodes "3" and "4" inside it.
    const container = document.createElement('div');
    fragment.appendChild(container);
    for (const child of [third, fourth])
        container.appendChild(child);
    println(`Element initial text: ${container.textContent}, child nodes: ${container.childNodes.length}`);

    // The fragment is not part of the document, so normalizing the document must leave it untouched.
    document.normalize();
    println(`Element text after document.normalize(): ${container.textContent}, child nodes: ${container.childNodes.length}`);

    // Normalizing the fragment itself merges each run of text nodes and drops the empty one.
    fragment.normalize();
    println(`Document fragment text after documentFragment.normalize(): ${fragment.textContent}, child nodes: ${fragment.childNodes.length}`);

    // The first node of each run receives the merged data; the removed nodes keep their own data.
    println(`Text node 1 data: ${first.data}`);
    println(`Text node 2 data: ${second.data}`);
    println(`Text node 3 data: ${third.data}`);
    println(`Text node 4 data: ${fourth.data}`);
});
</script>
108 changes: 108 additions & 0 deletions Userland/Libraries/LibWeb/DOM/Node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,114 @@ void Node::set_text_content(Optional<String> const& maybe_content)
document().bump_dom_tree_version();
}

// https://dom.spec.whatwg.org/#dom-node-normalize
// Puts the sub-tree below this node into normalized form: empty exclusive Text nodes are removed and every run of
// adjacent exclusive Text nodes is merged into the first node of the run. Live ranges that pointed into (or at) a
// merged node are moved into the surviving node. Errors from replace_data() and from moving range boundaries are
// propagated to the caller.
WebIDL::ExceptionOr<void> Node::normalize()
{
    // https://dom.spec.whatwg.org/#exclusive-text-node
    // An exclusive Text node is a Text node that is not a CDATASection node.
    // NOTE: The same predicate is used for collecting nodes and for walking siblings, so that a CDATASection
    //       can never be merged into (or removed alongside) a neighboring Text node.
    auto is_exclusive_text_node = [](Node const& candidate) {
        return is<Text>(candidate) && !static_cast<Text const&>(candidate).is_cdata_section();
    };

    auto contiguous_exclusive_text_nodes_excluding_self = [&is_exclusive_text_node](Node& node) {
        // https://dom.spec.whatwg.org/#contiguous-exclusive-text-nodes
        // The contiguous exclusive Text nodes of a node node are node, node’s previous sibling exclusive Text node, if any,
        // and its contiguous exclusive Text nodes, and node’s next sibling exclusive Text node, if any,
        // and its contiguous exclusive Text nodes, avoiding any duplicates.
        // NOTE: The callers of this method require node itself to be excluded.
        Vector<Text*> nodes;

        auto* current_node = node.previous_sibling();
        while (current_node && is_exclusive_text_node(*current_node)) {
            nodes.append(static_cast<Text*>(current_node));
            current_node = current_node->previous_sibling();
        }

        // Reverse the order of the nodes so that they are in tree order.
        nodes.reverse();

        current_node = node.next_sibling();
        while (current_node && is_exclusive_text_node(*current_node)) {
            nodes.append(static_cast<Text*>(current_node));
            current_node = current_node->next_sibling();
        }

        return nodes;
    };

    // The normalize() method steps are to run these steps for each descendant exclusive Text node node of this
    // NOTE: The nodes are collected up front because the steps below remove nodes from the tree.
    Vector<Text&> descendant_exclusive_text_nodes;
    for_each_in_inclusive_subtree_of_type<Text>([&](Text const& node) {
        // The inclusive traversal also visits this node itself, which is not one of its own descendants.
        // Without this check, calling normalize() on a Text node would merge its siblings into it.
        if (static_cast<Node const*>(&node) != this && !node.is_cdata_section())
            descendant_exclusive_text_nodes.append(const_cast<Text&>(node));

        return TraversalDecision::Continue;
    });

    for (auto& node : descendant_exclusive_text_nodes) {
        // Every collected node is a descendant of this and therefore had a parent when it was collected.
        // A node without a parent has already been merged into an earlier sibling and removed by step 7
        // of a previous iteration, so there is nothing left to do for it.
        if (!node.parent())
            continue;

        // 1. Let length be node’s length.
        auto& character_data = static_cast<CharacterData&>(node);
        auto length = character_data.length_in_utf16_code_units();

        // 2. If length is zero, then remove node and continue with the next exclusive Text node, if any.
        if (length == 0) {
            node.remove();
            continue;
        }

        // 3. Let data be the concatenation of the data of node’s contiguous exclusive Text nodes (excluding itself), in tree order.
        StringBuilder data;
        for (auto const& text_node : contiguous_exclusive_text_nodes_excluding_self(node))
            data.append(text_node->data());

        // 4. Replace data with node node, offset length, count 0, and data data.
        TRY(character_data.replace_data(length, 0, MUST(data.to_string())));

        // 5. Let currentNode be node’s next sibling.
        auto* current_node = node.next_sibling();

        // 6. While currentNode is an exclusive Text node:
        while (current_node && is_exclusive_text_node(*current_node)) {
            // 1. For each live range whose start node is currentNode, add length to its start offset and set its start node to node.
            for (auto& range : Range::live_ranges()) {
                if (range->start_container() == current_node)
                    TRY(range->set_start(node, range->start_offset() + length));
            }

            // 2. For each live range whose end node is currentNode, add length to its end offset and set its end node to node.
            for (auto& range : Range::live_ranges()) {
                if (range->end_container() == current_node)
                    TRY(range->set_end(node, range->end_offset() + length));
            }

            // 3. For each live range whose start node is currentNode’s parent and start offset is currentNode’s index, set its start node to node and its start offset to length.
            for (auto& range : Range::live_ranges()) {
                if (range->start_container() == current_node->parent() && range->start_offset() == current_node->index())
                    TRY(range->set_start(node, length));
            }

            // 4. For each live range whose end node is currentNode’s parent and end offset is currentNode’s index, set its end node to node and its end offset to length.
            for (auto& range : Range::live_ranges()) {
                if (range->end_container() == current_node->parent() && range->end_offset() == current_node->index())
                    TRY(range->set_end(node, length));
            }

            // 5. Add currentNode’s length to length.
            length += static_cast<Text&>(*current_node).length();

            // 6. Set currentNode to its next sibling.
            current_node = current_node->next_sibling();
        }

        // 7. Remove node’s contiguous exclusive Text nodes (excluding itself), in tree order.
        for (auto const& text_node : contiguous_exclusive_text_nodes_excluding_self(node))
            text_node->remove();
    }

    return {};
}

// https://dom.spec.whatwg.org/#dom-node-nodevalue
Optional<String> Node::node_value() const
{
Expand Down
2 changes: 2 additions & 0 deletions Userland/Libraries/LibWeb/DOM/Node.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ class Node : public EventTarget {
Optional<String> text_content() const;
void set_text_content(Optional<String> const&);

// https://dom.spec.whatwg.org/#dom-node-normalize
// Puts this node's sub-tree into normalized form: no empty text nodes and no adjacent text nodes.
WebIDL::ExceptionOr<void> normalize();

Optional<String> node_value() const;
void set_node_value(Optional<String> const&);

Expand Down
1 change: 1 addition & 0 deletions Userland/Libraries/LibWeb/DOM/Node.idl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ interface Node : EventTarget {
// However, we only apply it to setters, so this works as a stop gap.
// Replace this with something like a special cased [LegacyNullToEmptyString].
[LegacyNullToEmptyString, CEReactions] attribute DOMString? textContent;
// https://dom.spec.whatwg.org/#dom-node-normalize
[CEReactions] undefined normalize();

[CEReactions] Node appendChild(Node node);
[ImplementedAs=pre_insert, CEReactions] Node insertBefore(Node node, Node? child);
Expand Down

0 comments on commit 0a0651f

Please sign in to comment.