Skip to content

Commit

Permalink
Implement removeWhitespaceElements on XMLDecoder (#222)
Browse files Browse the repository at this point in the history
This updates the `XMLStackParser` to accept a parameter called `removeWhitespaceElements`.

The purpose of the `XMLStackParser` is to call the XML parser and create a tree of `XMLCoderElement` representing the structure of the parsed XML.

Assuming that XMLStackParser has `trimValueWhitespaces` set to `false`, when attempting to parse a nested data structure like the following:
```xml
<SomeType>
    <nestedStringList>
        <member>
            <member>foo</member>
            <member>bar</member>
        </member>
        <member>
            <member>baz</member>
            <member>qux</member>
        </member>
    </nestedStringList>
</SomeType>
```

... then there will be multiple `XMLCoderElement`s in the tree whose `elements` contain either:
a.  Purely whitespace elements or
b.  The child elements

These purely whitespaced elements are problematic for users who are implementing custom `Decoder` logic, as they are interpreted as regular child elements.  Therefore, setting `removeWhitespaceElements` to `true` while `trimValueWhitespaces` is set to `false`, will remove these whitespace elements during the construction of the `XMLCoderElement` tree.

An in-depth analysis of the original problem can be found [here](#219).

For historical purposes, a separate approach was implemented.  It uses a similar algorithm in a different part of the code. #221
  • Loading branch information
wooj2 committed Jul 30, 2021
1 parent 338d503 commit 487ece5
Show file tree
Hide file tree
Showing 10 changed files with 303 additions and 7 deletions.
10 changes: 10 additions & 0 deletions README.md
Expand Up @@ -212,6 +212,16 @@ Starting with [version 0.5](https://github.com/MaxDesiatov/XMLCoder/releases/tag
you can now set a property `trimValueWhitespaces` to `false` (the default value is `true`) on
`XMLDecoder` instance to preserve all whitespaces in decoded strings.


### Remove whitespace elements

When decoding pretty-printed XML while `trimValueWhitespaces` is set to `false`, it's possible
for whitespace elements to be added as child elements on an instance of `XMLCoderElement`. These
whitespace elements make it impossible to decode data structures that require custom `Decodable` logic.
Starting with [version 0.13.0](https://github.com/MaxDesiatov/XMLCoder/releases/tag/0.13.0) you can
set a property `removeWhitespaceElements` to `true` (the default value is `false`) on
`XMLDecoder` to remove these whitespace elements.

### Choice element coding

Starting with [version 0.8](https://github.com/MaxDesiatov/XMLCoder/releases/tag/0.8.0),
Expand Down
6 changes: 6 additions & 0 deletions Sources/XMLCoder/Auxiliaries/String+Extensions.swift
Expand Up @@ -44,3 +44,9 @@ extension StringProtocol {
self = lowercasingFirstLetter()
}
}

extension String {
    /// Returns `true` when the string is empty or contains nothing but
    /// whitespace and newline characters.
    func isAllWhitespace() -> Bool {
        let remainder = trimmingCharacters(in: .whitespacesAndNewlines)
        return remainder.isEmpty
    }
}
7 changes: 7 additions & 0 deletions Sources/XMLCoder/Auxiliaries/XMLCoderElement.swift
Expand Up @@ -392,3 +392,10 @@ extension XMLCoderElement {
}
}
}

extension XMLCoderElement {
    /// Reports whether this element has an empty key, no child elements, and
    /// a string value that is either absent or made up entirely of whitespace.
    func isWhitespaceWithNoElements() -> Bool {
        guard key.isEmpty, elements.isEmpty else {
            return false
        }
        guard let text = stringValue else {
            // A missing string value counts as whitespace.
            return true
        }
        return text.isAllWhitespace()
    }
}
35 changes: 30 additions & 5 deletions Sources/XMLCoder/Auxiliaries/XMLStackParser.swift
Expand Up @@ -15,19 +15,23 @@ class XMLStackParser: NSObject {
var root: XMLCoderElement?
private var stack: [XMLCoderElement] = []
private let trimValueWhitespaces: Bool
private let removeWhitespaceElements: Bool

init(trimValueWhitespaces: Bool = true) {
init(trimValueWhitespaces: Bool = true, removeWhitespaceElements: Bool = false) {
self.trimValueWhitespaces = trimValueWhitespaces
self.removeWhitespaceElements = removeWhitespaceElements
super.init()
}

static func parse(
with data: Data,
errorContextLength length: UInt,
shouldProcessNamespaces: Bool,
trimValueWhitespaces: Bool
trimValueWhitespaces: Bool,
removeWhitespaceElements: Bool
) throws -> Box {
let parser = XMLStackParser(trimValueWhitespaces: trimValueWhitespaces)
let parser = XMLStackParser(trimValueWhitespaces: trimValueWhitespaces,
removeWhitespaceElements: removeWhitespaceElements)

let node = try parser.parse(
with: data,
Expand Down Expand Up @@ -141,15 +145,36 @@ extension XMLStackParser: XMLParserDelegate {
return
}

let updatedElement = removeWhitespaceElements ? elementWithFilteredElements(element: element) : element

withCurrentElement { currentElement in
currentElement.append(element: element, forKey: element.key)
currentElement.append(element: updatedElement, forKey: updatedElement.key)
}

if stack.isEmpty {
root = element
root = updatedElement
}
}

/// Returns a version of `element` without its whitespace-only children.
///
/// The element is rebuilt only when its children are a mix of
/// whitespace-only elements and other elements. If there is nothing to
/// remove, or if every child is whitespace-only, `element` is returned
/// unchanged.
///
/// - Parameter element: The element whose direct children are inspected.
/// - Returns: Either `element` itself or a new element with the same key and
///   attributes and only the non-whitespace children.
func elementWithFilteredElements(element: XMLCoderElement) -> XMLCoderElement {
    let children = element.elements
    let meaningfulChildren = children.filter { !$0.isWhitespaceWithNoElements() }

    // A shorter filtered list means at least one whitespace child was found;
    // a non-empty filtered list means at least one other child exists.
    let foundWhitespace = meaningfulChildren.count < children.count
    guard foundWhitespace, !meaningfulChildren.isEmpty else {
        return element
    }

    return XMLCoderElement(key: element.key, elements: meaningfulChildren, attributes: element.attributes)
}

func parser(_: XMLParser, foundCharacters string: String) {
let processedString = process(string: string)
guard processedString.count > 0, string.count != 0 else {
Expand Down
12 changes: 10 additions & 2 deletions Sources/XMLCoder/Decoder/XMLDecoder.swift
Expand Up @@ -310,6 +310,12 @@ open class XMLDecoder {
*/
open var trimValueWhitespaces: Bool

/** A boolean value that determines whether to remove pure whitespace elements
that have sibling elements that aren't pure whitespace. The default value
is `false`.
*/
open var removeWhitespaceElements: Bool

/// Options set on the top-level encoder to pass down the decoding hierarchy.
struct Options {
let dateDecodingStrategy: DateDecodingStrategy
Expand All @@ -335,8 +341,9 @@ open class XMLDecoder {
// MARK: - Constructing a XML Decoder

/// Initializes `self` with default strategies.
public init(trimValueWhitespaces: Bool = true) {
public init(trimValueWhitespaces: Bool = true, removeWhitespaceElements: Bool = false) {
self.trimValueWhitespaces = trimValueWhitespaces
self.removeWhitespaceElements = removeWhitespaceElements
}

// MARK: - Decoding Values
Expand All @@ -356,7 +363,8 @@ open class XMLDecoder {
with: data,
errorContextLength: errorContextLength,
shouldProcessNamespaces: shouldProcessNamespaces,
trimValueWhitespaces: trimValueWhitespaces
trimValueWhitespaces: trimValueWhitespaces,
removeWhitespaceElements: removeWhitespaceElements
)

let decoder = XMLDecoderImplementation(
Expand Down
35 changes: 35 additions & 0 deletions Tests/XMLCoderTests/Auxiliary/String+ExtensionsTests.swift
Expand Up @@ -41,4 +41,39 @@ class StringExtensionsTests: XCTestCase {
}
XCTAssertEqual(expected, mutated)
}

/// Checks `isAllWhitespace()` against strings built from spaces and the
/// `\n`, `\r` and `\r\n` line breaks, and against strings that contain
/// visible characters.
func testIsAllWhitespace() {
    let whitespaceOnly = [
        "", " ",
        "\n", "\n ", " \n ", " \n",
        "\r", "\r ", " \r ", " \r",
        "\r\n", "\r\n ", " \r\n ", " \r\n",
    ]
    for candidate in whitespaceOnly {
        XCTAssertTrue(
            candidate.isAllWhitespace(),
            "expected \(candidate.debugDescription) to be all whitespace"
        )
    }

    // Negative cases: without these, an implementation that always returns
    // `true` would pass the test.
    let withContent = ["a", " a ", "\na\n", "\r\n.", "foo bar"]
    for candidate in withContent {
        XCTAssertFalse(
            candidate.isAllWhitespace(),
            "expected \(candidate.debugDescription) not to be all whitespace"
        )
    }
}
}
15 changes: 15 additions & 0 deletions Tests/XMLCoderTests/Auxiliary/XMLElementTests.swift
Expand Up @@ -49,4 +49,19 @@ class XMLElementTests: XCTestCase {
XCTAssertEqual(keyed.elements, [element])
XCTAssertEqual(keyed.attributes, [])
}

/// An element with a non-empty key and non-whitespace text must not be
/// reported as a whitespace element.
func testWhitespaceWithNoElements_keyed() {
    XCTAssertFalse(
        XMLCoderElement(key: "foo", isStringBoxCDATA: false, box: StringBox("bar"))
            .isWhitespaceWithNoElements()
    )
}

/// Elements created from a whitespace-only string value must be reported as
/// whitespace elements.
func testWhitespaceWithNoElements_whitespace() {
    let whitespaceValues = ["\n ", "\n", " "]
    for value in whitespaceValues {
        let element = XMLCoderElement(stringValue: value)
        XCTAssert(element.isWhitespaceWithNoElements())
    }
}
}
102 changes: 102 additions & 0 deletions Tests/XMLCoderTests/Auxiliary/XMLStackParserTests.swift
Expand Up @@ -56,4 +56,106 @@ class XMLStackParserTests: XCTestCase {
shouldProcessNamespaces: false
))
}

/// With `removeWhitespaceElements` enabled, the parsed tree of a
/// pretty-printed nested list exposes the keyed children directly at
/// indices 0 and 1, with no whitespace elements in between.
func testNestedMembers_removeWhitespaceElements() throws {
    let xml =
        """
        <SomeType>
        <nestedStringList>
        <member>
        <member>foo</member>
        <member>bar</member>
        </member>
        <member>
        <member>baz</member>
        <member>qux</member>
        </member>
        </nestedStringList>
        </SomeType>
        """
    let parser = XMLStackParser(trimValueWhitespaces: false, removeWhitespaceElements: true)
    let root = try parser.parse(with: Data(xml.utf8), errorContextLength: 0, shouldProcessNamespaces: false)

    let list = root.elements[0]
    XCTAssertEqual(list.key, "nestedStringList")

    let firstGroup = list.elements[0]
    XCTAssertEqual(firstGroup.key, "member")
    XCTAssertEqual(firstGroup.elements[0].key, "member")
    XCTAssertEqual(firstGroup.elements[0].elements[0].stringValue, "foo")
    XCTAssertEqual(firstGroup.elements[1].elements[0].stringValue, "bar")

    let secondGroup = list.elements[1]
    XCTAssertEqual(secondGroup.key, "member")
    XCTAssertEqual(secondGroup.elements[0].key, "member")
    XCTAssertEqual(secondGroup.elements[0].elements[0].stringValue, "baz")
    XCTAssertEqual(secondGroup.elements[1].elements[0].stringValue, "qux")
}

/// Baseline for `testNestedMembers_removeWhitespaceElements`: parses the same
/// nested list with `removeWhitespaceElements` disabled and checks that the
/// whitespace between tags stays in the tree as elements with an empty key,
/// interleaved with the keyed children.
func testNestedMembers() throws {
let parser = XMLStackParser(trimValueWhitespaces: false, removeWhitespaceElements: false)
let xmlData =
"""
<SomeType>
<nestedStringList>
<member>
<member>foo</member>
<member>bar</member>
</member>
<member>
<member>baz</member>
<member>qux</member>
</member>
</nestedStringList>
</SomeType>
""".data(using: .utf8)!
let root = try parser.parse(with: xmlData, errorContextLength: 0, shouldProcessNamespaces: false)

// The first child of the root is the whitespace before <nestedStringList>.
// NOTE(review): the expected whitespace strings below must match the
// indentation of the XML literal exactly — confirm against the checked-in file.
XCTAssertEqual(root.elements[0].key, "")
XCTAssertEqual(root.elements[0].stringValue, "\n ")

// Inside <nestedStringList> the same pattern repeats: whitespace at index 0,
// the first keyed <member> at index 1.
XCTAssertEqual(root.elements[1].key, "nestedStringList")
XCTAssertEqual(root.elements[1].elements[0].key, "")
XCTAssertEqual(root.elements[1].elements[0].stringValue, "\n ")
XCTAssertEqual(root.elements[1].elements[1].key, "member")
XCTAssertEqual(root.elements[1].elements[1].elements[0].stringValue, "\n ")

// First outer <member>: inner members sit at indices 1 and 3.
XCTAssertEqual(root.elements[1].elements[1].elements[1].key, "member")
XCTAssertEqual(root.elements[1].elements[1].elements[1].elements[0].stringValue, "foo")
XCTAssertEqual(root.elements[1].elements[1].elements[3].key, "member")
XCTAssertEqual(root.elements[1].elements[1].elements[3].elements[0].stringValue, "bar")

// Second outer <member> is at index 3 of <nestedStringList>, with the same inner layout.
XCTAssertEqual(root.elements[1].elements[3].elements[1].key, "member")
XCTAssertEqual(root.elements[1].elements[3].elements[1].elements[0].stringValue, "baz")
XCTAssertEqual(root.elements[1].elements[3].elements[3].key, "member")
XCTAssertEqual(root.elements[1].elements[3].elements[3].elements[0].stringValue, "qux")
}

/// With `removeWhitespaceElements` enabled, the whitespace around
/// `<strValue>` is dropped while the escaped carriage return and line feed
/// inside its text are preserved.
func testEscapableCharacters_removeWhitespaceElements() throws {
    let xml =
        """
        <SomeType>
        <strValue>escaped data: &amp;lt;&#xD;&#10;</strValue>
        </SomeType>
        """
    let parser = XMLStackParser(trimValueWhitespaces: false, removeWhitespaceElements: true)
    let root = try parser.parse(with: Data(xml.utf8), errorContextLength: 0, shouldProcessNamespaces: false)

    XCTAssertEqual(root.key, "SomeType")

    let strValue = root.elements[0]
    XCTAssertEqual(strValue.key, "strValue")
    XCTAssertEqual(strValue.elements[0].stringValue, "escaped data: &lt;\r\n")
}

/// Baseline for `testEscapableCharacters_removeWhitespaceElements`: parses the
/// same document with `removeWhitespaceElements` disabled and checks that the
/// whitespace on both sides of `<strValue>` is kept as elements with an empty key.
func testEscapableCharacters() throws {
let parser = XMLStackParser(trimValueWhitespaces: false, removeWhitespaceElements: false)
let xmlData =
"""
<SomeType>
<strValue>escaped data: &amp;lt;&#xD;&#10;</strValue>
</SomeType>
""".data(using: .utf8)!
let root = try parser.parse(with: xmlData, errorContextLength: 0, shouldProcessNamespaces: false)
XCTAssertEqual(root.key, "SomeType")
// Index 0 is the whitespace before <strValue>.
// NOTE(review): the expected whitespace strings must match the indentation
// of the XML literal exactly — confirm against the checked-in file.
XCTAssertEqual(root.elements[0].key, "")
XCTAssertEqual(root.elements[0].stringValue, "\n ")
// Index 1 is <strValue>; its text keeps the escaped "\r\n".
XCTAssertEqual(root.elements[1].elements[0].stringValue, "escaped data: &lt;\r\n")
// Index 2 is the whitespace after </strValue>.
XCTAssertEqual(root.elements[2].stringValue, "\n")
}
}
74 changes: 74 additions & 0 deletions Tests/XMLCoderTests/Minimal/NestedStringList.swift
@@ -0,0 +1,74 @@
// Copyright (c) 2018-2020 XMLCoder contributors
//
// This software is released under the MIT License.
// https://opensource.org/licenses/MIT
//
// Created by John Woo on 7/29/21.
//

import Foundation

import XCTest
@testable import XMLCoder

/// End-to-end test for decoding a nested string list from multi-line XML
/// with `trimValueWhitespaces` disabled and `removeWhitespaceElements`
/// enabled.
class NestedStringList: XCTestCase {

    /// Fixture with hand-written `Decodable` logic: the list is read through
    /// a nested container keyed by `member` rather than by synthesized
    /// conformance.
    struct TypeWithNestedStringList: Decodable {
        let nestedStringList: [[String]]

        enum CodingKeys: String, CodingKey {
            case nestedStringList
        }

        enum NestedMemberKeys: String, CodingKey {
            case member
        }

        /// Decodes `nestedStringList` from the `member` entries of the
        /// `nestedStringList` element, falling back to an empty list when no
        /// `member` entry is present.
        public init(from decoder: Decoder) throws {
            let containerValues = try decoder.container(keyedBy: CodingKeys.self)
            let wrappedContainer = try containerValues.nestedContainer(
                keyedBy: NestedMemberKeys.self,
                forKey: .nestedStringList
            )
            // The decoded value is already `[[String]]`, so it is used
            // directly instead of being copied element by element.
            nestedStringList = try wrappedContainer.decodeIfPresent([[String]].self, forKey: .member) ?? []
        }
    }

    /// Decodes a two-by-two nested list whose strings end in an escaped
    /// carriage return and line feed, and checks the complete result.
    func testRemoveWhitespaceElements() throws {
        let decoder = XMLDecoder(trimValueWhitespaces: false, removeWhitespaceElements: true)
        let xmlString =
            """
            <TypeWithNestedStringList>
            <nestedStringList>
            <member>
            <member>foo: &amp;lt;&#xD;&#10;</member>
            <member>bar: &amp;lt;&#xD;&#10;</member>
            </member>
            <member>
            <member>baz: &amp;lt;&#xD;&#10;</member>
            <member>qux: &amp;lt;&#xD;&#10;</member>
            </member>
            </nestedStringList>
            </TypeWithNestedStringList>
            """
        let xmlData = Data(xmlString.utf8)

        let decoded = try decoder.decode(TypeWithNestedStringList.self, from: xmlData)

        // Comparing the whole array also checks the element counts and fails
        // cleanly, instead of crashing on an out-of-range index.
        XCTAssertEqual(
            decoded.nestedStringList,
            [
                ["foo: &lt;\r\n", "bar: &lt;\r\n"],
                ["baz: &lt;\r\n", "qux: &lt;\r\n"],
            ]
        )
    }
}
14 changes: 14 additions & 0 deletions Tests/XMLCoderTests/Minimal/StringTests.swift
Expand Up @@ -79,4 +79,18 @@ class StringTests: XCTestCase {
XCTAssertEqual(String(data: encoded, encoding: .utf8)!, xmlString)
}
}

/// Decodes a string value that ends in an escaped carriage return and line
/// feed with `removeWhitespaceElements` enabled, and checks that the value
/// is preserved.
func testRemoveWhitespaceElements() throws {
    // The option is enabled explicitly: with the default (`false`) this test
    // would not exercise the behaviour it is named after.
    let decoder = XMLDecoder(trimValueWhitespaces: false, removeWhitespaceElements: true)
    let xmlString =
        """
        <Container>
        <value>escaped data: &amp;lt;&#xD;&#10;</value>
        </Container>
        """
    let xmlData = Data(xmlString.utf8)

    let decoded = try decoder.decode(Container.self, from: xmlData)
    XCTAssertEqual(decoded.value, "escaped data: &lt;\r\n")
}
}

0 comments on commit 487ece5

Please sign in to comment.