diff --git a/README.md b/README.md index 544d9d5..d5516f3 100644 --- a/README.md +++ b/README.md @@ -107,8 +107,8 @@ do { // The .text property recursively gets all text content from a node. print("Link text: \(link.text)") // Prints: Link text: Page 1 - // Access attributes using subscripting. - if let href = link.attributes["href"] { + // Access attributes using getAttribute(). + if let href = link.getAttribute("href") { print("Link URL: \(href)") // Prints: Link URL: /page1 } } diff --git a/Sources/YaXHParser/CSS/CSSToXPath.swift b/Sources/YaXHParser/CSS/CSSToXPath.swift index 17dfa7d..50c28fc 100644 --- a/Sources/YaXHParser/CSS/CSSToXPath.swift +++ b/Sources/YaXHParser/CSS/CSSToXPath.swift @@ -4,7 +4,7 @@ private struct SelectorTranslator: ~Copyable { private let source: String.UTF8View private var cursor: String.UTF8View.Index - init(_ css: String) { + init(_ css: consuming String) { self.source = css.utf8 self.cursor = self.source.startIndex } @@ -41,14 +41,15 @@ private struct SelectorTranslator: ~Copyable { } private mutating func parseComponent() throws(CSSError) -> String { - var path = "" + var parts: [String] = [] + parts.reserveCapacity(4) // Typical: tag + id + 2 classes // Tag let startOfComponent = self.cursor if let char = peek(), (char >= 97 && char <= 122) || (char >= 65 && char <= 90) { - path += self.parseIdentifier() + parts.append(String(self.parseIdentifier())) } else { - path += "*" + parts.append("*") } // ID, Classes, and other attributes @@ -57,11 +58,11 @@ private struct SelectorTranslator: ~Copyable { case Self.hash: // # self.advance() let id = self.parseIdentifier() - path += "[@id='\(id)']" + parts.append("[@id='\(id)']") case Self.dot: // . self.advance() let `class` = self.parseIdentifier() - path += "[contains(concat(' ', normalize-space(@class), ' '), ' \(`class`) ')]" + parts.append("[contains(concat(' ', normalize-space(@class), ' '), ' \(`class`) ')]") case Self.attributeStart: // [ // For now, attribute selectors are not supported. Throw an error. throw CSSError.unsupportedSelector( @@ -70,19 +71,21 @@ private struct SelectorTranslator: ~Copyable { ) default: if self.cursor == startOfComponent { self.advance() } - return path + return parts.joined() } } - return path + return parts.joined() } mutating func parse(relative: Bool) throws(CSSError) -> String { var xpathParts: [String] = [] + xpathParts.reserveCapacity(2) // Most selectors have 1-2 groups while !self.isAtEnd { self.skipWhitespace() var components: [String] = [] + components.reserveCapacity(3) // Typical: 1-3 descendant components // Using a for loop here instead of a while to avoid re-calculating `isAtEnd` // and to make the logic clearer. for _ in 0 ..< Int.max { @@ -120,7 +123,7 @@ private struct SelectorTranslator: ~Copyable { /// A simple, CSS selector to XPath translator. /// This is not a full CSS3 implementation, but it covers the most common cases. /// It is optimized to minimize string allocations by operating on the UTF8 view of the selector. -func translateCSSToXPath(_ css: String, relative: Bool = false) throws(CSSError) -> String { +func translateCSSToXPath(_ css: consuming String, relative: Bool = false) throws(CSSError) -> String { var parser = SelectorTranslator(css) return try parser.parse(relative: relative) } diff --git a/Sources/YaXHParser/Concurrency/ParsingService.swift b/Sources/YaXHParser/Concurrency/ParsingService.swift index 8781233..55f088c 100644 --- a/Sources/YaXHParser/Concurrency/ParsingService.swift +++ b/Sources/YaXHParser/Concurrency/ParsingService.swift @@ -31,12 +31,14 @@ public actor ParsingService { public init() {} /// Parse XML from a string in a thread-safe manner - public func parseXML(string: String) throws(XMLError) -> XMLDocument { + /// - Parameter string: The string to parse. This parameter is consumed for optimal performance. + public func parseXML(string: consuming String) throws(XMLError) -> XMLDocument { try self.memoryParser.parse(string: string) } /// Parse HTML from a string in a thread-safe manner - public func parseHTML(string: String) throws(XMLError) -> XMLDocument { + /// - Parameter string: The string to parse. This parameter is consumed for optimal performance. + public func parseHTML(string: consuming String) throws(XMLError) -> XMLDocument { try self.htmlParser.parse(string: string) } @@ -66,13 +68,13 @@ public actor ParsingService { /// within the actor's serialization context, preventing data races. /// /// - Parameters: - /// - string: The XML string to parse. + /// - string: The XML string to parse. This parameter is consumed for optimal performance. /// - extract: A closure that takes the parsed `XMLDocument` and returns a `Sendable` value. /// - Returns: The value returned by the `extract` closure. /// - Throws: Rethrows any `XMLError` from parsing or any error thrown by the `extract` closure, /// wrapping the latter in `XMLError.userTransformError`. public func parseXMLAndExtract( - string: String, + string: consuming String, extract: (XMLDocument) throws -> T ) async throws(XMLError) -> T { do { @@ -90,13 +92,13 @@ public actor ParsingService { /// within the actor's serialization context, preventing data races. /// /// - Parameters: - /// - string: The HTML string to parse. + /// - string: The HTML string to parse. This parameter is consumed for optimal performance. /// - extract: A closure that takes the parsed `XMLDocument` and returns a `Sendable` value. /// - Returns: The value returned by the `extract` closure. /// - Throws: Rethrows any `XMLError` from parsing or any error thrown by the `extract` closure, /// wrapping the latter in `XMLError.userTransformError`. public func parseHTMLAndExtract( - string: String, + string: consuming String, extract: (XMLDocument) throws -> T ) async throws(XMLError) -> T { do { diff --git a/Sources/YaXHParser/Memory/MemoryParser.swift b/Sources/YaXHParser/Memory/MemoryParser.swift index 9e8d5d4..7aaa864 100644 --- a/Sources/YaXHParser/Memory/MemoryParser.swift +++ b/Sources/YaXHParser/Memory/MemoryParser.swift @@ -61,9 +61,9 @@ public struct MemoryParser: ~Copyable { } /// Parse XML/HTML from a String. + /// - Parameter string: The string to parse. This parameter is consumed for optimal performance. /// - Note: This method is NOT thread-safe. libxml2 uses global state during parsing. - public func parse(string: String) throws(XMLError) -> XMLDocument { - var string = string + public func parse(string: consuming String) throws(XMLError) -> XMLDocument { do { return try string.withUTF8 { buffer in return try self.parse(bytes: Span(_unsafeElements: buffer)) @@ -118,11 +118,11 @@ public struct MemoryParser: ~Copyable { /// you can provide different options. /// /// - Parameters: -/// - string: The XML string to parse. +/// - string: The XML string to parse. This parameter is consumed for optimal performance. /// - options: The parsing options to use. Defaults to `.strict`. /// - Returns: A parsed `XMLDocument`. /// - Throws: `XMLError` if parsing fails according to the provided options. -public func parseXML(string: String, options: MemoryParserOptions = .strict) throws(XMLError) +public func parseXML(string: consuming String, options: MemoryParserOptions = .strict) throws(XMLError) -> XMLDocument { let parser = MemoryParser(options: options) return try parser.parse(string: string) @@ -134,11 +134,11 @@ public func parseXML(string: String, options: MemoryParserOptions = .strict) thr /// designed to handle common well-formedness issues, similar to a web browser. /// /// - Parameters: -/// - string: The HTML string to parse. +/// - string: The HTML string to parse. This parameter is consumed for optimal performance. /// - options: The parsing options to use. Defaults to `.lenientHTML`. /// - Returns: A parsed `XMLDocument`. /// - Throws: `XMLError` if parsing fails according to the provided options. -public func parseHTML(string: String, options: MemoryParserOptions = .lenientHTML) throws(XMLError) +public func parseHTML(string: consuming String, options: MemoryParserOptions = .lenientHTML) throws(XMLError) -> XMLDocument { let parser = MemoryParser(options: options) return try parser.parse(string: string)