Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ do {
// The .text property recursively gets all text content from a node.
print("Link text: \(link.text)") // Prints: Link text: Page 1

// Access attributes using subscripting.
if let href = link.attributes["href"] {
// Access attributes using getAttribute().
if let href = link.getAttribute("href") {
print("Link URL: \(href)") // Prints: Link URL: /page1
}
}
Expand Down
21 changes: 12 additions & 9 deletions Sources/YaXHParser/CSS/CSSToXPath.swift
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ private struct SelectorTranslator: ~Copyable {
private let source: String.UTF8View
private var cursor: String.UTF8View.Index

init(_ css: String) {
init(_ css: consuming String) {
self.source = css.utf8
self.cursor = self.source.startIndex
}
Expand Down Expand Up @@ -41,14 +41,15 @@ private struct SelectorTranslator: ~Copyable {
}

private mutating func parseComponent() throws(CSSError) -> String {
var path = ""
var parts: [String] = []
parts.reserveCapacity(4) // Typical: tag + id + 2 classes

// Tag
let startOfComponent = self.cursor
if let char = peek(), (char >= 97 && char <= 122) || (char >= 65 && char <= 90) {
path += self.parseIdentifier()
parts.append(String(self.parseIdentifier()))
} else {
path += "*"
parts.append("*")
}

// ID, Classes, and other attributes
Expand All @@ -57,11 +58,11 @@ private struct SelectorTranslator: ~Copyable {
case Self.hash: // #
self.advance()
let id = self.parseIdentifier()
path += "[@id='\(id)']"
parts.append("[@id='\(id)']")
case Self.dot: // .
self.advance()
let `class` = self.parseIdentifier()
path += "[contains(concat(' ', normalize-space(@class), ' '), ' \(`class`) ')]"
parts.append("[contains(concat(' ', normalize-space(@class), ' '), ' \(`class`) ')]")
case Self.attributeStart: // [
// For now, attribute selectors are not supported. Throw an error.
throw CSSError.unsupportedSelector(
Expand All @@ -70,19 +71,21 @@ private struct SelectorTranslator: ~Copyable {
)
default:
if self.cursor == startOfComponent { self.advance() }
return path
return parts.joined()
}
}
return path
return parts.joined()
}

mutating func parse(relative: Bool) throws(CSSError) -> String {
var xpathParts: [String] = []
xpathParts.reserveCapacity(2) // Most selectors have 1-2 groups

while !self.isAtEnd {
self.skipWhitespace()

var components: [String] = []
components.reserveCapacity(3) // Typical: 1-3 descendant components
// Using a for loop here instead of a while to avoid re-calculating `isAtEnd`
// and to make the logic clearer.
for _ in 0 ..< Int.max {
Expand Down Expand Up @@ -120,7 +123,7 @@ private struct SelectorTranslator: ~Copyable {
/// A simple CSS-selector-to-XPath translator.
/// This is not a full CSS3 implementation, but it covers the most common cases.
/// It is optimized to minimize string allocations by operating on the UTF-8 view of the selector.
func translateCSSToXPath(_ css: String, relative: Bool = false) throws(CSSError) -> String {
func translateCSSToXPath(_ css: consuming String, relative: Bool = false) throws(CSSError) -> String {
var parser = SelectorTranslator(css)
return try parser.parse(relative: relative)
}
14 changes: 8 additions & 6 deletions Sources/YaXHParser/Concurrency/ParsingService.swift
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,14 @@ public actor ParsingService {
public init() {}

/// Parse XML from a string in a thread-safe manner
public func parseXML(string: String) throws(XMLError) -> XMLDocument {
/// - Parameter string: The string to parse. This parameter is consumed for optimal performance.
public func parseXML(string: consuming String) throws(XMLError) -> XMLDocument {
try self.memoryParser.parse(string: string)
}

/// Parse HTML from a string in a thread-safe manner
public func parseHTML(string: String) throws(XMLError) -> XMLDocument {
/// - Parameter string: The string to parse. This parameter is consumed for optimal performance.
public func parseHTML(string: consuming String) throws(XMLError) -> XMLDocument {
try self.htmlParser.parse(string: string)
}

Expand Down Expand Up @@ -66,13 +68,13 @@ public actor ParsingService {
/// within the actor's serialization context, preventing data races.
///
/// - Parameters:
/// - string: The XML string to parse.
/// - string: The XML string to parse. This parameter is consumed for optimal performance.
/// - extract: A closure that takes the parsed `XMLDocument` and returns a `Sendable` value.
/// - Returns: The value returned by the `extract` closure.
/// - Throws: Rethrows any `XMLError` from parsing or any error thrown by the `extract` closure,
/// wrapping the latter in `XMLError.userTransformError`.
public func parseXMLAndExtract<T: Sendable>(
string: String,
string: consuming String,
extract: (XMLDocument) throws -> T
) async throws(XMLError) -> T {
do {
Expand All @@ -90,13 +92,13 @@ public actor ParsingService {
/// within the actor's serialization context, preventing data races.
///
/// - Parameters:
/// - string: The HTML string to parse.
/// - string: The HTML string to parse. This parameter is consumed for optimal performance.
/// - extract: A closure that takes the parsed `XMLDocument` and returns a `Sendable` value.
/// - Returns: The value returned by the `extract` closure.
/// - Throws: Rethrows any `XMLError` from parsing or any error thrown by the `extract` closure,
/// wrapping the latter in `XMLError.userTransformError`.
public func parseHTMLAndExtract<T: Sendable>(
string: String,
string: consuming String,
extract: (XMLDocument) throws -> T
) async throws(XMLError) -> T {
do {
Expand Down
12 changes: 6 additions & 6 deletions Sources/YaXHParser/Memory/MemoryParser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ public struct MemoryParser: ~Copyable {
}

/// Parse XML/HTML from a String.
/// - Parameter string: The string to parse. This parameter is consumed for optimal performance.
/// - Note: This method is NOT thread-safe. libxml2 uses global state during parsing.
public func parse(string: String) throws(XMLError) -> XMLDocument {
var string = string
public func parse(string: consuming String) throws(XMLError) -> XMLDocument {
do {
return try string.withUTF8 { buffer in
return try self.parse(bytes: Span(_unsafeElements: buffer))
Expand Down Expand Up @@ -118,11 +118,11 @@ public struct MemoryParser: ~Copyable {
/// you can provide different options.
///
/// - Parameters:
/// - string: The XML string to parse.
/// - string: The XML string to parse. This parameter is consumed for optimal performance.
/// - options: The parsing options to use. Defaults to `.strict`.
/// - Returns: A parsed `XMLDocument`.
/// - Throws: `XMLError` if parsing fails according to the provided options.
public func parseXML(string: String, options: MemoryParserOptions = .strict) throws(XMLError)
public func parseXML(string: consuming String, options: MemoryParserOptions = .strict) throws(XMLError)
-> XMLDocument {
let parser = MemoryParser(options: options)
return try parser.parse(string: string)
Expand All @@ -134,11 +134,11 @@ public func parseXML(string: String, options: MemoryParserOptions = .strict) thr
/// designed to handle common well-formedness issues, similar to a web browser.
///
/// - Parameters:
/// - string: The HTML string to parse.
/// - string: The HTML string to parse. This parameter is consumed for optimal performance.
/// - options: The parsing options to use. Defaults to `.lenientHTML`.
/// - Returns: A parsed `XMLDocument`.
/// - Throws: `XMLError` if parsing fails according to the provided options.
public func parseHTML(string: String, options: MemoryParserOptions = .lenientHTML) throws(XMLError)
public func parseHTML(string: consuming String, options: MemoryParserOptions = .lenientHTML) throws(XMLError)
-> XMLDocument {
let parser = MemoryParser(options: options)
return try parser.parse(string: string)
Expand Down