diff --git a/MARKDOWN_PARSER.md b/MARKDOWN_PARSER.md index f8b1e73..dc47df9 100644 --- a/MARKDOWN_PARSER.md +++ b/MARKDOWN_PARSER.md @@ -602,6 +602,12 @@ class MarkdownPluginManager { - Add documentation comments for public APIs - Keep functions focused and single-purpose +### Documentation +The codebase now contains detailed Swift documentation comments explaining the +responsibilities of core types such as `CodeParser`, `CodeConstructor` and the +inline parser. These comments can be viewed in Xcode Quick Help or rendered by +documentation tools. + ### Testing Requirements - All new features must include comprehensive tests - Maintain test coverage above 90% @@ -653,4 +659,4 @@ This project is licensed under the MIT License - see the LICENSE file for detail --- -*Last updated: 2025-07-20* +*Last updated: 2025-07-21* diff --git a/Sources/SwiftParser/Core/CodeConstructor.swift b/Sources/SwiftParser/Core/CodeConstructor.swift index e98b93d..6ce270d 100644 --- a/Sources/SwiftParser/Core/CodeConstructor.swift +++ b/Sources/SwiftParser/Core/CodeConstructor.swift @@ -5,24 +5,34 @@ // Created by Dongyu Zhao on 7/21/25. // +/// Consumes a list of tokens to build an AST using registered node builders. public class CodeConstructor where Node: CodeNodeElement, Token: CodeTokenElement { + /// Ordered collection of node builders that attempt to consume tokens. private let builders: [any CodeNodeBuilder] + /// Factory that provides initial construction state for each parse run. private var state: () -> (any CodeConstructState)? - public init(builders: [any CodeNodeBuilder], state: @escaping () -> (any CodeConstructState)?) { + /// Create a new constructor + /// - Parameters: + /// - builders: The node builders responsible for producing AST nodes. + /// - state: Factory returning the initial parsing state object. + public init( + builders: [any CodeNodeBuilder], + state: @escaping () -> (any CodeConstructState)? 
+ ) { self.builders = builders self.state = state } + /// Build an AST from a token stream + /// - Parameters: + /// - tokens: Token list to consume. + /// - root: Root node that will receive parsed children. + /// - Returns: The populated root node and any construction errors. public func parse(_ tokens: [any CodeToken], root: CodeNode) -> (CodeNode, [CodeError]) { var context = CodeConstructContext(current: root, tokens: tokens, state: state()) while context.consuming < context.tokens.count { - // Stop at EOF without recording an error - if let token = context.tokens[context.consuming] as? MarkdownToken, - token.element == .eof { - break - } var matched = false for node in builders { diff --git a/Sources/SwiftParser/Core/CodeError.swift b/Sources/SwiftParser/Core/CodeError.swift index a61e72d..a09c30a 100644 --- a/Sources/SwiftParser/Core/CodeError.swift +++ b/Sources/SwiftParser/Core/CodeError.swift @@ -1,8 +1,16 @@ import Foundation +/// Represents a parsing error encountered during tokenization or AST building. public struct CodeError: Error { + /// Human readable error message. public let message: String + /// Range in the original source where the error occurred, if available. public let range: Range? + + /// Create a new error instance. + /// - Parameters: + /// - message: Description of the problem. + /// - range: Optional source range that triggered the error. public init(_ message: String, range: Range? = nil) { self.message = message self.range = range diff --git a/Sources/SwiftParser/Core/CodeLanguage.swift b/Sources/SwiftParser/Core/CodeLanguage.swift index 0172ae6..6bf519c 100644 --- a/Sources/SwiftParser/Core/CodeLanguage.swift +++ b/Sources/SwiftParser/Core/CodeLanguage.swift @@ -4,11 +4,26 @@ public protocol CodeLanguage where Node: CodeNodeElement, Token: Co associatedtype Node: CodeNodeElement associatedtype Token: CodeTokenElement - var tokenizer: any CodeOutdatedTokenizer { get } + /// The token builders used to tokenize the input. 
var tokens: [any CodeTokenBuilder] { get } + + /// The node builders used to construct the AST. var nodes: [any CodeNodeBuilder] { get } + /// The function that creates the root node of the AST. func root() -> CodeNode + + /// The function that creates the initial context for AST construction. func state() -> (any CodeConstructState)? + + /// The function that creates the initial context for tokenization. func state() -> (any CodeTokenState)? + + /// Provide an EOF token if the language requires one. + /// - Parameter range: The range where the EOF token should be inserted. + func eofToken(at range: Range) -> (any CodeToken)? +} + +extension CodeLanguage { + public func eofToken(at range: Range) -> (any CodeToken)? { nil } } diff --git a/Sources/SwiftParser/Core/CodeOutdatedParser.swift b/Sources/SwiftParser/Core/CodeOutdatedParser.swift deleted file mode 100644 index bd7e966..0000000 --- a/Sources/SwiftParser/Core/CodeOutdatedParser.swift +++ /dev/null @@ -1,53 +0,0 @@ -import Foundation - -@available(*, deprecated, renamed: "CodeParser", message: "Use `CodeParser` instead.") -public final class CodeOutdatedParser where Node: CodeNodeElement, Token: CodeTokenElement { - private let language: any CodeLanguage - - public init(language: any CodeLanguage) { - self.language = language - } - - @available(*, deprecated, renamed: "parse", message: "Use `parse(_:)` instead.") - public func parse(_ input: String, root: CodeNode) -> (node: CodeNode, context: CodeConstructContext) { - let normalized = normalize(input) - let tokens = language.tokenizer.tokenize(normalized) - var context = CodeConstructContext(current: root, tokens: tokens, state: language.state(of: normalized)) - - while context.consuming < context.tokens.count { - // Stop at EOF without recording an error - if let token = context.tokens[context.consuming] as? 
MarkdownToken, - token.element == .eof { - break - } - - var matched = false - for builder in language.nodes { - if builder.build(from: &context) { - matched = true - break - } - } - - if !matched { - // If no builder matched, record an error and skip the token - let token = context.tokens[context.consuming] - let error = CodeError("Unrecognized token: \(token.element)", range: token.range) - context.errors.append(error) - context.consuming += 1 - } - } - - return (root, context) - } - - /// Normalizes input string to handle line ending inconsistencies and other common issues - /// This ensures consistent behavior across different platforms and input sources - private func normalize(_ raw: String) -> String { - // Normalize line endings: Convert CRLF (\r\n) and CR (\r) to LF (\n) - // This prevents issues with different line ending conventions - return raw - .replacingOccurrences(of: "\r\n", with: "\n") // Windows CRLF -> Unix LF - .replacingOccurrences(of: "\r", with: "\n") // Classic Mac CR -> Unix LF - } -} diff --git a/Sources/SwiftParser/Core/CodeOutdatedTokenizer.swift b/Sources/SwiftParser/Core/CodeOutdatedTokenizer.swift deleted file mode 100644 index a08d2a3..0000000 --- a/Sources/SwiftParser/Core/CodeOutdatedTokenizer.swift +++ /dev/null @@ -1,7 +0,0 @@ -import Foundation - -@available(*, deprecated, renamed: "CodeTokenizer", message: "Use `CodeTokenizer` instead.") -public protocol CodeOutdatedTokenizer where Element: CodeTokenElement { - associatedtype Element: CodeTokenElement - func tokenize(_ input: String) -> [any CodeToken] -} diff --git a/Sources/SwiftParser/Core/CodeParser.swift b/Sources/SwiftParser/Core/CodeParser.swift index eb94e2e..0be6dad 100644 --- a/Sources/SwiftParser/Core/CodeParser.swift +++ b/Sources/SwiftParser/Core/CodeParser.swift @@ -1,8 +1,16 @@ +/// Result returned from `CodeParser.parse` containing the AST, token stream and +/// any parsing errors. 
public struct CodeParseResult { public let root: CodeNode public let tokens: [any CodeToken] public let errors: [CodeError] + /// Create a result object + /// - Parameters: + /// - root: The constructed root node of the AST. + /// - tokens: Token stream produced while parsing. + /// - errors: Any errors that occurred during tokenization or AST + /// construction. public init(root: CodeNode, tokens: [any CodeToken], errors: [CodeError] = []) { self.root = root self.tokens = tokens @@ -10,6 +18,10 @@ public struct CodeParseResult { } } +/// High level parser that orchestrates tokenization and AST construction. +/// +/// `CodeParser` uses the provided `CodeLanguage` implementation to tokenize the +/// source text and then build an AST using the registered node builders. public class CodeParser where Node: CodeNodeElement, Token: CodeTokenElement { private let language: any CodeLanguage @@ -18,10 +30,25 @@ public class CodeParser where No public init(language: any CodeLanguage) { self.language = language - self.tokenizer = CodeTokenizer(builders: language.tokens, state: language.state) - self.constructor = CodeConstructor(builders: language.nodes, state: language.state) + self.tokenizer = CodeTokenizer( + builders: language.tokens, + state: language.state, + eofTokenFactory: { language.eofToken(at: $0) } + ) + self.constructor = CodeConstructor( + builders: language.nodes, + state: language.state + ) } + /// Parse a source string using the supplied language. + /// + /// This method first tokenizes the input and, if tokenization succeeds, + /// constructs the AST using the language's node builders. + /// - Parameter source: The raw text to parse. + /// - Parameter language: The language definition to use for parsing. + /// - Returns: A `CodeParseResult` containing the root node, tokens and any + /// errors encountered. 
public func parse(_ source: String, language: any CodeLanguage) -> CodeParseResult { let normalized = normalize(source) let root = language.root() diff --git a/Sources/SwiftParser/Core/CodeTokenizer.swift b/Sources/SwiftParser/Core/CodeTokenizer.swift index f11e295..dffe0f1 100644 --- a/Sources/SwiftParser/Core/CodeTokenizer.swift +++ b/Sources/SwiftParser/Core/CodeTokenizer.swift @@ -8,10 +8,16 @@ public class CodeTokenizer where Token: CodeTokenElement { private let builders: [any CodeTokenBuilder] private var state: () -> (any CodeTokenState)? + private let eofTokenFactory: ((Range) -> (any CodeToken)?)? - public init(builders: [any CodeTokenBuilder], state: @escaping () -> (any CodeTokenState)?) { + public init( + builders: [any CodeTokenBuilder], + state: @escaping () -> (any CodeTokenState)?, + eofTokenFactory: ((Range) -> (any CodeToken)?)? = nil + ) { self.builders = builders self.state = state + self.eofTokenFactory = eofTokenFactory } public func tokenize(_ input: String) -> ([any CodeToken], [CodeError]) { @@ -42,10 +48,9 @@ public class CodeTokenizer where Token: CodeTokenElement { } } - // Automatically append EOF token for Markdown - if Token.self == MarkdownTokenElement.self, - let eof = MarkdownToken.eof(at: input.endIndex.. { - context.tokens.append(eof) + // Append EOF token if provided by the language + if let token = eofTokenFactory?(input.endIndex.. CodeNode { - return root() - } - - @available(*, deprecated, renamed: "state") - public func state(of source: String) -> (any CodeConstructState)? 
{ - return state() - } -} diff --git a/Sources/SwiftParser/Markdown/MarkdownLanguage.swift b/Sources/SwiftParser/Markdown/MarkdownLanguage.swift index 60f4650..bf69345 100644 --- a/Sources/SwiftParser/Markdown/MarkdownLanguage.swift +++ b/Sources/SwiftParser/Markdown/MarkdownLanguage.swift @@ -1,19 +1,28 @@ import Foundation // MARK: - Markdown Language Implementation +/// Default Markdown language implementation following CommonMark with optional +/// extensions. +/// +/// The language exposes a set of token and node builders that together +/// understand Markdown syntax. The initializer allows callers to supply a +/// custom list of builders to enable or disable features. public class MarkdownLanguage: CodeLanguage { public typealias Node = MarkdownNodeElement public typealias Token = MarkdownTokenElement // MARK: - Language Components - public let tokenizer: any CodeOutdatedTokenizer public var tokens: [any CodeTokenBuilder] public let nodes: [any CodeNodeBuilder] // MARK: - Initialization + /// Create a Markdown language with the provided builders. + /// + /// - Parameter consumers: Node builders to be used when constructing the + /// document AST. Passing a custom set allows features to be enabled or + /// disabled. public init( - tokenizer: any CodeOutdatedTokenizer = MarkdownTokenizer(), consumers: [any CodeNodeBuilder] = [ MarkdownReferenceDefinitionBuilder(), MarkdownHeadingBuilder(), @@ -28,10 +37,10 @@ public class MarkdownLanguage: CodeLanguage { MarkdownListBuilder(), MarkdownBlockquoteBuilder(), MarkdownParagraphBuilder(), - MarkdownNewlineBuilder() + MarkdownNewlineBuilder(), + MarkdownEOFBuilder() ] ) { - self.tokenizer = tokenizer self.nodes = consumers let single = MarkdownSingleCharacterTokenBuilder() self.tokens = [ @@ -59,6 +68,10 @@ public class MarkdownLanguage: CodeLanguage { public func state() -> (any CodeTokenState)? { nil } + + public func eofToken(at range: Range) -> (any CodeToken)? 
{ + return MarkdownToken.eof(at: range) + } } // MARK: - Language Configuration @@ -256,26 +269,6 @@ extension MarkdownLanguage { plugins: true ) } - - /// Create a language instance with specific configuration - public static func configured(_ config: Configuration) -> MarkdownLanguage { - let tokenizer = MarkdownTokenizer() - let consumers: [any CodeNodeBuilder] = [] - - // TODO: Add consumers based on configuration when implemented - // if config.commonMark { - // consumers.append(CommonMarkConsumer()) - // } - // if config.gfm { - // consumers.append(GFMConsumer()) - // } - // if config.math { - // consumers.append(MathConsumer()) - // } - // ... etc - - return MarkdownLanguage(tokenizer: tokenizer, consumers: consumers) - } } // MARK: - Language Capabilities diff --git a/Sources/SwiftParser/Markdown/MarkdownNodes.swift b/Sources/SwiftParser/Markdown/MarkdownNodes.swift index c1ec79a..09a9f11 100644 --- a/Sources/SwiftParser/Markdown/MarkdownNodes.swift +++ b/Sources/SwiftParser/Markdown/MarkdownNodes.swift @@ -24,6 +24,7 @@ public class MarkdownNodeBase: CodeNode { } // MARK: - Document Structure +/// Root node representing an entire Markdown document. public class DocumentNode: MarkdownNodeBase { public var title: String? 
public var metadata: [String: Any] = [:] diff --git a/Sources/SwiftParser/Markdown/MarkdownTokenizer.swift b/Sources/SwiftParser/Markdown/MarkdownTokenizer.swift deleted file mode 100644 index 2410d74..0000000 --- a/Sources/SwiftParser/Markdown/MarkdownTokenizer.swift +++ /dev/null @@ -1,1444 +0,0 @@ -import Foundation - -// MARK: - Markdown Tokenizer -public class MarkdownTokenizer: CodeOutdatedTokenizer { - // MARK: - Tokenization State - private var input: String = "" - private var current: String.Index = "".startIndex - private var tokens: [MarkdownToken] = [] - - public init() {} - - // MARK: - Main Tokenization Entry Point - public func tokenize(_ input: String) -> [any CodeToken] { - self.input = input - self.current = input.startIndex - self.tokens = [] - - while current < input.endIndex { - tokenizeNext() - } - - // Add EOF token - let eofRange = current..": - addToken(.gt, text: ">", from: startIndex) - - case "<": - if tokenizeAutolink(from: startIndex) { - return // Don't call advanceIndex() if we handled an autolink - } - if tokenizeHtmlStructure(from: startIndex) { - return // Don't call advanceIndex() if we handled a multi-character token - } - addToken(.lt, text: "<", from: startIndex) - - case "&": - if tokenizeHtmlEntity() { - return // Don't call advanceIndex() if we handled an HTML entity - } - addToken(.ampersand, text: "&", from: startIndex) - - case "\\": - if tokenizeBackslash(from: startIndex) { - return // Don't call advanceIndex() if we handled a multi-character token - } - - case "/": - addToken(.forwardSlash, text: "/", from: startIndex) - - case "\"": - addToken(.quote, text: "\"", from: startIndex) - - case "'": - addToken(.singleQuote, text: "'", from: startIndex) - - case "[": - addToken(.leftBracket, text: "[", from: startIndex) - - case "]": - addToken(.rightBracket, text: "]", from: startIndex) - - case "(": - addToken(.leftParen, text: "(", from: startIndex) - - case ")": - addToken(.rightParen, text: ")", from: startIndex) - - 
case "{": - addToken(.leftBrace, text: "{", from: startIndex) - - case "}": - addToken(.rightBrace, text: "}", from: startIndex) - - case "$": - if tokenizeMathFormula(from: startIndex) { - return // Don't call advanceIndex() if we handled a math formula - } - // If not a math formula, treat as regular text token - addToken(.text, text: "$", from: startIndex) - - // MARK: - Whitespace Tokens - case " ": - // Check if this could be the start of an indented code block - if isAtLineStart() && tokenizeIndentedCodeBlock(from: startIndex) { - return // Don't call advanceIndex() if we handled an indented code block - } - addToken(.space, text: " ", from: startIndex) - - case "\t": - // Check if this could be the start of an indented code block - if isAtLineStart() && tokenizeIndentedCodeBlock(from: startIndex) { - return // Don't call advanceIndex() if we handled an indented code block - } - addToken(.tab, text: "\t", from: startIndex) - - case "\n": - addToken(.newline, text: "\n", from: startIndex) - - case "\r\n": - // Handle CRLF as a single newline token - addToken(.newline, text: "\r\n", from: startIndex) - - case "\r": - if let nextIndex = input.index(current, offsetBy: 1, limitedBy: input.endIndex), - nextIndex < input.endIndex && input[nextIndex] == "\n" { - // Handle CRLF as a single newline - addToken(.newline, text: "\r\n", from: startIndex) - current = input.index(nextIndex, offsetBy: 1, limitedBy: input.endIndex) ?? 
input.endIndex - return // Don't call advanceIndex() again - } else { - addToken(.carriageReturn, text: "\r", from: startIndex) - } - - // MARK: - Digits - case "0"..."9": - // Check if this is a pure number or mixed alphanumeric - if shouldTokenizeAsText(from: startIndex) { - tokenizeText(from: startIndex) - } else { - tokenizeNumber(from: startIndex) - } - return // Don't call advanceIndex() as tokenize methods handle it - - // MARK: - Default Text - default: - tokenizeText(from: startIndex) - return // Don't call advanceIndex() as tokenizeText handles it - } - - advanceIndex() - } - - // MARK: - Helper Methods - private func addToken(_ element: MarkdownTokenElement, text: String, from startIndex: String.Index) { - let endIndex = input.index(startIndex, offsetBy: text.count, limitedBy: input.endIndex) ?? input.endIndex - let range = startIndex.. Character? { - guard let index = input.index(current, offsetBy: offset, limitedBy: input.endIndex), - index < input.endIndex else { - return nil - } - return input[index] - } - - private func peekString(length: Int) -> String? { - guard let endIndex = input.index(current, offsetBy: length, limitedBy: input.endIndex) else { - return nil - } - return String(input[current.. String { - let startIndex = current - let endIndex = input.index(current, offsetBy: count, limitedBy: input.endIndex) ?? input.endIndex - let result = String(input[startIndex.. Bool) -> String { - let startIndex = current - - while current < input.endIndex && condition(input[current]) { - current = input.index(after: current) - } - - return String(input[startIndex.. Bool { - guard let endIndex = input.index(current, offsetBy: string.count, limitedBy: input.endIndex) else { - return false - } - return input[current.. 
Bool { - // We're at line start if we're at the beginning of input - if current == input.startIndex { - return true - } - - // Or if the previous character was a newline or carriage return - let prevIndex = input.index(before: current) - let prevChar = input[prevIndex] - return prevChar == "\n" || prevChar == "\r" - } - - private func isAtLineEnd() -> Bool { - return current >= input.endIndex || - input[current] == "\n" || - input[current] == "\r" - } - - private func isWhitespace(_ char: Character) -> Bool { - return char == " " || char == "\t" - } - - private func isNewline(_ char: Character) -> Bool { - return char == "\n" || char == "\r" - } - - private func isAlphanumeric(_ char: Character) -> Bool { - return char.isLetter || char.isNumber - } - - private func isPunctuation(_ char: Character) -> Bool { - return "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~".contains(char) - } - - private func isUnicodeWhitespace(_ char: Character) -> Bool { - return char.isWhitespace - } - - private func isUnicodePunctuation(_ char: Character) -> Bool { - return char.isPunctuation - } -} - -// MARK: - Extended Tokenization Methods -extension MarkdownTokenizer { - - /// Tokenize a sequence of characters as a single token - private func tokenizeSequence(_ element: MarkdownTokenElement, - startChar: Character, - minLength: Int = 1, - maxLength: Int = Int.max) -> Bool { - let startIndex = current - var length = 0 - - while current < input.endIndex && - input[current] == startChar && - length < maxLength { - current = input.index(after: current) - length += 1 - } - - if length >= minLength { - let range = startIndex.. 
Bool { - // First check if this is an HTML comment - if tokenizeHtmlComment(from: startIndex) { - return true - } - - // Try to tokenize as HTML tag first - if let tagResult = tryTokenizeHtmlTag(from: startIndex) { - current = tagResult.endIndex - - // Check if this is a self-closing tag - if tagResult.isSelfClosing { - tokens.append(MarkdownToken(element: .htmlTag, text: tagResult.content, range: startIndex.. HtmlTagResult? { - var currentIndex = startIndex - guard currentIndex < input.endIndex && input[currentIndex] == "<" else { - return nil - } - - currentIndex = input.index(after: currentIndex) - - // Check for closing tag - var isClosingTag = false - if currentIndex < input.endIndex && input[currentIndex] == "/" { - isClosingTag = true - currentIndex = input.index(after: currentIndex) - } - - // Must have a letter to start tag name - guard currentIndex < input.endIndex && (input[currentIndex].isLetter || input[currentIndex] == "!") else { - return nil - } - - // Extract tag name - let tagNameStart = currentIndex - while currentIndex < input.endIndex { - let char = input[currentIndex] - if char.isLetter || char.isNumber || char == "-" || char == "_" { - currentIndex = input.index(after: currentIndex) - } else { - break - } - } - - let tagName = String(input[tagNameStart.." { - // End of tag - currentIndex = input.index(after: currentIndex) - foundClosingBracket = true - break - } else if char == "/" { - // Self-closing tag - currentIndex = input.index(after: currentIndex) - if currentIndex < input.endIndex && input[currentIndex] == ">" { - currentIndex = input.index(after: currentIndex) - isSelfClosing = true - foundClosingBracket = true - break - } - } else { - currentIndex = input.index(after: currentIndex) - } - } - - // Check if we found a complete tag - if foundClosingBracket && currentIndex > input.index(after: startIndex) { - let content = String(input[startIndex.. HtmlBlockResult? 
{ - var currentIndex = openingTag.endIndex - - // Look for the matching closing tag - let closingTagPattern = "" - - while currentIndex < input.endIndex { - // Try to find the closing tag - if let closingTagRange = input.range(of: closingTagPattern, options: .caseInsensitive, range: currentIndex.. HtmlBlockResult? { - var currentIndex = openingTag.endIndex - - // Look for the first blank line (two consecutive newlines) - while currentIndex < input.endIndex { - let char = input[currentIndex] - - if char == "\n" { - // Found a newline, check if next line is blank - let nextIndex = input.index(after: currentIndex) - if nextIndex < input.endIndex { - let nextChar = input[nextIndex] - if nextChar == "\n" { - // Found blank line, end the unclosed block here - return HtmlBlockResult( - content: String(input[startIndex.. Bool { - var currentIndex = current - - // Check for comment start "" - while currentIndex < input.endIndex { - if input[currentIndex] == "-" { - let remainingChars = input.distance(from: currentIndex, to: input.endIndex) - if remainingChars >= 3 { - let endCheck = input[currentIndex...input.index(currentIndex, offsetBy: 2)] - if endCheck == "-->" { - currentIndex = input.index(currentIndex, offsetBy: 3) - break - } - } - } - currentIndex = input.index(after: currentIndex) - } - - if currentIndex > input.index(after: startIndex) { - current = currentIndex - let range = startIndex.. Bool { - let startIndex = current - - guard input[current] == "&" else { return false } - - current = input.index(after: current) - - // Named entity - if current < input.endIndex && input[current].isLetter { - let entityStart = current - while current < input.endIndex && - (input[current].isLetter || input[current].isNumber) { - current = input.index(after: current) - } - - if current < input.endIndex && input[current] == ";" { - let entityName = String(input[entityStart.. 
Bool { - // Common HTML entities that should be recognized - let validEntities: Set = [ - "amp", "lt", "gt", "quot", "apos", "nbsp", "copy", "reg", "trade", - "hellip", "mdash", "ndash", "lsquo", "rsquo", "ldquo", "rdquo", - "bull", "middot", "times", "divide", "plusmn", "sup2", "sup3", - "frac14", "frac12", "frac34", "iexcl", "cent", "pound", "curren", - "yen", "brvbar", "sect", "uml", "ordf", "laquo", "not", "shy", - "macr", "deg", "plusmn", "acute", "micro", "para", "middot", - "cedil", "ordm", "raquo", "iquest", "Agrave", "Aacute", "Acirc", - "Atilde", "Auml", "Aring", "AElig", "Ccedil", "Egrave", "Eacute", - "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml", "ETH", - "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", "times", - "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml", "Yacute", "THORN", - "szlig", "agrave", "aacute", "acirc", "atilde", "auml", "aring", - "aelig", "ccedil", "egrave", "eacute", "ecirc", "euml", "igrave", - "iacute", "icirc", "iuml", "eth", "ntilde", "ograve", "oacute", - "ocirc", "otilde", "ouml", "divide", "oslash", "ugrave", "uacute", - "ucirc", "uuml", "yacute", "thorn", "yuml" - ] - - return validEntities.contains(name) - } - - /// Tokenize backslash and potential TeX math delimiters - /// Returns true if a multi-character token was handled - private func tokenizeBackslash(from startIndex: String.Index) -> Bool { - // Check if this is a TeX math delimiter that should be tokenized as a complete formula - if let nextIndex = input.index(current, offsetBy: 1, limitedBy: input.endIndex), - nextIndex < input.endIndex { - let nextChar = input[nextIndex] - - switch nextChar { - case "[": - // \[...\] - TeX display math - if let formulaToken = tokenizeTexDisplayMath(from: startIndex) { - tokens.append(formulaToken) - return true - } - - case "(": - // \(...\) - TeX inline math - if let formulaToken = tokenizeTexInlineMath(from: startIndex) { - tokens.append(formulaToken) - return true - } - - case "]", ")": - // \] or \) - These are 
closing delimiters without opening, treat as regular text - let range = startIndex.. Bool { - // Check if this starts with $$ - if let nextIndex = input.index(current, offsetBy: 1, limitedBy: input.endIndex), - nextIndex < input.endIndex && input[nextIndex] == "$" { - // This might be a display math formula $$...$$ - if let formulaToken = tokenizeDisplayMath(from: startIndex) { - tokens.append(formulaToken) - return true - } - // If we can't find a complete display math formula, don't treat it as math - return false - } else { - // This might be an inline math formula $...$ - if let formulaToken = tokenizeInlineMath(from: startIndex) { - tokens.append(formulaToken) - return true - } - // If we can't find a complete inline math formula, don't treat it as math - return false - } - } - - /// Tokenize display math formula $$...$$ - private func tokenizeDisplayMath(from startIndex: String.Index) -> MarkdownToken? { - var currentIndex = startIndex - - // Skip the opening $$ - guard let afterOpenIndex = input.index(currentIndex, offsetBy: 2, limitedBy: input.endIndex) else { - return nil - } - currentIndex = afterOpenIndex - - // Find the closing $$ - while currentIndex < input.endIndex { - if input[currentIndex] == "$" { - if let nextIndex = input.index(currentIndex, offsetBy: 1, limitedBy: input.endIndex), - nextIndex < input.endIndex && input[nextIndex] == "$" { - // Found closing $$ - let endIndex = input.index(nextIndex, offsetBy: 1, limitedBy: input.endIndex) ?? input.endIndex - let range = startIndex.. MarkdownToken? 
{ - var currentIndex = startIndex - - // Skip the opening $ - guard let afterOpenIndex = input.index(currentIndex, offsetBy: 1, limitedBy: input.endIndex) else { - return nil - } - currentIndex = afterOpenIndex - - // Check if the first character after $ is whitespace - if so, not a valid math formula - if currentIndex < input.endIndex && input[currentIndex].isWhitespace { - return nil - } - - // Find the closing $ - while currentIndex < input.endIndex { - let char = input[currentIndex] - - if char == "$" { - // Check if the character before $ is whitespace - if so, not a valid math formula - if currentIndex > afterOpenIndex { - let prevIndex = input.index(before: currentIndex) - if input[prevIndex].isWhitespace { - return nil - } - } - - // Found closing $ - let endIndex = input.index(currentIndex, offsetBy: 1, limitedBy: input.endIndex) ?? input.endIndex - let range = startIndex.. MarkdownToken? { - var currentIndex = startIndex - - // Skip the opening \[ - guard let afterOpenIndex = input.index(currentIndex, offsetBy: 2, limitedBy: input.endIndex) else { - return nil - } - currentIndex = afterOpenIndex - - // Find the closing \] - while currentIndex < input.endIndex { - if input[currentIndex] == "\\" { - if let nextIndex = input.index(currentIndex, offsetBy: 1, limitedBy: input.endIndex), - nextIndex < input.endIndex && input[nextIndex] == "]" { - // Found closing \] - let endIndex = input.index(nextIndex, offsetBy: 1, limitedBy: input.endIndex) ?? input.endIndex - let range = startIndex.. MarkdownToken? 
{ - var currentIndex = startIndex - - // Skip the opening \( - guard let afterOpenIndex = input.index(currentIndex, offsetBy: 2, limitedBy: input.endIndex) else { - return nil - } - currentIndex = afterOpenIndex - - // Find the closing \) - while currentIndex < input.endIndex { - let char = input[currentIndex] - - // Check for newline characters - these terminate inline math - if char == "\n" || char == "\r" { - // Found newline, treat as TeX inline math from \( to end of line - let range = startIndex.. Bool { - // Check if this is a fenced code block (```) - if let fencedToken = tokenizeFencedCodeBlock(from: startIndex) { - tokens.append(fencedToken) - return true - } - - // Check if this is inline code (`) - if let inlineToken = tokenizeInlineCode(from: startIndex) { - tokens.append(inlineToken) - return true - } - - return false - } - - /// Check if we're at the start of a line and can tokenize indented code block - private func tokenizeIndentedCodeBlock(from startIndex: String.Index) -> Bool { - // Check if we have 4 spaces or 1 tab at the start of a line - var tempIndex = startIndex - var spaceCount = 0 - - // Count spaces and tabs - while tempIndex < input.endIndex { - if input[tempIndex] == " " { - spaceCount += 1 - if spaceCount >= 4 { - tempIndex = input.index(after: tempIndex) - break - } - } else if input[tempIndex] == "\t" { - spaceCount = 4 // Tab counts as 4 spaces - tempIndex = input.index(after: tempIndex) - break - } else { - break - } - tempIndex = input.index(after: tempIndex) - } - - // Need at least 4 spaces worth of indentation - if spaceCount < 4 { - return false - } - - // Check if there's actual content after the indentation (not just whitespace) - var hasContent = false - var contentCheckIndex = tempIndex - while contentCheckIndex < input.endIndex && input[contentCheckIndex] != "\n" && input[contentCheckIndex] != "\r" { - if input[contentCheckIndex] != " " && input[contentCheckIndex] != "\t" { - hasContent = true - break - } - 
contentCheckIndex = input.index(after: contentCheckIndex) - } - - // If there's no content on this line, this is not an indented code block - if !hasContent { - return false - } - - // Find the end of the indented code block - let codeBlockStart = startIndex - var codeBlockEnd = startIndex - - // Scan for the end of the indented code block - while tempIndex < input.endIndex { - // Skip the current line - while tempIndex < input.endIndex && input[tempIndex] != "\n" && input[tempIndex] != "\r" { - tempIndex = input.index(after: tempIndex) - } - - codeBlockEnd = tempIndex - - // Skip line ending - if tempIndex < input.endIndex && input[tempIndex] == "\r" { - tempIndex = input.index(after: tempIndex) - if tempIndex < input.endIndex && input[tempIndex] == "\n" { - tempIndex = input.index(after: tempIndex) - } - } else if tempIndex < input.endIndex && input[tempIndex] == "\n" { - tempIndex = input.index(after: tempIndex) - } - - // Check if next line is also indented (or blank) - let lineStart = tempIndex - var lineSpaces = 0 - var isBlankLine = true - - while tempIndex < input.endIndex && input[tempIndex] != "\n" && input[tempIndex] != "\r" { - if input[tempIndex] == " " { - lineSpaces += 1 - } else if input[tempIndex] == "\t" { - lineSpaces = 4 - isBlankLine = false - break - } else { - isBlankLine = false - break - } - tempIndex = input.index(after: tempIndex) - } - - // If it's a blank line, continue - if isBlankLine { - continue - } - - // If next line doesn't have enough indentation, stop - if lineSpaces < 4 { - break - } - - // Reset to continue scanning - tempIndex = lineStart - } - - // Create the indented code block token - let range = codeBlockStart.. MarkdownToken? 
{ - // Check if we have at least 3 backticks - var tickCount = 0 - var tempIndex = startIndex - - while tempIndex < input.endIndex && input[tempIndex] == "`" { - tickCount += 1 - tempIndex = input.index(after: tempIndex) - } - - if tickCount < 3 { - return nil - } - - // Skip any language specifier on the same line - while tempIndex < input.endIndex && input[tempIndex] != "\n" && input[tempIndex] != "\r" { - tempIndex = input.index(after: tempIndex) - } - - // Skip the newline after the opening fence - if tempIndex < input.endIndex && (input[tempIndex] == "\n" || input[tempIndex] == "\r") { - if input[tempIndex] == "\r" && tempIndex < input.endIndex { - let nextIndex = input.index(after: tempIndex) - if nextIndex < input.endIndex && input[nextIndex] == "\n" { - tempIndex = input.index(after: nextIndex) - } else { - tempIndex = nextIndex - } - } else { - tempIndex = input.index(after: tempIndex) - } - } - - // Find the closing fence - var closingFenceStart: String.Index? - - while tempIndex < input.endIndex { - if input[tempIndex] == "`" { - let fenceStart = tempIndex - var closingTickCount = 0 - - while tempIndex < input.endIndex && input[tempIndex] == "`" { - closingTickCount += 1 - tempIndex = input.index(after: tempIndex) - } - - if closingTickCount >= tickCount { - closingFenceStart = fenceStart - break - } - } else { - tempIndex = input.index(after: tempIndex) - } - } - - let endIndex: String.Index - if let closingStart = closingFenceStart { - endIndex = closingStart - // Advance current to after the closing fence - current = tempIndex - } else { - // No closing fence found - treat as code block until EOF - endIndex = input.endIndex - current = input.endIndex - } - - let range = startIndex..<(closingFenceStart != nil ? tempIndex : endIndex) - let text = String(input[range]) - - return MarkdownToken.fencedCodeBlock(text, at: range) - } - - /// Tokenize inline code (`...`) - private func tokenizeInlineCode(from startIndex: String.Index) -> MarkdownToken? 
{ - // Check if we have exactly one backtick - if input[startIndex] != "`" { - return nil - } - - // Look for next backtick that's not escaped - var tempIndex = input.index(after: startIndex) - var foundEnd = false - - while tempIndex < input.endIndex { - if input[tempIndex] == "`" { - foundEnd = true - break - } - // Skip over escaped backticks - if input[tempIndex] == "\\" && tempIndex < input.endIndex { - let nextIndex = input.index(after: tempIndex) - if nextIndex < input.endIndex { - tempIndex = input.index(after: nextIndex) - } else { - tempIndex = nextIndex - } - } else { - tempIndex = input.index(after: tempIndex) - } - } - - if !foundEnd { - return nil - } - - // Include the closing backtick - let endIndex = input.index(after: tempIndex) - current = endIndex - - let range = startIndex.. Bool { - switch char { - case "#", "*", "_", "`", "-", "+", "=", "~", "^", "@", "|", ":", ";", "!", "?", ".", ",", ">", "<", "&", "\\", "/", "\"", "'", "[", "]", "(", ")", "{", "}", "$": - return true - case " ", "\t", "\n", "\r": - return true - default: - return false - } - } - - /// Check if a number should be tokenized as text (mixed alphanumeric) - private func shouldTokenizeAsText(from startIndex: String.Index) -> Bool { - var currentIndex = current - - // Look ahead to see if we have letters mixed with numbers - while currentIndex < input.endIndex { - let char = input[currentIndex] - - if isSpecialCharacter(char) { - break - } - - if char.isLetter { - return true // Found a letter, treat as text - } - - currentIndex = input.index(after: currentIndex) - } - - return false // Only digits found, treat as number - } - - /// Tokenize escape sequences - private func tokenizeEscapeSequence() -> Bool { - let startIndex = current - - guard input[current] == "\\" else { return false } - - guard let nextIndex = input.index(current, offsetBy: 1, limitedBy: input.endIndex), - nextIndex < input.endIndex else { return false } - - let nextChar = input[nextIndex] - - // Check if it's 
a valid escape sequence - if isPunctuation(nextChar) { - // For now, treat escape sequences as separate tokens - // Parser layer will handle the semantic meaning - addToken(.backslash, text: "\\", from: startIndex) - return false - } - - return false - } - - /// Tokenize Unicode escape sequences - private func tokenizeUnicodeEscape() -> Bool { - let startIndex = current - - guard match("\\u") else { return false } - - current = input.index(current, offsetBy: 2) - - // Expect 4 hex digits - var hexCount = 0 - while current < input.endIndex && - input[current].isHexDigit && - hexCount < 4 { - current = input.index(after: current) - hexCount += 1 - } - - if hexCount == 4 { - // For now, treat as separate tokens - // Parser layer will handle the semantic meaning - current = startIndex - addToken(.backslash, text: "\\", from: startIndex) - return false - } - - // Reset on failure - current = startIndex - return false - } - - /// Tokenize autolinks and URLs - private func tokenizeAutolink(from startIndex: String.Index) -> Bool { - // Check if this is an autolink or - if input[startIndex] == "<" { - return tokenizeAutolinkInBrackets(from: startIndex) - } - - // Check if this is a bare URL - return tokenizeBareURL(from: startIndex) - } - - /// Tokenize autolinks in brackets - private func tokenizeAutolinkInBrackets(from startIndex: String.Index) -> Bool { - guard input[startIndex] == "<" else { return false } - - var tempIndex = input.index(after: startIndex) - var urlContent = "" - - // Find the closing > - while tempIndex < input.endIndex { - let char = input[tempIndex] - - if char == ">" { - // Found closing bracket - let fullRange = startIndex.. 
Bool { - // This is more complex and depends on context - // For now, we'll implement a simple version that looks for common URL patterns - - // Check if this starts with a URL scheme - let remainingText = String(input[startIndex...]) - let urlPattern = /^(https?:\/\/[^\s<>\[\]]+)/ - - if let match = remainingText.firstMatch(of: urlPattern) { - let matchedText = String(match.1) - let endIndex = input.index(startIndex, offsetBy: matchedText.count) - let range = startIndex.. Bool { - // Look for email pattern in the remaining text - let remainingText = String(input[startIndex...]) - let emailPattern = /^([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/ - - if let match = remainingText.firstMatch(of: emailPattern) { - let matchedText = String(match.1) - let endIndex = input.index(startIndex, offsetBy: matchedText.count) - let range = startIndex.. Bool { - // Email pattern - if content.contains("@") { - let emailPattern = /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/ - return content.firstMatch(of: emailPattern) != nil - } - - // URL pattern - let urlPattern = /^[a-zA-Z][a-zA-Z0-9+.-]*:[^\s]*$/ - return content.firstMatch(of: urlPattern) != nil - } - - /// Check if we're processing text that could be a URL or email - private func tokenizeBareURLInText(from startIndex: String.Index) -> Bool { - // Check if current position starts with http:// or https:// - let remainingText = String(input[startIndex...]) - - if remainingText.hasPrefix("http://") || remainingText.hasPrefix("https://") { - return tokenizeBareURL(from: startIndex) - } - - // Check if this might be an email address - if tokenizeBareEmail(from: startIndex) { - return true - } - - return false - } - - /// Tokenize custom containers starting with ':::' at line start - private func tokenizeCustomContainer(from startIndex: String.Index) -> Bool { - guard isAtLineStart(index: startIndex), match(":::") else { return false } - - var tempIndex = input.index(startIndex, offsetBy: 3) - - // Scan for the closing 
':::' at line start - while tempIndex < input.endIndex { - if isAtLineStart(index: tempIndex) && input[tempIndex...].hasPrefix(":::") { - // Move to end of closing line - var end = input.index(tempIndex, offsetBy: 3) - while end < input.endIndex && input[end] != "\n" && input[end] != "\r" { - end = input.index(after: end) - } - if end < input.endIndex { - if input[end] == "\r" { - let next = input.index(after: end) - if next < input.endIndex && input[next] == "\n" { - end = input.index(after: next) - } else { - end = next - } - } else { - end = input.index(after: end) - } - } - let range = startIndex.. Bool { - if index == input.startIndex { return true } - let prev = input[input.index(before: index)] - return prev == "\n" || prev == "\r" - } - - // ...existing code... -} - -// MARK: - Character Extensions -extension Character { - var isHexDigit: Bool { - return self.isNumber || ("a"..."f").contains(self) || ("A"..."F").contains(self) - } -} diff --git a/Sources/SwiftParser/Markdown/Nodes/MarkdownEOFBuilder.swift b/Sources/SwiftParser/Markdown/Nodes/MarkdownEOFBuilder.swift new file mode 100644 index 0000000..b258156 --- /dev/null +++ b/Sources/SwiftParser/Markdown/Nodes/MarkdownEOFBuilder.swift @@ -0,0 +1,14 @@ +import Foundation + +/// Consumes trailing EOF tokens without modifying the AST. +public class MarkdownEOFBuilder: CodeNodeBuilder { + public init() {} + + public func build(from context: inout CodeConstructContext) -> Bool { + guard context.consuming < context.tokens.count, + let token = context.tokens[context.consuming] as? 
MarkdownToken, + token.element == .eof else { return false } + context.consuming += 1 + return true + } +} diff --git a/Sources/SwiftParser/Markdown/Nodes/MarkdownInlineParser.swift b/Sources/SwiftParser/Markdown/Nodes/MarkdownInlineParser.swift index 543f3dc..eed2ec1 100644 --- a/Sources/SwiftParser/Markdown/Nodes/MarkdownInlineParser.swift +++ b/Sources/SwiftParser/Markdown/Nodes/MarkdownInlineParser.swift @@ -1,6 +1,13 @@ import Foundation +/// Simple inline parser used by block builders to parse inline Markdown syntax. +/// Handles emphasis, links, images, inline code and other span level elements. struct MarkdownInlineParser { + /// Parse inline content until one of the `stopAt` tokens is encountered. + /// - Parameters: + /// - context: Construction context providing tokens and current state. + /// - stopAt: Tokens that terminate inline parsing. + /// - Returns: Array of parsed inline nodes. static func parseInline( _ context: inout CodeConstructContext, stopAt: Set = [.newline, .eof] diff --git a/Sources/SwiftParser/Markdown/TokenBuilders/MarkdownCodeTokenBuilder.swift b/Sources/SwiftParser/Markdown/Tokens/MarkdownCodeTokenBuilder.swift similarity index 100% rename from Sources/SwiftParser/Markdown/TokenBuilders/MarkdownCodeTokenBuilder.swift rename to Sources/SwiftParser/Markdown/Tokens/MarkdownCodeTokenBuilder.swift diff --git a/Sources/SwiftParser/Markdown/TokenBuilders/MarkdownCustomContainerTokenBuilder.swift b/Sources/SwiftParser/Markdown/Tokens/MarkdownCustomContainerTokenBuilder.swift similarity index 100% rename from Sources/SwiftParser/Markdown/TokenBuilders/MarkdownCustomContainerTokenBuilder.swift rename to Sources/SwiftParser/Markdown/Tokens/MarkdownCustomContainerTokenBuilder.swift diff --git a/Sources/SwiftParser/Markdown/TokenBuilders/MarkdownFormulaTokenBuilder.swift b/Sources/SwiftParser/Markdown/Tokens/MarkdownFormulaTokenBuilder.swift similarity index 100% rename from 
Sources/SwiftParser/Markdown/TokenBuilders/MarkdownFormulaTokenBuilder.swift rename to Sources/SwiftParser/Markdown/Tokens/MarkdownFormulaTokenBuilder.swift diff --git a/Sources/SwiftParser/Markdown/TokenBuilders/MarkdownHTMLTokenBuilder.swift b/Sources/SwiftParser/Markdown/Tokens/MarkdownHTMLTokenBuilder.swift similarity index 100% rename from Sources/SwiftParser/Markdown/TokenBuilders/MarkdownHTMLTokenBuilder.swift rename to Sources/SwiftParser/Markdown/Tokens/MarkdownHTMLTokenBuilder.swift diff --git a/Sources/SwiftParser/Markdown/TokenBuilders/MarkdownNumberTokenBuilder.swift b/Sources/SwiftParser/Markdown/Tokens/MarkdownNumberTokenBuilder.swift similarity index 100% rename from Sources/SwiftParser/Markdown/TokenBuilders/MarkdownNumberTokenBuilder.swift rename to Sources/SwiftParser/Markdown/Tokens/MarkdownNumberTokenBuilder.swift diff --git a/Sources/SwiftParser/Markdown/TokenBuilders/MarkdownSingleCharacterTokenBuilder.swift b/Sources/SwiftParser/Markdown/Tokens/MarkdownSingleCharacterTokenBuilder.swift similarity index 100% rename from Sources/SwiftParser/Markdown/TokenBuilders/MarkdownSingleCharacterTokenBuilder.swift rename to Sources/SwiftParser/Markdown/Tokens/MarkdownSingleCharacterTokenBuilder.swift diff --git a/Sources/SwiftParser/Markdown/TokenBuilders/MarkdownTextTokenBuilder.swift b/Sources/SwiftParser/Markdown/Tokens/MarkdownTextTokenBuilder.swift similarity index 100% rename from Sources/SwiftParser/Markdown/TokenBuilders/MarkdownTextTokenBuilder.swift rename to Sources/SwiftParser/Markdown/Tokens/MarkdownTextTokenBuilder.swift diff --git a/Sources/SwiftParser/Markdown/TokenBuilders/MarkdownURLTokenBuilder.swift b/Sources/SwiftParser/Markdown/Tokens/MarkdownURLTokenBuilder.swift similarity index 100% rename from Sources/SwiftParser/Markdown/TokenBuilders/MarkdownURLTokenBuilder.swift rename to Sources/SwiftParser/Markdown/Tokens/MarkdownURLTokenBuilder.swift diff --git 
a/Sources/SwiftParser/Markdown/TokenBuilders/MarkdownWhitespaceTokenBuilder.swift b/Sources/SwiftParser/Markdown/Tokens/MarkdownWhitespaceTokenBuilder.swift similarity index 100% rename from Sources/SwiftParser/Markdown/TokenBuilders/MarkdownWhitespaceTokenBuilder.swift rename to Sources/SwiftParser/Markdown/Tokens/MarkdownWhitespaceTokenBuilder.swift diff --git a/Sources/SwiftParser/SwiftParser.swift b/Sources/SwiftParser/SwiftParser.swift index 9620502..5dcfacd 100644 --- a/Sources/SwiftParser/SwiftParser.swift +++ b/Sources/SwiftParser/SwiftParser.swift @@ -8,13 +8,6 @@ public struct SwiftParser where let parser = CodeParser(language: language) return parser.parse(source, language: language) } - - public func parse(_ source: String, language: any CodeLanguage) -> ParsedSource { - let root = language.root() - let parser = CodeOutdatedParser(language: language) - let result = parser.parse(source, root: root) - return ParsedSource(content: source, root: result.node, errors: result.context.errors) - } } /// Represents a parsed source file diff --git a/Tests/SwiftParserTests/Markdown/Builders/MarkdownAllFeaturesBuilderTests.swift b/Tests/SwiftParserTests/Markdown/Builders/MarkdownAllFeaturesBuilderTests.swift index 41607c2..e783de5 100644 --- a/Tests/SwiftParserTests/Markdown/Builders/MarkdownAllFeaturesBuilderTests.swift +++ b/Tests/SwiftParserTests/Markdown/Builders/MarkdownAllFeaturesBuilderTests.swift @@ -3,13 +3,13 @@ import XCTest /// Comprehensive tests covering all supported Markdown features. final class MarkdownAllFeaturesBuilderTests: XCTestCase { - private var parser: CodeOutdatedParser! + private var parser: CodeParser! private var language: MarkdownLanguage! override func setUp() { super.setUp() language = MarkdownLanguage() - parser = CodeOutdatedParser(language: language) + parser = CodeParser(language: language) } func testParsingComprehensiveMarkdownDocument() { @@ -65,11 +65,10 @@ Citation[@smith2023] and footnote[^1]. [@smith2023]: Smith, J. 
(2023). Example. """ - let root = language.root(of: markdown) - let (node, context) = parser.parse(markdown, root: root) + let result = parser.parse(markdown, language: language) - XCTAssertTrue(context.errors.isEmpty) - XCTAssertGreaterThan(node.children.count, 0) + XCTAssertTrue(result.errors.isEmpty) + XCTAssertGreaterThan(result.root.children.count, 0) // Ensure tokenizer runs without errors using the new tokenizer let tokenizer = CodeTokenizer(builders: language.tokens, state: language.state) @@ -77,19 +76,19 @@ Citation[@smith2023] and footnote[^1]. XCTAssertGreaterThan(tokens.count, 0) // Verify important structures exist - XCTAssertNotNil(node.first { ($0 as? HeaderNode) != nil }) - XCTAssertNotNil(node.first { ($0 as? ParagraphNode) != nil }) - XCTAssertNotNil(node.first { ($0 as? BlockquoteNode) != nil }) - XCTAssertEqual(node.nodes { $0.element == .orderedList }.count, 1) - XCTAssertEqual(node.nodes { $0.element == .unorderedList }.count, 2) - XCTAssertNotNil(node.first { ($0 as? DefinitionListNode) != nil }) - XCTAssertNotNil(node.first { ($0 as? TableNode) != nil }) - XCTAssertNotNil(node.first { ($0 as? FormulaBlockNode) != nil }) - XCTAssertNotNil(node.first { ($0 as? CodeBlockNode) != nil }) - XCTAssertNotNil(node.first { ($0 as? ThematicBreakNode) != nil }) - XCTAssertEqual(node.nodes { $0.element == .footnote }.count, 1) - XCTAssertNotNil(node.first { ($0 as? HTMLBlockNode) != nil }) - XCTAssertNotNil(node.first { ($0 as? ImageNode) != nil }) + XCTAssertNotNil(result.root.first { ($0 as? HeaderNode) != nil }) + XCTAssertNotNil(result.root.first { ($0 as? ParagraphNode) != nil }) + XCTAssertNotNil(result.root.first { ($0 as? BlockquoteNode) != nil }) + XCTAssertEqual(result.root.nodes { $0.element == .orderedList }.count, 1) + XCTAssertEqual(result.root.nodes { $0.element == .unorderedList }.count, 2) + XCTAssertNotNil(result.root.first { ($0 as? DefinitionListNode) != nil }) + XCTAssertNotNil(result.root.first { ($0 as? 
TableNode) != nil }) + XCTAssertNotNil(result.root.first { ($0 as? FormulaBlockNode) != nil }) + XCTAssertNotNil(result.root.first { ($0 as? CodeBlockNode) != nil }) + XCTAssertNotNil(result.root.first { ($0 as? ThematicBreakNode) != nil }) + XCTAssertEqual(result.root.nodes { $0.element == .footnote }.count, 1) + XCTAssertNotNil(result.root.first { ($0 as? HTMLBlockNode) != nil }) + XCTAssertNotNil(result.root.first { ($0 as? ImageNode) != nil }) } } diff --git a/Tests/SwiftParserTests/Markdown/Builders/MarkdownBlockElementTests.swift b/Tests/SwiftParserTests/Markdown/Builders/MarkdownBlockElementTests.swift index da64c52..92736a5 100644 --- a/Tests/SwiftParserTests/Markdown/Builders/MarkdownBlockElementTests.swift +++ b/Tests/SwiftParserTests/Markdown/Builders/MarkdownBlockElementTests.swift @@ -2,22 +2,21 @@ import XCTest @testable import SwiftParser final class MarkdownBlockElementTests: XCTestCase { - var parser: CodeOutdatedParser! - var language: MarkdownLanguage! + private var parser: CodeParser! + private var language: MarkdownLanguage! override func setUp() { super.setUp() language = MarkdownLanguage() - parser = CodeOutdatedParser(language: language) + parser = CodeParser(language: language) } func testFencedCodeBlock() { let input = "```swift\nlet x = 1\n```" - let root = language.root(of: input) - let (node, context) = parser.parse(input, root: root) - XCTAssertTrue(context.errors.isEmpty) - XCTAssertEqual(node.children.count, 1) - if let code = node.children.first as? CodeBlockNode { + let result = parser.parse(input, language: language) + XCTAssertTrue(result.errors.isEmpty) + XCTAssertEqual(result.root.children.count, 1) + if let code = result.root.children.first as? 
CodeBlockNode { XCTAssertEqual(code.language, "swift") } else { XCTFail("Expected CodeBlockNode") @@ -26,67 +25,60 @@ final class MarkdownBlockElementTests: XCTestCase { func testHorizontalRule() { let input = "---" - let root = language.root(of: input) - let (node, context) = parser.parse(input, root: root) - XCTAssertTrue(context.errors.isEmpty) - XCTAssertEqual(node.children.count, 1) - XCTAssertTrue(node.children.first is ThematicBreakNode) + let result = parser.parse(input, language: language) + XCTAssertTrue(result.errors.isEmpty) + XCTAssertEqual(result.root.children.count, 1) + XCTAssertTrue(result.root.children.first is ThematicBreakNode) } func testUnorderedList() { let input = "- item" - let root = language.root(of: input) - let (node, context) = parser.parse(input, root: root) - XCTAssertTrue(context.errors.isEmpty) - XCTAssertEqual(node.children.count, 1) - let list = node.children.first as? UnorderedListNode + let result = parser.parse(input, language: language) + XCTAssertTrue(result.errors.isEmpty) + XCTAssertEqual(result.root.children.count, 1) + let list = result.root.children.first as? UnorderedListNode XCTAssertNotNil(list) XCTAssertEqual(list?.children().count, 1) } func testStrikethroughInline() { let input = "~~strike~~" - let root = language.root(of: input) - let (node, context) = parser.parse(input, root: root) - XCTAssertTrue(context.errors.isEmpty) - guard let para = node.children.first as? ParagraphNode else { return XCTFail("Expected ParagraphNode") } + let result = parser.parse(input, language: language) + XCTAssertTrue(result.errors.isEmpty) + guard let para = result.root.children.first as? 
ParagraphNode else { return XCTFail("Expected ParagraphNode") } XCTAssertTrue(para.children.first is StrikeNode) } func testFormulaBlock() { let input = "$$x=1$$" - let root = language.root(of: input) - let (node, context) = parser.parse(input, root: root) - XCTAssertTrue(context.errors.isEmpty) - XCTAssertTrue(node.children.first is FormulaBlockNode) + let result = parser.parse(input, language: language) + XCTAssertTrue(result.errors.isEmpty) + XCTAssertTrue(result.root.children.first is FormulaBlockNode) } func testDefinitionList() { let input = "Term\n: Definition" - let root = language.root(of: input) - let (node, context) = parser.parse(input, root: root) - XCTAssertTrue(context.errors.isEmpty) - XCTAssertEqual(node.children.count, 1) - let list = node.children.first as? DefinitionListNode + let result = parser.parse(input, language: language) + XCTAssertTrue(result.errors.isEmpty) + XCTAssertEqual(result.root.children.count, 1) + let list = result.root.children.first as? DefinitionListNode XCTAssertNotNil(list) XCTAssertEqual(list?.children().count, 1) } func testAdmonitionBlock() { let input = "> [!NOTE]\n> hello" - let root = language.root(of: input) - let (node, context) = parser.parse(input, root: root) - XCTAssertTrue(context.errors.isEmpty) - XCTAssertEqual(node.children.count, 1) - XCTAssertTrue(node.children.first is AdmonitionNode) + let result = parser.parse(input, language: language) + XCTAssertTrue(result.errors.isEmpty) + XCTAssertEqual(result.root.children.count, 1) + XCTAssertTrue(result.root.children.first is AdmonitionNode) } func testCustomContainerBlock() { let input = "::: custom\nhello\n:::" - let root = language.root(of: input) - let (node, context) = parser.parse(input, root: root) - XCTAssertTrue(context.errors.isEmpty) - XCTAssertEqual(node.children.count, 1) - XCTAssertTrue(node.children.first is CustomContainerNode) + let result = parser.parse(input, language: language) + XCTAssertTrue(result.errors.isEmpty) + 
XCTAssertEqual(result.root.children.count, 1) + XCTAssertTrue(result.root.children.first is CustomContainerNode) } } diff --git a/Tests/SwiftParserTests/Markdown/Builders/MarkdownInlineBuilderTests.swift b/Tests/SwiftParserTests/Markdown/Builders/MarkdownInlineBuilderTests.swift index 5b3288a..118bafa 100644 --- a/Tests/SwiftParserTests/Markdown/Builders/MarkdownInlineBuilderTests.swift +++ b/Tests/SwiftParserTests/Markdown/Builders/MarkdownInlineBuilderTests.swift @@ -2,23 +2,22 @@ import XCTest @testable import SwiftParser final class MarkdownInlineBuilderTests: XCTestCase { - private var parser: CodeOutdatedParser! + private var parser: CodeParser! private var language: MarkdownLanguage! override func setUp() { super.setUp() language = MarkdownLanguage() - parser = CodeOutdatedParser(language: language) + parser = CodeParser(language: language) } func testItalicBuilderParsesItalicText() { let input = "*italic*" - let root = language.root(of: input) - let (node, context) = parser.parse(input, root: root) + let result = parser.parse(input, language: language) - XCTAssertTrue(context.errors.isEmpty) - XCTAssertEqual(node.children.count, 1) - guard let para = node.children.first as? ParagraphNode else { + XCTAssertTrue(result.errors.isEmpty) + XCTAssertEqual(result.root.children.count, 1) + guard let para = result.root.children.first as? ParagraphNode else { return XCTFail("Expected ParagraphNode") } XCTAssertEqual(para.children.count, 1) @@ -34,12 +33,11 @@ final class MarkdownInlineBuilderTests: XCTestCase { func testBoldBuilderParsesStrongText() { let input = "**bold**" - let root = language.root(of: input) - let (node, context) = parser.parse(input, root: root) + let result = parser.parse(input, language: language) - XCTAssertTrue(context.errors.isEmpty) - XCTAssertEqual(node.children.count, 1) - guard let para = node.children.first as? 
ParagraphNode else { + XCTAssertTrue(result.errors.isEmpty) + XCTAssertEqual(result.root.children.count, 1) + guard let para = result.root.children.first as? ParagraphNode else { return XCTFail("Expected ParagraphNode") } XCTAssertEqual(para.children.count, 1) @@ -55,12 +53,11 @@ final class MarkdownInlineBuilderTests: XCTestCase { func testNestedEmphasisParsesBoldAndItalic() { let input = "**bold *and italic***" - let root = language.root(of: input) - let (node, context) = parser.parse(input, root: root) + let result = parser.parse(input, language: language) - XCTAssertTrue(context.errors.isEmpty) + XCTAssertTrue(result.errors.isEmpty) // Ensure parsing succeeded - guard let para = node.children.first as? ParagraphNode else { + guard let para = result.root.children.first as? ParagraphNode else { return XCTFail("Expected ParagraphNode") } XCTAssertEqual(para.children.count, 3) @@ -71,12 +68,11 @@ final class MarkdownInlineBuilderTests: XCTestCase { func testInlineCodeBuilderParsesInlineCode() { let input = "`code`" - let root = language.root(of: input) - let (node, context) = parser.parse(input, root: root) + let result = parser.parse(input, language: language) - XCTAssertTrue(context.errors.isEmpty) - XCTAssertEqual(node.children.count, 1) - guard let para = node.children.first as? ParagraphNode else { + XCTAssertTrue(result.errors.isEmpty) + XCTAssertEqual(result.root.children.count, 1) + guard let para = result.root.children.first as? ParagraphNode else { return XCTFail("Expected ParagraphNode") } XCTAssertEqual(para.children.count, 1) @@ -87,12 +83,11 @@ final class MarkdownInlineBuilderTests: XCTestCase { func testInlineFormulaBuilderParsesFormula() { let input = "$x^2$" - let root = language.root(of: input) - let (node, context) = parser.parse(input, root: root) + let result = parser.parse(input, language: language) - XCTAssertTrue(context.errors.isEmpty) - XCTAssertEqual(node.children.count, 1) - guard let para = node.children.first as? 
ParagraphNode else { + XCTAssertTrue(result.errors.isEmpty) + XCTAssertEqual(result.root.children.count, 1) + guard let para = result.root.children.first as? ParagraphNode else { return XCTFail("Expected ParagraphNode") } XCTAssertEqual(para.children.count, 1) @@ -104,12 +99,11 @@ final class MarkdownInlineBuilderTests: XCTestCase { func testAutolinkBuilderParsesAutolink() { let urlString = "https://example.com" let input = "<\(urlString)>" - let root = language.root(of: input) - let (node, context) = parser.parse(input, root: root) + let result = parser.parse(input, language: language) - XCTAssertTrue(context.errors.isEmpty) - XCTAssertEqual(node.children.count, 1) - guard let para = node.children.first as? ParagraphNode else { + XCTAssertTrue(result.errors.isEmpty) + XCTAssertEqual(result.root.children.count, 1) + guard let para = result.root.children.first as? ParagraphNode else { return XCTFail("Expected ParagraphNode") } XCTAssertEqual(para.children.count, 1) @@ -122,12 +116,11 @@ final class MarkdownInlineBuilderTests: XCTestCase { func testURLBuilderParsesBareURL() { let urlString = "https://example.com" let input = urlString - let root = language.root(of: input) - let (node, context) = parser.parse(input, root: root) + let result = parser.parse(input, language: language) - XCTAssertTrue(context.errors.isEmpty) - XCTAssertEqual(node.children.count, 1) - guard let para = node.children.first as? ParagraphNode else { + XCTAssertTrue(result.errors.isEmpty) + XCTAssertEqual(result.root.children.count, 1) + guard let para = result.root.children.first as? 
ParagraphNode else { return XCTFail("Expected ParagraphNode") } XCTAssertEqual(para.children.count, 1) @@ -139,12 +132,11 @@ final class MarkdownInlineBuilderTests: XCTestCase { func testHTMLInlineBuilderParsesEntityAndTag() { let input = "&bold" - let root = language.root(of: input) - let (node, context) = parser.parse(input, root: root) + let result = parser.parse(input, language: language) - XCTAssertTrue(context.errors.isEmpty) - XCTAssertEqual(node.children.count, 1) - guard let para = node.children.first as? ParagraphNode else { + XCTAssertTrue(result.errors.isEmpty) + XCTAssertEqual(result.root.children.count, 1) + guard let para = result.root.children.first as? ParagraphNode else { return XCTFail("Expected ParagraphNode") } XCTAssertEqual(para.children.count, 2) @@ -161,12 +153,11 @@ final class MarkdownInlineBuilderTests: XCTestCase { func testBlockquoteBuilderParsesBlockquote() { let input = "> hello" - let root = language.root(of: input) - let (node, context) = parser.parse(input, root: root) + let result = parser.parse(input, language: language) - XCTAssertTrue(context.errors.isEmpty) - XCTAssertEqual(node.children.count, 1) - let bq = node.children.first as? BlockquoteNode + XCTAssertTrue(result.errors.isEmpty) + XCTAssertEqual(result.root.children.count, 1) + let bq = result.root.children.first as? BlockquoteNode XCTAssertNotNil(bq) XCTAssertEqual(bq?.children.count, 1) if let text = bq?.children.first as? TextNode { diff --git a/Tests/SwiftParserTests/Markdown/Builders/MarkdownNestedEmphasisTests.swift b/Tests/SwiftParserTests/Markdown/Builders/MarkdownNestedEmphasisTests.swift index 49dd505..571bb03 100644 --- a/Tests/SwiftParserTests/Markdown/Builders/MarkdownNestedEmphasisTests.swift +++ b/Tests/SwiftParserTests/Markdown/Builders/MarkdownNestedEmphasisTests.swift @@ -2,22 +2,21 @@ import XCTest @testable import SwiftParser final class MarkdownNestedEmphasisTests: XCTestCase { - private var parser: CodeOutdatedParser! 
+ private var parser: CodeParser! private var language: MarkdownLanguage! override func setUp() { super.setUp() language = MarkdownLanguage() - parser = CodeOutdatedParser(language: language) + parser = CodeParser(language: language) } func testEmphasisWithLinkAndCode() { let input = "*see [link](url) `code`*" - let root = language.root(of: input) - let (node, ctx) = parser.parse(input, root: root) - XCTAssertTrue(ctx.errors.isEmpty) - XCTAssertEqual(node.children.count, 1) - guard let para = node.children.first as? ParagraphNode, + let result = parser.parse(input, language: language) + XCTAssertTrue(result.errors.isEmpty) + XCTAssertEqual(result.root.children.count, 1) + guard let para = result.root.children.first as? ParagraphNode, let emph = para.children.first as? EmphasisNode else { return XCTFail("Expected EmphasisNode inside Paragraph") } @@ -30,11 +29,10 @@ final class MarkdownNestedEmphasisTests: XCTestCase { func testStrongWithImageAndHTML() { let input = "**image ![alt](img.png) bold**" - let root = language.root(of: input) - let (node, ctx) = parser.parse(input, root: root) - XCTAssertTrue(ctx.errors.isEmpty) - XCTAssertEqual(node.children.count, 1) - guard let para = node.children.first as? ParagraphNode, + let result = parser.parse(input, language: language) + XCTAssertTrue(result.errors.isEmpty) + XCTAssertEqual(result.root.children.count, 1) + guard let para = result.root.children.first as? ParagraphNode, let strong = para.children.first as? 
StrongNode else { return XCTFail("Expected StrongNode inside Paragraph") } diff --git a/Tests/SwiftParserTests/Markdown/Builders/MarkdownReferenceFootnoteTests.swift b/Tests/SwiftParserTests/Markdown/Builders/MarkdownReferenceFootnoteTests.swift index 443b2cf..ace994f 100644 --- a/Tests/SwiftParserTests/Markdown/Builders/MarkdownReferenceFootnoteTests.swift +++ b/Tests/SwiftParserTests/Markdown/Builders/MarkdownReferenceFootnoteTests.swift @@ -2,22 +2,21 @@ import XCTest @testable import SwiftParser final class MarkdownReferenceFootnoteTests: XCTestCase { - private var parser: CodeOutdatedParser! + private var parser: CodeParser! private var language: MarkdownLanguage! override func setUp() { super.setUp() language = MarkdownLanguage() - parser = CodeOutdatedParser(language: language) + parser = CodeParser(language: language) } func testReferenceDefinition() { let input = "[ref]: https://example.com" - let root = language.root(of: input) - let (node, ctx) = parser.parse(input, root: root) - XCTAssertTrue(ctx.errors.isEmpty) - XCTAssertEqual(node.children.count, 1) - if let ref = node.children.first as? ReferenceNode { + let result = parser.parse(input, language: language) + XCTAssertTrue(result.errors.isEmpty) + XCTAssertEqual(result.root.children.count, 1) + if let ref = result.root.children.first as? ReferenceNode { XCTAssertEqual(ref.identifier, "ref") XCTAssertEqual(ref.url, "https://example.com") } else { @@ -27,16 +26,15 @@ final class MarkdownReferenceFootnoteTests: XCTestCase { func testFootnoteDefinitionAndReference() { let input = "[^1]: Footnote text\nParagraph with reference[^1]" - let root = language.root(of: input) - let (node, ctx) = parser.parse(input, root: root) - XCTAssertTrue(ctx.errors.isEmpty) - XCTAssertEqual(node.children.count, 2) - guard let footnote = node.children.first as? 
FootnoteNode else { + let result = parser.parse(input, language: language) + XCTAssertTrue(result.errors.isEmpty) + XCTAssertEqual(result.root.children.count, 2) + guard let footnote = result.root.children.first as? FootnoteNode else { return XCTFail("Expected FootnoteNode") } XCTAssertEqual(footnote.identifier, "1") XCTAssertEqual(footnote.content, "Footnote text") - guard let paragraph = node.children.last as? ParagraphNode else { + guard let paragraph = result.root.children.last as? ParagraphNode else { return XCTFail("Expected ParagraphNode") } XCTAssertTrue(paragraph.children.contains { $0 is FootnoteNode }) diff --git a/Tests/SwiftParserTests/Markdown/Builders/MarkdownTokenBuilderTests.swift b/Tests/SwiftParserTests/Markdown/Builders/MarkdownTokenBuilderTests.swift index eaf3e70..ccdf050 100644 --- a/Tests/SwiftParserTests/Markdown/Builders/MarkdownTokenBuilderTests.swift +++ b/Tests/SwiftParserTests/Markdown/Builders/MarkdownTokenBuilderTests.swift @@ -2,23 +2,22 @@ import XCTest @testable import SwiftParser final class MarkdownTokenBuilderTests: XCTestCase { - private var parser: CodeOutdatedParser! + private var parser: CodeParser! private var language: MarkdownLanguage! override func setUp() { super.setUp() language = MarkdownLanguage() - parser = CodeOutdatedParser(language: language) + parser = CodeParser(language: language) } func testHeadingBuilderAppendsHeaderNodeWithText() { let input = "# Hello" - let root = language.root(of: input) - let (node, context) = parser.parse(input, root: root) + let result = parser.parse(input, language: language) // Expect one child: HeaderNode - XCTAssertEqual(node.children.count, 1) - let header = node.children.first as? HeaderNode + XCTAssertEqual(result.root.children.count, 1) + let header = result.root.children.first as? 
HeaderNode XCTAssertTrue(header != nil, "Expected a HeaderNode as first child") XCTAssertEqual(header?.level, 1) // Level 1 for single '#' @@ -32,17 +31,16 @@ final class MarkdownTokenBuilderTests: XCTestCase { } // No errors - XCTAssertTrue(context.errors.isEmpty) + XCTAssertTrue(result.errors.isEmpty) } func testTextBuilderAppendsTextNodeToRoot() { let input = "Hello World" - let root = language.root(of: input) - let (node, context) = parser.parse(input, root: root) + let result = parser.parse(input, language: language) // Expect a paragraph with one TextNode - XCTAssertEqual(node.children.count, 1) - guard let para = node.children.first as? ParagraphNode else { + XCTAssertEqual(result.root.children.count, 1) + guard let para = result.root.children.first as? ParagraphNode else { return XCTFail("Expected ParagraphNode") } XCTAssertEqual(para.children.count, 1) @@ -52,20 +50,19 @@ final class MarkdownTokenBuilderTests: XCTestCase { XCTFail("Expected TextNode inside Paragraph") } - XCTAssertTrue(context.errors.isEmpty) + XCTAssertTrue(result.errors.isEmpty) } func testNewlineBuilderResetsContextToParent() { let input = "# Title\nSubtitle" - let root = language.root(of: input) - let (node, context) = parser.parse(input, root: root) + let result = parser.parse(input, language: language) // After header parse, Title in HeaderNode, then newline resets context, Subtitle appended to root // Document should have two children: HeaderNode and ParagraphNode - XCTAssertEqual(node.children.count, 2) - XCTAssertTrue(node.children[0] is HeaderNode, "First child should be HeaderNode") - guard let para = node.children[1] as? ParagraphNode else { + XCTAssertEqual(result.root.children.count, 2) + XCTAssertTrue(result.root.children[0] is HeaderNode, "First child should be HeaderNode") + guard let para = result.root.children[1] as? ParagraphNode else { return XCTFail("Expected ParagraphNode after newline") } if let subtitleNode = para.children.first as? 
TextNode { @@ -74,6 +71,6 @@ final class MarkdownTokenBuilderTests: XCTestCase { XCTFail("Expected Subtitle as TextNode") } - XCTAssertTrue(context.errors.isEmpty) + XCTAssertTrue(result.errors.isEmpty) } } diff --git a/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownCodeTokenizerBasicTests.swift b/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownCodeTokenizerBasicTests.swift index 87024b6..a2826cd 100644 --- a/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownCodeTokenizerBasicTests.swift +++ b/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownCodeTokenizerBasicTests.swift @@ -4,7 +4,11 @@ import XCTest final class MarkdownCodeTokenizerBasicTests: XCTestCase { func testHeadingTokenization() { let language = MarkdownLanguage() - let tokenizer = CodeTokenizer(builders: language.tokens, state: language.state) + let tokenizer = CodeTokenizer( + builders: language.tokens, + state: language.state, + eofTokenFactory: { language.eofToken(at: $0) } + ) let (tokens, _) = tokenizer.tokenize("# Title") XCTAssertEqual(tokens.count, 4) XCTAssertEqual(tokens[0].element, .hash) @@ -15,7 +19,11 @@ final class MarkdownCodeTokenizerBasicTests: XCTestCase { func testAutolinkTokenization() { let language = MarkdownLanguage() - let tokenizer = CodeTokenizer(builders: language.tokens, state: language.state) + let tokenizer = CodeTokenizer( + builders: language.tokens, + state: language.state, + eofTokenFactory: { language.eofToken(at: $0) } + ) let (tokens, _) = tokenizer.tokenize("") XCTAssertEqual(tokens.count, 2) XCTAssertEqual(tokens[0].element, .autolink) @@ -25,7 +33,11 @@ final class MarkdownCodeTokenizerBasicTests: XCTestCase { func testBareURLTokenization() { let language = MarkdownLanguage() - let tokenizer = CodeTokenizer(builders: language.tokens, state: language.state) + let tokenizer = CodeTokenizer( + builders: language.tokens, + state: language.state, + eofTokenFactory: { language.eofToken(at: $0) } + ) let (tokens, _) = tokenizer.tokenize("https://example.com") 
XCTAssertEqual(tokens.count, 2) XCTAssertEqual(tokens[0].element, .url) @@ -34,7 +46,11 @@ final class MarkdownCodeTokenizerBasicTests: XCTestCase { func testBareEmailTokenization() { let language = MarkdownLanguage() - let tokenizer = CodeTokenizer(builders: language.tokens, state: language.state) + let tokenizer = CodeTokenizer( + builders: language.tokens, + state: language.state, + eofTokenFactory: { language.eofToken(at: $0) } + ) let (tokens, _) = tokenizer.tokenize("user@example.com") XCTAssertEqual(tokens.count, 2) XCTAssertEqual(tokens[0].element, .email) diff --git a/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownCodeTokenizerCodeTests.swift b/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownCodeTokenizerCodeTests.swift index 201cd69..1fdd735 100644 --- a/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownCodeTokenizerCodeTests.swift +++ b/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownCodeTokenizerCodeTests.swift @@ -4,7 +4,11 @@ import XCTest final class MarkdownCodeTokenizerCodeTests: XCTestCase { private func tokenize(_ input: String) -> [any CodeToken] { let language = MarkdownLanguage() - let tokenizer = CodeTokenizer(builders: language.tokens, state: language.state) + let tokenizer = CodeTokenizer( + builders: language.tokens, + state: language.state, + eofTokenFactory: { language.eofToken(at: $0) } + ) let (tokens, _) = tokenizer.tokenize(input) return tokens } diff --git a/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownCodeTokenizerCustomContainerTests.swift b/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownCodeTokenizerCustomContainerTests.swift index da17de1..1d372dd 100644 --- a/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownCodeTokenizerCustomContainerTests.swift +++ b/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownCodeTokenizerCustomContainerTests.swift @@ -4,7 +4,11 @@ import XCTest final class MarkdownCodeTokenizerCustomContainerTests: XCTestCase { private func tokenize(_ input: String) -> [any CodeToken] { let language = 
MarkdownLanguage() - let tokenizer = CodeTokenizer(builders: language.tokens, state: language.state) + let tokenizer = CodeTokenizer( + builders: language.tokens, + state: language.state, + eofTokenFactory: { language.eofToken(at: $0) } + ) let (tokens, _) = tokenizer.tokenize(input) return tokens } diff --git a/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownCodeTokenizerFormulaTests.swift b/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownCodeTokenizerFormulaTests.swift index 8da9cf8..cd1b2f4 100644 --- a/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownCodeTokenizerFormulaTests.swift +++ b/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownCodeTokenizerFormulaTests.swift @@ -4,7 +4,11 @@ import XCTest final class MarkdownCodeTokenizerFormulaTests: XCTestCase { private func tokenize(_ input: String) -> [any CodeToken] { let language = MarkdownLanguage() - let tokenizer = CodeTokenizer(builders: language.tokens, state: language.state) + let tokenizer = CodeTokenizer( + builders: language.tokens, + state: language.state, + eofTokenFactory: { language.eofToken(at: $0) } + ) let (tokens, _) = tokenizer.tokenize(input) return tokens } diff --git a/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownCodeTokenizerHTMLTests.swift b/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownCodeTokenizerHTMLTests.swift index e8f4566..eff0fc6 100644 --- a/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownCodeTokenizerHTMLTests.swift +++ b/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownCodeTokenizerHTMLTests.swift @@ -4,7 +4,11 @@ import XCTest final class MarkdownCodeTokenizerHTMLTests: XCTestCase { private func tokenize(_ input: String) -> [any CodeToken] { let language = MarkdownLanguage() - let tokenizer = CodeTokenizer(builders: language.tokens, state: language.state) + let tokenizer = CodeTokenizer( + builders: language.tokens, + state: language.state, + eofTokenFactory: { language.eofToken(at: $0) } + ) let (tokens, _) = tokenizer.tokenize(input) return tokens } diff 
--git a/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownTokenizerBasicTests.swift b/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownTokenizerBasicTests.swift index 000ffa8..e08eb7c 100644 --- a/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownTokenizerBasicTests.swift +++ b/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownTokenizerBasicTests.swift @@ -8,7 +8,11 @@ final class MarkdownTokenizerBasicTests: XCTestCase { override func setUp() { super.setUp() let language = MarkdownLanguage() - tokenizer = CodeTokenizer(builders: language.tokens, state: language.state) + tokenizer = CodeTokenizer( + builders: language.tokens, + state: language.state, + eofTokenFactory: { language.eofToken(at: $0) } + ) } override func tearDown() { diff --git a/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownTokenizerComplexTests.swift b/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownTokenizerComplexTests.swift index 3409aa5..9c49e21 100644 --- a/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownTokenizerComplexTests.swift +++ b/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownTokenizerComplexTests.swift @@ -8,7 +8,11 @@ final class MarkdownTokenizerComplexTests: XCTestCase { override func setUp() { super.setUp() let language = MarkdownLanguage() - tokenizer = CodeTokenizer(builders: language.tokens, state: language.state) + tokenizer = CodeTokenizer( + builders: language.tokens, + state: language.state, + eofTokenFactory: { language.eofToken(at: $0) } + ) } override func tearDown() { diff --git a/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownTokenizerFormulaTests.swift b/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownTokenizerFormulaTests.swift index 97ab3c6..6a45362 100644 --- a/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownTokenizerFormulaTests.swift +++ b/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownTokenizerFormulaTests.swift @@ -8,7 +8,11 @@ final class MarkdownTokenizerFormulaTests: XCTestCase { override func setUp() { super.setUp() let language = 
MarkdownLanguage() - tokenizer = CodeTokenizer(builders: language.tokens, state: language.state) + tokenizer = CodeTokenizer( + builders: language.tokens, + state: language.state, + eofTokenFactory: { language.eofToken(at: $0) } + ) } override func tearDown() { diff --git a/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownTokenizerHTMLTests.swift b/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownTokenizerHTMLTests.swift index 99a293b..1299b18 100644 --- a/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownTokenizerHTMLTests.swift +++ b/Tests/SwiftParserTests/Markdown/Tokenizer/MarkdownTokenizerHTMLTests.swift @@ -8,7 +8,11 @@ final class MarkdownTokenizerHTMLTests: XCTestCase { override func setUp() { super.setUp() let language = MarkdownLanguage() - tokenizer = CodeTokenizer(builders: language.tokens, state: language.state) + tokenizer = CodeTokenizer( + builders: language.tokens, + state: language.state, + eofTokenFactory: { language.eofToken(at: $0) } + ) } override func tearDown() {