Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion MARKDOWN_PARSER.md
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,12 @@ class MarkdownPluginManager {
- Add documentation comments for public APIs
- Keep functions focused and single-purpose

### Documentation
The codebase now contains detailed Swift documentation comments explaining the
responsibilities of core types such as `CodeParser`, `CodeConstructor` and the
inline parser. These comments can be viewed in Xcode Quick Help or rendered by
documentation tools.

### Testing Requirements
- All new features must include comprehensive tests
- Maintain test coverage above 90%
Expand Down Expand Up @@ -653,4 +659,4 @@ This project is licensed under the MIT License - see the LICENSE file for detail

---

*Last updated: 2025-07-20*
*Last updated: 2025-07-21*
22 changes: 16 additions & 6 deletions Sources/SwiftParser/Core/CodeConstructor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,34 @@
// Created by Dongyu Zhao on 7/21/25.
//

/// Consumes a list of tokens to build an AST using registered node builders.
public class CodeConstructor<Node, Token> where Node: CodeNodeElement, Token: CodeTokenElement {
/// Ordered collection of node builders that attempt to consume tokens.
private let builders: [any CodeNodeBuilder<Node, Token>]
/// Factory that provides initial construction state for each parse run.
private var state: () -> (any CodeConstructState<Node, Token>)?

public init(builders: [any CodeNodeBuilder<Node, Token>], state: @escaping () -> (any CodeConstructState<Node, Token>)?) {
/// Create a new constructor
/// - Parameters:
/// - builders: The node builders responsible for producing AST nodes.
/// - state: Factory returning the initial parsing state object.
public init(
builders: [any CodeNodeBuilder<Node, Token>],
state: @escaping () -> (any CodeConstructState<Node, Token>)?
) {
self.builders = builders
self.state = state
}

/// Build an AST from a token stream
/// - Parameters:
/// - tokens: Token list to consume.
/// - root: Root node that will receive parsed children.
/// - Returns: The populated root node and any construction errors.
public func parse(_ tokens: [any CodeToken<Token>], root: CodeNode<Node>) -> (CodeNode<Node>, [CodeError]) {
var context = CodeConstructContext(current: root, tokens: tokens, state: state())

while context.consuming < context.tokens.count {
// Stop at EOF without recording an error
if let token = context.tokens[context.consuming] as? MarkdownToken,
token.element == .eof {
break
}

var matched = false
for node in builders {
Expand Down
8 changes: 8 additions & 0 deletions Sources/SwiftParser/Core/CodeError.swift
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
import Foundation

/// Represents a parsing error encountered during tokenization or AST building.
public struct CodeError: Error {
/// Human readable error message.
public let message: String
/// Range in the original source where the error occurred, if available.
public let range: Range<String.Index>?

/// Create a new error instance.
/// - Parameters:
/// - message: Description of the problem.
/// - range: Optional source range that triggered the error.
public init(_ message: String, range: Range<String.Index>? = nil) {
self.message = message
self.range = range
Expand Down
17 changes: 16 additions & 1 deletion Sources/SwiftParser/Core/CodeLanguage.swift
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,26 @@ public protocol CodeLanguage<Node, Token> where Node: CodeNodeElement, Token: Co
associatedtype Node: CodeNodeElement
associatedtype Token: CodeTokenElement

var tokenizer: any CodeOutdatedTokenizer<Token> { get }
/// The token builders used to tokenize the input.
var tokens: [any CodeTokenBuilder<Token>] { get }

/// The node builders used to construct the AST.
var nodes: [any CodeNodeBuilder<Node, Token>] { get }

/// The function that creates the root node of the AST.
func root() -> CodeNode<Node>

/// The function that creates the initial context for AST construction.
func state() -> (any CodeConstructState<Node, Token>)?

/// The function that creates the initial context for tokenization.
func state() -> (any CodeTokenState<Token>)?

/// Provide an EOF token if the language requires one.
/// - Parameter range: The range where the EOF token should be inserted.
func eofToken(at range: Range<String.Index>) -> (any CodeToken<Token>)?
}

extension CodeLanguage {
    /// Default implementation that supplies no EOF token.
    /// - Parameter range: Unused by the default implementation.
    /// - Returns: Always `nil`.
    public func eofToken(at range: Range<String.Index>) -> (any CodeToken<Token>)? {
        return nil
    }
}
53 changes: 0 additions & 53 deletions Sources/SwiftParser/Core/CodeOutdatedParser.swift

This file was deleted.

7 changes: 0 additions & 7 deletions Sources/SwiftParser/Core/CodeOutdatedTokenizer.swift

This file was deleted.

31 changes: 29 additions & 2 deletions Sources/SwiftParser/Core/CodeParser.swift
Original file line number Diff line number Diff line change
@@ -1,15 +1,27 @@
/// Result returned from `CodeParser.parse`: the AST, the token stream and any
/// parsing errors, bundled into a single value.
public struct CodeParseResult<Node, Token> where Node: CodeNodeElement, Token: CodeTokenElement {
    /// Root node of the constructed AST.
    public let root: CodeNode<Node>
    /// Token stream produced while parsing.
    public let tokens: [any CodeToken<Token>]
    /// Errors that occurred during tokenization or AST construction.
    public let errors: [CodeError]

    /// Bundle the pieces of a parse run into a result value.
    /// - Parameters:
    ///   - root: The constructed root node of the AST.
    ///   - tokens: Token stream produced while parsing.
    ///   - errors: Any errors that occurred during tokenization or AST
    ///     construction. Defaults to an empty list.
    public init(
        root: CodeNode<Node>,
        tokens: [any CodeToken<Token>],
        errors: [CodeError] = []
    ) {
        self.errors = errors
        self.tokens = tokens
        self.root = root
    }
}

/// High level parser that orchestrates tokenization and AST construction.
///
/// `CodeParser` uses the provided `CodeLanguage` implementation to tokenize the
/// source text and then build an AST using the registered node builders.
public class CodeParser<Node: CodeNodeElement, Token: CodeTokenElement> where Node: CodeNodeElement, Token: CodeTokenElement {
private let language: any CodeLanguage<Node, Token>

Expand All @@ -18,10 +30,25 @@ public class CodeParser<Node: CodeNodeElement, Token: CodeTokenElement> where No

public init(language: any CodeLanguage<Node, Token>) {
self.language = language
self.tokenizer = CodeTokenizer(builders: language.tokens, state: language.state)
self.constructor = CodeConstructor(builders: language.nodes, state: language.state)
self.tokenizer = CodeTokenizer(
builders: language.tokens,
state: language.state,
eofTokenFactory: { language.eofToken(at: $0) }
)
self.constructor = CodeConstructor(
builders: language.nodes,
state: language.state
)
}

/// Parse a source string using the supplied language.
///
/// This method first tokenizes the input and, if tokenization succeeds,
/// constructs the AST using the language's node builders.
/// - Parameter source: The raw text to parse.
/// - Parameter language: The language definition to use for parsing.
/// - Returns: A `CodeParseResult` containing the root node, tokens and any
/// errors encountered.
public func parse(_ source: String, language: any CodeLanguage<Node, Token>) -> CodeParseResult<Node, Token> {
let normalized = normalize(source)
let root = language.root()
Expand Down
15 changes: 10 additions & 5 deletions Sources/SwiftParser/Core/CodeTokenizer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,16 @@
public class CodeTokenizer<Token> where Token: CodeTokenElement {
private let builders: [any CodeTokenBuilder<Token>]
private var state: () -> (any CodeTokenState<Token>)?
private let eofTokenFactory: ((Range<String.Index>) -> (any CodeToken<Token>)?)?

public init(builders: [any CodeTokenBuilder<Token>], state: @escaping () -> (any CodeTokenState<Token>)?) {
public init(
builders: [any CodeTokenBuilder<Token>],
state: @escaping () -> (any CodeTokenState<Token>)?,
eofTokenFactory: ((Range<String.Index>) -> (any CodeToken<Token>)?)? = nil
) {
self.builders = builders
self.state = state
self.eofTokenFactory = eofTokenFactory
}

public func tokenize(_ input: String) -> ([any CodeToken<Token>], [CodeError]) {
Expand Down Expand Up @@ -42,10 +48,9 @@ public class CodeTokenizer<Token> where Token: CodeTokenElement {
}
}

// Automatically append EOF token for Markdown
if Token.self == MarkdownTokenElement.self,
let eof = MarkdownToken.eof(at: input.endIndex..<input.endIndex) as? any CodeToken<Token> {
context.tokens.append(eof)
// Append EOF token if provided by the language
if let token = eofTokenFactory?(input.endIndex..<input.endIndex) {
context.tokens.append(token)
}

return (context.tokens, context.errors)
Expand Down
13 changes: 0 additions & 13 deletions Sources/SwiftParser/LanguageExtensions.swift

This file was deleted.

41 changes: 17 additions & 24 deletions Sources/SwiftParser/Markdown/MarkdownLanguage.swift
Original file line number Diff line number Diff line change
@@ -1,19 +1,28 @@
import Foundation

// MARK: - Markdown Language Implementation
/// Default Markdown language implementation following CommonMark with optional
/// extensions.
///
/// The language exposes a set of token and node builders that together
/// understand Markdown syntax. The initializer allows callers to supply a
/// custom list of builders to enable or disable features.
public class MarkdownLanguage: CodeLanguage {
public typealias Node = MarkdownNodeElement
public typealias Token = MarkdownTokenElement

// MARK: - Language Components
public let tokenizer: any CodeOutdatedTokenizer<MarkdownTokenElement>
public var tokens: [any CodeTokenBuilder<MarkdownTokenElement>]
public let nodes: [any CodeNodeBuilder<MarkdownNodeElement, MarkdownTokenElement>]


// MARK: - Initialization
/// Create a Markdown language with the provided builders.
///
/// - Parameter consumers: Node builders to be used when constructing the
/// document AST. Passing a custom set allows features to be enabled or
/// disabled.
public init(
tokenizer: any CodeOutdatedTokenizer<MarkdownTokenElement> = MarkdownTokenizer(),
consumers: [any CodeNodeBuilder<MarkdownNodeElement, MarkdownTokenElement>] = [
MarkdownReferenceDefinitionBuilder(),
MarkdownHeadingBuilder(),
Expand All @@ -28,10 +37,10 @@ public class MarkdownLanguage: CodeLanguage {
MarkdownListBuilder(),
MarkdownBlockquoteBuilder(),
MarkdownParagraphBuilder(),
MarkdownNewlineBuilder()
MarkdownNewlineBuilder(),
MarkdownEOFBuilder()
]
) {
self.tokenizer = tokenizer
self.nodes = consumers
let single = MarkdownSingleCharacterTokenBuilder()
self.tokens = [
Expand Down Expand Up @@ -59,6 +68,10 @@ public class MarkdownLanguage: CodeLanguage {
/// Initial tokenizer state for Markdown.
/// - Returns: Always `nil`; no tokenizer state object is supplied.
public func state() -> (any CodeTokenState<MarkdownTokenElement>)? {
    return nil
}

/// Produce the EOF token for Markdown input.
/// - Parameter range: Range forwarded to `MarkdownToken.eof(at:)`.
/// - Returns: The token created by `MarkdownToken.eof(at:)`.
public func eofToken(at range: Range<String.Index>) -> (any CodeToken<MarkdownTokenElement>)? {
    MarkdownToken.eof(at: range)
}
}

// MARK: - Language Configuration
Expand Down Expand Up @@ -256,26 +269,6 @@ extension MarkdownLanguage {
plugins: true
)
}

/// Create a language instance with specific configuration
public static func configured(_ config: Configuration) -> MarkdownLanguage {
let tokenizer = MarkdownTokenizer()
let consumers: [any CodeNodeBuilder<MarkdownNodeElement, MarkdownTokenElement>] = []

// TODO: Add consumers based on configuration when implemented
// if config.commonMark {
// consumers.append(CommonMarkConsumer())
// }
// if config.gfm {
// consumers.append(GFMConsumer())
// }
// if config.math {
// consumers.append(MathConsumer())
// }
// ... etc

return MarkdownLanguage(tokenizer: tokenizer, consumers: consumers)
}
}

// MARK: - Language Capabilities
Expand Down
1 change: 1 addition & 0 deletions Sources/SwiftParser/Markdown/MarkdownNodes.swift
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ public class MarkdownNodeBase: CodeNode<MarkdownNodeElement> {
}

// MARK: - Document Structure
/// Root node representing an entire Markdown document.
public class DocumentNode: MarkdownNodeBase {
public var title: String?
public var metadata: [String: Any] = [:]
Expand Down
Loading