DongyuZhao · DongyuZhao · Jul 21, 2025 · Jul 18, 2025 · Jul 20, 2025 · Jul 20, 2025
diff --git a/MARKDOWN_PARSER.md b/MARKDOWN_PARSER.md
diff --git a/Sources/SwiftParser/Core/CodeContext.swift b/Sources/SwiftParser/Core/CodeContext.swift
@@ -1,13 +1,42 @@
 import Foundation
 
-public struct CodeContext {
-    public var tokens: [any CodeToken]
-    public var currentNode: CodeNode
+/// Marker protocol for language-specific state carried by a `CodeContext`.
+/// It declares no requirements beyond the `Node` and `Token` element types.
+public protocol CodeContextState<Node, Token> where Node: CodeNodeElement, Token: CodeTokenElement {
+    associatedtype Node: CodeNodeElement
+    associatedtype Token: CodeTokenElement
+}
+
+/// Mutable parsing state: the node being processed, the token list, the read position
+/// in that list, the errors recorded so far and optional language-specific state.
+public class CodeContext<Node, Token> where Node: CodeNodeElement, Token: CodeTokenElement {
+    /// The current node being processed in the context
+    public var current: CodeNode<Node>
+
+    /// The tokens that need to be processed in this context
+    public var tokens: [any CodeToken<Token>]
+
+    /// The index of the next token to consume
+    public var consuming: Int
+
+    /// Any errors encountered during processing
     public var errors: [CodeError]
 
-    public init(tokens: [any CodeToken], currentNode: CodeNode, errors: [CodeError]) {
+    /// The state of the processing, which can hold additional information
+    public var state:  (any CodeContextState<Node, Token>)?
+
+    /// Creates a context.
+    /// - Parameters:
+    ///   - current: The node being processed.
+    ///   - tokens: The tokens to process.
+    ///   - consuming: Index of the next token to consume; defaults to `0`.
+    ///   - state: Optional additional processing state; defaults to `nil`.
+    ///   - errors: Errors recorded so far; defaults to an empty array.
+    public init(current: CodeNode<Node>, tokens: [any CodeToken<Token>], consuming: Int = 0, state: (any CodeContextState<Node, Token>)? = nil, errors: [CodeError] = []) {
+        self.current = current
         self.tokens = tokens
-        self.currentNode = currentNode
+        self.consuming = consuming
+        self.state = state
         self.errors = errors
     }
 }
diff --git a/Sources/SwiftParser/Core/CodeElement.swift b/Sources/SwiftParser/Core/CodeElement.swift
diff --git a/Sources/SwiftParser/Core/CodeLanguage.swift b/Sources/SwiftParser/Core/CodeLanguage.swift
@@ -1,7 +1,17 @@
 import Foundation
 
-public protocol CodeLanguage {
-    var tokenizer: CodeTokenizer { get }
-    var consumers: [CodeTokenConsumer] { get }
-    var rootElement: any CodeElement { get }
+/// Describes a language to `CodeParser`: how input is tokenized and which builders turn tokens into nodes.
+public protocol CodeLanguage<Node, Token> where Node: CodeNodeElement, Token: CodeTokenElement {
+    associatedtype Node: CodeNodeElement
+    associatedtype Token: CodeTokenElement
+
+    /// Tokenizer that `CodeParser` runs on the line-ending-normalized input.
+    var tokenizer: any CodeTokenizer<Token> { get }
+    /// Node builders; `CodeParser` tries them in array order and stops at the first whose `build(from:)` returns `true`.
+    var builders: [any CodeNodeBuilder<Node, Token>] { get }
+
+    /// Returns the root node to use for `content`.
+    func root(of content: String) -> CodeNode<Node>
+    /// Returns optional language-specific state for `content`; `CodeParser` stores it in the context's `state`.
+    func state(of content: String) -> (any CodeContextState<Node, Token>)?
 }
diff --git a/Sources/SwiftParser/Core/CodeNode.swift b/Sources/SwiftParser/Core/CodeNode.swift
@@ -1,71 +1,81 @@
 import Foundation
 
-public class CodeNode {
-    public let type: any CodeElement
-    public var value: String
-    public weak var parent: CodeNode?
-    public var children: [CodeNode] = []
-    public var range: Range<String.Index>?
+public protocol CodeNodeElement: CaseIterable, RawRepresentable where RawValue == String {}
 
+public class CodeNode<Node> where Node: CodeNodeElement {
+    public let element: Node
+    public weak var parent: CodeNode<Node>?
+    public var children: [CodeNode<Node>] = []
+
+    /// The node's id relies on its element and children
     public var id: Int {
         var hasher = Hasher()
-        hasher.combine(String(describing: type))
-        hasher.combine(value)
+        hash(into: &hasher)
         for child in children {
             hasher.combine(child.id)
         }
         return hasher.finalize()
     }
 
-    public init(type: any CodeElement, value: String, range: Range<String.Index>? = nil) {
-        self.type = type
-        self.value = value
-        self.range = range
+    public init(element: Node) {
+        self.element = element
+    }
+
+    /// The function to compute the hash value of this node.
+    /// Since some structural nodes do not have hashable content, we leave this function open.
+    /// Each subclass can override this method to provide its own hash logic.
+    open func hash(into hasher: inout Hasher) {
+        hasher.combine(element.rawValue)
     }
 
-    public func addChild(_ node: CodeNode) {
+    // MARK: - Child management
+
+    /// Add a child node to this node
+    public func append(_ node: CodeNode<Node>) {
         node.parent = self
         children.append(node)
     }
 
     /// Insert a child node at the specified index
-    public func insertChild(_ node: CodeNode, at index: Int) {
+    public func insert(_ node: CodeNode<Node>, at index: Int) {
         node.parent = self
         children.insert(node, at: index)
     }
 
     /// Remove and return the child node at the given index
     @discardableResult
-    public func removeChild(at index: Int) -> CodeNode {
+    public func remove(at index: Int) -> CodeNode<Node> {
         let removed = children.remove(at: index)
         removed.parent = nil
         return removed
     }
 
+    /// Detach this node from its parent
+    public func remove() {
+        parent?.children.removeAll { $0 === self }
+        parent = nil
+    }
+
     /// Replace the child node at the given index with another node
-    public func replaceChild(at index: Int, with node: CodeNode) {
+    public func replace(at index: Int, with node: CodeNode<Node>) {
         children[index].parent = nil
         node.parent = self
         children[index] = node
     }
 
-    /// Detach this node from its parent
-    public func removeFromParent() {
-        parent?.children.removeAll { $0 === self }
-        parent = nil
-    }
+    // MARK: - Traversal and Searching
 
     /// Depth-first traversal of this node and all descendants
-    public func traverseDepthFirst(_ visit: (CodeNode) -> Void) {
+    public func dfs(_ visit: (CodeNode<Node>) -> Void) {
         visit(self)
         for child in children {
-            child.traverseDepthFirst(visit)
+            child.dfs(visit)
         }
     }
 
     /// Breadth-first traversal of this node and all descendants
-    public func traverseBreadthFirst(_ visit: (CodeNode) -> Void) {
-        var queue: [CodeNode] = [self]
+    public func bfs(_ visit: (CodeNode<Node>) -> Void) {
+        var queue: [CodeNode<Node>] = [self]
         while !queue.isEmpty {
             let node = queue.removeFirst()
             visit(node)
@@ -74,7 +84,7 @@ public class CodeNode {
     }
 
     /// Return the first node in the subtree satisfying the predicate
-    public func first(where predicate: (CodeNode) -> Bool) -> CodeNode? {
+    public func first(where predicate: (CodeNode<Node>) -> Bool) -> CodeNode<Node>? {
         if predicate(self) { return self }
         for child in children {
             if let result = child.first(where: predicate) {
@@ -85,17 +95,17 @@ public class CodeNode {
     }
 
     /// Return all nodes in the subtree satisfying the predicate
-    public func findAll(where predicate: (CodeNode) -> Bool) -> [CodeNode] {
-        var results: [CodeNode] = []
-        traverseDepthFirst { node in
+    public func nodes(where predicate: (CodeNode<Node>) -> Bool) -> [CodeNode<Node>] {
+        var results: [CodeNode<Node>] = []
+        dfs { node in
             if predicate(node) { results.append(node) }
         }
         return results
     }
 
     /// Number of nodes in this subtree including this node
-    public var subtreeCount: Int {
-        1 + children.reduce(0) { $0 + $1.subtreeCount }
+    public var count: Int {
+        1 + children.reduce(0) { $0 + $1.count }
     }
 
     /// Depth of this node from the root node

diff --git a/Sources/SwiftParser/Core/CodeNodeBuilder.swift b/Sources/SwiftParser/Core/CodeNodeBuilder.swift
@@ -0,0 +1,14 @@
+import Foundation
+
+/// Consume tokens to build a tree of nodes.
+public protocol CodeNodeBuilder<Node, Token> where Node: CodeNodeElement, Token: CodeTokenElement {
+    associatedtype Node: CodeNodeElement
+    associatedtype Token: CodeTokenElement
+
+    /// Attempt to build part of the AST from the context.
+    /// Returns true if the builder successfully consumed tokens and updated the context.
+    /// - Parameter context: The parsing context to read tokens from and update.
+    /// - Returns: `true` on success; `CodeParser` then stops trying further builders at this position.
+    /// NOTE(review): implementations presumably leave `context` untouched when returning `false` — confirm.
+    func build(from context: inout CodeContext<Node, Token>) -> Bool
+}
diff --git a/Sources/SwiftParser/Core/CodeParser.swift b/Sources/SwiftParser/Core/CodeParser.swift
@@ -1,50 +1,76 @@
 import Foundation
 
-public final class CodeParser {
-    private var consumers: [CodeTokenConsumer]
-    private let tokenizer: CodeTokenizer
+/// Drives a `CodeLanguage`: tokenizes the input and lets the language's builders consume the tokens.
+public final class CodeParser<Node, Token> where Node: CodeNodeElement, Token: CodeTokenElement {
+    private let language: any CodeLanguage<Node, Token>
 
-    // Registered state is now reset for each parse run
-
-    public init(language: CodeLanguage) {
-        self.tokenizer = language.tokenizer
-        self.consumers = language.consumers
+    /// Creates a parser for `language`.
+    public init(language: any CodeLanguage<Node, Token>) {
+        self.language = language
     }
 
+    /// Parses `input`, letting the language's builders update a context that starts at `root`.
+    ///
+    /// Line endings are normalized to `\n` before tokenizing. Builders are tried in order at each
+    /// position; a token that no builder accepts is recorded as an error and skipped.
+    /// - Parameters:
+    ///   - input: The source text to parse.
+    ///   - root: The node that becomes the context's initial `current` node.
+    /// - Returns: `root` together with the final context (including any recorded errors).
+    public func parse(_ input: String, root: CodeNode<Node>) -> (node: CodeNode<Node>, context: CodeContext<Node, Token>) {
+        let normalized = normalize(input)
+        let tokens = language.tokenizer.tokenize(normalized)
+        var context = CodeContext(current: root, tokens: tokens, state: language.state(of: normalized))
 
-
-    public func parse(_ input: String, rootNode: CodeNode) -> (node: CodeNode, context: CodeContext) {
-        let tokens = tokenizer.tokenize(input)
-        var context = CodeContext(tokens: tokens, currentNode: rootNode, errors: [])
-
-        // Infinite loop protection: track token count progression
-        var lastCount = context.tokens.count + 1
-
-        while let token = context.tokens.first {
-            // Infinite loop detection - if token count hasn't decreased, terminate parsing immediately
-            if context.tokens.count == lastCount {
-                context.errors.append(CodeError("Infinite loop detected: parser stuck at token \(token.kindDescription). Terminating parse to prevent hang.", range: token.range))
+        while context.consuming < context.tokens.count {
+            // Stop at EOF without recording an error
+            // NOTE(review): this check only recognizes `MarkdownToken`; for any other token type the
+            // cast fails and an EOF token is handed to the builders like a regular token.
+            if let token = context.tokens[context.consuming] as? MarkdownToken,
+               token.element == .eof {
                 break
             }
-            lastCount = context.tokens.count
 
-            if token.kindDescription == "eof" {
-                break
-            }
+            // Remember the position so a builder that reports success without advancing is detected.
+            let start = context.consuming
             var matched = false
-            for consumer in consumers {
-                if consumer.consume(context: &context, token: token) {
+            for builder in language.builders {
+                if builder.build(from: &context) {
                     matched = true
                     break
                 }
             }
 
+            // A builder that returns true must have advanced `consuming`; otherwise the same builder
+            // would match at the same position on every iteration. Record an error and stop.
+            if matched && context.consuming <= start {
+                if start < context.tokens.count {
+                    let token = context.tokens[start]
+                    let error = CodeError("Builder matched without consuming token: \(token.element)", range: token.range)
+                    context.errors.append(error)
+                }
+                break
+            }
+
             if !matched {
-                context.errors.append(CodeError("Unrecognized token \(token.kindDescription)", range: token.range))
-                context.tokens.removeFirst()
+                // If no builder matched, record an error and skip the token
+                let token = context.tokens[context.consuming]
+                let error = CodeError("Unrecognized token: \(token.element)", range: token.range)
+                context.errors.append(error)
+                context.consuming += 1
             }
         }
 
-        return (rootNode, context)
+        return (root, context)
+    }
+
+    /// Normalizes input string to handle line ending inconsistencies and other common issues
+    /// This ensures consistent behavior across different platforms and input sources
+    private func normalize(_ raw: String) -> String {
+        // Normalize line endings: Convert CRLF (\r\n) and CR (\r) to LF (\n)
+        // This prevents issues with different line ending conventions
+        return raw
+            .replacingOccurrences(of: "\r\n", with: "\n")  // Windows CRLF -> Unix LF
+            .replacingOccurrences(of: "\r", with: "\n")    // Classic Mac CR -> Unix LF
     }
 }
diff --git a/Sources/SwiftParser/Core/CodeToken.swift b/Sources/SwiftParser/Core/CodeToken.swift
@@ -1,7 +1,13 @@
 import Foundation
 
-public protocol CodeToken {
-    var kindDescription: String { get }
+/// Element type identifying the kind of a token: a `CaseIterable` type with `String` raw values.
+public protocol CodeTokenElement: CaseIterable, RawRepresentable where RawValue == String {}
+
+/// A token carrying its element kind, its text and a range.
+public protocol CodeToken<Element> where Element: CodeTokenElement {
+    associatedtype Element: CodeTokenElement
+    /// The kind of this token.
+    var element: Element { get }
     var text: String { get }
     var range: Range<String.Index> { get }
 }
diff --git a/Sources/SwiftParser/Core/CodeTokenConsumer.swift b/Sources/SwiftParser/Core/CodeTokenConsumer.swift
diff --git a/Sources/SwiftParser/Core/CodeTokenizer.swift b/Sources/SwiftParser/Core/CodeTokenizer.swift
@@ -1,5 +1,8 @@
 import Foundation
-
-public protocol CodeTokenizer {
-    func tokenize(_ input: String) -> [any CodeToken]
+/// Splits source text into tokens whose kinds are drawn from `Element`.
+public protocol CodeTokenizer<Element> where Element: CodeTokenElement {
+    associatedtype Element: CodeTokenElement
+    /// Tokenizes `input`.
+    /// - Returns: The tokens produced for `input`.
+    func tokenize(_ input: String) -> [any CodeToken<Element>]
 }
diff --git a/Sources/SwiftParser/Markdown/Builders/MarkdownAdmonitionBuilder.swift b/Sources/SwiftParser/Markdown/Builders/MarkdownAdmonitionBuilder.swift
@@ -0,0 +1,70 @@
+import Foundation
+
+/// Recognizes a two-line admonition: a `> [!kind]` header line followed by a second `>` line.
+/// On a match an `AdmonitionNode` wrapping the nodes returned by `MarkdownInlineParser.parseInline` is appended to the current node.
+public class MarkdownAdmonitionBuilder: CodeNodeBuilder {
+    public init() {}
+
+    /// Tries to build an admonition starting at `context.consuming`.
+    ///
+    /// The context is left untouched unless both line prefixes match.
+    /// - Parameter context: The parsing context; on success `consuming` is moved past the admonition.
+    /// - Returns: `true` if an `AdmonitionNode` was appended to `context.current`, otherwise `false`.
+    public func build(from context: inout CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> Bool {
+        let tokens = context.tokens
+        var cursor = context.consuming
+
+        // Header line: `>` at the start of a line, then at most one space.
+        guard startsLine(at: cursor, in: tokens),
+              token(at: cursor, in: tokens)?.element == .gt else { return false }
+        cursor += 1
+        if token(at: cursor, in: tokens)?.element == .space {
+            cursor += 1
+        }
+
+        // `[`, `!`, text, `]` must follow as four consecutive tokens.
+        guard token(at: cursor, in: tokens)?.element == .leftBracket,
+              token(at: cursor + 1, in: tokens)?.element == .exclamation,
+              let label = token(at: cursor + 2, in: tokens), label.element == .text,
+              token(at: cursor + 3, in: tokens)?.element == .rightBracket else { return false }
+        let kind = label.text.lowercased()
+        cursor += 4
+
+        // The header line has to end directly after the closing bracket.
+        guard token(at: cursor, in: tokens)?.element == .newline else { return false }
+        cursor += 1
+
+        // Second line: `>` again, then at most one space.
+        guard startsLine(at: cursor, in: tokens),
+              token(at: cursor, in: tokens)?.element == .gt else { return false }
+        cursor += 1
+        if token(at: cursor, in: tokens)?.element == .space {
+            cursor += 1
+        }
+
+        // Everything matched: commit the position and let the inline parser continue from there.
+        context.consuming = cursor
+        let inlineNodes = MarkdownInlineParser.parseInline(&context)
+        let admonition = AdmonitionNode(kind: kind)
+        for child in inlineNodes { admonition.append(child) }
+        context.current.append(admonition)
+
+        // Skip the newline that follows the parsed content, if there is one.
+        if token(at: context.consuming, in: context.tokens)?.element == .newline {
+            context.consuming += 1
+        }
+        return true
+    }
+
+    /// Returns the token at `index` as a `MarkdownToken`, or `nil` if `index` is past the end
+    /// of `tokens` or the token is not a `MarkdownToken`.
+    private func token(at index: Int, in tokens: [any CodeToken<MarkdownTokenElement>]) -> MarkdownToken? {
+        guard index < tokens.count else { return nil }
+        return tokens[index] as? MarkdownToken
+    }
+
+    /// A position starts a line when it is index 0 or directly follows a newline token.
+    private func startsLine(at index: Int, in tokens: [any CodeToken<MarkdownTokenElement>]) -> Bool {
+        index == 0 || token(at: index - 1, in: tokens)?.element == .newline
+    }
+}