From eb16f29332f5c940acdfe35bbb276d596d4ebbd0 Mon Sep 17 00:00:00 2001 From: Dongyu Zhao Date: Mon, 21 Jul 2025 00:33:19 +0800 Subject: [PATCH] Implement emphasis parsing --- .../Markdown/MarkdownContextState.swift | 18 ++++- .../Markdown/MarkdownEmphasisConsumer.swift | 70 +++++++++++++++++++ .../Markdown/MarkdownLanguage.swift | 1 + .../Markdown/MarkdownTokenConsumer.swift | 7 +- 4 files changed, 93 insertions(+), 3 deletions(-) create mode 100644 Sources/SwiftParser/Markdown/MarkdownEmphasisConsumer.swift diff --git a/Sources/SwiftParser/Markdown/MarkdownContextState.swift b/Sources/SwiftParser/Markdown/MarkdownContextState.swift index 05732fb..f23c94d 100644 --- a/Sources/SwiftParser/Markdown/MarkdownContextState.swift +++ b/Sources/SwiftParser/Markdown/MarkdownContextState.swift @@ -3,7 +3,21 @@ import Foundation public class MarkdownContextState: CodeContextState { public typealias Node = MarkdownNodeElement public typealias Token = MarkdownTokenElement - /// Stack of open emphasis/strong nodes: the node, its parent, delimiter element, and delimiter length - public var openEmphasis: [(node: MarkdownNodeBase, parent: MarkdownNodeBase, element: MarkdownTokenElement, length: Int)] = [] + /// Stack of open emphasis/strong delimiters. Each entry stores the node to + /// be created once closed, its parent container, the index at which the + /// delimiter appeared, the token element (`*` or `_`), and the delimiter + /// length (1 for emphasis, 2 for strong). + public var openEmphasis: [(node: MarkdownNodeBase, parent: MarkdownNodeBase, startIndex: Int, element: MarkdownTokenElement, length: Int)] = [] + + /// Pending delimiter run that has not yet been processed. We accumulate + /// consecutive `*` or `_` tokens here until a non-delimiter token is + /// encountered. + public var pendingDelimiterElement: MarkdownTokenElement? + public var pendingDelimiterCount: Int = 0 + + /// Indicates that an emphasis delimiter was just opened. This prevents the + /// next text token from merging with a previous `TextNode`. + public var justOpenedDelimiter: Bool = false + public init() {} } diff --git a/Sources/SwiftParser/Markdown/MarkdownEmphasisConsumer.swift b/Sources/SwiftParser/Markdown/MarkdownEmphasisConsumer.swift new file mode 100644 index 0000000..5f35cc3 --- /dev/null +++ b/Sources/SwiftParser/Markdown/MarkdownEmphasisConsumer.swift @@ -0,0 +1,70 @@ +import Foundation + +/// Consumer for emphasis and strong emphasis following CommonMark rules +public struct MarkdownEmphasisConsumer: CodeTokenConsumer { + public typealias Node = MarkdownNodeElement + public typealias Token = MarkdownTokenElement + + public init() {} + + public func consume(token: any CodeToken, context: inout CodeContext) -> Bool { + guard let mdState = context.state as? MarkdownContextState else { return false } + guard let mdToken = token as? MarkdownToken else { return false } + + // Only handle emphasis delimiters and EOF for flushing + if mdToken.isEmphasisDelimiter { + // Accumulate consecutive delimiters + if mdState.pendingDelimiterElement == mdToken.element { + mdState.pendingDelimiterCount += 1 + } else { + flushPending(state: mdState, context: &context) + mdState.pendingDelimiterElement = mdToken.element + mdState.pendingDelimiterCount = 1 + } + return true + } else { + flushPending(state: mdState, context: &context) + // EOF is consumed here so other consumers don't process it + if mdToken.element == .eof { + return true + } + return false + } + } + + private func flushPending(state: MarkdownContextState, context: inout CodeContext) { + guard state.pendingDelimiterCount > 0, let element = state.pendingDelimiterElement else { return } + var remaining = state.pendingDelimiterCount + + while remaining > 0 { + if let last = state.openEmphasis.last, last.element == element, last.length <= remaining { + // Close existing delimiter + state.openEmphasis.removeLast() + let parent = last.parent + let start = last.startIndex + guard start <= parent.children.count else { continue } + let children = Array(parent.children[start..= 2 ? 2 : 1 + let newNode: MarkdownNodeBase = length == 2 ? StrongNode(content: "") : EmphasisNode(content: "") + let parent = context.current as! MarkdownNodeBase + let startIndex = parent.children.count + state.openEmphasis.append((node: newNode, parent: parent, startIndex: startIndex, element: element, length: length)) + state.justOpenedDelimiter = true + remaining -= length + } + } + + state.pendingDelimiterCount = 0 + state.pendingDelimiterElement = nil + } +} diff --git a/Sources/SwiftParser/Markdown/MarkdownLanguage.swift b/Sources/SwiftParser/Markdown/MarkdownLanguage.swift index b570cae..edda579 100644 --- a/Sources/SwiftParser/Markdown/MarkdownLanguage.swift +++ b/Sources/SwiftParser/Markdown/MarkdownLanguage.swift @@ -20,6 +20,7 @@ public class MarkdownLanguage: CodeLanguage { BlockquoteConsumer(), InlineCodeConsumer(), InlineFormulaConsumer(), + MarkdownEmphasisConsumer(), AutolinkConsumer(), URLConsumer(), HTMLInlineConsumer(), diff --git a/Sources/SwiftParser/Markdown/MarkdownTokenConsumer.swift b/Sources/SwiftParser/Markdown/MarkdownTokenConsumer.swift index a236330..19506ca 100644 --- a/Sources/SwiftParser/Markdown/MarkdownTokenConsumer.swift +++ b/Sources/SwiftParser/Markdown/MarkdownTokenConsumer.swift @@ -40,7 +40,12 @@ public struct TextConsumer: CodeTokenConsumer { switch token.element { case .text: let content = token.text - if let last = context.current.children.last as? TextNode { + let mdState = context.state as? MarkdownContextState + if mdState?.justOpenedDelimiter == true { + mdState?.justOpenedDelimiter = false + let textNode = TextNode(content: content) + context.current.append(textNode) + } else if let last = context.current.children.last as? TextNode { last.content += content } else { let textNode = TextNode(content: content)