From 23fe42fa553670792ae9eb99cb40b51cec4462ff Mon Sep 17 00:00:00 2001 From: Dongyu Zhao Date: Thu, 7 Aug 2025 13:25:46 +0800 Subject: [PATCH 1/2] refactor: modularize inline markdown parsing --- .../Markdown/Nodes/MarkdownInlineParser.swift | 263 +++++++++++++----- 1 file changed, 201 insertions(+), 62 deletions(-) diff --git a/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownInlineParser.swift b/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownInlineParser.swift index 5cd8eb3..36448a6 100644 --- a/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownInlineParser.swift +++ b/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownInlineParser.swift @@ -2,7 +2,8 @@ import Foundation import SwiftParser /// Simple inline parser used by block builders to parse inline Markdown syntax. -/// Handles emphasis, links, images, inline code and other span level elements. +/// Instead of a giant switch, each inline element is handled by its own builder +/// and a composite builder loops through them to construct the result. struct MarkdownInlineParser { /// Parse inline content until one of the `stopAt` tokens is encountered. /// - Parameters: @@ -16,83 +17,221 @@ struct MarkdownInlineParser { var nodes: [MarkdownNodeBase] = [] var delimiters: [Delimiter] = [] - while context.consuming < context.tokens.count { + let builders: [InlineBuilder] = [ + EmphasisBuilder(), + InlineCodeBuilder(), + FormulaBuilder(), + HTMLBuilder(), + ImageBuilder(), + LinkBuilder(), + AutolinkBuilder(), + TextBuilder() + ] + + outer: while context.consuming < context.tokens.count { guard let token = context.tokens[context.consuming] as? MarkdownToken else { break } if stopAt.contains(token.element) { break } - switch token.element { - case .asterisk, .underscore, .tilde: - let marker = token.element - var count = 0 - while context.consuming < context.tokens.count, - let t = context.tokens[context.consuming] as? MarkdownToken, - t.element == marker { - count += 1 - context.consuming += 1 - } - if marker == .tilde && count < 2 { - let text = String(repeating: "~", count: count) - nodes.append(TextNode(content: text)) - } else { - handleDelimiter(marker: marker, count: count, nodes: &nodes, stack: &delimiters) - } - case .inlineCode: - nodes.append(InlineCodeNode(code: trimBackticks(token.text))) - context.consuming += 1 - case .formula: - nodes.append(FormulaNode(expression: trimFormula(token.text))) - context.consuming += 1 - case .htmlTag, .htmlBlock, .htmlUnclosedBlock, .htmlEntity: - nodes.append(HTMLNode(content: token.text)) - context.consuming += 1 - case .exclamation: - if let image = parseImage(&context) { - nodes.append(image) - } else { - nodes.append(TextNode(content: token.text)) - context.consuming += 1 + for builder in builders { + if builder.build(from: &context, nodes: &nodes, delimiters: &delimiters) { + continue outer } - case .leftBracket: - if let link = parseLinkOrFootnote(&context) { - nodes.append(link) - } else { - nodes.append(TextNode(content: token.text)) - context.consuming += 1 - } - case .autolink, .url: - let url = trimAutolink(token.text) - let link = LinkNode(url: url, title: url) - nodes.append(link) - context.consuming += 1 - default: - let shouldMerge: Bool - if let lastIndex = nodes.indices.last, - let _ = nodes[lastIndex] as? TextNode, - !delimiters.contains(where: { $0.index == lastIndex }) { - shouldMerge = true - } else { - shouldMerge = false - } - - if shouldMerge, let last = nodes.last as? TextNode { - last.content += token.text - } else { - nodes.append(TextNode(content: token.text)) - } - context.consuming += 1 } + + // If no builder handled the token, advance to avoid infinite loop + context.consuming += 1 } return nodes } - private struct Delimiter { var marker: MarkdownTokenElement var count: Int var index: Int } + /// Protocol for inline node builders. + private protocol InlineBuilder { + func build( + from context: inout CodeConstructContext, + nodes: inout [MarkdownNodeBase], + delimiters: inout [Delimiter] + ) -> Bool + } + + // MARK: - Individual inline builders + private struct EmphasisBuilder: InlineBuilder { + func build( + from context: inout CodeConstructContext, + nodes: inout [MarkdownNodeBase], + delimiters: inout [Delimiter] + ) -> Bool { + guard context.consuming < context.tokens.count, + let token = context.tokens[context.consuming] as? MarkdownToken, + token.element == .asterisk || token.element == .underscore || token.element == .tilde + else { return false } + + let marker = token.element + var count = 0 + while context.consuming < context.tokens.count, + let t = context.tokens[context.consuming] as? MarkdownToken, + t.element == marker { + count += 1 + context.consuming += 1 + } + if marker == .tilde && count < 2 { + let text = String(repeating: "~", count: count) + nodes.append(TextNode(content: text)) + } else { + handleDelimiter(marker: marker, count: count, nodes: &nodes, stack: &delimiters) + } + return true + } + } + + private struct InlineCodeBuilder: InlineBuilder { + func build( + from context: inout CodeConstructContext, + nodes: inout [MarkdownNodeBase], + delimiters: inout [Delimiter] + ) -> Bool { + guard context.consuming < context.tokens.count, + let token = context.tokens[context.consuming] as? MarkdownToken, + token.element == .inlineCode + else { return false } + + nodes.append(InlineCodeNode(code: trimBackticks(token.text))) + context.consuming += 1 + return true + } + } + + private struct FormulaBuilder: InlineBuilder { + func build( + from context: inout CodeConstructContext, + nodes: inout [MarkdownNodeBase], + delimiters: inout [Delimiter] + ) -> Bool { + guard context.consuming < context.tokens.count, + let token = context.tokens[context.consuming] as? MarkdownToken, + token.element == .formula + else { return false } + + nodes.append(FormulaNode(expression: trimFormula(token.text))) + context.consuming += 1 + return true + } + } + + private struct HTMLBuilder: InlineBuilder { + func build( + from context: inout CodeConstructContext, + nodes: inout [MarkdownNodeBase], + delimiters: inout [Delimiter] + ) -> Bool { + guard context.consuming < context.tokens.count, + let token = context.tokens[context.consuming] as? MarkdownToken, + token.element == .htmlTag || token.element == .htmlBlock || token.element == .htmlUnclosedBlock || token.element == .htmlEntity + else { return false } + + nodes.append(HTMLNode(content: token.text)) + context.consuming += 1 + return true + } + } + + private struct ImageBuilder: InlineBuilder { + func build( + from context: inout CodeConstructContext, + nodes: inout [MarkdownNodeBase], + delimiters: inout [Delimiter] + ) -> Bool { + guard context.consuming < context.tokens.count, + let token = context.tokens[context.consuming] as? MarkdownToken, + token.element == .exclamation + else { return false } + + if let image = parseImage(&context) { + nodes.append(image) + } else { + nodes.append(TextNode(content: token.text)) + context.consuming += 1 + } + return true + } + } + + private struct LinkBuilder: InlineBuilder { + func build( + from context: inout CodeConstructContext, + nodes: inout [MarkdownNodeBase], + delimiters: inout [Delimiter] + ) -> Bool { + guard context.consuming < context.tokens.count, + let token = context.tokens[context.consuming] as? MarkdownToken, + token.element == .leftBracket + else { return false } + + if let link = parseLinkOrFootnote(&context) { + nodes.append(link) + } else { + nodes.append(TextNode(content: token.text)) + context.consuming += 1 + } + return true + } + } + + private struct AutolinkBuilder: InlineBuilder { + func build( + from context: inout CodeConstructContext, + nodes: inout [MarkdownNodeBase], + delimiters: inout [Delimiter] + ) -> Bool { + guard context.consuming < context.tokens.count, + let token = context.tokens[context.consuming] as? MarkdownToken, + token.element == .autolink || token.element == .url + else { return false } + + let url = trimAutolink(token.text) + let link = LinkNode(url: url, title: url) + nodes.append(link) + context.consuming += 1 + return true + } + } + + private struct TextBuilder: InlineBuilder { + func build( + from context: inout CodeConstructContext, + nodes: inout [MarkdownNodeBase], + delimiters: inout [Delimiter] + ) -> Bool { + guard context.consuming < context.tokens.count, + let token = context.tokens[context.consuming] as? MarkdownToken + else { return false } + + let shouldMerge: Bool + if let lastIndex = nodes.indices.last, + let _ = nodes[lastIndex] as? TextNode, + !delimiters.contains(where: { $0.index == lastIndex }) { + shouldMerge = true + } else { + shouldMerge = false + } + + if shouldMerge, let last = nodes.last as? TextNode { + last.content += token.text + } else { + nodes.append(TextNode(content: token.text)) + } + context.consuming += 1 + return true + } + } + + // MARK: - Shared helpers private static func handleDelimiter( marker: MarkdownTokenElement, count: Int, From a06d451dcfa09ec7fbabf2e2d90ecc6b6dc34a10 Mon Sep 17 00:00:00 2001 From: Dongyu Zhao Date: Thu, 7 Aug 2025 13:42:26 +0800 Subject: [PATCH 2/2] refactor: convert inline parser into builder --- .../Nodes/MarkdownAdmonitionBuilder.swift | 11 ++++++-- .../Nodes/MarkdownBlockquoteBuilder.swift | 12 ++++++--- .../Nodes/MarkdownDefinitionListBuilder.swift | 17 +++++++------ .../Nodes/MarkdownHeadingBuilder.swift | 12 ++++++--- ...rser.swift => MarkdownInlineBuilder.swift} | 25 ++++++++++++++----- .../Markdown/Nodes/MarkdownListBuilder.swift | 11 ++++++-- .../Nodes/MarkdownParagraphBuilder.swift | 12 ++++++--- .../Markdown/Nodes/MarkdownTableBuilder.swift | 4 +-- 8 files changed, 75 insertions(+), 29 deletions(-) rename Sources/SwiftParserShowCase/Code/Markdown/Nodes/{MarkdownInlineParser.swift => MarkdownInlineBuilder.swift} (95%) diff --git a/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownAdmonitionBuilder.swift b/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownAdmonitionBuilder.swift index 4a5e253..e9da96d 100644 --- a/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownAdmonitionBuilder.swift +++ b/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownAdmonitionBuilder.swift @@ -35,9 +35,16 @@ public class MarkdownAdmonitionBuilder: CodeNodeBuilder { let sp = context.tokens[idx] as? MarkdownToken, sp.element == .space { idx += 1 } context.consuming = idx - let children = MarkdownInlineParser.parseInline(&context) let node = AdmonitionNode(kind: kind) - for c in children { node.append(c) } + var inlineCtx = CodeConstructContext( + current: node, + tokens: context.tokens, + consuming: context.consuming, + state: context.state + ) + let inlineBuilder = MarkdownInlineBuilder() + _ = inlineBuilder.build(from: &inlineCtx) + context.consuming = inlineCtx.consuming context.current.append(node) if context.consuming < context.tokens.count, let nl2 = context.tokens[context.consuming] as? MarkdownToken, diff --git a/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownBlockquoteBuilder.swift b/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownBlockquoteBuilder.swift index 38185f2..8d86613 100644 --- a/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownBlockquoteBuilder.swift +++ b/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownBlockquoteBuilder.swift @@ -16,10 +16,16 @@ public class MarkdownBlockquoteBuilder: CodeNodeBuilder { space.element == .space { context.consuming += 1 } - // Parse inline content until a newline or EOF inside the blockquote - let children = MarkdownInlineParser.parseInline(&context) let node = BlockquoteNode() - for child in children { node.append(child) } + var inlineCtx = CodeConstructContext( + current: node, + tokens: context.tokens, + consuming: context.consuming, + state: context.state + ) + let inlineBuilder = MarkdownInlineBuilder() + _ = inlineBuilder.build(from: &inlineCtx) + context.consuming = inlineCtx.consuming context.current.append(node) if context.consuming < context.tokens.count, let nl = context.tokens[context.consuming] as? MarkdownToken, diff --git a/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownDefinitionListBuilder.swift b/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownDefinitionListBuilder.swift index fe377c7..113601c 100644 --- a/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownDefinitionListBuilder.swift +++ b/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownDefinitionListBuilder.swift @@ -51,16 +51,17 @@ public class MarkdownDefinitionListBuilder: CodeNodeBuilder { context.consuming += 1 } - var termContext = CodeConstructContext(current: DocumentNode(), tokens: termTokens) - let termChildren = MarkdownInlineParser.parseInline(&termContext) - var defContext = CodeConstructContext(current: DocumentNode(), tokens: defTokens) - let defChildren = MarkdownInlineParser.parseInline(&defContext) - - let item = DefinitionItemNode() let termNode = DefinitionTermNode() - for c in termChildren { termNode.append(c) } + var termContext = CodeConstructContext(current: termNode, tokens: termTokens, state: context.state) + let inlineBuilder = MarkdownInlineBuilder(stopAt: []) + _ = inlineBuilder.build(from: &termContext) + let descNode = DefinitionDescriptionNode() - for c in defChildren { descNode.append(c) } + var defContext = CodeConstructContext(current: descNode, tokens: defTokens, state: context.state) + let inlineBuilder2 = MarkdownInlineBuilder(stopAt: []) + _ = inlineBuilder2.build(from: &defContext) + + let item = DefinitionItemNode() item.append(termNode) item.append(descNode) diff --git a/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownHeadingBuilder.swift b/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownHeadingBuilder.swift index d1069b9..99af3ff 100644 --- a/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownHeadingBuilder.swift +++ b/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownHeadingBuilder.swift @@ -26,10 +26,16 @@ public class MarkdownHeadingBuilder: CodeNodeBuilder { idx += 1 context.consuming = idx - // Parse inline content until a newline or EOF - let children = MarkdownInlineParser.parseInline(&context) let node = HeaderNode(level: level) - for child in children { node.append(child) } + var inlineCtx = CodeConstructContext( + current: node, + tokens: context.tokens, + consuming: context.consuming, + state: context.state + ) + let inlineBuilder = MarkdownInlineBuilder() + _ = inlineBuilder.build(from: &inlineCtx) + context.consuming = inlineCtx.consuming context.current.append(node) if context.consuming < context.tokens.count, diff --git a/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownInlineParser.swift b/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownInlineBuilder.swift similarity index 95% rename from Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownInlineParser.swift rename to Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownInlineBuilder.swift index 36448a6..6cdb1ae 100644 --- a/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownInlineParser.swift +++ b/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownInlineBuilder.swift @@ -1,18 +1,31 @@ import Foundation import SwiftParser -/// Simple inline parser used by block builders to parse inline Markdown syntax. -/// Instead of a giant switch, each inline element is handled by its own builder -/// and a composite builder loops through them to construct the result. -struct MarkdownInlineParser { +/// Inline node builder that parses Markdown inline elements. +/// Each inline element is handled by a dedicated sub-builder and a composite +/// builder loops through them to construct the result. +public class MarkdownInlineBuilder: CodeNodeBuilder { + private let stopAt: Set + + public init(stopAt: Set = [.newline, .eof]) { + self.stopAt = stopAt + } + + public func build(from context: inout CodeConstructContext) -> Bool { + let start = context.consuming + let nodes = Self.parseInline(&context, stopAt: stopAt) + for node in nodes { context.current.append(node) } + return context.consuming > start + } + /// Parse inline content until one of the `stopAt` tokens is encountered. /// - Parameters: /// - context: Construction context providing tokens and current state. /// - stopAt: Tokens that terminate inline parsing. /// - Returns: Array of parsed inline nodes. - static func parseInline( + private static func parseInline( _ context: inout CodeConstructContext, - stopAt: Set = [.newline, .eof] + stopAt: Set ) -> [MarkdownNodeBase] { var nodes: [MarkdownNodeBase] = [] var delimiters: [Delimiter] = [] diff --git a/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownListBuilder.swift b/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownListBuilder.swift index 12e2a2a..92e3b1a 100644 --- a/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownListBuilder.swift +++ b/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownListBuilder.swift @@ -86,8 +86,15 @@ public class MarkdownListBuilder: CodeNodeBuilder { } else { item = ListItemNode(marker: markerText) } - let children = MarkdownInlineParser.parseInline(&context) - for child in children { item.append(child) } + var inlineCtx = CodeConstructContext( + current: item, + tokens: context.tokens, + consuming: context.consuming, + state: context.state + ) + let inlineBuilder = MarkdownInlineBuilder() + _ = inlineBuilder.build(from: &inlineCtx) + context.consuming = inlineCtx.consuming listNode.append(item) if context.consuming < context.tokens.count, diff --git a/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownParagraphBuilder.swift b/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownParagraphBuilder.swift index c282bcc..6314241 100644 --- a/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownParagraphBuilder.swift +++ b/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownParagraphBuilder.swift @@ -11,9 +11,15 @@ public class MarkdownParagraphBuilder: CodeNodeBuilder { token.element != .eof else { return false } let node = ParagraphNode(range: token.range) - // Stop parsing at either a newline or EOF to avoid leftover empty nodes - let children = MarkdownInlineParser.parseInline(&context) - for child in children { node.append(child) } + var inlineCtx = CodeConstructContext( + current: node, + tokens: context.tokens, + consuming: context.consuming, + state: context.state + ) + let inlineBuilder = MarkdownInlineBuilder() + _ = inlineBuilder.build(from: &inlineCtx) + context.consuming = inlineCtx.consuming context.current.append(node) if context.consuming < context.tokens.count, diff --git a/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownTableBuilder.swift b/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownTableBuilder.swift index 731861c..3d92431 100644 --- a/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownTableBuilder.swift +++ b/Sources/SwiftParserShowCase/Code/Markdown/Nodes/MarkdownTableBuilder.swift @@ -42,8 +42,8 @@ public class MarkdownTableBuilder: CodeNodeBuilder { if tok.element == .pipe { let cell = TableCellNode(range: start.range) var subCtx = CodeConstructContext(current: cell, tokens: cellTokens, state: context.state) - let children = MarkdownInlineParser.parseInline(&subCtx, stopAt: []) - for child in children { cell.append(child) } + let inlineBuilder = MarkdownInlineBuilder(stopAt: []) + _ = inlineBuilder.build(from: &subCtx) row.append(cell) cellTokens.removeAll() } else {