From 3c35538e411a5524ccd234c19b9fd40fdd10a064 Mon Sep 17 00:00:00 2001 From: Dongyu Zhao Date: Tue, 15 Jul 2025 01:34:31 +0800 Subject: [PATCH] Add reference link support --- Sources/SwiftParser/CodeParser.swift | 2 +- Sources/SwiftParser/Core.swift | 8 ++- .../Languages/MarkdownLanguage.swift | 57 ++++++++++++++++++- Tests/SwiftParserTests/SwiftParserTests.swift | 8 +++ 4 files changed, 71 insertions(+), 4 deletions(-) diff --git a/Sources/SwiftParser/CodeParser.swift b/Sources/SwiftParser/CodeParser.swift index 95fb9c0..230d9cc 100644 --- a/Sources/SwiftParser/CodeParser.swift +++ b/Sources/SwiftParser/CodeParser.swift @@ -50,7 +50,7 @@ public final class CodeParser { public func parse(_ input: String, rootNode: CodeNode) -> (node: CodeNode, context: CodeContext) { let tokens = tokenizer.tokenize(input) - var context = CodeContext(tokens: tokens, index: 0, currentNode: rootNode, errors: [], input: input) + var context = CodeContext(tokens: tokens, index: 0, currentNode: rootNode, errors: [], input: input, linkReferences: [:]) snapshots = [:] lastTokens = tokens diff --git a/Sources/SwiftParser/Core.swift b/Sources/SwiftParser/Core.swift index ae67663..aaa3fde 100644 --- a/Sources/SwiftParser/Core.swift +++ b/Sources/SwiftParser/Core.swift @@ -61,13 +61,15 @@ public struct CodeContext { public var currentNode: CodeNode public var errors: [CodeError] public let input: String + public var linkReferences: [String: String] - public init(tokens: [any CodeToken], index: Int, currentNode: CodeNode, errors: [CodeError], input: String) { + public init(tokens: [any CodeToken], index: Int, currentNode: CodeNode, errors: [CodeError], input: String, linkReferences: [String: String] = [:]) { self.tokens = tokens self.index = index self.currentNode = currentNode self.errors = errors self.input = input + self.linkReferences = linkReferences } /// Snapshot represents a parser state that can be restored later. @@ -76,11 +78,12 @@ public struct CodeContext { fileprivate let node: CodeNode fileprivate let childCount: Int fileprivate let errorCount: Int + fileprivate let linkReferences: [String: String] } /// Capture the current parser state so it can be restored on demand. public func snapshot() -> Snapshot { - Snapshot(index: index, node: currentNode, childCount: currentNode.children.count, errorCount: errors.count) + Snapshot(index: index, node: currentNode, childCount: currentNode.children.count, errorCount: errors.count, linkReferences: linkReferences) } /// Restore the parser to a previously captured state, discarding any new nodes or errors. @@ -93,6 +96,7 @@ public struct CodeContext { if errors.count > snapshot.errorCount { errors.removeLast(errors.count - snapshot.errorCount) } + linkReferences = snapshot.linkReferences } } diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage.swift b/Sources/SwiftParser/Languages/MarkdownLanguage.swift index 188ffe5..b739ea5 100644 --- a/Sources/SwiftParser/Languages/MarkdownLanguage.swift +++ b/Sources/SwiftParser/Languages/MarkdownLanguage.swift @@ -21,6 +21,7 @@ public struct MarkdownLanguage: CodeLanguage { case strikethrough case table case autoLink + case linkReferenceDefinition } public enum Token: CodeToken { @@ -779,6 +780,50 @@ public struct MarkdownLanguage: CodeLanguage { } } + public class LinkReferenceDefinitionBuilder: CodeElementBuilder { + public init() {} + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + guard context.index + 3 < context.tokens.count else { return false } + guard let lb = token as? Token, + let txt = context.tokens[context.index + 1] as? Token, + let rb = context.tokens[context.index + 2] as? Token, + let colon = context.tokens[context.index + 3] as? Token else { return false } + if case .lbracket = lb, + case .text = txt, + case .rbracket = rb, + case .text(let s, _) = colon, + s.trimmingCharacters(in: .whitespaces).hasPrefix(":") { + return true + } + return false + } + public func build(context: inout CodeContext) { + context.index += 1 + var id = "" + if context.index < context.tokens.count, let idTok = context.tokens[context.index] as? Token, case .text(let s, _) = idTok { + id = s + context.index += 1 + } + if context.index < context.tokens.count { context.index += 1 } // skip ] + var text = "" + if context.index < context.tokens.count, let colon = context.tokens[context.index] as? Token, case .text(let s, _) = colon { + text = s + context.index += 1 + } + while context.index < context.tokens.count { + if let tok = context.tokens[context.index] as? Token { + if case .newline = tok { context.index += 1; break } + else { text += tok.text; context.index += 1 } + } else { context.index += 1 } + } + var url = text.trimmingCharacters(in: .whitespaces) + if url.hasPrefix(":") { url.removeFirst() } + url = url.trimmingCharacters(in: .whitespaces) + context.linkReferences[id.trimmingCharacters(in: .whitespaces).lowercased()] = url + context.currentNode.addChild(CodeNode(type: Element.linkReferenceDefinition, value: id + "|" + url)) + } + } + public class StrongBuilder: CodeElementBuilder { public init() {} public func accept(context: CodeContext, token: any CodeToken) -> Bool { @@ -904,6 +949,16 @@ public struct MarkdownLanguage: CodeLanguage { } } else { context.index += 1 } } + } else if context.index + 2 < context.tokens.count, + let lb = context.tokens[context.index] as? Token, case .lbracket = lb, + let idTok = context.tokens[context.index + 1] as? Token, + let rb = context.tokens[context.index + 2] as? Token, case .rbracket = rb, + case .text(let id, _) = idTok { + context.index += 3 + let key = id.trimmingCharacters(in: .whitespaces).lowercased() + if let ref = context.linkReferences[key] { + url = ref + } } let node = CodeNode(type: Element.link, value: text + "|" + url) context.currentNode.addChild(node) @@ -961,7 +1016,7 @@ public struct MarkdownLanguage: CodeLanguage { public var tokenizer: CodeTokenizer { Tokenizer() } public var builders: [CodeElementBuilder] { - [HeadingBuilder(), SetextHeadingBuilder(), CodeBlockBuilder(), IndentedCodeBlockBuilder(), BlockQuoteBuilder(), ThematicBreakBuilder(), OrderedListItemBuilder(), ListItemBuilder(), ImageBuilder(), HTMLBuilder(), EntityBuilder(), StrikethroughBuilder(), AutoLinkBuilder(), TableBuilder(), FootnoteBuilder(), LinkBuilder(), StrongBuilder(), EmphasisBuilder(), InlineCodeBuilder(), ParagraphBuilder()] + [HeadingBuilder(), SetextHeadingBuilder(), CodeBlockBuilder(), IndentedCodeBlockBuilder(), BlockQuoteBuilder(), ThematicBreakBuilder(), OrderedListItemBuilder(), ListItemBuilder(), ImageBuilder(), HTMLBuilder(), EntityBuilder(), StrikethroughBuilder(), AutoLinkBuilder(), TableBuilder(), FootnoteBuilder(), LinkReferenceDefinitionBuilder(), LinkBuilder(), StrongBuilder(), EmphasisBuilder(), InlineCodeBuilder(), ParagraphBuilder()] } public var expressionBuilders: [CodeExpressionBuilder] { [] } public var rootElement: any CodeElement { Element.root } diff --git a/Tests/SwiftParserTests/SwiftParserTests.swift b/Tests/SwiftParserTests/SwiftParserTests.swift index d98f1ee..a50f974 100644 --- a/Tests/SwiftParserTests/SwiftParserTests.swift +++ b/Tests/SwiftParserTests/SwiftParserTests.swift @@ -76,6 +76,14 @@ final class SwiftParserTests: XCTestCase { XCTAssertEqual(result.root.children.first?.type as? MarkdownLanguage.Element, .link) } + func testMarkdownReferenceLink() { + let parser = SwiftParser() + let source = "[title][ref]\n[ref]: http://example.com" + let result = parser.parse(source, language: MarkdownLanguage()) + XCTAssertEqual(result.errors.count, 0) + XCTAssertEqual(result.root.children.first?.type as? MarkdownLanguage.Element, .link) + } + func testMarkdownBlockQuote() { let parser = SwiftParser() let source = "> quote"