From 2f5065e1479be1369f8ddc95cda2f89e918a569a Mon Sep 17 00:00:00 2001 From: Dongyu Zhao Date: Tue, 15 Jul 2025 10:32:20 +0800 Subject: [PATCH] Refactor AST nodes into concrete classes --- Sources/SwiftParser/Core.swift | 2 +- .../Languages/MarkdownLanguage.swift | 87 ++++++----- .../SwiftParser/Languages/MarkdownNodes.swift | 144 ++++++++++++++++++ Sources/SwiftParser/SwiftParser.swift | 7 +- Tests/SwiftParserTests/SwiftParserTests.swift | 12 +- 5 files changed, 206 insertions(+), 46 deletions(-) create mode 100644 Sources/SwiftParser/Languages/MarkdownNodes.swift diff --git a/Sources/SwiftParser/Core.swift b/Sources/SwiftParser/Core.swift index aaa3fde..260de9b 100644 --- a/Sources/SwiftParser/Core.swift +++ b/Sources/SwiftParser/Core.swift @@ -17,7 +17,7 @@ public protocol CodeElementBuilder { func build(context: inout CodeContext) } -public final class CodeNode { +public class CodeNode { public let type: any CodeElement public var value: String public weak var parent: CodeNode? diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage.swift b/Sources/SwiftParser/Languages/MarkdownLanguage.swift index 7dee54d..1393213 100644 --- a/Sources/SwiftParser/Languages/MarkdownLanguage.swift +++ b/Sources/SwiftParser/Languages/MarkdownLanguage.swift @@ -304,7 +304,7 @@ public struct MarkdownLanguage: CodeLanguage { text = text.trimmingCharacters(in: .whitespaces) } - context.currentNode.addChild(CodeNode(type: Element.heading, value: text)) + context.currentNode.addChild(MarkdownHeadingNode(value: text, level: count)) } } @@ -378,21 +378,26 @@ public struct MarkdownLanguage: CodeLanguage { } } else { context.index += 1 } } + var level: Int? while context.index < context.tokens.count { if let tok = context.tokens[context.index] as? Token { switch tok { - case .dash, .equal: + case .dash: + if level == nil { level = 2 } + context.index += 1 + case .equal: + if level == nil { level = 1 } context.index += 1 case .text(let s, _) where s.trimmingCharacters(in: .whitespaces).isEmpty: context.index += 1 case .newline: context.index += 1 - let node = CodeNode(type: Element.heading, value: text.trimmingCharacters(in: .whitespaces)) + let node = MarkdownHeadingNode(value: text.trimmingCharacters(in: .whitespaces), level: level ?? 1) context.currentNode.addChild(node) return case .eof: context.index += 1 - let node = CodeNode(type: Element.heading, value: text.trimmingCharacters(in: .whitespaces)) + let node = MarkdownHeadingNode(value: text.trimmingCharacters(in: .whitespaces), level: level ?? 1) context.currentNode.addChild(node) return default: @@ -400,7 +405,7 @@ public struct MarkdownLanguage: CodeLanguage { } } else { context.index += 1 } } - context.currentNode.addChild(CodeNode(type: Element.heading, value: text.trimmingCharacters(in: .whitespaces))) + context.currentNode.addChild(MarkdownHeadingNode(value: text.trimmingCharacters(in: .whitespaces), level: level ?? 1)) } } @@ -447,7 +452,7 @@ public struct MarkdownLanguage: CodeLanguage { public func build(context: inout CodeContext) { func parseList(_ level: Int) -> CodeNode { - let list = CodeNode(type: Element.unorderedList, value: "") + let list = MarkdownUnorderedListNode(value: "") var isLoose = false while context.index < context.tokens.count { guard let bullet = context.tokens[context.index] as? Token, isBullet(bullet), lineIndent(before: context.index, in: context) == level else { break } @@ -469,7 +474,7 @@ public struct MarkdownLanguage: CodeLanguage { context.index += 1 } - let node = CodeNode(type: Element.listItem, value: "") + let node = MarkdownListItemNode(value: "") var text = "" itemLoop: while context.index < context.tokens.count { guard let tok = context.tokens[context.index] as? Token else { context.index += 1; continue } @@ -555,7 +560,7 @@ public struct MarkdownLanguage: CodeLanguage { public func build(context: inout CodeContext) { func parseList(_ level: Int) -> CodeNode { - let list = CodeNode(type: Element.orderedList, value: "") + let list = MarkdownOrderedListNode(value: "") var isLoose = false while context.index < context.tokens.count { guard context.index + 1 < context.tokens.count, @@ -579,7 +584,7 @@ public struct MarkdownLanguage: CodeLanguage { context.index += 1 } - let node = CodeNode(type: Element.orderedListItem, value: "") + let node = MarkdownOrderedListItemNode(value: "") var text = "" itemLoop: while context.index < context.tokens.count { guard let tok = context.tokens[context.index] as? Token else { context.index += 1; continue } @@ -695,7 +700,7 @@ public struct MarkdownLanguage: CodeLanguage { if count >= fenceLength { context.index = idx if context.index < context.tokens.count, let nl = context.tokens[context.index] as? Token, case .newline = nl { context.index += 1 } - context.currentNode.addChild(CodeNode(type: Element.codeBlock, value: text)) + context.currentNode.addChild(MarkdownCodeBlockNode(value: text)) return } } @@ -703,7 +708,7 @@ public struct MarkdownLanguage: CodeLanguage { context.index += 1 } else { context.index += 1 } } - context.currentNode.addChild(CodeNode(type: Element.codeBlock, value: text)) + context.currentNode.addChild(MarkdownCodeBlockNode(value: text)) } } @@ -725,11 +730,11 @@ public struct MarkdownLanguage: CodeLanguage { switch tok { case .newline: context.index += 1 - let node = CodeNode(type: Element.blockQuote, value: text.trimmingCharacters(in: .whitespaces)) + let node = MarkdownBlockQuoteNode(value: text.trimmingCharacters(in: .whitespaces)) context.currentNode.addChild(node) return case .eof: - let node = CodeNode(type: Element.blockQuote, value: text.trimmingCharacters(in: .whitespaces)) + let node = MarkdownBlockQuoteNode(value: text.trimmingCharacters(in: .whitespaces)) context.currentNode.addChild(node) context.index += 1 return @@ -764,7 +769,7 @@ public struct MarkdownLanguage: CodeLanguage { text += "\n" + String(s.dropFirst(4)) context.index += 1 } else { - context.currentNode.addChild(CodeNode(type: Element.codeBlock, value: text)) + context.currentNode.addChild(MarkdownCodeBlockNode(value: text)) return } case .text(let s, _): @@ -776,7 +781,7 @@ public struct MarkdownLanguage: CodeLanguage { } } else { context.index += 1 } } - context.currentNode.addChild(CodeNode(type: Element.codeBlock, value: text)) + context.currentNode.addChild(MarkdownCodeBlockNode(value: text)) } } @@ -813,7 +818,7 @@ public struct MarkdownLanguage: CodeLanguage { } } if let nl = context.tokens[context.index] as? Token, case .newline = nl { context.index += 1 } - context.currentNode.addChild(CodeNode(type: Element.thematicBreak, value: "")) + context.currentNode.addChild(MarkdownThematicBreakNode(value: "")) } } @@ -846,7 +851,7 @@ public struct MarkdownLanguage: CodeLanguage { } else { context.index += 1 } } } - context.currentNode.addChild(CodeNode(type: Element.image, value: alt + "|" + url)) + context.currentNode.addChild(MarkdownImageNode(value: alt + "|" + url)) } } @@ -865,7 +870,7 @@ public struct MarkdownLanguage: CodeLanguage { else { text += tok.text; context.index += 1 } } else { context.index += 1 } } - context.currentNode.addChild(CodeNode(type: Element.html, value: text)) + context.currentNode.addChild(MarkdownHtmlNode(value: text)) } } @@ -886,7 +891,7 @@ public struct MarkdownLanguage: CodeLanguage { } else { context.index += 1 } } let decoded = decode(text) - context.currentNode.addChild(CodeNode(type: Element.entity, value: decoded)) + context.currentNode.addChild(MarkdownEntityNode(value: decoded)) } private func decode(_ entity: String) -> String { @@ -928,14 +933,14 @@ public struct MarkdownLanguage: CodeLanguage { let t2 = context.tokens[context.index + 1] as? Token, t1.kindDescription == "~" && t2.kindDescription == "~" { context.index += 2 - context.currentNode.addChild(CodeNode(type: Element.strikethrough, value: text)) + context.currentNode.addChild(MarkdownStrikethroughNode(value: text)) return } else if let tok = context.tokens[context.index] as? Token { text += tok.text context.index += 1 } else { context.index += 1 } } - context.currentNode.addChild(CodeNode(type: Element.strikethrough, value: text)) + context.currentNode.addChild(MarkdownStrikethroughNode(value: text)) } } @@ -955,7 +960,7 @@ public struct MarkdownLanguage: CodeLanguage { else { text += tok.text; context.index += 1 } } else { context.index += 1 } } - context.currentNode.addChild(CodeNode(type: Element.autoLink, value: text)) + context.currentNode.addChild(MarkdownAutoLinkNode(value: text)) } } @@ -986,7 +991,7 @@ public struct MarkdownLanguage: CodeLanguage { guard let m = Self.regex.firstMatch(in: text, range: range) else { return } let endPos = context.input.index(start, offsetBy: m.range.length) let url = String(context.input[start..? = nil) { + super.init(type: MarkdownLanguage.Element.root, value: value, range: range) + } +} + +public final class MarkdownParagraphNode: CodeNode { + public init(value: String = "", range: Range? = nil) { + super.init(type: MarkdownLanguage.Element.paragraph, value: value, range: range) + } +} + +public final class MarkdownHeadingNode: CodeNode { + public let level: Int + public init(value: String = "", level: Int, range: Range? = nil) { + self.level = level + super.init(type: MarkdownLanguage.Element.heading, value: value, range: range) + } + public override var id: Int { + var hasher = Hasher() + hasher.combine(String(describing: type)) + hasher.combine(value) + hasher.combine(level) + for child in children { hasher.combine(child.id) } + return hasher.finalize() + } +} + +public final class MarkdownTextNode: CodeNode { + public init(value: String = "", range: Range? = nil) { + super.init(type: MarkdownLanguage.Element.text, value: value, range: range) + } +} + +public final class MarkdownListItemNode: CodeNode { + public init(value: String = "", range: Range? = nil) { + super.init(type: MarkdownLanguage.Element.listItem, value: value, range: range) + } +} + +public final class MarkdownOrderedListItemNode: CodeNode { + public init(value: String = "", range: Range? = nil) { + super.init(type: MarkdownLanguage.Element.orderedListItem, value: value, range: range) + } +} + +public final class MarkdownUnorderedListNode: CodeNode { + public init(value: String = "", range: Range? = nil) { + super.init(type: MarkdownLanguage.Element.unorderedList, value: value, range: range) + } +} + +public final class MarkdownOrderedListNode: CodeNode { + public init(value: String = "", range: Range? = nil) { + super.init(type: MarkdownLanguage.Element.orderedList, value: value, range: range) + } +} + +public final class MarkdownEmphasisNode: CodeNode { + public init(value: String = "", range: Range? = nil) { + super.init(type: MarkdownLanguage.Element.emphasis, value: value, range: range) + } +} + +public final class MarkdownStrongNode: CodeNode { + public init(value: String = "", range: Range? = nil) { + super.init(type: MarkdownLanguage.Element.strong, value: value, range: range) + } +} + +public final class MarkdownCodeBlockNode: CodeNode { + public init(value: String = "", range: Range? = nil) { + super.init(type: MarkdownLanguage.Element.codeBlock, value: value, range: range) + } +} + +public final class MarkdownInlineCodeNode: CodeNode { + public init(value: String = "", range: Range? = nil) { + super.init(type: MarkdownLanguage.Element.inlineCode, value: value, range: range) + } +} + +public final class MarkdownLinkNode: CodeNode { + public init(value: String = "", range: Range? = nil) { + super.init(type: MarkdownLanguage.Element.link, value: value, range: range) + } +} + +public final class MarkdownBlockQuoteNode: CodeNode { + public init(value: String = "", range: Range? = nil) { + super.init(type: MarkdownLanguage.Element.blockQuote, value: value, range: range) + } +} + +public final class MarkdownThematicBreakNode: CodeNode { + public init(value: String = "", range: Range? = nil) { + super.init(type: MarkdownLanguage.Element.thematicBreak, value: value, range: range) + } +} + +public final class MarkdownImageNode: CodeNode { + public init(value: String = "", range: Range? = nil) { + super.init(type: MarkdownLanguage.Element.image, value: value, range: range) + } +} + +public final class MarkdownHtmlNode: CodeNode { + public init(value: String = "", range: Range? = nil) { + super.init(type: MarkdownLanguage.Element.html, value: value, range: range) + } +} + +public final class MarkdownEntityNode: CodeNode { + public init(value: String = "", range: Range? = nil) { + super.init(type: MarkdownLanguage.Element.entity, value: value, range: range) + } +} + +public final class MarkdownStrikethroughNode: CodeNode { + public init(value: String = "", range: Range? = nil) { + super.init(type: MarkdownLanguage.Element.strikethrough, value: value, range: range) + } +} + +public final class MarkdownTableNode: CodeNode { + public init(value: String = "", range: Range? = nil) { + super.init(type: MarkdownLanguage.Element.table, value: value, range: range) + } +} + +public final class MarkdownAutoLinkNode: CodeNode { + public init(value: String = "", range: Range? = nil) { + super.init(type: MarkdownLanguage.Element.autoLink, value: value, range: range) + } +} + +public final class MarkdownLinkReferenceDefinitionNode: CodeNode { + public init(value: String = "", range: Range? = nil) { + super.init(type: MarkdownLanguage.Element.linkReferenceDefinition, value: value, range: range) + } +} diff --git a/Sources/SwiftParser/SwiftParser.swift b/Sources/SwiftParser/SwiftParser.swift index 0efcb89..f24eee5 100644 --- a/Sources/SwiftParser/SwiftParser.swift +++ b/Sources/SwiftParser/SwiftParser.swift @@ -5,7 +5,12 @@ public struct SwiftParser { public init() {} public func parse(_ source: String, language: CodeLanguage) -> ParsedSource { - let root = CodeNode(type: language.rootElement, value: "") + let root: CodeNode + if language is MarkdownLanguage { + root = MarkdownRootNode(value: "") + } else { + root = CodeNode(type: language.rootElement, value: "") + } let parser = CodeParser(tokenizer: language.tokenizer, builders: language.builders, expressionBuilders: language.expressionBuilders) let result = parser.parse(source, rootNode: root) return ParsedSource(content: source, root: result.node, errors: result.context.errors) diff --git a/Tests/SwiftParserTests/SwiftParserTests.swift b/Tests/SwiftParserTests/SwiftParserTests.swift index d5f3f6a..dfc1bf7 100644 --- a/Tests/SwiftParserTests/SwiftParserTests.swift +++ b/Tests/SwiftParserTests/SwiftParserTests.swift @@ -22,6 +22,8 @@ final class SwiftParserTests: XCTestCase { let result = parser.parse(source, language: MarkdownLanguage()) XCTAssertEqual(result.errors.count, 0) XCTAssertEqual(result.root.children.count, 2) + let heading = result.root.children.first as? MarkdownHeadingNode + XCTAssertEqual(heading?.level, 1) } func testMarkdownComplexATXHeading() { @@ -30,8 +32,10 @@ final class SwiftParserTests: XCTestCase { let result = parser.parse(source, language: MarkdownLanguage()) XCTAssertEqual(result.errors.count, 0) XCTAssertEqual(result.root.children.count, 1) - XCTAssertEqual(result.root.children.first?.type as? MarkdownLanguage.Element, .heading) - XCTAssertEqual(result.root.children.first?.value, "Complex") + let heading = result.root.children.first as? MarkdownHeadingNode + XCTAssertEqual(heading?.type as? MarkdownLanguage.Element, .heading) + XCTAssertEqual(heading?.value, "Complex") + XCTAssertEqual(heading?.level, 3) } func testMarkdownSetextHeading() { @@ -39,7 +43,9 @@ final class SwiftParserTests: XCTestCase { let source = "Title\n----\n" let result = parser.parse(source, language: MarkdownLanguage()) XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.first?.type as? MarkdownLanguage.Element, .heading) + let heading = result.root.children.first as? MarkdownHeadingNode + XCTAssertEqual(heading?.type as? MarkdownLanguage.Element, .heading) + XCTAssertEqual(heading?.level, 2) } func testMarkdownListItem() {