From ec02940acb433afdea943bf016a6b7739a0c7e2e Mon Sep 17 00:00:00 2001
From: Dongyu Zhao
Date: Tue, 15 Jul 2025 18:26:07 +0800
Subject: [PATCH] Add structured table parsing

---
 .../Languages/MarkdownLanguage.swift          | 85 +++++++++++++++++--
 .../SwiftParser/Languages/MarkdownNodes.swift | 21 +++++
 Tests/SwiftParserTests/SwiftParserTests.swift | 49 ++++++++++-
 3 files changed, 147 insertions(+), 8 deletions(-)

diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage.swift b/Sources/SwiftParser/Languages/MarkdownLanguage.swift
index 45e803b..bccef47 100644
--- a/Sources/SwiftParser/Languages/MarkdownLanguage.swift
+++ b/Sources/SwiftParser/Languages/MarkdownLanguage.swift
@@ -22,6 +22,9 @@ public struct MarkdownLanguage: CodeLanguage {
         case entity
         case strikethrough
         case table
+        case tableHeader
+        case tableRow
+        case tableCell
         case autoLink
         case linkReferenceDefinition
     }
@@ -1045,10 +1048,12 @@ public struct MarkdownLanguage: CodeLanguage {
             }
             return false
         }
-        public func build(context: inout CodeContext) {
+        /// Reads one table row, starting at its leading pipe, and returns the
+        /// whitespace-trimmed cell texts.
+        func parseRow(_ context: inout CodeContext) -> [String] {
             var cells: [String] = []
             var cell = ""
-            context.index += 1 // skip first pipe
+            context.index += 1 // skip leading pipe
             while context.index < context.tokens.count {
                 if let tok = context.tokens[context.index] as? Token {
                     switch tok {
@@ -1060,8 +1065,7 @@ public struct MarkdownLanguage: CodeLanguage {
                         cells.append(cell.trimmingCharacters(in: .whitespaces))
                         if let last = cells.last, last.isEmpty { cells.removeLast() }
                         context.index += 1
-                        context.currentNode.addChild(MarkdownTableNode(value: cells.joined(separator: "|")))
-                        return
+                        return cells
                     default:
                         cell += tok.text
                         context.index += 1
@@ -1072,7 +1076,78 @@ public struct MarkdownLanguage: CodeLanguage {
             }
             if !cell.isEmpty || !cells.isEmpty {
                 cells.append(cell.trimmingCharacters(in: .whitespaces))
-                context.currentNode.addChild(MarkdownTableNode(value: cells.joined(separator: "|")))
+            }
+            return cells
+        }
+
+        /// Parses a delimiter row such as `|---|:-:|`.
+        ///
+        /// Returns the delimiter cells with `context` advanced past the row, or
+        /// restores `context` and returns `nil` when the row is not a delimiter.
+        func parseDelimiter(_ context: inout CodeContext) -> [String]? {
+            guard context.index < context.tokens.count,
+                  let first = context.tokens[context.index] as? Token,
+                  case .pipe = first else { return nil }
+            let snapshot = context.snapshot()
+            let cells = parseRow(&context)
+            // A bare `|` line yields no cells and must not count as a delimiter.
+            let isDelimiter = !cells.isEmpty && cells.allSatisfy { cell in
+                var dashes = cell.trimmingCharacters(in: .whitespaces)
+                if dashes.hasPrefix(":") { dashes.removeFirst() }
+                if dashes.hasSuffix(":") { dashes.removeLast() }
+                // GFM needs only one hyphen between the optional colons (`:-:`).
+                return !dashes.isEmpty && dashes.allSatisfy { $0 == "-" }
+            }
+            guard isDelimiter else {
+                context.restore(snapshot)
+                return nil
+            }
+            return cells
+        }
+
+        /// Builds a table node from the rows at the current position.
+        ///
+        /// A header row followed by a delimiter row with the same number of cells
+        /// yields a structured table (header, rows, cells). Otherwise only the
+        /// first row is consumed and emitted as a flat `MarkdownTableNode`.
+        public func build(context: inout CodeContext) {
+            var ctx = context
+            let header = parseRow(&ctx)
+            let startIndex = ctx.index
+            // GFM: the header must match the delimiter row in number of cells.
+            if let delimiter = parseDelimiter(&ctx), delimiter.count == header.count {
+                var rows: [[String]] = []
+                while ctx.index < ctx.tokens.count,
+                      let tok = ctx.tokens[ctx.index] as? Token,
+                      case .pipe = tok {
+                    rows.append(parseRow(&ctx))
+                }
+
+                let table = MarkdownTableNode()
+                let headerNode = MarkdownTableHeaderNode()
+                for cell in header {
+                    let cellNode = MarkdownTableCellNode()
+                    cellNode.addChild(MarkdownTextNode(value: cell))
+                    headerNode.addChild(cellNode)
+                }
+                table.addChild(headerNode)
+
+                for row in rows {
+                    let rowNode = MarkdownTableRowNode()
+                    for cell in row {
+                        let cellNode = MarkdownTableCellNode()
+                        cellNode.addChild(MarkdownTextNode(value: cell))
+                        rowNode.addChild(cellNode)
+                    }
+                    table.addChild(rowNode)
+                }
+
+                context = ctx
+                context.currentNode.addChild(table)
+            } else {
+                // No valid delimiter row: consume only the first row, emitted flat.
+                context.index = startIndex
+                context.currentNode.addChild(MarkdownTableNode(value: header.joined(separator: "|")))
             }
         }
     }
diff --git a/Sources/SwiftParser/Languages/MarkdownNodes.swift b/Sources/SwiftParser/Languages/MarkdownNodes.swift
index 4d72958..76b041a 100644
--- a/Sources/SwiftParser/Languages/MarkdownNodes.swift
+++ b/Sources/SwiftParser/Languages/MarkdownNodes.swift
@@ -175,6 +175,27 @@ public final class MarkdownTableNode: CodeNode {
     }
 }
 
+/// The header row of a structured table; the table builder adds one cell node per header cell.
+public final class MarkdownTableHeaderNode: CodeNode {
+    public init(value: String = "", range: Range? = nil) {
+        super.init(type: MarkdownLanguage.Element.tableHeader, value: value, range: range)
+    }
+}
+
+/// A body row of a structured table; the table builder adds one cell node per cell.
+public final class MarkdownTableRowNode: CodeNode {
+    public init(value: String = "", range: Range? = nil) {
+        super.init(type: MarkdownLanguage.Element.tableRow, value: value, range: range)
+    }
+}
+
+/// A single table cell; the table builder stores the cell text in a child text node.
+public final class MarkdownTableCellNode: CodeNode {
+    public init(value: String = "", range: Range? = nil) {
+        super.init(type: MarkdownLanguage.Element.tableCell, value: value, range: range)
+    }
+}
+
 public final class MarkdownAutoLinkNode: CodeNode {
     public let url: String
 
diff --git a/Tests/SwiftParserTests/SwiftParserTests.swift b/Tests/SwiftParserTests/SwiftParserTests.swift
index c843c6b..8880daf 100644
--- a/Tests/SwiftParserTests/SwiftParserTests.swift
+++ b/Tests/SwiftParserTests/SwiftParserTests.swift
@@ -356,11 +356,54 @@ final class SwiftParserTests: XCTestCase {
 
     func testMarkdownTable() {
         let parser = SwiftParser()
-        let source = "|a|b|\n"
+        let source = "|a|b|\n|---|---|\n|c|d|"
         let result = parser.parse(source, language: MarkdownLanguage())
         XCTAssertEqual(result.errors.count, 0)
-        XCTAssertEqual(result.root.children.first?.type as? MarkdownLanguage.Element, .table)
-        XCTAssertEqual(result.root.children.first?.value, "a|b")
+        let table = result.root.children.first as? MarkdownTableNode
+        XCTAssertNotNil(table)
+        XCTAssertEqual(table?.children.count, 2)
+        let header = table?.children.first as? MarkdownTableHeaderNode
+        XCTAssertEqual(header?.children.first?.children.first?.value, "a")
+        XCTAssertEqual(header?.children.last?.children.first?.value, "b")
+        let row = table?.children.last as? MarkdownTableRowNode
+        XCTAssertEqual(row?.children.first?.children.first?.value, "c")
+        XCTAssertEqual(row?.children.last?.children.first?.value, "d")
+    }
+
+    func testMarkdownTableVariants() {
+        let sources = [
+            "| Name | Age |\n|-------|-----|\n| Alice | 25 |\n| Bob | 30 |",
+            "| Name | Age |\n|-------|:-----:|\n| Alice | 25 |\n| Bob | 30 |",
+            "| Name | Age |\n|-------|-----|\n| Alice | 25 |\n| Bob | 30 ",
+            "| Name | Age |\n|:-:|-:|\n| Alice | 25 |\n| Bob | 30 |"
+        ]
+        for src in sources {
+            let parser = SwiftParser()
+            let result = parser.parse(src, language: MarkdownLanguage())
+            XCTAssertEqual(result.errors.count, 0)
+            let table = result.root.children.first as? MarkdownTableNode
+            XCTAssertNotNil(table)
+            XCTAssertEqual(table?.children.count, 3)
+            let header = table?.children.first as? MarkdownTableHeaderNode
+            XCTAssertEqual(header?.children[0].children.first?.value, "Name")
+            XCTAssertEqual(header?.children[1].children.first?.value, "Age")
+        }
+    }
+
+    func testMarkdownTableRejectsInvalidDelimiter() {
+        // A mismatched column count or a bare `|` line is not a delimiter row.
+        let sources = [
+            "|a|b|\n|---|\n|c|d|",
+            "|a|b|\n|\n|c|d|"
+        ]
+        for src in sources {
+            let parser = SwiftParser()
+            let result = parser.parse(src, language: MarkdownLanguage())
+            let first = result.root.children.first
+            XCTAssertEqual(first?.type as? MarkdownLanguage.Element, .table)
+            XCTAssertEqual(first?.value, "a|b")
+            XCTAssertEqual(first?.children.count, 0)
+        }
     }
 
     func testMarkdownLinkReferenceDefinition() {