diff --git a/Sources/SwiftParser/CodeParser.swift b/Sources/SwiftParser/CodeParser.swift index a275240..4ab1de4 100644 --- a/Sources/SwiftParser/CodeParser.swift +++ b/Sources/SwiftParser/CodeParser.swift @@ -1,189 +1,50 @@ import Foundation public final class CodeParser { - private var builders: [CodeElementBuilder] + private var consumers: [CodeTokenConsumer] private let tokenizer: CodeTokenizer - private var expressionBuilders: [CodeExpressionBuilder] - // State for incremental parsing - private var lastContext: CodeContext? - private var snapshots: [Int: CodeContext.Snapshot] = [:] - private var lastTokens: [any CodeToken] = [] + // Registered state is now reset for each parse run - public init(tokenizer: CodeTokenizer, builders: [CodeElementBuilder] = [], expressionBuilders: [CodeExpressionBuilder] = []) { - self.tokenizer = tokenizer - self.builders = builders - self.expressionBuilders = expressionBuilders + public init(language: CodeLanguage) { + self.tokenizer = language.tokenizer + self.consumers = language.consumers } - public func register(builder: CodeElementBuilder) { - builders.append(builder) - } - - public func unregister(builder: CodeElementBuilder) { - if let target = builder as? AnyObject { - if let index = builders.firstIndex(where: { ($0 as? AnyObject) === target }) { - builders.remove(at: index) - } - } - } - - public func clearBuilders() { - builders.removeAll() - } - public func register(expressionBuilder: CodeExpressionBuilder) { - expressionBuilders.append(expressionBuilder) - } - - public func unregister(expressionBuilder: CodeExpressionBuilder) { - if let target = expressionBuilder as? AnyObject { - if let index = expressionBuilders.firstIndex(where: { ($0 as? AnyObject) === target }) { - expressionBuilders.remove(at: index) - } - } - } - - public func clearExpressionBuilders() { - expressionBuilders.removeAll() - } public func parse(_ input: String, rootNode: CodeNode) -> (node: CodeNode, context: CodeContext) { let tokens = tokenizer.tokenize(input) - var context = CodeContext(tokens: tokens, index: 0, currentNode: rootNode, errors: [], input: input, linkReferences: [:]) + var context = CodeContext(tokens: tokens, currentNode: rootNode, errors: []) - snapshots = [:] - lastTokens = tokens + // Infinite loop protection: track token count progression + var lastCount = context.tokens.count + 1 - // Infinite loop protection: track index progression - var lastIndex = -1 - - while context.index < context.tokens.count { - // Infinite loop detection - if index hasn't advanced, terminate parsing immediately - if context.index == lastIndex { - context.errors.append(CodeError("Infinite loop detected: parser stuck at token index \(context.index). Terminating parse to prevent hang.", range: context.tokens[context.index].range)) + while let token = context.tokens.first { + // Infinite loop detection - if token count hasn't decreased, terminate parsing immediately + if context.tokens.count == lastCount { + context.errors.append(CodeError("Infinite loop detected: parser stuck at token \(token.kindDescription). Terminating parse to prevent hang.", range: token.range)) break } - lastIndex = context.index - - snapshots[context.index] = context.snapshot() - let token = context.tokens[context.index] + lastCount = context.tokens.count + if token.kindDescription == "eof" { break } var matched = false - for builder in builders { - if builder.accept(context: context, token: token) { - builder.build(context: &context) + for consumer in consumers { + if consumer.consume(context: &context, token: token) { matched = true break } } - if !matched { - for expr in expressionBuilders { - if expr.accept(context: context, token: token) { - if let node = expr.parse(context: &context) { - context.currentNode.addChild(node) - } - matched = true - break - } - } - } - if !matched { - context.errors.append(CodeError("Unrecognized token \(token.kindDescription)", range: token.range)) - context.index += 1 - } - } - snapshots[context.index] = context.snapshot() - lastContext = context - return (rootNode, context) - } - - public func update(_ input: String, rootNode: CodeNode) -> (node: CodeNode, context: CodeContext) { - guard var context = lastContext else { - return parse(input, rootNode: rootNode) - } - let newTokens = tokenizer.tokenize(input) - - var diffIndex = 0 - while diffIndex < min(lastTokens.count, newTokens.count) { - if !tokenEqual(lastTokens[diffIndex], newTokens[diffIndex]) { - break - } - diffIndex += 1 - } - - var restoreIndex = diffIndex - while restoreIndex >= 0 && snapshots[restoreIndex] == nil { - restoreIndex -= 1 - } - if let snap = snapshots[restoreIndex] { - context.restore(snap) - } - - context.tokens = newTokens - context.index = restoreIndex - - snapshots = snapshots.filter { $0.key <= restoreIndex } - lastTokens = newTokens - - // Infinite loop protection for update method - var lastIndex = -1 - - while context.index < context.tokens.count { - // Infinite loop detection - if index hasn't advanced, terminate parsing immediately - if context.index == lastIndex { - context.errors.append(CodeError("Infinite loop detected in update: parser stuck at token index \(context.index). Terminating parse to prevent hang.", range: context.tokens[context.index].range)) - break - } - lastIndex = context.index - - snapshots[context.index] = context.snapshot() - let token = context.tokens[context.index] - if token.kindDescription == "eof" { break } - var matched = false - for builder in builders { - if builder.accept(context: context, token: token) { - builder.build(context: &context) - matched = true - break - } - } - if !matched { - for expr in expressionBuilders { - if expr.accept(context: context, token: token) { - if let node = expr.parse(context: &context) { - context.currentNode.addChild(node) - } - matched = true - break - } - } - } if !matched { context.errors.append(CodeError("Unrecognized token \(token.kindDescription)", range: token.range)) - context.index += 1 + context.tokens.removeFirst() } } - snapshots[context.index] = context.snapshot() - lastContext = context return (rootNode, context) } - private func tokenEqual(_ a: any CodeToken, _ b: any CodeToken) -> Bool { - return a.kindDescription == b.kindDescription && a.text == b.text - } - - public func parseExpression(context: inout CodeContext, minBP: Int = 0) -> CodeNode? { - guard context.index < context.tokens.count else { return nil } - let token = context.tokens[context.index] - for expr in expressionBuilders { - if expr.accept(context: context, token: token) { - return expr.parse(context: &context, minBP: minBP) - } - } - return nil - } } diff --git a/Sources/SwiftParser/Core.swift b/Sources/SwiftParser/Core.swift index 28830f6..57c80c9 100644 --- a/Sources/SwiftParser/Core.swift +++ b/Sources/SwiftParser/Core.swift @@ -12,9 +12,10 @@ public protocol CodeTokenizer { func tokenize(_ input: String) -> [any CodeToken] } -public protocol CodeElementBuilder { - func accept(context: CodeContext, token: any CodeToken) -> Bool - func build(context: inout CodeContext) +/// Consumes a token and optionally updates the AST if it is recognized. +/// - Returns: `true` if the token was handled and the context advanced. +public protocol CodeTokenConsumer { + func consume(context: inout CodeContext, token: any CodeToken) -> Bool } public class CodeNode { @@ -138,52 +139,18 @@ public struct CodeError: Error { public struct CodeContext { public var tokens: [any CodeToken] - public var index: Int public var currentNode: CodeNode public var errors: [CodeError] - public let input: String - public var linkReferences: [String: String] - public init(tokens: [any CodeToken], index: Int, currentNode: CodeNode, errors: [CodeError], input: String, linkReferences: [String: String] = [:]) { + public init(tokens: [any CodeToken], currentNode: CodeNode, errors: [CodeError]) { self.tokens = tokens - self.index = index self.currentNode = currentNode self.errors = errors - self.input = input - self.linkReferences = linkReferences - } - - /// Snapshot represents a parser state that can be restored later. - public struct Snapshot { - fileprivate let index: Int - fileprivate let node: CodeNode - fileprivate let childCount: Int - fileprivate let errorCount: Int - fileprivate let linkReferences: [String: String] - } - - /// Capture the current parser state so it can be restored on demand. - public func snapshot() -> Snapshot { - Snapshot(index: index, node: currentNode, childCount: currentNode.children.count, errorCount: errors.count, linkReferences: linkReferences) - } - - /// Restore the parser to a previously captured state, discarding any new nodes or errors. - public mutating func restore(_ snapshot: Snapshot) { - index = snapshot.index - currentNode = snapshot.node - if currentNode.children.count > snapshot.childCount { - currentNode.children.removeLast(currentNode.children.count - snapshot.childCount) - } - if errors.count > snapshot.errorCount { - errors.removeLast(errors.count - snapshot.errorCount) - } - linkReferences = snapshot.linkReferences } } public protocol CodeLanguage { var tokenizer: CodeTokenizer { get } - var builders: [CodeElementBuilder] { get } + var consumers: [CodeTokenConsumer] { get } var rootElement: any CodeElement { get } - var expressionBuilders: [CodeExpressionBuilder] { get } } diff --git a/Sources/SwiftParser/ExpressionBuilder.swift b/Sources/SwiftParser/ExpressionBuilder.swift deleted file mode 100644 index 7014380..0000000 --- a/Sources/SwiftParser/ExpressionBuilder.swift +++ /dev/null @@ -1,36 +0,0 @@ -import Foundation - -public protocol CodeExpressionBuilder: CodeElementBuilder { - func isPrefix(token: any CodeToken) -> Bool - func prefix(context: inout CodeContext, token: any CodeToken) -> CodeNode? - func infixBindingPower(of token: any CodeToken) -> (left: Int, right: Int)? - func infix(context: inout CodeContext, left: CodeNode, token: any CodeToken, right: CodeNode) -> CodeNode -} - -public extension CodeExpressionBuilder { - func accept(context: CodeContext, token: any CodeToken) -> Bool { - return isPrefix(token: token) - } - - func build(context: inout CodeContext) { - if let node = parse(context: &context) { - context.currentNode.addChild(node) - } - } - - func parse(context: inout CodeContext, minBP: Int = 0) -> CodeNode? { - guard context.index < context.tokens.count else { return nil } - let first = context.tokens[context.index] - guard isPrefix(token: first) else { return nil } - context.index += 1 - guard var left = prefix(context: &context, token: first) else { return nil } - while context.index < context.tokens.count { - let opToken = context.tokens[context.index] - guard let bp = infixBindingPower(of: opToken), bp.left >= minBP else { break } - context.index += 1 - let right = parse(context: &context, minBP: bp.right) ?? CodeNode(type: left.type, value: "") - left = infix(context: &context, left: left, token: opToken, right: right) - } - return left - } -} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+AutoLinkBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+AutoLinkBuilder.swift deleted file mode 100644 index e261549..0000000 --- a/Sources/SwiftParser/Languages/MarkdownLanguage+AutoLinkBuilder.swift +++ /dev/null @@ -1,24 +0,0 @@ -import Foundation - -extension MarkdownLanguage { - public class AutoLinkBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - if case .lessThan = tok { return true } - return false - } - public func build(context: inout CodeContext) { - context.index += 1 - var text = "" - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .greaterThan = tok { context.index += 1; break } - else { text += tok.text; context.index += 1 } - } else { context.index += 1 } - } - context.currentNode.addChild(MarkdownAutoLinkNode(url: text)) - } - } - -} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+BareAutoLinkBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+BareAutoLinkBuilder.swift deleted file mode 100644 index 1e3ca17..0000000 --- a/Sources/SwiftParser/Languages/MarkdownLanguage+BareAutoLinkBuilder.swift +++ /dev/null @@ -1,42 +0,0 @@ -import Foundation - -extension MarkdownLanguage { - public class BareAutoLinkBuilder: CodeElementBuilder { - private static let regex: NSRegularExpression = { - let pattern = #"^((https?|ftp)://[^\s<>]+|www\.[^\s<>]+|[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,})"# - return try! NSRegularExpression(pattern: pattern, options: []) - }() - - public init() {} - - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - let start = tok.range.lowerBound - let text = String(context.input[start...]) - let range = NSRange(location: 0, length: text.utf16.count) - if let m = Self.regex.firstMatch(in: text, range: range), m.range.location == 0 { - return true - } - return false - } - - public func build(context: inout CodeContext) { - guard let tok = context.tokens[context.index] as? Token else { return } - let start = tok.range.lowerBound - let text = String(context.input[start...]) - let range = NSRange(location: 0, length: text.utf16.count) - guard let m = Self.regex.firstMatch(in: text, range: range) else { return } - let endPos = context.input.index(start, offsetBy: m.range.length) - let url = String(context.input[start.. Bool { - guard let tok = token as? Token else { return false } - if case .greaterThan = tok { - if context.index == 0 { return true } - if let prev = context.tokens[context.index - 1] as? Token, case .newline = prev { return true } - } - return false - } - public func build(context: inout CodeContext) { - context.index += 1 // skip '>' - var text = "" - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - switch tok { - case .newline: - context.index += 1 - let node = MarkdownBlockQuoteNode(value: text.trimmingCharacters(in: .whitespaces)) - context.currentNode.addChild(node) - return - case .eof: - let node = MarkdownBlockQuoteNode(value: text.trimmingCharacters(in: .whitespaces)) - context.currentNode.addChild(node) - context.index += 1 - return - default: - text += tok.text - context.index += 1 - } - } else { context.index += 1 } - } - } - } - -} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+BlockTexFormulaBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+BlockTexFormulaBuilder.swift deleted file mode 100644 index 48620fa..0000000 --- a/Sources/SwiftParser/Languages/MarkdownLanguage+BlockTexFormulaBuilder.swift +++ /dev/null @@ -1,69 +0,0 @@ -import Foundation - -extension MarkdownLanguage { - public class BlockTexFormulaBuilder: CodeElementBuilder { - public init() {} - - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = context.tokens[context.index] as? Token else { return false } - - // Check for $$ at the beginning of a line - if case .dollar = tok { - // Check if there's another $ right after - if context.index + 1 < context.tokens.count, - let nextTok = context.tokens[context.index + 1] as? Token, - case .dollar = nextTok { - return true - } - } - return false - } - - public func build(context: inout CodeContext) { - let startFirstDollar = (context.tokens[context.index] as? Token)?.range - let startSecondDollar = (context.tokens[context.index + 1] as? Token)?.range - context.index += 2 // Skip opening $$ - - var endFirstDollar: Range? = nil - var foundClosing = false - - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .dollar = tok { - // Check if this is $$ - if context.index + 1 < context.tokens.count, - let nextTok = context.tokens[context.index + 1] as? Token, - case .dollar = nextTok { - endFirstDollar = tok.range - context.index += 2 // Skip closing $$ - foundClosing = true - break - } else { - context.index += 1 - } - } else { - context.index += 1 - } - } else { - context.index += 1 - } - } - - if foundClosing, - let _ = startFirstDollar, - let startSecond = startSecondDollar, - let endFirst = endFirstDollar { - // Extract formula content using original input string - let formulaStart = startSecond.upperBound - let formulaEnd = endFirst.lowerBound - let formulaText = String(context.input[formulaStart.. Bool { - guard let first = token as? Token else { return false } - let fenceKind: String - switch first { - case .backtick: fenceKind = "`" - case .tilde: fenceKind = "~" - default: return false - } - var idx = context.index - var count = 0 - while idx < context.tokens.count, let t = context.tokens[idx] as? Token, t.kindDescription == fenceKind { - count += 1; idx += 1 - } - guard count >= 3 else { return false } - if context.index == 0 { return true } - if let prev = context.tokens[context.index - 1] as? Token, case .newline = prev { - return true - } - return false - } - public func build(context: inout CodeContext) { - guard let startTok = context.tokens[context.index] as? Token else { return } - let fenceKind = startTok.kindDescription - var fenceLength = 0 - while context.index < context.tokens.count, let t = context.tokens[context.index] as? Token, t.kindDescription == fenceKind { - fenceLength += 1 - context.index += 1 - } - // capture info string until end of line and trim whitespace - var info = "" - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .newline = tok { - context.index += 1 - break - } else { - info += tok.text - context.index += 1 - } - } else { - context.index += 1 - } - } - info = info.trimmingCharacters(in: .whitespaces) - let lang = info.split(whereSeparator: { $0.isWhitespace }).first.map(String.init) - - let blockStart = context.index - var text = "" - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - // check for closing fence at start of line - if tok.kindDescription == fenceKind && (context.index == blockStart || (context.index > blockStart && (context.tokens[context.index - 1] as? Token)?.kindDescription == "newline")) { - var idx = context.index - var count = 0 - while idx < context.tokens.count, let t = context.tokens[idx] as? Token, t.kindDescription == fenceKind { - count += 1; idx += 1 - } - if count >= fenceLength { - context.index = idx - if context.index < context.tokens.count, let nl = context.tokens[context.index] as? Token, case .newline = nl { context.index += 1 } - context.currentNode.addChild(MarkdownCodeBlockNode(lang: lang, content: text)) - return - } - } - text += tok.text - context.index += 1 - } else { context.index += 1 } - } - context.currentNode.addChild(MarkdownCodeBlockNode(lang: lang, content: text)) - } - } - -} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+Element.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+Element.swift deleted file mode 100644 index 48995c6..0000000 --- a/Sources/SwiftParser/Languages/MarkdownLanguage+Element.swift +++ /dev/null @@ -1,36 +0,0 @@ -import Foundation - -extension MarkdownLanguage { - public enum Element: String, CodeElement { - case root - case paragraph - case heading - case text - case listItem - case orderedListItem - case unorderedList - case orderedList - case emphasis - case strong - case codeBlock - case inlineCode - case link - case blockQuote - case thematicBreak - case image - case html - case entity - case strikethrough - case table - case tableHeader - case tableRow - case tableCell - case autoLink - case linkReferenceDefinition - case footnoteDefinition - case footnoteReference - case inlineTexFormula - case blockTexFormula - } - -} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+EntityBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+EntityBuilder.swift deleted file mode 100644 index 3fbee5a..0000000 --- a/Sources/SwiftParser/Languages/MarkdownLanguage+EntityBuilder.swift +++ /dev/null @@ -1,48 +0,0 @@ -import Foundation - -extension MarkdownLanguage { - public class EntityBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - if case .ampersand = tok { return true } - return false - } - public func build(context: inout CodeContext) { - context.index += 1 - var text = "" - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .semicolon = tok { context.index += 1; break } - else { text += tok.text; context.index += 1 } - } else { context.index += 1 } - } - let decoded = decode(text) - context.currentNode.addChild(MarkdownEntityNode(value: decoded)) - } - - private func decode(_ entity: String) -> String { - switch entity { - case "amp": return "&" - case "lt": return "<" - case "gt": return ">" - case "quot": return "\"" - case "apos": return "'" - default: - if entity.hasPrefix("#x") || entity.hasPrefix("#X") { - let hex = entity.dropFirst(2) - if let value = UInt32(hex, radix: 16), let scalar = UnicodeScalar(value) { - return String(Character(scalar)) - } - } else if entity.hasPrefix("#") { - let num = entity.dropFirst() - if let value = UInt32(num), let scalar = UnicodeScalar(value) { - return String(Character(scalar)) - } - } - return "&" + entity + ";" - } - } - } - -} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+FootnoteBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+FootnoteBuilder.swift deleted file mode 100644 index 8c30c1b..0000000 --- a/Sources/SwiftParser/Languages/MarkdownLanguage+FootnoteBuilder.swift +++ /dev/null @@ -1,59 +0,0 @@ -import Foundation - -extension MarkdownLanguage { - public class FootnoteBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let lb = token as? Token, case .lbracket = lb else { return false } - guard context.index + 2 < context.tokens.count else { return false } - guard let first = context.tokens[context.index + 1] as? Token else { return false } - if case .text(let s, _) = first, s.starts(with: "^") { - var idx = context.index + 2 - while idx < context.tokens.count { - if let t = context.tokens[idx] as? Token { - if case .rbracket = t { return true } - if case .text = t { - idx += 1; continue - } - if case .number = t { - idx += 1; continue - } - } - break - } - } - return false - } - public func build(context: inout CodeContext) { - context.index += 1 // skip [ - var id = "" - while context.index < context.tokens.count { - guard let tok = context.tokens[context.index] as? Token else { context.index += 1; continue } - if case .rbracket = tok { break } - id += tok.text - context.index += 1 - } - if id.hasPrefix("^") { id.removeFirst() } - if context.index < context.tokens.count { context.index += 1 } // skip ] - - if context.index < context.tokens.count, - let colon = context.tokens[context.index] as? Token, - case .text(let s, _) = colon, - s.trimmingCharacters(in: .whitespaces).hasPrefix(":") { - var text = s - context.index += 1 - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .newline = tok { context.index += 1; break } - else { text += tok.text; context.index += 1 } - } else { context.index += 1 } - } - if text.hasPrefix(":") { text.removeFirst() } - let trimmed = text.trimmingCharacters(in: .whitespaces) - context.currentNode.addChild(MarkdownFootnoteDefinitionNode(identifier: id, text: trimmed)) - } else { - context.currentNode.addChild(MarkdownFootnoteReferenceNode(identifier: id)) - } - } - } -} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+HTMLBlockBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+HTMLBlockBuilder.swift deleted file mode 100644 index b15c676..0000000 --- a/Sources/SwiftParser/Languages/MarkdownLanguage+HTMLBlockBuilder.swift +++ /dev/null @@ -1,25 +0,0 @@ -import Foundation - -extension MarkdownLanguage { - public class HTMLBlockBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - if case .lessThan = tok, context.index == 0 { - let rest = String(context.input[tok.range.upperBound...]).lowercased() - return rest.hasPrefix("!doctype") || rest.hasPrefix("html") - } - return false - } - public func build(context: inout CodeContext) { - var text = "" - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { text += tok.text } - context.index += 1 - } - let closed = MarkdownLanguage.isHTMLClosed(text) - context.currentNode.addChild(MarkdownHtmlNode(value: text, closed: closed)) - } - } - -} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+HTMLBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+HTMLBuilder.swift deleted file mode 100644 index f7c7202..0000000 --- a/Sources/SwiftParser/Languages/MarkdownLanguage+HTMLBuilder.swift +++ /dev/null @@ -1,25 +0,0 @@ -import Foundation - -extension MarkdownLanguage { - public class HTMLBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - return tok.kindDescription == "<" - } - public func build(context: inout CodeContext) { - context.index += 1 // skip < - var text = "" - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .greaterThan = tok { context.index += 1; break } - else { text += tok.text; context.index += 1 } - } else { context.index += 1 } - } - let html = "<" + text + ">" - let closed = MarkdownLanguage.isHTMLClosed(html) - context.currentNode.addChild(MarkdownHtmlNode(value: text, closed: closed)) - } - } - -} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+HeadingBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+HeadingBuilder.swift deleted file mode 100644 index 41e1f91..0000000 --- a/Sources/SwiftParser/Languages/MarkdownLanguage+HeadingBuilder.swift +++ /dev/null @@ -1,73 +0,0 @@ -import Foundation - -extension MarkdownLanguage { - public class HeadingBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - if case .hash = tok { - if context.index == 0 { return true } - if let prev = context.tokens[context.index - 1] as? Token, case .newline = prev { - return true - } - } - return false - } - public func build(context: inout CodeContext) { - var count = 0 - while context.index < context.tokens.count, - let tok = context.tokens[context.index] as? Token, - case .hash = tok, - count < 6 { - count += 1 - context.index += 1 - } - var tokens: [Token] = [] - while context.index < context.tokens.count { - guard let tok = context.tokens[context.index] as? Token else { context.index += 1; continue } - switch tok { - case .newline, .eof: - context.index += 1 - default: - tokens.append(tok) - context.index += 1 - } - if case .newline = tok { break } - if case .eof = tok { break } - } - - // Trim trailing whitespace - while let last = tokens.last, case .text(let s, _) = last, s.trimmingCharacters(in: .whitespaces).isEmpty { - tokens.removeLast() - } - // Remove trailing '#' sequences - while let last = tokens.last, case .hash = last { - tokens.removeLast() - while let l = tokens.last, case .text(let s, _) = l, s.trimmingCharacters(in: .whitespaces).isEmpty { - tokens.removeLast() - } - } - while let last = tokens.last, case .text(let s, _) = last, s.trimmingCharacters(in: .whitespaces).isEmpty { - tokens.removeLast() - } - - // Remove spaces before hard breaks - var processed: [Token] = [] - for tok in tokens { - if case .hardBreak = tok { - while let l = processed.last, case .text(let s, _) = l, s.allSatisfy({ $0 == " " }) { - processed.removeLast() - } - } - processed.append(tok) - } - - let trimmedValue = processed.map { $0.text }.joined().trimmingCharacters(in: .whitespaces) - let children = MarkdownLanguage.parseInlineTokens(processed, input: context.input) - let node = MarkdownHeadingNode(value: trimmedValue, level: count) - children.forEach { node.addChild($0) } - context.currentNode.addChild(node) - } - } - -} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+ImageBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+ImageBuilder.swift deleted file mode 100644 index 1be1791..0000000 --- a/Sources/SwiftParser/Languages/MarkdownLanguage+ImageBuilder.swift +++ /dev/null @@ -1,37 +0,0 @@ -import Foundation - -extension MarkdownLanguage { - public class ImageBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - if case .exclamation = tok, - context.index + 1 < context.tokens.count, - let next = context.tokens[context.index + 1] as? Token, - case .lbracket = next { return true } - return false - } - public func build(context: inout CodeContext) { - context.index += 2 // skip ![ - var alt = "" - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .rbracket = tok { context.index += 1; break } - else { alt += tok.text; context.index += 1 } - } else { context.index += 1 } - } - var url = "" - if context.index < context.tokens.count, let lp = context.tokens[context.index] as? Token, case .lparen = lp { - context.index += 1 - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .rparen = tok { context.index += 1; break } - else { url += tok.text; context.index += 1 } - } else { context.index += 1 } - } - } - context.currentNode.addChild(MarkdownImageNode(alt: alt, url: url)) - } - } - -} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+IndentedCodeBlockBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+IndentedCodeBlockBuilder.swift deleted file mode 100644 index e3e2e08..0000000 --- a/Sources/SwiftParser/Languages/MarkdownLanguage+IndentedCodeBlockBuilder.swift +++ /dev/null @@ -1,42 +0,0 @@ -import Foundation - -extension MarkdownLanguage { - public class IndentedCodeBlockBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - if case .text(let s, _) = tok { - if (context.index == 0 || (context.tokens[context.index - 1] as? Token)?.kindDescription == "newline") && s.hasPrefix(" ") { - return true - } - } - return false - } - public func build(context: inout CodeContext) { - var text = "" - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - switch tok { - case .newline: - context.index += 1 - if context.index < context.tokens.count, let next = context.tokens[context.index] as? Token, case .text(let s, _) = next, s.hasPrefix(" ") { - text += "\n" + String(s.dropFirst(4)) - context.index += 1 - } else { - context.currentNode.addChild(MarkdownCodeBlockNode(lang: nil, content: text)) - return - } - case .text(let s, _): - text += String(s.dropFirst(4)) - context.index += 1 - default: - text += tok.text - context.index += 1 - } - } else { context.index += 1 } - } - context.currentNode.addChild(MarkdownCodeBlockNode(lang: nil, content: text)) - } - } - -} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+LinkBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+LinkBuilder.swift deleted file mode 100644 index 3134385..0000000 --- a/Sources/SwiftParser/Languages/MarkdownLanguage+LinkBuilder.swift +++ /dev/null @@ -1,56 +0,0 @@ -import Foundation - -extension MarkdownLanguage { - public class LinkBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - if case .lbracket = tok { return true } - return false - } - public func build(context: inout CodeContext) { - context.index += 1 - var textTokens: [Token] = [] - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .rbracket = tok { - context.index += 1 - break - } else { - textTokens.append(tok) - context.index += 1 - } - } else { context.index += 1 } - } - let textNodes = MarkdownLanguage.parseInlineTokens(textTokens, input: context.input) - var url = "" - if context.index < context.tokens.count, let lparen = context.tokens[context.index] as? Token, case .lparen = lparen { - context.index += 1 - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .rparen = tok { - context.index += 1 - break - } else { - url += tok.text - context.index += 1 - } - } else { context.index += 1 } - } - } else if context.index + 2 < context.tokens.count, - let lb = context.tokens[context.index] as? Token, case .lbracket = lb, - let idTok = context.tokens[context.index + 1] as? Token, - let rb = context.tokens[context.index + 2] as? Token, case .rbracket = rb, - case .text(let id, _) = idTok { - context.index += 3 - let key = id.trimmingCharacters(in: .whitespaces).lowercased() - if let ref = context.linkReferences[key] { - url = ref - } - } - let node = MarkdownLinkNode(text: textNodes, url: url) - context.currentNode.addChild(node) - } - } - -} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+LinkReferenceDefinitionBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+LinkReferenceDefinitionBuilder.swift deleted file mode 100644 index 605fc62..0000000 --- a/Sources/SwiftParser/Languages/MarkdownLanguage+LinkReferenceDefinitionBuilder.swift +++ /dev/null @@ -1,48 +0,0 @@ -import Foundation - -extension MarkdownLanguage { - public class LinkReferenceDefinitionBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard context.index + 3 < context.tokens.count else { return false } - guard let lb = token as? Token, - let txt = context.tokens[context.index + 1] as? Token, - let rb = context.tokens[context.index + 2] as? Token, - let colon = context.tokens[context.index + 3] as? Token else { return false } - if case .lbracket = lb, - case .text = txt, - case .rbracket = rb, - case .text(let s, _) = colon, - s.trimmingCharacters(in: .whitespaces).hasPrefix(":") { - return true - } - return false - } - public func build(context: inout CodeContext) { - context.index += 1 - var id = "" - if context.index < context.tokens.count, let idTok = context.tokens[context.index] as? Token, case .text(let s, _) = idTok { - id = s - context.index += 1 - } - if context.index < context.tokens.count { context.index += 1 } // skip ] - var text = "" - if context.index < context.tokens.count, let colon = context.tokens[context.index] as? Token, case .text(let s, _) = colon { - text = s - context.index += 1 - } - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .newline = tok { context.index += 1; break } - else { text += tok.text; context.index += 1 } - } else { context.index += 1 } - } - var url = text.trimmingCharacters(in: .whitespaces) - if url.hasPrefix(":") { url.removeFirst() } - url = url.trimmingCharacters(in: .whitespaces) - let trimmedID = id.trimmingCharacters(in: .whitespaces) - context.linkReferences[trimmedID.lowercased()] = url - context.currentNode.addChild(MarkdownLinkReferenceDefinitionNode(identifier: trimmedID, url: url)) - } - } -} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+OrderedListBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+OrderedListBuilder.swift deleted file mode 100644 index 3323aed..0000000 --- a/Sources/SwiftParser/Languages/MarkdownLanguage+OrderedListBuilder.swift +++ /dev/null @@ -1,122 +0,0 @@ -import Foundation - -extension MarkdownLanguage { - public class OrderedListBuilder: CodeElementBuilder { - public init() {} - - private func lineIndent(before idx: Int, in context: CodeContext) -> Int? { - if idx == 0 { return 0 } - var i = idx - 1 - var count = 0 - while i >= 0 { - guard let tok = context.tokens[i] as? Token else { return nil } - switch tok { - case .newline: - return count - case .text(let s, _) where s.allSatisfy({ $0 == " " }): - count += s.count - i -= 1 - default: - return nil - } - } - return count - } - - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token, case .number = tok else { return false } - guard context.index + 1 < context.tokens.count, - let dot = context.tokens[context.index + 1] as? Token, case .dot = dot else { return false } - if let _ = lineIndent(before: context.index, in: context) { return true } - return false - } - - public func build(context: inout CodeContext) { - func parseList(_ indent: Int, _ depth: Int) -> CodeNode { - let list = MarkdownOrderedListNode(value: "", level: depth) - var isLoose = false - while context.index < context.tokens.count { - guard context.index + 1 < context.tokens.count, - let num = context.tokens[context.index] as? Token, case .number = num, - let dot = context.tokens[context.index + 1] as? Token, case .dot = dot, - lineIndent(before: context.index, in: context) == indent else { break } - let (node, loose) = parseItem(indent, depth) - if loose { isLoose = true } - list.addChild(node) - } - list.value = isLoose ? "loose" : "tight" - return list - } - - func parseItem(_ indent: Int, _ depth: Int) -> (CodeNode, Bool) { - var loose = false - context.index += 2 - if context.index < context.tokens.count, - let t = context.tokens[context.index] as? Token, - case .text(let s, _) = t, s.first?.isWhitespace == true { - context.index += 1 - } - - let node = MarkdownOrderedListItemNode(value: "") - var text = "" - itemLoop: while context.index < context.tokens.count { - guard let tok = context.tokens[context.index] as? Token else { context.index += 1; continue } - switch tok { - case .newline: - context.index += 1 - if context.index < context.tokens.count, let nl = context.tokens[context.index] as? Token, case .newline = nl { - loose = true - context.index += 1 - } - let start = context.index - var spaces = 0 - if start < context.tokens.count, let sTok = context.tokens[start] as? Token, case .text(let s, _) = sTok, s.allSatisfy({ $0 == " " }) { - spaces = s.count - context.index += 1 - } - if context.index + 1 < context.tokens.count, - let nextNum = context.tokens[context.index] as? Token, case .number = nextNum, - let dot = context.tokens[context.index + 1] as? Token, case .dot = dot, - spaces > indent { - let sub = parseList(spaces, depth + 1) - node.addChild(sub) - if context.index + 1 < context.tokens.count, - let nextBullet = context.tokens[context.index] as? Token, case .number = nextBullet, - let ndot = context.tokens[context.index + 1] as? Token, case .dot = ndot, - (lineIndent(before: context.index, in: context) ?? 0) <= indent { - break itemLoop - } - } else if context.index + 1 < context.tokens.count, - let nextNum = context.tokens[context.index] as? Token, case .number = nextNum, - let dot = context.tokens[context.index + 1] as? Token, case .dot = dot, - spaces == indent { - context.index = start - break itemLoop - } else if spaces > indent { - text += "\n" - } else if spaces < indent { - context.index = start - break itemLoop - } else { - text += "\n" - } - case .eof: - context.index += 1 - break itemLoop - default: - text += tok.text - context.index += 1 - } - } - node.value = text.trimmingCharacters(in: .whitespaces) - return (node, loose) - } - - if let ind = lineIndent(before: context.index, in: context) { - let list = parseList(ind, 1) - context.currentNode.addChild(list) - } - } - } - -} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+ParagraphBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+ParagraphBuilder.swift deleted file mode 100644 index 03534d8..0000000 --- a/Sources/SwiftParser/Languages/MarkdownLanguage+ParagraphBuilder.swift +++ /dev/null @@ -1,70 +0,0 @@ -import Foundation - -extension MarkdownLanguage { - public class ParagraphBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - if token is Token { return true } else { return false } - } - public func build(context: inout CodeContext) { - var tokens: [Token] = [] - var ended = false - var dollarCount = 0 // Track unclosed $ symbols - while context.index < context.tokens.count { - guard let tok = context.tokens[context.index] as? Token else { context.index += 1; continue } - switch tok { - case .text, .star, .underscore, .backtick: - tokens.append(tok) - context.index += 1 - case .dollar: - dollarCount += 1 - tokens.append(tok) - context.index += 1 - case .hardBreak: - while let last = tokens.last, case .text(let s, _) = last, s.allSatisfy({ $0 == " " }) { - tokens.removeLast() - } - tokens.append(tok) - context.index += 1 - case .newline: - context.index += 1 - ended = true - case .dash, .hash, .plus, .lbracket, - .greaterThan, .exclamation, .tilde, .equal, .lessThan, .ampersand, .semicolon, .pipe: - // If we're in an unclosed TeX formula (odd number of $), continue collecting tokens - if dollarCount % 2 == 1 { - tokens.append(tok) - context.index += 1 - } else { - ended = true - } - case .number: - if context.index + 1 < context.tokens.count, - let dot = context.tokens[context.index + 1] as? Token, - case .dot = dot { - ended = true - } else { - tokens.append(tok) - context.index += 1 - } - case .eof: - context.index += 1 - ended = true - case .dot, .rbracket, .lparen, .rparen: - tokens.append(tok) - context.index += 1 - } - if ended { break } - } - - // Only create paragraph if we have tokens - if !tokens.isEmpty { - let value = tokens.map { $0.text }.joined() - let children = MarkdownLanguage.parseInlineTokens(tokens, input: context.input) - let node = MarkdownParagraphNode(value: value) - children.forEach { node.addChild($0) } - context.currentNode.addChild(node) - } - } - } -} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+SetextHeadingBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+SetextHeadingBuilder.swift deleted file mode 100644 index bbdb8ef..0000000 --- a/Sources/SwiftParser/Languages/MarkdownLanguage+SetextHeadingBuilder.swift +++ /dev/null @@ -1,110 +0,0 @@ -import Foundation - -extension MarkdownLanguage { - public class SetextHeadingBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard token is Token else { return false } - if context.index > 0 { - if let prev = context.tokens[context.index - 1] as? Token, case .newline = prev { - // ok - } else if context.index != 0 { - return false - } - } - - var idx = context.index - var sawText = false - while idx < context.tokens.count { - guard let t = context.tokens[idx] as? Token else { return false } - if case .newline = t { break } - if case .eof = t { return false } - sawText = true - idx += 1 - } - guard sawText else { return false } - guard idx < context.tokens.count, let nl = context.tokens[idx] as? Token, case .newline = nl else { return false } - idx += 1 - guard idx < context.tokens.count else { return false } - - var kind: Token? - var count = 0 - while idx < context.tokens.count { - guard let tok = context.tokens[idx] as? Token else { return false } - switch tok { - case .dash: - if kind == nil { kind = tok } - if case .dash = kind! { count += 1; idx += 1 } else { return false } - case .equal: - if kind == nil { kind = tok } - if case .equal = kind! { count += 1; idx += 1 } else { return false } - case .text(let s, _): - if s.trimmingCharacters(in: .whitespaces).isEmpty { idx += 1 } else { return false } - case .newline, .eof: - break - default: - return false - } - if idx < context.tokens.count, let next = context.tokens[idx] as? Token { - if case .newline = next { break } - if case .eof = next { break } - } - } - if count == 0 { return false } - if idx < context.tokens.count, let endTok = context.tokens[idx] as? Token { - switch endTok { - case .newline, .eof: - return true - default: - return false - } - } - return false - } - public func build(context: inout CodeContext) { - var text = "" - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .newline = tok { - context.index += 1 - break - } else { - text += tok.text - context.index += 1 - } - } else { context.index += 1 } - } - var level: Int? - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - switch tok { - case .dash: - if level == nil { level = 2 } - context.index += 1 - case .equal: - if level == nil { level = 1 } - context.index += 1 - case .text(let s, _) where s.trimmingCharacters(in: .whitespaces).isEmpty: - context.index += 1 - case .newline: - context.index += 1 - let node = MarkdownHeadingNode(value: text.trimmingCharacters(in: .whitespaces), level: level ?? 1) - context.currentNode.addChild(node) - return - case .eof: - context.index += 1 - let node = MarkdownHeadingNode(value: text.trimmingCharacters(in: .whitespaces), level: level ?? 1) - context.currentNode.addChild(node) - return - default: - context.index += 1 - } - } else { context.index += 1 } - } - context.currentNode.addChild(MarkdownHeadingNode(value: text.trimmingCharacters(in: .whitespaces), level: level ?? 1)) - } - } - - // MARK: - List Parsing - -} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+StrikethroughBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+StrikethroughBuilder.swift deleted file mode 100644 index b6f66a1..0000000 --- a/Sources/SwiftParser/Languages/MarkdownLanguage+StrikethroughBuilder.swift +++ /dev/null @@ -1,30 +0,0 @@ -import Foundation - -extension MarkdownLanguage { - public class StrikethroughBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard context.index + 1 < context.tokens.count else { return false } - guard let t1 = token as? Token, let t2 = context.tokens[context.index + 1] as? Token else { return false } - return t1.kindDescription == "~" && t2.kindDescription == "~" - } - public func build(context: inout CodeContext) { - context.index += 2 - var text = "" - while context.index + 1 < context.tokens.count { - if let t1 = context.tokens[context.index] as? Token, - let t2 = context.tokens[context.index + 1] as? Token, - t1.kindDescription == "~" && t2.kindDescription == "~" { - context.index += 2 - context.currentNode.addChild(MarkdownStrikethroughNode(value: text)) - return - } else if let tok = context.tokens[context.index] as? Token { - text += tok.text - context.index += 1 - } else { context.index += 1 } - } - context.currentNode.addChild(MarkdownStrikethroughNode(value: text)) - } - } - -} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+TableBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+TableBuilder.swift deleted file mode 100644 index 5b4e34c..0000000 --- a/Sources/SwiftParser/Languages/MarkdownLanguage+TableBuilder.swift +++ /dev/null @@ -1,102 +0,0 @@ -import Foundation - -extension MarkdownLanguage { - public class TableBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - if case .pipe = tok { - if context.index == 0 { return true } - if let prev = context.tokens[context.index - 1] as? Token, case .newline = prev { return true } - } - return false - } - func parseRow(_ context: inout CodeContext) -> [String] { - var cells: [String] = [] - var cell = "" - context.index += 1 // skip leading pipe - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - switch tok { - case .pipe: - cells.append(cell.trimmingCharacters(in: .whitespaces)) - cell = "" - context.index += 1 - case .newline, .eof: - cells.append(cell.trimmingCharacters(in: .whitespaces)) - if let last = cells.last, last.isEmpty { cells.removeLast() } - context.index += 1 - return cells - default: - cell += tok.text - context.index += 1 - } - } else { - context.index += 1 - } - } - if !cell.isEmpty || !cells.isEmpty { - cells.append(cell.trimmingCharacters(in: .whitespaces)) - } - return cells - } - - func parseDelimiter(_ context: inout CodeContext) -> [String]? { - guard context.index < context.tokens.count, - let first = context.tokens[context.index] as? Token, - case .pipe = first else { return nil } - var snapshot = context.snapshot() - let cells = parseRow(&context) - for cell in cells { - var trimmed = cell.trimmingCharacters(in: .whitespaces) - if trimmed.hasPrefix(":") { trimmed.removeFirst() } - if trimmed.hasSuffix(":") { trimmed.removeLast() } - if trimmed.count < 3 { context.restore(snapshot); return nil } - if !trimmed.allSatisfy({ $0 == "-" }) { - context.restore(snapshot); return nil - } - } - return cells - } - - public func build(context: inout CodeContext) { - var ctx = context - let header = parseRow(&ctx) - let startIndex = ctx.index - if let _ = parseDelimiter(&ctx) { - var rows: [[String]] = [] - while ctx.index < ctx.tokens.count, - let tok = ctx.tokens[ctx.index] as? Token, - case .pipe = tok { - rows.append(parseRow(&ctx)) - } - - let table = MarkdownTableNode() - let headerNode = MarkdownTableHeaderNode() - for cell in header { - let cellNode = MarkdownTableCellNode() - cellNode.addChild(MarkdownTextNode(value: cell)) - headerNode.addChild(cellNode) - } - table.addChild(headerNode) - - for row in rows { - let rowNode = MarkdownTableRowNode() - for cell in row { - let cellNode = MarkdownTableCellNode() - cellNode.addChild(MarkdownTextNode(value: cell)) - rowNode.addChild(cellNode) - } - table.addChild(rowNode) - } - - context = ctx - context.currentNode.addChild(table) - } else { - context.index = startIndex - context.currentNode.addChild(MarkdownTableNode(value: header.joined(separator: "|"))) - } - } - } - -} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+ThematicBreakBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+ThematicBreakBuilder.swift deleted file mode 100644 index 450046c..0000000 --- a/Sources/SwiftParser/Languages/MarkdownLanguage+ThematicBreakBuilder.swift +++ /dev/null @@ -1,47 +0,0 @@ -import Foundation - -extension MarkdownLanguage { - public class ThematicBreakBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - switch tok { - case .dash, .star, .underscore: - if context.index == 0 || (context.index > 0 && (context.tokens[context.index - 1] as? Token) is Token && (context.tokens[context.index - 1] as? Token)?.kindDescription == "newline") { - var count = 0 - var idx = context.index - while idx < context.tokens.count, let t = context.tokens[idx] as? Token, t.kindDescription == tok.kindDescription { - count += 1; idx += 1 - } - if count >= 3 { - return true - } - } - default: - break - } - return false - } - public func build(context: inout CodeContext) { - if context.index < context.tokens.count, - let tok = context.tokens[context.index] as? Token { - let kind = tok.kindDescription - while context.index < context.tokens.count { - if let t = context.tokens[context.index] as? Token, - t.kindDescription == kind { - context.index += 1 - } else { - break - } - } - } - if context.index < context.tokens.count, - let nl = context.tokens[context.index] as? Token, - case .newline = nl { - context.index += 1 - } - context.currentNode.addChild(MarkdownThematicBreakNode(value: "")) - } - } - -} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+Token.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+Token.swift deleted file mode 100644 index c628249..0000000 --- a/Sources/SwiftParser/Languages/MarkdownLanguage+Token.swift +++ /dev/null @@ -1,102 +0,0 @@ -import Foundation - -extension MarkdownLanguage { - public enum Token: CodeToken { - case text(String, Range) - case hash(Range) - case dash(Range) - case star(Range) - case underscore(Range) - case plus(Range) - case backtick(Range) - case greaterThan(Range) - case exclamation(Range) - case tilde(Range) - case equal(Range) - case lessThan(Range) - case ampersand(Range) - case semicolon(Range) - case pipe(Range) - case lbracket(Range) - case rbracket(Range) - case lparen(Range) - case rparen(Range) - case dot(Range) - case number(String, Range) - case hardBreak(Range) - case newline(Range) - case dollar(Range) - case eof(Range) - - public var kindDescription: String { - switch self { - case .text: return "text" - case .hash: return "#" - case .dash: return "-" - case .star: return "*" - case .underscore: return "_" - case .plus: return "+" - case .backtick: return "`" - case .greaterThan: return ">" - case .exclamation: return "!" - case .tilde: return "~" - case .equal: return "=" - case .lessThan: return "<" - case .ampersand: return "&" - case .semicolon: return ";" - case .pipe: return "|" - case .lbracket: return "[" - case .rbracket: return "]" - case .lparen: return "(" - case .rparen: return ")" - case .dot: return "." - case .number: return "number" - case .hardBreak: return "hardBreak" - case .newline: return "newline" - case .dollar: return "$" - case .eof: return "eof" - } - } - - public var text: String { - switch self { - case .text(let s, _): return s - case .hash: return "#" - case .dash: return "-" - case .star: return "*" - case .underscore: return "_" - case .plus: return "+" - case .backtick: return "`" - case .greaterThan: return ">" - case .exclamation: return "!" - case .tilde: return "~" - case .equal: return "=" - case .lessThan: return "<" - case .ampersand: return "&" - case .semicolon: return ";" - case .pipe: return "|" - case .lbracket: return "[" - case .rbracket: return "]" - case .lparen: return "(" - case .rparen: return ")" - case .dot: return "." - case .number(let s, _): return s - case .hardBreak, .newline: return "\n" - case .dollar: return "$" - case .eof: return "" - } - } - - public var range: Range { - switch self { - case .text(_, let r), .hash(let r), .dash(let r), .star(let r), .underscore(let r), - .plus(let r), .backtick(let r), .greaterThan(let r), .exclamation(let r), .tilde(let r), - .equal(let r), .lessThan(let r), .ampersand(let r), .semicolon(let r), .pipe(let r), - .lbracket(let r), .rbracket(let r), .lparen(let r), .rparen(let r), .dot(let r), - .number(_, let r), .hardBreak(let r), .newline(let r), .dollar(let r), .eof(let r): - return r - } - } - } - -} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+Tokenizer.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+Tokenizer.swift deleted file mode 100644 index d2c030e..0000000 --- a/Sources/SwiftParser/Languages/MarkdownLanguage+Tokenizer.swift +++ /dev/null @@ -1,149 +0,0 @@ -import Foundation - -extension MarkdownLanguage { - public class Tokenizer: CodeTokenizer { - public init() {} - - public func tokenize(_ input: String) -> [any CodeToken] { - var tokens: [Token] = [] - var index = input.startIndex - func advance() { index = input.index(after: index) } - func add(_ t: Token) { tokens.append(t) } - while index < input.endIndex { - let ch = input[index] - if ch == "\\" { - let start = index - advance() - if index < input.endIndex { - let escaped = input[index] - advance() - add(.text(String(escaped), start.." { - let start = index - advance() - add(.greaterThan(start.. input.startIndex { - var i = input.index(before: start) - var spaceCount = 0 - while input[i] == " " { - spaceCount += 1 - if i == input.startIndex { break } - i = input.index(before: i) - } - if spaceCount >= 2 { - isHard = true - } else if spaceCount == 0 && input[i] == "\\" { - isHard = true - } - } - advance() - if isHard { - add(.hardBreak(start..!~|;&=\\$".contains(input[index]) && - !input[index].isNumber { - advance() - } - let text = String(input[start.. Int? { - if idx == 0 { return 0 } - var i = idx - 1 - var count = 0 - while i >= 0 { - guard let tok = context.tokens[i] as? Token else { return nil } - switch tok { - case .newline: - return count - case .text(let s, _) where s.allSatisfy({ $0 == " " }): - count += s.count - i -= 1 - default: - return nil - } - } - return count - } - - private func isBullet(_ tok: Token) -> Bool { - switch tok { - case .dash, .star, .plus: return true - default: return false - } - } - - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token, isBullet(tok) else { return false } - guard context.index + 1 < context.tokens.count, - let next = context.tokens[context.index + 1] as? Token, - case .text(let s, _) = next, s.first?.isWhitespace == true else { - return false - } - if let ind = lineIndent(before: context.index, in: context) { return ind >= 0 } else { return false } - } - - public func build(context: inout CodeContext) { - func parseList(_ indent: Int, _ depth: Int) -> CodeNode { - let list = MarkdownUnorderedListNode(value: "", level: depth) - var isLoose = false - while context.index < context.tokens.count { - guard let bullet = context.tokens[context.index] as? Token, isBullet(bullet), lineIndent(before: context.index, in: context) == indent else { break } - let (node, loose) = parseItem(indent, depth) - if loose { isLoose = true } - list.addChild(node) - } - list.value = isLoose ? "loose" : "tight" - return list - } - - func parseItem(_ indent: Int, _ depth: Int) -> (CodeNode, Bool) { - var loose = false - // skip bullet and following whitespace - context.index += 1 - if context.index < context.tokens.count, - let t = context.tokens[context.index] as? Token, - case .text(let s, _) = t, s.first?.isWhitespace == true { - context.index += 1 - } - - let node = MarkdownListItemNode(value: "") - var text = "" - itemLoop: while context.index < context.tokens.count { - guard let tok = context.tokens[context.index] as? Token else { context.index += 1; continue } - switch tok { - case .newline: - context.index += 1 - // Check for blank line - if context.index < context.tokens.count, let nl = context.tokens[context.index] as? Token, case .newline = nl { - loose = true - context.index += 1 - } - let start = context.index - var spaces = 0 - if start < context.tokens.count, let sTok = context.tokens[start] as? Token, case .text(let s, _) = sTok, s.allSatisfy({ $0 == " " }) { - spaces = s.count - context.index += 1 - } - if context.index < context.tokens.count, let next = context.tokens[context.index] as? Token, isBullet(next), spaces > indent { - let sub = parseList(spaces, depth + 1) - node.addChild(sub) - if context.index < context.tokens.count, let nextTok = context.tokens[context.index] as? Token, isBullet(nextTok), (lineIndent(before: context.index, in: context) ?? 0) <= indent { - break itemLoop - } - } else if context.index < context.tokens.count, let next = context.tokens[context.index] as? Token, isBullet(next), spaces == indent { - context.index = start - break itemLoop - } else if spaces > indent { - text += "\n" - } else if spaces < indent { - context.index = start - break itemLoop - } else { - text += "\n" - } - case .eof: - context.index += 1 - break itemLoop - default: - text += tok.text - context.index += 1 - } - } - node.value = text.trimmingCharacters(in: .whitespaces) - return (node, loose) - } - - if let ind = lineIndent(before: context.index, in: context) { - let list = parseList(ind, 1) - context.currentNode.addChild(list) - } - } - } - -} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage.swift b/Sources/SwiftParser/Languages/MarkdownLanguage.swift deleted file mode 100644 index 5e3d70b..0000000 --- a/Sources/SwiftParser/Languages/MarkdownLanguage.swift +++ /dev/null @@ -1,200 +0,0 @@ -import Foundation - -public struct MarkdownLanguage: CodeLanguage { - - - - // Helper to parse inline content supporting nested emphasis/strong - static func parseInline(context: inout CodeContext, closing: Token, count: Int) -> ([CodeNode], Bool) { - var nodes: [CodeNode] = [] - var text = "" - var closed = false - var lastIndex = -1 - func flush() { - if !text.isEmpty { - nodes.append(MarkdownTextNode(value: text)) - text = "" - } - } - while context.index < context.tokens.count { - // Infinite loop protection - if index hasn't advanced, terminate parsing immediately - if context.index == lastIndex { - let currentTokenRange = context.index < context.tokens.count ? - (context.tokens[context.index] as? Token)?.range : nil - context.errors.append(CodeError("Infinite loop detected in parseInline at token index \(context.index). Terminating parse to prevent hang.", range: currentTokenRange)) - break - } - lastIndex = context.index - - guard let tok = context.tokens[context.index] as? Token else { context.index += 1; continue } - // Check for closing delimiter first - if tok.kindDescription == closing.kindDescription { - var idx = context.index - var cnt = 0 - while idx < context.tokens.count, let t = context.tokens[idx] as? Token, - t.kindDescription == closing.kindDescription { - cnt += 1; idx += 1 - } - if cnt == count { - context.index = idx - flush() - closed = true - break - } - } - - // Strong delimiter - if (tok.kindDescription == "*" || tok.kindDescription == "_") && - context.index + 1 < context.tokens.count, - let next = context.tokens[context.index + 1] as? Token, - next.kindDescription == tok.kindDescription { - flush() - context.index += 2 - let (inner, ok) = parseInline(context: &context, closing: tok, count: 2) - if ok { - let node = MarkdownStrongNode(value: "") - inner.forEach { node.addChild($0) } - nodes.append(node) - continue - } else { - text += tok.text + next.text - continue - } - } - - // Emphasis delimiter - if tok.kindDescription == "*" || tok.kindDescription == "_" { - flush() - context.index += 1 - let (inner, ok) = parseInline(context: &context, closing: tok, count: 1) - if ok { - let node = MarkdownEmphasisNode(value: "") - inner.forEach { node.addChild($0) } - nodes.append(node) - continue - } else { - text += tok.text - continue - } - } - - // Inline code - if tok.kindDescription == "`" { - flush() - context.index += 1 - var codeText = "" - while context.index < context.tokens.count { - if let t = context.tokens[context.index] as? Token { - if t.kindDescription == "`" { - context.index += 1 - let node = MarkdownInlineCodeNode(value: codeText) - nodes.append(node) - break - } else { - codeText += t.text - context.index += 1 - } - } else { context.index += 1 } - } - continue - } - - // TeX Formula - if tok.kindDescription == "$" { - flush() - let startIndex = context.index - let formulaStartRange = (context.tokens[context.index] as? Token)?.range - context.index += 1 - var formulaEndRange: Range? = nil - var foundClosing = false - while context.index < context.tokens.count { - if let t = context.tokens[context.index] as? Token { - if t.kindDescription == "$" { - formulaEndRange = t.range - context.index += 1 - foundClosing = true - break - } else { - context.index += 1 - } - } else { - context.index += 1 - } - } - - if foundClosing, let startRange = formulaStartRange, let endRange = formulaEndRange { - // Extract formula content using original input string - let formulaStart = startRange.upperBound - let formulaEnd = endRange.lowerBound - let formulaText = String(context.input[formulaStart.. [CodeNode] { - let eofRange = tokens.last?.range ?? input.startIndex.. Bool { - let voidTags: Set = ["area","base","br","col","embed","hr","img","input","link","meta","param","source","track","wbr"] - let pattern = #"<(/?)([A-Za-z][A-Za-z0-9]*)[^>]*?(\/?)>"# - guard let regex = try? NSRegularExpression(pattern: pattern, options: []) else { return false } - var stack: [String] = [] - let nsRange = NSRange(text.startIndex..? = nil) { - super.init(type: MarkdownLanguage.Element.root, value: value, range: range) - } -} - -public final class MarkdownParagraphNode: CodeNode { - public init(value: String = "", range: Range? = nil) { - super.init(type: MarkdownLanguage.Element.paragraph, value: value, range: range) - } -} - -public final class MarkdownHeadingNode: CodeNode { - public let level: Int - public init(value: String = "", level: Int, range: Range? = nil) { - self.level = level - super.init(type: MarkdownLanguage.Element.heading, value: value, range: range) - } - public override var id: Int { - var hasher = Hasher() - hasher.combine(String(describing: type)) - hasher.combine(value) - hasher.combine(level) - for child in children { hasher.combine(child.id) } - return hasher.finalize() - } -} - -public final class MarkdownTextNode: CodeNode { - public init(value: String = "", range: Range? = nil) { - super.init(type: MarkdownLanguage.Element.text, value: value, range: range) - } -} - -public final class MarkdownListItemNode: CodeNode { - public init(value: String = "", range: Range? = nil) { - super.init(type: MarkdownLanguage.Element.listItem, value: value, range: range) - } -} - -public final class MarkdownOrderedListItemNode: CodeNode { - public init(value: String = "", range: Range? = nil) { - super.init(type: MarkdownLanguage.Element.orderedListItem, value: value, range: range) - } -} - -public class MarkdownListNode: CodeNode { - public let level: Int - public init(type: any CodeElement, value: String = "", level: Int, range: Range? = nil) { - self.level = level - super.init(type: type, value: value, range: range) - } - public override var id: Int { - var hasher = Hasher() - hasher.combine(String(describing: type)) - hasher.combine(value) - hasher.combine(level) - for child in children { hasher.combine(child.id) } - return hasher.finalize() - } -} - -public final class MarkdownUnorderedListNode: MarkdownListNode { - public init(value: String = "", level: Int, range: Range? = nil) { - super.init(type: MarkdownLanguage.Element.unorderedList, value: value, level: level, range: range) - } -} - -public final class MarkdownOrderedListNode: MarkdownListNode { - public init(value: String = "", level: Int, range: Range? = nil) { - super.init(type: MarkdownLanguage.Element.orderedList, value: value, level: level, range: range) - } -} - -public final class MarkdownEmphasisNode: CodeNode { - public init(value: String = "", range: Range? = nil) { - super.init(type: MarkdownLanguage.Element.emphasis, value: value, range: range) - } -} - -public final class MarkdownStrongNode: CodeNode { - public init(value: String = "", range: Range? = nil) { - super.init(type: MarkdownLanguage.Element.strong, value: value, range: range) - } -} - -public final class MarkdownCodeBlockNode: CodeNode { - public let lang: String? - - public var content: String { - get { value } - set { value = newValue } - } - - public init(lang: String? = nil, content: String = "", range: Range? = nil) { - self.lang = lang - super.init(type: MarkdownLanguage.Element.codeBlock, value: content, range: range) - } -} - -public final class MarkdownInlineCodeNode: CodeNode { - public init(value: String = "", range: Range? = nil) { - super.init(type: MarkdownLanguage.Element.inlineCode, value: value, range: range) - } -} - -public final class MarkdownLinkNode: CodeNode { - public let text: [CodeNode] - public let url: String - - public init(text: [CodeNode], url: String, range: Range? = nil) { - self.text = text - self.url = url - super.init(type: MarkdownLanguage.Element.link, value: "", range: range) - text.forEach { addChild($0) } - } - - public override var id: Int { - var hasher = Hasher() - hasher.combine(String(describing: type)) - hasher.combine(url) - for child in children { hasher.combine(child.id) } - return hasher.finalize() - } -} - -public final class MarkdownBlockQuoteNode: CodeNode { - public init(value: String = "", range: Range? = nil) { - super.init(type: MarkdownLanguage.Element.blockQuote, value: value, range: range) - } -} - -public final class MarkdownThematicBreakNode: CodeNode { - public init(value: String = "", range: Range? = nil) { - super.init(type: MarkdownLanguage.Element.thematicBreak, value: value, range: range) - } -} - -public final class MarkdownImageNode: CodeNode { - public let alt: String - public let url: String - - public init(alt: String, url: String, range: Range? = nil) { - self.alt = alt - self.url = url - super.init(type: MarkdownLanguage.Element.image, value: "", range: range) - } - - public override var id: Int { - var hasher = Hasher() - hasher.combine(String(describing: type)) - hasher.combine(alt) - hasher.combine(url) - for child in children { hasher.combine(child.id) } - return hasher.finalize() - } -} - -public final class MarkdownHtmlNode: CodeNode { - public let closed: Bool - - public init(value: String = "", closed: Bool = false, range: Range? = nil) { - self.closed = closed - super.init(type: MarkdownLanguage.Element.html, value: value, range: range) - } - - public override var id: Int { - var hasher = Hasher() - hasher.combine(String(describing: type)) - hasher.combine(value) - hasher.combine(closed) - for child in children { hasher.combine(child.id) } - return hasher.finalize() - } -} - -public final class MarkdownEntityNode: CodeNode { - public init(value: String = "", range: Range? = nil) { - super.init(type: MarkdownLanguage.Element.entity, value: value, range: range) - } -} - -public final class MarkdownStrikethroughNode: CodeNode { - public init(value: String = "", range: Range? = nil) { - super.init(type: MarkdownLanguage.Element.strikethrough, value: value, range: range) - } -} - -public final class MarkdownTableNode: CodeNode { - public init(value: String = "", range: Range? = nil) { - super.init(type: MarkdownLanguage.Element.table, value: value, range: range) - } -} - -public final class MarkdownTableHeaderNode: CodeNode { - public init(value: String = "", range: Range? = nil) { - super.init(type: MarkdownLanguage.Element.tableHeader, value: value, range: range) - } -} - -public final class MarkdownTableRowNode: CodeNode { - public init(value: String = "", range: Range? = nil) { - super.init(type: MarkdownLanguage.Element.tableRow, value: value, range: range) - } -} - -public final class MarkdownTableCellNode: CodeNode { - public init(value: String = "", range: Range? = nil) { - super.init(type: MarkdownLanguage.Element.tableCell, value: value, range: range) - } -} - -public final class MarkdownAutoLinkNode: CodeNode { - public let url: String - - public init(url: String, range: Range? = nil) { - self.url = url - super.init(type: MarkdownLanguage.Element.autoLink, value: url, range: range) - } - - public override var id: Int { - var hasher = Hasher() - hasher.combine(String(describing: type)) - hasher.combine(url) - for child in children { hasher.combine(child.id) } - return hasher.finalize() - } -} - -public final class MarkdownLinkReferenceDefinitionNode: CodeNode { - public let identifier: String - public let url: String - - public init(identifier: String, url: String, range: Range? = nil) { - self.identifier = identifier - self.url = url - super.init(type: MarkdownLanguage.Element.linkReferenceDefinition, value: "", range: range) - } - - public override var id: Int { - var hasher = Hasher() - hasher.combine(String(describing: type)) - hasher.combine(identifier) - hasher.combine(url) - for child in children { hasher.combine(child.id) } - return hasher.finalize() - } -} - -public final class MarkdownFootnoteDefinitionNode: CodeNode { - public let identifier: String - public let text: String - - public init(identifier: String, text: String, range: Range? = nil) { - self.identifier = identifier - self.text = text - super.init(type: MarkdownLanguage.Element.footnoteDefinition, value: "", range: range) - } - - public override var id: Int { - var hasher = Hasher() - hasher.combine(String(describing: type)) - hasher.combine(identifier) - hasher.combine(text) - for child in children { hasher.combine(child.id) } - return hasher.finalize() - } -} - -public final class MarkdownFootnoteReferenceNode: CodeNode { - public let identifier: String - - public init(identifier: String, range: Range? = nil) { - self.identifier = identifier - super.init(type: MarkdownLanguage.Element.footnoteReference, value: "", range: range) - } - - public override var id: Int { - var hasher = Hasher() - hasher.combine(String(describing: type)) - hasher.combine(identifier) - for child in children { hasher.combine(child.id) } - return hasher.finalize() - } -} - -public final class MarkdownInlineTexFormulaNode: CodeNode { - public let formula: String - - public init(formula: String, range: Range? = nil) { - self.formula = formula - super.init(type: MarkdownLanguage.Element.inlineTexFormula, value: formula, range: range) - } - - public override var id: Int { - var hasher = Hasher() - hasher.combine(String(describing: type)) - hasher.combine(formula) - for child in children { hasher.combine(child.id) } - return hasher.finalize() - } -} - -public final class MarkdownBlockTexFormulaNode: CodeNode { - public let formula: String - - public init(formula: String, range: Range? = nil) { - self.formula = formula - super.init(type: MarkdownLanguage.Element.blockTexFormula, value: formula, range: range) - } - - public override var id: Int { - var hasher = Hasher() - hasher.combine(String(describing: type)) - hasher.combine(formula) - for child in children { hasher.combine(child.id) } - return hasher.finalize() - } -} diff --git a/Sources/SwiftParser/Languages/PythonLanguage.swift b/Sources/SwiftParser/Languages/PythonLanguage.swift deleted file mode 100644 index 31e7033..0000000 --- a/Sources/SwiftParser/Languages/PythonLanguage.swift +++ /dev/null @@ -1,333 +0,0 @@ -import Foundation - -public struct PythonLanguage: CodeLanguage { - public enum Element: String, CodeElement { - case root - case statement - case identifier - case number - case string - case assignment - case function - case parameters - case body - case expression - } - - public enum Token: CodeToken { - case identifier(String, Range) - case number(String, Range) - case string(String, Range) - case unterminatedString(String, Range) - case keyword(String, Range) - case equal(Range) - case colon(Range) - case comma(Range) - case plus(Range) - case minus(Range) - case star(Range) - case slash(Range) - case lparen(Range) - case rparen(Range) - case newline(Range) - case eof(Range) - - public var kindDescription: String { - switch self { - case .identifier: return "identifier" - case .number: return "number" - case .string: return "string" - case .unterminatedString: return "unterminatedString" - case .keyword(let k, _): return "keyword(\(k))" - case .equal: return "=" - case .colon: return ":" - case .comma: return "," - case .plus: return "+" - case .minus: return "-" - case .star: return "*" - case .slash: return "/" - case .lparen: return "(" - case .rparen: return ")" - case .newline: return "newline" - case .eof: return "eof" - } - } - - public var text: String { - switch self { - case let .identifier(s, _), let .number(s, _), let .string(s, _), let .keyword(s, _): - return s - case let .unterminatedString(s, _): - return s - case .equal: return "=" - case .colon: return ":" - case .comma: return "," - case .plus: return "+" - case .minus: return "-" - case .star: return "*" - case .slash: return "/" - case .lparen: return "(" - case .rparen: return ")" - case .newline: return "\n" - case .eof: return "" - } - } - - public var range: Range { - switch self { - case .identifier(_, let r), .number(_, let r), .string(_, let r), .unterminatedString(_, let r), .keyword(_, let r), .equal(let r), - .colon(let r), .comma(let r), .plus(let r), .minus(let r), .star(let r), .slash(let r), - .lparen(let r), .rparen(let r), .newline(let r), .eof(let r): - return r - } - } - } - - public class Tokenizer: CodeTokenizer { - public init() {} - - public func tokenize(_ input: String) -> [any CodeToken] { - var tokens: [Token] = [] - var index = input.startIndex - func advance() { index = input.index(after: index) } - func add(_ token: Token) { tokens.append(token) } - - while index < input.endIndex { - let ch = input[index] - if ch.isWhitespace { - if ch == "\n" { - let start = index - advance() - add(.newline(start.. Bool { - guard let t = token as? Token else { return false } - switch t { - case .number, .identifier, .lparen: - return true - default: - return false - } - } - - public func prefix(context: inout CodeContext, token: any CodeToken) -> CodeNode? { - guard let t = token as? Token else { return nil } - switch t { - case .number(let text, let range): - return CodeNode(type: Element.number, value: text, range: range) - case .identifier(let text, let range): - return CodeNode(type: Element.identifier, value: text, range: range) - case .unterminatedString(let text, let range): - context.errors.append(CodeError("Unterminated string", range: range)) - return CodeNode(type: Element.string, value: text, range: range) - case .lparen: - let node = parse(context: &context, minBP: 0) - if context.index < context.tokens.count, let r = context.tokens[context.index] as? Token, case .rparen = r { - context.index += 1 - } - return node - default: - return nil - } - } - - public func infixBindingPower(of token: any CodeToken) -> (left: Int, right: Int)? { - guard let t = token as? Token else { return nil } - switch t { - case .plus, .minus: - return (10, 11) - case .star, .slash: - return (20, 21) - default: - return nil - } - } - - public func infix(context: inout CodeContext, left: CodeNode, token: any CodeToken, right: CodeNode) -> CodeNode { - let text = token.text - let node = CodeNode(type: Element.expression, value: text, range: token.range) - node.addChild(left) - node.addChild(right) - return node - } - } - - public class AssignmentBuilder: CodeElementBuilder { - private let expr: ExpressionBuilder - - public init(expressionBuilder: ExpressionBuilder) { - self.expr = expressionBuilder - } - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard context.index + 2 < context.tokens.count else { return false } - if let tok = context.tokens[context.index] as? Token, - case .identifier = tok, - let eq = context.tokens[context.index + 1] as? Token, - case .equal = eq { - return true - } - return false - } - - public func build(context: inout CodeContext) { - guard let identifierTok = context.tokens[context.index] as? Token else { return } - let node = CodeNode(type: Element.assignment, value: identifierTok.text) - context.currentNode.addChild(node) - context.index += 2 // skip identifier and '=' - - if let exprNode = expr.parse(context: &context) { - node.addChild(exprNode) - } - if context.index < context.tokens.count, - let nl = context.tokens[context.index] as? Token, - case .newline = nl { - context.index += 1 - } - } - } - - public class NewlineBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - if case .newline = tok { return true } - return false - } - public func build(context: inout CodeContext) { - context.index += 1 - } - } - - public class FunctionBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - if case .keyword("def", _) = tok { return true } - return false - } - - public func build(context: inout CodeContext) { - // def name():\n - context.index += 1 // skip 'def' - guard let nameTok = context.tokens[context.index] as? Token else { return } - let funcNode = CodeNode(type: Element.function, value: nameTok.text) - context.currentNode.addChild(funcNode) - context.index += 1 // skip name - // skip params - if let lparen = context.tokens[context.index] as? Token, case .lparen = lparen { - context.index += 1 - let paramsNode = CodeNode(type: Element.parameters, value: "") - funcNode.addChild(paramsNode) - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - switch tok { - case .identifier: - paramsNode.addChild(CodeNode(type: Element.identifier, value: tok.text)) - context.index += 1 - if let comma = context.tokens[context.index] as? Token, case .comma = comma { - context.index += 1 - } - case .rparen: - context.index += 1 - break - default: - context.index += 1 - } - if case .rparen = tok { break } - } - } - } - if let colon = context.tokens[context.index] as? Token, case .colon = colon { - context.index += 1 - } - let bodyNode = CodeNode(type: Element.body, value: "") - funcNode.addChild(bodyNode) - // consume until newline or eof - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token, case .newline = tok { context.index += 1; break } - context.index += 1 - } - } - } - - public var tokenizer: CodeTokenizer { Tokenizer() } - - public var builders: [CodeElementBuilder] { - let expr = ExpressionBuilder() - return [NewlineBuilder(), FunctionBuilder(), AssignmentBuilder(expressionBuilder: expr)] - } - - public var expressionBuilders: [CodeExpressionBuilder] { [ExpressionBuilder()] } - - public var rootElement: any CodeElement { Element.root } - - public init() {} -} diff --git a/Sources/SwiftParser/SwiftParser.swift b/Sources/SwiftParser/SwiftParser.swift index f24eee5..81f3376 100644 --- a/Sources/SwiftParser/SwiftParser.swift +++ b/Sources/SwiftParser/SwiftParser.swift @@ -5,21 +5,11 @@ public struct SwiftParser { public init() {} public func parse(_ source: String, language: CodeLanguage) -> ParsedSource { - let root: CodeNode - if language is MarkdownLanguage { - root = MarkdownRootNode(value: "") - } else { - root = CodeNode(type: language.rootElement, value: "") - } - let parser = CodeParser(tokenizer: language.tokenizer, builders: language.builders, expressionBuilders: language.expressionBuilders) + let root = CodeNode(type: language.rootElement, value: "") + let parser = CodeParser(language: language) let result = parser.parse(source, rootNode: root) return ParsedSource(content: source, root: result.node, errors: result.context.errors) } - - /// Convenience method using Python language by default - public func parse(_ source: String) -> ParsedSource { - return parse(source, language: PythonLanguage()) - } } /// Represents a parsed source file diff --git a/Sources/SwiftParserShowCase/ContentView.swift b/Sources/SwiftParserShowCase/ContentView.swift deleted file mode 100644 index 01bed24..0000000 --- a/Sources/SwiftParserShowCase/ContentView.swift +++ /dev/null @@ -1,82 +0,0 @@ -import SwiftUI -import SwiftParser - -struct ContentView: View { - enum DemoLanguage: String, CaseIterable, Identifiable { - case python - case markdown - var id: String { rawValue } - - var language: CodeLanguage { - switch self { - case .python: return PythonLanguage() - case .markdown: return MarkdownLanguage() - } - } - } - - @State private var language: DemoLanguage = .python - @State private var sourceCode: String = """ -print("Hello") -""" - @State private var parsedResult: String = "" - private let parser = SwiftParser() - - var body: some View { - NavigationView { - VStack(spacing: 20) { - Text("SwiftParser ShowCase") - .font(.largeTitle) - .fontWeight(.bold) - .padding() - - Picker("Language", selection: $language) { - ForEach(DemoLanguage.allCases) { lang in - Text(lang.rawValue.capitalized).tag(lang) - } - }.pickerStyle(.segmented) - .padding(.horizontal) - - VStack(alignment: .leading, spacing: 10) { - Text("Source Code:") - .font(.headline) - - TextEditor(text: $sourceCode) - .font(.system(.body, design: .monospaced)) - .padding(8) - .background(Color.gray.opacity(0.1)) - .cornerRadius(8) - .frame(minHeight: 200) - } - - Button("Parse Code") { - let result = parser.parse(sourceCode, language: language.language) - parsedResult = "Errors: \(result.errors.count), children: \(result.root.children.count)" - } - .buttonStyle(.borderedProminent) - .padding() - - if !parsedResult.isEmpty { - VStack(alignment: .leading, spacing: 10) { - Text("Parse Result:") - .font(.headline) - - Text(parsedResult) - .font(.system(.body, design: .monospaced)) - .padding(8) - .background(Color.green.opacity(0.1)) - .cornerRadius(8) - .frame(maxWidth: .infinity, alignment: .leading) - } - } - - Spacer() - } - .padding() - } - } -} - -#Preview { - ContentView() -} diff --git a/Sources/SwiftParserShowCase/SwiftParserShowCaseApp.swift b/Sources/SwiftParserShowCase/SwiftParserShowCaseApp.swift deleted file mode 100644 index f13fbaf..0000000 --- a/Sources/SwiftParserShowCase/SwiftParserShowCaseApp.swift +++ /dev/null @@ -1,10 +0,0 @@ -import SwiftUI - -@main -struct SwiftParserShowCaseApp: App { - var body: some Scene { - WindowGroup { - ContentView() - } - } -} diff --git a/Tests/SwiftParserTests/SwiftParserTests.swift b/Tests/SwiftParserTests/SwiftParserTests.swift index 2a72e08..df3bc7c 100644 --- a/Tests/SwiftParserTests/SwiftParserTests.swift +++ b/Tests/SwiftParserTests/SwiftParserTests.swift @@ -1,669 +1,22 @@ import XCTest @testable import SwiftParser -final class SwiftParserTests: XCTestCase { +enum DummyElement: CodeElement { + case root + case identifier + case number +} +final class SwiftParserTests: XCTestCase { func testParserInitialization() { let parser = SwiftParser() XCTAssertNotNil(parser) } - func testPythonAssignment() { - let parser = SwiftParser() - let source = "x = 1" - let result = parser.parse(source, language: PythonLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.first?.type as? PythonLanguage.Element, PythonLanguage.Element.assignment) - } - - func testMarkdownHeading() { - let parser = SwiftParser() - let source = "# Title\nHello" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.count, 2) - let heading = result.root.children.first as? MarkdownHeadingNode - XCTAssertEqual(heading?.level, 1) - } - - func testMarkdownComplexATXHeading() { - let parser = SwiftParser() - let source = "### Complex ###\n" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.count, 1) - let heading = result.root.children.first as? MarkdownHeadingNode - XCTAssertEqual(heading?.type as? MarkdownLanguage.Element, .heading) - XCTAssertEqual(heading?.value, "Complex") - XCTAssertEqual(heading?.level, 3) - } - - func testMarkdownSetextHeading() { - let parser = SwiftParser() - let source = "Title\n----\n" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - let heading = result.root.children.first as? MarkdownHeadingNode - XCTAssertEqual(heading?.type as? MarkdownLanguage.Element, .heading) - XCTAssertEqual(heading?.level, 2) - } - - func testMarkdownListItem() { - let parser = SwiftParser() - let source = "- item1\n- item2" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.count, 1) - let list = result.root.children.first - XCTAssertEqual(list?.type as? MarkdownLanguage.Element, .unorderedList) - XCTAssertEqual(list?.children.count, 2) - XCTAssertEqual(list?.children.first?.type as? MarkdownLanguage.Element, .listItem) - XCTAssertEqual((list as? MarkdownUnorderedListNode)?.level, 1) - } - - func testMarkdownOrderedList() { - let parser = SwiftParser() - let source = "1. first\n2. second" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - let list = result.root.children.first - XCTAssertEqual(list?.type as? MarkdownLanguage.Element, .orderedList) - XCTAssertEqual(list?.children.count, 2) - XCTAssertEqual(list?.children.first?.type as? MarkdownLanguage.Element, .orderedListItem) - XCTAssertEqual((list as? MarkdownOrderedListNode)?.level, 1) - } - - func testMarkdownNestedList() { - let parser = SwiftParser() - let source = "- item1\n - sub\n- item2" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - let list = result.root.children.first - XCTAssertEqual(list?.type as? MarkdownLanguage.Element, .unorderedList) - XCTAssertEqual(list?.children.count, 2) - let sub = list?.children.first?.children.first - XCTAssertEqual(sub?.type as? MarkdownLanguage.Element, .unorderedList) - XCTAssertEqual((list as? MarkdownUnorderedListNode)?.level, 1) - XCTAssertEqual((sub as? MarkdownUnorderedListNode)?.level, 2) - } - - func testMarkdownLooseList() { - let parser = SwiftParser() - let source = "- a\n\n- b" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.first?.value, "loose") - } - - func testMarkdownEmphasisAndStrong() { - let parser = SwiftParser() - let source = "*em* **strong**" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.count, 1) - let para = result.root.children.first - XCTAssertEqual(para?.type as? MarkdownLanguage.Element, .paragraph) - XCTAssertEqual(para?.children.first?.type as? MarkdownLanguage.Element, .emphasis) - XCTAssertEqual(para?.children.last?.type as? MarkdownLanguage.Element, .strong) - } - - func testMarkdownNestedEmphasis() { - let parser = SwiftParser() - let source = "*a **b** c*" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.count, 1) - let para = result.root.children.first - XCTAssertEqual(para?.type as? MarkdownLanguage.Element, .paragraph) - let em = para?.children.first - XCTAssertEqual(em?.type as? MarkdownLanguage.Element, .emphasis) - XCTAssertEqual(em?.children.count, 3) - XCTAssertEqual(em?.children[1].type as? MarkdownLanguage.Element, .strong) - } - - func testMarkdownCodeBlockAndInline() { - let parser = SwiftParser() - let source = "```\ncode\n```\ninline `code`" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - let block = result.root.children.first as? MarkdownCodeBlockNode - XCTAssertEqual(block?.type as? MarkdownLanguage.Element, .codeBlock) - XCTAssertNil(block?.lang) - XCTAssertEqual(block?.content, "code\n") - let para = result.root.children.last - XCTAssertEqual(para?.type as? MarkdownLanguage.Element, .paragraph) - XCTAssertEqual(para?.children.last?.type as? MarkdownLanguage.Element, .inlineCode) - } - - func testMarkdownFencedCodeBlockWithInfo() { - let parser = SwiftParser() - let source = "```swift\nprint(\"hi\")\n```" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.count, 1) - let block = result.root.children.first as? MarkdownCodeBlockNode - XCTAssertEqual(block?.type as? MarkdownLanguage.Element, .codeBlock) - XCTAssertEqual(block?.lang, "swift") - XCTAssertEqual(block?.content, "print(\"hi\")\n") - } - - func testMarkdownTildeCodeBlock() { - let parser = SwiftParser() - let source = "~~~\nprint(\"hi\")\n~~~" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.count, 1) - let block = result.root.children.first as? MarkdownCodeBlockNode - XCTAssertEqual(block?.type as? MarkdownLanguage.Element, .codeBlock) - XCTAssertNil(block?.lang) - XCTAssertEqual(block?.content, "print(\"hi\")\n") - } - - func testMarkdownMultiLineFencedCodeBlock() { - let parser = SwiftParser() - let source = "```swift\nprint(\"hi\")\nprint(\"bye\")\n```" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.count, 1) - let block = result.root.children.first as? MarkdownCodeBlockNode - XCTAssertEqual(block?.lang, "swift") - XCTAssertEqual(block?.content, "print(\"hi\")\nprint(\"bye\")\n") - } - - func testMarkdownLink() { - let parser = SwiftParser() - let source = "[title](url)" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.first?.type as? MarkdownLanguage.Element, .link) - let link = result.root.children.first as? MarkdownLinkNode - XCTAssertEqual(link?.url, "url") - XCTAssertEqual((link?.text.first as? MarkdownTextNode)?.value, "title") - } - - func testMarkdownAutoLinkWithoutBrackets() { - let parser = SwiftParser() - let source = "https://example.com" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.first?.type as? MarkdownLanguage.Element, .autoLink) - let auto = result.root.children.first as? MarkdownAutoLinkNode - XCTAssertEqual(auto?.url, "https://example.com") - } - - func testMarkdownReferenceLink() { - let parser = SwiftParser() - let source = "[title][ref]\n[ref]: http://example.com" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.first?.type as? MarkdownLanguage.Element, .link) - } - - func testMarkdownBlockQuote() { - let parser = SwiftParser() - let source = "> quote" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.first?.type as? MarkdownLanguage.Element, .blockQuote) - } - - func testMarkdownImage() { - let parser = SwiftParser() - let source = "![alt](url)" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.first?.type as? MarkdownLanguage.Element, .image) - let image = result.root.children.first as? MarkdownImageNode - XCTAssertEqual(image?.alt, "alt") - XCTAssertEqual(image?.url, "url") - } - - func testMarkdownEscapedCharacters() { - let parser = SwiftParser() - let source = "\\*not italic\\*" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.count, 1) - XCTAssertEqual(result.root.children.first?.value, "*not italic*") - } - - func testMarkdownHardBreakWithSpaces() { - let parser = SwiftParser() - let source = "line1 \nline2" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.count, 1) - XCTAssertEqual(result.root.children.first?.type as? MarkdownLanguage.Element, .paragraph) - XCTAssertEqual(result.root.children.first?.value, "line1\nline2") - } - - func testMarkdownHardBreakWithBackslash() { - let parser = SwiftParser() - let source = "line1\\\nline2" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.count, 1) - XCTAssertEqual(result.root.children.first?.type as? MarkdownLanguage.Element, .paragraph) - XCTAssertEqual(result.root.children.first?.value, "line1\nline2") - } - - func testMarkdownEntityDecoding() { - let parser = SwiftParser() - let source = "&#A" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.count, 3) - XCTAssertEqual(result.root.children[0].value, "&") - XCTAssertEqual(result.root.children[1].value, "#") - XCTAssertEqual(result.root.children[2].value, "A") - } - - func testPrattExpression() { - let parser = SwiftParser() - let source = "x = 1 + 2 * 3" - let result = parser.parse(source, language: PythonLanguage()) - XCTAssertEqual(result.errors.count, 0) - let assign = result.root.children.first - XCTAssertEqual(assign?.children.first?.type as? PythonLanguage.Element, PythonLanguage.Element.expression) - } - - func testStableNodeID() { - let n1 = CodeNode(type: PythonLanguage.Element.identifier, value: "x") - n1.addChild(CodeNode(type: PythonLanguage.Element.number, value: "1")) - - let n2 = CodeNode(type: PythonLanguage.Element.identifier, value: "x") - n2.addChild(CodeNode(type: PythonLanguage.Element.number, value: "1")) - - XCTAssertEqual(n1.id, n2.id) - } - - func testUnterminatedStringError() { - let parser = SwiftParser() - let source = "x = \"hello" - let result = parser.parse(source, language: PythonLanguage()) - XCTAssertEqual(result.errors.count, 1) - } - - func testContextSnapshotRestore() { - let tokenizer = PythonLanguage.Tokenizer() - let tokens = tokenizer.tokenize("x = 1") - let root = CodeNode(type: PythonLanguage.Element.root, value: "") - var ctx = CodeContext(tokens: tokens, index: 0, currentNode: root, errors: [], input: "x = 1") - let snap = ctx.snapshot() - ctx.index = 2 - ctx.errors.append(CodeError("err")) - ctx.currentNode.addChild(CodeNode(type: PythonLanguage.Element.number, value: "1")) - ctx.restore(snap) - XCTAssertEqual(ctx.index, 0) - XCTAssertEqual(ctx.errors.count, 0) - XCTAssertEqual(root.children.count, 0) - } - - func testIncrementalUpdateRollback() { - let lang = PythonLanguage() - let parser = CodeParser(tokenizer: lang.tokenizer, builders: lang.builders, expressionBuilders: lang.expressionBuilders) - let root = CodeNode(type: lang.rootElement, value: "") - _ = parser.parse("x = 1", rootNode: root) - XCTAssertEqual(root.children.first?.children.first?.value, "1") - _ = parser.update("x = 2", rootNode: root) - XCTAssertEqual(root.children.first?.children.first?.value, "2") - } - - func testUnregisterElementBuilder() { - let tokenizer = PythonLanguage.Tokenizer() - let expr = PythonLanguage.ExpressionBuilder() - let assign = PythonLanguage.AssignmentBuilder(expressionBuilder: expr) - let parser = CodeParser(tokenizer: tokenizer) - parser.register(builder: assign) - parser.register(expressionBuilder: expr) - - let root1 = CodeNode(type: PythonLanguage.Element.root, value: "") - _ = parser.parse("x = 1", rootNode: root1) - XCTAssertEqual(root1.children.first?.type as? PythonLanguage.Element, .assignment) - - parser.unregister(builder: assign) - - let root2 = CodeNode(type: PythonLanguage.Element.root, value: "") - _ = parser.parse("x = 1", rootNode: root2) - XCTAssertEqual(root2.children.first?.type as? PythonLanguage.Element, .identifier) - } - - func testUnregisterExpressionBuilder() { - let tokenizer = PythonLanguage.Tokenizer() - let expr = PythonLanguage.ExpressionBuilder() - let parser = CodeParser(tokenizer: tokenizer) - parser.register(expressionBuilder: expr) - - let root1 = CodeNode(type: PythonLanguage.Element.root, value: "") - _ = parser.parse("1 + 2", rootNode: root1) - XCTAssertEqual(root1.children.count, 1) - - parser.unregister(expressionBuilder: expr) - - let root2 = CodeNode(type: PythonLanguage.Element.root, value: "") - _ = parser.parse("1 + 2", rootNode: root2) - XCTAssertEqual(root2.children.count, 0) - } - - // MARK: - Additional CommonMark Tests - - func testMarkdownThematicBreak() { - let parser = SwiftParser() - let source = "***\n" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.count, 1) - XCTAssertEqual(result.root.children.first?.type as? MarkdownLanguage.Element, .thematicBreak) - } - - func testMarkdownHTMLBlock() { - let parser = SwiftParser() - let source = "
" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.first?.type as? MarkdownLanguage.Element, .html) - XCTAssertEqual(result.root.children.first?.value, "br") - } - - func testMarkdownStrikethrough() { - let parser = SwiftParser() - let source = "~~strike~~" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.first?.type as? MarkdownLanguage.Element, .strikethrough) - XCTAssertEqual(result.root.children.first?.value, "strike") - } - - func testMarkdownTable() { - let parser = SwiftParser() - let source = "|a|b|\n|---|---|\n|c|d|" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - let table = result.root.children.first as? MarkdownTableNode - XCTAssertNotNil(table) - XCTAssertEqual(table?.children.count, 2) - let header = table?.children.first as? MarkdownTableHeaderNode - XCTAssertEqual(header?.children.first?.children.first?.value, "a") - XCTAssertEqual(header?.children.last?.children.first?.value, "b") - let row = table?.children.last as? MarkdownTableRowNode - XCTAssertEqual(row?.children.first?.children.first?.value, "c") - XCTAssertEqual(row?.children.last?.children.first?.value, "d") - } - - func testMarkdownTableVariants() { - let sources = [ - "| Name | Age |\n|-------|-----|\n| Alice | 25 |\n| Bob | 30 |", - "| Name | Age |\n|-------|:-----:|\n| Alice | 25 |\n| Bob | 30 |", - "| Name | Age |\n|-------|-----|\n| Alice | 25 |\n| Bob | 30 " - ] - for src in sources { - let parser = SwiftParser() - let result = parser.parse(src, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - let table = result.root.children.first as? MarkdownTableNode - XCTAssertNotNil(table) - XCTAssertEqual(table?.children.count, 3) - let header = table?.children.first as? MarkdownTableHeaderNode - XCTAssertEqual(header?.children[0].children.first?.value, "Name") - XCTAssertEqual(header?.children[1].children.first?.value, "Age") - } - } - - func testMarkdownLinkReferenceDefinition() { - let parser = SwiftParser() - let source = "[ref]: http://example.com" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - XCTAssertEqual(result.root.children.first?.type as? MarkdownLanguage.Element, .linkReferenceDefinition) - let def = result.root.children.first as? MarkdownLinkReferenceDefinitionNode - XCTAssertEqual(def?.identifier, "ref") - XCTAssertEqual(def?.url, "http://example.com") - } - - func testMarkdownFootnoteDefinition() { - let parser = SwiftParser() - let source = "[^1]: footnote text" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - let node = result.root.children.first as? MarkdownFootnoteDefinitionNode - XCTAssertEqual(node?.identifier, "1") - XCTAssertEqual(node?.text, "footnote text") - } - - func testMarkdownFootnoteReference() { - let parser = SwiftParser() - let source = "[^1]" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - let node = result.root.children.first as? MarkdownFootnoteReferenceNode - XCTAssertEqual(node?.identifier, "1") - } - - func testHTMLNodeClosed() { - let parser = SwiftParser() - let source = """ - - - - - Example Page - - -

Welcome to my webpage

- - -""" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - let html = result.root.children.first as? MarkdownHtmlNode - XCTAssertEqual(html?.closed, true) - XCTAssertEqual(html?.value, source) - } - - func testHTMLNodeUnclosed() { - let parser = SwiftParser() - let source = """ - - - - - Example Page - - -

Welcome to my webpage

-""" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - let html = result.root.children.first as? MarkdownHtmlNode - XCTAssertEqual(html?.closed, false) - XCTAssertEqual(html?.value, source) - } - - func testMarkdownAllFeatures() { - let parser = SwiftParser() - let source = """ -# ATX Heading - -Setext Heading --------------- - -Paragraph with **strong** and *em* text, ~~strike~~, and `code`, plus [link](url) and [ref][id], auto links and https://bare.com, ![alt](img.png). - -line1 \nline2 -line3\\ -line4 - -&#A - -> Quote - -1. One -2. Two - - Sub - -- Bullet - -*** - -|a|b| - -
- -```swift -print("hi") -``` - -~~~ -tilde -~~~ - - indented - code - -[id]: http://example.com -[^1]: footnote text -""" - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - - var elements: Set = [] - func collect(_ node: CodeNode) { - if let e = node.type as? MarkdownLanguage.Element { - elements.insert(e) - } - for child in node.children { collect(child) } - } - collect(result.root) - - let expected: [MarkdownLanguage.Element] = [ - .heading, .paragraph, .orderedList, .orderedListItem, - .emphasis, .strong, .strikethrough, .inlineCode, - .link, .image, .blockQuote, .html, .entity - ] - for e in expected { - XCTAssertTrue(elements.contains(e), "Missing \(e)") - } - } - - func testMarkdownInlineTexFormula() { - let parser = SwiftParser() - let source = "Here is an inline formula $E = mc^2$ in the text." - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - - // Should have one paragraph node - XCTAssertEqual(result.root.children.count, 1) - let paragraph2 = result.root.children.first as? MarkdownParagraphNode - XCTAssertNotNil(paragraph2) - - // Paragraph should contain text, TeX formula and more text - XCTAssertEqual(paragraph2?.children.count, 3) - - // Check TeX formula node - let texNode = paragraph2?.children[1] as? MarkdownInlineTexFormulaNode - XCTAssertNotNil(texNode) - XCTAssertEqual(texNode?.formula, "E = mc^2") - XCTAssertEqual(texNode?.type as? MarkdownLanguage.Element, .inlineTexFormula) - } - - func testMarkdownBlockTexFormula() { - let parser = SwiftParser() - let source = """ - Block formula: - $$ - \\int_{-\\infty}^{\\infty} e^{-x^2} dx = \\sqrt{\\pi} - $$ - """ - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - - // Should have paragraph and block formula - XCTAssertEqual(result.root.children.count, 2) - - // Check block TeX formula node - let texNode = result.root.children[1] as? MarkdownBlockTexFormulaNode - XCTAssertNotNil(texNode) - XCTAssertEqual(texNode?.formula, "\\int_{-\\infty}^{\\infty} e^{-x^2} dx = \\sqrt{\\pi}") - XCTAssertEqual(texNode?.type as? MarkdownLanguage.Element, .blockTexFormula) - } - - func testMarkdownMixedTexFormulas() { - let parser = SwiftParser() - let source = """ - 这是一个包含TeX公式的文档。 - - 内联公式:$\\alpha + \\beta = \\gamma$ 这里。 - - 块级公式: - $$ - \\sum_{n=1}^{\\infty} \\frac{1}{n^2} = \\frac{\\pi^2}{6} - $$ - - 更多文本和另一个内联公式 $f(x) = x^2$ 结束。 - """ - let result = parser.parse(source, language: MarkdownLanguage()) - XCTAssertEqual(result.errors.count, 0) - - // 收集所有TeX公式节点 - var inlineFormulas: [MarkdownInlineTexFormulaNode] = [] - var blockFormulas: [MarkdownBlockTexFormulaNode] = [] - - func collect(_ node: CodeNode) { - if let inlineTex = node as? MarkdownInlineTexFormulaNode { - inlineFormulas.append(inlineTex) - } else if let blockTex = node as? MarkdownBlockTexFormulaNode { - blockFormulas.append(blockTex) - } - for child in node.children { collect(child) } - } - collect(result.root) - - // 验证找到的公式 - XCTAssertEqual(inlineFormulas.count, 2) - XCTAssertEqual(blockFormulas.count, 1) - - XCTAssertEqual(inlineFormulas[0].formula, "\\alpha + \\beta = \\gamma") - XCTAssertEqual(inlineFormulas[1].formula, "f(x) = x^2") - XCTAssertEqual(blockFormulas[0].formula, "\\sum_{n=1}^{\\infty} \\frac{1}{n^2} = \\frac{\\pi^2}{6}") - } - - func testMarkdownTexFormulaEdgeCases() { - let parser = SwiftParser() - - // 测试不完整的内联公式(没有闭合的$) - let source1 = "不完整的公式 $E = mc^2" - let result1 = parser.parse(source1, language: MarkdownLanguage()) - XCTAssertEqual(result1.errors.count, 0) - // 应该被解析为普通文本 - - // 测试不完整的块级公式(没有闭合的$$) - let source2 = "$$\\int x dx" - let result2 = parser.parse(source2, language: MarkdownLanguage()) - XCTAssertEqual(result2.errors.count, 0) - // 应该被解析为普通文本 - - // 测试空的公式 - let source3 = "空公式 $$ 这里。" - let result3 = parser.parse(source3, language: MarkdownLanguage()) - XCTAssertEqual(result3.errors.count, 0) - - var inlineFormulas: [MarkdownInlineTexFormulaNode] = [] - func collect(_ node: CodeNode) { - if let inlineTex = node as? MarkdownInlineTexFormulaNode { - inlineFormulas.append(inlineTex) - } - for child in node.children { collect(child) } - } - collect(result3.root) - - XCTAssertEqual(inlineFormulas.count, 1) - XCTAssertEqual(inlineFormulas[0].formula, "") - } - func testCodeNodeASTOperations() { - let root = CodeNode(type: PythonLanguage.Element.root, value: "") - let a = CodeNode(type: PythonLanguage.Element.identifier, value: "a") - let b = CodeNode(type: PythonLanguage.Element.identifier, value: "b") + let root = CodeNode(type: DummyElement.root, value: "") + let a = CodeNode(type: DummyElement.identifier, value: "a") + let b = CodeNode(type: DummyElement.identifier, value: "b") root.addChild(a) root.insertChild(b, at: 0) @@ -674,15 +27,15 @@ tilde XCTAssertNil(removed.parent) XCTAssertEqual(root.children.count, 1) - let num = CodeNode(type: PythonLanguage.Element.number, value: "1") + let num = CodeNode(type: DummyElement.number, value: "1") root.replaceChild(at: 0, with: num) XCTAssertEqual(root.children.first?.value, "1") num.removeFromParent() XCTAssertEqual(root.children.count, 0) - let idX = CodeNode(type: PythonLanguage.Element.identifier, value: "x") - let num2 = CodeNode(type: PythonLanguage.Element.number, value: "2") + let idX = CodeNode(type: DummyElement.identifier, value: "x") + let num2 = CodeNode(type: DummyElement.number, value: "2") root.addChild(idX) root.addChild(num2) @@ -690,10 +43,10 @@ tilde root.traverseDepthFirst { collected.append($0) } XCTAssertEqual(collected.count, 3) - let found = root.first { ($0.type as? PythonLanguage.Element) == .number } + let found = root.first { ($0.type as? DummyElement) == .number } XCTAssertEqual(found?.value, "2") - let allIds = root.findAll { ($0.type as? PythonLanguage.Element) == .identifier } + let allIds = root.findAll { ($0.type as? DummyElement) == .identifier } XCTAssertEqual(allIds.count, 1) XCTAssertEqual(allIds.first?.value, "x")